Repository: alibaba/OpenSandbox
Branch: main
Commit: 4cfdce25f870
Files: 1108
Total size: 5.8 MB

Directory structure:
gitextract_ee6cl8k3/

├── .github/
│   ├── CODEOWNERS
│   ├── ISSUE_TEMPLATE/
│   │   ├── FEATURE_REQUEST.md
│   │   └── config.yml
│   ├── pull_request_template.md
│   └── workflows/
│       ├── deploy-docs-pages.yml
│       ├── egress-test.yaml.yml
│       ├── execd-test.yml
│       ├── ingress-test.yaml
│       ├── publish-components.yml
│       ├── publish-csharp-sdks.yml
│       ├── publish-helm-chart.yml
│       ├── publish-java-sdks.yml
│       ├── publish-js-sdks.yml
│       ├── publish-python-sdks.yml
│       ├── publish-server.yml
│       ├── real-e2e.yml
│       ├── sandbox-k8s-e2e.yml
│       ├── sandbox-k8s-test.yml
│       ├── sdk-tests.yml
│       ├── server-test.yml
│       └── verify-license.yml
├── .gitignore
├── .pre-commit-config.yaml
├── AGENTS.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── SECURITY.md
├── cli/
│   ├── README.md
│   ├── pyproject.toml
│   ├── src/
│   │   └── opensandbox_cli/
│   │       ├── __init__.py
│   │       ├── __main__.py
│   │       ├── client.py
│   │       ├── commands/
│   │       │   ├── __init__.py
│   │       │   ├── code.py
│   │       │   ├── command.py
│   │       │   ├── config_cmd.py
│   │       │   ├── file.py
│   │       │   └── sandbox.py
│   │       ├── config.py
│   │       ├── main.py
│   │       ├── output.py
│   │       └── utils.py
│   └── tests/
│       ├── __init__.py
│       ├── conftest.py
│       ├── test_cli_help.py
│       ├── test_commands.py
│       ├── test_config.py
│       ├── test_output.py
│       ├── test_resolve_id.py
│       └── test_utils.py
├── components/
│   ├── egress/
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   ├── TODO.md
│   │   ├── build.sh
│   │   ├── docs/
│   │   │   └── benchmark.md
│   │   ├── go.mod
│   │   ├── go.sum
│   │   ├── main.go
│   │   ├── nameserver.go
│   │   ├── nameserver_test.go
│   │   ├── nft.go
│   │   ├── pkg/
│   │   │   ├── constants/
│   │   │   │   ├── configuration.go
│   │   │   │   └── constants.go
│   │   │   ├── dnsproxy/
│   │   │   │   ├── exempt.go
│   │   │   │   ├── exempt_test.go
│   │   │   │   ├── proxy.go
│   │   │   │   ├── proxy_linux.go
│   │   │   │   ├── proxy_other.go
│   │   │   │   └── proxy_test.go
│   │   │   ├── events/
│   │   │   │   ├── broadcaster.go
│   │   │   │   ├── events_test.go
│   │   │   │   └── webhook.go
│   │   │   ├── iptables/
│   │   │   │   └── redirect.go
│   │   │   ├── log/
│   │   │   │   └── logger.go
│   │   │   ├── nftables/
│   │   │   │   ├── dynamic.go
│   │   │   │   ├── manager.go
│   │   │   │   └── manager_test.go
│   │   │   └── policy/
│   │   │       ├── policy.go
│   │   │       └── policy_test.go
│   │   ├── policy_server.go
│   │   ├── policy_server_test.go
│   │   └── tests/
│   │       ├── bench-dns-nft.sh
│   │       ├── egress-in-webhook.sh
│   │       ├── hostname.txt
│   │       ├── smoke-dns.sh
│   │       ├── smoke-dynamic-ip.sh
│   │       ├── smoke-nft.sh
│   │       └── webhook-server.py
│   ├── execd/
│   │   ├── .golangci.yml
│   │   ├── DEVELOPMENT.md
│   │   ├── Dockerfile
│   │   ├── Makefile
│   │   ├── README.md
│   │   ├── README_zh.md
│   │   ├── bootstrap.sh
│   │   ├── build.sh
│   │   ├── go.mod
│   │   ├── go.sum
│   │   ├── main.go
│   │   ├── pkg/
│   │   │   ├── flag/
│   │   │   │   ├── flags.go
│   │   │   │   └── parser.go
│   │   │   ├── jupyter/
│   │   │   │   ├── auth/
│   │   │   │   │   ├── auth.go
│   │   │   │   │   ├── auth_test.go
│   │   │   │   │   ├── client.go
│   │   │   │   │   └── types.go
│   │   │   │   ├── client.go
│   │   │   │   ├── debug_integration_test.go
│   │   │   │   ├── execute/
│   │   │   │   │   ├── events.json
│   │   │   │   │   ├── execute.go
│   │   │   │   │   ├── execute_test.go
│   │   │   │   │   ├── executor.go
│   │   │   │   │   ├── types.go
│   │   │   │   │   └── zz_generated.deepcopy.go
│   │   │   │   ├── integration_test.go
│   │   │   │   ├── kernel/
│   │   │   │   │   ├── kernel.go
│   │   │   │   │   ├── kernelspecs.json
│   │   │   │   │   └── types.go
│   │   │   │   ├── live_integration_test.go
│   │   │   │   ├── session/
│   │   │   │   │   ├── session.go
│   │   │   │   │   ├── session_test.go
│   │   │   │   │   ├── sessions.json
│   │   │   │   │   └── types.go
│   │   │   │   └── transport.go
│   │   │   ├── log/
│   │   │   │   └── log.go
│   │   │   ├── runtime/
│   │   │   │   ├── bash_session.go
│   │   │   │   ├── bash_session_test.go
│   │   │   │   ├── bash_session_windows.go
│   │   │   │   ├── command.go
│   │   │   │   ├── command_common.go
│   │   │   │   ├── command_status.go
│   │   │   │   ├── command_status_test.go
│   │   │   │   ├── command_test.go
│   │   │   │   ├── command_windows.go
│   │   │   │   ├── context.go
│   │   │   │   ├── context_test.go
│   │   │   │   ├── ctrl.go
│   │   │   │   ├── env.go
│   │   │   │   ├── env_test.go
│   │   │   │   ├── errors.go
│   │   │   │   ├── helpers_test.go
│   │   │   │   ├── interrupt.go
│   │   │   │   ├── interrupt_windows.go
│   │   │   │   ├── jupyter.go
│   │   │   │   ├── language.go
│   │   │   │   ├── sql.go
│   │   │   │   ├── sql_test.go
│   │   │   │   ├── types.go
│   │   │   │   └── types_test.go
│   │   │   ├── util/
│   │   │   │   ├── glob/
│   │   │   │   │   ├── index.go
│   │   │   │   │   ├── match.go
│   │   │   │   │   ├── match_benchmark_test.go
│   │   │   │   │   ├── match_test.go
│   │   │   │   │   └── pattern.go
│   │   │   │   └── safego/
│   │   │   │       ├── safe.go
│   │   │   │       └── safe_test.go
│   │   │   └── web/
│   │   │       ├── controller/
│   │   │       │   ├── basic.go
│   │   │       │   ├── basic_test.go
│   │   │       │   ├── codeinterpreting.go
│   │   │       │   ├── codeinterpreting_test.go
│   │   │       │   ├── command.go
│   │   │       │   ├── command_test.go
│   │   │       │   ├── filesystem.go
│   │   │       │   ├── filesystem_download.go
│   │   │       │   ├── filesystem_test.go
│   │   │       │   ├── filesystem_upload.go
│   │   │       │   ├── filesystem_windows.go
│   │   │       │   ├── metric.go
│   │   │       │   ├── metric_test.go
│   │   │       │   ├── mock_test.go
│   │   │       │   ├── ping.go
│   │   │       │   ├── sse.go
│   │   │       │   ├── syscall_linux.go
│   │   │       │   ├── syscall_others.go
│   │   │       │   ├── test_helpers.go
│   │   │       │   ├── utils.go
│   │   │       │   ├── utils_test.go
│   │   │       │   └── utils_windows.go
│   │   │       ├── model/
│   │   │       │   ├── codeinterpreting.go
│   │   │       │   ├── codeinterpreting_test.go
│   │   │       │   ├── command.go
│   │   │       │   ├── error.go
│   │   │       │   ├── filesystem.go
│   │   │       │   ├── header.go
│   │   │       │   ├── metric.go
│   │   │       │   └── session.go
│   │   │       ├── proxy.go
│   │   │       └── router.go
│   │   └── tests/
│   │       ├── jupyter.sh
│   │       ├── smoke.sh
│   │       └── smoke_api.py
│   ├── ingress/
│   │   ├── .golangci.yml
│   │   ├── DEVELOPMENT.md
│   │   ├── Dockerfile
│   │   ├── Makefile
│   │   ├── README.md
│   │   ├── build.sh
│   │   ├── go.mod
│   │   ├── go.sum
│   │   ├── main.go
│   │   └── pkg/
│   │       ├── flag/
│   │       │   ├── flags.go
│   │       │   └── parser.go
│   │       ├── proxy/
│   │       │   ├── header.go
│   │       │   ├── healthz.go
│   │       │   ├── healthz_test.go
│   │       │   ├── host.go
│   │       │   ├── http.go
│   │       │   ├── http_test.go
│   │       │   ├── logger.go
│   │       │   ├── proxy.go
│   │       │   ├── proxy_test.go
│   │       │   ├── websocket.go
│   │       │   └── websocket_test.go
│   │       ├── renewintent/
│   │       │   ├── intent.go
│   │       │   ├── intent_test.go
│   │       │   ├── publisher.go
│   │       │   ├── redis.go
│   │       │   └── redis_bench_test.go
│   │       └── sandbox/
│   │           ├── agent_sandbox_provider.go
│   │           ├── agent_sandbox_provider_test.go
│   │           ├── batchsandbox_provider.go
│   │           ├── batchsandbox_provider_test.go
│   │           ├── errors_test.go
│   │           ├── factory.go
│   │           └── provider.go
│   └── internal/
│       ├── go.mod
│       ├── go.sum
│       ├── logger/
│       │   ├── logger.go
│       │   └── zap.go
│       └── version/
│           └── version.go
├── docs/
│   ├── .nvmrc
│   ├── .vitepress/
│   │   ├── config.mts
│   │   ├── scripts/
│   │   │   └── docs-manifest.mjs
│   │   └── theme/
│   │       ├── index.ts
│   │       └── styles.css
│   ├── README.md
│   ├── README_zh.md
│   ├── RELEASE_NOTE_TEMPLATE.md
│   ├── architecture.md
│   ├── index.md
│   ├── manual-cleanup-refactor-guide.md
│   ├── package.json
│   ├── secure-container.md
│   ├── single_host_network.md
│   └── zh/
│       └── index.md
├── examples/
│   ├── README.md
│   ├── agent-sandbox/
│   │   ├── README.md
│   │   └── main.py
│   ├── aio-sandbox/
│   │   ├── README.md
│   │   └── main.py
│   ├── chrome/
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   ├── build.sh
│   │   ├── chrome.sh
│   │   ├── go.mod
│   │   ├── go.sum
│   │   ├── main.go
│   │   └── main.py
│   ├── claude-code/
│   │   ├── README.md
│   │   └── main.py
│   ├── code-interpreter/
│   │   ├── README.md
│   │   ├── main.py
│   │   └── main_use_pool.py
│   ├── codex-cli/
│   │   ├── README.md
│   │   └── main.py
│   ├── desktop/
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   ├── build.sh
│   │   └── main.py
│   ├── docker-ossfs-volume-mount/
│   │   ├── README.md
│   │   ├── README_zh.md
│   │   └── main.py
│   ├── docker-pvc-volume-mount/
│   │   ├── README.md
│   │   ├── README_zh.md
│   │   └── main.py
│   ├── gemini-cli/
│   │   ├── README.md
│   │   └── main.py
│   ├── google-adk/
│   │   ├── README.md
│   │   └── main.py
│   ├── host-volume-mount/
│   │   ├── README.md
│   │   ├── README_zh.md
│   │   └── main.py
│   ├── kimi-cli/
│   │   ├── README.md
│   │   └── main.py
│   ├── kubernetes-pvc-volume-mount/
│   │   ├── README.md
│   │   └── main.py
│   ├── langgraph/
│   │   ├── README.md
│   │   └── main.py
│   ├── nullclaw/
│   │   ├── README.md
│   │   └── main.py
│   ├── openclaw/
│   │   ├── README.md
│   │   ├── README_zh.md
│   │   └── main.py
│   ├── playwright/
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   ├── build.sh
│   │   └── main.py
│   ├── rl-training/
│   │   ├── README.md
│   │   ├── main.py
│   │   └── requirements.txt
│   └── vscode/
│       ├── Dockerfile
│       ├── README.md
│       ├── build.sh
│       └── main.py
├── kubernetes/
│   ├── .golangci.yml
│   ├── Dockerfile
│   ├── Dockerfile.debug
│   ├── Makefile
│   ├── PROJECT
│   ├── README-ZH.md
│   ├── README.md
│   ├── apis/
│   │   └── sandbox/
│   │       └── v1alpha1/
│   │           ├── batchsandbox_types.go
│   │           ├── doc.go
│   │           ├── groupversion_info.go
│   │           ├── pool_types.go
│   │           └── zz_generated.deepcopy.go
│   ├── build.sh
│   ├── charts/
│   │   ├── opensandbox-controller/
│   │   │   ├── .helmignore
│   │   │   ├── Chart.yaml
│   │   │   ├── README.md
│   │   │   ├── templates/
│   │   │   │   ├── NOTES.txt
│   │   │   │   ├── _helpers.tpl
│   │   │   │   ├── clusterrole.yaml
│   │   │   │   ├── clusterrolebinding.yaml
│   │   │   │   ├── crds/
│   │   │   │   │   ├── batchsandboxes.yaml
│   │   │   │   │   └── pools.yaml
│   │   │   │   ├── deployment.yaml
│   │   │   │   └── serviceaccount.yaml
│   │   │   └── values.yaml
│   │   └── opensandbox-server/
│   │       ├── .helmignore
│   │       ├── Chart.yaml
│   │       ├── README.md
│   │       ├── templates/
│   │       │   ├── NOTES.txt
│   │       │   ├── _helpers.tpl
│   │       │   ├── ingress-gateway.yaml
│   │       │   └── server.yaml
│   │       └── values.yaml
│   ├── cmd/
│   │   ├── controller/
│   │   │   └── main.go
│   │   └── task-executor/
│   │       └── main.go
│   ├── config/
│   │   ├── crd/
│   │   │   ├── bases/
│   │   │   │   ├── sandbox.opensandbox.io_batchsandboxes.yaml
│   │   │   │   └── sandbox.opensandbox.io_pools.yaml
│   │   │   ├── kustomization.yaml
│   │   │   └── kustomizeconfig.yaml
│   │   ├── default/
│   │   │   ├── cert_metrics_manager_patch.yaml
│   │   │   ├── kustomization.yaml
│   │   │   ├── manager_metrics_patch.yaml
│   │   │   └── metrics_service.yaml
│   │   ├── manager/
│   │   │   ├── kustomization.yaml
│   │   │   └── manager.yaml
│   │   ├── manifests/
│   │   │   └── kustomization.yaml
│   │   ├── network-policy/
│   │   │   ├── allow-metrics-traffic.yaml
│   │   │   └── kustomization.yaml
│   │   ├── prometheus/
│   │   │   ├── kustomization.yaml
│   │   │   ├── monitor.yaml
│   │   │   └── monitor_tls_patch.yaml
│   │   ├── rbac/
│   │   │   ├── batchsandbox_admin_role.yaml
│   │   │   ├── batchsandbox_editor_role.yaml
│   │   │   ├── batchsandbox_viewer_role.yaml
│   │   │   ├── kustomization.yaml
│   │   │   ├── leader_election_role.yaml
│   │   │   ├── leader_election_role_binding.yaml
│   │   │   ├── metrics_auth_role.yaml
│   │   │   ├── metrics_auth_role_binding.yaml
│   │   │   ├── metrics_reader_role.yaml
│   │   │   ├── pool_admin_role.yaml
│   │   │   ├── pool_editor_role.yaml
│   │   │   ├── pool_viewer_role.yaml
│   │   │   ├── role.yaml
│   │   │   ├── role_binding.yaml
│   │   │   └── service_account.yaml
│   │   ├── samples/
│   │   │   ├── kustomization.yaml
│   │   │   ├── sandbox_v1alpha1_batchsandbox-with-task.yaml
│   │   │   ├── sandbox_v1alpha1_batchsandbox.yaml
│   │   │   ├── sandbox_v1alpha1_pool.yaml
│   │   │   └── sandbox_v1alpha1_pooled_batchsandbox.yaml
│   │   └── scorecard/
│   │       ├── bases/
│   │       │   └── config.yaml
│   │       ├── kustomization.yaml
│   │       └── patches/
│   │           ├── basic.config.yaml
│   │           └── olm.config.yaml
│   ├── docs/
│   │   ├── BUILD-IMAGES.md
│   │   ├── HELM-DEPLOYMENT.md
│   │   └── logging.md
│   ├── examples/
│   │   ├── controller/
│   │   │   ├── README-ZH.md
│   │   │   ├── README.md
│   │   │   └── main.go
│   │   └── task-executor/
│   │       ├── README.md
│   │       ├── README_zh-CN.md
│   │       └── main.go
│   ├── go.mod
│   ├── go.sum
│   ├── hack/
│   │   ├── boilerplate.go.txt
│   │   ├── debug-task.sh
│   │   ├── pool-perf.py
│   │   └── update-codegen.sh
│   ├── internal/
│   │   ├── controller/
│   │   │   ├── allocator.go
│   │   │   ├── allocator_mock.go
│   │   │   ├── allocator_test.go
│   │   │   ├── apis.go
│   │   │   ├── batchsandbox_controller.go
│   │   │   ├── batchsandbox_controller_test.go
│   │   │   ├── pool_controller.go
│   │   │   ├── pool_controller_test.go
│   │   │   ├── strategy/
│   │   │   │   ├── pool_strategy.go
│   │   │   │   ├── pool_strategy_default.go
│   │   │   │   ├── pool_strategy_factory.go
│   │   │   │   ├── pool_strategy_test.go
│   │   │   │   ├── task_scheduling_strategy.go
│   │   │   │   ├── task_scheduling_strategy_default.go
│   │   │   │   ├── task_scheduling_strategy_default_test.go
│   │   │   │   └── task_scheduling_strategy_factory.go
│   │   │   └── suite_test.go
│   │   ├── scheduler/
│   │   │   ├── default_scheduler.go
│   │   │   ├── default_scheduler_mock.go
│   │   │   ├── default_scheduler_test.go
│   │   │   ├── interface.go
│   │   │   ├── mock/
│   │   │   │   ├── interface.go
│   │   │   │   └── types.go
│   │   │   ├── recovery.go
│   │   │   ├── recovery_test.go
│   │   │   ├── status_collector.go
│   │   │   ├── status_collector_mock.go
│   │   │   └── types.go
│   │   ├── task-executor/
│   │   │   ├── config/
│   │   │   │   └── config.go
│   │   │   ├── manager/
│   │   │   │   ├── interface.go
│   │   │   │   ├── task_manager.go
│   │   │   │   └── task_manager_test.go
│   │   │   ├── runtime/
│   │   │   │   ├── composite.go
│   │   │   │   ├── container.go
│   │   │   │   ├── interface.go
│   │   │   │   ├── process.go
│   │   │   │   └── process_test.go
│   │   │   ├── server/
│   │   │   │   ├── handler.go
│   │   │   │   ├── handler_test.go
│   │   │   │   └── router.go
│   │   │   ├── storage/
│   │   │   │   ├── file_store.go
│   │   │   │   ├── file_store_test.go
│   │   │   │   └── interface.go
│   │   │   ├── types/
│   │   │   │   └── task.go
│   │   │   └── utils/
│   │   │       ├── pathutil.go
│   │   │       └── pathutil_test.go
│   │   └── utils/
│   │       ├── controller/
│   │       │   └── util.go
│   │       ├── expectations/
│   │       │   ├── init.go
│   │       │   ├── resource_version_expectation.go
│   │       │   ├── resource_version_expectation_test.go
│   │       │   ├── scale_expectations.go
│   │       │   └── scale_expectations_test.go
│   │       ├── fieldindex/
│   │       │   └── register.go
│   │       ├── finalizer.go
│   │       ├── helper.go
│   │       ├── json.go
│   │       ├── logging/
│   │       │   └── logger.go
│   │       ├── pod.go
│   │       ├── pod_test.go
│   │       └── requeueduration/
│   │           └── duration.go
│   ├── pkg/
│   │   ├── client/
│   │   │   ├── clientset/
│   │   │   │   └── versioned/
│   │   │   │       ├── clientset.go
│   │   │   │       ├── fake/
│   │   │   │       │   ├── clientset_generated.go
│   │   │   │       │   ├── doc.go
│   │   │   │       │   └── register.go
│   │   │   │       ├── scheme/
│   │   │   │       │   ├── doc.go
│   │   │   │       │   └── register.go
│   │   │   │       └── typed/
│   │   │   │           └── sandbox/
│   │   │   │               └── v1alpha1/
│   │   │   │                   ├── batchsandbox.go
│   │   │   │                   ├── doc.go
│   │   │   │                   ├── fake/
│   │   │   │                   │   ├── doc.go
│   │   │   │                   │   ├── fake_batchsandbox.go
│   │   │   │                   │   ├── fake_pool.go
│   │   │   │                   │   └── fake_sandbox_client.go
│   │   │   │                   ├── generated_expansion.go
│   │   │   │                   ├── pool.go
│   │   │   │                   └── sandbox_client.go
│   │   │   ├── informers/
│   │   │   │   └── externalversions/
│   │   │   │       ├── factory.go
│   │   │   │       ├── generic.go
│   │   │   │       ├── internalinterfaces/
│   │   │   │       │   └── factory_interfaces.go
│   │   │   │       └── sandbox/
│   │   │   │           ├── interface.go
│   │   │   │           └── v1alpha1/
│   │   │   │               ├── batchsandbox.go
│   │   │   │               ├── interface.go
│   │   │   │               └── pool.go
│   │   │   └── listers/
│   │   │       └── sandbox/
│   │   │           └── v1alpha1/
│   │   │               ├── batchsandbox.go
│   │   │               ├── expansion_generated.go
│   │   │               └── pool.go
│   │   ├── task-executor/
│   │   │   ├── client.go
│   │   │   └── types.go
│   │   └── utils/
│   │       ├── endpoints.go
│   │       └── endpoints_test.go
│   └── test/
│       ├── e2e/
│       │   ├── e2e_suite_test.go
│       │   ├── e2e_test.go
│       │   └── testdata/
│       │       ├── batchsandbox-non-pooled-expire.yaml
│       │       ├── batchsandbox-non-pooled.yaml
│       │       ├── batchsandbox-pooled-no-expire.yaml
│       │       ├── batchsandbox-pooled.yaml
│       │       ├── batchsandbox-with-process-task.yaml
│       │       ├── pool-basic.yaml
│       │       ├── pool-with-env.yaml
│       │       ├── pool-with-task-executor.yaml
│       │       └── runtimeclass/
│       │           └── gvisor.yaml
│       ├── e2e_runtime/
│       │   └── gvisor/
│       │       ├── gvisor_test.go
│       │       ├── suite_test.go
│       │       └── testdata/
│       │           ├── gvisor.yaml.tmpl
│       │           └── runtimeclass.yaml
│       ├── e2e_task/
│       │   ├── suite_test.go
│       │   └── task_e2e_test.go
│       └── utils/
│           ├── image.go
│           └── utils.go
├── oseps/
│   ├── 0001-fqdn-based-egress-control.md
│   ├── 0002-kubernetes-sigs-agent-sandbox-support.md
│   ├── 0003-volume-and-volumebinding-support.md
│   ├── 0004-secure-container-runtime.md
│   ├── 0005-client-side-sandbox-pool.md
│   ├── 0006-developer-console.md
│   ├── 0007-fast-sandbox-runtime-support.md
│   ├── 0008-pause-resume-rootfs-snapshot.md
│   ├── 0009-auto-renew-sandbox-on-ingress-access.md
│   ├── 0010-opentelemetry-instrumentation.md
│   ├── CONTRIBUTING.md
│   ├── README.md
│   ├── init-osep.sh
│   └── osep-template.md.template
├── sandboxes/
│   └── code-interpreter/
│       ├── Dockerfile
│       ├── Dockerfile_base
│       ├── README.md
│       ├── README_zh.md
│       ├── build.sh
│       └── scripts/
│           ├── code-interpreter-env.sh
│           ├── code-interpreter.sh
│           └── jupyter_notebook_config.py
├── scripts/
│   ├── add-license.sh
│   ├── bump-component-version.sh
│   ├── csharp-e2e.sh
│   ├── java-e2e.sh
│   ├── javascript-e2e.sh
│   ├── python-e2e.sh
│   ├── spec-doc/
│   │   ├── generate-spec.js
│   │   └── index.html
│   └── verify-license.sh
├── sdks/
│   ├── Directory.Build.props
│   ├── code-interpreter/
│   │   ├── csharp/
│   │   │   ├── OpenSandbox.CodeInterpreter.sln
│   │   │   ├── README.md
│   │   │   ├── README_zh.md
│   │   │   ├── src/
│   │   │   │   └── OpenSandbox.CodeInterpreter/
│   │   │   │       ├── Adapters/
│   │   │   │       │   └── CodesAdapter.cs
│   │   │   │       ├── CodeInterpreter.cs
│   │   │   │       ├── Factory/
│   │   │   │       │   ├── DefaultCodeInterpreterAdapterFactory.cs
│   │   │   │       │   └── ICodeInterpreterAdapterFactory.cs
│   │   │   │       ├── Models/
│   │   │   │       │   └── CodeModels.cs
│   │   │   │       ├── OpenSandbox.CodeInterpreter.csproj
│   │   │   │       └── Services/
│   │   │   │           └── ICodes.cs
│   │   │   └── tests/
│   │   │       └── OpenSandbox.CodeInterpreter.Tests/
│   │   │           ├── CodeInterpreterTests.cs
│   │   │           ├── CodesAdapterTests.cs
│   │   │           ├── FactoryTests.cs
│   │   │           ├── ModelsTests.cs
│   │   │           └── OpenSandbox.CodeInterpreter.Tests.csproj
│   │   ├── javascript/
│   │   │   ├── .nvmrc
│   │   │   ├── README.md
│   │   │   ├── README_zh.md
│   │   │   ├── eslint.config.mjs
│   │   │   ├── package.json
│   │   │   ├── src/
│   │   │   │   ├── adapters/
│   │   │   │   │   ├── codesAdapter.ts
│   │   │   │   │   ├── openapiError.ts
│   │   │   │   │   └── sse.ts
│   │   │   │   ├── factory/
│   │   │   │   │   ├── adapterFactory.ts
│   │   │   │   │   └── defaultAdapterFactory.ts
│   │   │   │   ├── index.ts
│   │   │   │   ├── interpreter.ts
│   │   │   │   ├── models.ts
│   │   │   │   └── services/
│   │   │   │       └── codes.ts
│   │   │   ├── tests/
│   │   │   │   ├── defaultAdapterFactory.headers.test.mjs
│   │   │   │   └── interpreter.headers.test.mjs
│   │   │   ├── tsconfig.json
│   │   │   └── tsup.config.ts
│   │   ├── kotlin/
│   │   │   ├── LICENSE
│   │   │   ├── README.md
│   │   │   ├── README_zh.md
│   │   │   ├── build.gradle.kts
│   │   │   ├── code-interpreter/
│   │   │   │   ├── build.gradle.kts
│   │   │   │   └── src/
│   │   │   │       ├── main/
│   │   │   │       │   └── kotlin/
│   │   │   │       │       └── com/
│   │   │   │       │           └── alibaba/
│   │   │   │       │               └── opensandbox/
│   │   │   │       │                   └── codeinterpreter/
│   │   │   │       │                       ├── CodeInterpreter.kt
│   │   │   │       │                       ├── domain/
│   │   │   │       │                       │   ├── models/
│   │   │   │       │                       │   │   └── execd/
│   │   │   │       │                       │   │       └── executions/
│   │   │   │       │                       │   │           └── CodeModels.kt
│   │   │   │       │                       │   └── services/
│   │   │   │       │                       │       └── Codes.kt
│   │   │   │       │                       └── infrastructure/
│   │   │   │       │                           ├── adapters/
│   │   │   │       │                           │   ├── converter/
│   │   │   │       │                           │   │   └── CodeExecutionConverter.kt
│   │   │   │       │                           │   └── service/
│   │   │   │       │                           │       └── CodesAdapter.kt
│   │   │   │       │                           └── factory/
│   │   │   │       │                               └── AdapterFactory.kt
│   │   │   │       └── test/
│   │   │   │           └── kotlin/
│   │   │   │               └── com/
│   │   │   │                   └── alibaba/
│   │   │   │                       └── opensandbox/
│   │   │   │                           └── codeinterpreter/
│   │   │   │                               ├── CodeInterpreterTest.kt
│   │   │   │                               └── infrastructure/
│   │   │   │                                   └── adapters/
│   │   │   │                                       └── service/
│   │   │   │                                           └── CodesAdapterTest.kt
│   │   │   ├── code-interpreter-bom/
│   │   │   │   └── build.gradle.kts
│   │   │   ├── gradle/
│   │   │   │   ├── libs.versions.toml
│   │   │   │   └── wrapper/
│   │   │   │       ├── gradle-wrapper.jar
│   │   │   │       └── gradle-wrapper.properties
│   │   │   ├── gradle.properties
│   │   │   ├── gradlew
│   │   │   └── settings.gradle.kts
│   │   └── python/
│   │       ├── LICENSE
│   │       ├── Makefile
│   │       ├── README.md
│   │       ├── README_zh.md
│   │       ├── pyproject.toml
│   │       ├── src/
│   │       │   └── code_interpreter/
│   │       │       ├── __init__.py
│   │       │       ├── adapters/
│   │       │       │   ├── __init__.py
│   │       │       │   ├── code_adapter.py
│   │       │       │   ├── converter/
│   │       │       │   │   ├── __init__.py
│   │       │       │   │   └── code_execution_converter.py
│   │       │       │   └── factory.py
│   │       │       ├── code_interpreter.py
│   │       │       ├── models/
│   │       │       │   ├── __init__.py
│   │       │       │   ├── code.py
│   │       │       │   └── code_sync.py
│   │       │       ├── py.typed
│   │       │       ├── services/
│   │       │       │   ├── __init__.py
│   │       │       │   └── code.py
│   │       │       └── sync/
│   │       │           ├── __init__.py
│   │       │           ├── adapters/
│   │       │           │   ├── __init__.py
│   │       │           │   ├── code_adapter.py
│   │       │           │   └── factory.py
│   │       │           ├── code_interpreter.py
│   │       │           └── services/
│   │       │               ├── __init__.py
│   │       │               └── code.py
│   │       └── tests/
│   │           ├── test_adapter_eager_init.py
│   │           ├── test_code_interpreter_create_and_delegation.py
│   │           ├── test_code_service_adapter_openapi_calls.py
│   │           └── test_code_service_adapter_streaming.py
│   ├── eslint.base.mjs
│   ├── mcp/
│   │   └── sandbox/
│   │       └── python/
│   │           ├── LICENSE
│   │           ├── README.md
│   │           ├── README_zh.md
│   │           ├── pyproject.toml
│   │           └── src/
│   │               └── opensandbox_mcp/
│   │                   ├── __init__.py
│   │                   ├── __main__.py
│   │                   ├── py.typed
│   │                   └── server.py
│   ├── package.json
│   ├── pnpm-workspace.yaml
│   ├── sandbox/
│   │   ├── csharp/
│   │   │   ├── .editorconfig
│   │   │   ├── Directory.Build.props
│   │   │   ├── OpenSandbox.sln
│   │   │   ├── OpenSandbox.sln.DotSettings.user
│   │   │   ├── README.md
│   │   │   ├── README_zh.md
│   │   │   ├── src/
│   │   │   │   └── OpenSandbox/
│   │   │   │       ├── Adapters/
│   │   │   │       │   ├── CommandsAdapter.cs
│   │   │   │       │   ├── EgressAdapter.cs
│   │   │   │       │   ├── FilesystemAdapter.cs
│   │   │   │       │   ├── HealthAdapter.cs
│   │   │   │       │   ├── MetricsAdapter.cs
│   │   │   │       │   ├── SandboxesAdapter.cs
│   │   │   │       │   └── SseParser.cs
│   │   │   │       ├── Config/
│   │   │   │       │   ├── ConnectionConfig.cs
│   │   │   │       │   └── DiagnosticsOptions.cs
│   │   │   │       ├── Core/
│   │   │   │       │   ├── Constants.cs
│   │   │   │       │   └── Exceptions.cs
│   │   │   │       ├── Factory/
│   │   │   │       │   ├── DefaultAdapterFactory.cs
│   │   │   │       │   └── IAdapterFactory.cs
│   │   │   │       ├── HttpClientProvider.cs
│   │   │   │       ├── Internal/
│   │   │   │       │   ├── ExecutionEventDispatcher.cs
│   │   │   │       │   └── HttpClientWrapper.cs
│   │   │   │       ├── Models/
│   │   │   │       │   ├── Execd.cs
│   │   │   │       │   ├── Execution.cs
│   │   │   │       │   ├── Filesystem.cs
│   │   │   │       │   └── Sandboxes.cs
│   │   │   │       ├── OpenSandbox.csproj
│   │   │   │       ├── Options.cs
│   │   │   │       ├── Sandbox.cs
│   │   │   │       ├── SandboxManager.cs
│   │   │   │       └── Services/
│   │   │   │           ├── IEgress.cs
│   │   │   │           ├── IExecdCommands.cs
│   │   │   │           ├── IExecdHealth.cs
│   │   │   │           ├── IExecdMetrics.cs
│   │   │   │           ├── ISandboxFiles.cs
│   │   │   │           └── ISandboxes.cs
│   │   │   └── tests/
│   │   │       └── OpenSandbox.Tests/
│   │   │           ├── CommandsAdapterTests.cs
│   │   │           ├── ConnectionConfigTests.cs
│   │   │           ├── ConstantsTests.cs
│   │   │           ├── ExceptionTests.cs
│   │   │           ├── ModelsTests.cs
│   │   │           ├── OpenSandbox.Tests.csproj
│   │   │           ├── OptionsTests.cs
│   │   │           ├── SandboxEgressLifecycleTests.cs
│   │   │           ├── SandboxReadinessDiagnosticsTests.cs
│   │   │           ├── SandboxesAdapterTests.cs
│   │   │           └── SseParserTests.cs
│   │   ├── javascript/
│   │   │   ├── .nvmrc
│   │   │   ├── README.md
│   │   │   ├── README_zh.md
│   │   │   ├── eslint.config.mjs
│   │   │   ├── package.json
│   │   │   ├── scripts/
│   │   │   │   └── generate-api.mjs
│   │   │   ├── src/
│   │   │   │   ├── adapters/
│   │   │   │   │   ├── commandsAdapter.ts
│   │   │   │   │   ├── egressAdapter.ts
│   │   │   │   │   ├── filesystemAdapter.ts
│   │   │   │   │   ├── healthAdapter.ts
│   │   │   │   │   ├── metricsAdapter.ts
│   │   │   │   │   ├── openapiError.ts
│   │   │   │   │   ├── sandboxesAdapter.ts
│   │   │   │   │   └── sse.ts
│   │   │   │   ├── api/
│   │   │   │   │   ├── egress.ts
│   │   │   │   │   ├── execd.ts
│   │   │   │   │   └── lifecycle.ts
│   │   │   │   ├── config/
│   │   │   │   │   └── connection.ts
│   │   │   │   ├── core/
│   │   │   │   │   ├── constants.ts
│   │   │   │   │   └── exceptions.ts
│   │   │   │   ├── factory/
│   │   │   │   │   ├── adapterFactory.ts
│   │   │   │   │   └── defaultAdapterFactory.ts
│   │   │   │   ├── index.ts
│   │   │   │   ├── internal.ts
│   │   │   │   ├── manager.ts
│   │   │   │   ├── models/
│   │   │   │   │   ├── execd.ts
│   │   │   │   │   ├── execution.ts
│   │   │   │   │   ├── executionEventDispatcher.ts
│   │   │   │   │   ├── filesystem.ts
│   │   │   │   │   └── sandboxes.ts
│   │   │   │   ├── openapi/
│   │   │   │   │   ├── egressClient.ts
│   │   │   │   │   ├── execdClient.ts
│   │   │   │   │   └── lifecycleClient.ts
│   │   │   │   ├── sandbox.ts
│   │   │   │   └── services/
│   │   │   │       ├── egress.ts
│   │   │   │       ├── execdCommands.ts
│   │   │   │       ├── execdHealth.ts
│   │   │   │       ├── execdMetrics.ts
│   │   │   │       ├── filesystem.ts
│   │   │   │       └── sandboxes.ts
│   │   │   ├── tests/
│   │   │   │   └── sandbox.create.test.mjs
│   │   │   ├── tsconfig.json
│   │   │   └── tsup.config.ts
│   │   ├── kotlin/
│   │   │   ├── LICENSE
│   │   │   ├── README.md
│   │   │   ├── README_zh.md
│   │   │   ├── build.gradle.kts
│   │   │   ├── gradle/
│   │   │   │   ├── libs.versions.toml
│   │   │   │   └── wrapper/
│   │   │   │       ├── gradle-wrapper.jar
│   │   │   │       └── gradle-wrapper.properties
│   │   │   ├── gradle.properties
│   │   │   ├── gradlew
│   │   │   ├── sandbox/
│   │   │   │   ├── Module.md
│   │   │   │   ├── build.gradle.kts
│   │   │   │   └── src/
│   │   │   │       ├── main/
│   │   │   │       │   └── kotlin/
│   │   │   │       │       └── com/
│   │   │   │       │           └── alibaba/
│   │   │   │       │               └── opensandbox/
│   │   │   │       │                   └── sandbox/
│   │   │   │       │                       ├── HttpClientProvider.kt
│   │   │   │       │                       ├── Sandbox.kt
│   │   │   │       │                       ├── SandboxManager.kt
│   │   │   │       │                       ├── config/
│   │   │   │       │                       │   └── ConnectionConfig.kt
│   │   │   │       │                       ├── domain/
│   │   │   │       │                       │   ├── exceptions/
│   │   │   │       │                       │   │   └── SandboxException.kt
│   │   │   │       │                       │   ├── models/
│   │   │   │       │                       │   │   ├── execd/
│   │   │   │       │                       │   │   │   ├── Constants.kt
│   │   │   │       │                       │   │   │   ├── executions/
│   │   │   │       │                       │   │   │   │   ├── CommandModels.kt
│   │   │   │       │                       │   │   │   │   ├── ExecutionModels.kt
│   │   │   │       │                       │   │   │   │   └── RunCommandRequest.kt
│   │   │   │       │                       │   │   │   └── filesystem/
│   │   │   │       │                       │   │   │       └── FilesystemModels.kt
│   │   │   │       │                       │   │   └── sandboxes/
│   │   │   │       │                       │   │       └── SandboxModels.kt
│   │   │   │       │                       │   └── services/
│   │   │   │       │                       │       ├── Commands.kt
│   │   │   │       │                       │       ├── Egress.kt
│   │   │   │       │                       │       ├── Filesystem.kt
│   │   │   │       │                       │       ├── Health.kt
│   │   │   │       │                       │       ├── Metrics.kt
│   │   │   │       │                       │       └── Sandboxes.kt
│   │   │   │       │                       └── infrastructure/
│   │   │   │       │                           ├── adapters/
│   │   │   │       │                           │   ├── converter/
│   │   │   │       │                           │   │   ├── ExceptionConverter.kt
│   │   │   │       │                           │   │   ├── ExecutionConverter.kt
│   │   │   │       │                           │   │   ├── ExecutionEventDispatcher.kt
│   │   │   │       │                           │   │   ├── FilesystemConverter.kt
│   │   │   │       │                           │   │   ├── SandboxModelConverter.kt
│   │   │   │       │                           │   │   └── Serializer.kt
│   │   │   │       │                           │   └── service/
│   │   │   │       │                           │       ├── CommandsAdapter.kt
│   │   │   │       │                           │       ├── EgressAdapter.kt
│   │   │   │       │                           │       ├── FilesystemAdapter.kt
│   │   │   │       │                           │       ├── HealthAdapter.kt
│   │   │   │       │                           │       ├── MetricsAdapter.kt
│   │   │   │       │                           │       └── SandboxesAdapter.kt
│   │   │   │       │                           └── factory/
│   │   │   │       │                               └── AdapterFactory.kt
│   │   │   │       └── test/
│   │   │   │           └── kotlin/
│   │   │   │               └── com/
│   │   │   │                   └── alibaba/
│   │   │   │                       └── opensandbox/
│   │   │   │                           └── sandbox/
│   │   │   │                               ├── SandboxManagerTest.kt
│   │   │   │                               ├── SandboxTest.kt
│   │   │   │                               ├── domain/
│   │   │   │                               │   ├── exceptions/
│   │   │   │                               │   │   └── SandboxExceptionCompatibilityTest.kt
│   │   │   │                               │   └── models/
│   │   │   │                               │       └── VolumeModelsTest.kt
│   │   │   │                               └── infrastructure/
│   │   │   │                                   └── adapters/
│   │   │   │                                       └── service/
│   │   │   │                                           ├── CommandsAdapterTest.kt
│   │   │   │                                           └── SandboxesAdapterTest.kt
│   │   │   ├── sandbox-api/
│   │   │   │   ├── build.gradle.kts
│   │   │   │   └── src/
│   │   │   │       └── main/
│   │   │   │           └── kotlin/
│   │   │   │               └── com/
│   │   │   │                   └── alibaba/
│   │   │   │                       └── opensandbox/
│   │   │   │                           └── sandbox/
│   │   │   │                               └── api/
│   │   │   │                                   ├── models/
│   │   │   │                                   │   └── execd/
│   │   │   │                                   │       └── ExecutionModels.kt
│   │   │   │                                   └── openapitools.json
│   │   │   ├── sandbox-bom/
│   │   │   │   └── build.gradle.kts
│   │   │   └── settings.gradle.kts
│   │   └── python/
│   │       ├── LICENSE
│   │       ├── Makefile
│   │       ├── README.md
│   │       ├── README_zh.md
│   │       ├── pyproject.toml
│   │       ├── scripts/
│   │       │   ├── generate_api.py
│   │       │   ├── openapi_egress_config.yaml
│   │       │   ├── openapi_execd_config.yaml
│   │       │   └── openapi_lifecycle_config.yaml
│   │       ├── src/
│   │       │   └── opensandbox/
│   │       │       ├── __init__.py
│   │       │       ├── adapters/
│   │       │       │   ├── __init__.py
│   │       │       │   ├── command_adapter.py
│   │       │       │   ├── converter/
│   │       │       │   │   ├── __init__.py
│   │       │       │   │   ├── command_model_converter.py
│   │       │       │   │   ├── event_node.py
│   │       │       │   │   ├── exception_converter.py
│   │       │       │   │   ├── execution_converter.py
│   │       │       │   │   ├── execution_event_dispatcher.py
│   │       │       │   │   ├── filesystem_model_converter.py
│   │       │       │   │   ├── metrics_model_converter.py
│   │       │       │   │   ├── response_handler.py
│   │       │       │   │   └── sandbox_model_converter.py
│   │       │       │   ├── egress_adapter.py
│   │       │       │   ├── factory.py
│   │       │       │   ├── filesystem_adapter.py
│   │       │       │   ├── health_adapter.py
│   │       │       │   ├── metrics_adapter.py
│   │       │       │   └── sandboxes_adapter.py
│   │       │       ├── api/
│   │       │       │   ├── __init__.py
│   │       │       │   ├── egress/
│   │       │       │   │   ├── __init__.py
│   │       │       │   │   ├── api/
│   │       │       │   │   │   ├── __init__.py
│   │       │       │   │   │   └── policy/
│   │       │       │   │   │       ├── __init__.py
│   │       │       │   │   │       ├── get_policy.py
│   │       │       │   │   │       └── patch_policy.py
│   │       │       │   │   ├── client.py
│   │       │       │   │   ├── errors.py
│   │       │       │   │   ├── models/
│   │       │       │   │   │   ├── __init__.py
│   │       │       │   │   │   ├── network_policy.py
│   │       │       │   │   │   ├── network_policy_default_action.py
│   │       │       │   │   │   ├── network_rule.py
│   │       │       │   │   │   ├── network_rule_action.py
│   │       │       │   │   │   └── policy_status_response.py
│   │       │       │   │   ├── py.typed
│   │       │       │   │   └── types.py
│   │       │       │   ├── execd/
│   │       │       │   │   ├── __init__.py
│   │       │       │   │   ├── api/
│   │       │       │   │   │   ├── __init__.py
│   │       │       │   │   │   ├── code_interpreting/
│   │       │       │   │   │   │   ├── __init__.py
│   │       │       │   │   │   │   ├── create_code_context.py
│   │       │       │   │   │   │   ├── delete_context.py
│   │       │       │   │   │   │   ├── delete_contexts_by_language.py
│   │       │       │   │   │   │   ├── get_context.py
│   │       │       │   │   │   │   ├── interrupt_code.py
│   │       │       │   │   │   │   ├── list_contexts.py
│   │       │       │   │   │   │   └── run_code.py
│   │       │       │   │   │   ├── command/
│   │       │       │   │   │   │   ├── __init__.py
│   │       │       │   │   │   │   ├── get_background_command_logs.py
│   │       │       │   │   │   │   ├── get_command_status.py
│   │       │       │   │   │   │   ├── interrupt_command.py
│   │       │       │   │   │   │   └── run_command.py
│   │       │       │   │   │   ├── filesystem/
│   │       │       │   │   │   │   ├── __init__.py
│   │       │       │   │   │   │   ├── chmod_files.py
│   │       │       │   │   │   │   ├── download_file.py
│   │       │       │   │   │   │   ├── get_files_info.py
│   │       │       │   │   │   │   ├── make_dirs.py
│   │       │       │   │   │   │   ├── remove_dirs.py
│   │       │       │   │   │   │   ├── remove_files.py
│   │       │       │   │   │   │   ├── rename_files.py
│   │       │       │   │   │   │   ├── replace_content.py
│   │       │       │   │   │   │   ├── search_files.py
│   │       │       │   │   │   │   └── upload_file.py
│   │       │       │   │   │   ├── health/
│   │       │       │   │   │   │   ├── __init__.py
│   │       │       │   │   │   │   └── ping.py
│   │       │       │   │   │   └── metric/
│   │       │       │   │   │       ├── __init__.py
│   │       │       │   │   │       ├── get_metrics.py
│   │       │       │   │   │       └── watch_metrics.py
│   │       │       │   │   ├── client.py
│   │       │       │   │   ├── errors.py
│   │       │       │   │   ├── models/
│   │       │       │   │   │   ├── __init__.py
│   │       │       │   │   │   ├── chmod_files_body.py
│   │       │       │   │   │   ├── code_context.py
│   │       │       │   │   │   ├── code_context_request.py
│   │       │       │   │   │   ├── command_status_response.py
│   │       │       │   │   │   ├── error_response.py
│   │       │       │   │   │   ├── file_info.py
│   │       │       │   │   │   ├── file_metadata.py
│   │       │       │   │   │   ├── get_files_info_response_200.py
│   │       │       │   │   │   ├── make_dirs_body.py
│   │       │       │   │   │   ├── metrics.py
│   │       │       │   │   │   ├── permission.py
│   │       │       │   │   │   ├── rename_file_item.py
│   │       │       │   │   │   ├── replace_content_body.py
│   │       │       │   │   │   ├── replace_file_content_item.py
│   │       │       │   │   │   ├── run_code_request.py
│   │       │       │   │   │   ├── run_command_request.py
│   │       │       │   │   │   ├── run_command_request_envs.py
│   │       │       │   │   │   ├── server_stream_event.py
│   │       │       │   │   │   ├── server_stream_event_error.py
│   │       │       │   │   │   ├── server_stream_event_results.py
│   │       │       │   │   │   ├── server_stream_event_type.py
│   │       │       │   │   │   └── upload_file_body.py
│   │       │       │   │   ├── py.typed
│   │       │       │   │   └── types.py
│   │       │       │   └── lifecycle/
│   │       │       │       ├── __init__.py
│   │       │       │       ├── api/
│   │       │       │       │   ├── __init__.py
│   │       │       │       │   └── sandboxes/
│   │       │       │       │       ├── __init__.py
│   │       │       │       │       ├── delete_sandboxes_sandbox_id.py
│   │       │       │       │       ├── get_sandboxes.py
│   │       │       │       │       ├── get_sandboxes_sandbox_id.py
│   │       │       │       │       ├── get_sandboxes_sandbox_id_endpoints_port.py
│   │       │       │       │       ├── post_sandboxes.py
│   │       │       │       │       ├── post_sandboxes_sandbox_id_pause.py
│   │       │       │       │       ├── post_sandboxes_sandbox_id_renew_expiration.py
│   │       │       │       │       └── post_sandboxes_sandbox_id_resume.py
│   │       │       │       ├── client.py
│   │       │       │       ├── errors.py
│   │       │       │       ├── models/
│   │       │       │       │   ├── __init__.py
│   │       │       │       │   ├── create_sandbox_request.py
│   │       │       │       │   ├── create_sandbox_request_env.py
│   │       │       │       │   ├── create_sandbox_request_extensions.py
│   │       │       │       │   ├── create_sandbox_request_metadata.py
│   │       │       │       │   ├── create_sandbox_response.py
│   │       │       │       │   ├── create_sandbox_response_metadata.py
│   │       │       │       │   ├── endpoint.py
│   │       │       │       │   ├── endpoint_headers.py
│   │       │       │       │   ├── error_response.py
│   │       │       │       │   ├── host.py
│   │       │       │       │   ├── image_spec.py
│   │       │       │       │   ├── image_spec_auth.py
│   │       │       │       │   ├── list_sandboxes_response.py
│   │       │       │       │   ├── network_policy.py
│   │       │       │       │   ├── network_policy_default_action.py
│   │       │       │       │   ├── network_rule.py
│   │       │       │       │   ├── network_rule_action.py
│   │       │       │       │   ├── ossfs.py
│   │       │       │       │   ├── ossfs_version.py
│   │       │       │       │   ├── pagination_info.py
│   │       │       │       │   ├── pvc.py
│   │       │       │       │   ├── renew_sandbox_expiration_request.py
│   │       │       │       │   ├── renew_sandbox_expiration_response.py
│   │       │       │       │   ├── resource_limits.py
│   │       │       │       │   ├── sandbox.py
│   │       │       │       │   ├── sandbox_metadata.py
│   │       │       │       │   ├── sandbox_status.py
│   │       │       │       │   └── volume.py
│   │       │       │       ├── py.typed
│   │       │       │       └── types.py
│   │       │       ├── config/
│   │       │       │   ├── __init__.py
│   │       │       │   ├── connection.py
│   │       │       │   └── connection_sync.py
│   │       │       ├── constants.py
│   │       │       ├── exceptions/
│   │       │       │   ├── __init__.py
│   │       │       │   └── sandbox.py
│   │       │       ├── manager.py
│   │       │       ├── models/
│   │       │       │   ├── __init__.py
│   │       │       │   ├── execd.py
│   │       │       │   ├── execd_sync.py
│   │       │       │   ├── filesystem.py
│   │       │       │   └── sandboxes.py
│   │       │       ├── py.typed
│   │       │       ├── sandbox.py
│   │       │       ├── services/
│   │       │       │   ├── __init__.py
│   │       │       │   ├── command.py
│   │       │       │   ├── egress.py
│   │       │       │   ├── filesystem.py
│   │       │       │   ├── health.py
│   │       │       │   ├── metrics.py
│   │       │       │   └── sandbox.py
│   │       │       └── sync/
│   │       │           ├── __init__.py
│   │       │           ├── adapters/
│   │       │           │   ├── __init__.py
│   │       │           │   ├── command_adapter.py
│   │       │           │   ├── converter/
│   │       │           │   │   ├── __init__.py
│   │       │           │   │   └── execution_event_dispatcher.py
│   │       │           │   ├── egress_adapter.py
│   │       │           │   ├── factory.py
│   │       │           │   ├── filesystem_adapter.py
│   │       │           │   ├── health_adapter.py
│   │       │           │   ├── metrics_adapter.py
│   │       │           │   └── sandboxes_adapter.py
│   │       │           ├── manager.py
│   │       │           ├── sandbox.py
│   │       │           └── services/
│   │       │               ├── __init__.py
│   │       │               ├── command.py
│   │       │               ├── egress.py
│   │       │               ├── filesystem.py
│   │       │               ├── health.py
│   │       │               ├── metrics.py
│   │       │               └── sandbox.py
│   │       └── tests/
│   │           ├── test_adapters_eager_init.py
│   │           ├── test_command_service_adapter_streaming.py
│   │           ├── test_command_service_sse_client_config.py
│   │           ├── test_connection_config.py
│   │           ├── test_connection_config_env_and_timeout.py
│   │           ├── test_converters_and_error_handling.py
│   │           ├── test_filesystem_search_error_handling.py
│   │           ├── test_models_stability.py
│   │           ├── test_sandbox_business_logic.py
│   │           ├── test_sandbox_close_and_connect_validation.py
│   │           ├── test_sandbox_manager_business_logic.py
│   │           ├── test_sandbox_manager_sync_business_logic.py
│   │           ├── test_sandbox_service_adapter_lifecycle.py
│   │           └── test_sandbox_sync_business_logic.py
│   └── tsconfig.base.json
├── server/
│   ├── .python-version
│   ├── DEVELOPMENT.md
│   ├── Dockerfile
│   ├── LICENSE
│   ├── README.md
│   ├── README_zh.md
│   ├── TROUBLESHOOTING.md
│   ├── TROUBLESHOOTING_zh.md
│   ├── build.sh
│   ├── docker-compose.example.yaml
│   ├── example.batchsandbox-template.yaml
│   ├── example.config.k8s.toml
│   ├── example.config.k8s.zh.toml
│   ├── example.config.toml
│   ├── example.config.zh.toml
│   ├── pyproject.toml
│   ├── src/
│   │   ├── __init__.py
│   │   ├── api/
│   │   │   ├── __init__.py
│   │   │   ├── lifecycle.py
│   │   │   └── schema.py
│   │   ├── cli.py
│   │   ├── config.py
│   │   ├── main.py
│   │   ├── middleware/
│   │   │   ├── __init__.py
│   │   │   ├── auth.py
│   │   │   └── request_id.py
│   │   ├── py.typed
│   │   └── services/
│   │       ├── __init__.py
│   │       ├── constants.py
│   │       ├── docker.py
│   │       ├── endpoint_auth.py
│   │       ├── factory.py
│   │       ├── helpers.py
│   │       ├── k8s/
│   │       │   ├── __init__.py
│   │       │   ├── agent_sandbox_provider.py
│   │       │   ├── agent_sandbox_template.py
│   │       │   ├── batchsandbox_provider.py
│   │       │   ├── batchsandbox_template.py
│   │       │   ├── client.py
│   │       │   ├── egress_helper.py
│   │       │   ├── image_pull_secret_helper.py
│   │       │   ├── informer.py
│   │       │   ├── kubernetes_service.py
│   │       │   ├── provider_factory.py
│   │       │   ├── rate_limiter.py
│   │       │   ├── security_context.py
│   │       │   ├── template_manager.py
│   │       │   ├── volume_helper.py
│   │       │   └── workload_provider.py
│   │       ├── ossfs_mixin.py
│   │       ├── runtime_resolver.py
│   │       ├── sandbox_service.py
│   │       └── validators.py
│   └── tests/
│       ├── __init__.py
│       ├── conftest.py
│       ├── k8s/
│       │   ├── __init__.py
│       │   ├── conftest.py
│       │   ├── fixtures/
│       │   │   ├── __init__.py
│       │   │   └── k8s_fixtures.py
│       │   ├── test_agent_sandbox_provider.py
│       │   ├── test_agent_sandbox_template.py
│       │   ├── test_batchsandbox_provider.py
│       │   ├── test_batchsandbox_template.py
│       │   ├── test_egress_helper.py
│       │   ├── test_image_pull_secret_helper.py
│       │   ├── test_informer.py
│       │   ├── test_k8s_client.py
│       │   ├── test_kubernetes_service.py
│       │   ├── test_provider_factory.py
│       │   └── test_rate_limiter.py
│       ├── smoke.sh
│       ├── test_agent_sandbox_service.py
│       ├── test_auth_middleware.py
│       ├── test_config.py
│       ├── test_docker_endpoint.py
│       ├── test_docker_path_fix.py
│       ├── test_docker_service.py
│       ├── test_endpoint.py
│       ├── test_endpoint_auth.py
│       ├── test_helpers.py
│       ├── test_ingress.py
│       ├── test_routes.py
│       ├── test_routes_create_delete.py
│       ├── test_routes_endpoint_behavior.py
│       ├── test_routes_get_sandbox.py
│       ├── test_routes_list_sandboxes.py
│       ├── test_routes_pause_resume.py
│       ├── test_routes_proxy.py
│       ├── test_routes_renew_expiration.py
│       ├── test_schema.py
│       ├── test_validators.py
│       └── testdata/
│           ├── config.toml
│           └── k8s_config.toml
├── specs/
│   ├── README.md
│   ├── README_zh.md
│   ├── egress-api.yaml
│   ├── execd-api.yaml
│   └── sandbox-lifecycle.yml
└── tests/
    ├── csharp/
    │   └── OpenSandbox.E2ETests/
    │       ├── CodeInterpreterE2ETests.cs
    │       ├── E2ETestFixture.cs
    │       ├── OpenSandbox.E2ETests.csproj
    │       ├── SandboxE2ETests.cs
    │       └── SandboxManagerE2ETests.cs
    ├── java/
    │   ├── build.gradle.kts
    │   ├── gradle/
    │   │   ├── libs.versions.toml
    │   │   └── wrapper/
    │   │       ├── gradle-wrapper.jar
    │   │       └── gradle-wrapper.properties
    │   ├── gradle.properties
    │   ├── gradlew
    │   ├── settings.gradle.kts
    │   └── src/
    │       └── test/
    │           ├── java/
    │           │   └── com/
    │           │       └── alibaba/
    │           │           └── opensandbox/
    │           │               └── e2e/
    │           │                   ├── BaseE2ETest.java
    │           │                   ├── CodeInterpreterE2ETest.java
    │           │                   ├── SandboxE2ETest.java
    │           │                   └── SandboxManagerE2ETest.java
    │           └── resources/
    │               └── test.properties
    ├── javascript/
    │   ├── README.md
    │   ├── eslint.config.mjs
    │   ├── package.json
    │   ├── tests/
    │   │   ├── base_e2e.ts
    │   │   ├── test_code_interpreter_e2e.test.ts
    │   │   ├── test_sandbox_e2e.test.ts
    │   │   ├── test_sandbox_manager_e2e.test.ts
    │   │   └── test_wait_until_ready_diagnostics.test.ts
    │   ├── tsconfig.json
    │   └── vitest.config.ts
    └── python/
        ├── Makefile
        ├── README.md
        ├── pyproject.toml
        └── tests/
            ├── __init__.py
            ├── base_e2e_test.py
            ├── test_code_interpreter_e2e.py
            ├── test_code_interpreter_e2e_sync.py
            ├── test_sandbox_e2e.py
            ├── test_sandbox_e2e_sync.py
            ├── test_sandbox_manager_e2e.py
            └── test_sandbox_manager_e2e_sync.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/CODEOWNERS
================================================
# CODEOWNERS for OpenSandbox
# Rules are evaluated top-to-bottom; the last matching pattern wins.

# Default owners (fallback for files not matched by specific rules)
* @jwx0925 @hittyt @hellomypastor @Pangjiping @ninan-nn

# Control plane (server)
/server/ @Pangjiping @hittyt @jwx0925 @Generalwin @ninan-nn

# Runtime components (execd, ingress, egress) and sandbox images
/components/execd/ @Pangjiping @hittyt @ninan-nn
/components/ingress/ @Pangjiping @hittyt @Generalwin @Spground
/components/egress/ @Pangjiping @hittyt @jwx0925
/sandboxes/ @Pangjiping @ninan-nn @jwx0925 @hittyt @hellomypastor

# Kubernetes controller
/kubernetes/ @Spground @Generalwin @fengcone @kevinlynx @ninan-nn @hittyt @Pangjiping

# SDKs
/sdks/ @ninan-nn @jwx0925 @hittyt @hellomypastor

# Specs and docs
/specs/ @jwx0925 @hittyt @ninan-nn

# OpenSandbox Enhancement Proposals
/oseps/ @Spground @Generalwin @fengcone @kevinlynx @Pangjiping @ninan-nn @jwx0925 @hittyt


================================================
FILE: .github/ISSUE_TEMPLATE/FEATURE_REQUEST.md
================================================
---
name: Feature Request
about: Suggest an idea for OpenSandbox
title: ''
labels: ''
assignees: ''

---

## Why do you need it?
Is your feature request related to a problem? Please describe it in detail.


## How could it be?
A clear and concise description of what you want to happen. Where possible, describe the feature's inputs and outputs.


## Other related information
Add any other context or screenshots about the feature request here.


================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: false
contact_links:
  - name: Any Questions or Suggestions?
    url: https://github.com/alibaba/OpenSandbox/issues
    about: Please ask and answer questions here.


================================================
FILE: .github/pull_request_template.md
================================================
# Summary
- What is changing and why?

# Testing
- [ ] Not run (explain why)
- [ ] Unit tests
- [ ] Integration tests
- [ ] e2e / manual verification

# Breaking Changes
- [ ] None
- [ ] Yes (describe impact and migration path)

# Checklist
- [ ] Linked Issue or clearly described motivation
- [ ] Added/updated docs (if needed)
- [ ] Added/updated tests (if needed)
- [ ] Security impact considered
- [ ] Backward compatibility considered


================================================
FILE: .github/workflows/deploy-docs-pages.yml
================================================
# Builds the docs site and publishes it to GitHub Pages.
# Triggered by pushes to main that touch the documentation-related paths below,
# or manually via workflow_dispatch.
name: Deploy Docs Pages

on:
  push:
    branches:
      - main
    # Path filter: a push only triggers a run when it changes one of these paths.
    paths:
      - "docs/**"
      - "specs/**"
      - "scripts/spec-doc/**"
      - "README.md"
      - "CONTRIBUTING.md"
      - "CODE_OF_CONDUCT.md"
      - "server/**/README*.md"
      - "server/**/DEVELOPMENT.md"
      - "components/**/README*.md"
      - "components/**/DEVELOPMENT.md"
      - "sdks/**/README*.md"
      - "sandboxes/**/README*.md"
      - "kubernetes/**/README*.md"
      - "examples/**/README*.md"
      # NOTE(review): already matched by the broader "specs/**" entry above.
      - "specs/**/README*.md"
      - "oseps/**/*.md"
  workflow_dispatch:

# Read access to the repository plus write access to Pages and the OIDC token,
# which the Pages deployment action relies on.
permissions:
  contents: read
  pages: write
  id-token: write

# All runs share the "pages" group; a run that is already in progress is never
# cancelled by a newer one.
concurrency:
  group: pages
  cancel-in-progress: false

jobs:
  # Builds the static site under docs/ and uploads it as a Pages artifact.
  build:
    runs-on: ubuntu-latest
    environment:
      name: github-pages
    steps:
      - name: Checkout
        uses: actions/checkout@v6

      # The `pages` step id is referenced below to read the site's base path.
      - name: Setup Pages
        id: pages
        uses: actions/configure-pages@v5

      - name: Setup Node 22
        uses: actions/setup-node@v6
        with:
          node-version: "22"

      - name: Setup pnpm
        uses: pnpm/action-setup@v4
        with:
          version: 9.15.0

      - name: Enable corepack
        run: corepack enable

      # --frozen-lockfile makes the install fail instead of updating the lockfile.
      - name: Install docs dependencies
        working-directory: docs
        run: pnpm install --frozen-lockfile

      - name: Build docs
        working-directory: docs
        env:
          # Use the root base ("/") when a custom domain is configured, i.e. when
          # docs/public/CNAME exists (hashFiles returns a non-empty hash);
          # otherwise use the base path reported by the "Setup Pages" step.
          DOCS_BASE: ${{ hashFiles('docs/public/CNAME') != '' && '/' || steps.pages.outputs.base_path }}
        run: pnpm docs:build

      # The build output directory uploaded here is what the deploy job publishes.
      - name: Upload artifact
        uses: actions/upload-pages-artifact@v4
        with:
          path: docs/.vitepress/dist

  # Publishes the artifact produced by the build job to GitHub Pages.
  deploy:
    runs-on: ubuntu-latest
    needs: build
    environment:
      name: github-pages
      # Environment URL is taken from the page_url output of the deployment step.
      url: ${{ steps.deployment.outputs.page_url }}
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4


================================================
FILE: .github/workflows/egress-test.yaml.yml
================================================
# CI for the egress component: build and unit tests, smoke scripts, and a
# benchmark run. Triggered by pull requests targeting main.
name: Egress Tests

on:
  pull_request:
    branches: [ main ]
    # Run only when files under components/egress or components/internal change.
    paths:
      - 'components/egress/**'
      - 'components/internal/**'

# Read-only repository access for every job in this workflow.
permissions:
  contents: read

# One active run per workflow and pull request (falling back to the ref when
# there is no PR number); a newer run cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  # Vets, builds, and runs the Go tests in components/egress.
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Go
        uses: actions/setup-go@v6
        with:
          go-version: '1.24.0'

      - name: Run Build
        working-directory: components/egress
        run: |
          go vet ./...
          go build .

      - name: Run tests
        working-directory: components/egress
        run: |
          go test ./...

  # Runs the smoke scripts under components/egress/tests on a self-hosted runner.
  smoke:
    runs-on: self-hosted
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Run dns test
        working-directory: components/egress
        run: |
          chmod +x tests/smoke-dns.sh
          ./tests/smoke-dns.sh

      - name: Run nft test
        working-directory: components/egress
        run: |
          chmod +x tests/smoke-nft.sh
          ./tests/smoke-nft.sh

      - name: Run dynamic ip test
        working-directory: components/egress
        run: |
          chmod +x tests/smoke-dynamic-ip.sh
          ./tests/smoke-dynamic-ip.sh

  # Runs tests/bench-dns-nft.sh and uploads the logs it leaves in /tmp/egress-logs/.
  bench:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      # BENCH_SAMPLE_SIZE is passed to the script through the environment;
      # presumably it sets the number of benchmark samples (confirm in the script).
      - name: Run bench test
        working-directory: components/egress
        run: |
          chmod +x tests/bench-dns-nft.sh
          ./tests/bench-dns-nft.sh
        env:
          BENCH_SAMPLE_SIZE: "20"

      # always() uploads the logs even when the bench step failed.
      - name: Upload egress logs
        if: always()
        uses: actions/upload-artifact@v7
        with:
          name: egress-log-for-bench
          path: /tmp/egress-logs/
          retention-days: 5


================================================
FILE: .github/workflows/execd-test.yml
================================================
# CI for the execd component: lint, multi-platform build, unit tests with a
# coverage summary, and a smoke test on Linux and Windows.
# Triggered by pull requests targeting main.
name: Execd Tests

on:
  pull_request:
    branches: [ main ]
    # Run only when files under components/execd or components/internal change.
    paths:
      - 'components/execd/**'
      - 'components/internal/**'

# Read-only repository access for every job in this workflow.
permissions:
  contents: read

# One active run per workflow and pull request (falling back to the ref when
# there is no PR number); a newer run cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  # Lints, builds, and tests components/execd, then writes a coverage summary.
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Go
        uses: actions/setup-go@v6
        with:
          go-version: '1.24.0'

      - name: Run golint
        run: |
          cd components/execd
          make golint

      - name: Build (Multi platform compile)
        run: |
          cd components/execd
          #
          make multi-build

      # Tests run over ./pkg/...; -coverpkg=./... attributes coverage to every
      # package in the module and the profile is written to coverage.out.
      - name: Run tests with coverage
        run: |
          cd components/execd
          go test -v -coverpkg=./... -coverprofile=coverage.out -covermode=atomic ./pkg/...

      # Reads the "total" line of `go tool cover -func`, exposes it as the
      # `total_coverage` step output, and appends a report to the job summary.
      - name: Calculate coverage and generate summary
        id: coverage
        run: |
          cd components/execd
          # Extract total coverage percentage
          TOTAL_COVERAGE=$(go tool cover -func=coverage.out | grep total | awk '{print $3}')
          echo "total_coverage=$TOTAL_COVERAGE" >> $GITHUB_OUTPUT
          
          # Generate GitHub Actions job summary
          echo "## 📊 execd Test Coverage Report" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "**Total Line Coverage:** $TOTAL_COVERAGE" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "Coverage report generated for commit \`${{ github.sha }}\`" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "---" >> $GITHUB_STEP_SUMMARY
          echo "*Coverage targets: Core packages >80%, API layer >70%*" >> $GITHUB_STEP_SUMMARY

  # Builds execd and runs the smoke scripts on both Ubuntu and Windows runners.
  smoke:
    strategy:
      # Let the other OS finish even if one matrix entry fails.
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest]
    runs-on: ${{ matrix.os }}
    # bash is the default shell for every run step, including on Windows.
    defaults:
      run:
        shell: bash
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Go
        uses: actions/setup-go@v6
        with:
          go-version: '1.24.0'

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.10'

      # Windows only: install make through Chocolatey; this step overrides the
      # default shell with powershell.
      - name: Install make (Windows)
        if: matrix.os == 'windows-latest'
        shell: powershell
        run: choco install make -y

      - name: Build
        run: |
          cd components/execd
          make build

      # Runs tests/smoke.sh, waits five seconds, then runs tests/smoke_api.py.
      - name: Run smoke test
        run: |
          cd components/execd
          chmod +x tests/smoke.sh
          ./tests/smoke.sh

          sleep 5
          python3 tests/smoke_api.py
      # Always print the logs; `|| true` keeps the step green when a log file is missing.
      - name: Show logs
        if: always()
        run: |
          set -x
          cat components/execd/startup.log || true
          cat components/execd/execd.log || true


================================================
FILE: .github/workflows/ingress-test.yaml
================================================
# CI for the ingress component: lint, build and test on pull requests that
# touch the ingress sources or the shared internal packages.
name: Ingress Tests

on:
  pull_request:
    branches:
      - main
    paths:
      - "components/ingress/**"
      - "components/internal/**"

# One active run per pull request (or ref); a newer run cancels the older one.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

jobs:
  test:
    runs-on: ubuntu-latest
    permissions:
      contents: read
    defaults:
      run:
        # Every script step below runs from the ingress component directory.
        working-directory: components/ingress
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Go
        uses: actions/setup-go@v6
        with:
          go-version: "1.24.0"

      - name: Run golint
        run: make golint

      - name: Run Build
        run: make build

      - name: Run tests
        run: make test


================================================
FILE: .github/workflows/publish-components.yml
================================================
# Builds and publishes a component image, either on demand (manual dispatch)
# or when a matching release tag is pushed.
name: Publish Components Image

on:
  # Release tags: docker/<component>/<tag> or k8s/<component>/<tag>.
  push:
    tags:
      - "docker/execd/**"
      - "docker/code-interpreter/**"
      - "docker/ingress/**"
      - "docker/egress/**"
      - "k8s/controller/**"
      - "k8s/task-executor/**"
  workflow_dispatch:
    inputs:
      component:
        type: choice
        description: "Component to build"
        required: true
        default: execd
        options:
          - execd
          - code-interpreter
          - ingress
          - egress
          - controller
          - task-executor
      image_tag:
        description: "Docker image tag"
        required: true
        default: latest

permissions:
  # required for bump step to push branch and create PR
  contents: write
  pull-requests: write

jobs:
  # Builds the selected component image, pushes it to Docker Hub and ACR via
  # the component's build.sh, then opens a PR that bumps the component version
  # recorded in the repository (skipped for the "latest" tag).
  publish:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to DockerHub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}

      - name: Login to ACR
        uses: docker/login-action@v3
        with:
          registry: sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com
          username: ${{ secrets.ACR_USERNAME }}
          password: ${{ secrets.ACR_PASSWORD }}

      - name: Parse tag and set variables
        id: parse_tag
        # Tag names and dispatch inputs are caller-controlled. They are handed
        # to the script through environment variables (GITHUB_REF is provided
        # by the runner) instead of being expanded into the script text, so
        # they can never be interpreted as shell syntax.
        env:
          DISPATCH_COMPONENT: ${{ inputs.component }}
          DISPATCH_IMAGE_TAG: ${{ inputs.image_tag }}
        run: |
          # Release tags look like docker/<component>/<tag> or k8s/<component>/<tag>.
          if [[ "$GITHUB_REF" == refs/tags/docker/* || "$GITHUB_REF" == refs/tags/k8s/* ]]; then
            TAG_PATH="${GITHUB_REF#refs/tags/}"

            COMPONENT=$(echo "$TAG_PATH" | cut -d'/' -f2)
            IMAGE_TAG=$(echo "$TAG_PATH" | cut -d'/' -f3)
          else
            # Manual dispatch: take the values from the workflow inputs.
            COMPONENT="$DISPATCH_COMPONENT"
            IMAGE_TAG="$DISPATCH_IMAGE_TAG"
          fi

          echo "component=$COMPONENT" >> "$GITHUB_OUTPUT"
          echo "image_tag=$IMAGE_TAG" >> "$GITHUB_OUTPUT"

      - name: Free disk space
        run: |
          sudo rm -rf /usr/share/dotnet /opt/ghc /opt/hostedtoolcache
          sudo apt-get clean
          sudo rm -rf /var/lib/apt/lists/*
          df -h

      - name: Build and push to registries
        # build.sh reads TAG and COMPONENT from the environment.
        env:
          COMPONENT: ${{ steps.parse_tag.outputs.component }}
          TAG: ${{ steps.parse_tag.outputs.image_tag }}
        run: |
          # Pick the directory that holds build.sh for this component.
          case "$COMPONENT" in
            execd|ingress|egress)
              cd "components/$COMPONENT"
              ;;
            controller|task-executor)
              cd kubernetes
              ;;
            *)
              cd "sandboxes/$COMPONENT"
              ;;
          esac

          export TAG COMPONENT
          chmod +x build.sh
          ./build.sh

      - name: Bump component version in repo
        if: steps.parse_tag.outputs.image_tag != 'latest' && steps.parse_tag.outputs.image_tag != ''
        env:
          GH_TOKEN: ${{ github.token }}
          COMPONENT: ${{ steps.parse_tag.outputs.component }}
          IMAGE_TAG: ${{ steps.parse_tag.outputs.image_tag }}
        run: |
          # Ensure version has 'v' prefix for bump script
          if [[ "$IMAGE_TAG" =~ ^v ]]; then
            VERSION="$IMAGE_TAG"
          else
            VERSION="v${IMAGE_TAG}"
          fi

          ./scripts/bump-component-version.sh "$COMPONENT" "$VERSION"

          BRANCH="bump/${COMPONENT}-${VERSION}"
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git checkout -b "$BRANCH"
          git add -A

          # Nothing staged means the repo already records this version.
          if git diff --staged --quiet; then
            echo "No changes to commit"
            exit 0
          fi

          git commit -m "chore: bump $COMPONENT to $VERSION"
          git push origin "$BRANCH"

          gh pr create \
            --title "chore: bump $COMPONENT to $VERSION" \
            --body "Auto-generated by Publish Components workflow after building \`$COMPONENT:$VERSION\`." \
            --base "$(gh api "repos/$GITHUB_REPOSITORY" --jq .default_branch)"


================================================
FILE: .github/workflows/publish-csharp-sdks.yml
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Packs and publishes the C# SDKs to NuGet when a release tag of the form
# csharp/<sdk>/v<version> is pushed.
name: Publish C# SDKs

on:
  push:
    tags:
      - 'csharp/sandbox/v*'
      - 'csharp/code-interpreter/v*'

permissions:
  contents: read

jobs:
  publish:
    name: Publish (${{ matrix.sdk.name }})
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        # Both entries run for every tag; the steps after the .NET setup are
        # gated so only the entry whose tagPrefix matches the tag does work.
        sdk:
          - name: sandbox
            tagPrefix: sandbox
            csprojPath: sdks/sandbox/csharp/src/OpenSandbox/OpenSandbox.csproj
          - name: code-interpreter
            tagPrefix: code-interpreter
            csprojPath: sdks/code-interpreter/csharp/src/OpenSandbox.CodeInterpreter/OpenSandbox.CodeInterpreter.csproj

    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up .NET
        uses: actions/setup-dotnet@v5
        with:
          dotnet-version: '10.0.x'

      # Strip the "csharp/<sdk>/v" prefix from the tag name and expose the
      # remainder to later steps as PACKAGE_VERSION.
      - name: Parse package version from tag
        if: startsWith(github.ref, format('refs/tags/csharp/{0}/v', matrix.sdk.tagPrefix))
        shell: bash
        run: |
          VERSION="${GITHUB_REF_NAME#csharp/${{ matrix.sdk.tagPrefix }}/v}"
          echo "PACKAGE_VERSION=$VERSION" >> "$GITHUB_ENV"

      # The code-interpreter SDK is restored with its local project reference
      # to OpenSandbox switched off.
      - name: Restore
        if: startsWith(github.ref, format('refs/tags/csharp/{0}/v', matrix.sdk.tagPrefix))
        run: |
          EXTRA_RESTORE_ARGS=""
          if [ "${{ matrix.sdk.name }}" = "code-interpreter" ]; then
            EXTRA_RESTORE_ARGS="-p:UseLocalOpenSandboxProjectReference=false"
          fi
          dotnet restore "${{ matrix.sdk.csprojPath }}" ${EXTRA_RESTORE_ARGS}

      - name: Pack
        if: startsWith(github.ref, format('refs/tags/csharp/{0}/v', matrix.sdk.tagPrefix))
        run: |
          EXTRA_PACK_ARGS=""
          if [ "${{ matrix.sdk.name }}" = "code-interpreter" ]; then
            EXTRA_PACK_ARGS="-p:UseLocalOpenSandboxProjectReference=false"
          fi
          dotnet pack "${{ matrix.sdk.csprojPath }}" \
            --configuration Release \
            --no-restore \
            -p:PackageVersion="${PACKAGE_VERSION}" \
            -p:ContinuousIntegrationBuild=true \
            ${EXTRA_PACK_ARGS} \
            --output ./artifacts/${{ matrix.sdk.name }}

      # --skip-duplicate keeps a re-run from failing on an already published version.
      - name: Publish to NuGet
        if: startsWith(github.ref, format('refs/tags/csharp/{0}/v', matrix.sdk.tagPrefix))
        env:
          NUGET_API_KEY: ${{ secrets.NUGET_API_KEY }}
        run: |
          dotnet nuget push "./artifacts/${{ matrix.sdk.name }}/*.nupkg" \
            --api-key "$NUGET_API_KEY" \
            --source "https://api.nuget.org/v3/index.json" \
            --skip-duplicate


================================================
FILE: .github/workflows/publish-helm-chart.yml
================================================
# Packages one of the Helm charts under kubernetes/charts and publishes the
# resulting .tgz as a GitHub Release asset. Runs on helm/<component>/<version>
# tags or on manual dispatch.
name: Publish Helm Chart

on:
  workflow_dispatch:
    inputs:
      component:
        description: 'Component to release'
        required: true
        type: choice
        options:
          - opensandbox-controller
          - opensandbox-server
          - opensandbox
        default: 'opensandbox-controller'
      app_version:
        description: 'App version (without v prefix, e.g., 0.1.0)'
        required: true
        default: '0.1.0'
  push:
    tags:
      - 'helm/**'  # Format: helm/<component>/<app_version>, e.g., helm/opensandbox-controller/0.1.0

jobs:
  publish:
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Configure Git
        run: |
          git config user.name "$GITHUB_ACTOR"
          git config user.email "$GITHUB_ACTOR@users.noreply.github.com"

      - name: Install Helm
        uses: azure/setup-helm@v4
        with:
          version: 'latest'

      - name: Parse tag and set variables
        id: parse_tag
        # Tag names and dispatch inputs are caller-controlled. They reach the
        # script through environment variables (GITHUB_REF is provided by the
        # runner) rather than being expanded into the script text, so they
        # cannot be interpreted as shell syntax.
        env:
          DISPATCH_COMPONENT: ${{ inputs.component }}
          DISPATCH_APP_VERSION: ${{ inputs.app_version }}
        run: |
          if [[ "$GITHUB_REF" == refs/tags/helm/* ]]; then
            TAG_PATH="${GITHUB_REF#refs/tags/}"

            COMPONENT=$(echo "$TAG_PATH" | cut -d'/' -f2)
            VERSION=$(echo "$TAG_PATH" | cut -d'/' -f3)

            # Remove 'v' prefix if present
            VERSION=${VERSION#v}

            # Release against the tag that was actually pushed. Rebuilding it
            # from the stripped version would point at a different tag when
            # the pushed tag carried a 'v' prefix.
            RELEASE_TAG="$TAG_PATH"
          else
            COMPONENT="$DISPATCH_COMPONENT"
            VERSION="$DISPATCH_APP_VERSION"
            RELEASE_TAG="helm/$COMPONENT/$VERSION"
          fi

          echo "component=$COMPONENT" >> "$GITHUB_OUTPUT"
          echo "app_version=$VERSION" >> "$GITHUB_OUTPUT"
          echo "release_tag=$RELEASE_TAG" >> "$GITHUB_OUTPUT"

      - name: Set chart path
        id: chart_path
        env:
          COMPONENT: ${{ steps.parse_tag.outputs.component }}
        run: |
          # Only the known charts are accepted; anything else aborts the run.
          case "$COMPONENT" in
            opensandbox-controller|opensandbox-server|opensandbox)
              CHART_PATH="kubernetes/charts/$COMPONENT"
              ;;
            *)
              echo "Error: Unknown component: $COMPONENT"
              exit 1
              ;;
          esac

          echo "path=$CHART_PATH" >> "$GITHUB_OUTPUT"

      - name: Get chart version from Chart.yaml
        id: chart_version
        env:
          CHART_PATH: ${{ steps.chart_path.outputs.path }}
        run: |
          CHART_VERSION=$(grep '^version:' "$CHART_PATH/Chart.yaml" | awk '{print $2}')
          echo "version=$CHART_VERSION" >> "$GITHUB_OUTPUT"
          echo "Chart version: $CHART_VERSION"

      - name: Update Chart.yaml with app version
        env:
          APP_VERSION: ${{ steps.parse_tag.outputs.app_version }}
          CHART_PATH: ${{ steps.chart_path.outputs.path }}
        run: |
          # Only update appVersion, keep chart version as-is in Chart.yaml
          sed -i "s/^appVersion:.*/appVersion: \"$APP_VERSION\"/" "$CHART_PATH/Chart.yaml"

          echo "Updated Chart.yaml:"
          cat "$CHART_PATH/Chart.yaml"

      - name: Build dependencies (for opensandbox all-in-one chart)
        if: ${{ steps.parse_tag.outputs.component == 'opensandbox' }}
        env:
          CHART_PATH: ${{ steps.chart_path.outputs.path }}
        run: |
          echo "Building dependencies for all-in-one chart..."
          helm dependency build "$CHART_PATH"

      - name: Lint Helm chart
        env:
          CHART_PATH: ${{ steps.chart_path.outputs.path }}
        run: |
          helm lint "$CHART_PATH"

      - name: Package Helm chart
        env:
          CHART_PATH: ${{ steps.chart_path.outputs.path }}
        run: |
          helm package "$CHART_PATH"

      # The expressions below are inputs to the release action, not shell
      # text, so they are substituted as plain strings.
      - name: Create GitHub Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ steps.parse_tag.outputs.release_tag }}
          name: Helm Chart ${{ steps.parse_tag.outputs.component }} ${{ steps.chart_version.outputs.version }} (App v${{ steps.parse_tag.outputs.app_version }})
          body: |
            ## ${{ steps.parse_tag.outputs.component }} Helm Chart
            
            **Chart Version:** ${{ steps.chart_version.outputs.version }}
            **App Version:** ${{ steps.parse_tag.outputs.app_version }}
            
            ### Installation
            
            直接从 GitHub Release 安装:
            
            ```bash
            helm install ${{ steps.parse_tag.outputs.component }} \
              https://github.com/${{ github.repository }}/releases/download/${{ steps.parse_tag.outputs.release_tag }}/${{ steps.parse_tag.outputs.component }}-${{ steps.chart_version.outputs.version }}.tgz \
              --namespace opensandbox-system \
              --create-namespace
            ```
            
            或者先下载后安装:
            
            ```bash
            # 下载
            wget https://github.com/${{ github.repository }}/releases/download/${{ steps.parse_tag.outputs.release_tag }}/${{ steps.parse_tag.outputs.component }}-${{ steps.chart_version.outputs.version }}.tgz
            
            # 安装
            helm install ${{ steps.parse_tag.outputs.component }} ./${{ steps.parse_tag.outputs.component }}-${{ steps.chart_version.outputs.version }}.tgz \
              --namespace opensandbox-system \
              --create-namespace
            ```
            
            ${{ steps.parse_tag.outputs.component == 'opensandbox' && '**Note**: This is an all-in-one chart that bundles controller and server. The packaged chart already includes all dependencies, no need to run `helm dependency build` when installing from release.' || '' }}
            
            ### What's Changed
            
            - Chart version: ${{ steps.chart_version.outputs.version }}
            - App version: ${{ steps.parse_tag.outputs.app_version }}
          files: |
            ${{ steps.parse_tag.outputs.component }}-*.tgz
          draft: false
          prerelease: false
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}


================================================
FILE: .github/workflows/publish-java-sdks.yml
================================================
# Publishes the Kotlin/Java SDKs to Maven Central when a release tag of the
# form java/<sdk>/v<version> is pushed.
name: Publish Java SDKs

on:
  push:
    tags:
      - 'java/sandbox/v*'
      - 'java/code-interpreter/v*'

permissions:
  contents: read

jobs:
  publish:
    name: Publish (${{ matrix.sdk.name }})
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        # Both entries run for every tag; the publish step is gated so only the
        # entry whose tagPrefix matches the pushed tag actually publishes.
        sdk:
          - name: sandbox
            tagPrefix: sandbox
            workingDirectory: sdks/sandbox/kotlin
          - name: code-interpreter
            tagPrefix: code-interpreter
            workingDirectory: sdks/code-interpreter/kotlin

    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Java
        uses: actions/setup-java@v5
        with:
          java-version: '17'
          distribution: temurin

      - name: Set up Gradle
        uses: gradle/actions/setup-gradle@v5

      # Credentials and signing key are passed to Gradle as
      # ORG_GRADLE_PROJECT_* environment variables.
      - name: Publish to Maven Central
        if: startsWith(github.ref, format('refs/tags/java/{0}/v', matrix.sdk.tagPrefix))
        working-directory: ${{ matrix.sdk.workingDirectory }}
        env:
          ORG_GRADLE_PROJECT_mavenCentralUsername: ${{ secrets.ORG_GRADLE_PROJECT_MAVENCENTRALUSERNAME }}
          ORG_GRADLE_PROJECT_mavenCentralPassword: ${{ secrets.ORG_GRADLE_PROJECT_MAVENCENTRALPASSWORD }}
          ORG_GRADLE_PROJECT_signingInMemoryKey: ${{ secrets.ORG_GRADLE_PROJECT_SIGNINGINMEMORYKEY }}
          ORG_GRADLE_PROJECT_signingInMemoryKeyPassword: ${{ secrets.ORG_GRADLE_PROJECT_SIGNINGINMEMORYKEYPASSWORD }}
        run: ./gradlew publishAndReleaseToMavenCentral


================================================
FILE: .github/workflows/publish-js-sdks.yml
================================================
# Builds and publishes the JavaScript SDKs to npm when a release tag of the
# form js/<sdk>/v<version> is pushed.
name: Publish JavaScript SDKs

on:
  push:
    tags:
      - 'js/sandbox/v*'
      - 'js/code-interpreter/v*'

permissions:
  contents: read

jobs:
  publish:
    name: Publish (${{ matrix.sdk.name }})
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        # Both entries install and build for every tag; only the entry whose
        # tagPrefix matches the pushed tag runs the publish step.
        sdk:
          - name: sandbox
            tagPrefix: sandbox
            workingDirectory: sdks/sandbox/javascript
            packageName: '@alibaba-group/opensandbox'
          - name: code-interpreter
            tagPrefix: code-interpreter
            workingDirectory: sdks/code-interpreter/javascript
            packageName: '@alibaba-group/opensandbox-code-interpreter'

    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Node
        uses: actions/setup-node@v6
        with:
          registry-url: 'https://registry.npmjs.org'
          node-version: '20'

      - name: Set up pnpm
        uses: pnpm/action-setup@v4
        with:
          version: latest

      - name: Enable corepack
        run: corepack enable

      # Expose the pnpm store location so the cache step can persist it.
      - name: Get pnpm store path
        id: pnpm-store
        run: echo "STORE_PATH=$(corepack pnpm store path)" >> "$GITHUB_OUTPUT"

      # Cache key follows the workspace lockfile; the restore key falls back to
      # any earlier cache for the same OS.
      - name: Cache pnpm store
        uses: actions/cache@v5
        with:
          key: ${{ runner.os }}-pnpm-${{ hashFiles('sdks/pnpm-lock.yaml') }}
          restore-keys: ${{ runner.os }}-pnpm-
          path: ${{ steps.pnpm-store.outputs.STORE_PATH }}

      - name: Install workspace dependencies
        working-directory: sdks
        run: corepack pnpm install --frozen-lockfile

      # The trailing "..." on the filter also selects the package's dependencies.
      - name: Build SDK
        working-directory: sdks
        run: corepack pnpm --filter ${{ matrix.sdk.packageName }}... --sort run build

      - name: Publish to npm
        if: startsWith(github.ref, format('refs/tags/js/{0}/v', matrix.sdk.tagPrefix))
        working-directory: ${{ matrix.sdk.workingDirectory }}
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
        run: corepack pnpm publish --access public --no-git-checks


================================================
FILE: .github/workflows/publish-python-sdks.yml
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Publishes the Python packages to PyPI. Each release tag prefix below maps to
# exactly one job in this workflow.
name: Publish Python SDKs

on:
  push:
    tags:
      - 'python/sandbox/v*'
      - 'python/code-interpreter/v*'
      - 'python/mcp/sandbox/v*'

permissions:
  contents: read

jobs:
  # Sandbox SDK: runs the API generation script, then builds and publishes
  # the package from sdks/sandbox/python.
  publish-sandbox:
    runs-on: ubuntu-latest
    if: startsWith(github.ref, 'refs/tags/python/sandbox/v')
    defaults:
      run:
        working-directory: sdks/sandbox/python
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.10"

      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          version: latest

      - name: Generate API
        run: uv run python scripts/generate_api.py

      - name: Build package
        run: uv build

      # uv reads the PyPI token from UV_PUBLISH_TOKEN.
      - name: Publish to PyPI
        env:
          UV_PUBLISH_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
        run: uv publish

  # Code-interpreter SDK: builds and publishes the package from
  # sdks/code-interpreter/python.
  publish-code-interpreter:
    runs-on: ubuntu-latest
    if: startsWith(github.ref, 'refs/tags/python/code-interpreter/v')
    defaults:
      run:
        working-directory: sdks/code-interpreter/python
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.10"

      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          version: latest

      - name: Build package
        run: uv build

      # uv reads the PyPI token from UV_PUBLISH_TOKEN.
      - name: Publish to PyPI
        env:
          UV_PUBLISH_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
        run: uv publish

  # MCP sandbox package: builds and publishes the package from
  # sdks/mcp/sandbox/python.
  publish-mcp-sandbox:
    runs-on: ubuntu-latest
    if: startsWith(github.ref, 'refs/tags/python/mcp/sandbox/v')
    defaults:
      run:
        working-directory: sdks/mcp/sandbox/python
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.10'

      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          version: latest

      - name: Build package
        run: uv build

      # uv reads the PyPI token from UV_PUBLISH_TOKEN.
      - name: Publish to PyPI
        env:
          UV_PUBLISH_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
        run: uv publish


================================================
FILE: .github/workflows/publish-server.yml
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Publishes the server as a PyPI package and as a container image when a
# server/v* tag is pushed.
name: Publish Server

on:
  push:
    tags:
      - "server/v*"

permissions:
  contents: read

jobs:
  # Builds the server package with uv and uploads it to PyPI.
  publish-pypi:
    runs-on: ubuntu-latest
    if: startsWith(github.ref, 'refs/tags/server/v')
    defaults:
      run:
        working-directory: server
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.10"

      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          version: latest

      - name: Build package
        run: uv build

      # uv reads the PyPI token from UV_PUBLISH_TOKEN.
      - name: Publish to PyPI
        env:
          UV_PUBLISH_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
        run: uv publish

  # Builds the server container image and pushes it to Docker Hub and ACR
  # through server/build.sh, tagged with the part of the git tag after "server/".
  publish-image:
    if: startsWith(github.ref, 'refs/tags/server/v')
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to DockerHub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}

      - name: Login to ACR
        uses: docker/login-action@v3
        with:
          registry: sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com
          username: ${{ secrets.ACR_USERNAME }}
          password: ${{ secrets.ACR_PASSWORD }}

      - name: Parse tag and set variables
        id: parse_tag
        # The ref is read from the runner-provided GITHUB_REF variable rather
        # than expanded into the script text: tag names are caller-controlled
        # and must never be interpreted as shell syntax.
        run: |
          if [[ "$GITHUB_REF" == refs/tags/server/* ]]; then
            TAG_PATH="${GITHUB_REF#refs/tags/}"

            IMAGE_TAG="${TAG_PATH#server/}"

            if [ -z "$IMAGE_TAG" ]; then
              echo "failed to parse image tag from $TAG_PATH" >&2
              exit 1
            fi

            echo "image_tag=$IMAGE_TAG" >> "$GITHUB_OUTPUT"
          else
            echo "cannot parse tag"
            exit 1
          fi

      # build.sh picks the image tag up from the TAG environment variable.
      - name: Build and push to registries
        working-directory: server
        env:
          TAG: ${{ steps.parse_tag.outputs.image_tag }}
        run: |
          chmod +x build.sh
          ./build.sh


================================================
FILE: .github/workflows/real-e2e.yml
================================================
# End-to-end test suites. Run on every push to main, and on pull requests
# against main that touch the server, execd, egress, SDK or test sources.
name: Real E2E Tests

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
    paths:
      - "server/src/**"
      - "components/execd/**"
      - "components/egress/**"
      - "sdks/code-interpreter/**"
      - "sdks/sandbox/**"
      - "tests/**"

permissions:
  contents: read

# One active run per pull request (or ref); a newer run cancels the older one.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

jobs:
  # Python E2E: writes a docker-runtime config (bridge network, egress mode
  # "dns") to ~/.sandbox.toml and runs scripts/python-e2e.sh on the
  # self-hosted runner.
  python-e2e:
    name: Python E2E (docker bridge)
    runs-on: self-hosted
    env:
      UV_BIN: /home/admin/.local/bin
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      # GITHUB_PATH only takes effect for later steps, so PATH is also
      # exported here for the version checks in this step.
      - name: Set up uv PATH and verify
        run: |
          echo "${UV_BIN}" >> "$GITHUB_PATH"
          export PATH="${UV_BIN}:${PATH}"
          uv --version
          uv run python --version

      # Best effort: remove containers and files left behind by earlier runs.
      - name: Clean up previous E2E resources
        run: |
          docker ps -aq --filter "label=opensandbox" | xargs -r docker rm -f || true
          # Remove root-owned files from previous sandbox runs by mounting parent dir
          docker run --rm -v /tmp:/host_tmp alpine rm -rf /host_tmp/opensandbox-e2e || true

      - name: Build local egress image
        run: docker build -t opensandbox/egress:local -f components/egress/Dockerfile .

      - name: Run tests
        run: |
          set -e

          # Create config file
          cat <<EOF > ~/.sandbox.toml
          [server]
          host = "127.0.0.1"
          port = 8080
          log_level = "INFO"
          api_key = ""
          [runtime]
          type = "docker"
          execd_image = "opensandbox/execd:local"
          [egress]
          image = "opensandbox/egress:local"
          mode = "dns"
          [docker]
          network_mode = "bridge"
          [storage]
          allowed_host_paths = ["/tmp/opensandbox-e2e"]
          EOF

          ./scripts/python-e2e.sh

      - name: Eval server logs
        if: always()
        run: cat server/server.log

      - name: Upload execd logs
        if: always()
        uses: actions/upload-artifact@v7
        with:
          name: execd-log-for-python-e2e
          retention-days: 5
          path: /tmp/opensandbox-e2e/logs/

      # Runs even after a failure so the self-hosted runner is left clean.
      - name: Clean up after E2E
        if: always()
        run: |
          docker ps -aq --filter "label=opensandbox" | xargs -r docker rm -f || true
          docker run --rm -v /tmp:/host_tmp alpine rm -rf /host_tmp/opensandbox-e2e || true
          pkill -f "python -m src.main" || true

  # Java E2E: writes a docker-runtime config (bridge network, egress mode
  # "dns+nft") to ~/.sandbox.toml and runs scripts/java-e2e.sh on the
  # self-hosted runner with JDK 8 and JDK 17 installed.
  java-e2e:
    name: Java E2E (docker bridge)
    runs-on: self-hosted
    env:
      UV_BIN: /home/admin/.local/bin
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      # GITHUB_PATH only takes effect for later steps, so PATH is also
      # exported here for the version checks in this step.
      - name: Set up uv PATH and verify
        run: |
          echo "${UV_BIN}" >> "$GITHUB_PATH"
          export PATH="${UV_BIN}:${PATH}"
          uv --version
          uv run python --version

      - name: Set up JDK 8
        uses: actions/setup-java@v5
        with:
          java-version: '8'
          distribution: temurin

      - name: Set up JDK 17
        uses: actions/setup-java@v5
        with:
          java-version: '17'
          distribution: temurin

      # Best effort: remove containers and files left behind by earlier runs.
      - name: Clean up previous E2E resources
        run: |
          docker ps -aq --filter "label=opensandbox" | xargs -r docker rm -f || true
          docker run --rm -v /tmp:/host_tmp alpine rm -rf /host_tmp/opensandbox-e2e || true

      - name: Build local egress image
        run: docker build -t opensandbox/egress:local -f components/egress/Dockerfile .

      # GRADLE_OPTS points Gradle at the two JDKs installed above and turns
      # off toolchain auto-download.
      - name: Run tests
        env:
          GRADLE_USER_HOME: ${{ github.workspace }}/.gradle-user-home
        run: |
          set -e
          export GRADLE_OPTS="-Dorg.gradle.java.installations.auto-detect=true -Dorg.gradle.java.installations.auto-download=false -Dorg.gradle.java.installations.paths=${JAVA_HOME_8_X64},${JAVA_HOME_17_X64}"

          # Create config file
          cat <<EOF > ~/.sandbox.toml
          [server]
          host = "127.0.0.1"
          port = 8080
          log_level = "INFO"
          api_key = ""
          [runtime]
          type = "docker"
          execd_image = "opensandbox/execd:local"
          [egress]
          image = "opensandbox/egress:local"
          mode = "dns+nft"
          [docker]
          network_mode = "bridge"
          [storage]
          allowed_host_paths = ["/tmp/opensandbox-e2e"]
          EOF

          bash ./scripts/java-e2e.sh

      - name: Eval server logs
        if: always()
        run: cat server/server.log

      - name: Upload Test Report
        if: always()
        uses: actions/upload-artifact@v7
        with:
          name: java-test-report
          retention-days: 5
          path: tests/java/build/reports/tests/test/

      - name: Upload execd logs
        if: always()
        uses: actions/upload-artifact@v7
        with:
          name: execd-log-for-java-e2e
          retention-days: 5
          path: /tmp/opensandbox-e2e/logs/

      # Runs even after a failure so the self-hosted runner is left clean.
      - name: Clean up after E2E
        if: always()
        run: |
          docker ps -aq --filter "label=opensandbox" | xargs -r docker rm -f || true
          docker run --rm -v /tmp:/host_tmp alpine rm -rf /host_tmp/opensandbox-e2e || true
          pkill -f "python -m src.main" || true

  javascript-e2e:
    name: JavaScript E2E (docker bridge)
    runs-on: self-hosted
    env:
      UV_BIN: /home/admin/.local/bin
      NODE_VERSION: "20.19.0"
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up uv PATH and verify
        run: |
          echo "${UV_BIN}" >> "$GITHUB_PATH"
          export PATH="${UV_BIN}:${PATH}"
          uv --version
          uv run python --version

      - name: Set up Node.js
        run: |
          NODE_DIR="/home/admin/.local/node-v${NODE_VERSION}-linux-x64"
          if [ -x "${NODE_DIR}/bin/node" ]; then
            echo "Node.js ${NODE_VERSION} already cached"
          else
            echo "Downloading Node.js ${NODE_VERSION}..."
            mkdir -p /home/admin/.local
            curl -fsSL "https://nodejs.org/dist/v${NODE_VERSION}/node-v${NODE_VERSION}-linux-x64.tar.xz" \
              | tar -xJ -C /home/admin/.local/
          fi
          echo "${NODE_DIR}/bin" >> "$GITHUB_PATH"
          export PATH="${NODE_DIR}/bin:${PATH}"
          node --version
          npm --version

      - name: Clean up previous E2E resources
        run: |
          docker ps -aq --filter "label=opensandbox" | xargs -r docker rm -f || true
          docker run --rm -v /tmp:/host_tmp alpine rm -rf /host_tmp/opensandbox-e2e || true

      - name: Build local egress image
        run: docker build -t opensandbox/egress:local -f components/egress/Dockerfile .

      - name: Run tests
        run: |
          set -e

          # Create config file (match other E2E jobs)
          cat <<EOF > ~/.sandbox.toml
          [server]
          host = "127.0.0.1"
          port = 8080
          log_level = "INFO"
          api_key = ""
          [runtime]
          type = "docker"
          execd_image = "opensandbox/execd:local"
          [egress]
          image = "opensandbox/egress:local"
          [docker]
          network_mode = "bridge"
          [storage]
          allowed_host_paths = ["/tmp/opensandbox-e2e"]
          EOF

          bash ./scripts/javascript-e2e.sh

      - name: Eval server logs
        if: ${{ always() }}
        run: cat server/server.log

      - name: Upload Test Report
        if: always()
        uses: actions/upload-artifact@v7
        with:
          name: javascript-test-report
          path: tests/javascript/build/test-results/junit.xml
          retention-days: 5

      - name: Upload execd logs
        if: always()
        uses: actions/upload-artifact@v7
        with:
          name: execd-log-for-js-e2e
          path: /tmp/opensandbox-e2e/logs/
          retention-days: 5

      - name: Clean up after E2E
        if: always()
        run: |
          docker ps -aq --filter "label=opensandbox" | xargs -r docker rm -f || true
          docker run --rm -v /tmp:/host_tmp alpine rm -rf /host_tmp/opensandbox-e2e || true
          pkill -f "python -m src.main" || true

  csharp-e2e:
    name: C# E2E (docker bridge)
    runs-on: self-hosted
    env:
      UV_BIN: /home/admin/.local/bin
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up uv PATH and verify
        run: |
          echo "${UV_BIN}" >> "$GITHUB_PATH"
          export PATH="${UV_BIN}:${PATH}"
          uv --version
          uv run python --version

      - name: Set up .NET SDK
        uses: actions/setup-dotnet@v5
        env:
          DOTNET_INSTALL_DIR: /home/admin/.local/dotnet
        with:
          dotnet-version: "10.0.x"

      - name: Clean up previous E2E resources
        run: |
          docker ps -aq --filter "label=opensandbox" | xargs -r docker rm -f || true
          docker run --rm -v /tmp:/host_tmp alpine rm -rf /host_tmp/opensandbox-e2e || true

      - name: Build local egress image
        run: docker build -t opensandbox/egress:local -f components/egress/Dockerfile .

      - name: Run tests
        run: |
          set -e

          cat <<EOF > ~/.sandbox.toml
          [server]
          host = "127.0.0.1"
          port = 8080
          log_level = "INFO"
          api_key = ""
          [runtime]
          type = "docker"
          execd_image = "opensandbox/execd:local"
          [egress]
          image = "opensandbox/egress:local"
          [docker]
          network_mode = "bridge"
          [storage]
          allowed_host_paths = ["/tmp/opensandbox-e2e"]
          EOF

          bash ./scripts/csharp-e2e.sh

      - name: Eval server logs
        if: ${{ always() }}
        run: cat server/server.log

      - name: Upload Test Report
        if: always()
        uses: actions/upload-artifact@v7
        with:
          name: csharp-test-report
          path: tests/csharp/build/test-results/
          retention-days: 5

      - name: Upload execd logs
        if: always()
        uses: actions/upload-artifact@v7
        with:
          name: execd-log-for-csharp-e2e
          path: /tmp/opensandbox-e2e/logs/
          retention-days: 5

      - name: Clean up after E2E
        if: always()
        run: |
          docker ps -aq --filter "label=opensandbox" | xargs -r docker rm -f || true
          docker run --rm -v /tmp:/host_tmp alpine rm -rf /host_tmp/opensandbox-e2e || true
          pkill -f "python -m src.main" || true


================================================
FILE: .github/workflows/sandbox-k8s-e2e.yml
================================================
name: Sandbox K8S E2E Tests

on:
  pull_request:
    branches: [ main ]
    paths:
      - 'kubernetes/**'

permissions:
  contents: read

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

env:
  GO_VERSION: '1.24'

jobs:
  e2e-k8s:
    name: E2E Tests (K8s v${{ matrix.k8s-version }})
    strategy:
      fail-fast: false
      matrix:
        k8s-version: ["1.21.1", "1.22.4", "1.24.4", "1.26.4", "1.28.6", "1.30.4", "1.32.2", "1.34.2"]
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Go
        uses: actions/setup-go@v6
        with:
          go-version: ${{ env.GO_VERSION }}

      - name: Run tests
        run: |
          cd kubernetes
          make test-e2e KIND_K8S_VERSION=v${{ matrix.k8s-version }}

================================================
FILE: .github/workflows/sandbox-k8s-test.yml
================================================
name: Sandbox K8S Tests

on:
  pull_request:
    branches: [ main ]
    paths:
      - 'kubernetes/**'

permissions:
  contents: read

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Go
        uses: actions/setup-go@v6
        with:
          go-version: '1.24.0'

      - name: Run golint
        run: |
          cd kubernetes
          make lint

      - name: Build binary
        run: |
          cd kubernetes
          make build
          make task-executor-build

      - name: Run tests
        run: |
          cd kubernetes
          make test


================================================
FILE: .github/workflows/sdk-tests.yml
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: SDK Tests

on:
  pull_request:
    branches: [main]
    paths:
      - "sdks/sandbox/**"
      - "sdks/code-interpreter/**"
      - "specs/**"
  push:
    branches: [main]
    paths:
      - "sdks/sandbox/**"
      - "sdks/code-interpreter/**"
      - "specs/**"

permissions:
  contents: read

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  python-sdk-quality:
    name: Python SDK Quality (${{ matrix.package_name }})
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          - package_name: sandbox
            package_dir: sdks/sandbox/python
          - package_name: code-interpreter
            package_dir: sdks/code-interpreter/python
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.11"

      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          version: "latest"

      - name: Install dependencies
        working-directory: ${{ matrix.package_dir }}
        run: |
          uv sync

      - name: Generate API
        if: matrix.package_name == 'sandbox'
        working-directory: sdks/sandbox/python
        run: |
          uv run python scripts/generate_api.py

      - name: Run ruff
        working-directory: ${{ matrix.package_dir }}
        run: |
          uv run ruff check

      - name: Run pyright
        working-directory: ${{ matrix.package_dir }}
        run: |
          uv run pyright

  python-sdk:
    name: Python SDK Tests
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.11"

      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          version: "latest"

      - name: Generate API
        working-directory: sdks/sandbox/python
        run: |
          uv sync
          uv run python scripts/generate_api.py

      - name: Run tests
        working-directory: sdks/sandbox/python
        run: |
          uv run pytest tests/ -v

  kotlin-sdk:
    name: Kotlin SDK Tests
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Java
        uses: actions/setup-java@v5
        with:
          distribution: temurin
          java-version: "17"

      - name: Set up Gradle
        uses: gradle/actions/setup-gradle@v5

      - name: Run tests
        working-directory: sdks/sandbox/kotlin
        run: |
          ./gradlew :sandbox:test

  csharp-sdk:
    name: C# SDK Tests
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up .NET 10
        uses: actions/setup-dotnet@v5
        with:
          dotnet-version: "10.0.x"

      - name: Run sandbox tests
        working-directory: sdks/sandbox/csharp
        run: |
          dotnet test tests/OpenSandbox.Tests/OpenSandbox.Tests.csproj --configuration Release

      - name: Run code interpreter tests
        working-directory: sdks/code-interpreter/csharp
        run: |
          dotnet test tests/OpenSandbox.CodeInterpreter.Tests/OpenSandbox.CodeInterpreter.Tests.csproj --configuration Release


================================================
FILE: .github/workflows/server-test.yml
================================================
name: Server Tests

on:
  pull_request:
    branches: [ main ]
    paths:
      - 'server/src/**'
      - 'server/tests/**'

permissions:
  contents: read

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  test:
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest]
    runs-on: ${{ matrix.os }}
    defaults:
      run:
        shell: bash
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.10'

      - name: Install uv
        run: |
          pip install uv

      - name: Run tests
        run: |
          cd server
          uv sync --all-groups
          uv run ruff check
          uv run pytest

  # Starts the server against a real Docker daemon and runs tests/smoke.sh,
  # once per Docker network mode listed in the matrix.
  docker-smoke:
    strategy:
      # Let both network modes finish so a failure in one does not hide the other.
      fail-fast: false
      matrix:
        network: [host, bridge]
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.10'

      - name: Install uv
        run: |
          pip install uv

      - name: Set up Docker
        run: |
          docker --version

      - name: Run smoke test
        run: |
          set -e
          cd server
          uv sync --all-groups

          # Create config file
          cat <<EOF > ~/.sandbox.toml
          [server]
          host = "127.0.0.1"
          port = 32888
          log_level = "INFO"
          api_key = ""
          [runtime]
          type = "docker"
          execd_image = "opensandbox/execd:latest"
          [egress]
          image = "opensandbox/egress:latest"
          [docker]
          network_mode = "${{ matrix.network }}"
          [storage]
          allowed_host_paths = ["/tmp/opensandbox-e2e"]
          EOF

          # Start server in background
          uv run python -m src.main > app.log 2>&1 &

          # Wait for the server to accept TCP connections on the configured
          # host/port (up to 60 attempts, one second apart) instead of relying
          # on a fixed sleep, which is both slow and racy.
          for attempt in $(seq 1 60); do
            if (exec 3<>/dev/tcp/127.0.0.1/32888) 2>/dev/null; then
              echo "Server is accepting connections (attempt ${attempt})"
              break
            fi
            if [ "${attempt}" -eq 60 ]; then
              echo "Server did not start listening on 127.0.0.1:32888 in time" >&2
              exit 1
            fi
            sleep 1
          done

          # Run smoke test
          chmod +x tests/smoke.sh
          ./tests/smoke.sh

      - name: Show logs
        if: always()
        run: |
          cat server/app.log


================================================
FILE: .github/workflows/verify-license.yml
================================================
name: Verify License Headers

on:
  pull_request:
    branches: [ main ]

# Least privilege: the job only checks out the repository and runs a local
# script, so the token is restricted to read-only access to contents
# (matching the other workflows in this repository).
permissions:
  contents: read

# Cancel an in-flight run when a newer commit arrives for the same PR/ref.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  verify-license:
    runs-on: self-hosted
    steps:
      - name: Checkout code
        uses: actions/checkout@v6
        with:
          # Fetch the full history rather than a shallow clone.
          # NOTE(review): presumably verify-license.sh compares against the
          # base branch — confirm before reducing the depth.
          fetch-depth: 0

      - name: Run license verification
        run: |
          chmod +x scripts/verify-license.sh
          ./scripts/verify-license.sh


================================================
FILE: .gitignore
================================================
# IDE and Editor files
.vscode/
.idea/
*.swp
*.swo
*~
.DS_Store
Thumbs.db

# Go
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib

# Test binary, built with `go test -c`
*.test

# Output of the go coverage tool
*.out

# Dependency directories
vendor/

# Go workspace file
go.work

# Java/Kotlin
# Compiled class file
*.class

# Log file
*.log

# BlueJ files
*.ctxt

# Mobile Tools for Java (J2ME)
.mtj.tmp/

# Package Files
*.jar
*.war
*.nar
*.ear
*.zip
*.tar.gz
*.rar

# virtual machine crash logs
hs_err_pid*
replay_pid*

# Gradle
.gradle/
build/
!**/gradle/wrapper/gradle-wrapper.jar
!**/src/main/**/build/
!**/src/test/**/build/

# Maven
target/
pom.xml.tag
pom.xml.releaseBackup
pom.xml.versionsBackup
pom.xml.next
release.properties
dependency-reduced-pom.xml
buildNumber.properties
.mvn/timing.properties
.mvn/wrapper/maven-wrapper.jar

# Node.js
# Logs
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
.pnpm-debug.log*

# Dependency directories
node_modules/
jspm_packages/

# TypeScript cache
*.tsbuildinfo

# Optional npm cache directory
.npm

# Optional eslint cache
.eslintcache

# Optional stylelint cache
.stylelintcache

# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/

# Optional REPL history
.node_repl_history

# Output of 'npm pack'
*.tgz

# Yarn Integrity file
.yarn-integrity

# Yarn v2
.yarn/cache
.yarn/unplugged
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*

# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache

# Next.js build output
.next
out

# Nuxt.js build / generate output
.nuxt
dist

# Gatsby files
.cache/
public
!docs/public/
!docs/public/CNAME

# Serverless directories
.serverless/

# FuseBox cache
.fusebox/

# DynamoDB Local files
.dynamodb/

# TernJS port file
.tern-port

# Stores VSCode versions used for testing VSCode extensions
.vscode-test

# yarn v2
.yarn/cache
.yarn/unplugged
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Virtual environments
venv/
env/
ENV/
env.bak/
venv.bak/

# Docker
*.pid
*.seed
*.pid.lock

# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

# Temporary files
*.tmp
*.temp
*~

# Environment variables
.env
.env.local
.env.development.local
.env.test.local
.env.production.local

# API keys and secrets
secrets/
*.pem
*.key
*.crt
*.p12
*.pfx

# Generated API documentation
docs/generated/
docs/.vitepress/generated/
docs/.vitepress/dist/
docs/.vitepress/cache/
apidocs/

# Test results
test-results/
coverage/
*.coverage
.nyc_output

# Backup files
*.bak
*.backup
*.old

# Flattened POM files (Maven)
.flattened-pom.xml

# Kotlin
*.kotlin_module

# JetBrains specific
.idea/
*.iml
*.ipr
*.iws
out/

# Eclipse specific
.project
.classpath
.settings/
bin/

# NetBeans specific
nbproject/
nbbuild/
nbdist/
.nb-gradle/

# Generated files
generated/
**/generated/**

# gVisor runtime binaries (downloaded dynamically)
kubernetes/test/kind/gvisor/runsc
kubernetes/test/kind/gvisor/containerd-shim-runsc-v1

# Build output directories (e.g. .NET bin/ and obj/)
bin/
obj/


================================================
FILE: .pre-commit-config.yaml
================================================
# Minimal cross-language pre-commit hooks
# Install: pip install pre-commit && pre-commit install
# Run once on all files: pre-commit run --all-files

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.6.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: mixed-line-ending
      - id: check-merge-conflict
      - id: check-yaml
      - id: detect-private-key

  # Language-specific formatters/linters can be added later, for example:
  # - repo: local
  #   hooks:
  #     - id: gofmt
  #       name: gofmt
  #       entry: gofmt
  #       language: system
  #       types: [go]
  #     - id: ruff
  #       name: ruff
  #       entry: ruff check
  #       language: system
  #       types: [python]


================================================
FILE: AGENTS.md
================================================
# Repository Guidelines

## Project Structure & Module Organization
- `server/`: Python FastAPI service, configs, and tests.
- `components/execd/`: Go execution daemon and related tests.
- `sdks/`: Multi-language SDKs (`sdks/sandbox/*`, `sdks/code-interpreter/*`).
- `sandboxes/`: Runtime sandbox implementations (e.g., `sandboxes/code-interpreter/`).
- `specs/`: OpenAPI specs (`specs/execd-api.yaml`, `specs/sandbox-lifecycle.yml`).
- `examples/`: End-to-end usage examples and integrations.
- `tests/`: Cross-component/E2E tests (`tests/python/`, `tests/java/`, `tests/javascript/`, `tests/csharp/`).
- `docs/`, `oseps/`, `scripts/`: Docs, proposals, and automation scripts.

## Build, Test, and Development Commands
- Server (Python):
  - `cd server && uv sync` installs deps.
  - `cp server/example.config.toml ~/.sandbox.toml` sets local config.
  - `cd server && uv run python -m src.main` runs the API server.
- execd (Go):
  - `cd components/execd && go build -o bin/execd .` builds the daemon.
  - `cd components/execd && make fmt` formats Go sources.
- SDKs:
  - Python: `cd sdks/sandbox/python && uv sync && uv run pytest`.
  - Kotlin: `cd sdks/sandbox/kotlin && ./gradlew build`.
- Specs: `node scripts/spec-doc/generate-spec.js` regenerates spec docs.

## Coding Style & Naming Conventions
- Python: PEP 8, `ruff` for lint/format, type hints on public APIs.
- Go: `gofmt`, explicit error handling, standard import grouping.
- Kotlin: Kotlin Coding Conventions, `ktlint` where configured.
- Naming: classes `PascalCase`, functions `snake_case` (Python) / `camelCase` (Go/Kotlin), constants `UPPER_SNAKE_CASE`.

## SDK API Implementation Conventions
- Keep a clear split between generated API transport code and handwritten SDK business/adaptor code.
- In adapter/infrastructure layers, default to integrating through generated API clients instead of handcrafted request wiring.
- Prefer generated OpenAPI clients for standard request/response endpoints; use handwritten transport only for streaming or protocol-specific paths (for example SSE).
- Do not manually edit generated client files. When specs change, regenerate first, then adapt handwritten layers.
- For handwritten streaming paths, keep wire contracts aligned with OpenAPI field names/models and cover behavior with focused tests (especially parsing and error mapping).

## Testing Guidelines
- Python tests use `pytest` (async tests common).
- Go tests use `go test` under `components/execd/pkg/...`.
- Kotlin tests use Gradle (`./gradlew test`).
- Coverage targets (from CONTRIBUTING): core packages >80%, API layer >70%.

## Commit & Pull Request Guidelines
- Commit messages follow Conventional Commits, e.g. `feat(server): add runtime`.
- Use feature branches (e.g., `feature/...`, `fix/...`) and keep PRs focused.
- PRs should include summary, testing status, and linked issues; follow the template in `CONTRIBUTING.md`.
- For major API or architectural changes, submit an OSEP (`oseps/`).

## Security & Configuration Tips
- Local server config lives in `~/.sandbox.toml` (copied from `server/example.config.toml`).
- Docker is required for local sandbox execution; keep images and keys out of commits.


================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Code of Conduct

We are committed to a welcoming, safe, and respectful community.

## Expected Behavior
- Be respectful and inclusive.
- Assume good intent; seek to understand.
- Provide constructive feedback; critique code, not people.
- Follow project guidelines and security practices.

## Unacceptable Behavior
- Harassment, personal attacks, or discriminatory language.
- Publishing private information without consent.
- Disruptive or aggressive behavior in any project space.

## Scope
This Code applies to all project spaces, including issues, pull requests, discussions, chat, and events.

## Reporting
Report incidents to: **conduct@opensandbox.io**. Include as much detail as possible (what happened, when/where, links, screenshots if applicable).

## Enforcement
Maintainers will investigate in good faith and may take appropriate action, including warnings, temporary bans, or removal from the community.


================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to OpenSandbox

Thank you for your interest in contributing to OpenSandbox! This guide will help you get started with contributing to the project, whether you're fixing bugs, adding features, improving documentation, or helping in other ways.

## Table of Contents

- [Code of Conduct](#code-of-conduct)
- [Getting Started](#getting-started)
- [Development Environment Setup](#development-environment-setup)
- [Project Structure](#project-structure)
- [Development Workflow](#development-workflow)
- [Coding Standards](#coding-standards)
- [Testing Guidelines](#testing-guidelines)
- [Submitting Contributions](#submitting-contributions)
- [Communication Channels](#communication-channels)

## Code of Conduct

OpenSandbox adheres to a [Code of Conduct](CODE_OF_CONDUCT.md) that we expect all contributors to follow. Please read it before contributing to ensure a welcoming and inclusive environment for everyone.

## Getting Started

### Ways to Contribute

There are many ways to contribute to OpenSandbox:

- **Report Bugs**: Submit detailed bug reports through [GitHub Issues](https://github.com/alibaba/OpenSandbox/issues)
- **Suggest Features**: Propose new features or improvements
- **Write Code**: Fix bugs, implement features, or improve performance
- **Improve Documentation**: Enhance README files, write tutorials, or fix typos
- **Write Tests**: Add test coverage or improve existing tests
- **Review Pull Requests**: Help review and test others' contributions
- **Answer Questions**: Help other users in GitHub Discussions or Issues

### Before You Start

1. **Search Existing Issues**: Check if your bug report or feature request already exists
2. **Check Roadmap**: Review the project roadmap to see if your idea aligns with project goals
3. **Discuss Major Changes**: For significant changes, open an issue first or submit an [OSEP](oseps/README.md) to discuss your approach
4. **Review Architecture**: Read [docs/architecture.md](docs/architecture.md) to understand the system design

## Development Environment Setup

### Prerequisites

Different components have different requirements:

#### For Server (Python)

- **Python 3.10+**
- **uv** - Python package manager ([installation guide](https://github.com/astral-sh/uv))
- **Docker** - For running sandboxes locally

#### For execd (Go)

- **Go 1.24+**
- **Make** - Build automation (optional)
- **Docker** - For building container images

#### For SDKs

- **Python SDK**: Python 3.10+, uv
- **Java/Kotlin SDK**: JDK 17+, Gradle

### Quick Setup

#### Server Development

```bash
# Navigate to server directory
cd server

# Install dependencies
uv sync

# Copy example configuration
cp example.config.toml ~/.sandbox.toml

# Edit configuration for development
# Set log_level = "DEBUG" and api_key
nano ~/.sandbox.toml

# Run server
uv run python -m src.main
```

See [server/DEVELOPMENT.md](server/DEVELOPMENT.md) for detailed server development guide.

#### execd Development

```bash
# Navigate to execd directory
cd components/execd

# Download dependencies
go mod download

# Build execd
go build -o bin/execd .

# Run execd (requires Jupyter Server)
./bin/execd --jupyter-host=http://localhost:8888 --port=44772
```

See [components/execd/DEVELOPMENT.md](components/execd/DEVELOPMENT.md) for detailed execd development guide.

#### SDK Development

**Python SDK:**

```bash
cd sdks/sandbox/python
uv sync
uv run pytest
```

**Java/Kotlin SDK:**

```bash
cd sdks/sandbox/kotlin
./gradlew build
./gradlew test
```

## Project Structure

```
OpenSandbox/
├── sdks/                     # Multi-language SDKs
│   ├── code-interpreter/     # Code Interpreter SDK (Python, Kotlin)
│   └── sandbox/              # Sandbox base SDK (Python, Kotlin)
├── specs/                    # OpenAPI specifications
│   ├── execd-api.yaml        # Execution API spec
│   └── sandbox-lifecycle.yml # Lifecycle API spec
├── server/                   # Sandbox server (Python/FastAPI)
├── components/
│   └── execd/                # Execution daemon (Go/Beego)
├── sandboxes/                # Sandbox implementations
│   └── code-interpreter/     # Code Interpreter sandbox
├── examples/                 # Example integrations
├── docs/                     # Documentation
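├── tests/                    # Cross-component end-to-end tests
│   ├── python/               # Python E2E tests
│   ├── java/                 # Java E2E tests
│   ├── javascript/           # JavaScript E2E tests
│   └── csharp/               # C# E2E tests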
└── scripts/                  # Build and utility scripts
```

## Development Workflow

### Enhancement Proposals (OSEP)

For major features, architectural changes, or modifications to the core API/security model, we follow the **OSEP (OpenSandbox Enhancement Proposals)** process.

Please read the [OSEP README](oseps/README.md) to understand when an OSEP is required and how to submit one. Small bug fixes and minor improvements do not require an OSEP.

### Branching Strategy

- **main**: Stable production branch
- **feature/[name]**: New features
- **fix/[name]**: Bug fixes
- **docs/[name]**: Documentation updates
- **refactor/[name]**: Code refactoring
- **test/[name]**: Test additions or improvements

### Creating a Feature Branch

```bash
# Update main branch
git checkout main
git pull origin main

# Create feature branch
git checkout -b feature/my-awesome-feature

# Make your changes
# ...

# Commit your changes
git add .
git commit -m "feat: add my awesome feature"

# Push to your fork
git push origin feature/my-awesome-feature
```

### Commit Message Format

We follow [Conventional Commits](https://www.conventionalcommits.org/) specification:

```
<type>(<scope>): <description>

[optional body]

[optional footer]
```

**Types:**

- `feat`: New feature
- `fix`: Bug fix
- `docs`: Documentation changes
- `style`: Code style changes (formatting, no logic change)
- `refactor`: Code refactoring
- `test`: Adding or updating tests
- `chore`: Build process, dependencies, or tooling changes
- `perf`: Performance improvements
- `ci`: CI/CD changes

**Examples:**

```
feat(server): add Kubernetes runtime support
fix(execd): resolve memory leak in session cleanup
docs(sdk): add Python SDK usage examples
test(server): add integration tests for Docker runtime
refactor(sdk): simplify filesystem API
```

### Making Changes

1. **Write Clean Code**: Follow project coding standards (see below)
2. **Add Tests**: Ensure your changes are covered by tests
3. **Update Documentation**: Update relevant documentation files
4. **Test Locally**: Run all tests and ensure they pass
5. **Check Linting**: Run linters and fix any issues

## Coding Standards

### Python (Server, Python SDKs)

- **Style Guide**: Follow [PEP 8](https://pep8.org/)
- **Formatter**: Use `ruff` for formatting and linting
- **Type Hints**: Always use type hints for function signatures
- **Docstrings**: Use Google-style docstrings for public APIs

```python
def create_sandbox(
    image: ImageSpec,
    timeout: timedelta,
    entrypoint: Optional[List[str]] = None
) -> Sandbox:
    """Create a new sandbox instance.

    Args:
        image: Container image specification
        timeout: Sandbox timeout duration
        entrypoint: Optional custom entrypoint command

    Returns:
        Created sandbox instance

    Raises:
        ValueError: If image or timeout is invalid
    """
    # Implementation
```

**Running Linter:**

```bash
cd server
uv run ruff check src tests
uv run ruff format src tests
```

### Go (execd)

- **Style Guide**: Follow [Effective Go](https://go.dev/doc/effective_go)
- **Formatter**: Use `gofmt` for formatting
- **Imports**: Organize in three groups (stdlib, third-party, internal)
- **Error Handling**: Always handle errors explicitly

```go
// Good
result, err := someOperation()
if err != nil {
    logs.Error("operation failed: %v", err)
    return fmt.Errorf("failed to do something: %w", err)
}

// Bad - silent failure
result, _ := someOperation()
```

**Running Formatter:**

```bash
cd components/execd
gofmt -w .
# Or
make fmt
```

### Java/Kotlin (Java/Kotlin SDKs)

- **Style Guide**: Follow [Kotlin Coding Conventions](https://kotlinlang.org/docs/coding-conventions.html)
- **Formatter**: Use `ktlint`
- **Null Safety**: Use Kotlin's null safety features

```kotlin
suspend fun createSandbox(
    image: ImageSpec,
    timeout: Duration,
    entrypoint: List<String>? = null
): Sandbox {
    // Implementation
}
```

### General Guidelines

- **Naming Conventions**:
  - Functions/Methods: `snake_case` (Python), `camelCase` (Go, Kotlin)
  - Classes: `PascalCase` (all languages)
  - Constants: `UPPER_SNAKE_CASE` (all languages)
  - Private members: `_leading_underscore` (Python), `unexported` (Go)

- **Comments**: Write clear, concise comments explaining "why", not "what"
- **Error Messages**: Provide actionable error messages with context
- **Logging**: Use appropriate log levels (DEBUG, INFO, WARNING, ERROR)

## Testing Guidelines

### Test Coverage Requirements

- **Core Packages**: Aim for >80% coverage
- **API Layer**: Aim for >70% coverage
- **Utilities**: Aim for >90% coverage

### Writing Tests

#### Python Tests (pytest)

```python
from datetime import timedelta

import pytest
from opensandbox import Sandbox

@pytest.mark.asyncio
async def test_create_sandbox():
    """Test sandbox creation with valid parameters."""
    sandbox = await Sandbox.create(
        image="python:3.11",
        timeout=timedelta(minutes=5)
    )
    assert sandbox.id is not None
    assert sandbox.status == SandboxStatus.PENDING
    # Always release the sandbox so the test leaves no resources behind.
    await sandbox.kill()

@pytest.mark.asyncio
async def test_invalid_timeout():
    """Test sandbox creation fails with invalid timeout."""
    with pytest.raises(ValueError):
        await Sandbox.create(
            image="python:3.11",
            timeout=timedelta(seconds=-1)
        )
```

**Running Tests:**

```bash
cd server
uv run pytest
uv run pytest --cov=src --cov-report=html
```

#### Go Tests

```go
func TestController_Execute_Python(t *testing.T) {
    ctrl := NewController("http://localhost:8888", "test-token")

    req := &ExecuteCodeRequest{
        Language: Python,
        Code:     "print('hello')",
    }

    err := ctrl.Execute(req)
    assert.NoError(t, err)
}
```

**Running Tests:**

```bash
cd components/execd
go test ./pkg/...
go test -v -cover ./pkg/...
```

#### Integration Tests

Integration tests require Docker:

```bash
# Server integration tests
cd server
uv run pytest tests/integration/

# E2E tests
cd tests/e2e/python
uv run pytest
```

### Test Best Practices

- **Test Names**: Use descriptive names that explain what is being tested
- **Arrange-Act-Assert**: Structure tests clearly
- **Isolation**: Each test should be independent
- **Mocking**: Mock external dependencies appropriately
- **Cleanup**: Always clean up resources (use fixtures, context managers)

## Submitting Contributions

### Pull Request Process

1. **Create Feature Branch**: Branch from `main`
2. **Make Changes**: Implement your feature or fix
3. **Write Tests**: Add comprehensive test coverage
4. **Update Documentation**: Update relevant docs
5. **Test Locally**: Ensure all tests pass
6. **Run Linters**: Fix any style issues
7. **Commit Changes**: Use conventional commit messages
8. **Push to Fork**: Push your branch to your fork
9. **Create Pull Request**: Submit PR with detailed description

### Pull Request Template

When creating a PR, fill out the template:

```markdown
# Summary

- What is changing and why?

# Testing

- [ ] Not run (explain why)
- [ ] Unit tests
- [ ] Integration tests
- [ ] e2e / manual verification

# Breaking Changes

- [ ] None
- [ ] Yes (describe impact and migration path)

# Checklist

- [ ] Linked Issue or clearly described motivation
- [ ] Added/updated docs (if needed)
- [ ] Added/updated tests (if needed)
- [ ] Security impact considered
- [ ] Backward compatibility considered
```

### Pull Request Guidelines

**Do:**

- Keep PRs focused and reasonably sized (< 500 lines if possible)
- Write clear PR descriptions with motivation and context
- Link related issues
- Respond to review comments promptly
- Update your PR based on feedback
- Ensure CI passes before requesting review

**Don't:**

- Mix multiple unrelated changes in one PR
- Submit PRs with failing tests
- Ignore code review feedback
- Force push after reviews have started (unless necessary)
- Include commented-out code or debug statements

### Code Review Process

1. **Automated Checks**: CI runs tests, linters, and security scans
2. **Maintainer Review**: A maintainer reviews your code
3. **Feedback Loop**: Address review comments
4. **Approval**: Once approved, a maintainer will merge your PR
5. **Cleanup**: Delete your feature branch after merge

## Communication Channels

### GitHub Issues

Use GitHub Issues for:

- Bug reports
- Feature requests
- Documentation improvements
- Questions about implementation

**Bug Report Template:**

```markdown
**Description**
A clear description of the bug.

**To Reproduce**
Steps to reproduce the behavior:

1. Create sandbox with...
2. Execute command...
3. See error

**Expected Behavior**
What you expected to happen.

**Environment**

- OpenSandbox version:
- Runtime (Docker/K8s):
- OS:
- Python/Go version:

**Additional Context**
Logs, screenshots, or other relevant information.
```

### GitHub Discussions

Use GitHub Discussions for:

- General questions
- Design discussions
- Brainstorming ideas
- Community help

### Getting Help

- **Issues**: Technical problems or bugs
- **Discussions**: Questions and community support
- **Email**: For security issues, email conduct@opensandbox.io rather than opening a public issue (see [SECURITY.md](SECURITY.md) for the full reporting process)

## Additional Resources

### Documentation

- [Architecture Overview](docs/architecture.md)
- [Server Development Guide](server/DEVELOPMENT.md)
- [execd Development Guide](components/execd/DEVELOPMENT.md)
- [OpenAPI Specifications](specs/README.md)
- [Python SDK Documentation](sdks/sandbox/python/README.md)
- [Java/Kotlin SDK Documentation](sdks/sandbox/kotlin/README.md)

### Examples

Browse [examples/](examples/) for real-world usage patterns:

- Code Interpreter integration
- AI Coding Agent integrations (Claude Code, Gemini CLI, etc.)
- Browser automation (Chrome, Playwright)
- Remote development (VS Code, Desktop)

### External Resources

- [FastAPI Documentation](https://fastapi.tiangolo.com/)
- [Beego Documentation](https://beego.wiki/)
- [Jupyter Protocol](https://jupyter-client.readthedocs.io/en/stable/messaging.html)
- [OpenAPI Specification](https://swagger.io/specification/)
- [Docker API](https://docs.docker.com/engine/api/)

## Acknowledgments

Thank you for contributing to OpenSandbox! Your contributions help make this project better for everyone in the AI and developer tools community.

If you have suggestions for improving this contributing guide, please open an issue or submit a pull request.

## License

By contributing to OpenSandbox, you agree that your contributions will be licensed under the [Apache 2.0 License](LICENSE).


================================================
FILE: LICENSE
================================================
Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: README.md
================================================
<div align="center">
  <img src="docs/assets/logo.svg" alt="OpenSandbox logo" width="150" />

  <h1>OpenSandbox</h1>

  <p align="center">
    <a href="https://trendshift.io/repositories/21828" target="_blank">
      <img src="https://trendshift.io/api/badge/repositories/21828" alt="alibaba%2FOpenSandbox | Trendshift" style="width: 320px; height: 70px;" width="320" height="70" />
    </a>
  </p>

<p align="center">
  <a href="https://github.com/alibaba/OpenSandbox">
    <img src="https://img.shields.io/github/stars/alibaba/OpenSandbox.svg?style=social" alt="GitHub stars" />
  </a>
  <a href="https://deepwiki.com/alibaba/OpenSandbox">
    <img src="https://deepwiki.com/badge.svg" alt="Ask DeepWiki" />
  </a>
  <a href="https://www.apache.org/licenses/LICENSE-2.0.html">
    <img src="https://img.shields.io/badge/license-Apache%202.0-blue.svg" alt="license" />
  </a>
  <a href="https://badge.fury.io/py/opensandbox">
    <img src="https://badge.fury.io/py/opensandbox.svg" alt="PyPI version" />
  </a>
  <a href="https://badge.fury.io/js/@alibaba-group%2Fopensandbox">
    <img src="https://badge.fury.io/js/@alibaba-group%2Fopensandbox.svg" alt="npm version" />
  </a>
  <a href="https://landscape.cncf.io/?item=orchestration-management--scheduling-orchestration--opensandbox">
    <img src="https://img.shields.io/badge/CNCF-Landscape-0C66E4" alt="CNCF Landscape" />
  </a>
  <a href="https://qr.dingtalk.com/action/joingroup?code=v1,k1,A4Bgl5q1I1eNU/r33D18YFNrMY108aFF38V+r19RJOM=&_dt_no_comment=1&origin=11">
    <img src="https://img.shields.io/badge/DingTalk-Join-0089FF?logo=dingtalk&logoColor=white" alt="DingTalk" />
  </a>
  <a href="https://github.com/alibaba/OpenSandbox/actions">
    <img src="https://github.com/alibaba/OpenSandbox/actions/workflows/real-e2e.yml/badge.svg?branch=main" alt="E2E Status" />
  </a>
</p>

  <hr />
</div>

[Documentation](https://open-sandbox.ai/) | [中文文档](https://open-sandbox.ai/zh/)

OpenSandbox is a **general-purpose sandbox platform** for AI applications, offering multi-language SDKs, unified sandbox APIs, and Docker/Kubernetes runtimes for scenarios like Coding Agents, GUI Agents, Agent Evaluation, AI Code Execution, and RL Training.

OpenSandbox is now listed in the [CNCF Landscape](https://landscape.cncf.io/?item=orchestration-management--scheduling-orchestration--opensandbox).

## Features

- **Multi-language SDKs**: Provides sandbox SDKs in Python, Java/Kotlin, JavaScript/TypeScript, C#/.NET, Go (Roadmap), and more.
- **Sandbox Protocol**: Defines sandbox lifecycle management APIs and sandbox execution APIs so you can extend custom sandbox runtimes.
- **Sandbox Runtime**: Built-in lifecycle management supporting Docker and [high-performance Kubernetes runtime](./kubernetes), enabling both local runs and large-scale distributed scheduling.
- **Sandbox Environments**: Built-in Command, Filesystem, and Code Interpreter implementations. Examples cover Coding Agents (e.g., Claude Code), browser automation (Chrome, Playwright), and desktop environments (VNC, VS Code).
- **Network Policy**: Unified [Ingress Gateway](components/ingress) with multiple routing strategies plus per-sandbox [egress controls](components/egress).
- **Strong Isolation**: Supports secure container runtimes like gVisor, Kata Containers, and Firecracker microVM for enhanced isolation between sandbox workloads and the host. See [Secure Container Runtime Guide](docs/secure-container.md) for details.

## Examples

### Basic Sandbox Operations

Requirements:

- Docker (required for local execution)
- Python 3.10+ (recommended for examples and local runtime)

#### 1. Install and Configure the Sandbox Server

```bash
uv pip install opensandbox-server
opensandbox-server init-config ~/.sandbox.toml --example docker
```

> If you prefer working from source, you can still clone the repo for development, but you no longer need to clone this repository just to start the server.
> You will also need a running Docker instance.
> ```bash
> git clone https://github.com/alibaba/OpenSandbox.git
> cd OpenSandbox/server
> uv sync
> cp example.config.toml ~/.sandbox.toml # Copy configuration file
> uv run python -m src.main # Start the service
> ```

#### 2. Start the Sandbox Server

```bash
opensandbox-server

# Show help
opensandbox-server -h
```

#### 3. Create a Code Interpreter and Execute Commands

Install the Code Interpreter SDK

```bash
uv pip install opensandbox-code-interpreter
```

Create a sandbox and execute commands

```python
import asyncio
from datetime import timedelta

from code_interpreter import CodeInterpreter, SupportedLanguage
from opensandbox import Sandbox
from opensandbox.models import WriteEntry

async def main() -> None:
    # 1. Create a sandbox
    sandbox = await Sandbox.create(
        "opensandbox/code-interpreter:v1.0.2",
        entrypoint=["/opt/opensandbox/code-interpreter.sh"],
        env={"PYTHON_VERSION": "3.11"},
        timeout=timedelta(minutes=10),
    )

    async with sandbox:

        # 2. Execute a shell command
        execution = await sandbox.commands.run("echo 'Hello OpenSandbox!'")
        print(execution.logs.stdout[0].text)

        # 3. Write a file
        await sandbox.files.write_files([
            WriteEntry(path="/tmp/hello.txt", data="Hello World", mode=644)
        ])

        # 4. Read a file
        content = await sandbox.files.read_file("/tmp/hello.txt")
        print(f"Content: {content}") # Content: Hello World

        # 5. Create a code interpreter
        interpreter = await CodeInterpreter.create(sandbox)

        # 6. Execute Python code (single-run, pass language directly)
        result = await interpreter.codes.run(
              """
                  import sys
                  print(sys.version)
                  result = 2 + 2
                  result
              """,
              language=SupportedLanguage.PYTHON,
        )

        print(result.result[0].text) # 4
        print(result.logs.stdout[0].text) # 3.11.14

    # 7. Cleanup the sandbox
    await sandbox.kill()

if __name__ == "__main__":
    asyncio.run(main())
```

### More Examples

OpenSandbox provides examples covering SDK usage, agent integrations, browser automation, and training workloads. All example code is located in the `examples/` directory.

#### 🎯 Basic Examples

- **[code-interpreter](examples/code-interpreter/README.md)** - End-to-end Code Interpreter SDK workflow in a sandbox.
- **[aio-sandbox](examples/aio-sandbox/README.md)** - All-in-One sandbox setup using the OpenSandbox SDK.
- **[agent-sandbox](examples/agent-sandbox/README.md)** - Example integration for running OpenSandbox workloads on Kubernetes with [kubernetes-sigs/agent-sandbox](https://github.com/kubernetes-sigs/agent-sandbox).

#### 🤖 Coding Agent Integrations

- **[claude-code](examples/claude-code/README.md)** - Run Claude Code inside OpenSandbox.
- **[gemini-cli](examples/gemini-cli/README.md)** - Run Google Gemini CLI inside OpenSandbox.
- **[codex-cli](examples/codex-cli/README.md)** - Run OpenAI Codex CLI inside OpenSandbox.
- **[kimi-cli](examples/kimi-cli/README.md)** - Run [Kimi CLI](https://github.com/MoonshotAI/kimi-cli) (Moonshot AI) inside OpenSandbox.
- **[langgraph](examples/langgraph/README.md)** - LangGraph state-machine workflow that creates/runs a sandbox job with fallback retry.
- **[google-adk](examples/google-adk/README.md)** - Google ADK agent using OpenSandbox tools to write/read files and run commands.
- **[nullclaw](examples/nullclaw/README.md)** - Launch a [Nullclaw](https://github.com/nullclaw/nullclaw) Gateway inside a sandbox.
- **[openclaw](examples/openclaw/README.md)** - Launch an OpenClaw Gateway inside a sandbox.

#### 🌐 Browser and Desktop Environments

- **[chrome](examples/chrome/README.md)** - Chromium sandbox with VNC and DevTools access for automation and debugging.
- **[playwright](examples/playwright/README.md)** - Playwright + Chromium headless scraping and testing example.
- **[desktop](examples/desktop/README.md)** - Full desktop environment in a sandbox with VNC access.
- **[vscode](examples/vscode/README.md)** - code-server (VS Code Web) running inside a sandbox for remote dev.

#### 🧠 ML and Training

- **[rl-training](examples/rl-training/README.md)** - DQN CartPole training in a sandbox with checkpoints and summary output.

For more details, please refer to [examples](examples/README.md) and the README files in each example directory.

## Project Structure

| Directory | Description                                                      |
|-----------|------------------------------------------------------------------|
| [`sdks/`](sdks/) | Multi-language SDKs (Python, Java/Kotlin, TypeScript/JavaScript, C#/.NET) |
| [`specs/`](specs/README.md) | OpenAPI specs and lifecycle specifications                      |
| [`server/`](server/README.md) | Python FastAPI sandbox lifecycle server                          |
| [`kubernetes/`](kubernetes/README.md) | Kubernetes deployment and examples                               |
| [`components/execd/`](components/execd/README.md) | Sandbox execution daemon (commands and file operations)          |
| [`components/ingress/`](components/ingress/README.md) | Sandbox traffic ingress proxy                                    |
| [`components/egress/`](components/egress/README.md) | Sandbox network egress control                                   |
| [`sandboxes/`](sandboxes/) | Runtime sandbox implementations                                   |
| [`examples/`](examples/README.md) | Integration examples and use cases                               |
| [`oseps/`](oseps/README.md) | OpenSandbox Enhancement Proposals                                |
| [`docs/`](docs/) | Architecture and design documentation                            |
| [`tests/`](tests/) | Cross-component E2E tests                                        |
| [`scripts/`](scripts/) | Development and maintenance scripts                              |

For detailed architecture, see [docs/architecture.md](docs/architecture.md).

## Documentation

- [docs/architecture.md](docs/architecture.md) – Overall architecture & design philosophy
- [oseps/README.md](oseps/README.md) – OpenSandbox Enhancement Proposals
- SDK
  - Sandbox base SDK ([Java/Kotlin SDK](sdks/sandbox/kotlin/README.md), [Python SDK](sdks/sandbox/python/README.md), [JavaScript/TypeScript SDK](sdks/sandbox/javascript/README.md), [C#/.NET SDK](sdks/sandbox/csharp/README.md)) - includes sandbox lifecycle, command execution, file operations
  - Code Interpreter SDK ([Java/Kotlin SDK](sdks/code-interpreter/kotlin/README.md), [Python SDK](sdks/code-interpreter/python/README.md), [JavaScript/TypeScript SDK](sdks/code-interpreter/javascript/README.md), [C#/.NET SDK](sdks/code-interpreter/csharp/README.md)) - code interpreter
- [specs/README.md](specs/README.md) - OpenAPI definitions for sandbox lifecycle API and sandbox execution API
- [server/README.md](server/README.md) - Sandbox server startup and configuration; supports Docker and Kubernetes runtimes

## License

This project is open source under the [Apache 2.0 License](LICENSE).

## Roadmap [2026.03]

### SDK

- **Sandbox client connection pool** - Client-side sandbox connection pool management that keeps pre-provisioned sandboxes ready, so an environment can be obtained within X ms.
- **Go SDK** - Go client SDK for sandbox lifecycle management, command execution, and file operations.

### Sandbox Runtime

- **Persistent volumes** - Mountable persistent volumes for sandboxes (see [Proposal 0003](oseps/0003-volume-and-volumebinding-support.md)).
- **Local lightweight sandbox** - Lightweight sandbox for AI tools running directly on PCs.
- **Secure Container** - Secure sandbox for AI Agents running inside a container.

### Deployment

- **Guide** - Deployment guide for self-hosted Kubernetes cluster.

## Contact and Discussion

- Issues: Submit bugs, feature requests, or design discussions through GitHub Issues
- DingTalk: Join the [OpenSandbox technical discussion group](https://qr.dingtalk.com/action/joingroup?code=v1,k1,A4Bgl5q1I1eNU/r33D18YFNrMY108aFF38V+r19RJOM=&_dt_no_comment=1&origin=11)

## Star History

[![Star History Chart](https://api.star-history.com/svg?repos=alibaba/OpenSandbox&type=date&legend=top-left)](https://www.star-history.com/#alibaba/OpenSandbox&type=date&legend=top-left)


================================================
FILE: SECURITY.md
================================================
# Security Policy

## Reporting Security Issues

The OpenSandbox team takes security seriously. If you discover a security vulnerability, please report it responsibly.

### How to Report

- **GitHub Security Advisories**: Open a private security advisory on GitHub
- **Email**: Contact the maintainers directly with "[SECURITY]" in the subject

### What to Include

- Clear description of the vulnerability
- Steps to reproduce
- Potential impact and scope
- Suggested remediation (if available)

## Response Process

1. Acknowledgment within 48 hours
2. Investigation and validation
3. Fix development and testing
4. Coordinated disclosure

## Supported Versions

Only the latest release and main branch are actively supported with security updates.

## Security Best Practices

When deploying OpenSandbox:

- Keep dependencies up to date
- Use network policies to restrict sandbox egress
- Monitor audit logs regularly
- Follow principle of least privilege


================================================
FILE: cli/README.md
================================================
# OpenSandbox CLI

A command-line interface for managing OpenSandbox environments from your terminal. Built on top of the [OpenSandbox Python SDK](../sdks/sandbox/python/README.md), the CLI provides intuitive commands for sandbox lifecycle management, file operations, command execution, and code interpretation.

## Installation

### pip

```bash
pip install opensandbox-cli
```

### uv

```bash
uv add opensandbox-cli
```

### pipx (recommended for global CLI usage)

```bash
pipx install opensandbox-cli
```

## Overview

```bash
osb --help
```

![CLI Help](assets/cli_help.png)

## Quick Start

### Step 0: Start the OpenSandbox Server

Before using the CLI, make sure the OpenSandbox server is running. See the root [README.md](../README.md) for startup instructions.

```bash
opensandbox-server
```

![Start OpenSandbox Server](assets/start_opensandbox_server.png)

### Step 1: Install the CLI

```bash
cd cli
uv pip install -e .
```

![Install CLI](assets/install_cli.png)

### Step 2: Initialize Configuration

```bash
osb config init
osb config set connection.domain localhost:8080
osb config set connection.protocol http
```

![Init CLI](assets/init_cli.png)

### Step 3: Create a Sandbox

```bash
osb sandbox create --image python:3.12
```

![Create Sandbox](assets/cli_create_sandbox.png)

### Step 4: List Sandboxes

```bash
# Table output (default)
osb sandbox list

# JSON output for scripting
osb -o json sandbox list
```

![List Sandboxes](assets/cli_list_sandbox.png)

![List Sandboxes JSON](assets/cli_list_sandbox_json.png)

### Short ID Matching

Like Docker, you don't need to type the full sandbox ID — just enough characters to uniquely identify the target sandbox:

```bash
# Full ID
osb sandbox get db027570-4f86-45f8-b1a8-c31a2dd90da8

# Short prefix — as long as it's unambiguous
osb sandbox get db02
osb exec db02 -- echo "hello"
```

If the prefix matches multiple sandboxes, the CLI will report an error listing the matches so you can be more specific.

![Short ID Matching](assets/cli_sandbox_search.png)

### Step 5: Execute Commands

```bash
osb exec <sandbox-id> -- echo "hello world"
osb exec <sandbox-id> -- python -c "print(1+1)"
```

![Execute Commands](assets/cli_sandbox_exec.png)

### Step 6: File Operations

```bash
# Write a file
osb file write <sandbox-id> /tmp/test.txt -c "hello"

# Read it back
osb file cat <sandbox-id> /tmp/test.txt
```

![File Operations](assets/cli_sandbox_file.png)

### Step 7: Cleanup

```bash
osb sandbox kill <sandbox-id>
osb sandbox list
```

![Kill Sandbox](assets/cli_kill_sandbox.png)

## Command Reference

### `osb sandbox` — Lifecycle Management

| Command    | Description                                 |
| ---------- | ------------------------------------------- |
| `create`   | Create a new sandbox                        |
| `list`     | List sandboxes (with optional filters)      |
| `get`      | Get sandbox details by ID                   |
| `kill`     | Terminate one or more sandboxes             |
| `pause`    | Pause a running sandbox                     |
| `resume`   | Resume a paused sandbox                     |
| `renew`    | Renew sandbox expiration                    |
| `endpoint` | Get public endpoint for a sandbox port      |
| `health`   | Check sandbox health                        |
| `metrics`  | Get sandbox resource metrics (CPU, memory)  |

### `osb command` — Command Execution

| Command     | Description                               |
| ----------- | ----------------------------------------- |
| `run`       | Run a shell command in the sandbox        |
| `status`    | Get command execution status              |
| `logs`      | Get background command logs               |
| `interrupt` | Interrupt a running command               |

### `osb exec` — Quick Command Shortcut

```bash
osb exec <sandbox-id> -- <command>
```

Shortcut for `osb command run`. Everything after `--` is passed as the command.

### `osb file` — File Operations

| Command    | Description                                |
| ---------- | ------------------------------------------ |
| `cat`      | Read file contents                         |
| `write`    | Write content to a file                    |
| `upload`   | Upload a local file to the sandbox         |
| `download` | Download a file from the sandbox           |
| `rm`       | Delete files                               |
| `mv`       | Move or rename a file                      |
| `mkdir`    | Create directories                         |
| `rmdir`    | Remove directories                         |
| `search`   | Search for files by pattern                |
| `info`     | Get file/directory metadata                |
| `chmod`    | Set file permissions                       |
| `replace`  | Find and replace content in a file         |

### `osb code` — Code Interpreter

| Command     | Description                               |
| ----------- | ----------------------------------------- |
| `run`       | Execute code in a sandbox                 |
| `context`   | Manage code execution contexts            |
| `interrupt` | Interrupt a running code execution        |

### `osb config` — Configuration

| Command | Description                                |
| ------- | ------------------------------------------ |
| `init`  | Create a default config file               |
| `show`  | Show resolved configuration                |
| `set`   | Set a configuration value in the file      |

## Configuration

The CLI resolves configuration from multiple sources with the following priority (highest to lowest):

1. **CLI flags** — `--api-key`, `--domain`, `--protocol`, `--timeout`, `-o` / `--output`
2. **Environment variables** — `OPEN_SANDBOX_API_KEY`, `OPEN_SANDBOX_DOMAIN`, `OPEN_SANDBOX_PROTOCOL`, `OPEN_SANDBOX_REQUEST_TIMEOUT`, `OPEN_SANDBOX_OUTPUT`
3. **Config file** — `~/.opensandbox/config.toml` (or path specified via `--config`)
4. **SDK defaults**

### Config File Format

```toml
[connection]
api_key = "your-api-key"
domain = "localhost:8080"
protocol = "http"
request_timeout = 30

[output]
format = "table"    # table | json | yaml
color = true

[defaults]
image = "python:3.11"
timeout = "10m"
```

## Global Options

| Option                        | Description                      |
| ----------------------------- | -------------------------------- |
| `--api-key TEXT`              | API key for authentication       |
| `--domain TEXT`               | API server domain                |
| `--protocol [http\|https]`    | Protocol                         |
| `--timeout INTEGER`           | Request timeout in seconds       |
| `-o, --output [table\|json\|yaml]` | Output format              |
| `--config PATH`               | Config file path                 |
| `-v, --verbose`               | Enable debug output              |
| `--no-color`                  | Disable colored output           |
| `--version`                   | Show version                     |

## Output Formats

The CLI supports three output formats via the `-o` / `--output` flag:

- **`table`** (default) — Human-friendly tables powered by [Rich](https://github.com/Textualize/rich)
- **`json`** — Machine-readable JSON
- **`yaml`** — YAML output

```bash
# Table (default)
osb sandbox list

# JSON for scripting
osb -o json sandbox list

# YAML
osb -o yaml sandbox list
```


================================================
FILE: cli/pyproject.toml
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[build-system]
requires = ["hatchling", "hatch-vcs"]
build-backend = "hatchling.build"

[project]
name = "opensandbox-cli"
dynamic = ["version"]
description = "OpenSandbox CLI - Command-line interface for managing sandboxes"
authors = [
    { name = "OpenSandbox Team", email = "ninan.nn@alibaba-inc.com" }
]
license = { file = "LICENSE" }
readme = "README.md"
requires-python = ">=3.10"
keywords = ["sandbox", "cli", "opensandbox"]
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3 :: Only",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: Software Development :: Libraries",
]
dependencies = [
    "opensandbox>=0.1.4,<0.2.0",
    "opensandbox-code-interpreter>=0.1.0,<0.2.0",
    "click>=8.1.0,<9.0",
    "rich>=13.0.0,<14.0",
    "pyyaml>=6.0,<7.0",
    "tomli>=2.0.0; python_version < '3.11'",
]

[project.urls]
Homepage = "https://open-sandbox.ai"
Repository = "https://github.com/alibaba/OpenSandbox"
Documentation = "https://open-sandbox.ai"
Issues = "https://github.com/alibaba/OpenSandbox/issues"

[project.scripts]
opensandbox = "opensandbox_cli.main:cli"
osb = "opensandbox_cli.main:cli"

[tool.hatch.version]
source = "vcs"

[tool.hatch.version.raw-options]
root = ".."
tag_regex = "^python/cli/v(?P<version>\\d+\\.\\d+\\.\\d+(?:[\\.\\w\\+\\-]*)?)$"
git_describe_command = 'git describe --dirty --tags --long --match "python/cli/v*"'
fallback_version = "0.1.0"

[tool.hatch.build]
include = [
    "LICENSE",
    "src/**/py.typed",
    "src/opensandbox_cli",
]

[tool.hatch.build.targets.wheel]
packages = ["src/opensandbox_cli"]

[dependency-groups]
dev = [
    "pytest>=7.0.0",
    "pytest-cov>=4.0.0",
    "ruff>=0.14.8",
    "pyright>=1.1.0",
]

[tool.ruff]
target-version = "py310"
line-length = 88

[tool.ruff.lint]
select = [
    "E",  # pycodestyle errors
    "W",  # pycodestyle warnings
    "F",  # pyflakes
    "I",  # isort
    "B",  # flake8-bugbear
    "C4", # flake8-comprehensions
    "UP", # pyupgrade
]
ignore = [
    "E501", # line too long, handled by formatter
    "B008", # do not perform function calls in argument defaults
    "C901", # too complex
]

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]

[tool.pyright]
typeCheckingMode = "standard"
pythonVersion = "3.10"
pythonPlatform = "All"
include = ["src"]
exclude = [
    "**/node_modules",
    "**/__pycache__",
]
reportMissingImports = true
reportMissingTypeStubs = false

[tool.pytest.ini_options]
minversion = "6.0"
addopts = "-ra -q --strict-markers --strict-config"
testpaths = ["tests"]
python_files = ["test_*.py", "*_test.py"]

[tool.coverage.run]
source = ["src"]
branch = true

[tool.uv.sources]
opensandbox = { path = "../sdks/sandbox/python", editable = true }
opensandbox-code-interpreter = { path = "../sdks/code-interpreter/python", editable = true }


================================================
FILE: cli/src/opensandbox_cli/__init__.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Resolve the package version from the installed distribution metadata.
try:
    from importlib.metadata import version

    __version__ = version("opensandbox-cli")
except Exception:
    # Metadata lookup failed (e.g. the distribution is not installed);
    # fall back to a placeholder version string.
    __version__ = "0.0.0-dev"


================================================
FILE: cli/src/opensandbox_cli/__main__.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Allow running as ``python -m opensandbox_cli``."""

from opensandbox_cli.main import cli

# Run the CLI entry point only when this file is executed as the main
# module, not when it is imported.
if __name__ == "__main__":
    cli()


================================================
FILE: cli/src/opensandbox_cli/client.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""SDK client factory stored in Click context."""

from __future__ import annotations

import re
from dataclasses import dataclass, field
from datetime import timedelta
from typing import Any

import click

from opensandbox.config.connection_sync import ConnectionConfigSync
from opensandbox.models.sandboxes import SandboxFilter
from opensandbox.sync.manager import SandboxManagerSync
from opensandbox.sync.sandbox import SandboxSync

from opensandbox_cli.output import OutputFormatter

# Full UUID pattern: 8-4-4-4-12 hex characters (upper or lower case),
# anchored at both ends so only a complete ID matches.
_UUID_RE = re.compile(
    r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
)


@dataclass
class ClientContext:
    """Shared context passed via ``ctx.obj`` to all Click commands.

    Carries the resolved CLI configuration and the output formatter, and
    lazily creates the SDK connection config and sandbox manager on first use.
    """

    resolved_config: dict[str, Any]
    output: OutputFormatter
    # Cached SDK objects, built on demand; they are not constructor
    # arguments and are omitted from ``repr``.
    _connection_config: ConnectionConfigSync | None = field(
        default=None, init=False, repr=False
    )
    _manager: SandboxManagerSync | None = field(
        default=None, init=False, repr=False
    )

    @property
    def connection_config(self) -> ConnectionConfigSync:
        """Return the SDK connection config, building it on first access.

        Values come from ``resolved_config``; ``protocol`` falls back to
        ``"http"`` and ``request_timeout`` to 30 seconds when the key is absent.
        """
        if self._connection_config is None:
            cfg = self.resolved_config
            self._connection_config = ConnectionConfigSync(
                api_key=cfg.get("api_key"),
                domain=cfg.get("domain"),
                protocol=cfg.get("protocol", "http"),
                request_timeout=timedelta(seconds=cfg.get("request_timeout", 30)),
            )
        return self._connection_config

    def get_manager(self) -> SandboxManagerSync:
        """Return a lazily-created ``SandboxManagerSync``."""
        if self._manager is None:
            self._manager = SandboxManagerSync.create(self.connection_config)
        return self._manager

    def resolve_sandbox_id(self, prefix: str) -> str:
        """Resolve a sandbox ID prefix to the full ID (Docker-style).

        If *prefix* looks like a complete UUID, it is returned as-is without
        querying the server.  Otherwise sandboxes are fetched page by page
        (100 per page) until the listing is exhausted or more than one match
        has been found.

        Args:
            prefix: Full sandbox ID or a leading fragment of one.

        Returns:
            The single sandbox ID that starts with *prefix*.

        Raises:
            click.ClickException: If *prefix* is empty or whitespace-only,
                matches no sandbox, or matches more than one sandbox.
        """
        # The empty string is a prefix of every ID: with exactly one sandbox
        # on the server it would silently resolve to that sandbox (e.g. when
        # the argument comes from an unset shell variable). Reject it up front.
        if not prefix or not prefix.strip():
            raise click.ClickException("Sandbox ID must not be empty")

        # Skip resolution for full UUIDs
        if _UUID_RE.match(prefix):
            return prefix

        mgr = self.get_manager()
        matches: list[str] = []
        page = 0

        while True:
            result = mgr.list_sandbox_infos(
                SandboxFilter(page=page, page_size=100)
            )
            if result.sandbox_infos:
                matches.extend(
                    info.id
                    for info in result.sandbox_infos
                    if info.id.startswith(prefix)
                )
            # Stop early if we already found >1 match (ambiguous)
            if len(matches) > 1:
                break
            if not result.pagination.has_next_page:
                break
            page += 1

        if len(matches) == 1:
            return matches[0]
        elif len(matches) == 0:
            raise click.ClickException(
                f"No sandbox found with ID prefix '{prefix}'"
            )
        else:
            # Show at most five IDs. Because of the early stop above, the
            # count covers only the pages fetched so far.
            ids_str = ", ".join(matches[:5])
            if len(matches) > 5:
                ids_str += ", ..."
            raise click.ClickException(
                f"Ambiguous ID prefix '{prefix}' matches {len(matches)} sandboxes: {ids_str}"
            )

    def connect_sandbox(
        self, sandbox_id: str, *, skip_health_check: bool = True
    ) -> SandboxSync:
        """Connect to an existing sandbox by ID (supports prefix matching).

        The ID is first passed through ``resolve_sandbox_id``, so any
        ``click.ClickException`` raised there propagates to the caller.
        """
        sandbox_id = self.resolve_sandbox_id(sandbox_id)
        return SandboxSync.connect(
            sandbox_id,
            connection_config=self.connection_config,
            skip_health_check=skip_health_check,
        )

    def close(self) -> None:
        """Release resources.

        Closes the cached manager and connection config if they were created,
        and clears both caches so a later access rebuilds them.
        """
        if self._manager is not None:
            self._manager.close()
            self._manager = None
        if self._connection_config is not None:
            self._connection_config.close_transport_if_owned()
            self._connection_config = None


================================================
FILE: cli/src/opensandbox_cli/commands/__init__.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: cli/src/opensandbox_cli/commands/code.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Code execution commands: run, context management, interrupt."""

from __future__ import annotations

import sys

import click

from opensandbox.models.execd import OutputMessage
from opensandbox.models.execd_sync import ExecutionHandlersSync

from opensandbox_cli.client import ClientContext
from opensandbox_cli.utils import handle_errors


@click.group("code", invoke_without_command=True)
@click.pass_context
def code_group(ctx: click.Context) -> None:
    """💻 Execute code in a sandbox (via Code Interpreter)."""
    # A subcommand was given: nothing to do at the group level.
    if ctx.invoked_subcommand is not None:
        return
    # Bare `code` invocation: print the group's help text.
    click.echo(ctx.get_help())


# ---- run ------------------------------------------------------------------

@code_group.command("run")
@click.argument("sandbox_id")
@click.option("--language", "-l", required=True, help="Language (python, javascript, java, go, bash, ...).")
@click.option("--code", "-c", default=None, help="Code to execute. Reads from stdin if not provided.")
@click.option("--context-id", default=None, help="Execution context ID for stateful sessions.")
@click.pass_obj
@handle_errors
def code_run(
    obj: ClientContext,
    sandbox_id: str,
    language: str,
    code: str | None,
    context_id: str | None,
) -> None:
    """Execute code in a sandbox."""
    from code_interpreter.sync.code_interpreter import CodeInterpreterSync

    # Without --code, read the program from stdin; the hint goes to stderr
    # and is shown only when stdin is an interactive terminal.
    if code is None:
        if sys.stdin.isatty():
            click.echo("Reading code from stdin (Ctrl+D to finish):", err=True)
        code = sys.stdin.read()

    def _relay_stdout(message: OutputMessage) -> None:
        # Forward sandbox stdout to the local stdout, flushing per chunk.
        out = sys.stdout
        out.write(message.text)
        out.flush()

    def _relay_stderr(message: OutputMessage) -> None:
        # Forward sandbox stderr to the local stderr, flushing per chunk.
        err = sys.stderr
        err.write(message.text)
        err.flush()

    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        interpreter = CodeInterpreterSync.create(sandbox)

        # Only pass a context when the caller asked for one.
        run_kwargs: dict = (
            {"context": interpreter.codes.get_context(context_id)}
            if context_id
            else {}
        )

        handlers = ExecutionHandlersSync(
            on_stdout=_relay_stdout, on_stderr=_relay_stderr
        )
        execution = interpreter.codes.run(
            code, language=language, handlers=handlers, **run_kwargs
        )

        # Report an execution error and exit with status 1.
        if execution.error:
            obj.output.error(
                f"{execution.error.name}: {execution.error.value}"
            )
            sys.exit(1)
    finally:
        sandbox.close()


# ---- context group --------------------------------------------------------

@code_group.group("context", invoke_without_command=True)
@click.pass_context
def context_group(ctx: click.Context) -> None:
    """Manage code execution contexts."""
    # A subcommand was given: nothing to do at the group level.
    if ctx.invoked_subcommand is not None:
        return
    # Bare `context` invocation: print the group's help text.
    click.echo(ctx.get_help())


@context_group.command("create")
@click.argument("sandbox_id")
@click.option("--language", "-l", required=True, help="Language for the context.")
@click.pass_obj
@handle_errors
def context_create(obj: ClientContext, sandbox_id: str, language: str) -> None:
    """Create a new code execution context."""
    from code_interpreter.sync.code_interpreter import CodeInterpreterSync

    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        codes = CodeInterpreterSync.create(sandbox).codes
        created = codes.create_context(language)
        # Show the new context's ID together with the requested language.
        summary = {"context_id": created.id, "language": language}
        obj.output.success_panel(summary, title="Context Created")
    finally:
        # Always release the sandbox connection, even on failure.
        sandbox.close()


@context_group.command("list")
@click.argument("sandbox_id")
@click.option("--language", "-l", required=True, help="Language to list contexts for.")
@click.pass_obj
@handle_errors
def context_list(obj: ClientContext, sandbox_id: str, language: str) -> None:
    """List code execution contexts."""
    from code_interpreter.sync.code_interpreter import CodeInterpreterSync

    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        codes = CodeInterpreterSync.create(sandbox).codes
        # Print one context ID per line.
        for context in codes.list_contexts(language):
            click.echo(f"{context.id}")
    finally:
        # Always release the sandbox connection, even on failure.
        sandbox.close()


@context_group.command("delete")
@click.argument("sandbox_id")
@click.argument("context_id")
@click.pass_obj
@handle_errors
def context_delete(obj: ClientContext, sandbox_id: str, context_id: str) -> None:
    """Delete a code execution context."""
    from code_interpreter.sync.code_interpreter import CodeInterpreterSync

    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        codes = CodeInterpreterSync.create(sandbox).codes
        codes.delete_context(context_id)
        # Reached only if the deletion call did not raise.
        obj.output.success(f"Deleted context: {context_id}")
    finally:
        # Always release the sandbox connection, even on failure.
        sandbox.close()


@context_group.command("delete-all")
@click.argument("sandbox_id")
@click.option("--language", "-l", required=True, help="Language to delete all contexts for.")
@click.pass_obj
@handle_errors
def context_delete_all(obj: ClientContext, sandbox_id: str, language: str) -> None:
    """Delete all code execution contexts for a language."""
    from code_interpreter.sync.code_interpreter import CodeInterpreterSync

    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        codes = CodeInterpreterSync.create(sandbox).codes
        codes.delete_contexts(language)
        # Reached only if the deletion call did not raise.
        obj.output.success(f"Deleted all {language} contexts")
    finally:
        # Always release the sandbox connection, even on failure.
        sandbox.close()


# ---- interrupt ------------------------------------------------------------

@code_group.command("interrupt")
@click.argument("sandbox_id")
@click.argument("execution_id")
@click.pass_obj
@handle_errors
def code_interrupt(obj: ClientContext, sandbox_id: str, execution_id: str) -> None:
    """Interrupt a running code execution."""
    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        # The interrupt is issued through the sandbox's command service.
        commands = sandbox.commands
        commands.interrupt(execution_id)
        obj.output.success(f"Interrupted: {execution_id}")
    finally:
        # Always release the sandbox connection, even on failure.
        sandbox.close()


================================================
FILE: cli/src/opensandbox_cli/commands/command.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Command execution commands: run, status, logs, interrupt + top-level exec alias."""

from __future__ import annotations

import shlex
import sys
from datetime import timedelta

import click

from opensandbox.models.execd import OutputMessage, RunCommandOpts
from opensandbox.models.execd_sync import ExecutionHandlersSync

from opensandbox_cli.client import ClientContext
from opensandbox_cli.utils import DURATION, handle_errors


@click.group("command", invoke_without_command=True)
@click.pass_context
def command_group(ctx: click.Context) -> None:
    """⚡ Execute commands in a sandbox."""
    # A subcommand was given: nothing to do at the group level.
    if ctx.invoked_subcommand is not None:
        return
    # Bare `command` invocation: print the group's help text.
    click.echo(ctx.get_help())


# ---- run ------------------------------------------------------------------

def _run_command(
    obj: ClientContext,
    sandbox_id: str,
    command: tuple[str, ...],
    background: bool,
    workdir: str | None,
    timeout: timedelta | None,
) -> None:
    """Run *command* in a sandbox; backs both 'command run' and 'exec'.

    In background mode the command is started and a summary panel with the
    execution ID is printed. In foreground mode stdout/stderr are streamed to
    the local terminal and the process exits with status 1 if the execution
    reports an error.
    """
    # Shell-quote every argument, then join them into one command string.
    quoted_parts = [shlex.quote(part) for part in command]
    cmd_str = " ".join(quoted_parts)
    sandbox = obj.connect_sandbox(sandbox_id)

    try:
        opts = RunCommandOpts(
            background=background,
            working_directory=workdir,
            timeout=timeout,
        )

        if background:
            started = sandbox.commands.run(cmd_str, opts=opts)
            panel = {
                "execution_id": started.id,
                "sandbox_id": sandbox_id,
                "mode": "background",
            }
            obj.output.success_panel(panel, title="Background Command Started")
        else:
            # Foreground: stream output and remember the most recent chunk
            # (from either stream) to decide on a trailing newline.
            tail = {"text": ""}

            def on_stdout(msg: OutputMessage) -> None:
                tail["text"] = msg.text
                sys.stdout.write(msg.text)
                sys.stdout.flush()

            def on_stderr(msg: OutputMessage) -> None:
                tail["text"] = msg.text
                sys.stderr.write(msg.text)
                sys.stderr.flush()

            handlers = ExecutionHandlersSync(
                on_stdout=on_stdout, on_stderr=on_stderr
            )
            execution = sandbox.commands.run(
                cmd_str, opts=opts, handlers=handlers
            )

            # Ensure the terminal prompt starts on a new line.
            final_chunk = tail["text"]
            if final_chunk and not final_chunk.endswith("\n"):
                sys.stdout.write("\n")
                sys.stdout.flush()

            if execution.error:
                obj.output.error_panel(
                    f"{execution.error.name}: {execution.error.value}",
                    title="Execution Error",
                )
                sys.exit(1)
    finally:
        # Always release the sandbox connection, even on failure or exit.
        sandbox.close()


@command_group.command("run")
@click.argument("sandbox_id")
@click.argument("command", nargs=-1, required=True)
@click.option("-d", "--background", is_flag=True, default=False, help="Run in background.")
@click.option("-w", "--workdir", default=None, help="Working directory.")
@click.option("-t", "--timeout", type=DURATION, default=None, help="Command timeout (e.g. 30s, 5m).")
@click.pass_obj
@handle_errors
def command_run(
    obj: ClientContext,
    sandbox_id: str,
    command: tuple[str, ...],
    background: bool,
    workdir: str | None,
    timeout: timedelta | None,
) -> None:
    """Run a command in a sandbox."""
    # Delegate to the implementation shared with the top-level `exec` command.
    _run_command(
        obj=obj,
        sandbox_id=sandbox_id,
        command=command,
        background=background,
        workdir=workdir,
        timeout=timeout,
    )


# ---- status ---------------------------------------------------------------

@command_group.command("status")
@click.argument("sandbox_id")
@click.argument("execution_id")
@click.pass_obj
@handle_errors
def command_status(obj: ClientContext, sandbox_id: str, execution_id: str) -> None:
    """Get command execution status."""
    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        commands = sandbox.commands
        # Fetch the status and render it through the output formatter.
        obj.output.print_model(
            commands.get_command_status(execution_id),
            title="Command Status",
        )
    finally:
        # Always release the sandbox connection, even on failure.
        sandbox.close()


# ---- logs -----------------------------------------------------------------

@command_group.command("logs")
@click.argument("sandbox_id")
@click.argument("execution_id")
@click.option("--cursor", type=int, default=None, help="Cursor for incremental reads.")
@click.pass_obj
@handle_errors
def command_logs(
    obj: ClientContext, sandbox_id: str, execution_id: str, cursor: int | None
) -> None:
    """Get background command logs."""
    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        logs = sandbox.commands.get_background_command_logs(execution_id, cursor=cursor)
        # json/yaml output renders the whole model; any other format prints
        # just the log content.
        wants_structured = obj.output.fmt in ("json", "yaml")
        if not wants_structured:
            click.echo(logs.content)
        else:
            obj.output.print_model(logs, title="Command Logs")
    finally:
        # Always release the sandbox connection, even on failure.
        sandbox.close()


# ---- interrupt ------------------------------------------------------------

@command_group.command("interrupt")
@click.argument("sandbox_id")
@click.argument("execution_id")
@click.pass_obj
@handle_errors
def command_interrupt(obj: ClientContext, sandbox_id: str, execution_id: str) -> None:
    """Interrupt a running command."""
    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        commands = sandbox.commands
        commands.interrupt(execution_id)
        # Reached only if the interrupt call did not raise.
        obj.output.success(f"Interrupted: {execution_id}")
    finally:
        # Always release the sandbox connection, even on failure.
        sandbox.close()


# ---- top-level exec alias ------------------------------------------------

@click.command("exec")
@click.argument("sandbox_id")
@click.argument("command", nargs=-1, required=True)
@click.option("-d", "--background", is_flag=True, default=False, help="Run in background.")
@click.option("-w", "--workdir", default=None, help="Working directory.")
@click.option("-t", "--timeout", type=DURATION, default=None, help="Command timeout (e.g. 30s, 5m).")
@click.pass_obj
@handle_errors
def exec_cmd(
    obj: ClientContext,
    sandbox_id: str,
    command: tuple[str, ...],
    background: bool,
    workdir: str | None,
    timeout: timedelta | None,
) -> None:
    """🚀 Execute a command in a sandbox (shortcut for 'command run')."""
    # Same arguments and options as `command run`; delegate to the shared
    # implementation.
    _run_command(
        obj=obj,
        sandbox_id=sandbox_id,
        command=command,
        background=background,
        workdir=workdir,
        timeout=timeout,
    )


================================================
FILE: cli/src/opensandbox_cli/commands/config_cmd.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Config management commands: init, show, set."""

from __future__ import annotations

from pathlib import Path

import click

from opensandbox_cli.client import ClientContext
from opensandbox_cli.config import DEFAULT_CONFIG_PATH, init_config_file
from opensandbox_cli.utils import handle_errors


@click.group("config", invoke_without_command=True)
@click.pass_context
def config_group(ctx: click.Context) -> None:
    """⚙️  Manage CLI configuration."""
    # A subcommand was given: nothing to do at the group level.
    if ctx.invoked_subcommand is not None:
        return
    # Bare `config` invocation: print the group's help text.
    click.echo(ctx.get_help())


# ---- init -----------------------------------------------------------------

@config_group.command("init")
@click.option("--force", is_flag=True, default=False, help="Overwrite existing config file.")
@click.option("--path", "config_path", type=click.Path(path_type=Path), default=None, help="Config file path.")
@handle_errors
def config_init(force: bool, config_path: Path | None) -> None:
    """Create a default configuration file."""
    # This command does not use @click.pass_obj, so look the formatter up on
    # the current Click context; any missing link leaves `output` as None.
    output = None
    ctx = click.get_current_context(silent=True)
    if ctx:
        obj = getattr(ctx, "obj", None)
        if obj:
            output = getattr(obj, "output", None)

    try:
        path = init_config_file(config_path, force=force)
        created_msg = f"Config file created: {path}"
        if not output:
            click.echo(created_msg)
        else:
            output.success(created_msg)
    except FileExistsError as exc:
        # An existing file is reported as a warning rather than re-raised.
        warning_text = str(exc)
        if not output:
            click.secho(warning_text, fg="yellow", err=True)
        else:
            output.warning(warning_text)


# ---- show -----------------------------------------------------------------

@config_group.command("show")
@click.pass_obj
@handle_errors
def config_show(obj: ClientContext) -> None:
    """Show the resolved configuration."""
    # Render the already-merged configuration held by the client context.
    formatter = obj.output
    formatter.print_dict(obj.resolved_config, title="Resolved Configuration")


# ---- set ------------------------------------------------------------------

@config_group.command("set")
@click.argument("key")
@click.argument("value")
@click.option("--path", "config_path", type=click.Path(path_type=Path), default=None, help="Config file path.")
@handle_errors
def config_set(key: str, value: str, config_path: Path | None) -> None:
    """Set a configuration value (e.g. 'connection.domain' 'localhost:9090')."""
    # Parameters:
    #   key         -- dotted 'section.field' name; split on the first dot only.
    #   value       -- raw text from the command line; its TOML type is inferred
    #                  (bool > number > string).
    #   config_path -- file to edit; DEFAULT_CONFIG_PATH when omitted.
    # The file is edited textually (not parsed and re-serialised) so existing
    # comments and layout in the file survive the update.
    import json
    import re

    path = config_path or DEFAULT_CONFIG_PATH
    if not path.exists():
        click.secho(f"Config file not found: {path}. Run 'osb config init' first.", fg="red", err=True)
        return

    # TOML documents are UTF-8 by specification, so do not rely on the locale
    # encoding when reading or writing the file.
    content = path.read_text(encoding="utf-8")

    # Both halves must be non-empty, otherwise we would emit "[]" or " = value".
    section, dot, field = key.partition(".")
    if not (dot and section and field):
        click.secho("Key must be in 'section.field' format (e.g. connection.domain).", fg="red", err=True)
        return

    # Decimal integers / floats exactly as the TOML grammar allows them: no
    # leading zeros, digits on both sides of the dot, lowercase inf / nan.
    toml_number = re.compile(
        r"[+-]?(?:"
        r"(?:0|[1-9](?:_?[0-9])*)"
        r"(?:\.[0-9](?:_?[0-9])*)?"
        r"(?:[eE][+-]?[0-9](?:_?[0-9])*)?"
        r"|inf|nan)"
    )

    def _toml_value(raw: str) -> str:
        """Render *raw* as a TOML literal: bool, then number, else quoted string."""
        lowered = raw.lower()
        if lowered in ("true", "false"):
            return lowered
        candidate = raw.strip()
        if toml_number.fullmatch(candidate):
            return candidate
        # JSON string escapes (\" \\ \n \t \uXXXX ...) are all valid in a TOML
        # basic string; DEL is the one control character JSON leaves unescaped.
        return json.dumps(raw, ensure_ascii=False).replace("\x7f", "\\u007F")

    toml_val = _toml_value(value)
    new_line = f"{field} = {toml_val}"

    # A section spans from its header up to (not including) the next header.
    section_pattern = rf"(\[{re.escape(section)}\].*?)(?=\n\[|\Z)"
    section_match = re.search(section_pattern, content, re.DOTALL)

    if section_match is None:
        # Unknown section: append it at the end of the file.
        content += f"\n[{section}]\n{new_line}\n"
    else:
        section_text = section_match.group(1)
        # Match the field on a single line, whether active or commented out.
        # [ \t]* (not \s*) keeps the match from leaking across line breaks.
        field_pattern = rf"^[ \t]*#?[ \t]*{re.escape(field)}[ \t]*=.*$"
        field_match = re.search(field_pattern, section_text, re.MULTILINE)
        if field_match:
            new_section = (
                section_text[: field_match.start()] + new_line + section_text[field_match.end():]
            )
        else:
            # Insert right after the section's last non-blank line so the new
            # field stays visually attached to its section.
            kept = section_text.rstrip()
            new_section = kept + "\n" + new_line + section_text[len(kept):]
        content = content[: section_match.start()] + new_section + content[section_match.end():]

    path.write_text(content, encoding="utf-8")

    # config_set doesn't have @click.pass_obj, get formatter from context
    ctx = click.get_current_context(silent=True)
    obj = getattr(ctx, "obj", None) if ctx else None
    output = getattr(obj, "output", None) if obj else None
    if output:
        output.success(f"Set {key} = {value}")
    else:
        click.echo(f"Set {key} = {value}")


================================================
FILE: cli/src/opensandbox_cli/commands/file.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""File operation commands: cat, write, upload, download, rm, mv, mkdir, rmdir, search, info, chmod, replace."""

from __future__ import annotations

import sys
from pathlib import Path

import click

from opensandbox_cli.client import ClientContext
from opensandbox_cli.utils import handle_errors


@click.group("file", invoke_without_command=True)
@click.pass_context
def file_group(ctx: click.Context) -> None:
    """📁 File operations on a sandbox."""
    # Without a subcommand there is nothing to execute, so show the help text.
    if ctx.invoked_subcommand is not None:
        return
    click.echo(ctx.get_help())


# ---- cat (read) -----------------------------------------------------------

@file_group.command("cat")
@click.argument("sandbox_id")
@click.argument("path")
@click.option("--encoding", default="utf-8", help="File encoding.")
@click.pass_obj
@handle_errors
def file_cat(obj: ClientContext, sandbox_id: str, path: str, encoding: str) -> None:
    """Read a file from the sandbox."""
    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        # nl=False: print the file as-is, without appending a newline.
        click.echo(sandbox.files.read_file(path, encoding=encoding), nl=False)
    finally:
        # Always release the connection, even if the read fails.
        sandbox.close()


# ---- write ----------------------------------------------------------------

@file_group.command("write")
@click.argument("sandbox_id")
@click.argument("path")
@click.option("--content", "-c", default=None, help="Content to write. Reads from stdin if not provided.")
@click.option("--encoding", default="utf-8", help="File encoding.")
@click.option("--mode", default=None, help="File permission mode (e.g. 0644).")
@click.option("--owner", default=None, help="File owner.")
@click.option("--group", default=None, help="File group.")
@click.pass_obj
@handle_errors
def file_write(
    obj: ClientContext,
    sandbox_id: str,
    path: str,
    content: str | None,
    encoding: str,
    mode: str | None,
    owner: str | None,
    group: str | None,
) -> None:
    """Write content to a file in the sandbox."""
    if content is None:
        # No --content given: read the payload from stdin, prompting only when
        # a human is at the terminal (the hint goes to stderr).
        if sys.stdin.isatty():
            click.echo("Reading from stdin (Ctrl+D to finish):", err=True)
        content = sys.stdin.read()

    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        # Forward only the optional attributes the user actually supplied.
        write_opts: dict = {"encoding": encoding}
        for opt_name, opt_value in (("mode", mode), ("owner", owner), ("group", group)):
            if opt_value is not None:
                write_opts[opt_name] = opt_value
        sandbox.files.write_file(path, content, **write_opts)
        obj.output.success(f"Written: {path}")
    finally:
        sandbox.close()


# ---- upload ---------------------------------------------------------------

@file_group.command("upload")
@click.argument("sandbox_id")
@click.argument("local_path", type=click.Path(exists=True, dir_okay=False))
@click.argument("remote_path")
@click.pass_obj
@handle_errors
def file_upload(
    obj: ClientContext, sandbox_id: str, local_path: str, remote_path: str
) -> None:
    """Upload a local file to the sandbox."""
    # LOCAL_PATH must be an existing regular file: dir_okay=False makes Click
    # reject a directory during argument parsing instead of failing later in
    # read_bytes().
    # The file is read fully into memory before the sandbox connection is opened.
    data = Path(local_path).read_bytes()
    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        sandbox.files.write_file(remote_path, data)
        obj.output.success(f"Uploaded: {local_path} → {remote_path}")
    finally:
        # Always release the connection, even if the upload fails.
        sandbox.close()


# ---- download -------------------------------------------------------------

@file_group.command("download")
@click.argument("sandbox_id")
@click.argument("remote_path")
@click.argument("local_path", type=click.Path(dir_okay=False))
@click.pass_obj
@handle_errors
def file_download(
    obj: ClientContext, sandbox_id: str, remote_path: str, local_path: str
) -> None:
    """Download a file from the sandbox to local disk."""
    # dir_okay=False: an existing directory given as LOCAL_PATH is rejected by
    # Click during argument parsing, before any sandbox connection is made.
    # A path that does not exist yet is still accepted.
    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        content = sandbox.files.read_bytes(remote_path)
        Path(local_path).write_bytes(content)
        obj.output.success(f"Downloaded: {remote_path} → {local_path}")
    finally:
        # Always release the connection, even if the transfer fails.
        sandbox.close()


# ---- rm (delete) ----------------------------------------------------------

@file_group.command("rm")
@click.argument("sandbox_id")
@click.argument("paths", nargs=-1, required=True)
@click.pass_obj
@handle_errors
def file_rm(obj: ClientContext, sandbox_id: str, paths: tuple[str, ...]) -> None:
    """Delete files from the sandbox."""
    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        # One batched delete call, then one confirmation line per path.
        targets = list(paths)
        sandbox.files.delete_files(targets)
        for deleted in paths:
            obj.output.success(f"Deleted: {deleted}")
    finally:
        sandbox.close()


# ---- mv (move) ------------------------------------------------------------

@file_group.command("mv")
@click.argument("sandbox_id")
@click.argument("source")
@click.argument("destination")
@click.pass_obj
@handle_errors
def file_mv(
    obj: ClientContext, sandbox_id: str, source: str, destination: str
) -> None:
    """Move/rename a file in the sandbox."""
    # SDK model imported at call time rather than at module import.
    from opensandbox.models.filesystem import MoveEntry

    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        # The SDK call takes a list of moves; this command issues exactly one.
        move = MoveEntry(source=source, destination=destination)
        sandbox.files.move_files([move])
        obj.output.success(f"Moved: {source} → {destination}")
    finally:
        sandbox.close()


# ---- mkdir ----------------------------------------------------------------

@file_group.command("mkdir")
@click.argument("sandbox_id")
@click.argument("paths", nargs=-1, required=True)
@click.option("--mode", default=None, help="Directory permission mode.")
@click.option("--owner", default=None, help="Directory owner.")
@click.option("--group", default=None, help="Directory group.")
@click.pass_obj
@handle_errors
def file_mkdir(
    obj: ClientContext,
    sandbox_id: str,
    paths: tuple[str, ...],
    mode: str | None,
    owner: str | None,
    group: str | None,
) -> None:
    """Create directories in the sandbox."""
    # SDK model imported at call time rather than at module import.
    from opensandbox.models.filesystem import WriteEntry

    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        # Optional attributes are shared by every directory and are only passed
        # to WriteEntry when the user supplied them.
        shared_attrs: dict = {}
        for attr_name, attr_value in (("mode", mode), ("owner", owner), ("group", group)):
            if attr_value is not None:
                shared_attrs[attr_name] = attr_value
        entries = [WriteEntry(path=directory, **shared_attrs) for directory in paths]
        sandbox.files.create_directories(entries)
        for directory in paths:
            obj.output.success(f"Created: {directory}")
    finally:
        sandbox.close()


# ---- rmdir ----------------------------------------------------------------

@file_group.command("rmdir")
@click.argument("sandbox_id")
@click.argument("paths", nargs=-1, required=True)
@click.pass_obj
@handle_errors
def file_rmdir(obj: ClientContext, sandbox_id: str, paths: tuple[str, ...]) -> None:
    """Delete directories from the sandbox."""
    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        # One batched delete call, then one confirmation line per directory.
        targets = list(paths)
        sandbox.files.delete_directories(targets)
        for removed in paths:
            obj.output.success(f"Removed: {removed}")
    finally:
        sandbox.close()


# ---- search ---------------------------------------------------------------

@file_group.command("search")
@click.argument("sandbox_id")
@click.argument("path")
@click.option("--pattern", "-p", required=True, help="Glob pattern to search for.")
@click.pass_obj
@handle_errors
def file_search(
    obj: ClientContext, sandbox_id: str, path: str, pattern: str
) -> None:
    """Search for files in the sandbox."""
    # SDK model imported at call time rather than at module import.
    from opensandbox.models.filesystem import SearchEntry

    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        query = SearchEntry(path=path, pattern=pattern)
        results = sandbox.files.search(query)
        machine_readable = obj.output.fmt in ("json", "yaml")
        if results:
            if machine_readable:
                # JSON/YAML output carries the wider column set.
                obj.output.print_models(results, columns=["path", "size", "mode", "owner", "modified_at"])
            else:
                obj.output.print_models(results, columns=["path", "size", "owner"], title="Search Results")
        elif machine_readable:
            # Empty result in JSON/YAML mode: print an empty list, not a message.
            obj.output.print_models([], columns=[])
        else:
            obj.output.info("No files found.")
    finally:
        sandbox.close()


# ---- info (stat) ----------------------------------------------------------

@file_group.command("info")
@click.argument("sandbox_id")
@click.argument("paths", nargs=-1, required=True)
@click.pass_obj
@handle_errors
def file_info(obj: ClientContext, sandbox_id: str, paths: tuple[str, ...]) -> None:
    """Get file/directory info."""
    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        info_by_path = sandbox.files.get_file_info(list(paths))
        for file_path, entry in info_by_path.items():
            # Lead with the path, then merge in the entry's JSON-mode fields.
            details = {"path": file_path}
            details.update(entry.model_dump(mode="json"))
            obj.output.print_dict(details, title=file_path)
    finally:
        sandbox.close()


# ---- chmod ----------------------------------------------------------------

@file_group.command("chmod")
@click.argument("sandbox_id")
@click.argument("path")
@click.option("--mode", required=True, help="Permission mode (e.g. 0755).")
@click.option("--owner", default=None, help="File owner.")
@click.option("--group", default=None, help="File group.")
@click.pass_obj
@handle_errors
def file_chmod(
    obj: ClientContext,
    sandbox_id: str,
    path: str,
    mode: str,
    owner: str | None,
    group: str | None,
) -> None:
    """Set file permissions."""
    # SDK model imported at call time rather than at module import.
    from opensandbox.models.filesystem import SetPermissionEntry

    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        # owner/group are passed through as-is (None when not given).
        permission = SetPermissionEntry(path=path, mode=mode, owner=owner, group=group)
        sandbox.files.set_permissions([permission])
        obj.output.success(f"Permissions set: {path}")
    finally:
        sandbox.close()


# ---- replace --------------------------------------------------------------

@file_group.command("replace")
@click.argument("sandbox_id")
@click.argument("path")
@click.option("--old", required=True, help="Text to search for.")
@click.option("--new", required=True, help="Replacement text.")
@click.pass_obj
@handle_errors
def file_replace(
    obj: ClientContext, sandbox_id: str, path: str, old: str, new: str
) -> None:
    """Replace content in a file."""
    # SDK model imported at call time rather than at module import.
    from opensandbox.models.filesystem import ContentReplaceEntry

    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        # The SDK call takes a list of replacements; this command sends one.
        replacement = ContentReplaceEntry(path=path, old_content=old, new_content=new)
        sandbox.files.replace_contents([replacement])
        obj.output.success(f"Replaced in: {path}")
    finally:
        sandbox.close()


================================================
FILE: cli/src/opensandbox_cli/commands/sandbox.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Sandbox lifecycle commands: create, list, get, kill, pause, resume, renew, endpoint, health, metrics."""

from __future__ import annotations

import json
from datetime import timedelta

import click

from opensandbox.models.sandboxes import NetworkPolicy, SandboxFilter

from opensandbox_cli.client import ClientContext
from opensandbox_cli.utils import DURATION, KEY_VALUE, handle_errors


@click.group("sandbox", invoke_without_command=True)
@click.pass_context
def sandbox_group(ctx: click.Context) -> None:
    """📦 Manage sandbox lifecycle."""
    # Without a subcommand there is nothing to execute, so show the help text.
    if ctx.invoked_subcommand is not None:
        return
    click.echo(ctx.get_help())


# Pin the group's canonical name to "sandbox". This is the same name already
# passed to @click.group above, so the assignment does not change it.
# NOTE(review): the previous comment mentioned an `osb sb ...` alias, but no
# alias is registered by this statement — confirm whether one is added where
# the root group wires up its commands.
sandbox_group.name = "sandbox"


# ---- create ---------------------------------------------------------------

@sandbox_group.command("create")
@click.option("--image", "-i", required=True, help="Container image (e.g. python:3.11).")
@click.option("--timeout", "-t", "timeout", type=DURATION, default=None, help="Sandbox lifetime (e.g. 10m, 1h).")
@click.option("--env", "-e", "envs", multiple=True, type=KEY_VALUE, help="Environment variable (KEY=VALUE). Repeatable.")
@click.option("--metadata", "-m", "metadata_kv", multiple=True, type=KEY_VALUE, help="Metadata (KEY=VALUE). Repeatable.")
@click.option("--resource", "resources_kv", multiple=True, type=KEY_VALUE, help="Resource limit (e.g. cpu=1 memory=2Gi). Repeatable.")
@click.option("--entrypoint", default=None, help="Entrypoint command (JSON array or shell string).")
@click.option("--network-policy-file", type=click.Path(exists=True), default=None, help="Network policy JSON file.")
@click.option("--skip-health-check", is_flag=True, default=False, help="Skip waiting for sandbox readiness.")
@click.option("--ready-timeout", type=DURATION, default=None, help="Max wait time for sandbox readiness (e.g. 30s).")
@click.pass_obj
@handle_errors
def sandbox_create(
    obj: ClientContext,
    image: str,
    timeout: timedelta | None,
    envs: tuple[tuple[str, str], ...],
    metadata_kv: tuple[tuple[str, str], ...],
    resources_kv: tuple[tuple[str, str], ...],
    entrypoint: str | None,
    network_policy_file: str | None,
    skip_health_check: bool,
    ready_timeout: timedelta | None,
) -> None:
    """Create a new sandbox."""
    # SDK class imported at call time rather than at module import.
    from opensandbox.sync.sandbox import SandboxSync

    # Only options the user actually supplied are forwarded, so the SDK's own
    # defaults apply to everything else.
    kwargs: dict = {
        "connection_config": obj.connection_config,
        "skip_health_check": skip_health_check,
    }
    if timeout is not None:
        kwargs["timeout"] = timeout
    if ready_timeout is not None:
        kwargs["ready_timeout"] = ready_timeout
    if envs:
        kwargs["env"] = dict(envs)
    if metadata_kv:
        kwargs["metadata"] = dict(metadata_kv)
    if resources_kv:
        kwargs["resource"] = dict(resources_kv)
    if entrypoint:
        # --entrypoint is either a JSON array or a shell string. Text that
        # happens to parse as some other JSON value (true, 1, "python", null)
        # is a shell string too, so only a decoded list is taken as argv.
        try:
            parsed_entrypoint = json.loads(entrypoint)
        except json.JSONDecodeError:
            parsed_entrypoint = None
        if isinstance(parsed_entrypoint, list):
            kwargs["entrypoint"] = parsed_entrypoint
        else:
            kwargs["entrypoint"] = ["sh", "-c", entrypoint]
    if network_policy_file:
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(network_policy_file, encoding="utf-8") as f:
            kwargs["network_policy"] = NetworkPolicy(**json.load(f))

    with obj.output.spinner("Creating sandbox..."):
        sandbox = SandboxSync.create(image, **kwargs)
    obj.output.success_panel(
        {"id": sandbox.id, "image": image, "status": "created"},
        title="Sandbox Created",
    )


# ---- list -----------------------------------------------------------------

@sandbox_group.command("list")
@click.option("--state", "-s", "states", multiple=True, help="Filter by state (Pending, Running, Paused, ...). Repeatable.")
@click.option("--metadata", "-m", "metadata_kv", multiple=True, type=KEY_VALUE, help="Metadata filter (KEY=VALUE). Repeatable.")
@click.option("--page", type=int, default=None, help="Page number (0-indexed).")
@click.option("--page-size", type=int, default=None, help="Items per page.")
@click.pass_obj
@handle_errors
def sandbox_list(
    obj: ClientContext,
    states: tuple[str, ...],
    metadata_kv: tuple[tuple[str, str], ...],
    page: int | None,
    page_size: int | None,
) -> None:
    """List sandboxes."""

    def _flatten(row: dict) -> dict:
        # Collapse the nested status/image objects to one scalar each so they
        # fit a table cell; non-dict values are left untouched.
        flat = dict(row)
        for key, inner_key in (("status", "state"), ("image", "image")):
            nested = flat.get(key)
            if isinstance(nested, dict):
                flat[key] = nested.get(inner_key, str(nested))
        return flat

    mgr = obj.get_manager()
    # Empty filter inputs become None rather than empty collections.
    filt = SandboxFilter(
        states=list(states) if states else None,
        metadata=dict(metadata_kv) if metadata_kv else None,
        page=page,
        page_size=page_size,
    )
    with obj.output.spinner("Fetching sandboxes..."):
        result = mgr.list_sandbox_infos(filt)

    columns = ["id", "status", "image", "created_at", "expires_at"]
    machine_readable = obj.output.fmt in ("json", "yaml")

    if not result.sandbox_infos:
        if machine_readable:
            # JSON/YAML mode gets an empty row set instead of a message.
            obj.output.print_rows([], columns=columns, title="Sandboxes")
        else:
            obj.output.info("No sandboxes found.")
        return

    raw_rows = [info.model_dump(mode="json") for info in result.sandbox_infos]

    # JSON/YAML output keeps the original nested structure; tables get the
    # flattened rows.
    rows = raw_rows if machine_readable else [_flatten(row) for row in raw_rows]
    obj.output.print_rows(rows, columns=columns, title="Sandboxes")


# ---- get ------------------------------------------------------------------

@sandbox_group.command("get")
@click.argument("sandbox_id")
@click.pass_obj
@handle_errors
def sandbox_get(obj: ClientContext, sandbox_id: str) -> None:
    """Get sandbox details."""
    resolved_id = obj.resolve_sandbox_id(sandbox_id)
    mgr = obj.get_manager()
    details = mgr.get_sandbox_info(resolved_id).model_dump(mode="json")

    # JSON/YAML output keeps the original nested structure; only the table
    # view flattens the nested objects.
    if obj.output.fmt not in ("json", "yaml"):
        status = details.get("status")
        if isinstance(status, dict):
            details["status"] = status.get("state", str(status))
            # Surface reason/message as separate rows when they are set.
            for extra in ("reason", "message"):
                if status.get(extra):
                    details[f"status_{extra}"] = status[extra]
        image = details.get("image")
        if isinstance(image, dict):
            details["image"] = image.get("image", str(image))

    obj.output.print_dict(details, title="Sandbox Info")


# ---- kill -----------------------------------------------------------------

@sandbox_group.command("kill")
@click.argument("sandbox_ids", nargs=-1, required=True)
@click.pass_obj
@handle_errors
def sandbox_kill(obj: ClientContext, sandbox_ids: tuple[str, ...]) -> None:
    """Terminate one or more sandboxes."""
    mgr = obj.get_manager()
    # Sandboxes are resolved and killed one at a time, in the order given.
    for requested_id in sandbox_ids:
        target_id = obj.resolve_sandbox_id(requested_id)
        with obj.output.spinner(f"Killing sandbox {target_id}..."):
            mgr.kill_sandbox(target_id)
        obj.output.success(f"Sandbox terminated: {target_id}")


# ---- pause ----------------------------------------------------------------

@sandbox_group.command("pause")
@click.argument("sandbox_id")
@click.pass_obj
@handle_errors
def sandbox_pause(obj: ClientContext, sandbox_id: str) -> None:
    """Pause a running sandbox."""
    # Resolve the user-supplied id first; the resolved id is what gets
    # paused and reported.
    target_id = obj.resolve_sandbox_id(sandbox_id)
    manager = obj.get_manager()
    with obj.output.spinner("Pausing sandbox..."):
        manager.pause_sandbox(target_id)
    obj.output.success(f"Sandbox paused: {target_id}")


# ---- resume ---------------------------------------------------------------

@sandbox_group.command("resume")
@click.argument("sandbox_id")
@click.pass_obj
@handle_errors
def sandbox_resume(obj: ClientContext, sandbox_id: str) -> None:
    """Resume a paused sandbox."""
    # Resolve the user-supplied id first; the resolved id is what gets
    # resumed and reported.
    target_id = obj.resolve_sandbox_id(sandbox_id)
    manager = obj.get_manager()
    with obj.output.spinner("Resuming sandbox..."):
        manager.resume_sandbox(target_id)
    obj.output.success(f"Sandbox resumed: {target_id}")


# ---- renew ----------------------------------------------------------------

@sandbox_group.command("renew")
@click.argument("sandbox_id")
@click.option("--timeout", "-t", required=True, type=DURATION, help="New TTL duration (e.g. 30m, 2h).")
@click.pass_obj
@handle_errors
def sandbox_renew(obj: ClientContext, sandbox_id: str, timeout: timedelta) -> None:
    """Renew sandbox expiration."""
    target_id = obj.resolve_sandbox_id(sandbox_id)
    manager = obj.get_manager()
    with obj.output.spinner("Renewing sandbox..."):
        renewal = manager.renew_sandbox(target_id, timeout)
    # Report the expiry from the response, stringified for display.
    summary = {"sandbox_id": target_id, "expires_at": str(renewal.expires_at)}
    obj.output.success_panel(summary, title="Sandbox Renewed")


# ---- endpoint -------------------------------------------------------------

@sandbox_group.command("endpoint")
@click.argument("sandbox_id")
@click.option("--port", "-p", required=True, type=int, help="Port number.")
@click.pass_obj
@handle_errors
def sandbox_endpoint(obj: ClientContext, sandbox_id: str, port: int) -> None:
    """Get the public endpoint for a sandbox port."""
    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        endpoint = sandbox.get_endpoint(port)
        obj.output.print_model(endpoint, title="Sandbox Endpoint")
    finally:
        # Always release the connection, even if the lookup fails.
        sandbox.close()


# ---- health ---------------------------------------------------------------

@sandbox_group.command("health")
@click.argument("sandbox_id")
@click.pass_obj
@handle_errors
def sandbox_health(obj: ClientContext, sandbox_id: str) -> None:
    """Check sandbox health."""
    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        healthy = sandbox.is_healthy()
        if obj.output.fmt != "table":
            # Non-table formats get a structured record instead of a message.
            obj.output.print_dict(
                {"sandbox_id": sandbox_id, "healthy": healthy},
                title="Health Check",
            )
        elif healthy:
            obj.output.success(f"Sandbox {sandbox_id} is healthy")
        else:
            obj.output.error(f"Sandbox {sandbox_id} is unhealthy")
    finally:
        sandbox.close()


# ---- metrics --------------------------------------------------------------

@sandbox_group.command("metrics")
@click.argument("sandbox_id")
@click.pass_obj
@handle_errors
def sandbox_metrics(obj: ClientContext, sandbox_id: str) -> None:
    """Get sandbox resource metrics."""
    sandbox = obj.connect_sandbox(sandbox_id)
    try:
        metrics = sandbox.get_metrics()
        obj.output.print_model(metrics, title="Sandbox Metrics")
    finally:
        # Always release the connection, even if fetching metrics fails.
        sandbox.close()


================================================
FILE: cli/src/opensandbox_cli/config.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""CLI configuration loading and management.

Priority (highest to lowest):
  1. CLI flags
  2. Environment variables
  3. Config file (~/.opensandbox/config.toml)
  4. SDK defaults
"""

from __future__ import annotations

import os
import sys
from pathlib import Path
from typing import Any

if sys.version_info >= (3, 11):
    import tomllib
else:
    try:
        import tomli as tomllib  # type: ignore[no-redef]
    except ModuleNotFoundError:  # pragma: no cover
        tomllib = None  # type: ignore[assignment]


# Per-user directory for CLI state, computed once at import time from the
# current user's home directory.
DEFAULT_CONFIG_DIR = Path.home() / ".opensandbox"
# Config file used whenever a caller does not pass an explicit path.
DEFAULT_CONFIG_PATH = DEFAULT_CONFIG_DIR / "config.toml"

# Starter file written by init_config_file(). Every setting is commented out,
# so a freshly created file parses to three empty tables ([connection],
# [output], [defaults]).
DEFAULT_CONFIG_TEMPLATE = """\
# OpenSandbox CLI configuration
# Priority: CLI flags > environment variables > this file > SDK defaults

[connection]
# api_key = "your-api-key"
# domain = "localhost:8080"
# protocol = "http"
# request_timeout = 30

[output]
# format = "table"    # table | json | yaml
# color = true

[defaults]
# image = "python:3.11"
# timeout = "10m"
"""


def load_config_file(config_path: Path | None = None) -> dict[str, Any]:
    """Load and parse the TOML config file.

    Returns an empty dict if the file doesn't exist or tomllib is unavailable.
    """
    path = config_path or DEFAULT_CONFIG_PATH
    if not path.exists():
        return {}
    if tomllib is None:
        return {}
    with open(path, "rb") as f:
        return tomllib.load(f)


def resolve_config(
    *,
    cli_api_key: str | None = None,
    cli_domain: str | None = None,
    cli_protocol: str | None = None,
    cli_timeout: int | None = None,
    cli_output: str | None = None,
    config_path: Path | None = None,
) -> dict[str, Any]:
    """Combine CLI flags, environment variables and the config file.

    For each setting the first truthy value wins, checked in this order:
    CLI flag, ``OPEN_SANDBOX_*`` environment variable, config file, built-in
    fallback (where one exists).

    Returns a flat dict with these keys:
      - api_key, domain: no built-in fallback (may be None)
      - protocol: falls back to "http"
      - request_timeout: falls back to 30
      - output_format: falls back to "table"
      - color: read from the config file only, defaulting to True
      - default_image, default_timeout: read from the config file only
    """
    file_cfg = load_config_file(config_path)
    connection_tbl = file_cfg.get("connection", {})
    output_tbl = file_cfg.get("output", {})
    defaults_tbl = file_cfg.get("defaults", {})

    resolved: dict[str, Any] = {}

    # Connection settings.
    resolved["api_key"] = (
        cli_api_key or os.getenv("OPEN_SANDBOX_API_KEY") or connection_tbl.get("api_key")
    )
    resolved["domain"] = (
        cli_domain or os.getenv("OPEN_SANDBOX_DOMAIN") or connection_tbl.get("domain")
    )
    resolved["protocol"] = (
        cli_protocol
        or os.getenv("OPEN_SANDBOX_PROTOCOL")
        or connection_tbl.get("protocol")
        or "http"
    )
    # A non-numeric environment value is ignored rather than raising.
    resolved["request_timeout"] = (
        cli_timeout
        or _int_or_none(os.getenv("OPEN_SANDBOX_REQUEST_TIMEOUT"))
        or connection_tbl.get("request_timeout")
        or 30
    )

    # Output settings.
    resolved["output_format"] = (
        cli_output or os.getenv("OPEN_SANDBOX_OUTPUT") or output_tbl.get("format") or "table"
    )
    resolved["color"] = output_tbl.get("color", True)

    # Defaults that can only come from the config file.
    resolved["default_image"] = defaults_tbl.get("image")
    resolved["default_timeout"] = defaults_tbl.get("timeout")

    return resolved


def init_config_file(config_path: Path | None = None, *, force: bool = False) -> Path:
    """Create a default config file. Returns the path written."""
    path = config_path or DEFAULT_CONFIG_PATH
    if path.exists() and not force:
        raise FileExistsError(
            f"Config file already exists at {path}. Use --force to overwrite."
        )
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(DEFAULT_CONFIG_TEMPLATE)
    return path


def _int_or_none(value: str | None) -> int | None:
    if value is None:
        return None
    try:
        return int(value)
    except ValueError:
        return None


================================================
FILE: cli/src/opensandbox_cli/main.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Root Click group with global options."""

from __future__ import annotations

from pathlib import Path

import click
from rich.console import Console
from rich.text import Text

from opensandbox_cli import __version__
from opensandbox_cli.client import ClientContext
from opensandbox_cli.commands.code import code_group
from opensandbox_cli.commands.command import command_group, exec_cmd
from opensandbox_cli.commands.config_cmd import config_group
from opensandbox_cli.commands.file import file_group
from opensandbox_cli.commands.sandbox import sandbox_group
from opensandbox_cli.config import resolve_config
from opensandbox_cli.output import OutputFormatter

# ---------------------------------------------------------------------------
# Banner
# ---------------------------------------------------------------------------

# ASCII-art banner written with rich console markup. It is a raw string so the
# backslashes in the art stay literal; the ``{version}`` placeholder is filled
# in with ``str.format`` before printing.
BANNER = r"""[bold cyan]
   ____                   _____                 _ _
  / __ \                 / ____|               | | |
 | |  | |_ __   ___ _ _| (___   __ _ _ __   __| | |__   _____  __
 | |  | | '_ \ / _ \ '_ \___ \ / _` | '_ \ / _` | '_ \ / _ \ \/ /
 | |__| | |_) |  __/ | | |___) | (_| | | | | (_| | |_) | (_) >  <
  \____/| .__/ \___|_| |_|____/ \__,_|_| |_|\__,_|_.__/ \___/_/\_\
        | |
        |_|[/]  [dim]v{version}[/]
"""


class BannerGroup(click.Group):
    """Click group whose help output is preceded by the ASCII banner."""

    def format_help(self, ctx: click.Context, formatter: click.HelpFormatter) -> None:
        # The banner goes straight to stdout through a rich console; the regular
        # help text is then produced by Click's default implementation.
        stdout_console = Console(stderr=False)
        rendered_banner = BANNER.format(version=__version__)
        stdout_console.print(rendered_banner)
        super().format_help(ctx, formatter)


@click.group(cls=BannerGroup, context_settings={"help_option_names": ["-h", "--help"]})
@click.option("--api-key", envvar="OPEN_SANDBOX_API_KEY", default=None, help="API key for authentication.")
@click.option("--domain", envvar="OPEN_SANDBOX_DOMAIN", default=None, help="API server domain (e.g. localhost:8080).")
@click.option("--protocol", type=click.Choice(["http", "https"]), default=None, help="Protocol (http/https).")
@click.option("--timeout", "request_timeout", type=int, default=None, help="Request timeout in seconds.")
@click.option("-o", "--output", "output_format", type=click.Choice(["table", "json", "yaml"]), default=None, help="Output format.")
@click.option("--config", "config_path", type=click.Path(exists=False, path_type=Path), default=None, help="Config file path.")
@click.option("-v", "--verbose", is_flag=True, default=False, help="Enable verbose/debug output.")
@click.option("--no-color", is_flag=True, default=False, help="Disable colored output.")
@click.version_option(version=__version__, prog_name="opensandbox")
@click.pass_context
def cli(
    ctx: click.Context,
    api_key: str | None,
    domain: str | None,
    protocol: str | None,
    request_timeout: int | None,
    output_format: str | None,
    config_path: Path | None,
    verbose: bool,
    no_color: bool,
) -> None:
    """OpenSandbox CLI — manage sandboxes from your terminal."""
    # NOTE: the docstring above is rendered by Click as the command's help text.
    if verbose:
        import logging

        logging.basicConfig(level=logging.DEBUG)

    # Merge the command-line values with the other configuration sources.
    settings = resolve_config(
        cli_api_key=api_key,
        cli_domain=domain,
        cli_protocol=protocol,
        cli_timeout=request_timeout,
        cli_output=output_format,
        config_path=config_path,
    )

    # --no-color always wins; otherwise the resolved "color" setting decides.
    chosen_format = settings["output_format"]
    use_color = not no_color and settings.get("color", True)
    renderer = OutputFormatter(chosen_format, color=use_color)

    # Sub-commands reach the shared client state through ctx.obj; it is closed
    # when the Click context is torn down.
    ctx.obj = ClientContext(resolved_config=settings, output=renderer)
    ctx.call_on_close(lambda: ctx.obj.close())


# Register sub-command groups on the root ``cli`` group. ``exec_cmd`` is
# registered directly on the root alongside the groups.
cli.add_command(sandbox_group)
cli.add_command(command_group)
cli.add_command(exec_cmd)
cli.add_command(file_group)
cli.add_command(code_group)
cli.add_command(config_group)


================================================
FILE: cli/src/opensandbox_cli/output.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Output formatting: table (rich), JSON, YAML."""

from __future__ import annotations

import json
import sys
from contextlib import contextmanager
from typing import Any, Generator, Sequence

import click

try:
    import yaml
except ImportError:  # pragma: no cover
    yaml = None  # type: ignore[assignment]

from pydantic import BaseModel
from rich import box
from rich.console import Console
from rich.panel import Panel
from rich.status import Status
from rich.table import Table
from rich.text import Text

# ---------------------------------------------------------------------------
# Status badge styling  (sandbox state → color + icon)
# ---------------------------------------------------------------------------

# Lookup used by ``_style_value``: keys are lower-case state names, values are
# the rich style string and the icon that is prefixed to the cell text.
_STATUS_STYLES: dict[str, tuple[str, str]] = {
    # state → (rich style, icon)
    "running": ("bold green", "●"),
    "ready": ("bold green", "●"),
    "healthy": ("bold green", "●"),
    "pending": ("bold yellow", "◐"),
    "creating": ("bold yellow", "◐"),
    "starting": ("bold yellow", "◐"),
    "paused": ("bold blue", "⏸"),
    "stopped": ("dim", "○"),
    "terminated": ("dim", "○"),
    "killed": ("dim", "○"),
    "error": ("bold red", "✗"),
    "failed": ("bold red", "✗"),
    "unhealthy": ("bold red", "✗"),
    "created": ("bold cyan", "✦"),
}

# Column names whose values are looked up in _STATUS_STYLES for badge styling.
# NOTE(review): a "healthy" column only gets a badge if its value is one of the
# state names above (e.g. "healthy"/"unhealthy") — confirm what callers pass.
_STATUS_COLUMNS = {"status", "state", "healthy"}

# Timestamp columns that are rendered in a dimmer style.
_DIM_COLUMNS = {"created_at", "expires_at", "modified_at", "updated_at"}

# Columns that are primary identifiers (rendered bold cyan, never wrapped in
# list tables).
_ID_COLUMNS = {"id", "sandbox_id", "execution_id", "context_id"}


def _style_value(col: str, value: str) -> Text:
    """Wrap ``value`` in a rich ``Text`` styled according to its column.

    Status columns get an icon-prefixed badge when the value is a known state;
    otherwise timestamp columns are dimmed, identifier columns are bold cyan,
    and everything else is returned unstyled.
    """
    normalized = value.lower()

    if col in _STATUS_COLUMNS:
        badge_style, badge_icon = _STATUS_STYLES.get(normalized, ("", ""))
        if badge_style:
            return Text(f"{badge_icon} {value}", style=badge_style)
        # Unknown state: fall through to the generic column rules below.

    if col in _DIM_COLUMNS:
        return Text(value, style="dim")
    if col in _ID_COLUMNS:
        return Text(value, style="bold cyan")
    return Text(value)


class OutputFormatter:
    """Renders data in table / json / yaml format.

    ``fmt`` selects the renderer used by the ``print_*`` helpers ("json",
    "yaml", anything else falls back to rich tables). ``color`` switches
    between rich-styled output and plain ``click.echo`` text for the status
    message helpers; it is also forwarded to the rich consoles as ``no_color``.
    """

    def __init__(self, fmt: str = "table", *, color: bool = True) -> None:
        self.fmt = fmt
        self.color = color
        # Separate consoles so warnings/errors/spinners go to stderr and do not
        # mix with data written to stdout.
        self.console = Console(
            stderr=False, no_color=not color, force_terminal=None
        )
        self._err_console = Console(
            stderr=True, no_color=not color, force_terminal=None
        )

    # ------------------------------------------------------------------
    # Status messages with icons
    # ------------------------------------------------------------------

    def success(self, msg: str) -> None:
        """Print a success message to stdout (icon when colored, ``OK:`` prefix otherwise)."""
        if self.color:
            self.console.print(f"  [bold green]✅ {msg}[/]")
        else:
            click.echo(f"OK: {msg}")

    def info(self, msg: str) -> None:
        """Print an informational message to stdout."""
        if self.color:
            self.console.print(f"  [bold blue]ℹ️  {msg}[/]")
        else:
            click.echo(f"INFO: {msg}")

    def warning(self, msg: str) -> None:
        """Print a warning message to stderr."""
        if self.color:
            self._err_console.print(f"  [bold yellow]⚠️  {msg}[/]")
        else:
            click.echo(f"WARN: {msg}", err=True)

    def error(self, msg: str) -> None:
        """Print an error message to stderr."""
        if self.color:
            self._err_console.print(f"  [bold red]❌ {msg}[/]")
        else:
            click.echo(f"ERROR: {msg}", err=True)

    def error_panel(self, msg: str, title: str = "Error") -> None:
        """Print an error to stderr as a bold title, a rule, and the message lines.

        In colored mode each line of ``msg`` is printed through rich, so console
        markup inside ``msg`` is interpreted. In plain mode the message is echoed
        verbatim on a single ``ERROR [title]: ...`` line.
        """
        if self.color:
            self._err_console.print()
            self._err_console.print(f"  [bold red]{title}[/]")
            self._err_console.print(f"  [dim]{'─' * (len(title) + 2)}[/]")
            for line in msg.splitlines():
                self._err_console.print(f"  {line}")
            self._err_console.print()
        else:
            click.echo(f"ERROR [{title}]: {msg}", err=True)

    # ------------------------------------------------------------------
    # Spinner for long-running operations
    # ------------------------------------------------------------------

    @contextmanager
    def spinner(self, msg: str) -> Generator[Status, None, None]:
        """Context manager that shows a spinner on stderr while work is in progress.

        The spinner is only shown for colored table output; in every other mode
        the context manager yields ``None`` and displays nothing, so callers
        must not assume the yielded value is a ``Status``.
        """
        if self.color and self.fmt == "table":
            with self._err_console.status(f"[bold cyan]⏳ {msg}[/]", spinner="dots") as status:
                yield status
        else:
            # No spinner in non-color or non-table mode
            yield None  # type: ignore[arg-type]

    # ------------------------------------------------------------------
    # Panel output
    # ------------------------------------------------------------------

    def panel(self, content: str, *, title: str | None = None, style: str = "cyan") -> None:
        """Print content inside a styled panel (plain ``--- title ---`` header without color)."""
        if self.color:
            self.console.print(Panel(
                content,
                title=title,
                title_align="left",
                border_style=style,
                box=box.ROUNDED,
                padding=(0, 1),
            ))
        else:
            if title:
                click.echo(f"--- {title} ---")
            click.echo(content)

    def success_panel(self, data: dict[str, Any], *, title: str = "Success") -> None:
        """Print a success result with a header and indented key-value pairs.

        For json/yaml formats only ``data`` is emitted (the title is dropped) so
        the output stays machine-readable.
        """
        if self.fmt != "table":
            if self.fmt == "json":
                self._print_json(data)
            elif self.fmt == "yaml":
                self._print_yaml(data)
            return

        if self.color:
            self.console.print()
            self.console.print(f"  [bold green]✓ {title}[/]")
            self.console.print(f"  [dim]{'─' * (len(title) + 2)}[/]")
            for k, v in data.items():
                self.console.print(f"  [bold]{k}:[/] [cyan]{v}[/]")
            self.console.print()
        else:
            click.echo(f"--- {title} ---")
            for k, v in data.items():
                click.echo(f"  {k}: {v}")

    # ------------------------------------------------------------------
    # Public helpers
    # ------------------------------------------------------------------

    def print_model(self, model: BaseModel, title: str | None = None) -> None:
        """Print a single Pydantic model as a key-value table or JSON/YAML."""
        data = _model_to_dict(model)
        if self.fmt == "json":
            self._print_json(data)
        elif self.fmt == "yaml":
            self._print_yaml(data)
        else:
            self._print_kv_table(data, title=title)

    def print_models(
        self,
        models: Sequence[BaseModel],
        columns: list[str],
        *,
        title: str | None = None,
    ) -> None:
        """Print a list of Pydantic models as a table or JSON/YAML.

        ``columns`` only restricts the table view; JSON/YAML output contains
        every field of every model.
        """
        rows = [_model_to_dict(m) for m in models]
        if self.fmt == "json":
            self._print_json(rows)
        elif self.fmt == "yaml":
            self._print_yaml(rows)
        else:
            self._print_table(rows, columns, title=title)

    def print_rows(
        self,
        rows: list[dict[str, Any]],
        columns: list[str],
        *,
        title: str | None = None,
    ) -> None:
        """Print pre-processed rows (list of dicts) as a table or JSON/YAML."""
        if self.fmt == "json":
            self._print_json(rows)
        elif self.fmt == "yaml":
            self._print_yaml(rows)
        else:
            self._print_table(rows, columns, title=title)

    def print_dict(self, data: dict[str, Any], title: str | None = None) -> None:
        """Print a flat dict as a key-value table or JSON/YAML."""
        if self.fmt == "json":
            self._print_json(data)
        elif self.fmt == "yaml":
            self._print_yaml(data)
        else:
            self._print_kv_table(data, title=title)

    def print_text(self, text: str) -> None:
        """Print raw text (ignores format)."""
        click.echo(text)

    # ------------------------------------------------------------------
    # Internal renderers
    # ------------------------------------------------------------------

    def _print_json(self, data: Any) -> None:
        """Emit ``data`` as JSON; values JSON cannot encode are stringified."""
        if self.color:
            self.console.print_json(json.dumps(data, default=str))
        else:
            click.echo(json.dumps(data, indent=2, default=str))

    def _print_yaml(self, data: Any) -> None:
        """Emit ``data`` as block-style YAML; exits with status 1 if PyYAML is missing."""
        if yaml is None:
            click.secho(
                "PyYAML is not installed. Use --output json instead.", fg="red", err=True
            )
            sys.exit(1)
        click.echo(yaml.dump(data, default_flow_style=False, allow_unicode=True).rstrip())

    def _print_kv_table(self, data: dict[str, Any], *, title: str | None = None) -> None:
        """Render a dict as a two-column Key/Value table; ``None`` values show as a dim "-"."""
        table = Table(
            title=title,
            show_header=True,
            header_style="bold magenta",
            title_style="bold cyan",
            box=box.ROUNDED,
            border_style="bright_black",
            padding=(0, 1),
            show_lines=True,
        )
        table.add_column("Key", style="bold cyan", no_wrap=True)
        table.add_column("Value")
        for k, v in data.items():
            val_text = _style_value(k, str(v)) if v is not None else Text("-", style="dim")
            table.add_row(str(k), val_text)
        self.console.print(table)

    def _print_table(
        self,
        rows: list[dict[str, Any]],
        columns: list[str],
        *,
        title: str | None = None,
    ) -> None:
        """Render rows as a table with one column per entry in ``columns``.

        Cells whose key is missing from the row, or whose value is ``None``,
        are shown as "-".
        """
        table = Table(
            title=title,
            show_header=True,
            header_style="bold magenta",
            title_style="bold cyan",
            box=box.ROUNDED,
            border_style="bright_black",
            padding=(0, 1),
            row_styles=["", "dim"],
        )
        for col in columns:
            style = ""
            if col in _ID_COLUMNS:
                style = "bold cyan"
            elif col in _DIM_COLUMNS:
                style = "dim"
            table.add_column(col.upper(), style=style, no_wrap=(col in _ID_COLUMNS))

        for row in rows:
            cells: list[Text | str] = []
            for col in columns:
                raw = row.get(col)
                # Treat an explicit None like a missing key so unset optional
                # fields render as "-" (as in _print_kv_table) rather than the
                # literal string "None".
                val = "-" if raw is None else str(raw)
                if col in _STATUS_COLUMNS:
                    cells.append(_style_value(col, val))
                else:
                    cells.append(val)
            table.add_row(*cells)
        self.console.print(table)


# ------------------------------------------------------------------
# Helpers
# ------------------------------------------------------------------


def _model_to_dict(model: BaseModel) -> dict[str, Any]:
    """Dump a Pydantic model to a dict via ``model_dump(mode="json")``."""
    return model.model_dump(mode="json")


================================================
FILE: cli/src/opensandbox_cli/utils.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Shared CLI utilities: duration parsing, error handling, key-value parsing."""

from __future__ import annotations

import functools
import re
import sys
from datetime import timedelta

import click


# ---------------------------------------------------------------------------
# Duration parsing  (e.g. "10m", "1h30m", "90s", "2h")
# ---------------------------------------------------------------------------

# Optional hours, minutes and seconds components, in that fixed order.
_DURATION_RE = re.compile(
    r"^(?:(?P<hours>\d+)h)?(?:(?P<minutes>\d+)m)?(?:(?P<seconds>\d+)s)?$"
)


def parse_duration(value: str) -> timedelta:
    """Parse a human-friendly duration string into a ``timedelta``.

    Supported formats: ``10m``, ``1h30m``, ``90s``, ``2h``, ``1h30m45s``.
    A plain integer is treated as seconds. Surrounding whitespace is ignored.

    Args:
        value: The duration text to parse.

    Returns:
        The parsed duration.

    Raises:
        click.BadParameter: ``value`` is empty or does not match a supported
            format.
    """
    value = value.strip()
    if not value:
        raise click.BadParameter("Duration cannot be empty")

    # Plain integer → seconds. ``isdecimal`` (not ``isdigit``) is used because
    # ``isdigit`` also accepts characters such as "²" that ``int()`` rejects,
    # which would surface as a bare ValueError instead of BadParameter.
    if value.isdecimal():
        return timedelta(seconds=int(value))

    m = _DURATION_RE.match(value)
    # ``value`` is non-empty here, so any successful match is non-empty too.
    if m is None:
        raise click.BadParameter(
            f"Invalid duration '{value}'. Use format like 10m, 1h30m, 90s."
        )

    hours = int(m.group("hours") or 0)
    minutes = int(m.group("minutes") or 0)
    seconds = int(m.group("seconds") or 0)
    return timedelta(hours=hours, minutes=minutes, seconds=seconds)


class DurationType(click.ParamType):
    """Click parameter type that turns duration strings into ``timedelta`` values."""

    name = "duration"

    def convert(
        self, value: str, param: click.Parameter | None, ctx: click.Context | None
    ) -> timedelta:
        # Values that are already timedeltas are passed through untouched.
        if not isinstance(value, timedelta):
            try:
                value = parse_duration(value)
            except click.BadParameter:
                # Re-report through Click so the message is tied to the parameter.
                self.fail(
                    f"Invalid duration '{value}'. Use format like 10m, 1h30m, 90s.",
                    param,
                    ctx,
                )
        return value


# Shared instance of the duration parameter type.
DURATION = DurationType()


# ---------------------------------------------------------------------------
# Key=Value parsing  (e.g. --env FOO=bar)
# ---------------------------------------------------------------------------


class KeyValueType(click.ParamType):
    """Click parameter type that splits ``KEY=VALUE`` text into a ``(key, value)`` tuple."""

    name = "KEY=VALUE"

    def convert(
        self, value: str, param: click.Parameter | None, ctx: click.Context | None
    ) -> tuple[str, str]:
        # Already-converted tuples are returned as-is.
        if isinstance(value, tuple):
            return value
        if "=" in value:
            # Split on the first "=" only, so the value part may contain "=".
            key, _sep, val = value.partition("=")
            return (key, val)
        self.fail(f"Expected KEY=VALUE format, got '{value}'", param, ctx)


# Shared instance of the key-value parameter type.
KEY_VALUE = KeyValueType()


# ---------------------------------------------------------------------------
# Error handling decorator
# ---------------------------------------------------------------------------


def handle_errors(fn):  # type: ignore[no-untyped-def]
    """Decorator that catches SDK / HTTP exceptions and prints a friendly message.

    Click's own control-flow exceptions (``Exit``, ``Abort``, and any
    ``ClickException``) are re-raised untouched so Click can handle them. Any
    other exception is reported through the ``OutputFormatter`` found on the
    current Click context (falling back to a plain red message) and the
    process exits with status 1.
    """

    @functools.wraps(fn)
    def wrapper(*args, **kwargs):  # type: ignore[no-untyped-def]
        try:
            return fn(*args, **kwargs)
        except (click.exceptions.Exit, click.Abort, click.ClickException):
            # Exit and Abort derive from RuntimeError, so without this clause the
            # generic handler below would report a user abort as an error panel.
            raise
        except Exception as exc:
            # Import here to avoid circular imports at module level
            from opensandbox.exceptions import SandboxException

            # Try to get the OutputFormatter from the Click context
            ctx = click.get_current_context(silent=True)
            obj = getattr(ctx, "obj", None) if ctx else None
            output = getattr(obj, "output", None) if obj else None

            if output and hasattr(output, "error_panel"):
                if isinstance(exc, SandboxException):
                    output.error_panel(str(exc), title="Sandbox Error")
                else:
                    type_line = f"Type: {type(exc).__qualname__}"
                    # Only use rich markup when the formatter renders it; in
                    # plain mode the message is echoed verbatim and the tags
                    # would show up literally.
                    if getattr(output, "color", True):
                        type_line = f"[dim]{type_line}[/]"
                    output.error_panel(
                        f"{str(exc)}\n\n{type_line}",
                        title=type(exc).__name__,
                    )
            else:
                click.secho(f"Error: {exc}", fg="red", err=True)
            sys.exit(1)

    return wrapper


================================================
FILE: cli/tests/__init__.py
================================================


================================================
FILE: cli/tests/conftest.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Shared test fixtures."""

from __future__ import annotations

from unittest.mock import MagicMock

import pytest
from click.testing import CliRunner

from opensandbox_cli.output import OutputFormatter


@pytest.fixture()
def runner() -> CliRunner:
    """Provide a fresh Click ``CliRunner`` for each test."""
    return CliRunner()


@pytest.fixture()
def mock_manager() -> MagicMock:
    """Provide a bare ``MagicMock``; ``mock_client_context`` returns it from ``get_manager``."""
    return MagicMock()


@pytest.fixture()
def mock_sandbox() -> MagicMock:
    """Provide a bare ``MagicMock``; ``mock_client_context`` returns it from ``connect_sandbox``."""
    return MagicMock()


@pytest.fixture()
def mock_client_context(mock_manager: MagicMock, mock_sandbox: MagicMock) -> MagicMock:
    """Build a stand-in for ``ClientContext`` so tests never hit a real SDK/HTTP endpoint."""
    fake_settings = {
        "api_key": "test-key",
        "domain": "localhost:8080",
        "protocol": "http",
        "request_timeout": 30,
        "output_format": "json",
        "color": False,
        "default_image": None,
        "default_timeout": None,
    }

    client_ctx = MagicMock()
    client_ctx.resolved_config = fake_settings
    # A real formatter (plain JSON) keeps command output easy to assert on.
    client_ctx.output = OutputFormatter("json", color=False)
    client_ctx.connection_config = MagicMock()
    client_ctx.close = MagicMock()
    # SDK entry points hand back the injected mocks.
    client_ctx.get_manager.return_value = mock_manager
    client_ctx.connect_sandbox.return_value = mock_sandbox
    return client_ctx


================================================
FILE: cli/tests/test_cli_help.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests that all CLI commands register correctly and --help exits cleanly."""

from __future__ import annotations

import pytest
from click.testing import CliRunner

from opensandbox_cli.main import cli


@pytest.fixture()
def runner() -> CliRunner:
    """Provide a fresh Click ``CliRunner`` for each help test."""
    return CliRunner()


# ---------------------------------------------------------------------------
# Root
# ---------------------------------------------------------------------------


class TestRootCLI:
    """Root command: help text, version flag and registered sub-commands."""

    def test_help(self, runner: CliRunner) -> None:
        outcome = runner.invoke(cli, ["--help"])
        assert outcome.exit_code == 0
        assert "OpenSandbox CLI" in outcome.output

    def test_version(self, runner: CliRunner) -> None:
        outcome = runner.invoke(cli, ["--version"])
        assert outcome.exit_code == 0
        assert "opensandbox" in outcome.output

    def test_root_lists_commands(self, runner: CliRunner) -> None:
        help_text = runner.invoke(cli, ["--help"]).output
        expected = ("sandbox", "command", "exec", "file", "code", "config")
        absent = [name for name in expected if name not in help_text]
        assert not absent


# ---------------------------------------------------------------------------
# Sandbox sub-commands
# ---------------------------------------------------------------------------


class TestSandboxHelp:
    """``sandbox`` group: the group help lists every sub-command and each one has help."""

    _SUBCOMMANDS = (
        "create", "list", "get", "kill", "pause", "resume", "renew", "endpoint", "health", "metrics",
    )

    def test_sandbox_help(self, runner: CliRunner) -> None:
        outcome = runner.invoke(cli, ["sandbox", "--help"])
        assert outcome.exit_code == 0
        absent = [name for name in self._SUBCOMMANDS if name not in outcome.output]
        assert not absent

    @pytest.mark.parametrize("subcmd", _SUBCOMMANDS)
    def test_sandbox_subcommand_help(self, runner: CliRunner, subcmd: str) -> None:
        outcome = runner.invoke(cli, ["sandbox", subcmd, "--help"])
        assert outcome.exit_code == 0
        help_text = outcome.output.lower()
        assert subcmd in help_text or "usage" in help_text


# ---------------------------------------------------------------------------
# Command sub-commands
# ---------------------------------------------------------------------------


class TestCommandHelp:
    """``command`` group: the group help lists every sub-command and each one has help."""

    _SUBCOMMANDS = ("run", "status", "logs", "interrupt")

    def test_command_help(self, runner: CliRunner) -> None:
        outcome = runner.invoke(cli, ["command", "--help"])
        assert outcome.exit_code == 0
        absent = [name for name in self._SUBCOMMANDS if name not in outcome.output]
        assert not absent

    @pytest.mark.parametrize("subcmd", _SUBCOMMANDS)
    def test_command_subcommand_help(self, runner: CliRunner, subcmd: str) -> None:
        outcome = runner.invoke(cli, ["command", subcmd, "--help"])
        assert outcome.exit_code == 0


# ---------------------------------------------------------------------------
# exec (top-level shortcut)
# ---------------------------------------------------------------------------


class TestExecHelp:
    """Top-level ``exec`` command exposes help text."""

    def test_exec_help(self, runner: CliRunner) -> None:
        outcome = runner.invoke(cli, ["exec", "--help"])
        assert outcome.exit_code == 0
        help_text = outcome.output.lower()
        assert any(word in help_text for word in ("shortcut", "command"))


# ---------------------------------------------------------------------------
# File sub-commands
# ---------------------------------------------------------------------------


class TestFileHelp:
    """``file`` group: the group help lists every sub-command and each one has help."""

    _SUBCOMMANDS = (
        "cat", "write", "upload", "download", "rm", "mv", "mkdir", "rmdir", "search", "info", "chmod", "replace",
    )

    def test_file_help(self, runner: CliRunner) -> None:
        outcome = runner.invoke(cli, ["file", "--help"])
        assert outcome.exit_code == 0
        absent = [name for name in self._SUBCOMMANDS if name not in outcome.output]
        assert not absent

    @pytest.mark.parametrize("subcmd", _SUBCOMMANDS)
    def test_file_subcommand_help(self, runner: CliRunner, subcmd: str) -> None:
        outcome = runner.invoke(cli, ["file", subcmd, "--help"])
        assert outcome.exit_code == 0


# ---------------------------------------------------------------------------
# Code sub-commands
# ---------------------------------------------------------------------------


class TestCodeHelp:
    """``code`` group and its nested ``context`` group list their sub-commands."""

    def test_code_help(self, runner: CliRunner) -> None:
        outcome = runner.invoke(cli, ["code", "--help"])
        assert outcome.exit_code == 0
        absent = [name for name in ("run", "context", "interrupt") if name not in outcome.output]
        assert not absent

    def test_code_context_help(self, runner: CliRunner) -> None:
        outcome = runner.invoke(cli, ["code", "context", "--help"])
        assert outcome.exit_code == 0
        expected = ("create", "list", "delete", "delete-all")
        absent = [name for name in expected if name not in outcome.output]
        assert not absent


# ---------------------------------------------------------------------------
# Config sub-commands
# ---------------------------------------------------------------------------


class TestConfigHelp:
    """``config`` group: the group help lists every sub-command and each one has help."""

    _SUBCOMMANDS = ("init", "show", "set")

    def test_config_help(self, runner: CliRunner) -> None:
        outcome = runner.invoke(cli, ["config", "--help"])
        assert outcome.exit_code == 0
        absent = [name for name in self._SUBCOMMANDS if name not in outcome.output]
        assert not absent

    @pytest.mark.parametrize("subcmd", _SUBCOMMANDS)
    def test_config_subcommand_help(self, runner: CliRunner, subcmd: str) -> None:
        outcome = runner.invoke(cli, ["config", subcmd, "--help"])
        assert outcome.exit_code == 0


================================================
FILE: cli/tests/test_commands.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for CLI commands with mocked SDK calls.

Strategy: patch ``opensandbox_cli.main.ClientContext`` and ``resolve_config``
so the root ``cli`` callback creates our mock instead of a real SDK client.
"""

from __future__ import annotations

import json
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest
from click.testing import CliRunner

from opensandbox_cli.main import cli
from opensandbox_cli.output import OutputFormatter


@pytest.fixture()
def runner() -> CliRunner:
    """Provide a fresh Click ``CliRunner`` for each command test."""
    return CliRunner()


def _build_mock_client_context(
    *,
    manager: MagicMock | None = None,
    sandbox: MagicMock | None = None,
    output_format: str = "json",
) -> MagicMock:
    """Assemble a ``MagicMock`` that stands in for ``ClientContext``.

    ``manager`` / ``sandbox`` are what ``get_manager`` / ``connect_sandbox``
    return (fresh mocks when omitted); ``output_format`` selects the format of
    the real, colorless ``OutputFormatter`` attached as ``output``.
    """
    fake_settings = {
        "api_key": "test-key",
        "domain": "localhost:8080",
        "protocol": "http",
        "request_timeout": 30,
        "output_format": output_format,
        "color": False,
        "default_image": None,
        "default_timeout": None,
    }

    client_ctx = MagicMock()
    client_ctx.resolved_config = fake_settings
    client_ctx.output = OutputFormatter(output_format, color=False)
    client_ctx.get_manager.return_value = manager or MagicMock()
    client_ctx.connect_sandbox.return_value = sandbox or MagicMock()

    def _echo_prefix(prefix):
        # Resolution is a passthrough: the given ID/prefix is returned unchanged.
        return prefix

    client_ctx.resolve_sandbox_id.side_effect = _echo_prefix
    client_ctx.connection_config = MagicMock()
    client_ctx.close = MagicMock()
    return client_ctx


def _invoke(
    runner: CliRunner,
    args: list[str],
    *,
    manager: MagicMock | None = None,
    sandbox: MagicMock | None = None,
    output_format: str = "json",
) -> object:
    """Run the root ``cli`` with ``args`` while the SDK-facing pieces are mocked.

    ``resolve_config`` and ``ClientContext`` in ``opensandbox_cli.main`` are
    patched so the root callback receives the mock context; exceptions are not
    swallowed by the runner.
    """
    fake_ctx = _build_mock_client_context(
        manager=manager, sandbox=sandbox, output_format=output_format
    )

    def _real_formatter(fmt, **kw):
        # Delegate to the genuine OutputFormatter imported by this test module.
        return OutputFormatter(fmt, **kw)

    with patch("opensandbox_cli.main.resolve_config", return_value=fake_ctx.resolved_config), \
         patch("opensandbox_cli.main.ClientContext", return_value=fake_ctx), \
         patch("opensandbox_cli.main.OutputFormatter", side_effect=_real_formatter):
        outcome = runner.invoke(cli, args, catch_exceptions=False)
    return outcome


# ---------------------------------------------------------------------------
# Config commands (no SDK mocking needed)
# ---------------------------------------------------------------------------


class TestConfigInit:
    """``config init``: file creation, overwrite refusal and ``--force``.

    Each test checks the filesystem as well as the CLI message, so a command
    that prints the right text without performing (or while wrongly
    performing) the write is caught.
    """

    def test_init_creates_file(self, runner: CliRunner, tmp_path: Path) -> None:
        """A fresh path is created and success is reported."""
        cfg_path = tmp_path / "config.toml"
        result = runner.invoke(cli, ["config", "init", "--path", str(cfg_path)])
        assert result.exit_code == 0
        assert "Config file created" in result.output
        # The message alone does not prove the side effect happened.
        assert cfg_path.is_file()

    def test_init_refuses_overwrite(self, runner: CliRunner, tmp_path: Path) -> None:
        """Without ``--force`` an existing file is reported and left untouched."""
        cfg_path = tmp_path / "config.toml"
        cfg_path.write_text("existing")
        result = runner.invoke(cli, ["config", "init", "--path", str(cfg_path)])
        assert "already exists" in result.output
        # Refusing must not clobber the user's existing configuration.
        assert cfg_path.read_text() == "existing"

    def test_init_force_overwrites(self, runner: CliRunner, tmp_path: Path) -> None:
        """``--force`` replaces the previous file content."""
        cfg_path = tmp_path / "config.toml"
        cfg_path.write_text("old")
        result = runner.invoke(cli, ["config", "init", "--path", str(cfg_path), "--force"])
        assert result.exit_code == 0
        assert "Config file created" in result.output
        # The stale content must actually be gone after a forced init.
        assert cfg_path.read_text() != "old"


class TestConfigShow:
    """``config show`` renders the resolved configuration."""

    def test_show_json_output(self, runner: CliRunner) -> None:
        """With ``-o json`` the output parses as JSON and contains ``api_key``."""
        outcome = runner.invoke(cli, ["-o", "json", "config", "show"])
        assert outcome.exit_code == 0
        parsed = json.loads(outcome.output)
        assert "api_key" in parsed

    def test_show_table_output(self, runner: CliRunner) -> None:
        """Without an explicit format the output still mentions ``api_key``."""
        outcome = runner.invoke(cli, ["config", "show"])
        assert outcome.exit_code == 0
        rendered = outcome.output
        assert "api_key" in rendered


class TestConfigSet:
    """``config set`` accepts ``section.field`` keys and rejects flat ones."""

    def test_set_updates_existing_field(self, runner: CliRunner, tmp_path: Path) -> None:
        """Setting ``connection.domain`` on an initialised file succeeds."""
        target = tmp_path / "config.toml"
        target_arg = str(target)
        runner.invoke(cli, ["config", "init", "--path", target_arg])
        outcome = runner.invoke(
            cli,
            ["config", "set", "connection.domain", "new.host", "--path", target_arg],
        )
        assert outcome.exit_code == 0
        assert "Set connection.domain = new.host" in outcome.output

    def test_set_rejects_flat_key(self, runner: CliRunner, tmp_path: Path) -> None:
        """A key without a section prefix yields a ``section.field`` hint."""
        target = tmp_path / "config.toml"
        target.write_text("[connection]\n")
        outcome = runner.invoke(
            cli,
            ["config", "set", "flat_key", "value", "--path", str(target)],
        )
        assert "section.field" in outcome.output


# ---------------------------------------------------------------------------
# Sandbox commands
# ---------------------------------------------------------------------------


class TestSandboxList:
    """``sandbox list`` delegates to the manager."""

    def test_list_invokes_manager(self, runner: CliRunner) -> None:
        """Listing calls ``list_sandbox_infos`` exactly once and exits cleanly."""
        empty_listing = MagicMock(sandbox_infos=[])
        fake_manager = MagicMock()
        fake_manager.list_sandbox_infos.return_value = empty_listing

        outcome = _invoke(runner, ["-o", "json", "sandbox", "list"], manager=fake_manager)

        assert outcome.exit_code == 0
        fake_manager.list_sandbox_infos.assert_called_once()


class TestSandboxKill:
    """``sandbox kill`` terminates every ID it is given."""

    def test_kill_multiple(self, runner: CliRunner) -> None:
        """Two IDs produce two ``kill_sandbox`` calls and two confirmations."""
        fake_manager = MagicMock()
        outcome = _invoke(runner, ["sandbox", "kill", "id1", "id2"], manager=fake_manager)

        assert outcome.exit_code == 0
        assert fake_manager.kill_sandbox.call_count == 2
        for expected_line in ("Sandbox terminated: id1", "Sandbox terminated: id2"):
            assert expected_line in outcome.output


class TestSandboxPause:
    """``sandbox pause`` forwards the sandbox ID to the manager."""

    def test_pause_calls_manager(self, runner: CliRunner) -> None:
        """The manager is asked to pause ``sb-123`` and the CLI confirms it."""
        fake_manager = MagicMock()
        outcome = _invoke(runner, ["sandbox", "pause", "sb-123"], manager=fake_manager)

        assert outcome.exit_code == 0
        fake_manager.pause_sandbox.assert_called_once_with("sb-123")
        assert "Sandbox paused: sb-123" in outcome.output


class TestSandboxResume:
    """``sandbox resume`` forwards the sandbox ID to the manager."""

    def test_resume_calls_manager(self, runner: CliRunner) -> None:
        """The manager is asked to resume ``sb-123`` and the CLI confirms it."""
        fake_manager = MagicMock()
        outcome = _invoke(runner, ["sandbox", "resume", "sb-123"], manager=fake_manager)

        assert outcome.exit_code == 0
        fake_manager.resume_sandbox.assert_called_once_with("sb-123")
        assert "Sandbox resumed: sb-123" in outcome.output


# ---------------------------------------------------------------------------
# File commands
# ---------------------------------------------------------------------------


class TestFileCat:
    """``file cat`` prints the content read from the sandbox."""

    def test_cat_outputs_content(self, runner: CliRunner) -> None:
        """The file is read as UTF-8 and its content appears in the output."""
        fake_sandbox = MagicMock()
        fake_sandbox.files.read_file.return_value = "hello world"

        cli_args = ["file", "cat", "sb-1", "/etc/hostname"]
        outcome = _invoke(runner, cli_args, sandbox=fake_sandbox)

        assert outcome.exit_code == 0
        assert "hello world" in outcome.output
        fake_sandbox.files.read_file.assert_called_once_with("/etc/hostname", encoding="utf-8")


class TestFileWrite:
    """``file write`` sends inline content to the sandbox."""

    def test_write_with_content_flag(self, runner: CliRunner) -> None:
        """``-c`` content triggers one ``write_file`` call and a confirmation."""
        fake_sandbox = MagicMock()
        cli_args = ["file", "write", "sb-1", "/tmp/test.txt", "-c", "content here"]

        outcome = _invoke(runner, cli_args, sandbox=fake_sandbox)

        assert outcome.exit_code == 0
        assert "Written" in outcome.output
        fake_sandbox.files.write_file.assert_called_once()


class TestFileRm:
    """``file rm`` deletes all listed paths in one SDK call."""

    def test_rm_deletes_files(self, runner: CliRunner) -> None:
        """Both paths are passed together to ``delete_files``."""
        fake_sandbox = MagicMock()
        cli_args = ["file", "rm", "sb-1", "/tmp/a", "/tmp/b"]

        outcome = _invoke(runner, cli_args, sandbox=fake_sandbox)

        assert outcome.exit_code == 0
        fake_sandbox.files.delete_files.assert_called_once_with(["/tmp/a", "/tmp/b"])


class TestFileMv:
    """``file mv`` reports the source and destination of the move."""

    def test_mv_moves_file(self, runner: CliRunner) -> None:
        """The confirmation names both the old and the new path."""
        fake_sandbox = MagicMock()
        cli_args = ["file", "mv", "sb-1", "/tmp/old", "/tmp/new"]

        outcome = _invoke(runner, cli_args, sandbox=fake_sandbox)

        assert outcome.exit_code == 0
        assert "Moved: /tmp/old" in outcome.output
        assert "/tmp/new" in outcome.output


class TestFileMkdir:
    """``file mkdir`` confirms every directory it creates."""

    def test_mkdir_creates_dirs(self, runner: CliRunner) -> None:
        """Each requested directory gets its own ``Created:`` line."""
        fake_sandbox = MagicMock()
        cli_args = ["file", "mkdir", "sb-1", "/tmp/dir1", "/tmp/dir2"]

        outcome = _invoke(runner, cli_args, sandbox=fake_sandbox)

        assert outcome.exit_code == 0
        for expected_line in ("Created: /tmp/dir1", "Created: /tmp/dir2"):
            assert expected_line in outcome.output


class TestFileRmdir:
    """``file rmdir`` confirms the directory it removes."""

    def test_rmdir_removes_dirs(self, runner: CliRunner) -> None:
        """Removing ``/workspace/old`` prints a matching ``Removed:`` line."""
        fake_sandbox = MagicMock()
        cli_args = ["file", "rmdir", "sb-1", "/workspace/old"]

        outcome = _invoke(runner, cli_args, sandbox=fake_sandbox)

        assert outcome.exit_code == 0
        assert "Removed: /workspace/old" in outcome.output


# ---------------------------------------------------------------------------
# Command execution
# ---------------------------------------------------------------------------


class TestCommandRun:
    """``command run`` in detached (``-d``) mode."""

    def test_background_run(self, runner: CliRunner) -> None:
        """JSON output carries the execution ID and the ``background`` mode."""
        fake_execution = MagicMock(id="exec-123")
        fake_sandbox = MagicMock()
        fake_sandbox.commands.run.return_value = fake_execution

        cli_args = ["-o", "json", "command", "run", "sb-1", "-d", "echo", "hello"]
        outcome = _invoke(runner, cli_args, sandbox=fake_sandbox)

        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)
        assert payload["execution_id"] == "exec-123"
        assert payload["mode"] == "background"


class TestExecShortcut:
    """The top-level ``exec`` command."""

    def test_exec_passes_to_run(self, runner: CliRunner) -> None:
        """``exec ... -- ls -la`` results in exactly one ``commands.run`` call."""
        fake_execution = MagicMock(id="exec-456")
        fake_sandbox = MagicMock()
        fake_sandbox.commands.run.return_value = fake_execution

        cli_args = ["-o", "json", "exec", "sb-1", "-d", "--", "ls", "-la"]
        outcome = _invoke(runner, cli_args, sandbox=fake_sandbox)

        assert outcome.exit_code == 0
        fake_sandbox.commands.run.assert_called_once()


class TestCommandInterrupt:
    """``command interrupt`` forwards the execution ID to the SDK."""

    def test_interrupt_calls_sdk(self, runner: CliRunner) -> None:
        """The SDK receives ``exec-789`` and the CLI confirms the interrupt."""
        fake_sandbox = MagicMock()
        cli_args = ["command", "interrupt", "sb-1", "exec-789"]

        outcome = _invoke(runner, cli_args, sandbox=fake_sandbox)

        assert outcome.exit_code == 0
        fake_sandbox.commands.interrupt.assert_called_once_with("exec-789")
        assert "Interrupted: exec-789" in outcome.output


================================================
FILE: cli/tests/test_config.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for opensandbox_cli.config — config loading and priority merging."""

from __future__ import annotations

import os
from pathlib import Path

import pytest

from opensandbox_cli.config import (
    DEFAULT_CONFIG_TEMPLATE,
    init_config_file,
    load_config_file,
    resolve_config,
)


# ---------------------------------------------------------------------------
# load_config_file
# ---------------------------------------------------------------------------


class TestLoadConfigFile:
    """``load_config_file`` reads TOML and tolerates a missing file."""

    def test_returns_empty_when_file_missing(self, tmp_path: Path) -> None:
        """A path that does not exist yields an empty dict."""
        missing = tmp_path / "nonexistent.toml"
        assert load_config_file(missing) == {}

    def test_parses_toml_file(self, tmp_path: Path) -> None:
        """Values from the ``[connection]`` table are exposed by key."""
        toml_file = tmp_path / "config.toml"
        toml_file.write_text(
            '[connection]\napi_key = "abc"\ndomain = "example.com"\n'
        )

        connection = load_config_file(toml_file)["connection"]

        assert connection["api_key"] == "abc"
        assert connection["domain"] == "example.com"

    def test_parses_all_sections(self, tmp_path: Path) -> None:
        """``[output]`` and ``[defaults]`` tables are parsed with TOML types."""
        toml_file = tmp_path / "config.toml"
        body = (
            '[connection]\napi_key = "k"\n\n'
            '[output]\nformat = "json"\ncolor = false\n\n'
            '[defaults]\nimage = "alpine"\ntimeout = "5m"\n'
        )
        toml_file.write_text(body)

        loaded = load_config_file(toml_file)

        assert loaded["output"]["format"] == "json"
        assert loaded["output"]["color"] is False
        assert loaded["defaults"]["image"] == "alpine"
        assert loaded["defaults"]["timeout"] == "5m"


# ---------------------------------------------------------------------------
# resolve_config — priority: CLI > env > file > defaults
# ---------------------------------------------------------------------------


class TestResolveConfig:
    """``resolve_config`` precedence: CLI arguments > environment > file > defaults.

    Every test starts with the ``OPEN_SANDBOX_*`` variables exercised in this
    class removed from the environment. Without that, a variable exported in
    the developer's shell or in CI would override the file/default values the
    tests assert on and make them fail for reasons unrelated to the code.
    """

    # Environment variables that these tests set or whose absence they rely on.
    _ENV_VARS = (
        "OPEN_SANDBOX_API_KEY",
        "OPEN_SANDBOX_DOMAIN",
        "OPEN_SANDBOX_PROTOCOL",
        "OPEN_SANDBOX_REQUEST_TIMEOUT",
        "OPEN_SANDBOX_OUTPUT",
    )

    @pytest.fixture(autouse=True)
    def _isolate_env(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """Remove ambient ``OPEN_SANDBOX_*`` variables before each test."""
        for name in self._ENV_VARS:
            # raising=False: the variable is usually not set to begin with.
            monkeypatch.delenv(name, raising=False)

    def test_defaults_when_nothing_configured(self, tmp_path: Path) -> None:
        """An empty file and clean environment yield the built-in defaults."""
        cfg_path = tmp_path / "empty.toml"
        cfg_path.write_text("")
        result = resolve_config(config_path=cfg_path)
        assert result["api_key"] is None
        assert result["domain"] is None
        assert result["protocol"] == "http"
        assert result["request_timeout"] == 30
        assert result["output_format"] == "table"
        assert result["color"] is True

    def test_file_values_override_defaults(self, tmp_path: Path) -> None:
        """Values present in the config file replace the defaults."""
        cfg = tmp_path / "config.toml"
        cfg.write_text(
            '[connection]\napi_key = "file-key"\ndomain = "file.host"\n'
            'protocol = "https"\nrequest_timeout = 60\n\n'
            '[output]\nformat = "json"\ncolor = false\n\n'
            '[defaults]\nimage = "node:20"\ntimeout = "15m"\n'
        )
        result = resolve_config(config_path=cfg)
        assert result["api_key"] == "file-key"
        assert result["domain"] == "file.host"
        assert result["protocol"] == "https"
        assert result["request_timeout"] == 60
        assert result["output_format"] == "json"
        assert result["color"] is False
        assert result["default_image"] == "node:20"
        assert result["default_timeout"] == "15m"

    def test_env_overrides_file(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Environment variables win over values from the config file."""
        cfg = tmp_path / "config.toml"
        cfg.write_text('[connection]\napi_key = "file-key"\ndomain = "file.host"\n')

        monkeypatch.setenv("OPEN_SANDBOX_API_KEY", "env-key")
        monkeypatch.setenv("OPEN_SANDBOX_DOMAIN", "env.host")
        monkeypatch.setenv("OPEN_SANDBOX_PROTOCOL", "https")
        monkeypatch.setenv("OPEN_SANDBOX_REQUEST_TIMEOUT", "120")
        monkeypatch.setenv("OPEN_SANDBOX_OUTPUT", "yaml")

        result = resolve_config(config_path=cfg)
        assert result["api_key"] == "env-key"
        assert result["domain"] == "env.host"
        assert result["protocol"] == "https"
        assert result["request_timeout"] == 120
        assert result["output_format"] == "yaml"

    def test_cli_overrides_everything(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Explicit CLI arguments win over both environment and file."""
        cfg = tmp_path / "config.toml"
        cfg.write_text('[connection]\napi_key = "file-key"\n')
        monkeypatch.setenv("OPEN_SANDBOX_API_KEY", "env-key")

        result = resolve_config(
            cli_api_key="cli-key",
            cli_domain="cli.host",
            cli_protocol="https",
            cli_timeout=999,
            cli_output="yaml",
            config_path=cfg,
        )
        assert result["api_key"] == "cli-key"
        assert result["domain"] == "cli.host"
        assert result["protocol"] == "https"
        assert result["request_timeout"] == 999
        assert result["output_format"] == "yaml"

    def test_invalid_timeout_env_falls_through(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """A non-numeric timeout in the environment is ignored."""
        cfg = tmp_path / "empty.toml"
        cfg.write_text("")
        monkeypatch.setenv("OPEN_SANDBOX_REQUEST_TIMEOUT", "not-a-number")
        result = resolve_config(config_path=cfg)
        # Falls through to default 30
        assert result["request_timeout"] == 30


# ---------------------------------------------------------------------------
# init_config_file
# ---------------------------------------------------------------------------


class TestInitConfigFile:
    """``init_config_file`` writes the default template to disk."""

    def test_creates_default_config(self, tmp_path: Path) -> None:
        """The target path is returned and the file has all three sections."""
        target = tmp_path / ".opensandbox" / "config.toml"

        returned = init_config_file(target)

        assert returned == target
        assert target.exists()
        written = target.read_text()
        for section in ("[connection]", "[output]", "[defaults]"):
            assert section in written

    def test_refuses_overwrite_without_force(self, tmp_path: Path) -> None:
        """An existing file raises ``FileExistsError`` when not forced."""
        target = tmp_path / "config.toml"
        target.write_text("existing")
        with pytest.raises(FileExistsError, match="already exists"):
            init_config_file(target)

    def test_force_overwrites(self, tmp_path: Path) -> None:
        """``force=True`` replaces existing content with the default template."""
        target = tmp_path / "config.toml"
        target.write_text("old content")

        init_config_file(target, force=True)

        assert target.read_text() == DEFAULT_CONFIG_TEMPLATE

    def test_creates_parent_directories(self, tmp_path: Path) -> None:
        """Missing intermediate directories are created along the way."""
        target = tmp_path / "a" / "b" / "c" / "config.toml"
        init_config_file(target)
        assert target.exists()


================================================
FILE: cli/tests/test_output.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for opensandbox_cli.output — table, JSON, YAML rendering."""

from __future__ import annotations

import json

import pytest
from pydantic import BaseModel

from opensandbox_cli.output import OutputFormatter


# ---------------------------------------------------------------------------
# Test models
# ---------------------------------------------------------------------------


class FakeItem(BaseModel):
    """Minimal pydantic model used as sample data for the formatter tests."""

    # String identifier; shown under the "id" column/key in the tests below.
    id: str
    # Human-readable label.
    name: str
    # Integer field, included so a non-string value is rendered too.
    score: int


# ---------------------------------------------------------------------------
# JSON output
# ---------------------------------------------------------------------------


class TestJsonOutput:
    """JSON mode emits documents that round-trip through ``json.loads``."""

    def test_print_dict(self, capsys: pytest.CaptureFixture[str]) -> None:
        """A plain dict is printed as the equivalent JSON object."""
        formatter = OutputFormatter("json", color=False)
        formatter.print_dict({"key": "value", "num": 42})

        stdout = capsys.readouterr().out

        assert json.loads(stdout) == {"key": "value", "num": 42}

    def test_print_model(self, capsys: pytest.CaptureFixture[str]) -> None:
        """A single model is printed with its field values intact."""
        formatter = OutputFormatter("json", color=False)
        formatter.print_model(FakeItem(id="abc", name="test", score=100))

        decoded = json.loads(capsys.readouterr().out)

        assert decoded["id"] == "abc"
        assert decoded["name"] == "test"
        assert decoded["score"] == 100

    def test_print_models(self, capsys: pytest.CaptureFixture[str]) -> None:
        """A list of models is printed as a JSON array in the same order."""
        formatter = OutputFormatter("json", color=False)
        first = FakeItem(id="1", name="a", score=10)
        second = FakeItem(id="2", name="b", score=20)
        formatter.print_models([first, second], columns=["id", "name", "score"])

        decoded = json.loads(capsys.readouterr().out)

        assert len(decoded) == 2
        assert decoded[0]["id"] == "1"
        assert decoded[1]["name"] == "b"


# ---------------------------------------------------------------------------
# YAML output
# ---------------------------------------------------------------------------


class TestYamlOutput:
    """YAML mode emits ``key: value`` lines."""

    def test_print_dict(self, capsys: pytest.CaptureFixture[str]) -> None:
        """A plain dict entry appears as a ``key: value`` line."""
        formatter = OutputFormatter("yaml", color=False)
        formatter.print_dict({"key": "value"})

        stdout = capsys.readouterr().out

        assert "key: value" in stdout

    def test_print_model(self, capsys: pytest.CaptureFixture[str]) -> None:
        """Every model field appears as its own ``field: value`` line."""
        formatter = OutputFormatter("yaml", color=False)
        formatter.print_model(FakeItem(id="x", name="y", score=5))

        stdout = capsys.readouterr().out

        for expected_line in ("id: x", "name: y", "score: 5"):
            assert expected_line in stdout


# ---------------------------------------------------------------------------
# Table output
# ---------------------------------------------------------------------------


class TestTableOutput:
    """Table mode rendering, plus ``print_text`` which bypasses formatting."""

    def test_print_dict_contains_values(self, capsys: pytest.CaptureFixture[str]) -> None:
        """Values and the title all show up in the rendered table."""
        formatter = OutputFormatter("table", color=False)
        formatter.print_dict({"host": "example.com", "port": 8080}, title="Config")

        stdout = capsys.readouterr().out

        for fragment in ("example.com", "8080", "Config"):
            assert fragment in stdout

    def test_print_dict_none_renders_dash(self, capsys: pytest.CaptureFixture[str]) -> None:
        """A ``None`` value produces output containing a dash."""
        formatter = OutputFormatter("table", color=False)
        formatter.print_dict({"key": None})

        stdout = capsys.readouterr().out

        assert "-" in stdout

    def test_print_models_shows_headers(self, capsys: pytest.CaptureFixture[str]) -> None:
        """Column names appear upper-cased in the rendered table."""
        formatter = OutputFormatter("table", color=False)
        rows = [FakeItem(id="1", name="a", score=10)]
        formatter.print_models(rows, columns=["id", "name", "score"], title="Items")

        stdout = capsys.readouterr().out

        for header in ("ID", "NAME", "SCORE"):
            assert header in stdout

    def test_print_text_ignores_format(self, capsys: pytest.CaptureFixture[str]) -> None:
        """``print_text`` emits the raw text even when the format is JSON."""
        formatter = OutputFormatter("json", color=False)
        formatter.print_text("hello world")

        stdout = capsys.readouterr().out

        assert stdout.strip() == "hello world"


================================================
FILE: cli/tests/test_resolve_id.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for Docker-style sandbox ID prefix matching."""

from __future__ import annotations

from unittest.mock import MagicMock

import click
import pytest

from opensandbox_cli.client import ClientContext
from opensandbox_cli.output import OutputFormatter


def _make_sandbox_info(sandbox_id: str) -> MagicMock:
    """Create a mock SandboxInfo with given ID."""
    info = MagicMock()
    info.id = sandbox_id
    return info


def _make_paged_result(
    sandbox_ids: list[str], *, has_next_page: bool = False
) -> MagicMock:
    """Build one mock page of sandbox infos with pagination metadata.

    Args:
        sandbox_ids: IDs to wrap in mock sandbox-info objects, in order.
        has_next_page: Value exposed as ``pagination.has_next_page``.

    Returns:
        A mock with ``sandbox_infos`` and ``pagination`` attributes set.
    """
    infos = []
    for sid in sandbox_ids:
        infos.append(_make_sandbox_info(sid))

    page = MagicMock()
    page.sandbox_infos = infos
    page.pagination = MagicMock()
    page.pagination.has_next_page = has_next_page
    return page


def _make_client_context(
    sandbox_ids: list[str],
    *,
    pages: list[list[str]] | None = None,
) -> ClientContext:
    """Build a real ``ClientContext`` whose manager is replaced by a mock.

    Args:
        sandbox_ids: IDs returned as a single page when *pages* is ``None``.
        pages: Optional list of pages; each inner list is returned by one
            successive ``list_sandbox_infos`` call. Every page except the
            last reports ``has_next_page=True``. When given, *sandbox_ids*
            is not used.

    Returns:
        The context with ``_manager`` set to the configured mock.
    """
    settings = {
        "api_key": "test-key",
        "domain": "localhost:8080",
        "protocol": "http",
        "request_timeout": 30,
        "output_format": "json",
        "color": False,
        "default_image": None,
        "default_timeout": None,
    }
    context = ClientContext(
        resolved_config=settings,
        output=OutputFormatter("json", color=False),
    )

    fake_manager = MagicMock()
    if pages is None:
        # Single-page case: every call returns the same result.
        fake_manager.list_sandbox_infos.return_value = _make_paged_result(sandbox_ids)
    else:
        last_index = len(pages) - 1
        # One prepared result per call, consumed in order.
        fake_manager.list_sandbox_infos.side_effect = [
            _make_paged_result(page_ids, has_next_page=(index < last_index))
            for index, page_ids in enumerate(pages)
        ]

    # Inject the mock directly so the context never builds a real manager.
    context._manager = fake_manager
    return context


class TestResolveSandboxId:
    """Docker-style prefix resolution of sandbox IDs via ``resolve_sandbox_id``."""

    def test_full_uuid_skips_listing(self) -> None:
        """A full UUID comes back unchanged and no listing call is made."""
        context = _make_client_context([])
        uuid_like = "a1b2c3d4-e5f6-7890-abcd-ef1234567890"

        resolved = context.resolve_sandbox_id(uuid_like)

        assert resolved == uuid_like
        # The manager must not be consulted for a complete UUID.
        context._manager.list_sandbox_infos.assert_not_called()

    def test_unique_prefix_resolves(self) -> None:
        """A prefix matching exactly one ID resolves to that full ID."""
        known_ids = [
            "abc123-def456-7890-abcd-000000000001",
            "xyz789-def456-7890-abcd-000000000002",
        ]
        context = _make_client_context(known_ids)

        assert context.resolve_sandbox_id("abc") == "abc123-def456-7890-abcd-000000000001"

    def test_exact_match_among_multiple(self) -> None:
        """A prefix long enough to single out one of several IDs resolves."""
        known_ids = ["sandbox-alpha-001", "sandbox-beta-002", "sandbox-gamma-003"]
        context = _make_client_context(known_ids)

        assert context.resolve_sandbox_id("sandbox-a") == "sandbox-alpha-001"

    def test_ambiguous_prefix_raises(self) -> None:
        """Several matches raise ``ClickException`` mentioning ambiguity."""
        context = _make_client_context(["abc-111", "abc-222", "abc-333"])

        with pytest.raises(click.ClickException, match="Ambiguous ID prefix"):
            context.resolve_sandbox_id("abc")

    def test_ambiguous_error_shows_ids(self) -> None:
        """The ambiguity error text names the conflicting IDs."""
        context = _make_client_context(["abc-111", "abc-222"])

        with pytest.raises(click.ClickException) as caught:
            context.resolve_sandbox_id("abc")

        message = str(caught.value)
        assert "abc-111" in message
        assert "abc-222" in message

    def test_no_match_raises(self) -> None:
        """A prefix matching nothing raises ``ClickException``."""
        context = _make_client_context(["xyz-001", "xyz-002"])

        with pytest.raises(click.ClickException, match="No sandbox found"):
            context.resolve_sandbox_id("abc")

    def test_empty_sandbox_list_raises(self) -> None:
        """With no sandboxes at all, resolution raises ``ClickException``."""
        context = _make_client_context([])

        with pytest.raises(click.ClickException, match="No sandbox found"):
            context.resolve_sandbox_id("abc")

    def test_single_char_prefix(self) -> None:
        """A one-character prefix is enough when it is unique."""
        context = _make_client_context(["a-sandbox-001", "b-sandbox-002"])

        assert context.resolve_sandbox_id("a") == "a-sandbox-001"

    def test_full_id_matches_exactly(self) -> None:
        """A complete non-UUID ID resolves to itself through prefix matching."""
        context = _make_client_context(["my-sandbox-123"])

        assert context.resolve_sandbox_id("my-sandbox-123") == "my-sandbox-123"

    def test_more_than_five_ambiguous_shows_ellipsis(self) -> None:
        """With ten matches the error contains ``...`` and the total count."""
        many_ids = [f"sb-{i:03d}" for i in range(10)]
        context = _make_client_context(many_ids)

        with pytest.raises(click.ClickException) as caught:
            context.resolve_sandbox_id("sb-")

        message = str(caught.value)
        assert "..." in message
        assert "10 sandboxes" in message

    # -- Pagination tests --

    def test_match_on_second_page(self) -> None:
        """An ID present only on the second page is still resolved."""
        listing_pages = [["xyz-001", "xyz-002"], ["abc-999"]]
        context = _make_client_context([], pages=listing_pages)

        assert context.resolve_sandbox_id("abc") == "abc-999"

    def test_collision_across_pages(self) -> None:
        """Matches spread over different pages are reported as ambiguous."""
        listing_pages = [["abc-001"], ["abc-002"]]
        context = _make_client_context([], pages=listing_pages)

        with pytest.raises(click.ClickException, match="Ambiguous ID prefix"):
            context.resolve_sandbox_id("abc")

    def test_no_match_across_all_pages(self) -> None:
        """No match on any page raises ``ClickException``."""
        listing_pages = [["xyz-001"], ["xyz-002"]]
        context = _make_client_context([], pages=listing_pages)

        with pytest.raises(click.ClickException, match="No sandbox found"):
            context.resolve_sandbox_id("abc")


================================================
FILE: cli/tests/test_utils.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for opensandbox_cli.utils — duration parsing, key-value type, error handling."""

from __future__ import annotations

from datetime import timedelta

import click
import pytest

from opensandbox_cli.utils import DURATION, KEY_VALUE, parse_duration


# ---------------------------------------------------------------------------
# parse_duration
# ---------------------------------------------------------------------------


class TestParseDuration:
    """``parse_duration`` turns duration strings into ``timedelta`` values."""

    @pytest.mark.parametrize(
        "input_str, expected",
        [
            ("10", timedelta(seconds=10)),
            ("0", timedelta(seconds=0)),
            ("10s", timedelta(seconds=10)),
            ("5m", timedelta(minutes=5)),
            ("2h", timedelta(hours=2)),
            ("1h30m", timedelta(hours=1, minutes=30)),
            ("1h30m45s", timedelta(hours=1, minutes=30, seconds=45)),
            ("90s", timedelta(seconds=90)),
        ],
    )
    def test_valid_durations(self, input_str: str, expected: timedelta) -> None:
        """Bare seconds and ``h``/``m``/``s`` combinations parse as expected."""
        parsed = parse_duration(input_str)
        assert parsed == expected

    @pytest.mark.parametrize(
        "input_str",
        ["", "abc", "10x", "m10", "-5m"],
    )
    def test_invalid_durations(self, input_str: str) -> None:
        """Malformed input raises ``click.BadParameter``."""
        with pytest.raises(click.BadParameter):
            parse_duration(input_str)

    def test_strips_whitespace(self) -> None:
        """Surrounding whitespace does not affect the parsed value."""
        padded = "  10m  "
        assert parse_duration(padded) == timedelta(minutes=10)


# ---------------------------------------------------------------------------
# DurationType (Click param type)
# ---------------------------------------------------------------------------


class TestDurationType:
    """The ``DURATION`` Click parameter type."""

    def test_converts_string(self) -> None:
        """A duration string is converted to the matching ``timedelta``."""
        converted = DURATION.convert("5m", None, None)
        assert converted == timedelta(minutes=5)

    def test_passes_through_timedelta(self) -> None:
        """An existing ``timedelta`` is returned as the very same object."""
        one_hour = timedelta(hours=1)
        converted = DURATION.convert(one_hour, None, None)  # type: ignore[arg-type]
        assert converted is one_hour

    def test_invalid_raises_bad_parameter(self) -> None:
        """Unparseable input raises ``BadParameter``."""
        with pytest.raises(click.exceptions.BadParameter):
            DURATION.convert("invalid", None, None)


# ---------------------------------------------------------------------------
# KeyValueType (Click param type)
# ---------------------------------------------------------------------------


class TestKeyValueType:
    """Exercises the ``KEY_VALUE`` Click parameter type (``KEY=VALUE`` pairs)."""

    def test_parses_simple_kv(self) -> None:
        """A plain ``KEY=VALUE`` string becomes a ``(key, value)`` tuple."""
        parsed = KEY_VALUE.convert("FOO=bar", None, None)
        assert parsed == ("FOO", "bar")

    def test_value_can_contain_equals(self) -> None:
        """Only the first ``=`` separates key from value."""
        parsed = KEY_VALUE.convert("key=a=b=c", None, None)
        assert parsed == ("key", "a=b=c")

    def test_empty_value(self) -> None:
        """A trailing ``=`` yields an empty-string value."""
        parsed = KEY_VALUE.convert("key=", None, None)
        assert parsed == ("key", "")

    def test_missing_equals_fails(self) -> None:
        """Input without ``=`` raises ``click.exceptions.BadParameter``."""
        with pytest.raises(click.exceptions.BadParameter):
            KEY_VALUE.convert("no-equals", None, None)

    def test_passes_through_tuple(self) -> None:
        """An already-converted tuple is returned as the very same object."""
        pair = ("key", "val")
        converted = KEY_VALUE.convert(pair, None, None)  # type: ignore[arg-type]
        assert converted is pair


================================================
FILE: components/egress/Dockerfile
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ---- Stage 1: build the egress binary ---------------------------------------
# NOTE: every COPY source below is relative to the repository root, so the
# build context must be the repo root (see build.sh: `-f components/egress/Dockerfile .`).
FROM golang:1.24-bookworm AS builder

WORKDIR /workspace

# Build metadata injected into the binary via -ldflags below; defaults are
# placeholders used when no --build-arg is supplied.
ARG VERSION=dev
ARG GIT_COMMIT=unknown
ARG BUILD_TIME=unknown

# Copy only go mod/sum first for better caching
COPY components/egress/go.mod components/egress/go.sum ./components/egress/
# Bring internal module so replace ../internal works during download/build
COPY components/internal ./components/internal

WORKDIR /workspace/components/egress

# Static-ish build (no cgo) to simplify runtime deps
ENV CGO_ENABLED=0
RUN go mod download

# Copy the rest of the egress sources
COPY components/egress ./
# Stamp Version/BuildTime/GitCommit into the internal/version package and emit
# the binary at /out/egress for the runtime stage to pick up.
RUN CGO_ENABLED=0 go build \
    -ldflags "-X 'github.com/alibaba/opensandbox/internal/version.Version=${VERSION}' \
              -X 'github.com/alibaba/opensandbox/internal/version.BuildTime=${BUILD_TIME}' \
              -X 'github.com/alibaba/opensandbox/internal/version.GitCommit=${GIT_COMMIT}'" \
    -o /out/egress .

# ---- Stage 2: runtime image -------------------------------------------------
FROM debian:bookworm-slim

# iptables is needed for DNS REDIRECT; ca-certificates for TLS to upstream resolvers
# nftables backs the dns+nft enforcement mode described in the README.
# The remaining packages (curl, wget, dnsutils, tcpdump, nmap, strace, ...) are
# general-purpose networking/debugging tools; docs/benchmark.md relies on curl
# being present in the image.
# NOTE(review): consider whether the debugging tools and sudo should ship in a
# production sidecar image — confirm with the owners before trimming.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        iptables \
        iproute2 \
        nftables \
        ca-certificates \
        sudo \
        curl \
        wget \
        net-tools \
        dnsutils \
        netcat-openbsd \
        iputils-ping \
        traceroute \
        telnet \
        tcpdump \
        nmap \
        htop \
        procps \
        strace \
        lsof \
    && rm -rf /var/lib/apt/lists/*

COPY --from=builder /out/egress /egress

# Default entrypoint.
# NOTE(review): this comment previously said the binary expects
# OPENSANDBOX_NETWORK_POLICY at runtime, but the README documents
# OPENSANDBOX_EGRESS_RULES as the variable that seeds the initial policy
# (default deny-all when unset) — confirm which name is current.
ENTRYPOINT ["/egress"]

================================================
FILE: components/egress/README.md
================================================
# OpenSandbox Egress Sidecar

The **Egress Sidecar** is a core component of OpenSandbox that provides **FQDN-based egress control**. It runs alongside the sandbox application container (sharing the same network namespace) and enforces declared network policies.

## Features

- **FQDN-based Allowlist**: Control outbound traffic by domain name (e.g., `api.github.com`).
- **Wildcard Support**: Allow subdomains using wildcards (e.g., `*.pypi.org`).
- **Transparent Interception**: Uses transparent DNS proxying; no application configuration required.
- **Dynamic DNS (dns+nft mode)**: When a domain is allowed and the proxy resolves it, the resolved A/AAAA IPs are added to nftables with TTL so that default-deny + domain-allow is enforced at the network layer.
- **Privilege Isolation**: Requires `CAP_NET_ADMIN` only for the sidecar; the application container runs unprivileged.
- **Graceful Degradation**: If `CAP_NET_ADMIN` is missing, it warns and disables enforcement instead of crashing.

## Architecture

The egress control is implemented as a **Sidecar** that shares the network namespace with the sandbox application.

1.  **DNS Proxy (Layer 1)**:
    - Runs on `127.0.0.1:15353`.
    - `iptables` rules redirect all port 53 (DNS) traffic to this proxy.
    - Filters queries based on the allowlist.
    - Returns `NXDOMAIN` for denied domains.

2.  **Network Filter (Layer 2)** (when `OPENSANDBOX_EGRESS_MODE=dns+nft`):
    - Uses `nftables` to enforce IP-level allow/deny. Resolved IPs for allowed domains are added to dynamic allow sets with TTL (dynamic DNS).
    - At startup, the sidecar whitelists **127.0.0.1** (redirect target for the proxy) and **nameserver IPs** from `/etc/resolv.conf` so DNS resolution and proxy upstream work (including private DNS). Nameserver count is capped and invalid IPs are filtered; see [Configuration](#configuration).

## Requirements

- **Runtime**: Docker or Kubernetes.
- **Capabilities**: `CAP_NET_ADMIN` (for the sidecar container only).
- **Kernel**: Linux kernel with `iptables` support.

## Configuration

- Policy bootstrap & runtime:
  - Default deny-all. Seed initial policy via `OPENSANDBOX_EGRESS_RULES` (JSON, same shape as `/policy`); empty/`{}`/`null` stays deny-all.
  - `/policy` at runtime; empty body resets to default deny-all.
- HTTP service:
  - Listen address: `OPENSANDBOX_EGRESS_HTTP_ADDR` (default `:18080`).
  - Auth: `OPENSANDBOX_EGRESS_TOKEN` with header `OPENSANDBOX-EGRESS-AUTH: <token>`; if unset, endpoint is open.
- Mode (`OPENSANDBOX_EGRESS_MODE`, default `dns`):
  - `dns`: DNS proxy only, no nftables (IP/CIDR rules have no effect at L2).
  - `dns+nft`: enable nftables; if nft apply fails, fallback to `dns`. IP/CIDR enforcement and DoH/DoT blocking require this mode.
- **Nameserver exempt**  
  Set `OPENSANDBOX_EGRESS_NAMESERVER_EXEMPT` to a comma-separated list of **nameserver IPs** (e.g. `26.26.26.26` or `26.26.26.26,100.100.2.116`). Only single IPs are supported; CIDR entries are ignored. Traffic to these IPs on port 53 is not redirected to the proxy (iptables RETURN). In `dns+nft` mode, these IPs are also merged into the nft allow set so proxy upstream traffic to them (sent without SO_MARK) is accepted. Use when the upstream is reachable only via a specific route (e.g. tunnel) and SO_MARK would send proxy traffic elsewhere.
- **DNS and nft mode (nameserver whitelist)**  
  In `dns+nft` mode, the sidecar automatically allows:
  - **127.0.0.1** — so packets redirected by iptables to the proxy (127.0.0.1:15353) are accepted by nft.
  - **Nameserver IPs** from `/etc/resolv.conf` — so client DNS and proxy upstream work (e.g. private DNS).  
  Nameserver IPs are validated (unspecified and loopback are skipped) and capped. Use `OPENSANDBOX_EGRESS_MAX_NS` (default `3`; `0` = no cap, `1`–`10` = cap). See [SECURITY-RISKS.md](SECURITY-RISKS.md) for trust and scope of this whitelist.
- **Blocked hostname webhook**  
  - `OPENSANDBOX_EGRESS_DENY_WEBHOOK`: HTTP endpoint URL. When set, egress asynchronously POSTs JSON **only when a hostname is denied**: `{"hostname": "<original query>", "timestamp": "<RFC3339>", "source": "opensandbox-egress", "sandboxId": "<id-or-empty>"}`. Default timeout 5s, up to 3 retries with exponential backoff starting at 1s; 4xx is not retried, 5xx/network errors are retried.
  - `OPENSANDBOX_EGRESS_SANDBOX_ID`: optional sandbox identifier injected into the webhook payload as `sandboxId`. The value is read once at startup (unset → empty string).
  - **Allow requirement**: you must allow the webhook host (or its IP/CIDR) in the policy; with default deny, if you don’t explicitly allow it, the webhook traffic will be blocked by egress itself. Example: `{"defaultAction":"deny","egress":[{"action":"allow","target":"webhook.example.com"}]}`. If a broader deny CIDR covers the resolved IP, it will still be blocked—adjust your policy accordingly.
- DoH/DoT blocking:
  - DoT (tcp/udp 853) blocked by default.
  - Optional DoH over 443: `OPENSANDBOX_EGRESS_BLOCK_DOH_443=true`. If enabled without blocklist, all 443 is dropped.
  - DoH blocklist (IP/CIDR, comma-separated): `OPENSANDBOX_EGRESS_DOH_BLOCKLIST="9.9.9.9,1.1.1.1/32,2001:db8::/32"`.

### Runtime HTTP API

- Default listen address: `:18080` (override with `OPENSANDBOX_EGRESS_HTTP_ADDR`).
- Endpoints:
  - `GET /policy` — returns the current policy.
  - `POST /policy` — replaces the policy. Empty/whitespace/`{}`/`null` resets to default deny-all.
  - `PATCH /policy` — merge/append rules at runtime. Body **must** be a JSON array of egress rules (not wrapped in an object). New rules are placed before existing ones (same target overrides), so a later PATCH can override prior wildcard denies with a more specific allow, and vice versa.

Examples:

- DNS allowlist (default deny):
  ```bash
  curl -XPOST http://127.0.0.1:18080/policy \
    -d '{"defaultAction":"deny","egress":[{"action":"allow","target":"*.bing.com"}]}'
  ```
- DNS blocklist (default allow):
  ```bash
  curl -XPOST http://127.0.0.1:18080/policy \
    -d '{"defaultAction":"allow","egress":[{"action":"deny","target":"*.bing.com"}]}'
  ```
- IP/CIDR only:
  ```bash
  curl -XPOST http://127.0.0.1:18080/policy \
    -d '{"defaultAction":"deny","egress":[{"action":"allow","target":"1.1.1.1"},{"action":"deny","target":"10.0.0.0/8"}]}'
  ```
- Mixed DNS + IP/CIDR:
  ```bash
  curl -XPOST http://127.0.0.1:18080/policy \
    -d '{"defaultAction":"deny","egress":[{"action":"allow","target":"*.example.com"},{"action":"allow","target":"203.0.113.0/24"},{"action":"deny","target":"*.bad.com"}]}'
  ```
- Merge-only PATCH (override wildcard deny with a specific allow):
  ```bash
  # baseline: deny *.cloudflare.com
  curl -XPOST http://127.0.0.1:18080/policy \
    -d '{"defaultAction":"allow","egress":[{"action":"deny","target":"*.cloudflare.com"}]}'

  # allow a specific host; PATCH rules are prepended, so this wins
  curl -XPATCH http://127.0.0.1:18080/policy \
    -d '[{"action":"allow","target":"www.cloudflare.com"}]'
  ```

## Build & Run

### 1. Build Docker Image

```bash
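# Build locally. Run from the repository root: the Dockerfile copies
# components/egress and components/internal relative to the build context.
docker build -f components/egress/Dockerfile -t opensandbox/egress:local .

# Or use the build script (multi-arch; it switches to the repo root itself and pushes the images)
./components/egress/build.sh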
```

### 2. Run Locally (Docker)

To test the sidecar with a sandbox application:

1.  **Start the Sidecar** (creates the network namespace):

    ```bash
    docker run -d --name sandbox-egress \
      --cap-add=NET_ADMIN \
      opensandbox/egress:local
    ```

    *Note: `CAP_NET_ADMIN` is required for `iptables` redirection.*

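    After start, push policy via HTTP (empty body resets to deny-all). Replace `<sidecar-ip>` with the address at which the sidecar is reachable (for example, the container IP reported by `docker inspect sandbox-egress`):

    ```bash
    curl -XPOST http://<sidecar-ip>:18080/policy \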
      -H "OPENSANDBOX-EGRESS-AUTH: $OPENSANDBOX_EGRESS_TOKEN" \
      -d '{"defaultAction":"deny","egress":[{"action":"allow","target":"*.bing.com"}]}'
    ```

2.  **Start Application** (shares sidecar's network):

    ```bash
    docker run --rm -it \
      --network container:sandbox-egress \
      curlimages/curl \
      sh
    ```

3.  **Verify**:

    Inside the application container:

    ```bash
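    # Allowed domain (matches the `*.bing.com` allow rule pushed above)
    curl -I https://www.bing.com  # Should succeed

    # Denied domain (not in the allowlist; default action is deny)
    curl -I https://github.com  # Should fail (resolve error)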
    ```

## Development

- **Language**: Go 1.24+
- **Key Packages**:
    - `pkg/dnsproxy`: DNS server and policy matching logic.
    - `pkg/iptables`: `iptables` rule management.
    - `pkg/nftables`: nftables static/dynamic rules and DNS-resolved IP sets.
    - `pkg/policy`: Policy parsing and definition.
- **Main (egress)**:
    - `nameserver.go`: Builds the list of IPs to whitelist for DNS in nft mode (127.0.0.1 + validated/capped nameservers from resolv.conf).

```bash
# Run tests
go test ./...
```

### E2E benchmark: dns vs dns+nft (sync dynamic IP write)

An end-to-end benchmark compares **dns** (pass-through, no nft write) and **dns+nft** (sync `AddResolvedIPs` before each DNS reply) under real conditions: sidecar in Docker, iptables redirect, real DNS + HTTPS from a client container.

```bash
./tests/bench-dns-nft.sh
```

More details in [docs/benchmark.md](docs/benchmark.md).

## Troubleshooting

- **"iptables setup failed"**: Ensure the sidecar container has `--cap-add=NET_ADMIN`.
- **DNS resolution fails for all domains**:  
  Check upstream reachability from the sidecar (`ip route`, `dig @<upstream> . NS +timeout=3`). In `dns+nft` mode, check logs for `[dns] whitelisting proxy listen + N nameserver(s)`.
- **Traffic not blocked**: If nftables apply fails, the sidecar falls back to dns; check logs, `nft list table inet opensandbox`, and `CAP_NET_ADMIN`.


================================================
FILE: components/egress/TODO.md
================================================
# Egress Sidecar TODO (Linux MVP → Full OSEP-0001)

- Layer 2 still partial: static IP/CIDR now pushed to nftables, DoH/DoT blocking added (853 + optional 443 blocklist). DNS-learned IPs/dynamic isolation planned (see Short-term priorities).
- Policy surface: IP/CIDR parsing/validation done; `require_full_isolation` and richer validation messages are out of scope (see No goals).
- Observability missing: no violation logs.
- Capability probing missing: no CAP_NET_ADMIN/nftables detection; hostNetwork is already rejected on the server side. Capability detection + mode exposure moved to No goals.
- Platform integration completed: specs/SDK/server wiring done; NET_ADMIN only on sidecar.
- No IPv6; startup ordering not enforced (relies on container start order).

## Short-term priorities (suggested order)
1) Layer 2 via nftables  
   - Tune DoH/DoT rules (ordering, allow-list exceptions, counters).
4) Observability & logging  
   - Violation logs (domain/action/upstream IP); expose current enforcement mode.  
   - Optional lightweight health/status endpoint.
6) Security hardening  
   - Whitelist/validate upstream DNS to avoid arbitrary 53 egress abuse.  
   - Document bypass/limits (dns-only can be bypassed via direct IP/DoH).
7) IPv6 & tests  
   - Handle IPv6 support or explicit non-support.  
   - Unit/integration tests: interception, graceful degrade, nftables, DoH blocking, hostNetwork rejection.

## No goals (explicitly excluded)
- Capability probing & mode exposure (CAP_NET_ADMIN/nft detection, mode surfacing).
- Policy expansion: `require_full_isolation` and richer validation errors.

## Dev notes
- Current behavior: default deny-all baseline even when no policy is provided; POST /policy empty resets to deny-all; env bootstrap defaults to deny-all.  
- DNS proxy always runs; SO_MARK=0x1 bypass for proxy’s own upstream DNS; iptables only redirects port 53, no other DROP rules.  
- nftables: static IP/CIDR applied on start and policy update; retry without delete-table if table absent; failures fall back to DNS-only.  
- Runtime deps: Linux, `CAP_NET_ADMIN`, `iptables`/`nft` binaries; upstream DNS must be reachable and recursive.


================================================
FILE: components/egress/build.sh
================================================
#!/bin/bash
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Build and push the multi-arch (linux/amd64, linux/arm64) egress image.
#
# Environment overrides:
#   TAG         image tag to publish (default: latest). When it starts with
#               "v", the image is additionally tagged and pushed as :latest.
#   VERSION     version string baked into the binary (default: git describe, or "dev").
#   GIT_COMMIT  commit hash baked into the binary (default: git rev-parse HEAD, or "unknown").
#   BUILD_TIME  build timestamp baked into the binary (default: current UTC time).
set -ex

TAG=${TAG:-latest}
VERSION=${VERSION:-$(git describe --tags --always --dirty 2>/dev/null || echo "dev")}
GIT_COMMIT=${GIT_COMMIT:-$(git rev-parse HEAD 2>/dev/null || echo "unknown")}
BUILD_TIME=${BUILD_TIME:-$(date -u +"%Y-%m-%dT%H:%M:%SZ")}
# The Dockerfile copies paths relative to the repository root, so build from there.
REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || realpath "$(dirname "$0")/../..")
cd "${REPO_ROOT}"

# Recreate a dedicated buildx builder; removal may fail on first run, which is fine.
docker buildx rm egress-builder || true
docker buildx create --use --name egress-builder
docker buildx inspect --bootstrap
docker buildx ls

# Release tags (v*) also move the :latest tag in both registries.
LATEST_TAGS=()
if [[ "${TAG}" == v* ]]; then
  LATEST_TAGS+=(-t opensandbox/egress:latest -t sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/egress:latest)
fi

# Image references are quoted so an unusual TAG value cannot be word-split or
# glob-expanded into extra docker arguments.
docker buildx build \
  -t "opensandbox/egress:${TAG}" \
  -t "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/egress:${TAG}" \
  "${LATEST_TAGS[@]}" \
  -f components/egress/Dockerfile \
  --build-arg VERSION="${VERSION}" \
  --build-arg GIT_COMMIT="${GIT_COMMIT}" \
  --build-arg BUILD_TIME="${BUILD_TIME}" \
  --platform linux/amd64,linux/arm64 \
  --push \
  .


================================================
FILE: components/egress/docs/benchmark.md
================================================
# Egress Benchmark

This document describes the **Egress Sidecar** end-to-end benchmark: it compares **dns** and **dns+nft** modes under real conditions for latency and throughput.

## Purpose

- **dns**: DNS proxy only (pass-through), no nftables writes; used as the baseline.
- **dns+nft**: DNS proxy plus synchronous `AddResolvedIPs` before each DNS reply, writing resolved IPs into nftables for
  L2 egress enforcement.

The benchmark runs the same workload in both modes and reports end-to-end latency (P50, P99) and throughput (Req/s) to
measure the overhead of the synchronous nft write path.

## Environment and Flow

- **Environment**: The Egress sidecar runs in a Docker container on the host. The container includes the sidecar (DNS
  proxy and optional nft), iptables redirect of port 53 to the proxy, and the policy server on port 18080. The workload
  runs **inside the same container**: DNS and HTTPS traffic go through the proxy.
- **Flow** (per phase):
    1. Start the sidecar with the chosen mode (`dns` or `dns+nft`).
    2. Wait for health checks, then push the allow list to `/policy` (see domain list below).
    3. Write the domain list into the container as `/tmp/bench-domains.txt` (one `https://<domain>` per line).
    4. **Warm-up**: One request to each of the first 10 domains (10 concurrent), 1 round.
    5. **Timed run**: One request per domain for all domains (N concurrent per round), for 10 rounds; each request
       records `time_namelookup` and `time_total`.
    6. Copy results from the container and compute P50, P99, average latency, and Req/s.
- **Execution order**: **dns+nft** runs first, then **dns**; the comparison table is printed at the end.

## Workload

- **Domain list**: Read from `components/egress/tests/hostname.txt`, one domain per line (lines starting with `#` and
  empty lines are ignored). Default is about 100 resolvable domains.
- **Rounds and concurrency**: The script uses `ROUNDS=10`. Each round issues one HTTPS request per domain in
  `hostname.txt`, with all requests in that round concurrent; 10 rounds total.
- **Total requests**: `TOTAL_REQUESTS = ROUNDS × NUM_DOMAINS` (e.g. 10 × 100 = 1000).
- **Per request**: Inside the container, `curl -o /dev/null -s -w "%{time_namelookup}\t%{time_total}\n"` is used against
  `https://<domain>`, with a 10s timeout per request; the whole benchmark run has a 300s wall-clock timeout.

## Policy

- Policy is default-deny with explicit allow rules: one `{"action":"allow","target":"<domain>"}` per domain in
  `hostname.txt` is sent via `POST /policy`, so every domain used in the benchmark is allowed.

## How to Run

**Script**: `components/egress/tests/bench-dns-nft.sh`

**Requirements**: Docker and `curl` on the host (for pushing policy); the Egress image includes `curl` for the workload.

**Commands** (run from `components/egress`; from the repo root, use `./components/egress/tests/bench-dns-nft.sh` instead):

```bash
./tests/bench-dns-nft.sh
```

The script resolves `tests/hostname.txt` relative to its own path, so the working directory does not need to be changed.

## Configuration

| Item                | Location / variable                    | Default / notes                                |
|---------------------|----------------------------------------|------------------------------------------------|
| Domain list         | `components/egress/tests/hostname.txt` | One domain per line; `#` comments allowed      |
| Rounds              | `ROUNDS` in script                     | 10                                             |
| Per-request timeout | `CURL_TIMEOUT` in script               | 10 seconds                                     |
| Benchmark timeout   | `BENCH_EXEC_TIMEOUT` in script         | 300 seconds (max wall time for the timed run)  |
| Image               | `IMG` in script                        | See script; override for a locally built image |

Changing the number of domains or rounds updates the total request count; the report shows “N rounds × M domains” for
the current config.

## Output and Metrics

- **Terminal**: A table with **Req/s**, **Avg(s)**, **P50(s)**, **P99(s)** for both modes, plus short notes (dns vs
  dns+nft, warm-up, first-resolution cost).
- **Artifacts** (on the host under `/tmp`): `bench-e2e-dns-total.txt`, `bench-e2e-dns+nft-total.txt` (one
  `time_total` per line), and `-namelookup.txt`, `-wall.txt`, etc., for further analysis or plotting.

## Notes

- The first resolution of a domain in dns+nft triggers a DNS lookup and an nft write, so cost is higher; later requests
  for the same domain hit the set and are cheaper. The multi-round, multi-domain design mixes cold and warm resolution.
- In CI (e.g. GitHub Actions), the script wraps the timed-run `docker exec` with `timeout` inside the shell function so
  `timeout` runs a real command, not a function name, avoiding “No such file or directory” errors.


================================================
FILE: components/egress/go.mod
================================================
// Go module definition for the OpenSandbox egress sidecar.
module github.com/alibaba/opensandbox/egress

go 1.24.0

// Direct dependencies.
require (
	github.com/alibaba/opensandbox/internal v0.0.0
	github.com/miekg/dns v1.1.61
	github.com/stretchr/testify v1.11.1
	golang.org/x/sys v0.31.0
)

// Indirect (transitive) dependencies.
require (
	github.com/davecgh/go-spew v1.1.1 // indirect
	github.com/pmezard/go-difflib v1.0.0 // indirect
	go.uber.org/multierr v1.10.0 // indirect
	go.uber.org/zap v1.27.0 // indirect
	golang.org/x/mod v0.18.0 // indirect
	golang.org/x/net v0.38.0 // indirect
	golang.org/x/sync v0.7.0 // indirect
	golang.org/x/tools v0.22.0 // indirect
	gopkg.in/yaml.v3 v3.0.1 // indirect
)

// The shared internal module is resolved from the sibling directory in this
// repository (components/internal) instead of being fetched remotely; the
// v0.0.0 version above is therefore only a placeholder.
replace github.com/alibaba/opensandbox/internal => ../internal


================================================
FILE: components/egress/go.sum
================================================
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/miekg/dns v1.1.61 h1:nLxbwF3XxhwVSm8g9Dghm9MHPaUZuqhPiGL+675ZmEs=
github.com/miekg/dns v1.1.61/go.mod h1:mnAarhS3nWaW+NVP2wTkYVIZyHNJ098SJZUki3eykwQ=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.uber.org/multierr v1.10.0 h1:S0h4aNzvfcFsC3dRF1jLoaov7oRaKqRGC/pUEJ2yvPQ=
go.uber.org/multierr v1.10.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8=
go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0=
golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA=
golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=


================================================
FILE: components/egress/main.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"context"
	"net/netip"
	"os"
	"os/signal"
	"strings"
	"syscall"

	"github.com/alibaba/opensandbox/egress/pkg/constants"
	"github.com/alibaba/opensandbox/egress/pkg/dnsproxy"
	"github.com/alibaba/opensandbox/egress/pkg/events"
	"github.com/alibaba/opensandbox/egress/pkg/iptables"
	"github.com/alibaba/opensandbox/egress/pkg/log"
	slogger "github.com/alibaba/opensandbox/internal/logger"
	"github.com/alibaba/opensandbox/internal/version"
)

// main wires up and runs the egress sidecar.
//
// Startup order:
//  1. install signal handling and the logger;
//  2. load the initial policy from the environment;
//  3. compute the IPs to allow in nft (loopback, resolv.conf nameservers, exempt nameservers);
//  4. start the DNS proxy and, if configured, the denied-hostname webhook;
//  5. install the iptables redirect of port 53 to the proxy;
//  6. apply nftables rules and start the policy HTTP server.
//
// It then blocks until SIGINT/SIGTERM is received. Any startup failure is fatal.
func main() {
	// Local port the DNS proxy listens on and iptables redirects port 53 to.
	const dnsProxyPort = 15353

	version.EchoVersion("OpenSandbox Egress")

	ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer cancel()

	ctx = withLogger(ctx)
	defer log.Logger.Sync()

	initialRules, err := dnsproxy.LoadPolicyFromEnvVar(constants.EnvEgressRules)
	if err != nil {
		log.Fatalf("failed to parse %s: %v", constants.EnvEgressRules, err)
	}

	allowIPs := AllowIPsForNft("/etc/resolv.conf")

	// Parse the nameserver exempt list once and reuse it for both the nft allow
	// set and the iptables redirect, so the two layers always agree on the list.
	exemptDst := dnsproxy.ParseNameserverExemptList()

	// Merge nameserver exempt IPs into nft allow set so proxy traffic to them (no SO_MARK) is allowed in dns+nft mode.
	for _, addr := range exemptDst {
		if !containsAddr(allowIPs, addr) {
			allowIPs = append(allowIPs, addr)
		}
	}

	mode := parseMode()
	log.Infof("enforcement mode: %s", mode)
	nftMgr := createNftManager(mode)
	proxy, err := dnsproxy.New(initialRules, "")
	if err != nil {
		log.Fatalf("failed to init dns proxy: %v", err)
	}
	if err := proxy.Start(ctx); err != nil {
		log.Fatalf("failed to start dns proxy: %v", err)
	}
	log.Infof("dns proxy started on 127.0.0.1:%d", dnsProxyPort)

	// Optional: report denied hostnames to a webhook.
	if blockWebhookURL := strings.TrimSpace(os.Getenv(constants.EnvBlockedWebhook)); blockWebhookURL != "" {
		blockedBroadcaster := events.NewBroadcaster(ctx, events.BroadcasterConfig{QueueSize: 256})
		blockedBroadcaster.AddSubscriber(events.NewWebhookSubscriber(blockWebhookURL))
		proxy.SetBlockedBroadcaster(blockedBroadcaster)
		defer blockedBroadcaster.Close()
		log.Infof("denied hostname webhook enabled")
	}

	if len(exemptDst) > 0 {
		log.Infof("nameserver exempt list: %v (proxy upstream in this list will not set SO_MARK)", exemptDst)
	}
	if err := iptables.SetupRedirect(dnsProxyPort, exemptDst); err != nil {
		log.Fatalf("failed to install iptables redirect: %v", err)
	}
	log.Infof("iptables redirect configured (OUTPUT 53 -> %d) with SO_MARK bypass for proxy upstream traffic", dnsProxyPort)

	setupNft(ctx, nftMgr, initialRules, proxy, allowIPs)

	// start policy server
	httpAddr := envOrDefault(constants.EnvEgressHTTPAddr, constants.DefaultEgressServerAddr)
	if err = startPolicyServer(ctx, proxy, nftMgr, mode, httpAddr, os.Getenv(constants.EnvEgressToken), allowIPs); err != nil {
		log.Fatalf("failed to start policy server: %v", err)
	}
	log.Infof("policy server listening on %s (POST /policy)", httpAddr)

	// Block until a shutdown signal cancels the context.
	<-ctx.Done()
	log.Infof("received shutdown signal; exiting")
	_ = os.Stderr.Sync()
}

func withLogger(ctx context.Context) context.Context {
	level := envOrDefault(constants.EnvEgressLogLevel, "info")
	logger := slogger.MustNew(slogger.Config{Level: level}).Named("opensandbox.egress")
	return log.WithLogger(ctx, logger)
}

func envOrDefault(key, defaultVal string) string {
	if v := strings.TrimSpace(os.Getenv(key)); v != "" {
		return v
	}
	return defaultVal
}

// isTruthy reports whether v, after trimming whitespace and lowercasing, is one
// of the accepted affirmative spellings: "1", "true", "yes", "y", or "on".
func isTruthy(v string) bool {
	normalized := strings.ToLower(strings.TrimSpace(v))
	for _, accepted := range [...]string{"1", "true", "yes", "y", "on"} {
		if normalized == accepted {
			return true
		}
	}
	return false
}

func containsAddr(addrs []netip.Addr, a netip.Addr) bool {
	for _, x := range addrs {
		if x == a {
			return true
		}
	}
	return false
}

// parseMode reads the enforcement mode from constants.EnvEgressMode (trimmed,
// case-insensitive). An empty value selects constants.PolicyDnsOnly; an
// unrecognized value logs a warning and also falls back to constants.PolicyDnsOnly.
func parseMode() string {
	raw := os.Getenv(constants.EnvEgressMode)
	mode := strings.ToLower(strings.TrimSpace(raw))

	if mode == "" || mode == constants.PolicyDnsOnly {
		return constants.PolicyDnsOnly
	}
	if mode == constants.PolicyDnsNft {
		return constants.PolicyDnsNft
	}

	log.Warnf("invalid %s=%s, falling back to dns", constants.EnvEgressMode, mode)
	return constants.PolicyDnsOnly
}


================================================
FILE: components/egress/nameserver.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"net/netip"
	"os"
	"strconv"

	"github.com/alibaba/opensandbox/egress/pkg/constants"
	"github.com/alibaba/opensandbox/egress/pkg/dnsproxy"
	"github.com/alibaba/opensandbox/egress/pkg/log"
)

// AllowIPsForNft returns the list of IPs to merge into the nft allow set for DNS in dns+nft mode:
// 127.0.0.1 (proxy listen / iptables redirect target) plus validated, capped nameserver IPs from resolvPath.
//
// Validation: skips unspecified (0.0.0.0, ::) and loopback (127.x, ::1) addresses.
// Cap: at most N nameservers, where N comes from the environment variable named by
// constants.EnvMaxNameservers (see maxNameserversFromEnv); 0 means no cap.
//
// Reading resolvPath is best-effort: a failure is logged as a warning and the
// result then contains whatever nameservers were returned (possibly none) after
// 127.0.0.1. The returned slice always starts with 127.0.0.1.
func AllowIPsForNft(resolvPath string) []netip.Addr {
	raw, err := dnsproxy.ResolvNameserverIPs(resolvPath)
	if err != nil {
		// Do not fail startup over this, but make the reason visible instead of
		// silently ending up with an allow set that lacks the nameservers.
		log.Warnf("[dns] failed to read nameservers from %s: %v", resolvPath, err)
	}
	maxNsCount := maxNameserversFromEnv()

	var validated []netip.Addr
	for _, ip := range raw {
		// The cap counts accepted nameservers only; skipped entries do not consume it.
		if maxNsCount > 0 && len(validated) >= maxNsCount {
			break
		}
		if !isValidNameserverIP(ip) {
			continue
		}
		validated = append(validated, ip)
	}

	// 127.0.0.1 first so packets redirected to proxy are accepted by nft.
	out := make([]netip.Addr, 0, 1+len(validated))
	out = append(out, netip.MustParseAddr("127.0.0.1"))
	out = append(out, validated...)

	if len(out) > 1 {
		log.Infof("[dns] whitelisting proxy listen + %d nameserver(s) for nft: %v", len(validated), formatIPs(out))
	} else {
		log.Infof("[dns] whitelisting proxy listen (127.0.0.1); no valid nameserver IPs from %s", resolvPath)
	}
	return out
}

// maxNameserversFromEnv reads the nameserver cap from constants.EnvMaxNameservers.
//
// Unset/empty, non-numeric and negative values yield constants.DefaultMaxNameservers.
// Values above 10 are clamped to 10. Zero is returned as-is and means "no cap".
func maxNameserversFromEnv() int {
	const hardLimit = 10

	// Atoi fails on the empty string, so an unset variable takes the default branch too.
	parsed, err := strconv.Atoi(os.Getenv(constants.EnvMaxNameservers))
	switch {
	case err != nil, parsed < 0:
		return constants.DefaultMaxNameservers
	case parsed > hardLimit:
		return hardLimit
	default:
		return parsed
	}
}

func isValidNameserverIP(ip netip.Addr) bool {
	if ip.IsUnspecified() {
		return false
	}
	if ip.IsLoopback() {
		return false
	}
	return true
}

func formatIPs(ips []netip.Addr) []string {
	out := make([]string, len(ips))
	for i, ip := range ips {
		out[i] = ip.String()
	}
	return out
}


================================================
FILE: components/egress/nameserver_test.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"net/netip"
	"os"
	"path/filepath"
	"testing"

	"github.com/alibaba/opensandbox/egress/pkg/constants"
	"github.com/stretchr/testify/require"
)

// TestAllowIPsForNft_EmptyResolv checks that a resolv.conf without nameserver
// lines yields only the proxy listen address.
func TestAllowIPsForNft_EmptyResolv(t *testing.T) {
	resolvPath := filepath.Join(t.TempDir(), "resolv.conf")
	writeErr := os.WriteFile(resolvPath, []byte("# empty\n"), 0644)
	require.NoError(t, writeErr)

	got := AllowIPsForNft(resolvPath)

	require.Len(t, got, 1, "expected 1 IP (127.0.0.1)")
	require.Equal(t, netip.MustParseAddr("127.0.0.1"), got[0])
}

func TestAllowIPsForNft_ValidNameservers(t *testing.T) {
	dir := t.TempDir()
	resolv := filepath.Join(dir, "resolv.conf")
	// Standard resolv.conf with two nameservers
	content := "nameserver 192.168.65.7\nnameserver 10.0.0.1\n"
	require.NoError(t, os.WriteFile(resolv, []byte(content), 0644))
	ips := AllowIPsForNft(resolv)
	require.Len(t, ips, 3, "expected 3 IPs (127.0.0.1 + 2 nameservers)")
	require.Equal(t, netip.MustParseAddr("127.0.0.1"), ips[0], "expected first 127.0.0.1")
	require.Equal(t, netip.MustParseAddr("192.168.65.7"), ips[1], "expected 192.168.65.7")
	require.Equal(t, netip.MustParseAddr("10.0.0.1"), ips[2], "expected 10.0.0.1")
}

// TestAllowIPsForNft_FiltersInvalid checks that unspecified and loopback
// nameservers are dropped while a routable one is kept.
func TestAllowIPsForNft_FiltersInvalid(t *testing.T) {
	resolvPath := filepath.Join(t.TempDir(), "resolv.conf")
	// 0.0.0.0 and 127.0.0.11 should be filtered; 192.168.1.1 kept
	lines := "nameserver 0.0.0.0\nnameserver 192.168.1.1\nnameserver 127.0.0.11\n"
	writeErr := os.WriteFile(resolvPath, []byte(lines), 0644)
	require.NoError(t, writeErr)

	got := AllowIPsForNft(resolvPath)

	require.Len(t, got, 2, "expected 2 IPs (127.0.0.1 + 192.168.1.1)")
	require.Equal(t, netip.MustParseAddr("127.0.0.1"), got[0], "expected first 127.0.0.1")
	require.Equal(t, netip.MustParseAddr("192.168.1.1"), got[1], "expected 192.168.1.1")
}

// TestAllowIPsForNft_Cap checks that the nameserver cap from the environment
// truncates the list while keeping file order.
func TestAllowIPsForNft_Cap(t *testing.T) {
	dir := t.TempDir()
	resolv := filepath.Join(dir, "resolv.conf")
	content := "nameserver 10.0.0.1\nnameserver 10.0.0.2\nnameserver 10.0.0.3\nnameserver 10.0.0.4\n"
	require.NoError(t, os.WriteFile(resolv, []byte(content), 0644))
	// t.Setenv restores the previous state (including "unset") when the test ends.
	t.Setenv(constants.EnvMaxNameservers, "2")

	ips := AllowIPsForNft(resolv)
	// 127.0.0.1 + 2 nameservers (cap)
	require.Len(t, ips, 3, "expected 3 IPs (127.0.0.1 + 2 capped)")
	require.Equal(t, netip.MustParseAddr("10.0.0.1"), ips[1], "expected first nameserver to be 10.0.0.1")
	require.Equal(t, netip.MustParseAddr("10.0.0.2"), ips[2], "expected second nameserver to be 10.0.0.2")
}

// TestIsValidNameserverIP covers unspecified, loopback and routable addresses
// for both address families.
func TestIsValidNameserverIP(t *testing.T) {
	type testCase struct {
		ip   string
		want bool
	}
	cases := []testCase{
		{ip: "0.0.0.0", want: false},
		{ip: "::", want: false},
		{ip: "127.0.0.1", want: false},
		{ip: "127.0.0.11", want: false},
		{ip: "::1", want: false},
		{ip: "192.168.65.7", want: true},
		{ip: "10.0.0.1", want: true},
		{ip: "8.8.8.8", want: true},
	}
	for idx := range cases {
		tc := cases[idx]
		if got := isValidNameserverIP(netip.MustParseAddr(tc.ip)); got != tc.want {
			t.Errorf("isValidNameserverIP(%s) = %v, want %v", tc.ip, got, tc.want)
		}
	}
}

func TestMaxNameserversFromEnv(t *testing.T) {
	old := os.Getenv(constants.EnvMaxNameservers)
	defer os.Setenv(constants.EnvMaxNameservers, old)

	for _, s := range []string{"", "x", "-1"} {
		os.Setenv(constants.EnvMaxNameservers, s)
		if got := maxNameserversFromEnv(); got != constants.DefaultMaxNameservers {
			t.Errorf("maxNameserversFromEnv(%q) = %d, want default %d", s, got, constants.DefaultMaxNameservers)
		}
	}
	os.Setenv(constants.EnvMaxNameservers, "0")
	if got := maxNameserversFromEnv(); got != 0 {
		t.Errorf("maxNameserversFromEnv(0) = %d, want 0", got)
	}
	os.Setenv(constants.EnvMaxNameservers, "5")
	if got := maxNameserversFromEnv(); got != 5 {
		t.Errorf("maxNameserversFromEnv(5) = %d, want 5", got)
	}
	os.Setenv(constants.EnvMaxNameservers, "99")
	if got := maxNameserversFromEnv(); got != 10 {
		t.Errorf("maxNameserversFromEnv(99) = %d, want 10 (capped)", got)
	}
}


================================================
FILE: components/egress/nft.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"context"
	"net/netip"
	"os"
	"strings"

	"github.com/alibaba/opensandbox/egress/pkg/constants"
	"github.com/alibaba/opensandbox/egress/pkg/dnsproxy"
	"github.com/alibaba/opensandbox/egress/pkg/log"
	"github.com/alibaba/opensandbox/egress/pkg/nftables"
	"github.com/alibaba/opensandbox/egress/pkg/policy"
)

// createNftManager builds the nft manager used in dns+nft mode.
// Any other mode (dns-only) gets a nil applier, which callers treat as "nft disabled".
func createNftManager(mode string) nftApplier {
	if mode == constants.PolicyDnsNft {
		return nftables.NewManagerWithOptions(parseNftOptions())
	}
	return nil
}

// setupNft installs the static nft policy and hooks DNS resolutions into nft.
//
// With a nil nftMgr (dns-only mode) it only logs a warning. Otherwise nameserverIPs are
// merged into initialPolicy's allow set so system DNS keeps working (client + proxy
// upstream, e.g. private DNS), the result is applied via ApplyStatic (a failure is fatal),
// and the proxy is given a callback that adds every resolved IP to nft; callback errors
// are logged as warnings.
func setupNft(ctx context.Context, nftMgr nftApplier, initialPolicy *policy.NetworkPolicy, proxy *dnsproxy.Proxy, nameserverIPs []netip.Addr) {
	if nftMgr == nil {
		log.Warnf("nftables disabled (dns-only mode)")
		return
	}

	log.Infof("applying nftables static policy (dns+nft mode) with %d nameserver IP(s) merged into allow set", len(nameserverIPs))
	if err := nftMgr.ApplyStatic(ctx, initialPolicy.WithExtraAllowIPs(nameserverIPs)); err != nil {
		log.Fatalf("nftables static apply failed: %v", err)
	}
	log.Infof("nftables static policy applied (table inet opensandbox); DNS-resolved IPs will be added to dynamic allow sets")

	addToNft := func(domain string, ips []nftables.ResolvedIP) {
		err := nftMgr.AddResolvedIPs(ctx, ips)
		if err != nil {
			log.Warnf("[dns] add resolved IPs to nft failed for domain %q: %v", domain, err)
		}
	}
	proxy.SetOnResolved(addToNft)
}

// parseNftOptions derives nftables.Options from the environment.
//
// BlockDoT is always enabled. BlockDoH443 is enabled when constants.EnvBlockDoH443 is
// truthy. constants.EnvDoHBlocklist is a comma-separated list of IPs or CIDR prefixes;
// each entry is stored verbatim (trimmed) in the IPv4 or IPv6 blocklist according to
// its address family, and unparsable entries are logged and skipped.
func parseNftOptions() nftables.Options {
	opts := nftables.Options{BlockDoT: true}
	if isTruthy(os.Getenv(constants.EnvBlockDoH443)) {
		opts.BlockDoH443 = true
	}

	raw := os.Getenv(constants.EnvDoHBlocklist)
	if strings.TrimSpace(raw) == "" {
		return opts
	}

	for _, entry := range strings.Split(raw, ",") {
		target := strings.TrimSpace(entry)
		if target == "" {
			continue
		}

		// Resolve the entry to the address that decides its family: the IP itself,
		// or the base address of a prefix.
		var family netip.Addr
		if addr, addrErr := netip.ParseAddr(target); addrErr == nil {
			family = addr
		} else if prefix, prefixErr := netip.ParsePrefix(target); prefixErr == nil {
			family = prefix.Addr()
		} else {
			log.Warnf("ignoring invalid DoH blocklist entry: %s", target)
			continue
		}

		switch {
		case family.Is4():
			opts.DoHBlocklistV4 = append(opts.DoHBlocklistV4, target)
		case family.Is6():
			opts.DoHBlocklistV6 = append(opts.DoHBlocklistV6, target)
		}
	}
	return opts
}


================================================
FILE: components/egress/pkg/constants/configuration.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package constants

// Names of the environment variables that configure the egress component.
//
// Semantics visible from the readers in this component:
//   - EnvBlockDoH443: a truthy value enables the BlockDoH443 nft option.
//   - EnvDoHBlocklist: comma-separated IPs/CIDRs, split into IPv4 and IPv6 DoH blocklists.
//   - EnvMaxNameservers: cap on resolv.conf nameservers whitelisted in nft
//     (0 = no cap, values above 10 are clamped, invalid values fall back to
//     DefaultMaxNameservers).
//
// NOTE(review): the remaining variables are read elsewhere; confirm their exact
// semantics at the use sites. ENVSandboxID deviates from the Env* naming of its
// siblings; renaming it would break existing callers.
const (
	EnvBlockDoH443    = "OPENSANDBOX_EGRESS_BLOCK_DOH_443"
	EnvDoHBlocklist   = "OPENSANDBOX_EGRESS_DOH_BLOCKLIST" // comma-separated IP/CIDR
	EnvEgressMode     = "OPENSANDBOX_EGRESS_MODE"          // dns | dns+nft
	EnvEgressHTTPAddr = "OPENSANDBOX_EGRESS_HTTP_ADDR"
	EnvEgressToken    = "OPENSANDBOX_EGRESS_TOKEN"
	EnvEgressRules    = "OPENSANDBOX_EGRESS_RULES"
	EnvEgressLogLevel = "OPENSANDBOX_EGRESS_LOG_LEVEL"
	EnvMaxNameservers = "OPENSANDBOX_EGRESS_MAX_NS"
	EnvBlockedWebhook = "OPENSANDBOX_EGRESS_DENY_WEBHOOK"
	ENVSandboxID      = "OPENSANDBOX_EGRESS_SANDBOX_ID"

	// EnvNameserverExempt is a comma-separated list of single IPs (CIDRs are ignored).
	// Proxy upstream traffic to these IPs is not marked with SO_MARK, and the IPs are
	// allowed in the nft allow set.
	EnvNameserverExempt = "OPENSANDBOX_EGRESS_NAMESERVER_EXEMPT"
)

// Egress enforcement modes (values of EnvEgressMode).
// An nft manager is only created for PolicyDnsNft; PolicyDnsOnly runs without nftables.
const (
	PolicyDnsOnly = "dns"
	PolicyDnsNft  = "dns+nft"
)

// Defaults applied when the corresponding environment variable is absent or invalid.
const (
	// NOTE(review): presumably the fallback for EnvEgressHTTPAddr — confirm at the use site.
	DefaultEgressServerAddr = ":18080"
	// DefaultMaxNameservers is the nameserver cap used when EnvMaxNameservers is
	// empty, non-numeric or negative.
	DefaultMaxNameservers   = 3
)


================================================
FILE: components/egress/pkg/constants/constants.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package constants

// Packet mark used to tell the DNS proxy's own upstream traffic apart.
const (
	// MarkValue is set as SO_MARK on the proxy's upstream sockets (Linux only).
	MarkValue = 0x1
	// MarkHex is the same mark as a string.
	// NOTE(review): presumably for building iptables/nft rules — confirm at the use site.
	MarkHex   = "0x1"
)

const (
	// EgressAuthTokenHeader is a header name.
	// NOTE(review): presumably the HTTP header that carries the egress auth token
	// on the policy API — confirm against the HTTP server.
	EgressAuthTokenHeader = "OPENSANDBOX-EGRESS-AUTH"
)


================================================
FILE: components/egress/pkg/dnsproxy/exempt.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package dnsproxy

import (
	"net/netip"
	"os"
	"strings"
	"sync"

	"github.com/alibaba/opensandbox/egress/pkg/constants"
)

// Process-wide cache of the nameserver exempt list.
// All three are written by parseNameserverExemptListUncached, which
// ParseNameserverExemptList runs at most once through exemptListOnce.
var (
	// exemptListOnce guards the one-time parse of the environment variable.
	exemptListOnce sync.Once
	// exemptAddrs holds the parsed IPs in input order, without duplicates.
	exemptAddrs    []netip.Addr
	// exemptSet holds the same IPs for exact-match lookups.
	exemptSet      map[netip.Addr]struct{}
)

// ParseNameserverExemptList returns the IPs listed in OPENSANDBOX_EGRESS_NAMESERVER_EXEMPT
// (comma-separated). Only single IPs are accepted; invalid or CIDR entries are skipped.
// The environment is parsed on the first call and the result is cached afterwards.
// Used for nft allow set, iptables, and UpstreamInExemptList.
// The returned slice is the cached one and must not be modified by callers.
func ParseNameserverExemptList() []netip.Addr {
	exemptListOnce.Do(parseNameserverExemptListUncached)
	return exemptAddrs
}

// parseNameserverExemptListUncached reads constants.EnvNameserverExempt and fills
// exemptAddrs / exemptSet. An empty variable clears both. Otherwise every
// comma-separated entry that parses as a single IP is kept once, in input order;
// everything else (blank fields, CIDRs, garbage) is ignored.
func parseNameserverExemptListUncached() {
	raw := strings.TrimSpace(os.Getenv(constants.EnvNameserverExempt))
	if raw == "" {
		exemptAddrs, exemptSet = nil, nil
		return
	}

	seen := map[netip.Addr]struct{}{}
	var ordered []netip.Addr
	for _, field := range strings.Split(raw, ",") {
		// ParseAddr also rejects the empty string, so blank fields are skipped here.
		addr, err := netip.ParseAddr(strings.TrimSpace(field))
		if err != nil {
			continue
		}
		if _, dup := seen[addr]; dup {
			continue
		}
		seen[addr] = struct{}{}
		ordered = append(ordered, addr)
	}
	exemptAddrs, exemptSet = ordered, seen
}

// UpstreamInExemptList reports whether upstreamHost is an IP contained in the
// nameserver exempt list (exact IP match). A host that is not a parsable IP never matches.
// When true, the proxy should not set SO_MARK so upstream traffic follows normal
// routing (e.g. via tun).
func UpstreamInExemptList(upstreamHost string) bool {
	if addr, err := netip.ParseAddr(upstreamHost); err == nil {
		ParseNameserverExemptList() // make sure exemptSet has been populated
		_, exempt := exemptSet[addr]
		return exempt
	}
	return false
}


================================================
FILE: components/egress/pkg/dnsproxy/exempt_test.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package dnsproxy

import (
	"net/netip"
	"sync"
	"testing"

	"github.com/alibaba/opensandbox/egress/pkg/constants"
	"github.com/stretchr/testify/require"
)

// resetNameserverExemptCache clears the package-level exempt-list cache so the next
// ParseNameserverExemptList call re-reads the environment.
//
// It also registers a cleanup that clears the cache again when the test ends, so
// values parsed from a test-only environment do not leak into later tests. Call it
// after t.Setenv: cleanups run last-in-first-out, so the cache is cleared before the
// environment variable is restored.
func resetNameserverExemptCache(t *testing.T) {
	t.Helper()
	reset := func() {
		exemptAddrs = nil
		exemptSet = nil
		exemptListOnce = sync.Once{}
	}
	reset()
	t.Cleanup(reset)
}

// TestParseNameserverExemptList_IPOnly checks that only single IPs survive parsing
// (garbage, CIDRs and blank fields are dropped) and that the result is cached.
func TestParseNameserverExemptList_IPOnly(t *testing.T) {
	t.Setenv(constants.EnvNameserverExempt, "1.1.1.1, 2001:db8::1 ,invalid, 10.0.0.0/8, ,")
	resetNameserverExemptCache(t)

	expected := []netip.Addr{
		netip.MustParseAddr("1.1.1.1"),
		netip.MustParseAddr("2001:db8::1"),
	}

	first := ParseNameserverExemptList()
	require.Equal(t, expected, first, "ParseNameserverExemptList() mismatch")

	// A second call must serve the same cached result.
	second := ParseNameserverExemptList()
	require.Equal(t, expected, second, "cached ParseNameserverExemptList() mismatch")
}

// TestUpstreamInExemptList_IPOnly checks exact-IP matching for both address
// families and rejection of unlisted or unparsable hosts.
func TestUpstreamInExemptList_IPOnly(t *testing.T) {
	t.Setenv(constants.EnvNameserverExempt, "1.1.1.1,2001:db8::1")
	resetNameserverExemptCache(t)

	v4Exempt := UpstreamInExemptList("1.1.1.1")
	require.True(t, v4Exempt, "expected IPv4 upstream to be exempt")

	v6Exempt := UpstreamInExemptList("2001:db8::1")
	require.True(t, v6Exempt, "expected IPv6 upstream to be exempt")

	unlisted := UpstreamInExemptList("10.0.0.2")
	require.False(t, unlisted, "unexpected exempt match for non-listed IP")

	garbage := UpstreamInExemptList("not-an-ip")
	require.False(t, garbage, "invalid IP string should not match")
}

// TestUpstreamInExemptList_CIDRIgnored checks that a CIDR entry neither appears in
// the parsed list nor exempts addresses inside its range.
func TestUpstreamInExemptList_CIDRIgnored(t *testing.T) {
	t.Setenv(constants.EnvNameserverExempt, "10.0.0.0/24")
	resetNameserverExemptCache(t)

	parsed := ParseNameserverExemptList()
	require.Empty(t, parsed, "CIDR should be ignored in exempt list")

	inRange := UpstreamInExemptList("10.0.0.5")
	require.False(t, inRange, "CIDR should not make upstream exempt")
}


================================================
FILE: components/egress/pkg/dnsproxy/proxy.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package dnsproxy

import (
	"context"
	"fmt"
	"net"
	"net/netip"
	"os"
	"strings"
	"sync"
	"time"

	"github.com/miekg/dns"

	"github.com/alibaba/opensandbox/egress/pkg/events"
	"github.com/alibaba/opensandbox/egress/pkg/log"
	"github.com/alibaba/opensandbox/egress/pkg/nftables"
	"github.com/alibaba/opensandbox/egress/pkg/policy"
)

// defaultListenAddr is the host:port New uses when it is given an empty listenAddr.
const defaultListenAddr = "127.0.0.1:15353"

// Proxy is a DNS forwarder that evaluates each queried name against a NetworkPolicy:
// denied names are answered with NXDOMAIN, everything else is relayed to one upstream.
//
// policyMu guards policy, which UpdatePolicy may swap while queries are being served.
// listenAddr is bound for both UDP and TCP by Start; servers holds those listeners.
// onResolved and blockedBroadcaster are plain fields without locking.
// NOTE(review): they look intended to be set before Start — confirm with callers.
type Proxy struct {
	policyMu   sync.RWMutex
	policy     *policy.NetworkPolicy
	listenAddr string
	upstream   string // single upstream for MVP
	servers    []*dns.Server

	// optional; called synchronously, before the DNS response is written, when A/AAAA are present
	onResolved func(domain string, ips []nftables.ResolvedIP)

	// optional broadcaster to notify blocked hostnames
	blockedBroadcaster *events.Broadcaster
}

// New creates a Proxy whose upstream resolver is discovered from the system.
// An empty listenAddr selects defaultListenAddr; a nil policy selects the default
// deny-all policy. The error comes from upstream discovery.
func New(p *policy.NetworkPolicy, listenAddr string) (*Proxy, error) {
	addr := listenAddr
	if addr == "" {
		addr = defaultListenAddr
	}

	effective := p
	if effective == nil {
		effective = policy.DefaultDenyPolicy()
	}

	upstream, err := discoverUpstream()
	if err != nil {
		return nil, err
	}

	return &Proxy{
		listenAddr: addr,
		upstream:   upstream,
		policy:     ensurePolicyDefaults(effective),
	}, nil
}

// Start launches the UDP and TCP DNS listeners on p.listenAddr and returns once they
// appear to be running.
//
// A listener that fails within a 200ms grace window (typically a bind error) makes
// Start return that error; in that case every listener that did come up is shut down
// so a partial start does not keep the port. After a successful start the listeners
// are shut down when ctx is cancelled.
func (p *Proxy) Start(ctx context.Context) error {
	handler := dns.HandlerFunc(p.serveDNS)

	servers := []*dns.Server{
		{Addr: p.listenAddr, Net: "udp", Handler: handler},
		{Addr: p.listenAddr, Net: "tcp", Handler: handler},
	}
	p.servers = servers

	// Shutdown on a server that never started (or was already stopped) only returns
	// an error, which is deliberately ignored.
	shutdownAll := func() {
		for _, srv := range servers {
			_ = srv.Shutdown()
		}
	}

	// Buffered so a failing listener never blocks after Start has returned.
	errCh := make(chan error, len(servers))
	for _, srv := range servers {
		go func(s *dns.Server) {
			if err := s.ListenAndServe(); err != nil {
				errCh <- err
			}
		}(srv)
	}

	// Shutdown on context done
	go func() {
		<-ctx.Done()
		shutdownAll()
	}()

	grace := time.NewTimer(200 * time.Millisecond)
	defer grace.Stop()

	select {
	case err := <-errCh:
		// Do not leave the sibling listener running after a partial start.
		shutdownAll()
		return fmt.Errorf("dns proxy failed: %w", err)
	case <-grace.C:
		// small grace window; running fine
		return nil
	}
}

// serveDNS handles one DNS query.
//
//   - A query without a question section is answered with FORMERR.
//   - A name denied by the current policy is published to the blocked broadcaster
//     (if any) and answered with NXDOMAIN.
//   - Everything else is forwarded upstream; a forwarding failure is answered with
//     SERVFAIL. On success the resolved A/AAAA addresses are handed to onResolved
//     before the response is written, so they are registered before the client can
//     connect.
//
// Only the first question is evaluated.
func (p *Proxy) serveDNS(w dns.ResponseWriter, r *dns.Msg) {
	if len(r.Question) == 0 {
		// Build a proper response header by hand (ID echoed, QR bit set). A blank
		// message would carry ID 0 and look like a query rather than a reply.
		malformed := new(dns.Msg)
		malformed.Id = r.Id
		malformed.Response = true
		malformed.Opcode = r.Opcode
		malformed.Rcode = dns.RcodeFormatError
		_ = w.WriteMsg(malformed)
		return
	}
	q := r.Question[0]
	domain := q.Name

	// Snapshot the policy under the read lock; evaluate outside of it.
	p.policyMu.RLock()
	currentPolicy := p.policy
	p.policyMu.RUnlock()
	if currentPolicy != nil && currentPolicy.Evaluate(domain) == policy.ActionDeny {
		p.publishBlocked(domain)
		resp := new(dns.Msg)
		resp.SetRcode(r, dns.RcodeNameError)
		_ = w.WriteMsg(resp)
		return
	}

	resp, err := p.forward(r)
	if err != nil {
		log.Warnf("[dns] forward error for %s: %v", domain, err)
		fail := new(dns.Msg)
		fail.SetRcode(r, dns.RcodeServerFailure)
		_ = w.WriteMsg(fail)
		return
	}
	p.maybeNotifyResolved(domain, resp)
	_ = w.WriteMsg(resp)
}

// maybeNotifyResolved invokes onResolved, synchronously, when a callback is set and
// resp carries at least one A/AAAA answer. Running it inline means the IPs are in nft
// before the client receives the DNS response and connects.
func (p *Proxy) maybeNotifyResolved(domain string, resp *dns.Msg) {
	if p.onResolved != nil {
		if resolved := extractResolvedIPs(resp); len(resolved) > 0 {
			p.onResolved(domain, resolved)
		}
	}
}

// forward relays r to the configured upstream and returns its answer.
// A fresh client with a 5s timeout is used per query; its dialer comes from
// dialerWithMark.
// NOTE(review): the client's Net is left at its zero value, so the upstream
// exchange does not follow the transport the downstream client used — confirm that
// truncated answers are handled acceptably.
func (p *Proxy) forward(r *dns.Msg) (*dns.Msg, error) {
	client := new(dns.Client)
	client.Timeout = 5 * time.Second
	client.Dialer = p.dialerWithMark()

	answer, _, err := client.Exchange(r, p.upstream)
	return answer, err
}

// UpstreamHost returns the host portion of the upstream "host:port" address,
// or the empty string when the address cannot be split.
func (p *Proxy) UpstreamHost() string {
	if host, _, err := net.SplitHostPort(p.upstream); err == nil {
		return host
	}
	return ""
}

// UpdatePolicy replaces the policy enforced by the proxy under the write lock.
// A nil newPolicy reverts to the default deny-all policy; a policy with an empty
// DefaultAction gets it set to deny (the passed policy is modified in place).
func (p *Proxy) UpdatePolicy(newPolicy *policy.NetworkPolicy) {
	p.policyMu.Lock()
	defer p.policyMu.Unlock()

	p.policy = ensurePolicyDefaults(newPolicy)
}

// CurrentPolicy returns the policy the proxy enforces right now, read under the
// read lock. The pointer itself is returned, not a copy.
func (p *Proxy) CurrentPolicy() *policy.NetworkPolicy {
	p.policyMu.RLock()
	current := p.policy
	p.policyMu.RUnlock()
	return current
}

// SetOnResolved sets the callback invoked when an allowed domain resolves to A/AAAA.
// The callback runs synchronously while the query is being served, before the DNS
// response is written, so a slow callback delays the answer. Pass nil to disable.
// Only used when L2 dynamic IP is enabled (e.g. dns+nft mode).
// NOTE(review): the field is written without locking while the query path reads it;
// set it before Start — confirm with callers.
func (p *Proxy) SetOnResolved(fn func(domain string, ips []nftables.ResolvedIP)) {
	p.onResolved = fn
}

// SetBlockedBroadcaster wires a broadcaster used to notify blocked hostnames.
// Passing nil disables publishing (publishBlocked returns early on a nil broadcaster).
// NOTE(review): the field is written without locking; set it before Start — confirm
// with callers.
func (p *Proxy) SetBlockedBroadcaster(b *events.Broadcaster) {
	p.blockedBroadcaster = b
}

// publishBlocked reports a denied hostname to the blocked broadcaster, if one is set.
// The name is normalized by dropping one trailing dot and lower-casing it; a name
// that normalizes to the empty string (e.g. the root ".") is not published.
func (p *Proxy) publishBlocked(domain string) {
	broadcaster := p.blockedBroadcaster
	if broadcaster == nil {
		return
	}

	hostname := strings.TrimSuffix(domain, ".")
	hostname = strings.ToLower(hostname)
	if hostname != "" {
		event := events.BlockedEvent{
			Hostname:  hostname,
			Timestamp: time.Now().UTC(),
		}
		broadcaster.Publish(event)
	}
}

// extractResolvedIPs converts the A and AAAA records of resp.Answer into ResolvedIP
// values, in answer order. Other record types and records with a missing or
// malformed address are skipped. It returns nil for a nil message, an empty answer
// section, or when no usable record is found.
//
// Addresses are converted straight from the record bytes with netip.AddrFromSlice.
// Unmap turns IPv4-mapped IPv6 bytes (the 16-byte form net.IP often uses for IPv4)
// into a plain IPv4 address.
func extractResolvedIPs(resp *dns.Msg) []nftables.ResolvedIP {
	if resp == nil || len(resp.Answer) == 0 {
		return nil
	}

	var out []nftables.ResolvedIP
	for _, rr := range resp.Answer {
		var (
			raw net.IP
			ttl uint32
		)
		switch v := rr.(type) {
		case *dns.A:
			raw, ttl = v.A, v.Hdr.Ttl
		case *dns.AAAA:
			raw, ttl = v.AAAA, v.Hdr.Ttl
		default:
			continue
		}

		// AddrFromSlice rejects nil and any slice that is not 4 or 16 bytes long.
		addr, ok := netip.AddrFromSlice(raw)
		if !ok {
			continue
		}
		out = append(out, nftables.ResolvedIP{
			Addr: addr.Unmap(),
			TTL:  time.Duration(ttl) * time.Second,
		})
	}
	return out
}

// fallbackUpstream is the resolver discoverUpstream returns when /etc/resolv.conf
// cannot be read or lists no nameserver.
const fallbackUpstream = "8.8.8.8:53"

// discoverUpstream picks the upstream resolver ("host:port") from /etc/resolv.conf.
//
// The first non-loopback nameserver wins (e.g. K8s cluster DNS listed after
// 127.0.0.11). If only loopback entries exist (e.g. Docker's 127.0.0.11) the first
// one is used: proxy upstream traffic is marked and bypasses the redirect, so
// loopback is reachable from the sidecar. When the file cannot be read or has no
// nameserver, fallbackUpstream is returned. The returned error is always nil.
func discoverUpstream() (string, error) {
	cfg, err := dns.ClientConfigFromFile("/etc/resolv.conf")
	if err != nil {
		log.Warnf("[dns] fallback upstream resolver due to error: %v", err)
		return fallbackUpstream, nil
	}
	if len(cfg.Servers) == 0 {
		log.Warnf("[dns] no nameserver found in /etc/resolv.conf; using fallback upstream %s", fallbackUpstream)
		return fallbackUpstream, nil
	}

	// Default to the first entry; it is the answer whenever every entry is loopback.
	chosen := cfg.Servers[0]
	for _, s := range cfg.Servers {
		// Entries that do not parse as an IP are treated as non-loopback.
		if ip := net.ParseIP(s); ip == nil || !ip.IsLoopback() {
			chosen = s
			break
		}
	}
	return net.JoinHostPort(chosen, cfg.Port), nil
}

// ResolvNameserverIPs returns the nameserver entries of resolvPath that parse as
// IPv4/IPv6 addresses, in file order; other entries are skipped.
// Used at startup to whitelist the system DNS so client traffic to it is allowed and
// the proxy can use it as upstream.
//
// An unreadable file or a file without nameservers yields (nil, nil); the error
// result is currently always nil.
func ResolvNameserverIPs(resolvPath string) ([]netip.Addr, error) {
	cfg, err := dns.ClientConfigFromFile(resolvPath)
	if err != nil || len(cfg.Servers) == 0 {
		return nil, nil
	}

	var addrs []netip.Addr
	for _, server := range cfg.Servers {
		if addr, parseErr := netip.ParseAddr(server); parseErr == nil {
			addrs = append(addrs, addr)
		}
	}
	return addrs, nil
}

// LoadPolicyFromEnvVar parses the policy stored in the environment variable envName.
// An unset or empty variable yields the default deny-all policy; otherwise the
// result (and error) of policy.ParsePolicy is returned.
func LoadPolicyFromEnvVar(envName string) (*policy.NetworkPolicy, error) {
	if raw := os.Getenv(envName); raw != "" {
		return policy.ParsePolicy(raw)
	}
	return policy.DefaultDenyPolicy(), nil
}

// ensurePolicyDefaults guarantees a usable policy: nil becomes the default deny-all
// policy, and an empty DefaultAction is set to deny on the passed policy itself
// (the argument is modified in place and returned).
func ensurePolicyDefaults(p *policy.NetworkPolicy) *policy.NetworkPolicy {
	switch {
	case p == nil:
		return policy.DefaultDenyPolicy()
	case p.DefaultAction == "":
		p.DefaultAction = policy.ActionDeny
	}
	return p
}


================================================
FILE: components/egress/pkg/dnsproxy/proxy_linux.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build linux

package dnsproxy

import (
	"net"
	"sync"
	"syscall"
	"time"

	"golang.org/x/sys/unix"

	"github.com/alibaba/opensandbox/egress/pkg/constants"
	"github.com/alibaba/opensandbox/egress/pkg/log"
)

// exemptDialerLogOnce limits the "upstream in exempt list" log line to one emission
// per process; dialerWithMark is called for every forwarded query.
var exemptDialerLogOnce sync.Once

// dialerWithMark returns the dialer used for the proxy's own upstream DNS queries.
//
// Normally the dialer sets SO_MARK (constants.MarkValue) on the socket so iptables
// can RETURN marked packets and skip the redirect. When the upstream is in the
// nameserver exempt list a plain dialer (no mark) is returned so upstream traffic
// follows normal routing (e.g. via tun); iptables still does not redirect by
// destination exempt. Both variants use a 5s timeout.
func (p *Proxy) dialerWithMark() *net.Dialer {
	dialer := &net.Dialer{Timeout: 5 * time.Second}

	if UpstreamInExemptList(p.UpstreamHost()) {
		exemptDialerLogOnce.Do(func() {
			log.Infof("[dns] upstream %s in nameserver exempt list, not setting SO_MARK", p.UpstreamHost())
		})
		return dialer
	}

	dialer.Control = func(_, _ string, conn syscall.RawConn) error {
		// sockErr carries the setsockopt result out of the Control callback;
		// ctrlErr reports a failure to run the callback at all.
		var sockErr error
		ctrlErr := conn.Control(func(fd uintptr) {
			sockErr = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_MARK, constants.MarkValue)
		})
		if ctrlErr != nil {
			return ctrlErr
		}
		return sockErr
	}
	return dialer
}


================================================
FILE: components/egress/pkg/dnsproxy/proxy_other.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !linux

package dnsproxy

import (
	"net"
	"time"
)

// dialerWithMark on non-Linux platforms has no SO_MARK support and returns a basic
// dialer with a 5s timeout.
func (p *Proxy) dialerWithMark() *net.Dialer {
	dialer := new(net.Dialer)
	dialer.Timeout = 5 * time.Second
	return dialer
}


================================================
FILE: components/egress/pkg/dnsproxy/proxy_test.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package dnsproxy

import (
	"net"
	"testing"
	"time"

	"github.com/miekg/dns"
	"github.com/stretchr/testify/require"

	"github.com/alibaba/opensandbox/egress/pkg/nftables"
	"github.com/alibaba/opensandbox/egress/pkg/policy"
)

// TestProxyUpdatePolicy walks the policy through default deny, an explicit allow
// rule, and back to default deny after clearing with nil.
func TestProxyUpdatePolicy(t *testing.T) {
	const name = "example.com."

	px, err := New(nil, "127.0.0.1:15353")
	require.NoError(t, err, "init proxy")

	initial := px.CurrentPolicy()
	require.NotNil(t, initial, "expected default deny policy (non-nil)")
	require.Equal(t, policy.ActionDeny, initial.Evaluate(name), "expected default deny")

	allowExample, err := policy.ParsePolicy(`{"defaultAction":"deny","egress":[{"action":"allow","target":"example.com"}]}`)
	require.NoError(t, err, "parse policy")

	px.UpdatePolicy(allowExample)
	updated := px.CurrentPolicy()
	require.NotNil(t, updated, "expected policy after update")
	require.Equal(t, policy.ActionAllow, updated.Evaluate(name), "policy evaluation mismatch")

	px.UpdatePolicy(nil)
	cleared := px.CurrentPolicy()
	require.NotNil(t, cleared, "expected default deny policy after clearing")
	require.Equal(t, policy.ActionDeny, cleared.Evaluate(name), "expected default deny after clearing")
}

// TestLoadPolicyFromEnvVar checks both paths: a JSON policy in the variable is
// parsed, and an empty variable falls back to default deny.
func TestLoadPolicyFromEnvVar(t *testing.T) {
	const envName = "TEST_EGRESS_POLICY"

	t.Setenv(envName, `{"defaultAction":"deny","egress":[{"action":"allow","target":"example.com"}]}`)
	parsed, parseErr := LoadPolicyFromEnvVar(envName)
	require.NoError(t, parseErr, "unexpected error")
	require.NotNil(t, parsed, "expected parsed policy")
	require.Equal(t, policy.ActionAllow, parsed.Evaluate("example.com."), "expected parsed policy to allow example.com")

	t.Setenv(envName, "")
	fallback, fallbackErr := LoadPolicyFromEnvVar(envName)
	require.NoError(t, fallbackErr, "unexpected error on empty env")
	require.NotNil(t, fallback, "expected default deny policy when env is empty")
	require.Equal(t, policy.ActionDeny, fallback.DefaultAction, "expected default deny when env is empty")
}

// TestExtractResolvedIPs checks that A and AAAA answers are converted in answer
// order with their TTLs.
func TestExtractResolvedIPs(t *testing.T) {
	reply := &dns.Msg{}
	reply.Answer = append(reply.Answer,
		&dns.A{Hdr: dns.RR_Header{Name: "example.com.", Ttl: 120}, A: net.ParseIP("1.2.3.4")},
		&dns.AAAA{Hdr: dns.RR_Header{Name: "example.com.", Ttl: 60}, AAAA: net.ParseIP("2001:db8::1")},
		&dns.A{Hdr: dns.RR_Header{Name: "example.com.", Ttl: 90}, A: net.ParseIP("5.6.7.8")},
	)

	got := extractResolvedIPs(reply)
	require.Len(t, got, 3, "expected 3 IPs")

	// Order follows Answer; each row carries the messages for its position.
	expectations := []struct {
		addr    string
		ttl     time.Duration
		addrMsg string
		ttlMsg  string
	}{
		{"1.2.3.4", 120 * time.Second, "first IP mismatch", "first IP TTL mismatch"},
		{"2001:db8::1", 60 * time.Second, "second IP mismatch", "second IP TTL mismatch"},
		{"5.6.7.8", 90 * time.Second, "third IP mismatch", "third IP TTL mismatch"},
	}
	for i, want := range expectations {
		require.Equal(t, want.addr, got[i].Addr.String(), want.addrMsg)
		require.Equal(t, want.ttl, got[i].TTL, want.ttlMsg)
	}
}

func TestExtractResolvedIPs_EmptyOrNil(t *testing.T) {
	require.Nil(t, extractResolvedIPs(nil), "nil msg: expected nil")
	msg := new(dns.Msg)
	require.Nil(t, extractResolvedIPs(msg), "empty answer: expected nil")
	msg.Answer = []dns.RR{&dns.CNAME{Hdr: dns.RR_Header{Name: "x."}, Target: "y."}}
	require.Nil(t, extractResolvedIPs(msg), "CNAME only: expected nil")
}

// TestSetOnResolved checks that SetOnResolved stores the callback and that nil
// clears it. The callback itself is never invoked here.
func TestSetOnResolved(t *testing.T) {
	proxy, err := New(policy.DefaultDenyPolicy(), "")
	require.NoError(t, err)

	proxy.SetOnResolved(func(string, []nftables.ResolvedIP) {})
	require.NotNil(t, proxy.onResolved, "SetOnResolved did not set callback")

	proxy.SetOnResolved(nil)
	require.Nil(t, proxy.onResolved, "SetOnResolved(nil) did not clear callback")
}

// TestMaybeNotifyResolved_CallsCallbackWhenAOrAAAA checks that the registered
// callback receives the domain and the resolved address when the response
// contains an A record.
func TestMaybeNotifyResolved_CallsCallbackWhenAOrAAAA(t *testing.T) {
	proxy, err := New(policy.DefaultDenyPolicy(), "")
	require.NoError(t, err)

	type notification struct {
		domain string
		ips    []nftables.ResolvedIP
	}
	// Buffered so the callback never blocks, whichever goroutine runs it.
	notified := make(chan notification, 1)
	proxy.SetOnResolved(func(domain string, ips []nftables.ResolvedIP) {
		notified <- notification{domain: domain, ips: ips}
	})

	resp := &dns.Msg{
		Answer: []dns.RR{
			&dns.A{Hdr: dns.RR_Header{Name: "example.com.", Ttl: 120}, A: net.ParseIP("1.2.3.4")},
		},
	}
	proxy.maybeNotifyResolved("example.com.", resp)

	select {
	case <-time.After(2 * time.Second):
		require.FailNow(t, "callback was not invoked")
	case n := <-notified:
		require.Equal(t, "example.com.", n.domain, "domain mismatch")
		require.Len(t, n.ips, 1, "expected one resolved IP")
		require.Equal(t, "1.2.3.4", n.ips[0].Addr.String(), "resolved IP mismatch")
	}
}

// TestMaybeNotifyResolved_NoCallWhenOnResolvedNil exercises maybeNotifyResolved
// with no callback registered; the test passes as long as the call does not panic.
func TestMaybeNotifyResolved_NoCallWhenOnResolvedNil(t *testing.T) {
	proxy, err := New(policy.DefaultDenyPolicy(), "")
	require.NoError(t, err)

	answer := &dns.A{Hdr: dns.RR_Header{Name: "x.", Ttl: 60}, A: net.ParseIP("10.0.0.1")}
	resp := &dns.Msg{Answer: []dns.RR{answer}}

	// No callback set and nothing to assert: reaching the end is the success criterion.
	proxy.maybeNotifyResolved("x.", resp)
}

// TestMaybeNotifyResolved_NoCallWhenNoAOrAAAA checks that the callback stays
// silent when the response carries only a CNAME record.
func TestMaybeNotifyResolved_NoCallWhenNoAOrAAAA(t *testing.T) {
	proxy, err := New(policy.DefaultDenyPolicy(), "")
	require.NoError(t, err)

	type notification struct {
		domain string
		ips    []nftables.ResolvedIP
	}
	notified := make(chan notification, 1)
	proxy.SetOnResolved(func(domain string, ips []nftables.ResolvedIP) {
		notified <- notification{domain: domain, ips: ips}
	})

	cname := &dns.CNAME{Hdr: dns.RR_Header{Name: "x."}, Target: "y."}
	resp := &dns.Msg{Answer: []dns.RR{cname}}
	proxy.maybeNotifyResolved("x.", resp)

	// Give a possibly asynchronous callback a short window to (incorrectly) fire.
	select {
	case <-time.After(200 * time.Millisecond):
		// Expected: no callback
	case <-notified:
		require.FailNow(t, "callback should not be invoked when resp has no A/AAAA")
	}
}


================================================
FILE: components/egress/pkg/events/broadcaster.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package events

import (
	"context"
	"sync"
	"sync/atomic"
	"time"

	"github.com/alibaba/opensandbox/egress/pkg/log"
)

// defaultQueueSize is the per-subscriber channel capacity used by NewBroadcaster
// when BroadcasterConfig.QueueSize is zero or negative.
const defaultQueueSize = 128

// BlockedEvent describes a blocked hostname notification.
type BlockedEvent struct {
	// Hostname is the name that was blocked; serialized as "hostname".
	Hostname  string    `json:"hostname"`
	// Timestamp is supplied by the publisher; serialized as "timestamp".
	Timestamp time.Time `json:"timestamp"`
}

// Subscriber consumes blocked events.
//
// The Broadcaster invokes HandleBlocked from a dedicated per-subscriber worker
// goroutine, one event at a time, passing the broadcaster's own context.
type Subscriber interface {
	HandleBlocked(ctx context.Context, ev BlockedEvent)
}

// BroadcasterConfig defines queue sizing for the broadcaster.
type BroadcasterConfig struct {
	// QueueSize is the buffer capacity of each subscriber's channel.
	// Values <= 0 are replaced by defaultQueueSize in NewBroadcaster.
	QueueSize int
}

// Broadcaster fans out blocked events to one or more subscribers via channels.
type Broadcaster struct {
	// ctx is derived from the context given to NewBroadcaster; subscriber
	// workers exit when it is done. cancel is invoked by Close.
	ctx    context.Context
	cancel context.CancelFunc

	// mu guards subscribers: Publish takes the read lock, AddSubscriber and
	// Close take the write lock.
	mu          sync.RWMutex
	subscribers []chan BlockedEvent
	// queueSize is the buffer capacity used for each new subscriber channel.
	queueSize   int
	// closed is set at the end of Close; Publish and Close return early once it is true.
	closed      atomic.Bool
}

// NewBroadcaster returns a broadcaster whose workers stop when ctx is cancelled
// or Close is called. A non-positive cfg.QueueSize falls back to defaultQueueSize (128).
func NewBroadcaster(ctx context.Context, cfg BroadcasterConfig) *Broadcaster {
	size := cfg.QueueSize
	if size <= 0 {
		size = defaultQueueSize
	}

	b := &Broadcaster{queueSize: size}
	b.ctx, b.cancel = context.WithCancel(ctx)
	return b
}

// AddSubscriber registers a new subscriber with its own buffered queue and worker.
//
// A nil sub is ignored. Once the broadcaster has been closed the call is a
// no-op: Publish would never deliver to the new queue and Close would never
// close it, so registering it would only leave an orphaned channel behind.
func (b *Broadcaster) AddSubscriber(sub Subscriber) {
	if sub == nil {
		return
	}
	ch := make(chan BlockedEvent, b.queueSize)

	b.mu.Lock()
	// Close sets closed while holding mu, so this check cannot race with it.
	if b.closed.Load() {
		b.mu.Unlock()
		return
	}
	b.subscribers = append(b.subscribers, ch)
	b.mu.Unlock()

	// Worker: delivers queued events one at a time until the broadcaster
	// context is done or the queue is closed by Close.
	go func() {
		for {
			select {
			case <-b.ctx.Done():
				return
			case ev, ok := <-ch:
				if !ok {
					return
				}
				sub.HandleBlocked(b.ctx, ev)
			}
		}
	}()
}

// Publish offers event to every subscriber queue without blocking. When a
// queue is full the event is dropped for that subscriber and a warning is
// logged. Calls made after Close has completed are ignored.
func (b *Broadcaster) Publish(event BlockedEvent) {
	if b.closed.Load() {
		return
	}

	b.mu.RLock()
	defer b.mu.RUnlock()

	for i := range b.subscribers {
		queue := b.subscribers[i]
		select {
		case queue <- event:
			continue
		default:
		}
		// Reached only when the non-blocking send above did not succeed.
		log.Warnf("[events] blocked-event queue full; dropping hostname %s", event.Hostname)
	}
}

// Close cancels the broadcaster context, detaches all subscriber queues and
// closes them, then marks the broadcaster closed. Calls made after a completed
// Close return immediately.
func (b *Broadcaster) Close() {
	if b.closed.Load() {
		return
	}

	// Signal workers first so they stop picking up further events.
	b.cancel()

	b.mu.Lock()
	defer b.mu.Unlock()
	// Deferred after Unlock, so it runs before it: closed is set while mu is still held.
	defer b.closed.Store(true)

	queues := b.subscribers
	b.subscribers = nil
	for i := range queues {
		close(queues[i])
	}
}


================================================
FILE: components/egress/pkg/events/events_test.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package events

import (
	"context"
	"encoding/json"
	"io"
	"net/http"
	"net/http/httptest"
	"testing"
	"time"

	"github.com/alibaba/opensandbox/egress/pkg/constants"
	"github.com/stretchr/testify/require"
)

// captureSubscriber is a test Subscriber that forwards every event it handles
// to recv so tests can observe deliveries.
type captureSubscriber struct {
	recv chan BlockedEvent
}

// HandleBlocked sends ev on c.recv; it blocks if recv has no free capacity.
func (c *captureSubscriber) HandleBlocked(_ context.Context, ev BlockedEvent) {
	c.recv <- ev
}

// blockingSubscriber is a test Subscriber that simulates a slow consumer: each
// HandleBlocked call parks until block is closed.
type blockingSubscriber struct {
	block chan struct{}
}

// HandleBlocked ignores the event and waits for b.block to be closed, which
// keeps the broadcaster worker busy and lets the queue back up.
func (b *blockingSubscriber) HandleBlocked(_ context.Context, _ BlockedEvent) {
	<-b.block
}

// TestBroadcasterFanout checks that one published event reaches every
// registered subscriber.
func TestBroadcasterFanout(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	bc := NewBroadcaster(ctx, BroadcasterConfig{QueueSize: 2})

	receivers := []struct {
		label string
		sub   *captureSubscriber
	}{
		{label: "sub1", sub: &captureSubscriber{recv: make(chan BlockedEvent, 1)}},
		{label: "sub2", sub: &captureSubscriber{recv: make(chan BlockedEvent, 1)}},
	}
	for _, r := range receivers {
		bc.AddSubscriber(r.sub)
	}

	published := BlockedEvent{Hostname: "example.com.", Timestamp: time.Now()}
	bc.Publish(published)

	// Each subscriber gets its own 2s window to observe the event.
	for _, r := range receivers {
		select {
		case <-time.After(2 * time.Second):
			require.FailNow(t, r.label+" did not receive event")
		case got := <-r.sub.recv:
			require.Equal(t, published.Hostname, got.Hostname, r.label+" expected hostname")
		}
	}

	bc.Close()
}

// TestBroadcasterDropsWhenSubscriberBackedUp publishes two events to a
// subscriber that is stuck in HandleBlocked behind a queue of size 1.
//
// The test makes no assertions: it only verifies that Publish returns instead
// of blocking and that shutdown completes. Whether the second event is dropped
// or queued depends on whether the worker has already dequeued the first one.
func TestBroadcasterDropsWhenSubscriberBackedUp(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Smallest useful queue, so back-pressure builds up immediately.
	bc := NewBroadcaster(ctx, BroadcasterConfig{QueueSize: 1})
	release := make(chan struct{})
	bc.AddSubscriber(&blockingSubscriber{block: release})

	events := []BlockedEvent{
		{Hostname: "first.example", Timestamp: time.Now()},
		{Hostname: "second.example", Timestamp: time.Now()},
	}
	// Publish must not block even though the subscriber never finishes handling.
	for _, ev := range events {
		bc.Publish(ev)
	}

	// Unblock the subscriber so its worker can drain and exit.
	close(release)

	bc.Close()
}

// TestWebhookSubscriberSendsPayload checks that HandleBlocked POSTs a JSON
// payload with the hostname, source and timestamp, and that the sandbox ID is
// the value of the environment variable at construction time, not at send time.
func TestWebhookSubscriberSendsPayload(t *testing.T) {
	const (
		sandboxIDInitial = "sandbox-test"
		sandboxIDLater   = "sandbox-updated"
	)
	var received struct {
		method  string
		payload webhookPayload
	}
	t.Setenv(constants.ENVSandboxID, sandboxIDInitial)

	handler := func(w http.ResponseWriter, r *http.Request) {
		received.method = r.Method
		raw, _ := io.ReadAll(r.Body)
		_ = r.Body.Close()
		_ = json.Unmarshal(raw, &received.payload)
		w.WriteHeader(http.StatusOK)
	}
	server := httptest.NewServer(http.HandlerFunc(handler))
	defer server.Close()

	sub := NewWebhookSubscriber(server.URL)
	require.NotNil(t, sub, "webhook subscriber should not be nil")
	// Changing the variable after construction must not affect the payload.
	t.Setenv(constants.ENVSandboxID, sandboxIDLater)

	ev := BlockedEvent{
		Hostname:  "Example.com.",
		Timestamp: time.Date(2026, 1, 2, 3, 4, 5, 0, time.UTC),
	}
	sub.HandleBlocked(context.Background(), ev)

	require.Equal(t, http.MethodPost, received.method, "expected POST")
	require.Equal(t, ev.Hostname, received.payload.Hostname, "expected hostname")
	require.Equal(t, webhookSource, received.payload.Source, "expected source")
	require.Equal(t, sandboxIDInitial, received.payload.SandboxID, "expected sandboxId captured at init")
	require.NotEmpty(t, received.payload.Timestamp, "expected timestamp to be set")
}


================================================
FILE: components/egress/pkg/events/webhook.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package events

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"time"

	"github.com/alibaba/opensandbox/egress/pkg/constants"
	"github.com/alibaba/opensandbox/egress/pkg/log"
)

const (
	// webhookSource is the value sent in the payload's "source" field.
	webhookSource         = "opensandbox-egress"
	// defaultWebhookTimeout bounds each individual POST attempt.
	defaultWebhookTimeout = 5 * time.Second
	// defaultWebhookRetries is the number of retries after the first attempt.
	defaultWebhookRetries = 3
	// defaultWebhookBackoff is the base delay between attempts; it doubles on each retry.
	defaultWebhookBackoff = 1 * time.Second
)

// WebhookSubscriber delivers blocked events to an HTTP endpoint.
type WebhookSubscriber struct {
	// url is the endpoint events are POSTed to.
	url        string
	client     *http.Client
	// timeout is the per-attempt deadline; values <= 0 disable it.
	timeout    time.Duration
	// maxRetries is the number of additional attempts after the first one.
	maxRetries int
	// backoff is the base delay between attempts, doubled after each failure.
	backoff    time.Duration
	// sandboxID is read from the environment once, in NewWebhookSubscriber.
	sandboxID  string
}

// webhookPayload is the JSON body POSTed to the webhook for each blocked event.
type webhookPayload struct {
	Hostname  string `json:"hostname"`
	// Timestamp is the event time in UTC, formatted as RFC 3339.
	Timestamp string `json:"timestamp"`
	// Source is always webhookSource.
	Source    string `json:"source"`
	SandboxID string `json:"sandboxId"`
}

// NewWebhookSubscriber returns a subscriber that POSTs to url using the
// package's default timeout, retry count and backoff. The sandbox ID is read
// from the environment once, here. An empty url yields nil.
func NewWebhookSubscriber(url string) *WebhookSubscriber {
	if url == "" {
		return nil
	}

	ws := new(WebhookSubscriber)
	ws.url = url
	ws.client = &http.Client{}
	ws.timeout = defaultWebhookTimeout
	ws.maxRetries = defaultWebhookRetries
	ws.backoff = defaultWebhookBackoff
	ws.sandboxID = os.Getenv(constants.ENVSandboxID)
	return ws
}

// HandleBlocked sends the blocked event to the configured webhook with retries.
//
// Each attempt is bounded by w.timeout (when positive). Transport errors and
// 5xx responses are retried up to w.maxRetries times with exponential backoff
// (w.backoff, 2*w.backoff, ...). Responses below 300 end the call
// successfully; other responses below 500 are logged and not retried. The
// backoff wait is abandoned as soon as ctx is done, so a cancelled broadcaster
// does not keep its worker asleep. Failures are logged, never returned.
//
// A nil receiver is a no-op: NewWebhookSubscriber returns nil for an empty
// URL, and a nil *WebhookSubscriber stored in a Subscriber interface is not
// caught by an interface nil check.
func (w *WebhookSubscriber) HandleBlocked(ctx context.Context, ev BlockedEvent) {
	if w == nil {
		return
	}

	payload := webhookPayload{
		Hostname:  ev.Hostname,
		Timestamp: ev.Timestamp.UTC().Format(time.RFC3339),
		Source:    webhookSource,
		SandboxID: w.sandboxID,
	}
	body, err := json.Marshal(payload)
	if err != nil {
		log.Warnf("[webhook] failed to marshal payload for hostname %s: %v", ev.Hostname, err)
		return
	}

	var lastErr error
	for attempt := 0; attempt <= w.maxRetries; attempt++ {
		reqCtx := ctx
		cancel := func() {}
		if w.timeout > 0 {
			reqCtx, cancel = context.WithTimeout(ctx, w.timeout)
		}

		req, err := http.NewRequestWithContext(reqCtx, http.MethodPost, w.url, bytes.NewReader(body))
		if err != nil {
			// A request that cannot be built will not succeed on retry either.
			cancel()
			lastErr = err
			break
		}
		req.Header.Set("Content-Type", "application/json")

		resp, err := w.client.Do(req)
		if err == nil {
			// Drain and close the body so the underlying connection can be reused.
			_, _ = io.Copy(io.Discard, resp.Body)
			_ = resp.Body.Close()
			if resp.StatusCode < 300 {
				cancel()
				return
			}
			if resp.StatusCode < 500 {
				cancel()
				log.Warnf("[webhook] non-retriable status %d for hostname %s", resp.StatusCode, payload.Hostname)
				return
			}
			err = fmt.Errorf("status %d", resp.StatusCode)
		}

		cancel()
		lastErr = err
		if attempt < w.maxRetries {
			// Wait out the backoff, but stop immediately if the caller's context ends.
			wait := time.NewTimer(w.backoff * time.Duration(1<<attempt))
			select {
			case <-ctx.Done():
				wait.Stop()
				log.Warnf("[webhook] aborted notifying hostname %s: %v (last error: %v)", payload.Hostname, ctx.Err(), lastErr)
				return
			case <-wait.C:
			}
		}
	}

	if lastErr != nil {
		log.Warnf("[webhook] failed to notify hostname %s after retries: %v", payload.Hostname, lastErr)
	}
}


================================================
FILE: components/egress/pkg/iptables/redirect.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package iptables

import (
	"fmt"
	"net/netip"
	"os/exec"
	"strconv"

	"github.com/alibaba/opensandbox/egress/pkg/constants"
	"github.com/alibaba/opensandbox/egress/pkg/log"
)

// SetupRedirect installs OUTPUT nat redirect for DNS (udp/tcp 53 -> port).
//
// exemptDst: optional list of destination IPs; traffic to these is not redirected.
// IPv4-mapped IPv6 addresses are treated as IPv4, since packets to them leave
// as IPv4. Packets carrying mark are also RETURNed (proxy's own upstream).
// Requires CAP_NET_ADMIN.
//
// Rules are appended in order: exemptions, mark bypass, then the REDIRECT
// rules, for both iptables and ip6tables. The first failing command aborts the
// setup and is reported in the returned error together with its output; rules
// installed before the failure are not rolled back.
func SetupRedirect(port int, exemptDst []netip.Addr) error {
	log.Infof("installing iptables DNS redirect: OUTPUT port 53 -> %d (mark %s bypass)", port, constants.MarkHex)
	targetPort := strconv.Itoa(port)

	var rules [][]string
	for _, d := range exemptDst {
		// Unmap turns ::ffff:a.b.c.d into a.b.c.d so the rule lands in the IPv4 table.
		addr := d.Unmap()
		bin := "ip6tables"
		if addr.Is4() {
			bin = "iptables"
		}
		dst := addr.String()
		for _, proto := range []string{"udp", "tcp"} {
			rules = append(rules,
				[]string{bin, "-t", "nat", "-A", "OUTPUT", "-p", proto, "--dport", "53", "-d", dst, "-j", "RETURN"},
			)
		}
	}
	// Bypass packets marked by the proxy itself (see dnsproxy dialer).
	markAndRedirect := [][]string{
		{"iptables", "-t", "nat", "-A", "OUTPUT", "-p", "udp", "--dport", "53", "-m", "mark", "--mark", constants.MarkHex, "-j", "RETURN"},
		{"iptables", "-t", "nat", "-A", "OUTPUT", "-p", "tcp", "--dport", "53", "-m", "mark", "--mark", constants.MarkHex, "-j", "RETURN"},
		// Redirect all other DNS traffic to local proxy port.
		{"iptables", "-t", "nat", "-A", "OUTPUT", "-p", "udp", "--dport", "53", "-j", "REDIRECT", "--to-port", targetPort},
		{"iptables", "-t", "nat", "-A", "OUTPUT", "-p", "tcp", "--dport", "53", "-j", "REDIRECT", "--to-port", targetPort},
		// IPv6 equivalents (ip6tables)
		{"ip6tables", "-t", "nat", "-A", "OUTPUT", "-p", "udp", "--dport", "53", "-m", "mark", "--mark", constants.MarkHex, "-j", "RETURN"},
		{"ip6tables", "-t", "nat", "-A", "OUTPUT", "-p", "tcp", "--dport", "53", "-m", "mark", "--mark", constants.MarkHex, "-j", "RETURN"},
		{"ip6tables", "-t", "nat", "-A", "OUTPUT", "-p", "udp", "--dport", "53", "-j", "REDIRECT", "--to-port", targetPort},
		{"ip6tables", "-t", "nat", "-A", "OUTPUT", "-p", "tcp", "--dport", "53", "-j", "REDIRECT", "--to-port", targetPort},
	}
	rules = append(rules, markAndRedirect...)

	for _, args := range rules {
		if output, err := exec.Command(args[0], args[1:]...).CombinedOutput(); err != nil {
			// Name the failing command so the operator can tell which rule was rejected.
			return fmt.Errorf("iptables command failed: %v: %w (output: %s)", args, err, output)
		}
	}
	log.Infof("iptables DNS redirect installed successfully")
	return nil
}


================================================
FILE: components/egress/pkg/log/logger.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package log

import (
	"context"
	"os"

	slogger "github.com/alibaba/opensandbox/internal/logger"
)

// Logger is the shared logger instance for egress.
// It starts at level "info" under the name "opensandbox.egress" and can be
// swapped out with WithLogger.
var Logger slogger.Logger = slogger.MustNew(slogger.Config{Level: "info"}).Named("opensandbox.egress")

// WithLogger installs logger as the package-wide Logger used by egress
// components. A nil logger leaves the current one in place. The context is
// returned unchanged; nothing is stored in it.
func WithLogger(ctx context.Context, logger slogger.Logger) context.Context {
	if logger == nil {
		return ctx
	}
	Logger = logger
	return ctx
}

// Debugf formats the message and logs it through the shared Logger's Debugf.
func Debugf(template string, args ...any) {
	Logger.Debugf(template, args...)
}

// Infof formats the message and logs it through the shared Logger's Infof.
func Infof(template string, args ...any) {
	Logger.Infof(template, args...)
}

// Warnf formats the message and logs it through the shared Logger's Warnf.
func Warnf(template string, args ...any) {
	Logger.Warnf(template, args...)
}

// Errorf formats the message and logs it through the shared Logger's Errorf.
func Errorf(template string, args ...any) {
	Logger.Errorf(template, args...)
}

// Fatalf logs the message via Logger.Errorf, calls Logger.Sync (ignoring its
// error) and then terminates the process with exit status 1. It never returns.
func Fatalf(template string, args ...any) {
	Logger.Errorf(template, args...)
	_ = Logger.Sync()
	os.Exit(1)
}


================================================
FILE: components/egress/pkg/nftables/dynamic.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package nftables

import (
	"fmt"
	"net/netip"
	"strings"
	"time"
)

const (
	// dynAllowV4Set / dynAllowV6Set name the nft sets holding DNS-learned addresses.
	dynAllowV4Set  = "dyn_allow_v4"
	dynAllowV6Set  = "dyn_allow_v6"
	// dynSetTimeoutS is the set-level timeout (seconds) written into the set definitions.
	dynSetTimeoutS = 300
	// minTTLSec / maxTTLSec bound the per-element timeout (seconds) applied by clampTTL.
	minTTLSec      = 60
	maxTTLSec      = 300
)

// ResolvedIP is a single IP learned from DNS with TTL for dynamic nft set.
type ResolvedIP struct {
	// Addr is the resolved address; its family selects the v4 or v6 set.
	Addr netip.Addr
	// TTL is the record's time to live; it is clamped before use as element timeout.
	TTL  time.Duration
}

// buildAddResolvedIPsScript returns a nft script fragment that
// adds resolved IPs to dyn_allow_v4/v6 with timeout.
//
// Each element's timeout is its TTL clamped by clampTTL. IPv4-mapped IPv6
// addresses (::ffff:a.b.c.d) are unmapped and placed in the v4 set, because
// traffic to them leaves as IPv4 and would never match the v6 set. Addresses
// that are neither IPv4 nor IPv6 (for example the zero Addr) are skipped. The
// result is empty when nothing is left to add; otherwise it holds one
// "add element" line per non-empty family, v4 first.
func buildAddResolvedIPsScript(table string, ips []ResolvedIP) string {
	var v4, v6 []string
	for _, r := range ips {
		addr := r.Addr.Unmap()
		elem := fmt.Sprintf("%s timeout %ds", addr.String(), clampTTL(r.TTL))
		switch {
		case addr.Is4():
			v4 = append(v4, elem)
		case addr.Is6():
			v6 = append(v6, elem)
		}
	}

	var b strings.Builder
	if len(v4) > 0 {
		fmt.Fprintf(&b, "add element inet %s %s { %s }\n", table, dynAllowV4Set, strings.Join(v4, ", "))
	}
	if len(v6) > 0 {
		fmt.Fprintf(&b, "add element inet %s %s { %s }\n", table, dynAllowV6Set, strings.Join(v6, ", "))
	}
	return b.String()
}

// clampTTL converts d to whole seconds (truncating) and limits the result to
// the inclusive range [minTTLSec, maxTTLSec].
func clampTTL(d time.Duration) int {
	switch seconds := int(d.Seconds()); {
	case seconds < minTTLSec:
		return minTTLSec
	case seconds > maxTTLSec:
		return maxTTLSec
	default:
		return seconds
	}
}


================================================
FILE: components/egress/pkg/nftables/manager.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package nftables

import (
	"context"
	"fmt"
	"os/exec"
	"strings"
	"sync"

	"github.com/alibaba/opensandbox/egress/pkg/constants"
	"github.com/alibaba/opensandbox/egress/pkg/log"
	"github.com/alibaba/opensandbox/egress/pkg/policy"
)

// Names of the nft objects managed by this package: one inet table, one output
// chain, and the static allow/deny and DoH blocklist sets per address family.
const (
	tableName     = "opensandbox"
	chainName     = "egress"
	allowV4Set    = "allow_v4"
	allowV6Set    = "allow_v6"
	denyV4Set     = "deny_v4"
	denyV6Set     = "deny_v6"
	dohBlockV4Set = "doh_block_v4"
	dohBlockV6Set = "doh_block_v6"
)

// runner applies a rendered nft script and returns the tool's output.
// defaultRunner pipes the script to `nft -f -`; tests substitute a capturing stub.
type runner func(ctx context.Context, script string) ([]byte, error)

// Options controls nftables enforcement extras.
type Options struct {
	// BlockDoT drops tcp/udp 853 to prevent DNS-over-TLS bypass.
	BlockDoT bool
	// BlockDoH443 drops HTTPS DoH endpoints; when blocklist is empty and enabled, 443 is dropped.
	BlockDoH443    bool
	// DoHBlocklistV4 / DoHBlocklistV6 are the elements of the doh_block_v4/v6
	// sets. When BlockDoH443 is set and either list is non-empty, only tcp/443
	// traffic to these destinations is dropped.
	DoHBlocklistV4 []string
	DoHBlocklistV6 []string
}

// Manager applies static IP/CIDR policy into nftables and dynamic DNS-learned IPs.
type Manager struct {
	// run executes a rendered nft script.
	run  runner
	opts Options
	// mu serializes ApplyStatic and AddResolvedIPs so they never run concurrently.
	mu   sync.Mutex
}

// NewManager returns a manager that shells out to `nft -f -` and uses the
// default options (BlockDoT enabled).
func NewManager() *Manager {
	return NewManagerWithRunner(defaultRunner)
}

// NewManagerWithRunner is intended for tests: r receives every rendered script
// instead of nft. Options default to BlockDoT=true.
func NewManagerWithRunner(r runner) *Manager {
	return NewManagerWithRunnerAndOptions(r, Options{BlockDoT: true})
}

// NewManagerWithRunnerAndOptions is intended for tests that need both a custom
// runner and custom options; opts is used as given, with no defaults applied.
func NewManagerWithRunnerAndOptions(r runner, opts Options) *Manager {
	m := new(Manager)
	m.run = r
	m.opts = opts
	return m
}

// NewManagerWithOptions returns a manager that shells out to `nft -f -` with
// caller-supplied options (used by main()).
func NewManagerWithOptions(opts Options) *Manager {
	return NewManagerWithRunnerAndOptions(defaultRunner, opts)
}

// ApplyStatic reconciles static allow/deny IP and CIDR entries into nftables.
//
// It creates a dedicated table/chain and overwrites previous state. A nil
// policy is treated as the default-deny policy.
// Uses the same mutex as AddResolvedIPs so a /policy update never overlaps a DNS
// callback: without this, add-element could run while the table is being deleted/recreated
// and fail, causing a transient deny for a client that already got an allowed DNS answer.
//
// If the first run fails because the table does not exist yet, the script is
// retried once without its delete-table line. When that retry also fails, its
// error is returned, since it describes the real problem rather than the
// expected missing-table condition.
func (m *Manager) ApplyStatic(ctx context.Context, p *policy.NetworkPolicy) error {
	if p == nil {
		p = policy.DefaultDenyPolicy()
	}
	allowV4, allowV6, denyV4, denyV6 := p.StaticIPSets()
	log.Infof("nftables: applying static policy: default=%s, allow_v4=%d, allow_v6=%d, deny_v4=%d, deny_v6=%d",
		p.DefaultAction, len(allowV4), len(allowV6), len(denyV4), len(denyV6))
	m.mu.Lock()
	defer m.mu.Unlock()

	script := buildRuleset(p, m.opts)
	_, err := m.run(ctx, script)
	if err != nil && isMissingTableError(err) {
		// On a fresh host the delete-table may fail; retry once without the delete line.
		if fallback := removeDeleteTableLine(script); fallback != script {
			_, err = m.run(ctx, fallback)
		}
	}
	if err != nil {
		return err
	}
	log.Infof("nftables: static policy applied successfully")
	return nil
}

// AddResolvedIPs inserts DNS-learned addresses into the dynamic allow sets,
// each with a timeout derived from its TTL (clamped to minTTLSec–maxTTLSec).
// It returns nil without running nft when ips is empty or yields no script.
// Call only when the table exists (dns+nft mode).
func (m *Manager) AddResolvedIPs(ctx context.Context, ips []ResolvedIP) error {
	count := len(ips)
	if count == 0 {
		return nil
	}

	m.mu.Lock()
	defer m.mu.Unlock()

	stmt := buildAddResolvedIPsScript(tableName, ips)
	if stmt != "" {
		log.Infof("nftables: adding %d resolved IP(s) to dynamic allow sets with script statement %s", count, stmt)
		_, runErr := m.run(ctx, stmt)
		return runErr
	}
	return nil
}

// buildRuleset renders the complete nft script for policy p and options opts.
//
// The script deletes and re-creates the table, declares the static, dynamic and
// (when configured) DoH blocklist sets, fills the sets that have elements, and
// then adds the output chain with its rules. Rules are appended in this order:
// established/related accept, marked-packet accept, loopback accept, optional
// DoT drop (port 853), optional DoH drop (tcp/443), static deny sets, dynamic
// allow sets, static allow sets, and finally a counting drop when the chain
// policy is drop.
func buildRuleset(p *policy.NetworkPolicy, opts Options) string {
	allowV4, allowV6, denyV4, denyV6 := p.StaticIPSets()

	var b strings.Builder
	// Reset and re-create table, sets, and chain.
	fmt.Fprintf(&b, "delete table inet %s\n", tableName)
	fmt.Fprintf(&b, "add table inet %s\n", tableName)

	// Static sets use interval flags so they can hold CIDR ranges; the dynamic
	// sets instead carry a timeout so their elements expire.
	fmt.Fprintf(&b, "add set inet %s %s { type ipv4_addr; flags interval; }\n", tableName, allowV4Set)
	fmt.Fprintf(&b, "add set inet %s %s { type ipv4_addr; flags interval; }\n", tableName, denyV4Set)
	fmt.Fprintf(&b, "add set inet %s %s { type ipv6_addr; flags interval; }\n", tableName, allowV6Set)
	fmt.Fprintf(&b, "add set inet %s %s { type ipv6_addr; flags interval; }\n", tableName, denyV6Set)
	fmt.Fprintf(&b, "add set inet %s %s { type ipv4_addr; timeout %ds; }\n", tableName, dynAllowV4Set, dynSetTimeoutS)
	fmt.Fprintf(&b, "add set inet %s %s { type ipv6_addr; timeout %ds; }\n", tableName, dynAllowV6Set, dynSetTimeoutS)

	// DoH blocklist sets are only declared when they will have elements.
	if len(opts.DoHBlocklistV4) > 0 {
		fmt.Fprintf(&b, "add set inet %s %s { type ipv4_addr; flags interval; }\n", tableName, dohBlockV4Set)
	}
	if len(opts.DoHBlocklistV6) > 0 {
		fmt.Fprintf(&b, "add set inet %s %s { type ipv6_addr; flags interval; }\n", tableName, dohBlockV6Set)
	}

	// writeElements emits nothing for an empty list, so undeclared DoH sets are never referenced here.
	writeElements(&b, allowV4Set, allowV4)
	writeElements(&b, denyV4Set, denyV4)
	writeElements(&b, allowV6Set, allowV6)
	writeElements(&b, denyV6Set, denyV6)
	writeElements(&b, dohBlockV4Set, opts.DoHBlocklistV4)
	writeElements(&b, dohBlockV6Set, opts.DoHBlocklistV6)

	// The chain policy is drop unless the policy's default action is allow.
	chainPolicy := "drop"
	if p.DefaultAction == policy.ActionAllow {
		chainPolicy = "accept"
	}
	fmt.Fprintf(&b, "add chain inet %s %s { type filter hook output priority 0; policy %s; }\n", tableName, chainName, chainPolicy)
	fmt.Fprintf(&b, "add rule inet %s %s ct state established,related accept\n", tableName, chainName)
	// Packets carrying constants.MarkHex are accepted unconditionally.
	fmt.Fprintf(&b, "add rule inet %s %s meta mark %s accept\n", tableName, chainName, constants.MarkHex)
	fmt.Fprintf(&b, "add rule inet %s %s oifname \"lo\" accept\n", tableName, chainName)
	if opts.BlockDoT {
		fmt.Fprintf(&b, "add rule inet %s %s tcp dport 853 drop\n", tableName, chainName)
		fmt.Fprintf(&b, "add rule inet %s %s udp dport 853 drop\n", tableName, chainName)
	}
	if opts.BlockDoH443 {
		if len(opts.DoHBlocklistV4) == 0 && len(opts.DoHBlocklistV6) == 0 {
			// strict: drop all 443 when enabled but no blocklist provided
			fmt.Fprintf(&b, "add rule inet %s %s tcp dport 443 drop\n", tableName, chainName)
		} else {
			if len(opts.DoHBlocklistV4) > 0 {
				fmt.Fprintf(&b, "add rule inet %s %s ip daddr @%s tcp dport 443 drop\n", tableName, chainName, dohBlockV4Set)
			}
			if len(opts.DoHBlocklistV6) > 0 {
				fmt.Fprintf(&b, "add rule inet %s %s ip6 daddr @%s tcp dport 443 drop\n", tableName, chainName, dohBlockV6Set)
			}
		}
	}
	// Deny rules are added before the allow rules, so a deny entry wins over
	// any dynamic or static allow entry for the same destination.
	fmt.Fprintf(&b, "add rule inet %s %s ip daddr @%s drop\n", tableName, chainName, denyV4Set)
	fmt.Fprintf(&b, "add rule inet %s %s ip6 daddr @%s drop\n", tableName, chainName, denyV6Set)
	fmt.Fprintf(&b, "add rule inet %s %s ip daddr @%s accept\n", tableName, chainName, dynAllowV4Set)
	fmt.Fprintf(&b, "add rule inet %s %s ip6 daddr @%s accept\n", tableName, chainName, dynAllowV6Set)
	fmt.Fprintf(&b, "add rule inet %s %s ip daddr @%s accept\n", tableName, chainName, allowV4Set)
	fmt.Fprintf(&b, "add rule inet %s %s ip6 daddr @%s accept\n", tableName, chainName, allowV6Set)
	// With a drop policy, an explicit final rule adds a counter for dropped packets.
	if chainPolicy == "drop" {
		fmt.Fprintf(&b, "add rule inet %s %s counter drop\n", tableName, chainName)
	}

	return b.String()
}

func writeElements(b *strings.Builder, setName string, elems []string) {
	if len(elems) == 0 {
		return
	}
	fmt.Fprintf(b, "add element inet %s %s { %s }\n", tableName, setName, strings.Join(elems, ", "))
}

// defaultRunner feeds script to `nft -f -` on stdin and returns the combined
// stdout/stderr. On failure the output is returned together with an error that
// wraps the exec error and embeds the trimmed output.
func defaultRunner(ctx context.Context, script string) ([]byte, error) {
	nft := exec.CommandContext(ctx, "nft", "-f", "-")
	nft.Stdin = strings.NewReader(script)

	out, runErr := nft.CombinedOutput()
	if runErr == nil {
		return out, nil
	}
	trimmed := strings.TrimSpace(string(out))
	return out, fmt.Errorf("nft apply failed: %w (output: %s)", runErr, trimmed)
}

// isMissingTableError reports whether err looks like nft rejecting the
// delete-table statement because the table does not exist: the lower-cased
// message must mention both "no such file or directory" and the delete statement.
func isMissingTableError(err error) bool {
	if err == nil {
		return false
	}
	lowered := strings.ToLower(err.Error())
	if !strings.Contains(lowered, "no such file or directory") {
		return false
	}
	return strings.Contains(lowered, "delete table inet "+tableName)
}

// removeDeleteTableLine returns script without lines that start with the
// delete-table statement for the managed table and without blank lines. The
// remaining lines are joined with "\n", with no trailing newline.
func removeDeleteTableLine(script string) string {
	deletePrefix := "delete table inet " + tableName

	var kept []string
	for _, line := range strings.Split(script, "\n") {
		if strings.HasPrefix(line, deletePrefix) || strings.TrimSpace(line) == "" {
			continue
		}
		kept = append(kept, line)
	}
	return strings.Join(kept, "\n")
}


================================================
FILE: components/egress/pkg/nftables/manager_test.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package nftables

import (
	"context"
	"fmt"
	"net/netip"
	"testing"
	"time"

	"github.com/alibaba/opensandbox/egress/pkg/policy"
	"github.com/stretchr/testify/require"
)

// TestApplyStatic_BuildsRuleset_DefaultDeny renders a default-deny policy with
// static allow/deny targets and checks that the chain, the built-in rules, the
// dynamic sets, the static elements and the final counting drop are all present.
func TestApplyStatic_BuildsRuleset_DefaultDeny(t *testing.T) {
	var rendered string
	capture := func(_ context.Context, script string) ([]byte, error) {
		rendered = script
		return nil, nil
	}
	m := NewManagerWithRunner(capture)

	p, err := policy.ParsePolicy(`{
		"defaultAction":"deny",
		"egress":[
			{"action":"allow","target":"1.1.1.1"},
			{"action":"allow","target":"2.2.0.0/16"},
			{"action":"deny","target":"2001:db8::/32"}
		]
	}`)
	require.NoError(t, err, "unexpected parse error")

	require.NoError(t, m.ApplyStatic(context.Background(), p), "ApplyStatic returned error")

	wantLines := []string{
		"add chain inet opensandbox egress { type filter hook output priority 0; policy drop; }",
		"add rule inet opensandbox egress ct state established,related accept",
		"add rule inet opensandbox egress meta mark 0x1 accept",
		"add rule inet opensandbox egress oifname \"lo\" accept",
		"add rule inet opensandbox egress tcp dport 853 drop",
		"add rule inet opensandbox egress udp dport 853 drop",
		"add set inet opensandbox dyn_allow_v4 { type ipv4_addr; timeout 300s; }",
		"add set inet opensandbox dyn_allow_v6 { type ipv6_addr; timeout 300s; }",
		"add element inet opensandbox allow_v4 { 1.1.1.1, 2.2.0.0/16 }",
		"add element inet opensandbox deny_v6 { 2001:db8::/32 }",
		"add rule inet opensandbox egress ip daddr @dyn_allow_v4 accept",
		"add rule inet opensandbox egress ip6 daddr @dyn_allow_v6 accept",
		"add rule inet opensandbox egress counter drop",
	}
	for _, line := range wantLines {
		expectContains(t, rendered, line)
	}
}

// TestApplyStatic_DefaultAllowUsesAcceptPolicy checks that a default-allow
// policy renders an accept chain policy, still blocks DoT, omits the final
// counting drop, and populates the deny set.
func TestApplyStatic_DefaultAllowUsesAcceptPolicy(t *testing.T) {
	var rendered string
	capture := func(_ context.Context, script string) ([]byte, error) {
		rendered = script
		return nil, nil
	}
	m := NewManagerWithRunner(capture)

	const rawPolicy = `{
		"defaultAction":"allow",
		"egress":[{"action":"deny","target":"10.0.0.0/8"}]
	}`
	p, err := policy.ParsePolicy(rawPolicy)
	require.NoError(t, err, "unexpected parse error")

	applyErr := m.ApplyStatic(context.Background(), p)
	require.NoError(t, applyErr, "ApplyStatic returned error")

	expectContains(t, rendered, "policy accept;")
	expectContains(t, rendered, "add rule inet opensandbox egress tcp dport 853 drop")
	require.NotContains(t, rendered, "counter drop", "did not expect drop counter when defaultAction is allow:\n%s", rendered)
	expectContains(t, rendered, "add element inet opensandbox deny_v4 { 10.0.0.0/8 }")
}

// expectContains stops the test unless substr occurs in s, printing the whole rendered
// ruleset in the failure message. It is marked as a helper so failures point at the caller.
func expectContains(t *testing.T, s, substr string) {
	t.Helper()
	const failureFormat = "expected rendered ruleset to contain %q\nrendered:\n%s"
	require.Contains(t, s, substr, failureFormat, substr, s)
}

// TestApplyStatic_RetryWhenTableMissing checks that ApplyStatic retries after the first nft run
// fails because the table it tries to delete does not exist, and that the script used for the
// retry no longer contains the delete-table line.
func TestApplyStatic_RetryWhenTableMissing(t *testing.T) {
	var calls int
	var scripts []string
	m := NewManagerWithRunner(func(_ context.Context, script string) ([]byte, error) {
		calls++
		scripts = append(scripts, script)
		if calls == 1 {
			// Simulated nft output for "delete table" on a table that is not there yet.
			return nil, fmt.Errorf("nft apply failed: exit status 1 (output: /dev/stdin:1:19-29: Error: No such file or directory; did you mean table ‘opensandbox’ in family inet?\ndelete table inet opensandbox\n                  ^^^^^^^^^^^)")
		}
		return nil, nil
	})

	// Check the parse result instead of discarding it, so a broken fixture fails here
	// rather than passing a nil policy into ApplyStatic.
	p, err := policy.ParsePolicy(`{"egress":[]}`)
	require.NoError(t, err, "unexpected parse error")

	require.NoError(t, m.ApplyStatic(context.Background(), p), "expected retry to succeed")
	require.Equal(t, 2, calls, "expected 2 calls (fail then retry)")
	require.GreaterOrEqual(t, len(scripts), 2, "expected second attempt script to be recorded")
	require.NotContains(t, scripts[1], "delete table inet opensandbox", "expected second attempt to drop delete-table line")
}

// TestApplyStatic_DoHBlocklist checks that enabling BlockDoH443 with v4/v6 blocklists renders the
// doh_block_v4 set and element plus the port-443 drop rules for both address families.
func TestApplyStatic_DoHBlocklist(t *testing.T) {
	var rendered string
	opts := Options{
		BlockDoT:       true,
		BlockDoH443:    true,
		DoHBlocklistV4: []string{"9.9.9.9"},
		DoHBlocklistV6: []string{"2001:db8::/32"},
	}
	m := NewManagerWithRunnerAndOptions(func(_ context.Context, script string) ([]byte, error) {
		rendered = script
		return nil, nil
	}, opts)

	// Check the parse result instead of discarding it, so a broken fixture fails here
	// rather than passing a nil policy into ApplyStatic.
	p, err := policy.ParsePolicy(`{"defaultAction":"allow","egress":[]}`)
	require.NoError(t, err, "unexpected parse error")
	require.NoError(t, m.ApplyStatic(context.Background(), p), "ApplyStatic returned error")

	expectContains(t, rendered, "add set inet opensandbox doh_block_v4 { type ipv4_addr; flags interval; }")
	expectContains(t, rendered, "add element inet opensandbox doh_block_v4 { 9.9.9.9 }")
	expectContains(t, rendered, "add rule inet opensandbox egress ip daddr @doh_block_v4 tcp dport 443 drop")
	expectContains(t, rendered, "add rule inet opensandbox egress ip6 daddr @doh_block_v6 tcp dport 443 drop")
}

// TestAddResolvedIPs_BuildsDynamicElements checks that one IPv4 and one IPv6 resolved address
// are rendered as timed elements of the dyn_allow_v4 and dyn_allow_v6 sets respectively.
func TestAddResolvedIPs_BuildsDynamicElements(t *testing.T) {
	var script string
	capture := func(_ context.Context, s string) ([]byte, error) {
		script = s
		return nil, nil
	}
	mgr := NewManagerWithRunner(capture)

	v4 := ResolvedIP{Addr: netip.MustParseAddr("1.1.1.1"), TTL: 120 * time.Second}
	v6 := ResolvedIP{Addr: netip.MustParseAddr("2001:db8::1"), TTL: 60 * time.Second}

	err := mgr.AddResolvedIPs(context.Background(), []ResolvedIP{v4, v6})
	require.NoError(t, err, "AddResolvedIPs returned error")

	for _, want := range []string{
		"add element inet opensandbox dyn_allow_v4 { 1.1.1.1 timeout 120s }",
		"add element inet opensandbox dyn_allow_v6 { 2001:db8::1 timeout 60s }",
	} {
		expectContains(t, script, want)
	}
}

// TestAddResolvedIPs_ClampsTTL checks the timeouts rendered for out-of-range TTLs: a 10s TTL is
// expected to appear as 60s and a 9999s TTL as 300s.
func TestAddResolvedIPs_ClampsTTL(t *testing.T) {
	var script string
	capture := func(_ context.Context, s string) ([]byte, error) {
		script = s
		return nil, nil
	}
	mgr := NewManagerWithRunner(capture)

	tooShort := ResolvedIP{Addr: netip.MustParseAddr("10.0.0.1"), TTL: 10 * time.Second}
	tooLong := ResolvedIP{Addr: netip.MustParseAddr("10.0.0.2"), TTL: 9999 * time.Second}

	err := mgr.AddResolvedIPs(context.Background(), []ResolvedIP{tooShort, tooLong})
	require.NoError(t, err, "AddResolvedIPs returned error")

	expectContains(t, script, "10.0.0.1 timeout 60s")
	expectContains(t, script, "10.0.0.2 timeout 300s")
}

func TestAddResolvedIPs_EmptyNoOp(t *testing.T) {
	m := NewManagerWithRunner(func(_ context.Context, script string) ([]byte, error) {
		require.FailNow(t, "runner should not be called for empty ips")
		return nil, nil
	})
	require.NoError(t, m.AddResolvedIPs(context.Background(), nil), "AddResolvedIPs returned error")
	require.NoError(t, m.AddResolvedIPs(context.Background(), []ResolvedIP{}), "AddResolvedIPs returned error")
}


================================================
FILE: components/egress/pkg/policy/policy.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package policy

import (
	"encoding/json"
	"fmt"
	"net/netip"
	"strings"
)

const (
	// ActionAllow is the action string that lets matching traffic through.
	ActionAllow = "allow"
	// ActionDeny is the action string that blocks matching traffic; it is also the
	// fallback whenever an action is left empty.
	ActionDeny  = "deny"
)

// targetKind records how an EgressRule target was classified during normalization.
type targetKind int

const (
	// targetUnknown is the zero value: the rule has not been classified.
	targetUnknown targetKind = iota
	// targetDomain marks a target that parsed as neither an IP address nor a CIDR prefix.
	targetDomain
	// targetIP marks a target that parsed as a single IP address.
	targetIP
	// targetCIDR marks a target that parsed as a CIDR prefix.
	targetCIDR
)

// DefaultDenyPolicy builds a fresh policy with no egress rules whose default action is deny,
// i.e. every evaluation against it results in deny.
func DefaultDenyPolicy() *NetworkPolicy {
	denyAll := new(NetworkPolicy)
	denyAll.DefaultAction = ActionDeny
	return denyAll
}

// NetworkPolicy is the minimal MVP shape for egress control.
// Domain/wildcard targets are matched by Evaluate; IP and CIDR targets are exposed
// through StaticIPSets.
type NetworkPolicy struct {
	// Egress is the ordered rule list; Evaluate returns the action of the first matching domain rule.
	Egress        []EgressRule `json:"egress"`
	// DefaultAction is used when no rule matches; an empty value is treated as deny.
	DefaultAction string       `json:"defaultAction"`
}

// EgressRule is a single allow/deny entry of a NetworkPolicy.
type EgressRule struct {
	// Action is "allow" or "deny"; normalization lowercases it and turns an empty value into deny.
	Action string `json:"action"`
	// Target is a domain pattern (exact name or "*." wildcard), an IP address, or a CIDR prefix.
	Target string `json:"target"`

	// The fields below are unexported and therefore absent from the JSON form. They cache
	// the classification of Target and are filled in by normalizePolicy or WithExtraAllowIPs.
	targetKind targetKind
	ip         netip.Addr
	prefix     netip.Prefix
}

// ParsePolicy turns a JSON document (from env or config) into a normalized NetworkPolicy.
// Blank input, "null" and "{}" all yield the default deny policy. Malformed JSON and
// normalization failures are returned as errors. A missing default action becomes "deny".
func ParsePolicy(raw string) (*NetworkPolicy, error) {
	switch text := strings.TrimSpace(raw); text {
	case "", "null", "{}":
		return DefaultDenyPolicy(), nil
	default:
		parsed := new(NetworkPolicy)
		if err := json.Unmarshal([]byte(text), parsed); err != nil {
			return nil, err
		}
		if err := normalizePolicy(parsed); err != nil {
			return nil, err
		}
		return ensureDefaults(parsed), nil
	}
}

// Evaluate decides allow/deny for a domain name. The name is lowercased and a trailing dot
// is removed before matching. Only domain rules are consulted, in order, and the first match
// wins; with no match the policy's default action applies. A nil policy, an empty rule
// action, and an empty default action all resolve to deny.
func (p *NetworkPolicy) Evaluate(domain string) string {
	if p == nil {
		return ActionDeny
	}

	// orDeny substitutes deny for an unset action.
	orDeny := func(action string) string {
		if action == "" {
			return ActionDeny
		}
		return action
	}

	name := strings.ToLower(strings.TrimSuffix(domain, "."))
	for i := range p.Egress {
		rule := &p.Egress[i]
		if rule.targetKind == targetDomain && rule.matchesDomain(name) {
			return orDeny(rule.Action)
		}
	}
	return orDeny(p.DefaultAction)
}

// ensureDefaults makes sure the returned policy carries a default action: a nil policy is
// replaced by the default deny policy, and an empty default action is set to deny in place.
func ensureDefaults(p *NetworkPolicy) *NetworkPolicy {
	switch {
	case p == nil:
		return DefaultDenyPolicy()
	case p.DefaultAction == "":
		p.DefaultAction = ActionDeny
	}
	return p
}

// normalizePolicy canonicalizes a freshly decoded policy in place and classifies every rule.
//
// The default action and each rule action are trimmed and lowercased; an empty value becomes
// ActionDeny and any value other than allow/deny is rejected with an error. Each target is
// trimmed, must be non-empty, and is classified as an IP address, a CIDR prefix, or (when it
// parses as neither) a domain pattern.
func normalizePolicy(p *NetworkPolicy) error {
	p.DefaultAction = strings.ToLower(strings.TrimSpace(p.DefaultAction))
	switch p.DefaultAction {
	case "":
		p.DefaultAction = ActionDeny
	case ActionAllow, ActionDeny:
		// already canonical
	default:
		// Validate the default the same way rule actions are validated; otherwise an
		// unknown value would be stored and returned verbatim by Evaluate.
		return fmt.Errorf("unsupported defaultAction %q", p.DefaultAction)
	}

	for i := range p.Egress {
		r := &p.Egress[i]
		r.Action = strings.ToLower(strings.TrimSpace(r.Action))
		if r.Action == "" {
			r.Action = ActionDeny
		}
		if r.Action != ActionAllow && r.Action != ActionDeny {
			return fmt.Errorf("unsupported action %q", r.Action)
		}

		r.Target = strings.TrimSpace(r.Target)
		if r.Target == "" {
			return fmt.Errorf("egress target cannot be empty")
		}
		// Try the most specific forms first: a single address, then a prefix.
		if ip, err := netip.ParseAddr(r.Target); err == nil {
			r.targetKind = targetIP
			r.ip = ip
			continue
		}
		if prefix, err := netip.ParsePrefix(r.Target); err == nil {
			r.targetKind = targetCIDR
			r.prefix = prefix
			continue
		}
		// Anything else is treated as a domain pattern.
		r.targetKind = targetDomain
	}
	return nil
}

// WithExtraAllowIPs produces a copy of the policy with one extra allow rule per address appended
// after the existing rules. The receiver is left untouched. When the receiver is nil or no
// addresses are given, the receiver itself is returned unchanged.
// Used at startup to whitelist system nameservers so client DNS and proxy upstream work with private DNS.
func (p *NetworkPolicy) WithExtraAllowIPs(ips []netip.Addr) *NetworkPolicy {
	if p == nil || len(ips) == 0 {
		return p
	}

	// Build the rule list in a fresh backing array so the original slice is never aliased.
	rules := make([]EgressRule, 0, len(p.Egress)+len(ips))
	rules = append(rules, p.Egress...)
	for i := range ips {
		addr := ips[i]
		rules = append(rules, EgressRule{
			Action:     ActionAllow,
			Target:     addr.String(),
			targetKind: targetIP,
			ip:         addr,
		})
	}

	clone := *p
	clone.Egress = rules
	return &clone
}

// StaticIPSets sorts the policy's IP and CIDR rules into four string lists: allowed IPv4,
// allowed IPv6, denied IPv4 and denied IPv6. Domain rules are ignored. Any action other than
// allow is filed under deny. A nil policy yields four nil slices.
func (p *NetworkPolicy) StaticIPSets() (allowV4, allowV6, denyV4, denyV6 []string) {
	if p == nil {
		return
	}

	// file appends text to the list selected by the address family and the action.
	// Addresses that are neither IPv4 nor IPv6 (e.g. the zero Addr) are dropped.
	file := func(action string, addr netip.Addr, text string) {
		allowed := action == ActionAllow
		switch {
		case addr.Is4() && allowed:
			allowV4 = append(allowV4, text)
		case addr.Is4():
			denyV4 = append(denyV4, text)
		case addr.Is6() && allowed:
			allowV6 = append(allowV6, text)
		case addr.Is6():
			denyV6 = append(denyV6, text)
		}
	}

	for i := range p.Egress {
		rule := &p.Egress[i]
		switch rule.targetKind {
		case targetIP:
			file(rule.Action, rule.ip, rule.ip.String())
		case targetCIDR:
			file(rule.Action, rule.prefix.Addr(), rule.prefix.String())
		}
	}
	return
}

// matchesDomain reports whether domain satisfies this rule's target pattern, ignoring case.
// An exact pattern matches only the identical name. A "*.example.com" pattern matches any
// name ending in ".example.com" but not "example.com" itself. An empty pattern never matches.
func (r *EgressRule) matchesDomain(domain string) bool {
	want := strings.ToLower(strings.TrimSpace(r.Target))
	got := strings.ToLower(domain)

	switch {
	case want == "":
		return false
	case want == got:
		return true
	case !strings.HasPrefix(want, "*."):
		return false
	}

	dotted := want[1:] // ".example.com"
	apex := want[2:]   // "example.com"
	return got != apex && strings.HasSuffix(got, dotted)
}


================================================
FILE: components/egress/pkg/policy/policy_test.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package policy

import (
	"net/netip"
	"testing"

	"github.com/stretchr/testify/require"
)

// TestParsePolicy_EmptyOrNullDefaultsDeny checks that blank, "null" and "{}" inputs all parse
// to a non-nil policy whose default action and evaluation result are deny.
func TestParsePolicy_EmptyOrNullDefaultsDeny(t *testing.T) {
	for _, input := range []string{"", "   ", "null", "{}\n"} {
		parsed, parseErr := ParsePolicy(input)
		require.NoErrorf(t, parseErr, "raw %q returned error", input)
		require.NotNilf(t, parsed, "raw %q expected default deny policy, got nil", input)

		require.Equalf(t, ActionDeny, parsed.DefaultAction, "raw %q expected defaultAction deny", input)
		verdict := parsed.Evaluate("example.com.")
		require.Equalf(t, ActionDeny, verdict, "raw %q expected deny evaluation", input)
	}
}

// TestParsePolicy_DefaultActionFallback checks that a policy without a defaultAction field
// ends up with deny as its default action.
func TestParsePolicy_DefaultActionFallback(t *testing.T) {
	const input = `{"egress":[{"action":"allow","target":"example.com"}]}`

	parsed, parseErr := ParsePolicy(input)
	require.NoError(t, parseErr)
	require.NotNil(t, parsed, "expected policy object, got nil")
	require.Equal(t, ActionDeny, parsed.DefaultAction, "expected defaultAction fallback to deny")
}

// TestParsePolicy_EmptyEgressDefaultsDeny checks that an explicitly empty defaultAction with no
// rules yields a deny default and a deny evaluation.
func TestParsePolicy_EmptyEgressDefaultsDeny(t *testing.T) {
	const input = `{"defaultAction":""}`

	parsed, parseErr := ParsePolicy(input)
	require.NoError(t, parseErr)
	require.Equal(t, ActionDeny, parsed.DefaultAction, "expected default deny when defaultAction missing")

	verdict := parsed.Evaluate("anything.com.")
	require.Equal(t, ActionDeny, verdict, "expected evaluation deny for empty egress")
}

// TestParsePolicy_IPAndCIDRSupported checks that IP and CIDR targets are parsed and then split
// by StaticIPSets into the expected allow/deny and IPv4/IPv6 buckets.
func TestParsePolicy_IPAndCIDRSupported(t *testing.T) {
	const input = `{
		"defaultAction":"deny",
		"egress":[
			{"action":"allow","target":"1.1.1.1"},
			{"action":"allow","target":"2.2.0.0/16"},
			{"action":"deny","target":"2001:db8::/32"},
			{"action":"deny","target":"2001:db8::1"}
		]
	}`

	parsed, parseErr := ParsePolicy(input)
	require.NoError(t, parseErr)

	allow4, allow6, deny4, deny6 := parsed.StaticIPSets()

	require.Len(t, allow4, 2, "allowV4 length mismatch")
	for i, want := range []string{"1.1.1.1", "2.2.0.0/16"} {
		require.Equal(t, want, allow4[i])
	}
	require.Len(t, deny6, 2, "expected 2 denyV6 entries")
	require.Empty(t, allow6, "allowV6 should be empty")
	require.Empty(t, deny4, "denyV4 should be empty")
}

// TestParsePolicy_InvalidAction checks that a rule action other than allow/deny is rejected.
func TestParsePolicy_InvalidAction(t *testing.T) {
	const input = `{"egress":[{"action":"foo","target":"example.com"}]}`

	_, parseErr := ParsePolicy(input)
	require.Error(t, parseErr, "expected error for invalid action")
}

// TestParsePolicy_EmptyTargetError checks that a rule with an empty target is rejected.
func TestParsePolicy_EmptyTargetError(t *testing.T) {
	const input = `{"egress":[{"action":"allow","target":""}]}`

	_, parseErr := ParsePolicy(input)
	require.Error(t, parseErr, "expected error for empty target")
}

// TestWithExtraAllowIPs checks that WithExtraAllowIPs returns a new policy containing the extra
// addresses as static allow entries, and returns the receiver itself for nil or empty input.
func TestWithExtraAllowIPs(t *testing.T) {
	original, parseErr := ParsePolicy(`{"defaultAction":"deny","egress":[{"action":"allow","target":"example.com"}]}`)
	require.NoError(t, parseErr)

	// A domain-only policy contributes nothing to the static allow sets.
	allow4, allow6, _, _ := original.StaticIPSets()
	require.Empty(t, allow4, "domain-only policy should have no static allowV4 IPs")
	require.Empty(t, allow6, "domain-only policy should have no static allowV6 IPs")

	extra := []netip.Addr{
		netip.MustParseAddr("192.168.65.7"),
		netip.MustParseAddr("2001:db8::1"),
	}
	extended := original.WithExtraAllowIPs(extra)
	require.NotSame(t, original, extended, "expected new policy instance")

	allow4, allow6, _, _ = extended.StaticIPSets()
	require.Len(t, allow4, 1, "allowV4 length mismatch")
	require.Equal(t, "192.168.65.7", allow4[0])
	require.Len(t, allow6, 1, "allowV6 length mismatch")
	require.Equal(t, "2001:db8::1", allow6[0])

	// With nothing to add, the very same policy pointer comes back.
	require.Same(t, original, original.WithExtraAllowIPs(nil), "WithExtraAllowIPs(nil) should return same policy")
	require.Same(t, original, original.WithExtraAllowIPs([]netip.Addr{}), "WithExtraAllowIPs([]) should return same policy")
}


================================================
FILE: components/egress/policy_server.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"context"
	"crypto/subtle"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/netip"
	"strings"
	"sync"
	"time"

	"github.com/alibaba/opensandbox/egress/pkg/constants"
	"github.com/alibaba/opensandbox/egress/pkg/log"
	"github.com/alibaba/opensandbox/egress/pkg/nftables"
	"github.com/alibaba/opensandbox/egress/pkg/policy"
)

// policyUpdater is the part of the policy holder that the policy server needs: it reads the
// policy currently in effect and swaps in a replacement.
type policyUpdater interface {
	// CurrentPolicy returns the policy currently held; the handlers treat nil as "no policy".
	CurrentPolicy() *policy.NetworkPolicy
	// UpdatePolicy replaces the held policy with the given one.
	UpdatePolicy(*policy.NetworkPolicy)
}

// enforcementReporter reports the current enforcement mode (dns | dns+nft).
type enforcementReporter interface {
	// EnforcementMode returns the mode as a string.
	EnforcementMode() string
}

// nftApplier applies static policy and optional dynamic DNS-learned IPs to nftables.
type nftApplier interface {
	// ApplyStatic applies the given policy; the handlers abort the update when it returns an error.
	ApplyStatic(context.Context, *policy.NetworkPolicy) error
	// AddResolvedIPs adds resolved addresses; it is not called by the handlers in this file.
	AddResolvedIPs(context.Context, []nftables.ResolvedIP) error
}

// startPolicyServer launches a lightweight HTTP API for updating the egress policy at runtime.
// Supported endpoints:
//   - GET       /policy  : returns the currently enforced policy.
//   - POST, PUT /policy  : replace the policy; empty body resets to default deny-all.
//   - PATCH     /policy  : merge additional egress rules into the current policy.
//   - any       /healthz : liveness probe, always answers 200 "ok".
//
// nameserverIPs are merged into every applied policy so system DNS stays allowed (e.g. private DNS).
//
// The server is shut down (with a 5s grace period) when ctx is cancelled. The function waits
// up to 200ms for an immediate listen failure and returns it; after that the server is assumed
// healthy, nil is returned, and any later serve error is only logged.
func startPolicyServer(ctx context.Context, proxy policyUpdater, nft nftApplier, enforcementMode string, addr string, token string, nameserverIPs []netip.Addr) error {
	mux := http.NewServeMux()
	handler := &policyServer{proxy: proxy, nft: nft, token: token, enforcementMode: enforcementMode, nameserverIPs: nameserverIPs}
	mux.HandleFunc("/policy", handler.handlePolicy)
	mux.HandleFunc("/healthz", func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusOK)
		_, _ = w.Write([]byte("ok"))
	})

	srv := &http.Server{Addr: addr, Handler: mux}
	handler.server = srv

	// Shutdown listener when context ends.
	go func() {
		<-ctx.Done()
		shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		defer cancel()
		if err := srv.Shutdown(shutdownCtx); err != nil && !errors.Is(err, http.ErrServerClosed) {
			log.Warnf("policy server shutdown error: %v", err)
		}
	}()

	errCh := make(chan error, 1)
	go func() {
		// Close the channel once serving ends so that a clean shutdown (which sends no
		// error) still releases whoever is waiting on errCh instead of leaking it.
		defer close(errCh)
		if err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
			errCh <- err
		}
	}()

	select {
	case err := <-errCh:
		// Either an early listen failure, or nil if the server was already closed cleanly.
		return err
	case <-time.After(200 * time.Millisecond):
		// assume healthy start; keep logging future errors
		go func() {
			if err := <-errCh; err != nil {
				log.Errorf("policy server error: %v", err)
			}
		}()
		return nil
	}
}

// policyServer holds the state shared by the /policy HTTP handlers.
type policyServer struct {
	proxy           policyUpdater // source of the current policy and target of updates
	nft             nftApplier    // optional; when nil the handlers skip the nftables apply step
	server          *http.Server  // set by startPolicyServer; not read by the handlers in this file
	token           string        // expected auth header value; empty disables the check
	enforcementMode string        // echoed back in status responses
	nameserverIPs   []netip.Addr  // added as allow rules to every policy sent to nftables
	mu              sync.Mutex // serializes read-merge-apply to avoid lost updates across POST/PATCH
}

// policyStatusResponse is the JSON body returned by the /policy handlers on success.
// Every field is omitted from the output when empty.
type policyStatusResponse struct {
	Status          string `json:"status,omitempty"`          // "ok" on success
	Mode            string `json:"mode,omitempty"`            // allow_all | deny_all | enforcing (see modeFromPolicy)
	EnforcementMode string `json:"enforcementMode,omitempty"` // the server's configured enforcement mode
	Reason          string `json:"reason,omitempty"`          // optional human-readable explanation
	Policy          any    `json:"policy,omitempty"`          // the current policy; only set by GET
}

// handlePolicy is the entry point for /policy. It rejects unauthorized requests with 401 and
// then routes by method: GET reads the policy, POST and PUT replace it, PATCH merges rules
// into it. Any other method gets 405 together with an Allow header.
func (s *policyServer) handlePolicy(w http.ResponseWriter, r *http.Request) {
	if !s.authorize(r) {
		http.Error(w, "unauthorized", http.StatusUnauthorized)
		return
	}

	method := r.Method
	if method == http.MethodGet {
		s.handleGet(w)
		return
	}
	if method == http.MethodPost || method == http.MethodPut {
		s.handlePost(w, r)
		return
	}
	if method == http.MethodPatch {
		s.handlePatch(w, r)
		return
	}

	w.Header().Set("Allow", "GET, POST, PUT, PATCH")
	http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
}

// handleGet answers with the policy currently held by the proxy, the mode derived from it,
// and the configured enforcement mode.
func (s *policyServer) handleGet(w http.ResponseWriter) {
	active := s.proxy.CurrentPolicy()

	resp := policyStatusResponse{
		Status:          "ok",
		Mode:            modeFromPolicy(active),
		EnforcementMode: s.enforcementMode,
		Policy:          active,
	}
	writeJSON(w, http.StatusOK, resp)
}

// handlePost replaces the whole policy; both POST and PUT are routed here.
//
// An empty or whitespace-only body resets to the default deny-all policy. Any other body is
// parsed with policy.ParsePolicy, and a parse failure yields 400. When an nft applier is
// configured, the policy (plus the nameserver allow rules) is applied to nftables first and the
// proxy is only updated if that succeeds; a failed apply returns 500 and leaves the proxy as is.
func (s *policyServer) handlePost(w http.ResponseWriter, r *http.Request) {
	defer r.Body.Close()

	// Read and parse the request before taking the lock, so a slow or stalled client
	// cannot hold the mutex and block every other policy update.
	body, err := io.ReadAll(io.LimitReader(r.Body, 1<<20)) // 1MB limit
	if err != nil {
		http.Error(w, fmt.Sprintf("failed to read body: %v", err), http.StatusBadRequest)
		return
	}
	raw := strings.TrimSpace(string(body))

	var pol *policy.NetworkPolicy
	if raw != "" {
		pol, err = policy.ParsePolicy(raw)
		if err != nil {
			http.Error(w, fmt.Sprintf("invalid policy: %v", err), http.StatusBadRequest)
			return
		}
	}

	// Only the apply-and-swap step needs to be serialized.
	s.mu.Lock()
	defer s.mu.Unlock()

	if raw == "" {
		log.Infof("policy API: reset to default deny-all")
		def := policy.DefaultDenyPolicy()
		if s.nft != nil {
			defWithNS := def.WithExtraAllowIPs(s.nameserverIPs)
			if err := s.nft.ApplyStatic(r.Context(), defWithNS); err != nil {
				log.Errorf("policy API: nftables apply failed on reset: %v", err)
				http.Error(w, fmt.Sprintf("failed to apply nftables: %v", err), http.StatusInternalServerError)
				return
			}
		}
		s.proxy.UpdatePolicy(def)
		log.Infof("policy API: proxy and nftables updated to deny_all")
		writeJSON(w, http.StatusOK, policyStatusResponse{
			Status: "ok",
			Mode:   "deny_all",
			Reason: "policy reset to default deny-all",
		})
		return
	}

	mode := modeFromPolicy(pol)
	log.Infof("policy API: updating policy to mode=%s, enforcement=%s", mode, s.enforcementMode)
	if s.nft != nil {
		// nftables gets the nameserver allow rules on top; the proxy keeps the policy as posted.
		polWithNS := pol.WithExtraAllowIPs(s.nameserverIPs)
		if err := s.nft.ApplyStatic(r.Context(), polWithNS); err != nil {
			log.Errorf("policy API: nftables apply failed: %v", err)
			http.Error(w, fmt.Sprintf("failed to apply nftables policy: %v", err), http.StatusInternalServerError)
			return
		}
	}
	s.proxy.UpdatePolicy(pol)
	log.Infof("policy API: proxy and nftables updated successfully")
	writeJSON(w, http.StatusOK, policyStatusResponse{
		Status:          "ok",
		Mode:            mode,
		EnforcementMode: s.enforcementMode,
	})
}

// handlePatch adds or replaces egress rules by merging with the current policy.
// It is a convenience wrapper over the full replace flow: we still read -> merge -> apply.
//
// The request body is either a bare JSON array of rules,
// [{"action":"allow","target":"example.com"}, ...], or an object wrapping that array,
// {"egress":[{"action":"allow","target":"example.com"}, ...]}. Patch rules take precedence over
// existing rules with the same target; the current default action is preserved.
// An empty body, undecodable JSON, an empty rule list, or an invalid merged policy yields 400;
// a failed nftables apply yields 500 and leaves the proxy untouched.
func (s *policyServer) handlePatch(w http.ResponseWriter, r *http.Request) {
	defer r.Body.Close()

	// Read and decode before taking the lock, so a slow or stalled client cannot hold
	// the mutex and block every other policy update.
	body, err := io.ReadAll(io.LimitReader(r.Body, 1<<20)) // 1MB limit
	if err != nil {
		http.Error(w, fmt.Sprintf("failed to read body: %v", err), http.StatusBadRequest)
		return
	}
	raw := strings.TrimSpace(string(body))
	if raw == "" {
		http.Error(w, "patch body cannot be empty", http.StatusBadRequest)
		return
	}

	patchRules, err := decodePatchRules(raw)
	if err != nil {
		http.Error(w, fmt.Sprintf("invalid patch rules: %v", err), http.StatusBadRequest)
		return
	}
	if len(patchRules) == 0 {
		http.Error(w, "patch must include at least one egress rule", http.StatusBadRequest)
		return
	}

	// The read-merge-apply sequence must be atomic with respect to other updates.
	s.mu.Lock()
	defer s.mu.Unlock()

	base := s.proxy.CurrentPolicy()
	if base == nil {
		base = policy.DefaultDenyPolicy()
	}

	// patchRules is non-empty here, so mergeEgressRules builds a new slice and never
	// hands back (or mutates) base.Egress.
	merged := mergeEgressRules(base.Egress, patchRules)

	// Reuse parser to normalize targets/actions.
	rawMerged, err := json.Marshal(policy.NetworkPolicy{
		DefaultAction: base.DefaultAction,
		Egress:        merged,
	})
	if err != nil {
		http.Error(w, fmt.Sprintf("failed to encode merged policy: %v", err), http.StatusInternalServerError)
		return
	}
	newPolicy, err := policy.ParsePolicy(string(rawMerged))
	if err != nil {
		http.Error(w, fmt.Sprintf("invalid merged policy: %v", err), http.StatusBadRequest)
		return
	}

	mode := modeFromPolicy(newPolicy)
	log.Infof("policy API: patching policy with %d new rule(s), mode=%s, enforcement=%s", len(patchRules), mode, s.enforcementMode)
	if s.nft != nil {
		polWithNS := newPolicy.WithExtraAllowIPs(s.nameserverIPs)
		if err := s.nft.ApplyStatic(r.Context(), polWithNS); err != nil {
			log.Errorf("policy API: nftables apply failed on patch: %v", err)
			http.Error(w, fmt.Sprintf("failed to apply nftables policy: %v", err), http.StatusInternalServerError)
			return
		}
	}
	s.proxy.UpdatePolicy(newPolicy)
	log.Infof("policy API: patch applied successfully")
	writeJSON(w, http.StatusOK, policyStatusResponse{
		Status:          "ok",
		Mode:            mode,
		EnforcementMode: s.enforcementMode,
	})
}

// decodePatchRules extracts the rule list from a PATCH body that is either a bare JSON array
// of rules or an object carrying them under the "egress" key.
func decodePatchRules(raw string) ([]policy.EgressRule, error) {
	if strings.HasPrefix(raw, "[") {
		var rules []policy.EgressRule
		if err := json.Unmarshal([]byte(raw), &rules); err != nil {
			return nil, err
		}
		return rules, nil
	}

	var wrapper struct {
		Egress []policy.EgressRule `json:"egress"`
	}
	if err := json.Unmarshal([]byte(raw), &wrapper); err != nil {
		return nil, err
	}
	return wrapper.Egress, nil
}

// authorize reports whether the request may proceed. With no token configured every request is
// accepted; otherwise the auth header must equal the token. The comparison is done with
// subtle.ConstantTimeCompare, which itself returns 0 for inputs of different length, so a
// missing or wrong-length header is rejected without separate checks.
func (s *policyServer) authorize(r *http.Request) bool {
	if s.token == "" {
		return true
	}
	presented := []byte(r.Header.Get(constants.EgressAuthTokenHeader))
	expected := []byte(s.token)
	return subtle.ConstantTimeCompare(presented, expected) == 1
}

// writeJSON sends payload as a JSON response with the given status code. The Content-Type
// header is set before the status line is written; an encoding error is deliberately ignored
// because the header has already gone out by then.
func writeJSON(w http.ResponseWriter, status int, payload any) {
	headers := w.Header()
	headers.Set("Content-Type", "application/json")
	w.WriteHeader(status)

	encoder := json.NewEncoder(w)
	_ = encoder.Encode(payload)
}

// modeFromPolicy summarizes a policy as a mode string: "deny_all" for a nil policy or a
// rule-less deny default, "allow_all" for a rule-less allow default, and "enforcing" for
// everything else (any policy with rules, or an unrecognized default action).
func modeFromPolicy(p *policy.NetworkPolicy) string {
	switch {
	case p == nil:
		return "deny_all"
	case len(p.Egress) > 0:
		return "enforcing"
	case p.DefaultAction == policy.ActionAllow:
		return "allow_all"
	case p.DefaultAction == policy.ActionDeny:
		return "deny_all"
	default:
		return "enforcing"
	}
}

// mergeEgressRules combines additions and base into one rule list, deduplicated by mergeKey.
// Additions come first in the result and override base rules with the same key; within
// either list the first occurrence of a key is the one kept. With no additions, base is
// returned as is (not copied and not deduplicated).
func mergeEgressRules(base, additions []policy.EgressRule) []policy.EgressRule {
	if len(additions) == 0 {
		return base
	}

	merged := make([]policy.EgressRule, 0, len(base)+len(additions))
	taken := make(map[string]struct{}, len(base)+len(additions))

	// Walk additions before base so that a patched target shadows the existing one.
	for _, group := range [][]policy.EgressRule{additions, base} {
		for _, rule := range group {
			k := mergeKey(rule)
			if _, dup := taken[k]; dup {
				continue
			}
			taken[k] = struct{}{}
			merged = append(merged, rule)
		}
	}
	return merged
}

// mergeKey returns the dedupe key for a rule's target.
//
// Surrounding whitespace is ignored, matching how the policy parser trims targets. IP
// addresses and CIDR prefixes are keyed by their canonical textual form (prefixes masked to
// their network address), so different spellings of the same address or network collapse to
// one key. Everything else is treated as a domain pattern and keyed case-insensitively.
// An empty target yields the empty key.
func mergeKey(r policy.EgressRule) string {
	target := strings.TrimSpace(r.Target)
	if addr, err := netip.ParseAddr(target); err == nil {
		return addr.String()
	}
	if pfx, err := netip.ParsePrefix(target); err == nil {
		return pfx.Masked().String()
	}
	return strings.ToLower(target)
}


================================================
FILE: components/egress/policy_server_test.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"context"
	"errors"
	"io"
	"net/http"
	"net/http/httptest"
	"strings"
	"testing"

	"github.com/alibaba/opensandbox/egress/pkg/nftables"
	"github.com/alibaba/opensandbox/egress/pkg/policy"
	"github.com/stretchr/testify/require"
)

// stubProxy is an in-memory policyUpdater for tests: it simply remembers the last policy.
type stubProxy struct {
	updated *policy.NetworkPolicy // last policy passed to UpdatePolicy, or the seeded initial policy
}

// CurrentPolicy returns the stored policy (nil until one is seeded or updated).
func (s *stubProxy) CurrentPolicy() *policy.NetworkPolicy {
	return s.updated
}

// UpdatePolicy stores p as the current policy.
func (s *stubProxy) UpdatePolicy(p *policy.NetworkPolicy) {
	s.updated = p
}

// stubNft is a fake nftApplier for tests that records ApplyStatic calls and can be told to fail.
type stubNft struct {
	err     error                 // returned from every ApplyStatic call
	calls   int                   // number of ApplyStatic calls seen
	applied *policy.NetworkPolicy // policy passed to the most recent ApplyStatic call
}

// ApplyStatic records the call and the policy, then returns the configured error.
func (s *stubNft) ApplyStatic(_ context.Context, p *policy.NetworkPolicy) error {
	s.calls++
	s.applied = p
	return s.err
}

// AddResolvedIPs is a no-op that always succeeds.
func (s *stubNft) AddResolvedIPs(_ context.Context, _ []nftables.ResolvedIP) error {
	return nil
}

// TestHandlePolicy_AppliesNftAndUpdatesProxy checks the happy path of POST /policy: a JSON 200
// response, exactly one nftables apply, and the proxy holding the posted policy afterwards.
func TestHandlePolicy_AppliesNftAndUpdatesProxy(t *testing.T) {
	fakeProxy := new(stubProxy)
	fakeNft := new(stubNft)
	server := &policyServer{proxy: fakeProxy, nft: fakeNft, enforcementMode: "dns+nft"}

	const payload = `{"defaultAction":"deny","egress":[{"action":"allow","target":"1.1.1.1"}]}`
	recorder := httptest.NewRecorder()
	server.handlePolicy(recorder, httptest.NewRequest(http.MethodPost, "/policy", strings.NewReader(payload)))

	result := recorder.Result()
	require.Equal(t, http.StatusOK, result.StatusCode, "expected 200 OK")
	require.Contains(t, result.Header.Get("Content-Type"), "application/json", "expected json response")
	require.Equal(t, 1, fakeNft.calls, "expected nft ApplyStatic called once")
	require.NotNil(t, fakeProxy.updated, "expected proxy policy to be updated")
	require.Equal(t, policy.ActionDeny, fakeProxy.updated.DefaultAction, "unexpected defaultAction")
}

// TestHandlePolicy_NftFailureReturns500 checks that when the nftables apply fails, POST /policy
// answers 500 and the proxy policy is left untouched.
func TestHandlePolicy_NftFailureReturns500(t *testing.T) {
	fakeProxy := new(stubProxy)
	fakeNft := &stubNft{err: errors.New("boom")}
	server := &policyServer{proxy: fakeProxy, nft: fakeNft, enforcementMode: "dns+nft"}

	const payload = `{"defaultAction":"deny","egress":[{"action":"allow","target":"1.1.1.1"}]}`
	recorder := httptest.NewRecorder()
	server.handlePolicy(recorder, httptest.NewRequest(http.MethodPost, "/policy", strings.NewReader(payload)))

	result := recorder.Result()
	require.Equal(t, http.StatusInternalServerError, result.StatusCode, "expected 500")
	require.Equal(t, 1, fakeNft.calls, "expected nft ApplyStatic called once")
	require.Nil(t, fakeProxy.updated, "expected proxy policy not updated on nft failure")
}

// TestHandleGet_ReturnsEnforcementMode checks that GET /policy succeeds without an nft applier
// and includes the configured enforcement mode in the JSON body.
func TestHandleGet_ReturnsEnforcementMode(t *testing.T) {
	fakeProxy := &stubProxy{updated: policy.DefaultDenyPolicy()}
	server := &policyServer{proxy: fakeProxy, nft: nil, enforcementMode: "dns"}

	recorder := httptest.NewRecorder()
	server.handlePolicy(recorder, httptest.NewRequest(http.MethodGet, "/policy", nil))

	result := recorder.Result()
	require.Equal(t, http.StatusOK, result.StatusCode, "expected 200")

	payload, readErr := io.ReadAll(result.Body)
	require.NoError(t, readErr)
	require.Contains(t, string(payload), `"enforcementMode":"dns"`, "expected enforcementMode dns in response")
}

// TestHandlePatch_MergesAndApplies checks that PATCH /policy puts the patched rules first,
// overrides the base rule with the same target, keeps the remaining base rule, preserves the
// default action, and applies the result to nftables exactly once.
func TestHandlePatch_MergesAndApplies(t *testing.T) {
	seed := &policy.NetworkPolicy{
		DefaultAction: policy.ActionDeny,
		Egress: []policy.EgressRule{
			{Action: policy.ActionAllow, Target: "example.com"},
			{Action: policy.ActionDeny, Target: "*.example.com"},
		},
	}
	fakeProxy := &stubProxy{updated: seed}
	fakeNft := new(stubNft)
	server := &policyServer{proxy: fakeProxy, nft: fakeNft, enforcementMode: "dns+nft"}

	const payload = `[{"action":"deny","target":"blocked.com"},{"action":"allow","target":"example.com"}]`
	recorder := httptest.NewRecorder()
	server.handlePolicy(recorder, httptest.NewRequest(http.MethodPatch, "/policy", strings.NewReader(payload)))

	result := recorder.Result()
	require.Equal(t, http.StatusOK, result.StatusCode, "expected 200")
	require.Equal(t, 1, fakeNft.calls, "expected nft ApplyStatic called once")
	require.NotNil(t, fakeProxy.updated, "expected proxy policy to be updated")
	require.Equal(t, policy.ActionDeny, fakeProxy.updated.DefaultAction, "default action should be preserved")
	require.Len(t, fakeProxy.updated.Egress, 3, "expected 3 egress rules")

	// Expected rules in order, with the failure message used for each field.
	expected := []struct {
		action, actionMsg string
		target, targetMsg string
	}{
		{policy.ActionDeny, "first rule action mismatch", "blocked.com", "first rule target mismatch"},
		{policy.ActionAllow, "second rule action mismatch", "example.com", "second rule target mismatch"},
		{policy.ActionDeny, "base wildcard rule action mismatch", "*.example.com", "base wildcard rule target mismatch"},
	}
	for i, want := range expected {
		got := fakeProxy.updated.Egress[i]
		require.Equal(t, want.action, got.Action, want.actionMsg)
		require.Equal(t, want.target, got.Target, want.targetMsg)
	}
}

// TestHandlePatch_DomainCaseOverride checks that a patched rule replaces a base
// rule whose target differs only in letter case: the base denies "Example.COM",
// the PATCH allows "example.com", and exactly one (allow) rule must remain.
func TestHandlePatch_DomainCaseOverride(t *testing.T) {
	basePolicy := &policy.NetworkPolicy{
		DefaultAction: policy.ActionDeny,
		Egress: []policy.EgressRule{
			{Action: policy.ActionDeny, Target: "Example.COM"},
		},
	}
	fakeProxy := &stubProxy{updated: basePolicy}
	fakeNft := &stubNft{}
	server := &policyServer{proxy: fakeProxy, nft: fakeNft, enforcementMode: "dns+nft"}

	const patchPayload = `[{"action":"allow","target":"example.com"}]`
	recorder := httptest.NewRecorder()
	server.handlePolicy(
		recorder,
		httptest.NewRequest(http.MethodPatch, "/policy", strings.NewReader(patchPayload)),
	)

	result := recorder.Result()
	require.Equal(t, http.StatusOK, result.StatusCode, "expected 200")
	require.NotNil(t, fakeProxy.updated, "expected proxy policy to be updated")

	merged := fakeProxy.updated
	require.Len(t, merged.Egress, 1, "expected deduped rule count 1")

	onlyRule := merged.Egress[0]
	require.Equal(t, policy.ActionAllow, onlyRule.Action, "expected allow action")
	require.Equal(t, "example.com", onlyRule.Target, "expected allow example.com to override")
}


================================================
FILE: components/egress/tests/bench-dns-nft.sh
================================================
#!/bin/bash

# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# E2E benchmark: baseline (no egress) vs dns (pass-through) vs dns+nft (sync dynamic IP write).
# Baseline: plain curl container running the same workload with no egress container. Then egress in dns and dns+nft modes.
# Metrics: E2E latency (p50, p99), throughput (req/s).
#
# Usage: ./tests/bench-dns-nft.sh
# Optional: BENCH_SAMPLE_SIZE=n to randomly use n domains from hostname.txt (default: use all).
# Requires: Docker, curl in PATH (for policy push). Egress image and baseline image (default curlimages/curl:latest) must have curl.
# Domain list: tests/hostname.txt (one domain per line).

# Abort on command failure, on use of an unset variable, and on failure anywhere in a pipeline.
set -euo pipefail

# Print a message prefixed with the current wall-clock time (HH:MM:SS).
info() { echo "[$(date +%H:%M:%S)] $*"; }

# Directory holding this script; the domain list (hostname.txt) is read from the same directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
HOSTNAME_FILE="${SCRIPT_DIR}/hostname.txt"
# tests/ sits three levels below the repo root (components/egress/tests), so climb three levels.
REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"

# Egress image built locally by this script and used for the dns / dns+nft phases.
IMG="opensandbox/egress:local"
# Image for the baseline phase; override via BASELINE_IMG.
BASELINE_IMG="${BASELINE_IMG:-curlimages/curl:latest}"
# Every phase reuses this container name; cleanup removes it between phases.
CONTAINER_NAME="egress-bench-e2e"
# Host port published for the container's policy API (container port 18080).
POLICY_PORT=18080
# Number of passes over the domain list in the timed run.
ROUNDS=10
# Optional: where to write egress logs on host. Override via LOG_HOST_DIR / LOG_FILE.
LOG_HOST_DIR="${LOG_HOST_DIR:-/tmp/egress-logs}"
LOG_FILE="${LOG_FILE:-egress.log}"
# Log directory / file path as seen inside the egress container (LOG_HOST_DIR is mounted here).
LOG_CONTAINER_DIR="/var/log/opensandbox"
LOG_CONTAINER_FILE="${LOG_CONTAINER_DIR}/${LOG_FILE}"

# Load benchmark domains from hostname.txt (one domain per line).
# Everything after '#' is dropped, surrounding whitespace is trimmed, and blank lines are skipped.
if [[ ! -f "${HOSTNAME_FILE}" ]] || [[ ! -s "${HOSTNAME_FILE}" ]]; then
  echo "Error: domain file not found or empty: ${HOSTNAME_FILE}" >&2
  exit 1
fi
BENCH_DOMAINS=()
# `|| [[ -n "$line" ]]` keeps the final entry when the file has no trailing newline
# (read returns non-zero at EOF but still fills $line).
while IFS= read -r line || [[ -n "$line" ]]; do
  line="${line%%#*}"
  line="${line#"${line%%[![:space:]]*}"}"
  line="${line%"${line##*[![:space:]]}"}"
  if [[ -n "$line" ]]; then
    BENCH_DOMAINS+=( "$line" )
  fi
done < "${HOSTNAME_FILE}"
total_in_file=${#BENCH_DOMAINS[@]}
if [[ "$total_in_file" -eq 0 ]]; then
  echo "Error: no domains in ${HOSTNAME_FILE}" >&2
  exit 1
fi

# Optionally randomly sample n domains (BENCH_SAMPLE_SIZE); if unset or 0, use all.
# Reject non-numeric values up front instead of failing inside an arithmetic comparison.
if [[ -n "${BENCH_SAMPLE_SIZE:-}" ]] && [[ ! "${BENCH_SAMPLE_SIZE}" =~ ^[0-9]+$ ]]; then
  echo "Error: BENCH_SAMPLE_SIZE must be a non-negative integer, got '${BENCH_SAMPLE_SIZE}'" >&2
  exit 1
fi
# Force base 10 so values with leading zeros (e.g. "08") are not parsed as octal.
sample_size=$((10#${BENCH_SAMPLE_SIZE:-0}))
if [[ "$sample_size" -gt 0 ]]; then
  if [[ "$sample_size" -ge "$total_in_file" ]]; then
    NUM_DOMAINS=$total_in_file
  else
    # Portable shuffle: shuf (Linux), gshuf (macOS coreutils), else awk.
    # Word splitting of the command substitution is intentional: domains contain no whitespace.
    if command -v shuf >/dev/null 2>&1; then
      BENCH_DOMAINS=( $(printf '%s\n' "${BENCH_DOMAINS[@]}" | shuf -n "${sample_size}") )
    elif command -v gshuf >/dev/null 2>&1; then
      BENCH_DOMAINS=( $(printf '%s\n' "${BENCH_DOMAINS[@]}" | gshuf -n "${sample_size}") )
    else
      BENCH_DOMAINS=( $(printf '%s\n' "${BENCH_DOMAINS[@]}" | awk 'BEGIN{srand()} {printf "%s\t%s\n", rand(), $0}' | sort -n | cut -f2- | head -n "${sample_size}") )
    fi
    NUM_DOMAINS=${#BENCH_DOMAINS[@]}
    info "Using ${NUM_DOMAINS} randomly sampled domains (of ${total_in_file}) from ${HOSTNAME_FILE}"
  fi
else
  NUM_DOMAINS=$total_in_file
fi
TOTAL_REQUESTS=$((ROUNDS * NUM_DOMAINS))
# Per-request curl --max-time, in seconds.
CURL_TIMEOUT=10
# Max wall time for the benchmark loop (docker exec); avoid hanging forever.
BENCH_EXEC_TIMEOUT=300

# Force-remove the benchmark container if it exists. Output is discarded and
# failure is ignored so this is safe to call when no container is running.
cleanup() {
  docker rm --force "${CONTAINER_NAME}" &>/dev/null || true
}
# Always remove the container when the script exits, whatever the exit path.
trap cleanup EXIT

# Compute stats from a file with one numeric value per line (e.g. time_total in seconds).
# Output: count avg_s p50_s p99_s  (prints "0 0 0 0" when the file is missing, empty,
# or contains only blank lines).
#
# Percentiles use the nearest-rank index int(count * q + 0.5) into the sorted values.
# Blank lines are ignored so a malformed sample does not count as a zero-latency request.
# The sort is streamed straight into awk, so no "<file>.sorted" temp file is left behind.
stats() {
  local file="$1"
  if [[ ! -f "$file" ]] || [[ ! -s "$file" ]]; then
    echo "0 0 0 0"
    return
  fi
  sort -n "$file" | awk '
    NF { v[++c] = $1; s += $1 }
    END {
      if (c == 0) { print "0 0 0 0"; exit }
      print c, s / c, v[int(c * 0.5 + 0.5)], v[int(c * 0.99 + 0.5)]
    }'
}

# Execute the curl workload inside CONTAINER_NAME via `docker exec`.
# Precondition: /tmp/bench-domains.txt (one URL per line) already exists in the container.
# Usage: run_bench_to <outfile> [limit] [rounds] [timeout]
#   outfile  path inside the container; truncated, then one "<time_namelookup>\t<time_total>" line per request
#   limit    max URLs taken from the list per round (default 9999)
#   rounds   passes over the list (default 1); requests within a round run in parallel
#   timeout  pass the literal word "timeout" to cap the run at BENCH_EXEC_TIMEOUT seconds
#            (only applied when the `timeout` command exists on the host)
run_bench_to() {
  local result_path="$1"
  local max_urls="${2:-9999}"
  local passes="${3:-1}"
  local guard="${4:-}"
  local exec_argv=(
    docker exec
    -e BENCH_TIMEOUT="${CURL_TIMEOUT}"
    -e BENCH_OUTFILE="${result_path}"
    -e BENCH_LIMIT="${max_urls}"
    -e BENCH_ROUNDS="${passes}"
    "${CONTAINER_NAME}" sh -c '
    : > "$BENCH_OUTFILE"
    r=1
    while [ "$r" -le "$BENCH_ROUNDS" ]; do
      n=0
      while IFS= read -r url && [ "$n" -lt "$BENCH_LIMIT" ]; do
        ( curl -o /dev/null -s -I -w "%{time_namelookup}\t%{time_total}\n" --max-time "$BENCH_TIMEOUT" "$url" >> "$BENCH_OUTFILE" ) &
        n=$((n+1))
      done < /tmp/bench-domains.txt
      wait
      r=$((r+1))
    done
    '
  )
  # Run unguarded unless a timeout was requested and the host actually has `timeout`.
  if [[ "$guard" != "timeout" ]] || ! command -v timeout >/dev/null 2>&1; then
    "${exec_argv[@]}"
    return
  fi
  timeout "${BENCH_EXEC_TIMEOUT}" "${exec_argv[@]}"
}

# Write one "https://<domain>" line per BENCH_DOMAINS entry to a temp file on the host,
# copy it into the container as /tmp/bench-domains.txt, then delete the host copy.
copy_url_file_to_container() {
  local url_file="/tmp/bench-e2e-domains-$$.txt"
  # A single redirection around the loop creates/truncates the file and collects all lines.
  for d in "${BENCH_DOMAINS[@]}"; do
    printf 'https://%s\n' "${d}"
  done > "${url_file}"
  docker cp "${url_file}" "${CONTAINER_NAME}:/tmp/bench-domains.txt"
  rm -f "${url_file}"
}

# Run warm-up + timed benchmark, collect timings. Writes /tmp/bench-e2e-{mode}-total.txt, -namelookup.txt, -wall.txt.
# Requires: CONTAINER_NAME running, /tmp/bench-domains.txt inside container.
# Usage: run_workload <mode>   (mode is only used to name the output files)
# Returns 1 when the initial single-URL probe fails; warm-up and timed-run failures are
# logged and whatever partial results exist are still processed.
run_workload() {
  local mode="$1"
  local out_total="/tmp/bench-e2e-${mode}-total.txt"
  local out_namelookup="/tmp/bench-e2e-${mode}-namelookup.txt"
  local raw_host="/tmp/bench-e2e-raw.txt"
  local stderr_log="/tmp/bench-e2e-stderr.txt"
  local bench_ret=0
  : > "$out_total"
  : > "$out_namelookup"
  # Drop raw results left by an earlier phase: if the docker cp below fails, the stale
  # file would otherwise be reported as this phase's measurements.
  rm -f "$raw_host"

  local first_url="https://${BENCH_DOMAINS[0]}"
  sleep 1
  # HEAD request: no response body, only check DNS + TCP + TLS + HTTP response.
  if ! docker exec "${CONTAINER_NAME}" curl -o /dev/null -s -I --max-time "${CURL_TIMEOUT}" "${first_url}"; then
    info "Warm-up curl failed; stderr from one attempt:"
    docker exec "${CONTAINER_NAME}" curl -o /dev/null -s -I --max-time 5 "${first_url}" 2>&1 || true
    return 1
  fi

  info "Warm-up: first 10 domains, 1 round..."
  run_bench_to /tmp/bench-warmup.txt 10 1 2>"$stderr_log" || bench_ret=$?
  if [[ "$bench_ret" -ne 0 ]]; then
    info "Warm-up run failed (exit $bench_ret); continuing with timed run anyway."
  fi

  info "Running ${TOTAL_REQUESTS} E2E requests (${ROUNDS} rounds × ${NUM_DOMAINS} domains) inside container (max ${BENCH_EXEC_TIMEOUT}s)..."
  local start_ts end_ts
  start_ts=$(date +%s.%N)
  bench_ret=0
  run_bench_to /tmp/bench-raw.txt 9999 "${ROUNDS}" timeout 2>"$stderr_log" || bench_ret=$?
  # Stop the clock before copying results out so wall time (and Req/s) covers only the workload.
  end_ts=$(date +%s.%N)
  if [[ "$bench_ret" -ne 0 ]]; then
    info "Benchmark run failed (exit $bench_ret) or hit timeout; using partial results if any."
  fi
  docker cp "${CONTAINER_NAME}:/tmp/bench-raw.txt" "$raw_host" 2>/dev/null || true

  if [[ -s "$stderr_log" ]]; then
    info "docker exec stderr (first 10 lines):"
    head -10 "$stderr_log" >&2
  fi
  if [[ ! -f "$raw_host" ]]; then
    : > "$raw_host"
  fi
  local lines
  lines=$(wc -l < "$raw_host" 2>/dev/null || echo 0)
  if [[ "$lines" -lt $((TOTAL_REQUESTS / 2)) ]]; then
    info "WARN: only ${lines}/${TOTAL_REQUESTS} responses captured; curl may be failing inside container."
  fi

  # Raw lines are "<time_namelookup>\t<time_total>"; split them into one file per metric.
  awk -F'\t' '{print $2}' "$raw_host" 2>/dev/null > "$out_total"
  awk -F'\t' '{print $1}' "$raw_host" 2>/dev/null > "$out_namelookup"
  local wall_s
  wall_s=$(awk -v s="$start_ts" -v e="$end_ts" 'BEGIN { print e - s }')
  echo "$wall_s" > "/tmp/bench-e2e-${mode}-wall.txt"
}

# Run one benchmark phase: start container with given mode, push policy, run client workload, collect timings.
# Usage: run_phase "dns" | "dns+nft"
# The pushed policy is default-deny with one allow rule per BENCH_DOMAINS entry.
run_phase() {
  local mode="$1"
  local i d ready=0
  info "Phase: ${mode}"
  cleanup
  mkdir -p "${LOG_HOST_DIR}"
  docker run -d --name "${CONTAINER_NAME}" \
    --cap-add=NET_ADMIN \
    --sysctl net.ipv6.conf.all.disable_ipv6=1 \
    --sysctl net.ipv6.conf.default.disable_ipv6=1 \
    -e OPENSANDBOX_EGRESS_MODE="${mode}" \
    -e OPENSANDBOX_LOG_OUTPUT="${LOG_CONTAINER_FILE}" \
    -v "${LOG_HOST_DIR}:${LOG_CONTAINER_DIR}" \
    -p "${POLICY_PORT}:18080" \
    "${IMG}"

  # Poll the health endpoint for up to ~15s (30 attempts x 0.5s).
  for i in $(seq 1 30); do
    if curl -sf "http://127.0.0.1:${POLICY_PORT}/healthz" >/dev/null 2>&1; then
      ready=1
      break
    fi
    sleep 0.5
  done
  if [[ "$ready" -ne 1 ]]; then
    # Surface the timeout instead of falling through silently; the push below decides the outcome.
    info "WARN: policy server did not report healthy within 15s; attempting policy push anyway."
  fi

  local policy_egress=""
  for d in "${BENCH_DOMAINS[@]}"; do
    policy_egress="${policy_egress}{\"action\":\"allow\",\"target\":\"${d}\"},"
  done
  policy_egress="${policy_egress%,}"
  local policy_json="{\"defaultAction\":\"deny\",\"egress\":[${policy_egress}]}"
  # -S keeps curl quiet on success but prints the error if the push fails (the script aborts via set -e).
  curl -sSf -XPOST "http://127.0.0.1:${POLICY_PORT}/policy" -d "${policy_json}" >/dev/null

  copy_url_file_to_container
  run_workload "${mode}"
}

# Run baseline phase: plain curl container, no egress container. Same workload for comparison.
# The container just sleeps; the workload is driven into it with `docker exec`.
run_phase_baseline() {
  info "Phase: baseline (no egress)"
  cleanup
  # A failed pull (offline host, registry rate limit) is not fatal when the image is already
  # present locally; if it is not, the `docker run` below fails with a visible error.
  if ! docker pull "${BASELINE_IMG}" > /dev/null 2>&1; then
    info "WARN: could not pull ${BASELINE_IMG}; falling back to a local copy if one exists."
  fi
  docker run -d --name "${CONTAINER_NAME}" "${BASELINE_IMG}" sleep 3600
  sleep 2
  copy_url_file_to_container
  run_workload "baseline"
}

# Print the relative change of value $1 versus baseline $2 as a signed percentage with one
# decimal (e.g. "+12.3", "-4.0"). Prints "+0.0" when the baseline is empty or not positive.
_report_pct_change() {
  awk -v a="$1" -v b="$2" 'BEGIN { printf "%+.1f", (b > 0) ? (a - b) / b * 100 : 0 }'
}

# Print comparison table (baseline, dns, dns+nft)
# Reads /tmp/bench-e2e-{baseline,dns,dns+nft}-total.txt and -wall.txt written by run_workload.
# Every percentage in parentheses is the relative change versus baseline, so slower latency
# shows as "+" and lower throughput shows as "-", matching the legend printed below the table.
report() {
  local nb n1 n2 avg0 avg1 avg2 p50_0 p50_1 p50_2 p99_0 p99_1 p99_2 wall0 wall1 wall2
  read -r nb avg0 p50_0 p99_0 <<< "$(stats /tmp/bench-e2e-baseline-total.txt)"
  read -r n1 avg1 p50_1 p99_1 <<< "$(stats /tmp/bench-e2e-dns-total.txt)"
  read -r n2 avg2 p50_2 p99_2 <<< "$(stats /tmp/bench-e2e-dns+nft-total.txt)"
  wall0=$(cat /tmp/bench-e2e-baseline-wall.txt 2>/dev/null || echo "0")
  wall1=$(cat /tmp/bench-e2e-dns-wall.txt 2>/dev/null || echo "0")
  wall2=$(cat /tmp/bench-e2e-dns+nft-wall.txt 2>/dev/null || echo "0")
  if [[ "${nb:-0}" -eq 0 ]] || [[ "${n1:-0}" -eq 0 ]] || [[ "${n2:-0}" -eq 0 ]]; then
    echo "WARN: some phases had no successful requests; check container logs and network."
  fi

  # Throughput = captured responses / wall-clock seconds of the timed run.
  local rps0 rps1 rps2
  rps0=$(awk -v n="$nb" -v w="$wall0" 'BEGIN { print (w>0 && n>0) ? n/w : 0 }')
  rps1=$(awk -v n="$n1" -v w="$wall1" 'BEGIN { print (w>0 && n>0) ? n/w : 0 }')
  rps2=$(awk -v n="$n2" -v w="$wall2" 'BEGIN { print (w>0 && n>0) ? n/w : 0 }')

  echo ""
  echo "========== E2E benchmark: baseline vs dns vs dns+nft =========="
  echo "Workload: ${TOTAL_REQUESTS} requests (${ROUNDS} rounds × ${NUM_DOMAINS} domains)"
  echo ""
  local ov_avg1 ov_p50_1 ov_p99_1 ov_rps1 ov_avg2 ov_p50_2 ov_p99_2 ov_rps2
  ov_avg1=$(_report_pct_change "$avg1" "$avg0")
  ov_p50_1=$(_report_pct_change "$p50_1" "$p50_0")
  ov_p99_1=$(_report_pct_change "$p99_1" "$p99_0")
  ov_rps1=$(_report_pct_change "$rps1" "$rps0")
  ov_avg2=$(_report_pct_change "$avg2" "$avg0")
  ov_p50_2=$(_report_pct_change "$p50_2" "$p50_0")
  ov_p99_2=$(_report_pct_change "$p99_2" "$p99_0")
  ov_rps2=$(_report_pct_change "$rps2" "$rps0")

  printf "%-10s %14s %20s %20s %20s\n" "Mode" "Req/s" "Avg(s)" "P50(s)" "P99(s)"
  printf "%-10s %14s %20s %20s %20s\n" "baseline" "$rps0" "$avg0" "$p50_0" "$p99_0"
  printf "%-10s %14s %20s %20s %20s\n" "dns"      "$(printf '%.2f(%s%%)' "$rps1" "$ov_rps1")" "$(printf '%.3f(%s%%)' "$avg1" "$ov_avg1")" "$(printf '%.3f(%s%%)' "$p50_1" "$ov_p50_1")" "$(printf '%.3f(%s%%)' "$p99_1" "$ov_p99_1")"
  printf "%-10s %14s %20s %20s %20s\n" "dns+nft"  "$(printf '%.2f(%s%%)' "$rps2" "$ov_rps2")" "$(printf '%.3f(%s%%)' "$avg2" "$ov_avg2")" "$(printf '%.3f(%s%%)' "$p50_2" "$ov_p50_2")" "$(printf '%.3f(%s%%)' "$p99_2" "$ov_p99_2")"
  echo ""
  # echo does not interpret printf escapes, so a single % is written here.
  echo "Overhead in parentheses vs baseline: latency +% = slower, Req/s -% = lower throughput."
  echo "baseline: Plain container (${BASELINE_IMG}), no egress container."
  echo "dns:      DNS proxy only, no nft write (pass-through)."
  echo "dns+nft:  DNS proxy + sync AddResolvedIPs before each DNS reply (L2 enforcement)."
  echo ""
  echo "Note: Warm-up runs before each phase. Baseline gives no-proxy comparison."
  echo "=========="
}

# Entry point: build the egress image, run the three phases, print the comparison, clean up.
info "Building image ${IMG}"
# Build output is kept quiet on success, but replayed on failure so the script does not
# die under `set -e` without any explanation.
if ! build_log="$(docker build -t "${IMG}" -f "${REPO_ROOT}/components/egress/Dockerfile" "${REPO_ROOT}" 2>&1)"; then
  echo "Error: docker build of ${IMG} failed:" >&2
  printf '%s\n' "${build_log}" >&2
  exit 1
fi

run_phase_baseline
run_phase "dns+nft"
run_phase "dns"
report
info "Cleaning up"
cleanup


================================================
FILE: components/egress/tests/egress-in-webhook.sh
================================================
#!/bin/bash

# Copyright 2026 Alibaba Group Holding Ltd.
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Start an egress sidecar in dns+nft mode with a deny webhook configured, then push a policy
# that allows by default and denies *.github.com and 10.0.0.0/8.
#
# WEBHOOK_URL must point at the webhook receiver and be reachable from inside the container.
# It is taken from the environment and passed quoted: an unquoted placeholder containing
# '<' and '>' is parsed by the shell as input/output redirections, not as part of the URL.
WEBHOOK_URL="${WEBHOOK_URL:?set WEBHOOK_URL to the deny-webhook endpoint, e.g. http://<host>:8000}"

docker run -d --name egress \
  --rm \
  --cap-add=NET_ADMIN \
  --sysctl net.ipv6.conf.all.disable_ipv6=1 \
  --sysctl net.ipv6.conf.default.disable_ipv6=1 \
  -e OPENSANDBOX_EGRESS_MODE=dns+nft \
  -e "OPENSANDBOX_EGRESS_DENY_WEBHOOK=${WEBHOOK_URL}" \
  -e OPENSANDBOX_EGRESS_SANDBOX_ID=mytest \
  -p 18080:18080 \
  "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/egress:latest"


# Give the policy server a moment to start listening before pushing the policy.
sleep 5
curl -sSf -XPOST "http://127.0.0.1:18080/policy" \
  -d '{"defaultAction":"allow","egress":[{"action":"deny","target":"*.github.com"},{"action":"deny","target":"10.0.0.0/8"}]}'

================================================
FILE: components/egress/tests/hostname.txt
================================================
example.com
example.org
example.net
example.edu
example.io
github.com
github.io
google.com
cloudflare.com
amazon.com
wikipedia.org
mozilla.org
apple.com
microsoft.com
yahoo.com
facebook.com
twitter.com
instagram.com
linkedin.com
reddit.com
stackoverflow.com
npmjs.com
python.org
golang.org
rust-lang.org
docker.com
kubernetes.io
apache.org
gnu.org
kernel.org
ibm.com
oracle.com
openai.com
anthropic.com
stripe.com
slack.com
dropbox.com
spotify.com
netflix.com
twitch.tv
discord.com
zoom.us
medium.com
substack.com
blogger.com
tumblr.com
imgur.com
flickr.com
vimeo.com
soundcloud.com
bandcamp.com
patreon.com
kickstarter.com
etsy.com
ebay.com
craigslist.org
alibaba.com
bing.com
duckduckgo.com
brave.com
opera.com
protonmail.com
fastmail.com
zoho.com
notion.so
trello.com
asana.com
atlassian.com
bitbucket.org
gitlab.com
sourceforge.net
codepen.io
vercel.com
netlify.com
heroku.com
digitalocean.com
linode.com
vultr.com
ovh.com
hetzner.com
scaleway.com
archlinux.org
debian.org
ubuntu.com
fedoraproject.org
opensuse.org
freebsd.org
openbsd.org
mysql.com
mongodb.com
redis.io
elastic.co
nodejs.org
reactjs.org
vuejs.org
svelte.dev
nextjs.org
nuxtjs.org
jquery.com
bootstrap.com
tailwindcss.com


================================================
FILE: components/egress/tests/smoke-dns.sh
================================================
#!/bin/bash

# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Simple smoke test using local image.
# Requires Docker with --cap-add=NET_ADMIN available.

# Abort on command failure, unset variables, and failures inside pipelines.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# tests/ sits three levels below the repo root (components/egress/tests), so climb three levels.
REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"

IMG="opensandbox/egress:local"
containerName="egress-smoke-dns"
# Host port published for the container's policy API (container port 18080).
POLICY_PORT=18080

# Print a message prefixed with the current time (HH:MM:SS).
info() { echo "[$(date +%H:%M:%S)] $*"; }

# Force-remove the egress container; safe to call when it does not exist.
cleanup() {
  docker rm -f "${containerName}" >/dev/null 2>&1 || true
}
trap cleanup EXIT

info "Building image ${IMG}"
docker build -t "${IMG}" -f "${REPO_ROOT}/components/egress/Dockerfile" "${REPO_ROOT}"

info "Starting container ${containerName}"
docker run -d --name "${containerName}" \
  --cap-add=NET_ADMIN \
  --sysctl net.ipv6.conf.all.disable_ipv6=1 \
  --sysctl net.ipv6.conf.default.disable_ipv6=1 \
  -e OPENSANDBOX_EGRESS_MODE=dns \
  -p "${POLICY_PORT}:18080" \
  "${IMG}"

# Poll the health endpoint for up to ~25s (50 attempts x 0.5s). If it never comes up,
# the policy push below fails and aborts the script.
info "Waiting for policy server..."
for i in {1..50}; do
  if curl -sf "http://127.0.0.1:${POLICY_PORT}/healthz" >/dev/null; then
    break
  fi
  sleep 0.5
done

info "Pushing policy (deny by default; allow *.github.com)"
curl -sSf -XPOST "http://127.0.0.1:${POLICY_PORT}/policy" \
  -d '{"defaultAction":"deny","egress":[{"action":"allow","target":"*.github.com"}]}'

# Run curl (arguments passed through) in a throwaway container that shares the egress
# container's network namespace.
run_in_app() {
  docker run --rm --network container:"${containerName}" curlimages/curl "$@"
}

pass() { info "PASS: $*"; }
fail() { echo "FAIL: $*" >&2; exit 1; }

info "Test: denied domain should fail (google.com)"
if run_in_app -I https://google.com --max-time 5 >/dev/null 2>&1; then
  fail "google.com should be blocked"
else
  pass "google.com blocked"
fi

info "Test: allowed domain should succeed (api.github.com)"
run_in_app -I https://api.github.com --max-time 10 >/dev/null 2>&1 || fail "api.github.com should succeed"
pass "api.github.com allowed"

info "All smoke tests passed."

================================================
FILE: components/egress/tests/smoke-dynamic-ip.sh
================================================
#!/bin/bash

# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Smoke test: default deny + domain allow in dns+nft mode.
# Verifies that allowing a domain causes its resolved IP to be added to nft (dynamic IP),
# so that curl to that domain succeeds without static IP/CIDR in policy.

# Stop on the first failing command, on unset variables, and on pipeline failures.
set -euo pipefail

# Resolve the repo root from this script's location: tests/ lives at
# components/egress/tests, i.e. three directories below the root.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"

IMG="opensandbox/egress:local"
containerName="egress-smoke-dynamic-ip"
POLICY_PORT=18080

# Timestamped log line (HH:MM:SS prefix).
info() { printf '[%s] %s\n' "$(date +%H:%M:%S)" "$*"; }

# Remove the sidecar container, ignoring "no such container".
cleanup() {
  docker rm --force "${containerName}" &>/dev/null || true
}
trap cleanup EXIT

info "Building image ${IMG}"
docker build -t "${IMG}" -f "${REPO_ROOT}/components/egress/Dockerfile" "${REPO_ROOT}"

info "Starting sidecar (dns+nft)"
docker run -d --name "${containerName}" \
  --cap-add=NET_ADMIN \
  --sysctl net.ipv6.conf.all.disable_ipv6=1 \
  --sysctl net.ipv6.conf.default.disable_ipv6=1 \
  -e OPENSANDBOX_EGRESS_MODE=dns+nft \
  -p "${POLICY_PORT}:18080" \
  "${IMG}"

# Up to 50 health probes, half a second apart; stop at the first success.
info "Waiting for policy server..."
for ((attempt = 1; attempt <= 50; attempt++)); do
  curl -sf "http://127.0.0.1:${POLICY_PORT}/healthz" >/dev/null && break
  sleep 0.5
done

info "Pushing policy (default deny; allow google.com only)"
curl -sSf -XPOST "http://127.0.0.1:${POLICY_PORT}/policy" \
  -d '{"defaultAction":"deny","egress":[{"action":"allow","target":"google.com"}]}'

# Run curl with the given arguments from a one-shot container attached to the
# sidecar's network namespace.
run_in_app() {
  docker run --rm --network container:"${containerName}" curlimages/curl "$@"
}

pass() { info "PASS: $*"; }
fail() {
  printf 'FAIL: %s\n' "$*" >&2
  exit 1
}

info "Test: allowed domain (google.com) should succeed via dynamic IP"
if ! run_in_app -I https://google.com --max-time 15 >/dev/null 2>&1; then
  fail "google.com should succeed (DNS allow + dynamic IP in nft)"
fi
pass "google.com allowed"

info "Test: denied domain (api.github.com) should fail"
run_in_app -I https://api.github.com --max-time 8 >/dev/null 2>&1 && fail "api.github.com should be blocked"
pass "api.github.com blocked"

info "Test: denied IP (1.1.1.1) should fail"
run_in_app -I 1.1.1.1 --max-time 8 >/dev/null 2>&1 && fail "1.1.1.1 should be blocked"
pass "1.1.1.1 blocked"

info "All smoke tests (dynamic IP) passed."


================================================
FILE: components/egress/tests/smoke-nft.sh
================================================
#!/bin/bash

# Copyright 2026 Alibaba Group Holding Ltd.
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Simple smoke test using local image.
# Requires Docker with --cap-add=NET_ADMIN available.

# Abort on command failure, unset variables, and failures inside pipelines.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# tests/ sits three levels below the repo root (components/egress/tests), so climb three levels.
REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"

IMG="opensandbox/egress:local"
containerName="egress-smoke-nft"
# Host port published for the container's policy API (container port 18080).
POLICY_PORT=18080

# Print a message prefixed with the current time (HH:MM:SS).
info() { echo "[$(date +%H:%M:%S)] $*"; }

# Force-remove the egress container; safe to call when it does not exist.
cleanup() {
  docker rm -f "${containerName}" >/dev/null 2>&1 || true
}
trap cleanup EXIT

info "Building image ${IMG}"
docker build -t "${IMG}" -f "${REPO_ROOT}/components/egress/Dockerfile" "${REPO_ROOT}"

info "Starting container ${containerName}"
docker run -d --name "${containerName}" \
  --cap-add=NET_ADMIN \
  --sysctl net.ipv6.conf.all.disable_ipv6=1 \
  --sysctl net.ipv6.conf.default.disable_ipv6=1 \
  -e OPENSANDBOX_EGRESS_MODE=dns+nft \
  -p "${POLICY_PORT}:18080" \
  "${IMG}"

# Poll the health endpoint for up to ~25s (50 attempts x 0.5s). If it never comes up,
# the policy push below fails and aborts the script.
info "Waiting for policy server..."
for i in {1..50}; do
  if curl -sf "http://127.0.0.1:${POLICY_PORT}/healthz" >/dev/null; then
    break
  fi
  sleep 0.5
done

info "Pushing policy (allow by default; deny github.com & 10.0.0.0/8)"
curl -sSf -XPOST "http://127.0.0.1:${POLICY_PORT}/policy" \
  -d '{"defaultAction":"allow","egress":[{"action":"deny","target":"*.github.com"},{"action":"deny","target":"10.0.0.0/8"}]}'

# Run curl (arguments passed through) in a throwaway container that shares the egress
# container's network namespace.
run_in_app() {
  docker run --rm --network container:"${containerName}" curlimages/curl "$@"
}

pass() { info "PASS: $*"; }
fail() { echo "FAIL: $*" >&2; exit 1; }

info "Test: allowed domain should succeed (google.com)"
run_in_app -I https://google.com --max-time 10 >/dev/null 2>&1 || fail "google.com should succeed"
pass "google.com allowed"

info "Test: denied domain should fail (api.github.com)"
if run_in_app -I https://api.github.com --max-time 8 >/dev/null 2>&1; then
  fail "api.github.com should be blocked"
else
  pass "api.github.com blocked"
fi

info "Test: allowed IP should succeed (1.1.1.1)"
run_in_app -I https://1.1.1.1 --max-time 10 >/dev/null 2>&1 || fail "1.1.1.1 should succeed"
pass "1.1.1.1 allowed"

info "Test: denied CIDR should fail (10.0.0.1)"
if run_in_app -I http://10.0.0.1 --max-time 5 >/dev/null 2>&1; then
  fail "10.0.0.1 should be blocked"
else
  pass "10.0.0.1 blocked"
fi

info "Test: DoT (853) should be blocked"
if run_in_app -k https://1.1.1.1:853 --max-time 5 >/dev/null 2>&1; then
  fail "DoT 853 should be blocked"
else
  pass "DoT 853 blocked"
fi

# POST replaces the whole policy; the PATCH further down adds a rule on top of it.
info "Rules update: wildcard deny -> patch allow specific (dns+nft)"
curl -sSf -XPOST "http://127.0.0.1:${POLICY_PORT}/policy" \
  -d '{"defaultAction":"allow","egress":[{"action":"deny","target":"*.cloudflare.com"}]}'

info "Test: www.cloudflare.com should be blocked initially (deny via wildcard)"
if run_in_app -I https://www.cloudflare.com --max-time 8 >/dev/null 2>&1; then
  fail "www.cloudflare.com should be blocked before patch"
else
  pass "www.cloudflare.com blocked before patch"
fi

info "Patching allow for www.cloudflare.com (specific should override earlier deny)"
curl -sSf -XPATCH "http://127.0.0.1:${POLICY_PORT}/policy" \
  -d '[{"action":"allow","target":"www.cloudflare.com"}]'

info "Test: www.cloudflare.com should be allowed after patch"
run_in_app -I https://www.cloudflare.com --max-time 10 >/dev/null 2>&1 || fail "www.cloudflare.com should succeed after patch"
pass "www.cloudflare.com allowed after patch"

info "Rules update: wildcard allow -> patch deny specific (dns+nft)"
curl -sSf -XPOST "http://127.0.0.1:${POLICY_PORT}/policy" \
  -d '{"defaultAction":"deny","egress":[{"action":"allow","target":"*.mozilla.org"}]}'

info "Test: www.mozilla.org should be allowed initially (allow via wildcard)"
run_in_app -I https://www.mozilla.org --max-time 10 >/dev/null 2>&1 || fail "www.mozilla.org should succeed before patch"
pass "www.mozilla.org allowed before patch"

info "Patching deny for www.mozilla.org (specific should override earlier allow)"
curl -sSf -XPATCH "http://127.0.0.1:${POLICY_PORT}/policy" \
  -d '[{"action":"deny","target":"www.mozilla.org"}]'

info "Test: www.mozilla.org should be blocked after patch"
if run_in_app -I https://www.mozilla.org --max-time 8 >/dev/null 2>&1; then
  fail "www.mozilla.org should be blocked after patch"
else
  pass "www.mozilla.org blocked after patch"
fi

info "All smoke tests passed."

================================================
FILE: components/egress/tests/webhook-server.py
================================================
#!/usr/bin/env python3

# Copyright 2026 Alibaba Group Holding Ltd.
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Lightweight HTTP server to receive OPENSANDBOX_EGRESS_DENY_WEBHOOK callbacks.

Config:
- WEBHOOK_HOST: listen address (default 0.0.0.0)
- WEBHOOK_PORT: listen port (default 8000)
- WEBHOOK_PATH: webhook path (default /)

Run:
  python webhook-server.py
Then point OPENSANDBOX_EGRESS_DENY_WEBHOOK to http://<host>:<port><path>
"""

import http.server
import json
import os
import socketserver
from datetime import datetime

# Listen address, port and accepted request path; each can be overridden via the environment.
HOST = os.getenv("WEBHOOK_HOST", "0.0.0.0")
PORT = int(os.getenv("WEBHOOK_PORT", "8000"))
PATH = os.getenv("WEBHOOK_PATH", "/")


class WebhookHandler(http.server.BaseHTTPRequestHandler):
    """Request handler that logs every POST received on ``PATH`` to stdout.

    Responses are plain text: 200 "received" for an accepted webhook, 404 for any
    other path, and 400 when the ``Content-Length`` header is not a non-negative integer.
    """

    def _send(self, code: int = 200, body: str = "ok") -> None:
        """Write a complete plain-text response with status ``code`` and ``body``."""
        data = body.encode("utf-8")
        self.send_response(code)
        self.send_header("Content-Type", "text/plain; charset=utf-8")
        # Tell the client where the body ends instead of relying on connection close.
        self.send_header("Content-Length", str(len(data)))
        self.end_headers()
        self.wfile.write(data)

    def do_POST(self) -> None:  # noqa: N802 (BaseHTTPRequestHandler API)
        """Read the request body, print request details, and acknowledge with 200."""
        from datetime import timezone  # local import: the module only imports `datetime`

        # Only allow the configured path
        if self.path != PATH:
            self._send(404, "not found")
            return

        # A malformed header must not raise out of the handler, and a negative length
        # must never reach rfile.read(), where it would mean "read until EOF".
        try:
            length = int(self.headers.get("Content-Length", 0))
        except (TypeError, ValueError):
            length = -1
        if length < 0:
            self._send(400, "invalid Content-Length")
            return
        raw = self.rfile.read(length) if length else b""

        payload = raw.decode("utf-8", errors="replace")
        try:
            parsed = json.loads(payload)
        except json.JSONDecodeError:
            # Non-JSON bodies are still logged raw below.
            parsed = None

        # Log request info for debugging
        # Timezone-aware replacement for the deprecated datetime.utcnow(); tzinfo is
        # stripped so the printed value keeps the "<iso>Z" form.
        stamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        print(f"\n[{stamp}Z] Received webhook")
        print(f"Path: {self.path}")
        print(f"Headers: {dict(self.headers)}")
        print(f"Raw body: {payload}")
        if parsed is not None:
            print("Parsed JSON:")
            print(json.dumps(parsed, indent=2))

        self._send(200, "received")

    # Silence default logging to reduce noise
    def log_message(self, *args) -> None:
        return


def main() -> None:
    """Serve webhook requests on ``(HOST, PORT)`` until the process is stopped."""

    class _ReusableTCPServer(socketserver.TCPServer):
        # Plain TCPServer leaves allow_reuse_address off, so restarting the
        # script shortly after stopping it can fail with "Address already in
        # use" while the old socket lingers in TIME_WAIT.
        allow_reuse_address = True

    with _ReusableTCPServer((HOST, PORT), WebhookHandler) as httpd:
        print(f"Listening on http://{HOST}:{PORT}{PATH} ...")
        httpd.serve_forever()


if __name__ == "__main__":
    main()

================================================
FILE: components/execd/.golangci.yml
================================================
run:
  skip-dirs:
    - vendor
    - tests
    - scripts
  skip-files:
    - .*/zz_generated.deepcopy.go
    - .*/mock/*.go
  tests: false
  timeout: 10m
linters-settings:
  funlen:
    lines: 500
    statements: 200
  gocyclo:
    min-complexity: 40
  gosimple:
    checks: ["S1019", "S1002"]
  staticcheck:
    checks: ["SA4006"]
  govet:
    enable:
      - asmdecl
      - assign
      - atomic
      - atomicalign
      - bools
      - buildtag
      - cgocall
      - copylocks
      - deepequalerrors
      - errorsas
      - findcall
      - framepointer
      - httpresponse
      - ifaceassert
      - lostcancel
      - nilfunc
      - nilness
      - reflectvaluecompare
      - shift
      - sigchanyzer
      - sortslice
      - stdmethods
      - stringintconv
      - testinggoroutine
      - tests
      - unmarshal
      - unreachable
      - unsafeptr
      - unusedresult
      - printf
    disable:
      - composites
      - loopclosure
      - fieldalignment
      - shadow
      - structtag
      - unusedwrite
  errcheck:
    exclude-functions:
    - flag.Set
    - os.Setenv
    - os.Unsetenv
    - logger.Sync
    - fmt.Fprintf
    - fmt.Fprintln
    - (io.Closer).Close
    - (io.ReadCloser).Close
    - (k8s.io/client-go/tools/cache.SharedInformer).AddEventHandler
  nestif:
    min-complexity: 32
  goconst:
    # Minimal length of string constant.
    # Default: 3
    min-len: 3
    # Minimum occurrences of constant string count to trigger issue.
    # Default: 3
    min-occurrences: 3
    # Ignore test files.
    # Default: false
    ignore-tests: true
    match-constant: false
    numbers: true
    min: 2
    max: 10
    ignore-calls: true
  gosec:
    includes:
      - G101 # Look for hard coded credentials
      - G102 # Bind to all interfaces
      - G103 # Audit the use of unsafe block
      - G104 # Audit errors not checked
      - G106 # Audit the use of ssh.InsecureIgnoreHostKey
      - G107 # Url provided to HTTP request as taint input
      - G108 # Profiling endpoint automatically exposed on /debug/pprof
      - G109 # Potential Integer overflow made by strconv.Atoi result conversion to int16/32
      - G110 # Potential DoS vulnerability via decompression bomb
      - G111 # Potential directory traversal
      - G112 # Potential slowloris attack
      - G113 # Usage of Rat.SetString in math/big with an overflow (CVE-2022-23772)
      # - G114 # Use of net/http serve function that has no support for setting timeouts
      - G201 # SQL query construction using format string
      - G202 # SQL query construction using string concatenation
      - G203 # Use of unescaped data in HTML templates
      #- G204 # Audit use of command execution
      - G301 # Poor file permissions used when creating a directory
      - G302 # Poor file permissions used with chmod
      - G303 # Creating tempfile using a predictable path
      - G304 # File path provided as taint input
      - G305 # File traversal when extracting zip/tar archive
      - G306 # Poor file permissions used when writing to a new file
      - G307 # Deferring a method which returns an error
      #- G401 # Detect the usage of DES, RC4, MD5 or SHA1
      - G402 # Look for bad TLS connection settings
      - G403 # Ensure minimum RSA key length of 2048 bits
      - G404 # Insecure random number source (rand)
      #- G501 # Import blocklist: crypto/md5
      - G502 # Import blocklist: crypto/des
      - G503 # Import blocklist: crypto/rc4
      - G504 # Import blocklist: net/http/cgi
      - G505 # Import blocklist: crypto/sha1
      - G601 # Implicit memory aliasing of items from a range statement
    # Exclude generated files
    # Default: false
    exclude-generated: true
    # Filter out the issues with a lower severity than the given value.
    # Valid options are: low, medium, high.
    # Default: low
    severity: medium
    # Filter out the issues with a lower confidence than the given value.
    # Valid options are: low, medium, high.
    # Default: low
    confidence: medium
    # Concurrency value.
    # Default: the number of logical CPUs usable by the current process.
    concurrency: 12
    # To specify the configuration of rules.
    config:
      # Globals are applicable to all rules.
      global:
        nosec: true
        show-ignored: true
        audit: true
      G101:
        # Regexp pattern for variables and constants to find.
        # Default: "(?i)passwd|pass|password|pwd|secret|token|pw|apiKey|bearer|cred"
        pattern: "(?i)example"
        # If true, complain about all cases (even with low entropy).
        # Default: false
        ignore_entropy: false
        # Maximum allowed entropy of the string.
        # Default: "80.0"
        entropy_threshold: "80.0"
        per_char_threshold: "3.0"
        truncate: "32"
      G104:
        fmt:
          - Fscanf
      G111:
        # Regexp pattern to find potential directory traversal.
        # Default: "http\\.Dir\\(\"\\/\"\\)|http\\.Dir\\('\\/'\\)"
        pattern: "custom\\.Dir\\(\\)"
      # Maximum allowed permissions mode for os.Mkdir and os.MkdirAll
      # Default: "0750"
      G301: "0750"
      # Maximum allowed permissions mode for os.OpenFile and os.Chmod
      # Default: "0600"
      G302: "0600"
      # Maximum allowed permissions mode for os.WriteFile and ioutil.WriteFile
      # Default: "0600"
      G306: "0600"
  nilnil:
    checked-types:
      - ptr
      - map
      - chan
  depguard:
    rules:
      prevent_unmaintained_packages:
        list-mode: lax # allow unless explicitly denied
        files:
          - $all
          - "!$test"
        allow:
          - $gostd
          - path/filepath
        deny:
          - pkg: io/ioutil
            desc: "replaced by io and os packages since Go 1.16: https://tip.golang.org/doc/go1.16#ioutil"
          - pkg: path
            desc: "replaced by cross-platform package path/filepath"
  gci:
    # Section configuration to compare against.
    # Section names are case-insensitive and may contain parameters in ().
    # The default order of sections is `standard > default > custom > blank > dot > alias > localmodule`,
    # If `custom-order` is `true`, it follows the order of `sections` option.
    # Default: ["standard", "default"]
    sections:
      - standard # Standard section: captures all standard packages.
      - default # Default section: contains all imports that could not be matched to another section type.
      - prefix(github.com/org/project) # Custom section: groups all imports with the specified Prefix.
      - blank # Blank section: contains all blank imports. This section is not present unless explicitly enabled.
      - dot # Dot section: contains all dot imports. This section is not present unless explicitly enabled.
      - localmodule # Local module section: contains all local packages. This section is not present unless explicitly enabled.
    # Skip generated files.
    # Default: true
    skip-generated: true
    # Enable custom order of sections.
    # If `true`, make the section order the same as the order of `sections`.
    # Default: false
    custom-order: true
    # Drops lexical ordering for custom sections.
    # Default: false
    no-lex-order: true
  forbidigo:
    forbid:
      # Forbid spew Dump, whether it is called as function or method.
      # Depends on analyze-types below.
      - ^spew\.(ConfigState\.)?Dump$
      # The package name might be ambiguous.
      # The full import path can be used as additional criteria.
      # Depends on analyze-types below.
      - p: ^v1.Dump$
        pkg: ^example.com/pkg/api/v1$

linters:
  enable:
    - asasalint
    - asciicheck
    - bidichk
    - bodyclose
    # - cyclop
    - decorder
    - depguard
    - errcheck
    # - errchkjson
    - errorlint
    - forbidigo
    # - forcetypeassert
    - funlen
    - ineffassign
    - gocognit
    - gocyclo
    - goheader
    - gomodguard
    - goprintffuncname
    - gosimple
    - gosec
    - grouper
    - importas
    - maintidx
    - misspell
    - nakedret
    - nilerr
    - nilnil
    # - noctx
    - nosprintfhostport
    - paralleltest
    - predeclared
    # - promlinter
    - reassign
    - sqlclosecheck
    - staticcheck
    - tenv
    - testpackage
    - tparallel
    # del
    # - typecheck
    - usestdlibvars
    - nestif
    - unused
    - makezero
    - govet
    - goconst
    - gci
    # - rowserrcheck
    # 1.59 version no new lints
    # 1.58 version new lints
    # - fatcontext
    - canonicalheader
    # 1.57 version new lints
    - copyloopvar
    - intrange
    # 1.56 version new lints
    - spancheck
    # 1.55 version new lints
    - gochecksumtype
    - perfsprint
    - sloglint
    - testifylint
    - mirror
    - zerologlint
    # 1.51 version new lints
    - gocheckcompilerdirectives
    # 1.50 version new lints
    - testableexamples

issues:
  # Note: path identifiers are regular expressions, hence the \.go suffixes.
  exclude-rules:
    - path: main\.go
      linters:
        - forbidigo
    - path: _test\.go
      linters:
        - dogsled
        - errcheck
        - goconst
        - gosec
        - ineffassign
        - maintidx
        - typecheck
    - path: \.go$
      text: "should have a package comment"
    - path: \.go$
      text: 'exported (.+) should have comment( \(or a comment on this block\))? or be unexported'
    - path: \.go$
      text: "fmt.Sprintf can be replaced with string concatenation"


================================================
FILE: components/execd/DEVELOPMENT.md
================================================
# Development Guide - execd

This comprehensive guide explains how to work on `execd` as a contributor or maintainer. It covers environment setup,
development workflows, testing strategies, architectural patterns, and subsystem-specific implementation details.

## Table of Contents

- [Getting Started](#getting-started)
- [Project Structure](#project-structure)
- [Coding Standards](#coding-standards)
- [Testing Strategy](#testing-strategy)
- [Subsystem Guides](#subsystem-guides)
- [Common Development Tasks](#common-development-tasks)
- [Debugging Techniques](#debugging-techniques)
- [Performance Optimization](#performance-optimization)
- [Contributing Guidelines](#contributing-guidelines)
- [Additional Resources](#additional-resources)

## Getting Started

### Prerequisites

#### Required Tools

- **Go 1.24+** - Match the version declared in `go.mod`
- **Git** - Version control
- **Make** - Build automation (optional but recommended)

#### Optional but Recommended

- **golangci-lint** - For comprehensive linting
- **Docker/Podman** - For containerized testing and deployment
- **Jupyter Server** - Required for integration tests with real kernels
- **VS Code/GoLand** - IDE with Go support

### Initial Setup

```bash
# Clone the repository
git clone https://github.com/alibaba/OpenSandbox.git
cd OpenSandbox/components/execd

# Download dependencies
go mod download

# Verify setup
go build -o bin/execd .
```

## Project Structure

### Project Structure Deep Dive

```
execd/
├── main.go                 # Application entry point
├── go.mod                  # Go module definition
├── Makefile               # Build automation
├── Dockerfile             # Container image definition
│
├── pkg/                   # Public packages
│   ├── flag/              # CLI flag parsing
│   ├── web/               # HTTP layer
│   │   ├── router.go      # Route registration
│   │   ├── controller/    # Request handlers
│   │   └── model/         # API models
│   ├── runtime/           # Execution engine
│   │   ├── ctrl.go        # Main controller
│   │   ├── jupyter.go     # Jupyter execution
│   │   └── command.go     # Shell command execution
│   ├── jupyter/           # Jupyter client
│   │   ├── client.go      # HTTP/WebSocket client
│   │   ├── session/       # Session management
│   │   └── execute/       # Execution protocol
│   └── util/              # Utilities
│
└── tests/                # Integration test scripts
```

### Key Design Patterns

#### 1. Controller Pattern (pkg/web/controller)

Controllers are thin HTTP handlers that parse requests, validate, delegate to runtime, and stream responses via SSE.

#### 2. Runtime Controller Pattern (pkg/runtime)

The runtime controller dispatches requests to appropriate executors (Jupyter, Command, SQL) and manages session
lifecycle.

#### 3. Hook Pattern for Streaming

Execution results are streamed via hooks, allowing controllers to transform runtime events into SSE events without tight
coupling.

## Coding Standards

### Go Conventions

#### Formatting

**Always use `gofmt`** before committing:

```bash
gofmt -w .
# or
make fmt
```

#### Import Organization

Three groups separated by blank lines:

```go
import (
    // Standard library
    "context"
    "fmt"

    // Third-party
    "github.com/beego/beego/v2/core/logs"

    // Internal
    "github.com/alibaba/opensandbox/execd/pkg/runtime"
)
```

#### Error Handling

Always handle errors explicitly:

```go
// Good
result, err := someOperation()
if err != nil {
    logs.Error("operation failed: %v", err)
    return fmt.Errorf("failed to do something: %w", err)
}

// Bad - silent failure
result, _ := someOperation()
```

#### Logging

Use Beego's `logs` package with printf-style format strings, keeping a consistent `key=value` layout in the message:

```go
logs.Info("starting execution: sessionID=%s", sessionID)
logs.Warning("session busy: sessionID=%s", sessionID)
logs.Error("execution failed: error=%v", err)
logs.Debug("received event: type=%s", eventType)
```

### Concurrency Best Practices

#### Use safego for goroutines

Always start goroutines with `safego.Go` so that a panic inside the goroutine does not crash the whole process:

```go
import "github.com/alibaba/opensandbox/execd/pkg/util/safego"

safego.Go(func() {
    processInBackground()
})
```

#### Context Propagation

Always respect context cancellation:

```go
func (c *Controller) runCommand(ctx context.Context, req *ExecuteCodeRequest) error {
    cmd := exec.CommandContext(ctx, "bash", "-c", req.Code)

    go func() {
        <-ctx.Done()
        if cmd.Process != nil {
            cmd.Process.Kill()
        }
    }()

    return cmd.Run()
}
```

## Testing Strategy

### Unit Tests

Located in `*_test.go` files alongside source code.

**Example:**

```go
func TestController_Execute_Python(t *testing.T) {
    ctrl := NewController("http://jupyter:8888", "test-token")

    req := &ExecuteCodeRequest{
        Language: Python,
        Code:     "print('hello')",
    }

    err := ctrl.Execute(req)
    assert.NoError(t, err)
}
```

**Running Unit Tests:**

```bash
go test ./pkg/...
# with coverage
go test -v -cover ./pkg/...
```

### Integration Tests

Located in `*_integration_test.go` files; these tests require real dependencies (for example, a running Jupyter server).

**Running Integration Tests:**

```bash
export JUPYTER_URL=http://localhost:8888
export JUPYTER_TOKEN=your-token
go test -v ./pkg/jupyter/...
```

### Test Coverage

Check coverage:

```bash
go test -coverprofile=coverage.out ./pkg/...
go tool cover -html=coverage.out -o coverage.html
```

**Coverage Goals:**

- Core packages (`pkg/runtime`, `pkg/jupyter`): > 80%
- Controllers (`pkg/web/controller`): > 70%
- Utilities (`pkg/util`): > 90%

## Subsystem Guides

### Working with Jupyter Integration

#### Architecture

```
pkg/jupyter/
├── client.go          # Main client
├── transport.go       # Connection handling
├── session/           # Session lifecycle
├── execute/           # Execution protocol
└── auth/              # Authentication
```

#### Adding New Kernel Support

1. Define language in `pkg/runtime/language.go`:

```go
const Ruby Language = "ruby"
```

2. Map to kernel in `pkg/runtime/jupyter.go`

3. Test with real kernel:

```bash
# Install Ruby kernel
gem install iruby
iruby register --force

# Run test
export JUPYTER_URL=http://localhost:8888
go test -v ./pkg/jupyter/integration_test.go
```

#### Debugging Jupyter Communication

Run debug integration test:

```bash
go test -v ./pkg/jupyter/debug_integration_test.go
```

This dumps complete HTTP request/response pairs.

### Working with Command Execution

#### Key Implementation Details

**Process Group Management:**

```go
cmd.SysProcAttr = &syscall.SysProcAttr{
    Setpgid: true,  // Create new process group
}
```

This allows signal forwarding to all child processes:

```go
syscall.Kill(-cmd.Process.Pid, syscall.SIGTERM)
```

**Signal Forwarding:**

```go
signals := make(chan os.Signal, 1)
signal.Notify(signals)

go func() {
    for sig := range signals {
        if sig != syscall.SIGCHLD && sig != syscall.SIGURG {
            syscall.Kill(-cmd.Process.Pid, sig.(syscall.Signal))
        }
    }
}()
```

**Stdout/Stderr Streaming:**

Commands write to temporary log files, which are tailed and streamed to hooks.

## Common Development Tasks

### Adding a New API Endpoint

1. **Define model** in `pkg/web/model/`:

```go
type NewFeatureRequest struct {
    Param1 string `json:"param1" validate:"required"`
    Param2 int    `json:"param2"`
}
```

2. **Add controller method** in `pkg/web/controller/`:

```go
func (c *MyController) NewFeature() {
    var req model.NewFeatureRequest
    json.Unmarshal(c.Ctx.Input.RequestBody, &req)

    // Business logic
    result := processNewFeature(req)

    c.Data["json"] = result
    c.ServeJSON()
}
```

3. **Register route** in `pkg/web/router.go`:

```go
myNamespace := web.NewNamespace("/my-feature",
    web.NSRouter("", &controller.MyController{}, "post:NewFeature"),
)
web.AddNamespace(myNamespace)
```

### Adding Configuration Flag

1. **Declare in `pkg/flag/flags.go`:**

```go
var NewFeatureTimeout time.Duration
```

2. **Parse in `pkg/flag/parser.go`:**

```go
func InitFlags() {
    flag.DurationVar(&NewFeatureTimeout, "new-feature-timeout", 30*time.Second, "Description")

    // Parse environment variable
    if env := os.Getenv("NEW_FEATURE_TIMEOUT"); env != "" {
        if d, err := time.ParseDuration(env); err == nil {
            NewFeatureTimeout = d
        }
    }

    flag.Parse()
}
```

3. **Update README** with new flag documentation

## Debugging Techniques

### Local Debugging with Delve

```bash
# Install delve
go install github.com/go-delve/delve/cmd/dlv@latest

# Start debugging
dlv debug . -- \
  --jupyter-host=http://localhost:8888 \
  --jupyter-token=test

# Set breakpoint
(dlv) break pkg/runtime/ctrl.go:57
(dlv) continue
```

### Debugging SSE Streams

**Test with curl:**

```bash
curl -N -H "x-access-token: dev" \
  -H "Content-Type: application/json" \
  -d '{"language":"python","code":"print(\"test\")"}' \
  http://localhost:44772/code
```

The `-N` flag disables buffering for real-time events.

**Debug in browser:**

```javascript
const eventSource = new EventSource('/code');

eventSource.addEventListener('stdout', (e) => {
    console.log('stdout:', e.data);
});

eventSource.addEventListener('error', (e) => {
    console.error('error:', e.data);
});
```

### Performance Profiling

**CPU Profile:**

```go
// Add to main.go
import _ "net/http/pprof"

go func() {
    http.ListenAndServe("localhost:6060", nil)
}()
```

```bash
# Collect profile
go tool pprof "http://localhost:6060/debug/pprof/profile?seconds=30"
```

**Memory Profile:**

```bash
go tool pprof http://localhost:6060/debug/pprof/heap
```

**Goroutine Inspection:**

```bash
curl http://localhost:6060/debug/pprof/goroutine?debug=2
```

## Performance Optimization

### Optimization Guidelines

1. **Profile before optimizing** - Use pprof to identify bottlenecks
2. **Benchmark changes** - Measure impact of optimizations
3. **Use `sync.Pool`** for frequently allocated objects
4. **Minimize allocations** in hot paths
5. **Buffer channels** appropriately

### Example: Optimizing SSE Writer

**Before:**

```go
func writeEvent(w http.ResponseWriter, event, data string) {
    fmt.Fprintf(w, "event: %s\ndata: %s\n\n", event, data)
    w.(http.Flusher).Flush()
}
```

**After:**

```go
var bufPool = sync.Pool{
    New: func() interface{} { return new(bytes.Buffer) },
}

func writeEvent(w http.ResponseWriter, event, data string) {
    buf := bufPool.Get().(*bytes.Buffer)
    buf.Reset()
    defer bufPool.Put(buf)

    buf.WriteString("event: ")
    buf.WriteString(event)
    buf.WriteString("\ndata: ")
    buf.WriteString(data)
    buf.WriteString("\n\n")

    w.Write(buf.Bytes())
    w.(http.Flusher).Flush()
}
```

**Benchmark:**

```go
func BenchmarkWriteEvent(b *testing.B) {
    w := httptest.NewRecorder()
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        writeEvent(w, "test", "data")
    }
}
```

## Contributing Guidelines

### Pull Request Process

1. **Fork and clone** the repository
2. **Create feature branch** from `main`
3. **Implement changes** following coding standards
4. **Add tests** for new functionality
5. **Run all tests** and ensure they pass
6. **Update documentation** as needed
7. **Submit PR** with clear description

### Code Review Standards

Reviewers check for:

- [ ] Correctness and functionality
- [ ] Test coverage
- [ ] Code style and formatting
- [ ] Documentation completeness
- [ ] Performance implications
- [ ] Security considerations
- [ ] Error handling
- [ ] Backwards compatibility

### Release Checklist

Before releasing:

- [ ] All tests pass (unit, integration, e2e)
- [ ] Documentation updated (README, DEVELOPMENT, API docs)
- [ ] CHANGELOG updated with changes
- [ ] Version bumped appropriately (semver)
- [ ] Dependencies reviewed and updated
- [ ] Security scan passed
- [ ] Performance benchmarks run
- [ ] Docker image built and tested

## Additional Resources

### Useful Commands

```bash
# Format all Go files
make fmt

# Run linter
make golint

# Run all tests
make test

# Build binary
make build
```

### External Documentation

- [Beego Documentation](https://beego.wiki/)
- [Jupyter Kernel Protocol](https://jupyter-client.readthedocs.io/en/stable/messaging.html)
- [Go Best Practices](https://golang.org/doc/effective_go)
- [Server-Sent Events Spec](https://html.spec.whatwg.org/multipage/server-sent-events.html)

### Getting Help

- **Issues**: Report bugs or request features on GitHub Issues
- **Discussions**: Ask questions in GitHub Discussions
- **Chat**: Join the OpenSandbox community chat
- **Documentation**: Check the wiki for detailed guides

---

**Happy hacking!** Feel free to augment this guide with tips you discover along the way. For questions or suggestions,
open an issue or discussion on GitHub.


================================================
FILE: components/execd/Dockerfile
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ---- Build stage: compile the execd binary ----
# The COPY paths below start with components/, so the build context must be
# the repository root.
FROM golang:1.24.0 AS builder

WORKDIR /build

# Prepare local modules to satisfy replace directives.
COPY components/internal/go.mod components/internal/go.sum ./components/internal/
COPY components/execd/go.mod components/execd/go.sum ./components/execd/

# Download deps with only mod files for better caching.
RUN cd components/internal && go mod download
RUN cd components/execd && go mod download

# Copy sources.
COPY components/internal ./components/internal
COPY components/execd ./components/execd

WORKDIR /build/components/execd

# Version metadata injected through -ldflags. Declared only here, right before
# the build step: every RUN that follows an ARG implicitly receives it, so
# declaring these earlier would make a changing BUILD_TIME/GIT_COMMIT
# invalidate the cached `go mod download` layers above.
ARG VERSION=dev
ARG GIT_COMMIT=unknown
ARG BUILD_TIME=unknown

# CGO is disabled for the build.
RUN CGO_ENABLED=0 go build \
    -ldflags "-X 'github.com/alibaba/opensandbox/internal/version.Version=${VERSION}' \
              -X 'github.com/alibaba/opensandbox/internal/version.BuildTime=${BUILD_TIME}' \
              -X 'github.com/alibaba/opensandbox/internal/version.GitCommit=${GIT_COMMIT}'" \
    -o /build/execd ./main.go

# ---- Runtime stage: binary plus bootstrap script only ----
FROM alpine:latest

# No WORKDIR is set, so both files land in the image root (/).
COPY --from=builder /build/execd .
COPY components/execd/bootstrap.sh ./bootstrap.sh

ENTRYPOINT ["./execd"]


================================================
FILE: components/execd/Makefile
================================================
# Format every Go package in this module.
.PHONY: fmt
fmt: ## Run go fmt against code.
	go fmt ./...

# NOTE: before vetting, this target runs `go mod tidy` and `go mod vendor`,
# so it may rewrite go.mod/go.sum and the vendor/ directory.
.PHONY: vet
vet: ## Run go vet against code.
	go mod tidy && go mod vendor
	go vet ./...

# Depends on `vet`, so the module tidy/vendor step above runs first.
# Tests are taken from ./pkg/...; coverage is measured across ./... .
.PHONY: test
test: vet ## Run tests
	go test -v -coverpkg=./... ./pkg/...

##@ Linter

# Install golangci-lint via `go install` only when it is not already on PATH.
# The redirection is written as `>/dev/null 2>&1` because make runs recipes
# with /bin/sh; under a POSIX shell such as dash, the bash-only `&>` is parsed
# as "run in background" followed by an empty redirect, which makes the test
# always succeed and reinstalls the linter on every invocation.
.PHONY: install-golint
install-golint:
	@if ! command -v golangci-lint >/dev/null 2>&1; then \
		echo "installing golangci-lint..."; \
		go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest; \
	else \
		echo "golangci-lint already installed"; \
	fi

# Format the code, make sure the linter is available, then lint with --fix
# (golangci-lint may modify source files).
.PHONY: golint
golint: fmt install-golint
	golangci-lint run -v --fix ./...

# Build metadata. Each value uses `?=`, so it can be overridden from the
# environment or the make command line; the git-derived ones fall back to a
# placeholder when the git command fails.
VERSION ?= $(shell git describe --tags --always --dirty 2>/dev/null || echo "dev")
GIT_COMMIT ?= $(shell git rev-parse HEAD 2>/dev/null || echo "unknown")
BUILD_TIME ?= $(shell date -u +"%Y-%m-%dT%H:%M:%SZ")
# Linker flags that stamp the metadata into the internal/version package.
LDFLAGS := -X 'github.com/alibaba/opensandbox/internal/version.Version=$(VERSION)' \
	-X 'github.com/alibaba/opensandbox/internal/version.BuildTime=$(BUILD_TIME)' \
	-X 'github.com/alibaba/opensandbox/internal/version.GitCommit=$(GIT_COMMIT)'

# Builds bin/execd for the host platform. Depends on `vet`.
.PHONY: build
build: vet ## Build the binary.
	@mkdir -p bin
	go build -ldflags "$(LDFLAGS)" -o bin/execd main.go

# Builds bin/execd-<os>-<arch> for each OS/arch pair (Windows outputs get a
# .exe suffix) with CGO disabled. `|| exit $$?` aborts the loop on the first
# failed build; `$$` escapes `$` so the shell, not make, expands the variables.
.PHONY: multi-build
multi-build: vet ## Cross-compile for linux/windows/darwin amd64/arm64.
	@mkdir -p bin
	@for os in linux windows darwin; do \
		for arch in amd64 arm64; do \
			out=bin/execd-$${os}-$${arch}; \
			[ "$${os}" = "windows" ] && out="$${out}.exe"; \
			echo ">> building $${os}/$${arch} -> $${out}"; \
			GOOS=$${os} GOARCH=$${arch} CGO_ENABLED=0 go build -ldflags "$(LDFLAGS)" -o "$${out}" main.go || exit $$?; \
		done; \
	done


================================================
FILE: components/execd/README.md
================================================
# execd - OpenSandbox Execution Daemon

English | [中文](README_zh.md)

`execd` is the execution daemon for OpenSandbox. Built on Beego, it exposes a comprehensive HTTP API that turns external requests into runtime actions: managing Jupyter sessions, streaming code output via Server-Sent Events (SSE), executing shell commands, operating on the sandbox filesystem, and collecting host-side metrics.

## Table of Contents

- [Overview](#overview)
- [Core Features](#core-features)
- [Architecture](#architecture)
- [Getting Started](#getting-started)
- [Configuration](#configuration)
- [API Reference](#api-reference)
- [Supported Languages](#supported-languages)
- [Development](#development)
- [Testing](#testing)
- [Observability](#observability)
- [Performance Benchmarks](#performance-benchmarks)
- [Contributing](#contributing)
- [License](#license)
- [Support](#support)

## Overview

`execd` provides a unified interface for:

- **Code execution**: Python, Java, JavaScript, TypeScript, Go, and Bash
- **Session management**: Long-lived Jupyter kernel sessions with state
- **Command execution**: Synchronous and background shell commands
- **File operations**: Full filesystem CRUD with chunked upload/download
- **Monitoring**: Real-time host metrics (CPU, memory, uptime)

## Core Features

### Unified runtime management

- Translate REST calls into runtime requests handled by `pkg/runtime`
- Multiple execution backends: Jupyter, shell, etc.
- Automatic language detection and routing
- Pluggable Jupyter server configuration

### Jupyter integration

- Maintain kernel sessions via `pkg/jupyter`
- WebSocket-based real-time communication
- Stream execution events through SSE

### Command executor

- Foreground and background shell commands
- Proper signal forwarding with process groups
- Real-time stdout/stderr streaming
- Context-aware interruption

### Filesystem

- CRUD helpers around the sandbox filesystem
- Glob-based file search
- Chunked upload/download with resume support
- Permission management

### Observability

- Lightweight metrics endpoint (CPU, memory, uptime)
- Structured streaming logs
- SSE-based real-time monitoring

## Architecture

### Directory structure

| Path                   | Purpose                                              |
|------------------------|------------------------------------------------------|
| `main.go`              | Entry point; initializes Beego, CLI flags, routers   |
| `pkg/flag/`            | CLI and environment configuration                    |
| `pkg/web/`             | HTTP layer (controllers, models, router, SSE helpers) |
| `pkg/web/controller/`  | Handlers for files, code, commands, metrics          |
| `pkg/web/model/`       | Request/response models and SSE event types          |
| `pkg/runtime/`         | Dispatcher to Jupyter and shell executors            |
| `pkg/jupyter/`         | Minimal Jupyter client (kernels/sessions/WebSocket)  |
| `pkg/jupyter/execute/` | Execution result types and stream parsers            |
| `pkg/jupyter/session/` | Session management and lifecycle                     |
| `pkg/util/`            | Utilities (safe goroutine helpers, glob helpers)     |
| `tests/`               | Test scripts and tools                               |

## Getting Started

### Prerequisites

- **Go 1.24+** (as defined in `go.mod`)
- **Jupyter Server** (required for code execution)
- **Docker** (optional, for containerized builds)
- **Make** (optional, for convenience targets)

### Quick Start

#### 1. Clone and build

```bash
git clone git@github.com:alibaba/OpenSandbox.git
cd OpenSandbox/components/execd
go mod download
make build
```

#### 2. Start Jupyter Server

```bash
# Option 1: use the provided script
./tests/jupyter.sh

# Option 2: start manually
jupyter notebook --port=54321 --no-browser --ip=0.0.0.0 \
  --NotebookApp.token='your-jupyter-token'
```

#### 3. Run execd

```bash
./bin/execd \
  --jupyter-host=http://127.0.0.1:54321 \
  --jupyter-token=your-jupyter-token \
  --port=44772
```

#### 4. Verify

```bash
curl -v http://localhost:44772/ping
# Expect HTTP 200
```

### Image build

```bash
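# Run from the repository root: the Dockerfile copies components/internal and components/execd.
docker build -f components/execd/Dockerfile -t opensandbox/execd:dev .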

# Run container
docker run -d \
  -p 44772:44772 \
  -e JUPYTER_HOST=http://jupyter-server \
  -e JUPYTER_TOKEN=your-token \
  --name execd \
  opensandbox/execd:dev
```

## Configuration

### Command-line flags

| Flag                          | Type     | Default | Description                                   |
|-------------------------------|----------|---------|-----------------------------------------------|
| `--jupyter-host`              | string   | `""`    | Jupyter server URL (reachable by execd)       |
| `--jupyter-token`             | string   | `""`    | Jupyter HTTP/WebSocket token                  |
| `--port`                      | int      | `44772` | HTTP listen port                              |
| `--log-level`                 | int      | `6`     | Beego log level (0=Emergency, 7=Debug)        |
| `--access-token`              | string   | `""`    | Shared API secret (optional)                  |
| `--graceful-shutdown-timeout` | duration | `3s`    | Wait time before cutting off SSE on shutdown  |

### Environment variables

All flags can be set via environment variables:

```bash
export JUPYTER_HOST=http://127.0.0.1:8888
export JUPYTER_TOKEN=your-token
```

Environment variables override defaults but are superseded by explicit CLI flags.

## API Reference

[API Spec](../../specs/execd-api.yaml).

## Supported Languages

### Jupyter-based

| Language   | Kernel      | Highlights                  |
|------------|-------------|-----------------------------|
| Python     | IPython     | Full Jupyter protocol       |
| Java       | IJava       | JShell-based execution      |
| JavaScript | IJavaScript | Node.js runtime             |
| TypeScript | ITypeScript | TS compilation + Node exec  |
| Go         | gophernotes | Go interpreter              |
| Bash       | Bash kernel | Shell scripts               |

### Native executors

| Mode/Language        | Backend | Highlights                   |
|----------------------|---------|------------------------------|
| `command`            | OS exec | Synchronous shell commands   |
| `background-command` | OS exec | Detached background process  |

## Development

See [DEVELOPMENT.md](./DEVELOPMENT.md) for detailed guidelines.

## Testing

### Unit tests

```bash
make test
```

### Integration tests

Integration tests that require a real Jupyter Server are skipped by default. To run them, export the server details and invoke the tests directly:

```bash
export JUPYTER_URL=http://localhost:8888
export JUPYTER_TOKEN=your-token
go test -v ./pkg/jupyter/...
```

### Manual testing workflow

1. Start Jupyter: `./tests/jupyter.sh`
2. Start execd: `./bin/execd --jupyter-host=http://localhost:54321 --jupyter-token=opensandboxexecdlocaltest`
3. Execute code:

```bash
curl -X POST -H "Content-Type: application/json" \
  -d '{"language":"python","code":"print(\"test\")"}' \
  http://localhost:44772/code
```

## Advanced Configuration

### API graceful shutdown window

- Env: `EXECD_API_GRACE_SHUTDOWN` (e.g. `500ms`, `2s`, `1m`)
- Flag: `--graceful-shutdown-timeout`
- Default: `1s`

This controls how long execd keeps SSE responses (code/command runs) alive after sending the final chunk, so clients can drain tail output before the connection closes. Set to `0s` to disable the grace period.

## Observability

### Logging

Beego leveled logger:

```go
logs.Info("message")   // info
logs.Warning("message") // warning
logs.Error("message")   // error
logs.Debug("message")   // debug
```

- Env: `EXECD_LOG_FILE` writes execd logs to the given file path; when unset, logs are sent to stdout.

Log levels (0-7):

- 0: Emergency
- 1: Alert
- 2: Critical
- 3: Error
- 4: Warning
- 5: Notice
- 6: Info (default)
- 7: Debug

### Metrics

`/metrics` exposes:

- CPU usage percent
- Memory total/used (GB)
- Memory usage percent
- Process uptime
- Current timestamp

For real-time monitoring, use `/metrics/watch` (SSE, 1s cadence).

## Performance Benchmarks

### Typical latency (localhost)

| Operation           | Latency  |
|---------------------|----------|
| `/ping`             | < 1ms    |
| `/files/info`       | < 5ms    |
| Code execution (Py) | 50-200ms |
| File upload (1MB)   | 10-50ms  |
| Metrics snapshot    | < 10ms   |

### Resource usage (idle)

- Memory: ~50MB
- CPU: < 1%
- Goroutines: ~15

### Scalability

- 100+ concurrent SSE connections
- File operations scale linearly with file size
- Jupyter sessions are stateful and need dedicated resources

## Contributing

1. Fork the repository
2. Create a feature branch
3. Follow coding conventions (see DEVELOPMENT.md)
4. Add tests for new functionality
5. Run `make fmt` and `make test`
6. Submit a pull request

## License

`execd` is part of the OpenSandbox project. See [LICENSE](../../LICENSE) in the repository root.

## Support

- Issues: [GitHub Issues](https://github.com/alibaba/OpenSandbox/issues)
- Documentation: [OpenSandbox Docs](https://github.com/alibaba/OpenSandbox/wiki)
- Community: [Discussions](https://github.com/alibaba/OpenSandbox/discussions)


================================================
FILE: components/execd/README_zh.md
================================================
# execd - OpenSandbox 执行守护进程

中文 | [English](README.md)

`execd` 是 OpenSandbox 的执行守护进程，基于 Gin 框架提供全面的 HTTP API。它将外部请求转化为实际的运行时动作：管理 Jupyter
会话、以 SSE（Server-Sent Events）流式返回代码输出、执行 shell 命令、操作沙箱文件系统，并采集主机侧指标。

## 目录

- [概述](#概述)
- [核心特性](#核心特性)
- [架构设计](#架构设计)
- [快速开始](#快速开始)
- [配置说明](#配置说明)
- [API 参考](#api-参考)
- [支持的语言](#支持的语言)
- [开发指南](#开发指南)
- [测试](#测试)
- [可观测性](#可观测性)
- [许可证](#许可证)

## 概述

`execd` 作为 OpenSandbox 的运行时守护进程，提供统一的接口用于：

- **代码执行**：Python、Java、JavaScript、TypeScript、Go 和 Bash
- **会话管理**：带状态保持的长连接 Jupyter kernel 会话
- **命令执行**：同步执行和异步执行 shell 命令
- **文件操作**：完整的文件系统 CRUD，支持分块上传/下载
- **监控**：实时系统指标（CPU、内存、运行时间）

## 核心特性

### 统一运行时管理

- 将 REST 调用转化为由 `pkg/runtime` 控制器处理的运行时请求
- 支持多种执行后端：Jupyter、Shell、等等
- 自动语言检测和路由
- 可插拔 Jupyter server 配置

### Jupyter 集成

- 通过 `pkg/jupyter` 维护 kernel 会话
- 基于 WebSocket 的实时通信
- 通过 Server-Sent Events (SSE) 流式推送执行事件

### 命令执行器

- 前台、后台 shell 命令
- 通过进程组管理正确转发信号
- 实时 stdout/stderr 流式输出
- 支持上下文感知的中断

### 文件系统

- 围绕沙箱文件系统的 CRUD 辅助工具
- Glob 模式匹配文件搜索
- 支持断点续传的分块上传/下载
- 权限管理

### 可观测性

- 轻量级指标端点（CPU、内存、运行时间）
- 结构化流式日志
- 基于 SSE 的实时监控

## 架构设计

### 目录结构

| 路径                     | 说明                                         |
|------------------------|--------------------------------------------|
| `main.go`              | 程序入口，初始化 CLI 标志、日志、代码执行器和路由             |
| `pkg/flag/`            | 命令行与环境变量配置                                 |
| `pkg/web/`             | HTTP 层（控制器、模型、路由、SSE 辅助）                   |
| `pkg/web/controller/`  | 文件、代码、命令、指标的请求处理器                          |
| `pkg/web/model/`       | 请求/响应模型与 SSE 事件类型                          |
| `pkg/runtime/`         | 运行时控制器，调度到 Jupyter、Shell执行器                |
| `pkg/jupyter/`         | 精简 Jupyter 客户端（kernels/sessions/WebSocket） |
| `pkg/jupyter/execute/` | 执行结果类型与流解析器                                |
| `pkg/jupyter/session/` | 会话管理与生命周期                                  |
| `pkg/util/`            | 通用工具（安全 goroutine、glob 辅助）                 |
| `tests/`               | 测试脚本和工具                                    |

## 快速开始

### 环境要求

- **Go 1.24+**（在 `go.mod` 中定义）
- **Jupyter Server**（代码执行上下文所需）
- **Docker**（可选，用于容器化构建）
- **Make**（可选，用于便捷命令）

### 快速启动

#### 1. 克隆并构建

```bash
git clone git@github.com:alibaba/OpenSandbox.git
cd OpenSandbox/components/execd
go mod download
make build
```

#### 2. 启动 Jupyter Server

```bash
# 方式 1：使用提供的脚本
./tests/jupyter.sh

# 方式 2：手动启动
jupyter notebook --port=54321 --no-browser --ip=0.0.0.0 \
  --NotebookApp.token='your-jupyter-token'
```

#### 3. 运行 execd

```bash
./bin/execd \
  --jupyter-host=http://127.0.0.1:54321 \
  --jupyter-token=your-jupyter-token \
  --port=44772
```

#### 4. 验证安装

```bash
curl -v http://localhost:44772/ping
# 期望返回 HTTP 200
```

### 镜像构建

```bash
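# 请在仓库根目录执行：build.sh 以仓库根目录作为构建上下文，
# 并通过 -f components/execd/Dockerfile 指定 Dockerfile（go.mod 通过本地 replace 引用 ../internal）。
docker build -f components/execd/Dockerfile -t opensandbox/execd:dev .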

# 运行容器
docker run -d \
  -p 44772:44772 \
  -e JUPYTER_HOST=http://jupyter-server \
  -e JUPYTER_TOKEN=your-token \
  --name execd \
  opensandbox/execd:dev
```

## 配置说明

### 命令行标志

| 标志                            | 类型       | 默认值     | 说明                                  |
|-------------------------------|----------|---------|-------------------------------------|
| `--jupyter-host`              | string   | `""`    | 后端 Jupyter server 地址，要求execd进程可访问即可 |
| `--jupyter-token`             | string   | `""`    | Jupyter HTTP/WebSocket 令牌           |
| `--port`                      | int      | `44772` | HTTP 监听端口                           |
| `--log-level`                 | int      | `6`     | 日志级别（0=紧急，7=调试）                     |
| `--access-token`              | string   | `""`    | API 共享密钥（可选）                        |
| `--graceful-shutdown-timeout` | duration | `3s`    | 关闭前等待 SSE 的时间                       |

### 环境变量

所有标志都可以通过环境变量设置：

```bash
export JUPYTER_HOST=http://127.0.0.1:8888
export JUPYTER_TOKEN=your-token
```

环境变量优先于默认值，但会被显式的 CLI 标志覆盖。

## API 参考

[API Spec](../../specs/execd-api.yaml)。

## 支持的语言

### 基于 Jupyter 的语言

| 语言         | Kernel      | 特性              |
|------------|-------------|-----------------|
| Python     | IPython     | 完整 Jupyter 协议支持 |
| Java       | IJava       | 基于 JShell 的执行   |
| JavaScript | IJavaScript | Node.js 运行时     |
| TypeScript | ITypeScript | TS 编译 + Node 执行 |
| Go         | gophernotes | Go 解释器          |
| Bash       | Bash kernel | Shell 脚本执行      |

### 原生执行器

| 模式/语言                | 后端      | 特性          |
|----------------------|---------|-------------|
| `command`            | OS exec | 同步 shell 命令 |
| `background-command` | OS exec | 分离的后台进程     |

## 开发指南

开发指南请参见 [DEVELOPMENT.md](./DEVELOPMENT.md)。

## 测试

### 单元测试

```bash
make test
```

### 集成测试

需要真实 Jupyter Server 的集成测试默认跳过。如需运行，请先设置以下环境变量，再直接执行测试：

```bash
export JUPYTER_URL=http://localhost:8888
export JUPYTER_TOKEN=your-token
go test -v ./pkg/jupyter/...
```

### 手动测试工作流

1. 启动 Jupyter：`./tests/jupyter.sh`
2. 启动 execd：`./bin/execd --jupyter-host=http://localhost:54321 --jupyter-token=opensandboxexecdlocaltest`

3. 执行代码：

```bash
curl -X POST -H "Content-Type: application/json" \
  -d '{"language":"python","code":"print(\"test\")"}' \
  http://localhost:44772/code
```

## 配置

### SSE API 优雅结束时间窗口

- 环境变量：`EXECD_API_GRACE_SHUTDOWN`（如 `500ms`、`2s`、`1m`）
- 命令行参数：`--graceful-shutdown-timeout`
- 默认值：`1s`

作用：控制 SSE 响应（代码/命令执行）在发送最后一块数据后，保持连接的宽限时间，方便客户端完全读到尾部输出再关闭。如果设置为 `0s` 则关闭这一等待。

## 可观测性

### 日志记录

全程使用 Beego 的分级日志器：

```go
logs.Info("message") // 常规信息
logs.Warning("message") // 警告条件
logs.Error("message")   // 错误条件
logs.Debug("message") // 调试级别消息
```

- 环境变量：`EXECD_LOG_FILE` 指定日志输出文件；未设置时日志输出到标准输出（stdout）。

日志级别（0-7）：

- 0：紧急
- 1：警报
- 2：严重
- 3：错误
- 4：警告
- 5：注意
- 6：信息（默认）
- 7：调试

### 指标采集

`/metrics` 端点提供：

- CPU 使用百分比
- 内存总量/已用（GB）
- 内存使用百分比
- 进程运行时间
- 当前时间戳

对于实时监控，使用 `/metrics/watch`，每秒通过 SSE 流式推送更新。

## 性能基准

### 典型延迟（localhost）

| 操作            | 延迟       |
|---------------|----------|
| `/ping`       | < 1ms    |
| `/files/info` | < 5ms    |
| 代码执行（Python）  | 50-200ms |
| 文件上传（1MB）     | 10-50ms  |
| 指标快照          | < 10ms   |

### 资源使用（空闲）

- 内存：~50MB
- CPU：< 1%
- Goroutines：~15

### 可扩展性

- 支持 100+ 并发 SSE 连接
- 文件操作随文件大小线性扩展
- Jupyter 会话是有状态的，需要专用资源

## 贡献

1. Fork 仓库
2. 创建特性分支
3. 遵循编码规范（见 DEVELOPMENT.md）
4. 为新功能添加测试
5. 运行 `make fmt` 和 `make test`
6. 提交 pull request

## 许可证

`execd` 是 OpenSandbox 项目的一部分。详见仓库根目录的 [LICENSE](../../LICENSE)。

## 支持

- 问题：[GitHub Issues](https://github.com/alibaba/OpenSandbox/issues)
- 文档：[OpenSandbox Docs](https://github.com/alibaba/OpenSandbox/wiki)
- 社区：[Discussions](https://github.com/alibaba/OpenSandbox/discussions)


================================================
FILE: components/execd/bootstrap.sh
================================================
#!/bin/sh

# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -e

# Location of the execd binary. An EXECD value already present in the
# environment wins; otherwise the default path below is assigned.
: "${EXECD:=/opt/opensandbox/execd}"

# EXECD_ENVS is exported for child processes. When it is unset or empty, fall
# back to the default file under /opt/opensandbox.
EXECD_ENVS="${EXECD_ENVS:-/opt/opensandbox/.env}"
export EXECD_ENVS

# Best effort: create the parent directory and the file itself. A failure only
# produces a warning on stderr; the script keeps going.
mkdir -p "$(dirname "$EXECD_ENVS")" 2>/dev/null ||
	echo "warning: failed to create dir for EXECD_ENVS=$EXECD_ENVS" >&2
touch "$EXECD_ENVS" 2>/dev/null ||
	echo "warning: failed to touch EXECD_ENVS=$EXECD_ENVS" >&2

echo "starting OpenSandbox Execd daemon at $EXECD."
# Launch the daemon in the background. $EXECD is expanded unquoted, so a value
# containing whitespace is split into a command and its arguments.
$EXECD &

# Work out the chained shell command to run, if any.
#   bootstrap.sh -c "/test1.sh && /test2.sh"
#   BOOTSTRAP_CMD="/test1.sh && /test2.sh" bootstrap.sh
# A non-empty BOOTSTRAP_CMD takes precedence; otherwise a leading "-c" is
# consumed and the remaining arguments are joined into the command string.
CMD="${BOOTSTRAP_CMD:-}"
if [ -z "$CMD" ] && [ "${1:-}" = "-c" ]; then
	shift
	CMD="$*"
fi

# Shell used for CMD and for the no-argument case: BOOTSTRAP_SHELL when set,
# else the first of bash / sh that `command -v` can resolve.
SHELL_BIN="${BOOTSTRAP_SHELL:-}"
if [ -z "$SHELL_BIN" ]; then
	SHELL_BIN="$(command -v bash 2>/dev/null || command -v sh 2>/dev/null || true)"
	if [ -z "$SHELL_BIN" ]; then
		echo "error: neither bash nor sh found in PATH" >&2
		exit 1
	fi
fi

# From here on every command is traced to stderr before it runs.
set -x
# 1) A chained command was requested: replace this process with the shell
#    running it.
if [ "$CMD" != "" ]; then
	exec "$SHELL_BIN" -c "$CMD"
fi

# 2) No command and no arguments: replace this process with a plain shell.
if [ $# -eq 0 ]; then
	exec "$SHELL_BIN"
fi

# 3) Otherwise run the remaining arguments as the command line.
exec "$@"


================================================
FILE: components/execd/build.sh
================================================
#!/bin/bash
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -ex

# Builds the execd image for linux/amd64 and linux/arm64 with docker buildx
# and pushes it to both registries named in the -t arguments below.
#
# Environment overrides (each keeps a caller-supplied value when set):
#   TAG         image tag, "latest" when unset. Tags starting with "v" are
#               additionally pushed as ":latest".
#   VERSION     VERSION build-arg; defaults to `git describe --tags --always
#               --dirty`, or "dev" when that command fails.
#   GIT_COMMIT  GIT_COMMIT build-arg; defaults to `git rev-parse HEAD`, or
#               "unknown" when that command fails.
#   BUILD_TIME  BUILD_TIME build-arg; defaults to the current UTC time.
TAG=${TAG:-latest}
VERSION=${VERSION:-$(git describe --tags --always --dirty 2>/dev/null || echo "dev")}
GIT_COMMIT=${GIT_COMMIT:-$(git rev-parse HEAD 2>/dev/null || echo "unknown")}
BUILD_TIME=${BUILD_TIME:-$(date -u +"%Y-%m-%dT%H:%M:%SZ")}

# The build context is the repository root. Outside a git checkout, fall back
# to two directories above this script.
REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || realpath "$(dirname "$0")/../..")
cd "${REPO_ROOT}"

# Recreate the buildx builder from scratch; removal is allowed to fail (for
# example when the builder does not exist yet).
docker buildx rm execd-builder || true

docker buildx create --use --name execd-builder

docker buildx inspect --bootstrap

docker buildx ls

# Extra -t arguments, populated only for tags that start with "v".
LATEST_TAGS=()
if [[ "${TAG}" == v* ]]; then
  LATEST_TAGS+=(-t opensandbox/execd:latest -t sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:latest)
fi

# Tag references are quoted so that an unusual TAG value cannot be split into
# several arguments or glob-expanded by the shell.
docker buildx build \
  -t "opensandbox/execd:${TAG}" \
  -t "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:${TAG}" \
  "${LATEST_TAGS[@]}" \
  -f components/execd/Dockerfile \
  --build-arg VERSION="${VERSION}" \
  --build-arg GIT_COMMIT="${GIT_COMMIT}" \
  --build-arg BUILD_TIME="${BUILD_TIME}" \
  --platform linux/amd64,linux/arm64 \
  --push \
  .


================================================
FILE: components/execd/go.mod
================================================
module github.com/alibaba/opensandbox/execd

go 1.24.0

require (
	github.com/alibaba/opensandbox/internal v0.0.0
	github.com/bmatcuk/doublestar/v4 v4.9.1
	github.com/gin-gonic/gin v1.10.0
	github.com/go-playground/validator/v10 v10.28.0
	github.com/go-sql-driver/mysql v1.8.1
	github.com/google/uuid v1.6.0
	github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674
	github.com/shirou/gopsutil v3.21.11+incompatible
	github.com/stretchr/testify v1.10.0
	go.uber.org/automaxprocs v1.6.0
	k8s.io/apimachinery v0.34.2
	k8s.io/client-go v0.34.2
)

require (
	filippo.io/edwards25519 v1.1.1 // indirect
	github.com/bytedance/sonic v1.11.6 // indirect
	github.com/bytedance/sonic/loader v0.1.1 // indirect
	github.com/cloudwego/base64x v0.1.4 // indirect
	github.com/cloudwego/iasm v0.2.0 // indirect
	github.com/davecgh/go-spew v1.1.1 // indirect
	github.com/fxamacker/cbor/v2 v2.9.0 // indirect
	github.com/gabriel-vasile/mimetype v1.4.10 // indirect
	github.com/gin-contrib/sse v0.1.0 // indirect
	github.com/go-logr/logr v1.4.2 // indirect
	github.com/go-ole/go-ole v1.2.6 // indirect
	github.com/go-playground/locales v0.14.1 // indirect
	github.com/go-playground/universal-translator v0.18.1 // indirect
	github.com/goccy/go-json v0.10.2 // indirect
	github.com/gogo/protobuf v1.3.2 // indirect
	github.com/json-iterator/go v1.1.12 // indirect
	github.com/klauspost/cpuid/v2 v2.2.7 // indirect
	github.com/kr/pretty v0.3.1 // indirect
	github.com/leodido/go-urn v1.4.0 // indirect
	github.com/mattn/go-isatty v0.0.20 // indirect
	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
	github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
	github.com/pelletier/go-toml/v2 v2.2.2 // indirect
	github.com/pmezard/go-difflib v1.0.0 // indirect
	github.com/tklauser/go-sysconf v0.3.16 // indirect
	github.com/tklauser/numcpus v0.11.0 // indirect
	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
	github.com/ugorji/go/codec v1.2.12 // indirect
	github.com/x448/float16 v0.8.4 // indirect
	github.com/yusufpapurcu/wmi v1.2.4 // indirect
	go.uber.org/multierr v1.10.0 // indirect
	go.uber.org/zap v1.27.0 // indirect
	go.yaml.in/yaml/v2 v2.4.2 // indirect
	golang.org/x/arch v0.8.0 // indirect
	golang.org/x/crypto v0.45.0 // indirect
	golang.org/x/net v0.47.0 // indirect
	golang.org/x/sys v0.38.0 // indirect
	golang.org/x/text v0.31.0 // indirect
	google.golang.org/protobuf v1.36.5 // indirect
	gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
	gopkg.in/inf.v0 v0.9.1 // indirect
	gopkg.in/yaml.v3 v3.0.1 // indirect
	k8s.io/klog/v2 v2.130.1 // indirect
	k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect
	sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect
	sigs.k8s.io/randfill v1.0.0 // indirect
	sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect
)

replace github.com/alibaba/opensandbox/internal => ../internal


================================================
FILE: components/execd/go.sum
================================================
filippo.io/edwards25519 v1.1.1 h1:YpjwWWlNmGIDyXOn8zLzqiD+9TyIlPhGFG96P39uBpw=
filippo.io/edwards25519 v1.1.1/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
github.com/bmatcuk/doublestar/v4 v4.9.1 h1:X8jg9rRZmJd4yRy7ZeNDRnM+T3ZfHv15JiBJ/avrEXE=
github.com/bmatcuk/doublestar/v4 v4.9.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc=
github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM=
github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ=
github.com/gabriel-vasile/mimetype v1.4.10 h1:zyueNbySn/z8mJZHLt6IPw0KoZsiQNszIpU+bX4+ZK0=
github.com/gabriel-vasile/mimetype v1.4.10/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s=
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
github.com/go-playground/validator/v10 v10.28.0 h1:Q7ibns33JjyW48gHkuFT91qX48KG0ktULL6FgHdG688=
github.com/go-playground/validator/v10 v10.28.0/go.mod h1:GoI6I1SjPBh9p7ykNE/yj3fFYbyDOpwMn5KXd+m2hUU=
github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y=
github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5THxAzdVpqr6/geYxZytqFMBCOtn/ujyeo=
github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674/go.mod h1:r4w70xmWCQKmi1ONH4KIaBptdivuRPyosB9RmPlGEwA=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8=
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=
github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=
github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI=
github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=
github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tklauser/go-sysconf v0.3.16 h1:frioLaCQSsF5Cy1jgRBrzr6t502KIIwQ0MArYICU0nA=
github.com/tklauser/go-sysconf v0.3.16/go.mod h1:/qNL9xxDhc7tx3HSRsLWNnuzbVfh3e7gh/BmM179nYI=
github.com/tklauser/numcpus v0.11.0 h1:nSTwhKH5e1dMNsCdVBukSZrURJRoHbSEQjdEbY+9RXw=
github.com/tklauser/numcpus v0.11.0/go.mod h1:z+LwcLq54uWZTX0u/bGobaV34u6V7KNlTZejzM6/3MQ=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs=
go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.uber.org/multierr v1.10.0 h1:S0h4aNzvfcFsC3dRF1jLoaov7oRaKqRGC/pUEJ2yvPQ=
go.uber.org/multierr v1.10.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8=
go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
k8s.io/apimachinery v0.34.2 h1:zQ12Uk3eMHPxrsbUJgNF8bTauTVR2WgqJsTmwTE/NW4=
k8s.io/apimachinery v0.34.2/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw=
k8s.io/client-go v0.34.2 h1:Co6XiknN+uUZqiddlfAjT68184/37PS4QAzYvQvDR8M=
k8s.io/client-go v0.34.2/go.mod h1:2VYDl1XXJsdcAxw7BenFslRQX28Dxz91U9MWKjX97fE=
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y=
k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE=
sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco=
sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE=
sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs=
sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4=


================================================
FILE: components/execd/main.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"fmt"
	"os"

	"github.com/alibaba/opensandbox/internal/version"

	_ "go.uber.org/automaxprocs/maxprocs"

	"github.com/alibaba/opensandbox/execd/pkg/flag"
	"github.com/alibaba/opensandbox/execd/pkg/log"
	_ "github.com/alibaba/opensandbox/execd/pkg/util/safego"
	"github.com/alibaba/opensandbox/execd/pkg/web"
	"github.com/alibaba/opensandbox/execd/pkg/web/controller"
)

// main is the execd entry point. It prints the version banner, loads the
// configuration via flag.InitFlags, initializes logging and the code runner,
// and then runs the router on the configured port. If the router returns an
// error, the error is logged and the process exits with status 1.
func main() {
	version.EchoVersion("OpenSandbox Execd")

	// Configuration has to be loaded before anything reads the flag variables.
	flag.InitFlags()
	log.Init(flag.ServerLogLevel)
	controller.InitCodeRunner()

	router := web.NewRouter(flag.ServerAccessToken)
	listenAddr := fmt.Sprintf(":%d", flag.ServerPort)
	log.Info("execd listening on %s", listenAddr)

	err := router.Run(listenAddr)
	if err == nil {
		return
	}
	log.Error("failed to start execd server: %v", err)
	os.Exit(1)
}


================================================
FILE: components/execd/pkg/flag/flags.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package flag

import "time"

// Jupyter connection settings. InitFlags fills them from the environment and
// the command line.
var (
	// JupyterServerHost points to the target Jupyter instance.
	JupyterServerHost string

	// JupyterServerToken authenticates requests to the Jupyter server.
	JupyterServerToken string
)

// execd server settings. InitFlags assigns their defaults and applies any
// command-line overrides.
var (
	// ServerPort controls the HTTP listener port.
	ServerPort int

	// ServerLogLevel controls the server log verbosity.
	ServerLogLevel int

	// ServerAccessToken guards API entrypoints when set.
	ServerAccessToken string

	// ApiGracefulShutdownTimeout waits before tearing down SSE streams.
	ApiGracefulShutdownTimeout time.Duration
)


================================================
FILE: components/execd/pkg/flag/parser.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package flag

import (
	"flag"
	stdlog "log"
	"os"
	"strings"
	"time"

	"github.com/alibaba/opensandbox/execd/pkg/log"
)

// Names of the environment variables InitFlags consults before parsing the
// command line.

// jupyterHostEnv names the variable holding the Jupyter server address.
const jupyterHostEnv = "JUPYTER_HOST"

// jupyterTokenEnv names the variable holding the Jupyter server token.
const jupyterTokenEnv = "JUPYTER_TOKEN"

// gracefulShutdownTimeoutEnv names the variable holding the graceful shutdown
// timeout, written as a time.ParseDuration string.
const gracefulShutdownTimeoutEnv = "EXECD_API_GRACE_SHUTDOWN"

// InitFlags loads the execd configuration into the package-level variables.
//
// Precedence, lowest to highest: built-in defaults, environment variables
// (JUPYTER_HOST, JUPYTER_TOKEN, EXECD_API_GRACE_SHUTDOWN), command-line flags.
//
// It panics when JUPYTER_HOST does not start with http:// or https://, or when
// EXECD_API_GRACE_SHUTDOWN cannot be parsed by time.ParseDuration. A value
// given through -jupyter-host is not subject to the scheme check.
func InitFlags() {
	// Set default values
	ServerPort = 44772
	ServerLogLevel = 6
	ServerAccessToken = ""
	ApiGracefulShutdownTimeout = time.Second * 1

	// First, set default values from environment variables
	if jupyterFromEnv := os.Getenv(jupyterHostEnv); jupyterFromEnv != "" {
		if !strings.HasPrefix(jupyterFromEnv, "http://") && !strings.HasPrefix(jupyterFromEnv, "https://") {
			stdlog.Panic("Invalid JUPYTER_HOST format: must start with http:// or https://")
		}
		JupyterServerHost = jupyterFromEnv
	}

	if jupyterTokenFromEnv := os.Getenv(jupyterTokenEnv); jupyterTokenFromEnv != "" {
		JupyterServerToken = jupyterTokenFromEnv
	}

	// Then define flags with current values as defaults
	flag.StringVar(&JupyterServerHost, "jupyter-host", JupyterServerHost, "Jupyter server host address (e.g., http://localhost, http://192.168.1.100)")
	flag.StringVar(&JupyterServerToken, "jupyter-token", JupyterServerToken, "Jupyter server authentication token")
	flag.IntVar(&ServerPort, "port", ServerPort, "Server listening port (default: 44772)")
	flag.IntVar(&ServerLogLevel, "log-level", ServerLogLevel, "Server log level (0=LevelEmergency, 1=LevelAlert, 2=LevelCritical, 3=LevelError, 4=LevelWarning, 5=LevelNotice, 6=LevelInformational, 7=LevelDebug, default: 6)")
	flag.StringVar(&ServerAccessToken, "access-token", ServerAccessToken, "Server access token for API authentication")

	if graceShutdownTimeout := os.Getenv(gracefulShutdownTimeoutEnv); graceShutdownTimeout != "" {
		duration, err := time.ParseDuration(graceShutdownTimeout)
		if err != nil {
			stdlog.Panicf("Failed to parse graceful shutdown timeout from env: %v", err)
		}
		ApiGracefulShutdownTimeout = duration
	}

	// The help text states the built-in default assigned above (1s).
	flag.DurationVar(&ApiGracefulShutdownTimeout, "graceful-shutdown-timeout", ApiGracefulShutdownTimeout, "API graceful shutdown timeout duration (default: 1s)")

	// Parse flags - these will override environment variables if provided
	flag.Parse()

	// Log final values. The token is a credential, so only report whether one
	// is configured instead of writing its value to the log.
	log.Info("Jupyter server host is: %s", JupyterServerHost)
	log.Info("Jupyter server token is set: %v", JupyterServerToken != "")
}


================================================
FILE: components/execd/pkg/jupyter/auth/auth.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package auth

import (
	"fmt"
	"net/url"
)

// Auth holds the credentials used to authenticate against a Jupyter server.
// Either Token or the Username/Password pair may be populated.
type Auth struct {
	Token    string
	Username string
	Password string
}

// NewTokenAuth returns an Auth that carries only the given token.
func NewTokenAuth(token string) *Auth {
	a := new(Auth)
	a.Token = token
	return a
}

// NewBasicAuth returns an Auth that carries only the given username and
// password.
func NewBasicAuth(username, password string) *Auth {
	a := new(Auth)
	a.Username = username
	a.Password = password
	return a
}

// Validate names the auth mode implied by the populated fields: "token" when
// Token is set, otherwise "basic" when Username is set, otherwise "none".
// A token takes precedence over a username, and Password is not inspected.
func (a *Auth) Validate() string {
	switch {
	case a.Token != "":
		return "token"
	case a.Username != "":
		return "basic"
	default:
		return "none"
	}
}

// AddAuthToURL returns baseURL with its "token" query parameter set to
// a.Token, replacing any existing value. When Token is empty no parameter is
// added, but the query string is still re-encoded (keys sorted, values
// escaped). An error is returned if baseURL cannot be parsed.
func (a *Auth) AddAuthToURL(baseURL string) (string, error) {
	u, err := url.Parse(baseURL)
	if err != nil {
		return "", fmt.Errorf("failed to parse URL: %w", err)
	}

	params := u.Query()
	if tok := a.Token; tok != "" {
		params.Set("token", tok)
	}
	u.RawQuery = params.Encode()

	return u.String(), nil
}


================================================
FILE: components/execd/pkg/jupyter/auth/auth_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package auth

import (
	"net/http"
	"net/http/httptest"
	"testing"
)

// TestTokenAuthentication checks that Client.Do sends the configured token as
// an "Authorization: token <value>" header. The stub server answers 200 only
// when it sees exactly that header and 401 otherwise.
func TestTokenAuthentication(t *testing.T) {
	const wantHeader = "token test-token"

	handler := func(w http.ResponseWriter, r *http.Request) {
		status := http.StatusOK
		if r.Header.Get("Authorization") != wantHeader {
			status = http.StatusUnauthorized
		}
		w.WriteHeader(status)
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	creds := NewAuth()
	creds.Token = "test-token"
	authed := NewClient(&http.Client{}, creds)

	request, err := http.NewRequest("GET", srv.URL, nil)
	if err != nil {
		t.Fatalf("Failed to create request: %v", err)
	}

	response, err := authed.Do(request)
	if err != nil {
		t.Fatalf("Failed to send request: %v", err)
	}
	defer response.Body.Close()

	if got := response.StatusCode; got != http.StatusOK {
		t.Errorf("Expected status code %d, got %d", http.StatusOK, got)
	}
}

// TestBasicAuthentication checks that Client.Do applies HTTP basic auth when
// only a username and password are configured. The stub server answers 200
// for testuser/testpass and 401 for anything else.
func TestBasicAuthentication(t *testing.T) {
	handler := func(w http.ResponseWriter, r *http.Request) {
		user, pass, present := r.BasicAuth()
		if present && user == "testuser" && pass == "testpass" {
			w.WriteHeader(http.StatusOK)
			return
		}
		w.WriteHeader(http.StatusUnauthorized)
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	creds := NewAuth()
	creds.Username = "testuser"
	creds.Password = "testpass"
	authed := NewClient(&http.Client{}, creds)

	request, err := http.NewRequest("GET", srv.URL, nil)
	if err != nil {
		t.Fatalf("Failed to create request: %v", err)
	}

	response, err := authed.Do(request)
	if err != nil {
		t.Fatalf("Failed to send request: %v", err)
	}
	defer response.Body.Close()

	if got := response.StatusCode; got != http.StatusOK {
		t.Errorf("Expected status code %d, got %d", http.StatusOK, got)
	}
}

// TestAuthValidation exercises Auth.IsValid across the credential
// combinations: empty, token only, username+password, username only, and
// token together with username+password.
func TestAuthValidation(t *testing.T) {
	cases := []struct {
		token     string
		username  string
		password  string
		wantValid bool
		failMsg   string
	}{
		{
			wantValid: false,
			failMsg:   "Empty Auth should be invalid, but was determined to be valid",
		},
		{
			token:     "test-token",
			wantValid: true,
			failMsg:   "Auth with token should be valid, but was determined to be invalid",
		},
		{
			username:  "testuser",
			password:  "testpass",
			wantValid: true,
			failMsg:   "Auth with Basic Auth should be valid, but was determined to be invalid",
		},
		{
			username:  "testuser",
			wantValid: false,
			failMsg:   "Auth with only username and no password should be invalid, but was determined to be valid",
		},
		{
			token:     "test-token",
			username:  "testuser",
			password:  "testpass",
			wantValid: true,
			failMsg:   "Auth with both token and Basic Auth should be valid, but was determined to be invalid",
		},
	}

	for _, tc := range cases {
		candidate := NewAuth()
		candidate.Token = tc.token
		candidate.Username = tc.username
		candidate.Password = tc.password

		if candidate.IsValid() != tc.wantValid {
			t.Error(tc.failMsg)
		}
	}
}


================================================
FILE: components/execd/pkg/jupyter/auth/client.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package auth

import (
	"fmt"
	"io"
	"net/http"
)

// Client wraps http.Client and injects auth headers.
//
// Do attaches the credentials held in auth to each request before handing it
// to httpClient; Get, Post, Put and Delete build a request and call Do.
type Client struct {
	// httpClient performs the actual request.
	httpClient *http.Client
	// auth supplies the credentials; when nil, Do sends requests unmodified.
	auth       *Auth
}

// NewClient returns a Client that sends requests through httpClient and
// decorates them with the credentials in auth. The auth pointer is stored
// as-is, so a nil auth disables header injection in Do.
func NewClient(httpClient *http.Client, auth *Auth) *Client {
	c := new(Client)
	c.httpClient = httpClient
	c.auth = auth
	return c
}

// Do sends req through the wrapped HTTP client after adding credentials.
//
// With a non-empty Token it sets "Authorization: token <Token>"; otherwise,
// with a non-empty Username, it applies HTTP basic auth. The headers are set
// on req itself. When no Auth is configured, or the Auth carries neither a
// token nor a username, req is sent unchanged.
func (c *Client) Do(req *http.Request) (*http.Response, error) {
	if creds := c.auth; creds != nil {
		switch {
		case creds.Token != "":
			req.Header.Set("Authorization", fmt.Sprintf("token %s", creds.Token))
		case creds.Username != "":
			req.SetBasicAuth(creds.Username, creds.Password)
		}
	}

	return c.httpClient.Do(req)
}

// Get builds a body-less GET request for url and sends it through Do.
// A request-construction error is returned without contacting the server.
func (c *Client) Get(url string) (*http.Response, error) {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err == nil {
		return c.Do(req)
	}
	return nil, err
}

// Post builds a POST request for url with the given body, sets its
// Content-Type header to contentType, and sends it through Do.
// A request-construction error is returned without contacting the server.
func (c *Client) Post(url, contentType string, body io.Reader) (*http.Response, error) {
	req, err := http.NewRequest(http.MethodPost, url, body)
	if err == nil {
		req.Header.Set("Content-Type", contentType)
		return c.Do(req)
	}
	return nil, err
}

// Put builds a PUT request for url with the given body, sets its
// Content-Type header to contentType, and sends it through Do.
// A request-construction error is returned without contacting the server.
func (c *Client) Put(url, contentType string, body io.Reader) (*http.Response, error) {
	req, err := http.NewRequest(http.MethodPut, url, body)
	if err == nil {
		req.Header.Set("Content-Type", contentType)
		return c.Do(req)
	}
	return nil, err
}

// Delete builds a body-less DELETE request for url and sends it through Do.
// A request-construction error is returned without contacting the server.
func (c *Client) Delete(url string) (*http.Response, error) {
	req, err := http.NewRequest(http.MethodDelete, url, nil)
	if err == nil {
		return c.Do(req)
	}
	return nil, err
}


================================================
FILE: components/execd/pkg/jupyter/auth/types.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package auth

// Auth mode names, matching the strings returned by Auth.Validate.
const (
	AuthTypeNone  = "none"
	AuthTypeToken = "token"
	AuthTypeBasic = "basic"
)

// Names and prefixes used when a token is attached to a request, either as
// an Authorization header or as a URL query parameter.
const (
	AuthHeaderKey         = "Authorization"
	AuthHeaderValuePrefix = "token "
	AuthURLParamKey       = "token"
)

// NewAuth returns a pointer to a zero-valued Auth: no token, username or
// password. Callers fill in the fields they need.
func NewAuth() *Auth {
	return new(Auth)
}

// IsValid reports whether usable credentials are present: either a non-empty
// Token, or both a non-empty Username and a non-empty Password.
func (a *Auth) IsValid() bool {
	if a.Token != "" {
		return true
	}
	return a.Username != "" && a.Password != ""
}

// GetAuthType returns token/basic/none.
//
// It is an alias for Validate and returns exactly what Validate returns.
func (a *Auth) GetAuthType() string {
	return a.Validate()
}


================================================
FILE: components/execd/pkg/jupyter/client.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package jupyter

import (
	"errors"
	"fmt"
	"net/http"
	"net/url"

	"github.com/alibaba/opensandbox/execd/pkg/jupyter/auth"
	"github.com/alibaba/opensandbox/execd/pkg/jupyter/execute"
	"github.com/alibaba/opensandbox/execd/pkg/jupyter/kernel"
	"github.com/alibaba/opensandbox/execd/pkg/jupyter/session"
)

// Client interacts with the Jupyter server.
//
// It bundles the kernel, session and execute sub-clients built by NewClient
// and exposes their operations as methods.
type Client struct {
	// BaseURL is the server address given to NewClient; ConnectToKernel
	// derives the websocket scheme and host from it.
	BaseURL       string
	// httpClient is handed to the kernel and session sub-clients and wrapped
	// by authClient.
	httpClient    *http.Client
	// Auth holds the credentials; authClient keeps a pointer to the same value.
	Auth          *auth.Auth
	// kernelClient backs the kernel methods.
	kernelClient  *kernel.Client
	// sessionClient backs the session methods.
	sessionClient *session.Client
	// executeClient backs the websocket connect/execute methods.
	executeClient *execute.Client
	// authClient wraps httpClient with Auth and is passed to executeClient.
	authClient    *auth.Client
}

// ClientOption customizes a Client. NewClient applies each option, in order,
// before it constructs the sub-clients.
type ClientOption func(*Client)

// WithHTTPClient sets a custom HTTP client.
//
// The returned option replaces the client's httpClient, which NewClient
// otherwise initializes to http.DefaultClient.
func WithHTTPClient(client *http.Client) ClientOption {
	return func(c *Client) {
		c.httpClient = client
	}
}

// WithToken configures the client with an authentication token.
//
// The returned option stores token in c.Auth.Token; it dereferences c.Auth,
// which NewClient initializes before applying options.
func WithToken(token string) ClientOption {
	return func(c *Client) {
		c.Auth.Token = token
	}
}

// WithBasicAuth configures the client with basic authentication.
//
// The returned option stores username and password in c.Auth; it dereferences
// c.Auth, which NewClient initializes before applying options.
func WithBasicAuth(username, password string) ClientOption {
	return func(c *Client) {
		c.Auth.Username = username
		c.Auth.Password = password
	}
}

// NewClient builds a Jupyter client for the server at baseURL.
//
// The client starts with http.DefaultClient and an empty Auth; options are
// then applied in order. The sub-clients are constructed afterwards so that
// they receive the final HTTP client: the kernel and session clients get the
// HTTP client directly, while the execute client gets the auth-wrapping client.
func NewClient(baseURL string, options ...ClientOption) *Client {
	c := &Client{
		BaseURL:    baseURL,
		httpClient: http.DefaultClient,
		Auth:       auth.NewAuth(),
	}
	for i := range options {
		options[i](c)
	}

	// Wrap the (possibly customized) HTTP client with the shared Auth value.
	c.authClient = auth.NewClient(c.httpClient, c.Auth)

	c.kernelClient = kernel.NewClient(baseURL, c.httpClient)
	c.sessionClient = session.NewClient(baseURL, c.httpClient)
	c.executeClient = execute.NewClient(baseURL, c.authClient)

	return c
}

// SetToken configures token authentication.
//
// It overwrites c.Auth.Token in place. The auth-wrapping client created by
// NewClient holds the same Auth pointer, so it observes the new value.
func (c *Client) SetToken(token string) {
	c.Auth.Token = token
}

// SetBasicAuth configures username/password authentication.
//
// It overwrites c.Auth.Username and c.Auth.Password in place and leaves
// c.Auth.Token untouched.
func (c *Client) SetBasicAuth(username, password string) {
	c.Auth.Username = username
	c.Auth.Password = password
}

// ValidateAuth checks locally that some credential is configured; it does not
// contact the server. It returns ("ok", nil) when Auth.Validate reports a
// mode other than "none", and ("error", err) otherwise.
func (c *Client) ValidateAuth() (string, error) {
	if c.Auth.Validate() != "none" {
		return "ok", nil
	}
	return "error", errors.New("no valid authentication information provided")
}

// GetKernelSpecs retrieves available kernel specifications.
//
// It delegates to the kernel sub-client and returns its result unchanged.
func (c *Client) GetKernelSpecs() (*kernel.KernelSpecs, error) {
	return c.kernelClient.GetKernelSpecs()
}

// ListKernels retrieves all running kernels.
//
// It delegates to the kernel sub-client and returns its result unchanged.
func (c *Client) ListKernels() ([]*kernel.Kernel, error) {
	return c.kernelClient.ListKernels()
}

// GetKernel retrieves information about a specific kernel.
//
// kernelId is forwarded as-is to the kernel sub-client, whose result is
// returned unchanged.
func (c *Client) GetKernel(kernelId string) (*kernel.Kernel, error) {
	return c.kernelClient.GetKernel(kernelId)
}

// StartKernel starts a new kernel.
//
// name is forwarded as-is to the kernel sub-client, whose result is returned
// unchanged.
func (c *Client) StartKernel(name string) (*kernel.Kernel, error) {
	return c.kernelClient.StartKernel(name)
}

// RestartKernel restarts the specified kernel.
//
// The boolean and error are exactly what the kernel sub-client's
// RestartKernel reports for kernelId.
func (c *Client) RestartKernel(kernelId string) (bool, error) {
	return c.kernelClient.RestartKernel(kernelId)
}

// InterruptKernel interrupts the specified kernel.
//
// kernelId is forwarded as-is to the kernel sub-client, whose error is
// returned unchanged.
func (c *Client) InterruptKernel(kernelId string) error {
	return c.kernelClient.InterruptKernel(kernelId)
}

// ShutdownKernel shuts down (and optionally restarts) the specified kernel.
//
// kernelId and restart are forwarded as-is to the kernel sub-client, whose
// error is returned unchanged.
func (c *Client) ShutdownKernel(kernelId string, restart bool) error {
	return c.kernelClient.ShutdownKernel(kernelId, restart)
}

// ListSessions retrieves active sessions.
//
// It delegates to the session sub-client and returns its result unchanged.
func (c *Client) ListSessions() ([]*session.Session, error) {
	return c.sessionClient.ListSessions()
}

// GetSession retrieves information about a specific session.
//
// sessionId is forwarded as-is to the session sub-client, whose result is
// returned unchanged.
func (c *Client) GetSession(sessionId string) (*session.Session, error) {
	return c.sessionClient.GetSession(sessionId)
}

// CreateSession creates a new session.
//
// name, ipynb and kernel are forwarded in that order to the session
// sub-client. Note that the kernel parameter shadows the imported kernel
// package inside this method.
func (c *Client) CreateSession(name, ipynb, kernel string) (*session.Session, error) {
	return c.sessionClient.CreateSession(name, ipynb, kernel)
}

// ModifySession updates an existing session.
//
// sessionId, name, path and kernel are forwarded in that order to the session
// sub-client. Note that the kernel parameter shadows the imported kernel
// package inside this method.
func (c *Client) ModifySession(sessionId, name, path, kernel string) (*session.Session, error) {
	return c.sessionClient.ModifySession(sessionId, name, path, kernel)
}

// DeleteSession deletes the specified session.
//
// sessionId is forwarded as-is to the session sub-client, whose error is
// returned unchanged.
func (c *Client) DeleteSession(sessionId string) error {
	return c.sessionClient.DeleteSession(sessionId)
}

// ConnectToKernel establishes a websocket connection to the kernel.
//
// The websocket URL is derived from BaseURL: the scheme is "wss" when BaseURL
// uses https and "ws" otherwise, the host is BaseURL's host, and the path is
// /api/kernels/<kernelId>/channels. Any path component of BaseURL is not
// carried over. When a token is configured it is appended as a query-escaped
// "token" parameter.
//
// It returns an error if BaseURL cannot be parsed, or whatever error the
// execute sub-client's Connect returns.
func (c *Client) ConnectToKernel(kernelId string) error {
	parsedURL, err := url.Parse(c.BaseURL)
	if err != nil {
		return fmt.Errorf("invalid base URL: %w", err)
	}

	scheme := "ws"
	if parsedURL.Scheme == "https" {
		scheme = "wss"
	}

	wsURL := fmt.Sprintf("%s://%s/api/kernels/%s/channels", scheme, parsedURL.Host, kernelId)

	if c.Auth.Token != "" {
		// Escape the token: characters such as '&', '+', '#', '%' or spaces
		// would otherwise corrupt the query string and the server would
		// receive a different token than the one configured.
		wsURL = fmt.Sprintf("%s?token=%s", wsURL, url.QueryEscape(c.Auth.Token))
	}

	return c.executeClient.Connect(wsURL)
}

// DisconnectFromKernel closes the websocket connection.
//
// kernelId is currently unused: the call always disconnects the single
// execute sub-client, whichever kernel it was connected to.
func (c *Client) DisconnectFromKernel(kernelId string) {
	c.executeClient.Disconnect()
}

// ExecuteCodeStream streams execution results into resultChan.
//
// kernelId is currently unused: code is sent over whatever connection the
// execute sub-client holds. code and resultChan are forwarded as-is.
func (c *Client) ExecuteCodeStream(kernelId, code string, resultChan chan *execute.ExecutionResult) error {
	return c.executeClient.ExecuteCodeStream(code, resultChan)
}

// ExecuteCodeWithCallback processes execution events via callbacks.
//
// code and handler are forwarded as-is to the execute sub-client, whose error
// is returned unchanged.
func (c *Client) ExecuteCodeWithCallback(code string, handler execute.CallbackHandler) error {
	return c.executeClient.ExecuteCodeWithCallback(code, handler)
}


================================================
FILE: components/execd/pkg/jupyter/debug_integration_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package jupyter

import (
	"fmt"
	"net/http"
	"net/http/httputil"
	"testing"
)

// TestDebugServerIntegration logs real server interactions for debugging.
//
// The test is skipped unless getEnv yields non-empty values for JUPYTER_URL
// and JUPYTER_TOKEN. It then runs three subtests against the live server:
// a local credential check, a GET on /api, and a series of GETs on
// /api/kernelspecs with different header combinations. Apart from request
// construction and transport failures, outcomes are only logged, not asserted.
// NOTE: the token is written to the test log by the header dump below.
func TestDebugServerIntegration(t *testing.T) {
	jupyterURL := getEnv("JUPYTER_URL", "")
	jupyterToken := getEnv("JUPYTER_TOKEN", "")
	if jupyterURL == "" || jupyterToken == "" {
		t.Skip("JUPYTER_URL and JUPYTER_TOKEN environment variables must be set to run this test")
	}

	t.Logf("Connecting to Jupyter server: %s", jupyterURL)

	// Every request made through httpClient is dumped by debugTransport.
	httpClient := &http.Client{
		Transport: &debugTransport{t: t},
	}

	client := NewClient(jupyterURL,
		WithToken(jupyterToken),
		WithHTTPClient(httpClient))

	t.Run("Validate Authentication", func(t *testing.T) {
		t.Logf("Calling ValidateAuth...")
		status, err := client.ValidateAuth()
		if err != nil {
			t.Fatalf("Authentication validation failed: %v", err)
		}
		t.Logf("Authentication validation successful! Status: %s", status)
	})

	t.Run("Get API Information", func(t *testing.T) {
		req, err := http.NewRequest("GET", fmt.Sprintf("%s/api", jupyterURL), nil)
		if err != nil {
			t.Fatalf("Failed to create request: %v", err)
		}
		req.Header.Set("Authorization", fmt.Sprintf("Token %s", jupyterToken))

		t.Logf("Sending request to /api endpoint...")
		resp, err := httpClient.Do(req)
		if err != nil {
			t.Fatalf("Failed to send request: %v", err)
		}
		defer resp.Body.Close()

		if resp.StatusCode != http.StatusOK {
			t.Logf("API request returned non-200 status code: %d %s", resp.StatusCode, resp.Status)
		} else {
			t.Logf("API request successful, status code: %d %s", resp.StatusCode, resp.Status)

			respDump, err := httputil.DumpResponse(resp, true)
			if err != nil {
				t.Logf("Unable to dump response: %v", err)
			} else {
				t.Logf("Response details:\n%s", string(respDump))
			}
		}
	})

	t.Run("Test Different Header Combinations", func(t *testing.T) {
		// Two combinations send only a prefix of the token as an XSRF value.
		// Clamp the prefix so a token shorter than 16 bytes does not trigger a
		// slice-bounds panic.
		xsrfToken := jupyterToken
		if len(xsrfToken) > 16 {
			xsrfToken = xsrfToken[:16]
		}

		headerSets := []map[string]string{
			{
				"Authorization": fmt.Sprintf("Token %s", jupyterToken),
			},
			{
				"Authorization": fmt.Sprintf("Token %s", jupyterToken),
				"X-XSRFToken":   xsrfToken, // Use first 16 characters of token as XSRF token attempt
			},
			{
				"Authorization": fmt.Sprintf("token %s", jupyterToken), // lowercase token
			},
			{
				"Cookie": fmt.Sprintf("_xsrf=%s; jupyter_token=%s", xsrfToken, jupyterToken),
			},
		}

		for i, headers := range headerSets {
			t.Logf("Testing header combination #%d:", i+1)
			for k, v := range headers {
				t.Logf("  %s: %s", k, v)
			}

			req, err := http.NewRequest("GET", fmt.Sprintf("%s/api/kernelspecs", jupyterURL), nil)
			if err != nil {
				t.Fatalf("Failed to create request: %v", err)
			}

			for k, v := range headers {
				req.Header.Set(k, v)
			}

			t.Logf("Sending request to /api/kernelspecs endpoint...")
			resp, err := httpClient.Do(req)
			if err != nil {
				t.Fatalf("Failed to send request: %v", err)
			}

			t.Logf("Response status code: %d %s", resp.StatusCode, resp.Status)
			if resp.StatusCode == http.StatusOK {
				t.Logf("Successfully found valid header combination!")

				respDump, err := httputil.DumpResponse(resp, true)
				if err != nil {
					t.Logf("Unable to dump response: %v", err)
				} else {
					maxLen := 500
					respStr := string(respDump)
					if len(respStr) > maxLen {
						t.Logf("Response (truncated):\n%s...", respStr[:maxLen])
					} else {
						t.Logf("Response:\n%s", respStr)
					}
				}
			}

			// Close per iteration rather than deferring: a defer inside the
			// loop would keep every response body open until the subtest ends.
			resp.Body.Close()
		}
	})
}

// debugTransport is an http.RoundTripper for tests: it writes a dump of each
// outgoing request and the status of each response to the test log, and
// performs the actual exchange through http.DefaultTransport.
type debugTransport struct {
	t *testing.T
}

// RoundTrip logs the outgoing request (dumps longer than 500 bytes are
// truncated), forwards it to http.DefaultTransport, and logs the response
// status. A failure to dump the request is logged and does not stop the
// request; transport errors are returned to the caller unchanged.
func (d *debugTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	const maxLen = 500

	if dump, dumpErr := httputil.DumpRequestOut(req, true); dumpErr != nil {
		d.t.Logf("Unable to dump request: %v", dumpErr)
	} else if text := string(dump); len(text) > maxLen {
		d.t.Logf("Request (truncated):\n%s...", text[:maxLen])
	} else {
		d.t.Logf("Request:\n%s", text)
	}

	resp, err := http.DefaultTransport.RoundTrip(req)
	if err != nil {
		return nil, err
	}
	d.t.Logf("Response status: %d %s", resp.StatusCode, resp.Status)

	return resp, nil
}


================================================
FILE: components/execd/pkg/jupyter/execute/events.json
================================================
[
  {
    "header": {
      "msg_id": "e5e24851-db96ed91126b13f9b603136f_123284_6",
      "username": "username",
      "session": "e5e24851-db96ed91126b13f9b603136f",
      "date": "2025-06-06T09:20:51.206377Z",
      "msg_type": "status",
      "version": "5.3"
    },
    "parent_header": {
      "msg_id": "e1df6eb2-f395e4906c9cecd23d97b548_7_2",
      "username": "username",
      "session": "e1df6eb2-f395e4906c9cecd23d97b548",
      "date": "2025-06-06T09:20:51.204953Z",
      "msg_type": "kernel_info_request",
      "version": "5.3"
    },
    "metadata": {},
    "content": {
      "execution_state": "busy"
    },
    "buffers": [],
    "channel": "iopub"
  },
  {
    "header": {
      "msg_id": "e5e24851-db96ed91126b13f9b603136f_123284_8",
      "username": "username",
      "session": "e5e24851-db96ed91126b13f9b603136f",
      "date": "2025-06-06T09:20:51.207083Z",
      "msg_type": "status",
      "version": "5.3"
    },
    "parent_header": {
      "msg_id": "e1df6eb2-f395e4906c9cecd23d97b548_7_1",
      "username": "username",
      "session": "e1df6eb2-f395e4906c9cecd23d97b548",
      "date": "2025-06-06T09:20:51.204866Z",
      "msg_type": "kernel_info_request",
      "version": "5.3"
    },
    "metadata": {},
    "content": {
      "execution_state": "idle"
    },
    "buffers": [],
    "channel": "iopub"
  },
  {
    "header": {
      "msg_id": "e5e24851-db96ed91126b13f9b603136f_123284_9",
      "username": "username",
      "session": "e5e24851-db96ed91126b13f9b603136f",
      "date": "2025-06-06T09:20:51.207169Z",
      "msg_type": "status",
      "version": "5.3"
    },
    "parent_header": {
      "msg_id": "e1df6eb2-f395e4906c9cecd23d97b548_7_2",
      "username": "username",
      "session": "e1df6eb2-f395e4906c9cecd23d97b548",
      "date": "2025-06-06T09:20:51.204953Z",
      "msg_type": "kernel_info_request",
      "version": "5.3"
    },
    "metadata": {},
    "content": {
      "execution_state": "idle"
    },
    "buffers": [],
    "channel": "iopub"
  },
  {
    "header": {
      "msg_id": "e5e24851-db96ed91126b13f9b603136f_123284_10",
      "username": "username",
      "session": "e5e24851-db96ed91126b13f9b603136f",
      "date": "2025-06-06T09:20:51.248234Z",
      "msg_type": "status",
      "version": "5.3"
    },
    "parent_header": {
      "msg_id": "e8e7f0af-fdd9-4ea9-8d78-eab629b5c0f0-1",
      "username": "go-client",
      "session": "e8e7f0af-fdd9-4ea9-8d78-eab629b5c0f0",
      "date": "2025-06-06T17:20:51+08:00",
      "msg_type": "execute_request",
      "version": "5.3"
    },
    "metadata": {},
    "content": {
      "execution_state": "busy"
    },
    "buffers": [],
    "channel": "iopub"
  },
  {
    "header": {
      "msg_id": "e5e24851-db96ed91126b13f9b603136f_123284_11",
      "username": "username",
      "session": "e5e24851-db96ed91126b13f9b603136f",
      "date": "2025-06-06T09:20:51.248481Z",
      "msg_type": "execute_input",
      "version": "5.3"
    },
    "parent_header": {
      "msg_id": "e8e7f0af-fdd9-4ea9-8d78-eab629b5c0f0-1",
      "username": "go-client",
      "session": "e8e7f0af-fdd9-4ea9-8d78-eab629b5c0f0",
      "date": "2025-06-06T17:20:51+08:00",
      "msg_type": "execute_request",
      "version": "5.3"
    },
    "metadata": {},
    "content": {
      "code": "print('Hello, Jupyter!')\nresult = 2 + 2\nresult",
      "execution_count": 1
    },
    "buffers": [],
    "channel": "iopub"
  },
  {
    "header": {
      "msg_id": "e5e24851-db96ed91126b13f9b603136f_123284_13",
      "username": "username",
      "session": "e5e24851-db96ed91126b13f9b603136f",
      "date": "2025-06-06T09:20:51.253641Z",
      "msg_type": "stream",
      "version": "5.3"
    },
    "parent_header": {
      "msg_id": "e8e7f0af-fdd9-4ea9-8d78-eab629b5c0f0-1",
      "username": "go-client",
      "session": "e8e7f0af-fdd9-4ea9-8d78-eab629b5c0f0",
      "date": "2025-06-06T17:20:51+08:00",
      "msg_type": "execute_request",
      "version": "5.3"
    },
    "metadata": {},
    "content": {
      "name": "stdout",
      "text": "Hello, Jupyter!\n"
    },
    "buffers": [],
    "channel": "iopub"
  },
  {
    "header": {
      "msg_id": "e5e24851-db96ed91126b13f9b603136f_123284_12",
      "username": "username",
      "session": "e5e24851-db96ed91126b13f9b603136f",
      "date": "2025-06-06T09:20:51.251743Z",
      "msg_type": "execute_result",
      "version": "5.3"
    },
    "parent_header": {
      "msg_id": "e8e7f0af-fdd9-4ea9-8d78-eab629b5c0f0-1",
      "username": "go-client",
      "session": "e8e7f0af-fdd9-4ea9-8d78-eab629b5c0f0",
      "date": "2025-06-06T17:20:51+08:00",
      "msg_type": "execute_request",
      "version": "5.3"
    },
    "metadata": {},
    "content": {
      "data": {
        "text/plain": "4"
      },
      "metadata": {},
      "execution_count": 1
    },
    "buffers": [],
    "channel": "iopub"
  },
  {
    "header": {
      "msg_id": "e5e24851-db96ed91126b13f9b603136f_123284_14",
      "username": "username",
      "session": "e5e24851-db96ed91126b13f9b603136f",
      "date": "2025-06-06T09:20:51.255042Z",
      "msg_type": "execute_reply",
      "version": "5.3"
    },
    "parent_header": {
      "msg_id": "e8e7f0af-fdd9-4ea9-8d78-eab629b5c0f0-1",
      "username": "go-client",
      "session": "e8e7f0af-fdd9-4ea9-8d78-eab629b5c0f0",
      "date": "2025-06-06T17:20:51+08:00",
      "msg_type": "execute_request",
      "version": "5.3"
    },
    "metadata": {
      "dependencies_met": true,
      "engine": "d82231bb-94b0-4296-8372-2913351ee2a1",
      "started": "2025-06-06T09:20:51.248468Z",
      "status": "ok"
    },
    "content": {
      "status": "ok",
      "execution_count": 1,
      "user_expressions": {},
      "payload": []
    },
    "buffers": [],
    "channel": "shell"
  },
  {
    "header": {
      "msg_id": "e5e24851-db96ed91126b13f9b603136f_123284_15",
      "username": "username",
      "session": "e5e24851-db96ed91126b13f9b603136f",
      "date": "2025-06-06T09:20:51.255385Z",
      "msg_type": "status",
      "version": "5.3"
    },
    "parent_header": {
      "msg_id": "e8e7f0af-fdd9-4ea9-8d78-eab629b5c0f0-1",
      "username": "go-client",
      "session": "e8e7f0af-fdd9-4ea9-8d78-eab629b5c0f0",
      "date": "2025-06-06T17:20:51+08:00",
      "msg_type": "execute_request",
      "version": "5.3"
    },
    "metadata": {},
    "content": {
      "execution_state": "idle"
    },
    "buffers": [],
    "channel": "iopub"
  }
]


================================================
FILE: components/execd/pkg/jupyter/execute/execute.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package execute provides functionality for executing Jupyter kernel code via WebSocket
package execute

import (
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
	"sync"
	"time"

	"github.com/google/uuid"
	"github.com/gorilla/websocket"
)

// HTTPClient defines the HTTP client interface.
//
// Any type with a Do method of this shape satisfies it, which allows a
// substitute implementation to be injected in place of a concrete client.
type HTTPClient interface {
	// Do sends the given request and returns the response or an error.
	Do(req *http.Request) (*http.Response, error)
}

// Client is the client for code execution.
//
// It owns a single kernel WebSocket connection together with a table of
// per-message-type handlers that the receive loop dispatches incoming
// messages to. All fields below are guarded by mu.
type Client struct {
	// Internal HTTP client for sending HTTP requests.
	// NOTE(review): stored by NewClient but not referenced by any method in this file.
	httpClient HTTPClient

	// WebSocket connection; nil while disconnected
	conn *websocket.Conn

	// Message handler mappings (at most one handler per message type)
	handlers map[MessageType]func(*Message)

	// Session ID, generated once in NewClient and sent in every request header
	session string

	// Message ID counter, incremented by nextMessageID
	msgCounter int

	// Mutex for protecting concurrent access
	mu sync.Mutex

	// WebSocket URL for kernel connection, saved by Connect
	wsURL string
}

// NewClient builds a disconnected code execution client.
//
// The client starts with an empty handler table, a freshly generated UUID as
// its session identifier and a message counter of zero. baseURL is accepted
// for signature compatibility but is not used; the kernel URL is supplied
// later through Connect.
func NewClient(baseURL string, httpClient HTTPClient) *Client {
	client := new(Client)
	client.httpClient = httpClient
	client.handlers = map[MessageType]func(*Message){}
	client.session = uuid.New().String()
	client.msgCounter = 0
	return client
}

// Connect dials the kernel WebSocket at wsURL and starts the background
// receive loop.
//
// The URL is remembered on the client even when dialing fails. On a dial
// error the handshake response body (if one was returned) is closed and the
// error is wrapped and returned. The client mutex is held for the whole call,
// including the dial itself.
func (c *Client) Connect(wsURL string) error {
	c.mu.Lock()
	defer c.mu.Unlock()

	// Remember the target URL
	c.wsURL = wsURL

	// Open the WebSocket
	wsConn, handshake, dialErr := websocket.DefaultDialer.Dial(wsURL, nil)
	if dialErr != nil {
		// A failed handshake may still carry a response whose body must be released
		if handshake != nil {
			handshake.Body.Close()
		}
		return fmt.Errorf("failed to connect to kernel: %w", dialErr)
	}
	c.conn = wsConn

	// Install default handlers. This runs with c.mu held, so the callee must
	// not try to take the mutex again.
	c.registerDefaultHandlers()

	// Read incoming messages in the background
	go c.receiveMessages()

	return nil
}

// Disconnect closes the kernel WebSocket connection, if there is one, and
// marks the client as disconnected. Calling it on an already disconnected
// client is a no-op.
func (c *Client) Disconnect() {
	c.mu.Lock()
	defer c.mu.Unlock()

	if c.conn == nil {
		return
	}

	c.conn.Close()
	c.conn = nil
}

// IsConnected reports whether the client currently holds a WebSocket
// connection. It only checks that a connection object is present; it does not
// probe the socket.
func (c *Client) IsConnected() bool {
	c.mu.Lock()
	defer c.mu.Unlock()

	connected := c.conn != nil
	return connected
}

// ExecuteCodeStream executes code in streaming mode, sending results to the provided channel.
//
// It replaces all currently registered handlers, sends an execute_request on
// the "shell" channel and returns as soon as the request has been written.
// Results are delivered asynchronously on resultChan from the receive
// goroutine:
//
//   - one ExecutionResult per stream message (Stream set),
//   - one per execute_result message (ExecutionCount and ExecutionData set),
//   - one per error message (Status "error" and Error set),
//   - one final value carrying ExecutionTime once the kernel reports "idle".
//
// After the idle status, resultChan is closed as soon as an execution count or
// an error has been recorded (the execute_reply handler records both).
// Callers must keep draining resultChan: handlers block on the send while the
// channel is full.
//
// Returns an error if the client is not connected, the request cannot be
// serialized, or the write to the WebSocket fails.
func (c *Client) ExecuteCodeStream(code string, resultChan chan *ExecutionResult) error {
	if !c.IsConnected() {
		return errors.New("not connected to kernel, please call Connect method")
	}

	// record start time
	startTime := time.Now()

	// prepare execution request
	msgID := c.nextMessageID()
	request := &ExecuteRequest{
		Code:            code,
		Silent:          false,
		StoreHistory:    true,
		UserExpressions: make(map[string]string),
		AllowStdin:      false,
		StopOnError:     true,
	}

	// serialize request content
	content, err := json.Marshal(request)
	if err != nil {
		return fmt.Errorf("failed to serialize request: %w", err)
	}

	// create message
	msg := &Message{
		Header: Header{
			MessageID:   msgID,
			Username:    "go-client",
			Session:     c.session,
			Date:        time.Now().Format(time.RFC3339),
			MessageType: string(MsgExecuteRequest),
			Version:     "5.3",
		},
		ParentHeader: Header{},
		Metadata:     make(map[string]interface{}),
		Content:      content,
		Channel:      "shell",
	}

	// Aggregated result; every access goes through resultMutex.
	result := &ExecutionResult{
		Status:        "ok",
		Stream:        make([]*StreamOutput, 0),
		ExecutionTime: 0,
	}
	var resultMutex sync.Mutex

	// executeDone ensures the completion goroutine is started only once,
	// even if several idle status messages arrive.
	var executeDone bool
	var executeMutex sync.Mutex

	// Drop handlers left over from a previous execution
	c.clearTemporaryHandlers()

	// execute_reply: records the execution count and, on failure, the error.
	// It does not emit anything on resultChan.
	c.registerHandler(MsgExecuteReply, func(msg *Message) {
		var execReply ExecuteReply
		if err := json.Unmarshal(msg.Content, &execReply); err != nil {
			return
		}

		resultMutex.Lock()
		result.ExecutionCount = execReply.ExecutionCount
		if execReply.EName != "" {
			result.Error = &execReply.ErrorOutput
		}
		resultMutex.Unlock()
	})

	// execute_result: forwards the value produced by the executed code
	c.registerHandler(MsgExecuteResult, func(msg *Message) {
		var execResult ExecuteResult
		if err := json.Unmarshal(msg.Content, &execResult); err != nil {
			return
		}

		resultMutex.Lock()
		result.ExecutionCount = execResult.ExecutionCount

		notify := &ExecutionResult{}
		notify.ExecutionCount = execResult.ExecutionCount
		notify.ExecutionData = execResult.Data

		resultChan <- notify
		resultMutex.Unlock()
	})

	// stream: forwards stdout/stderr chunks
	c.registerHandler(MsgStream, func(msg *Message) {
		var stream StreamOutput
		if err := json.Unmarshal(msg.Content, &stream); err != nil {
			return
		}

		resultMutex.Lock()
		result.Stream = append(result.Stream, &stream)

		notify := &ExecutionResult{}
		notify.Stream = []*StreamOutput{&stream}

		resultChan <- notify
		resultMutex.Unlock()
	})

	// error: forwards the error raised by the executed code
	c.registerHandler(MsgError, func(msg *Message) {
		var errOutput ErrorOutput
		if err := json.Unmarshal(msg.Content, &errOutput); err != nil {
			return
		}

		resultMutex.Lock()
		result.Status = "error"
		result.Error = &errOutput

		notify := &ExecutionResult{}
		notify.Error = &errOutput
		notify.Status = "error"

		resultChan <- notify
		resultMutex.Unlock()
	})

	// status: an idle status marks the end of the execution
	// NOTE(review): the handler does not compare msg.ParentHeader with msgID,
	// so an idle status belonging to another request is treated the same way.
	c.registerHandler(MsgStatus, func(msg *Message) {
		var status StatusUpdate
		if err := json.Unmarshal(msg.Content, &status); err != nil {
			return
		}

		if status.ExecutionState != StateIdle {
			return
		}

		executeMutex.Lock()
		defer executeMutex.Unlock()

		if executeDone {
			return
		}
		executeDone = true

		// Finish on a separate goroutine so the receive loop stays free to
		// deliver the execute_reply this goroutine waits for.
		go func() {
			// calculate execution time and send the final result
			resultMutex.Lock()
			result.ExecutionTime = time.Since(startTime)

			notify := &ExecutionResult{}
			notify.ExecutionTime = result.ExecutionTime

			resultChan <- notify
			resultMutex.Unlock()

			// Wait until an execution count or an error has been recorded.
			// The fields are written by handlers under resultMutex, so they
			// must be read under the same mutex to avoid a data race.
			// NOTE(review): this wait is unbounded if neither ever arrives.
			for {
				resultMutex.Lock()
				settled := result.ExecutionCount > 0 || result.Error != nil
				resultMutex.Unlock()
				if settled {
					break
				}
				time.Sleep(300 * time.Millisecond)
			}

			// Close result channel
			close(resultChan)
		}()
	})

	// send execution request; re-check the connection under the lock because
	// Disconnect may have run since the IsConnected check above
	c.mu.Lock()
	conn := c.conn
	if conn == nil {
		c.mu.Unlock()
		return errors.New("not connected to kernel, please call Connect method")
	}
	err = conn.WriteJSON(msg)
	c.mu.Unlock()
	if err != nil {
		return fmt.Errorf("failed to send execution request: %w", err)
	}

	return nil
}

// ExecuteCodeWithCallback executes code using callback functions.
//
// It clears handlers left over from a previous execution, registers one
// handler for every non-nil callback in handler, sends an execute_request on
// the "shell" channel and returns once the request has been written.
// Callbacks are invoked later from the receive goroutine, one message at a
// time; messages whose content cannot be decoded are dropped silently.
//
// Returns an error if the client is not connected, the request cannot be
// serialized, or the write to the WebSocket fails.
func (c *Client) ExecuteCodeWithCallback(code string, handler CallbackHandler) error {
	if !c.IsConnected() {
		return errors.New("not connected to kernel, please call Connect method")
	}

	// prepare execution request
	msgID := c.nextMessageID()
	request := &ExecuteRequest{
		Code:            code,
		Silent:          false,
		StoreHistory:    true,
		UserExpressions: make(map[string]string),
		AllowStdin:      false,
		StopOnError:     true,
	}

	// serialize request content
	content, err := json.Marshal(request)
	if err != nil {
		return fmt.Errorf("failed to serialize request: %w", err)
	}

	// create message
	msg := &Message{
		Header: Header{
			MessageID:   msgID,
			Username:    "go-client",
			Session:     c.session,
			Date:        time.Now().Format(time.RFC3339),
			MessageType: string(MsgExecuteRequest),
			Version:     "5.3",
		},
		ParentHeader: Header{},
		Metadata:     make(map[string]interface{}),
		Content:      content,
		Channel:      "shell",
	}

	// Drop handlers from any previous execution, as ExecuteCodeStream does.
	// Without this, a message type whose callback is nil here would still be
	// routed to the previous execution's handler, which may reference a
	// result channel that has already been closed.
	c.clearTemporaryHandlers()

	// register execution result handler
	if handler.OnExecuteResult != nil {
		c.registerHandler(MsgExecuteResult, func(msg *Message) {
			var execResult ExecuteResult
			if err := json.Unmarshal(msg.Content, &execResult); err != nil {
				return
			}

			// calls callback functions
			handler.OnExecuteResult(&execResult)
		})
	}

	// Register stream output handler
	if handler.OnStream != nil {
		c.registerHandler(MsgStream, func(msg *Message) {
			var stream StreamOutput
			if err := json.Unmarshal(msg.Content, &stream); err != nil {
				return
			}

			// calls callback functions
			handler.OnStream(&stream)
		})
	}

	// Register display data handler
	if handler.OnDisplayData != nil {
		c.registerHandler(MsgDisplayData, func(msg *Message) {
			var display DisplayData
			if err := json.Unmarshal(msg.Content, &display); err != nil {
				return
			}

			// calls callback functions
			handler.OnDisplayData(&display)
		})
	}

	// register error handler
	if handler.OnError != nil {
		c.registerHandler(MsgError, func(msg *Message) {
			var errOutput ErrorOutput
			if err := json.Unmarshal(msg.Content, &errOutput); err != nil {
				return
			}

			// calls callback functions
			handler.OnError(&errOutput)
		})
	}

	// register status handler
	if handler.OnStatus != nil {
		c.registerHandler(MsgStatus, func(msg *Message) {
			var status StatusUpdate
			if err := json.Unmarshal(msg.Content, &status); err != nil {
				return
			}

			// calls callback functions
			handler.OnStatus(&status)
		})
	}

	// send execution request; re-check the connection under the lock because
	// Disconnect may have run since the IsConnected check above
	c.mu.Lock()
	conn := c.conn
	if conn == nil {
		c.mu.Unlock()
		return errors.New("not connected to kernel, please call Connect method")
	}
	err = conn.WriteJSON(msg)
	c.mu.Unlock()
	if err != nil {
		return fmt.Errorf("failed to send execution request: %w", err)
	}

	return nil
}

// registerDefaultHandlers is the hook for installing handlers that should
// always be present. It is currently empty.
//
// Both call sites in this file (Connect and clearTemporaryHandlers) invoke it
// while holding c.mu, so handlers added here must be written to c.handlers
// directly; calling registerHandler from here would try to lock c.mu again.
func (c *Client) registerDefaultHandlers() {
	// default message handlers can be registered here
}

// registerHandler installs handler for msgType, replacing any handler
// previously registered for that type. It takes c.mu, so it must not be
// called while the caller already holds the mutex.
func (c *Client) registerHandler(msgType MessageType, handler func(*Message)) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.handlers[msgType] = handler
}

// clearTemporaryHandlers discards every registered handler by swapping in an
// empty handler table, then re-installs the default handlers. The default
// handler hook runs while c.mu is still held.
func (c *Client) clearTemporaryHandlers() {
	c.mu.Lock()
	defer c.mu.Unlock()

	fresh := map[MessageType]func(*Message){}
	c.handlers = fresh

	c.registerDefaultHandlers()
}

// receiveMessages is the background receive loop started by Connect.
//
// It reads frames from the current connection and dispatches each decoded
// message to the handler registered for its type. The loop ends when the
// client has been disconnected (c.conn is nil) or when reading from the
// socket fails.
//
// A frame whose payload is not a decodable Message is skipped instead of
// ending the loop: a decode failure leaves the socket usable, and stopping
// here would silently halt delivery of every later message.
func (c *Client) receiveMessages() {
	for {
		// Snapshot the connection under the lock; Disconnect may nil it
		c.mu.Lock()
		conn := c.conn
		c.mu.Unlock()

		if conn == nil {
			return
		}

		// Receive the raw frame so transport errors can be told apart from
		// decode errors
		_, payload, err := conn.ReadMessage()
		if err != nil {
			// connection may already be closed
			return
		}

		var msg Message
		if err := json.Unmarshal(payload, &msg); err != nil {
			// Not a valid Jupyter JSON message; ignore it and keep reading
			continue
		}

		// Process message
		c.handleMessage(&msg)
	}
}

// handleMessage routes msg to the handler registered for its message type.
// Messages with no registered (or a nil) handler are ignored. The handler is
// looked up under c.mu but invoked after the mutex has been released.
func (c *Client) handleMessage(msg *Message) {
	kind := MessageType(msg.Header.MessageType)

	c.mu.Lock()
	fn, found := c.handlers[kind]
	c.mu.Unlock()

	if !found || fn == nil {
		return
	}
	fn(msg)
}

// nextMessageID increments the per-client message counter and returns an ID
// of the form "<session>-<counter>".
func (c *Client) nextMessageID() string {
	c.mu.Lock()
	defer c.mu.Unlock()

	c.msgCounter += 1
	seq := c.msgCounter
	return fmt.Sprintf("%s-%d", c.session, seq)
}


================================================
FILE: components/execd/pkg/jupyter/execute/execute_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package execute

import (
	"encoding/json"
	"net/http"
	"net/http/httptest"
	"strings"
	"testing"
	"time"

	"github.com/gorilla/websocket"
)

// createTestServer starts an httptest server that upgrades every request to
// a WebSocket and hands the connection to handleFunc. The connection is
// closed when handleFunc returns. The caller is responsible for closing the
// returned server.
//
// The HTTP handler runs on a server goroutine, not on the test goroutine, so
// failures are reported with t.Errorf followed by return; t.Fatalf must only
// be called from the goroutine running the test.
func createTestServer(t *testing.T, handleFunc func(conn *websocket.Conn)) *httptest.Server {
	t.Helper()

	upgrader := websocket.Upgrader{
		CheckOrigin: func(r *http.Request) bool { return true },
	}

	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Validate request path
		if !strings.HasPrefix(r.URL.Path, "/api/kernels/") {
			t.Errorf("expected path to start with '/api/kernels/', got '%s'", r.URL.Path)
		}
		if !strings.HasSuffix(r.URL.Path, "/channels") {
			t.Errorf("expected path to end with '/channels', got '%s'", r.URL.Path)
		}

		// Upgrade HTTP connection to WebSocket
		conn, err := upgrader.Upgrade(w, r, nil)
		if err != nil {
			t.Errorf("failed to upgrade to WebSocket: %v", err)
			return
		}
		defer conn.Close()

		// Handle WebSocket connection
		handleFunc(conn)
	}))

	return server
}

// TestExecuteCodeStream checks streaming execution against a mock kernel.
//
// The mock server replies to the execute request with three stream messages,
// one execute_result and one idle status. The test expects the result channel
// to deliver at least four values and then be closed.
func TestExecuteCodeStream(t *testing.T) {
	// Spin up mock WebSocket server.
	// This callback runs on the server goroutine, so it reports failures with
	// t.Errorf + return instead of t.Fatalf.
	server := createTestServer(t, func(conn *websocket.Conn) {
		// Read execution request
		var executeRequest Message
		if err := conn.ReadJSON(&executeRequest); err != nil {
			t.Errorf("failed to read execution request: %v", err)
			return
		}

		// Send multiple stream messages
		for i := 0; i < 3; i++ {
			streamContent, _ := json.Marshal(StreamOutput{
				Name: StreamStdout,
				Text: "Line " + string(rune('0'+i)) + "\n",
			})

			streamMsg := Message{
				Header: Header{
					MessageID:   "stream-msg-id-" + string(rune('0'+i)),
					Session:     executeRequest.Header.Session,
					MessageType: string(MsgStream),
				},
				ParentHeader: executeRequest.Header,
				Content:      json.RawMessage(streamContent),
			}
			if err := conn.WriteJSON(streamMsg); err != nil {
				t.Errorf("failed to send stream message %d: %v", i, err)
				return
			}
			time.Sleep(100 * time.Millisecond)
		}

		// Send execution result
		resultContent, _ := json.Marshal(ExecuteResult{
			ExecutionCount: 1,
			Data: map[string]interface{}{
				"text/plain": "Completed",
			},
			Metadata: map[string]interface{}{},
		})

		executeResultMsg := Message{
			Header: Header{
				MessageID:   "result-msg-id",
				Session:     executeRequest.Header.Session,
				MessageType: string(MsgExecuteResult),
			},
			ParentHeader: executeRequest.Header,
			Content:      json.RawMessage(resultContent),
		}
		if err := conn.WriteJSON(executeResultMsg); err != nil {
			t.Errorf("failed to send execute_result message: %v", err)
			return
		}

		// Send status message
		statusContent, _ := json.Marshal(StatusUpdate{
			ExecutionState: StateIdle,
		})

		statusMsg := Message{
			Header: Header{
				MessageID:   "status-msg-id",
				Session:     executeRequest.Header.Session,
				MessageType: string(MsgStatus),
			},
			ParentHeader: executeRequest.Header,
			Content:      json.RawMessage(statusContent),
		}
		if err := conn.WriteJSON(statusMsg); err != nil {
			t.Errorf("failed to send status message: %v", err)
			return
		}
	})
	defer server.Close()

	// Convert HTTP URL to WebSocket URL
	wsURL := "ws" + strings.TrimPrefix(server.URL, "http") + "/api/kernels/test-kernel-id/channels"

	// Create executor client
	executor := NewExecutor(wsURL, nil)

	// Connect to WebSocket
	err := executor.Connect()
	if err != nil {
		t.Fatalf("failed to connect to WebSocket: %v", err)
	}
	defer executor.Disconnect()

	// Execute code in streaming mode
	resultChan := make(chan *ExecutionResult, 10)
	err = executor.ExecuteCodeStream("for i in range(3):\n    print(f'Line {i}')", resultChan)
	if err != nil {
		t.Fatalf("failed to start streaming execution: %v", err)
	}

	// Receive stream results until the channel is closed. The timeout keeps a
	// regression that never closes the channel from hanging the test run.
	timeout := time.After(10 * time.Second)
	resultCount := 0
collect:
	for {
		select {
		case result, ok := <-resultChan:
			if !ok || result == nil {
				break collect
			}
			resultCount++
		case <-timeout:
			t.Fatalf("timed out waiting for result channel to close; received %d results", resultCount)
		}
	}

	// Should receive at least 4 results (3 stream outputs + 1 final result)
	if resultCount < 4 {
		t.Errorf("expected at least 4 results, got %d", resultCount)
	}
}


================================================
FILE: components/execd/pkg/jupyter/execute/executor.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package execute

// Executor is a thin wrapper around Client that remembers the kernel
// WebSocket URL, so callers can connect without passing it again.
type Executor struct {
	// Internal client that performs the actual work
	client *Client
	// WebSocket URL passed to the client on Connect
	wsURL string
}

// NewExecutor returns an Executor bound to wsURL. It creates the underlying
// Client with an empty base URL and the given httpClient; no connection is
// opened until Connect is called.
func NewExecutor(wsURL string, httpClient HTTPClient) *Executor {
	executor := new(Executor)
	executor.client = NewClient("", httpClient)
	executor.wsURL = wsURL
	return executor
}

// Connect connects to the kernel at the WebSocket URL given to NewExecutor.
// It delegates to Client.Connect and returns its error unchanged.
func (e *Executor) Connect() error {
	return e.client.Connect(e.wsURL)
}

// Disconnect disconnects from the kernel by delegating to Client.Disconnect.
func (e *Executor) Disconnect() {
	e.client.Disconnect()
}

// ExecuteCodeStream executes code in streaming mode, sending results to the provided channel.
// It delegates to Client.ExecuteCodeStream and returns its error unchanged.
func (e *Executor) ExecuteCodeStream(code string, resultChan chan *ExecutionResult) error {
	return e.client.ExecuteCodeStream(code, resultChan)
}

// ExecuteCodeWithCallback executes code using callback functions.
// It delegates to Client.ExecuteCodeWithCallback and returns its error unchanged.
func (e *Executor) ExecuteCodeWithCallback(code string, handler CallbackHandler) error {
	return e.client.ExecuteCodeWithCallback(code, handler)
}


================================================
FILE: components/execd/pkg/jupyter/execute/types.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package execute provides functionality for executing Jupyter kernel code via WebSocket
package execute

import (
	"encoding/json"
	"fmt"
	"strings"
	"time"
)

// MessageType represents Jupyter message types.
// The values are the strings carried in a message header's msg_type field.
type MessageType string

const (
	// MsgExecuteRequest requests code execution
	MsgExecuteRequest MessageType = "execute_request"

	// MsgExecuteInput represents the input code
	MsgExecuteInput MessageType = "execute_input"

	// MsgExecuteResult represents execution results
	MsgExecuteResult MessageType = "execute_result"

	// MsgDisplayData represents data to be displayed
	MsgDisplayData MessageType = "display_data"

	// MsgStream represents stream output (stdout/stderr)
	MsgStream MessageType = "stream"

	// MsgError represents errors during execution
	MsgError MessageType = "error"

	// MsgStatus represents kernel status updates
	MsgStatus MessageType = "status"

	// MsgClearOutput represents clearing output
	MsgClearOutput MessageType = "clear_output"

	// MsgComm represents communication messages
	MsgComm MessageType = "comm"

	// MsgCommOpen represents opening communication
	MsgCommOpen MessageType = "comm_open"

	// MsgCommClose represents closing communication
	MsgCommClose MessageType = "comm_close"

	// MsgCommMsg represents communication message content
	MsgCommMsg MessageType = "comm_msg"

	// MsgKernelInfo represents kernel information request
	MsgKernelInfo MessageType = "kernel_info_request"

	// MsgKernelInfoReply represents kernel information response
	MsgKernelInfoReply MessageType = "kernel_info_reply"

	// MsgExecuteReply represents the reply to an execute request
	MsgExecuteReply MessageType = "execute_reply"
)

// StreamType represents the output stream type
type StreamType string

const (
	// StreamStdout represents standard output stream
	StreamStdout StreamType = "stdout"

	// StreamStderr represents standard error stream
	StreamStderr StreamType = "stderr"
)

// ExecutionState represents kernel execution state
type ExecutionState string

const (
	// StateIdle represents that the kernel is idle
	StateIdle ExecutionState = "idle"

	// StateBusy represents that the kernel is busy
	StateBusy ExecutionState = "busy"

	// StateStarting represents that the kernel is starting
	StateStarting ExecutionState = "starting"
)

// Header defines the Jupyter message header.
// The same type is used for a message's own header and for its parent header.
type Header struct {
	// MessageID is the unique identifier of the message
	MessageID string `json:"msg_id"`

	// Username is the username sending the message
	Username string `json:"username"`

	// Session is the session identifier
	Session string `json:"session"`

	// Date is the timestamp when the message was sent, kept as the raw string
	Date string `json:"date"`

	// MessageType is the type of the message (see the MessageType constants)
	MessageType string `json:"msg_type"`

	// Version is the version of the message protocol
	Version string `json:"version"`
}

// Message defines the basic structure of Jupyter messages as exchanged over
// the kernel WebSocket.
type Message struct {
	// Header is the message header
	Header Header `json:"header"`

	// ParentHeader is the parent message header, used to track requests and responses
	ParentHeader Header `json:"parent_header"`

	// Metadata is the metadata related to the message
	Metadata map[string]interface{} `json:"metadata"`

	// Content is the actual content of the message. It is kept as raw JSON so
	// that each handler can decode it into the type matching the message type.
	Content json.RawMessage `json:"content"`

	// Buffers is the binary buffer
	Buffers [][]byte `json:"buffers"`

	// Channel is the channel of the message (for example "shell" or "iopub")
	Channel string `json:"channel"`
}

// ExecuteRequest defines the request content for code execution,
// i.e. the content of an execute_request message.
type ExecuteRequest struct {
	// Code is the code to execute
	Code string `json:"code"`

	// Silent represents whether to execute in silent mode
	Silent bool `json:"silent"`

	// StoreHistory represents whether to store execution history
	StoreHistory bool `json:"store_history"`

	// UserExpressions contains expressions to evaluate in the execution context
	UserExpressions map[string]string `json:"user_expressions"`

	// AllowStdin represents whether to allow reading from standard input
	AllowStdin bool `json:"allow_stdin"`

	// StopOnError represents whether to stop execution when an error is encountered
	StopOnError bool `json:"stop_on_error"`
}

// StreamOutput represents stream output content,
// i.e. the content of a stream message.
type StreamOutput struct {
	// Name is the stream name (stdout or stderr)
	Name StreamType `json:"name"`

	// Text is the text content of the stream
	Text string `json:"text"`
}

// ExecuteResult represents the result of code execution,
// i.e. the content of an execute_result message.
type ExecuteResult struct {
	// ExecutionCount is the execution counter value
	ExecutionCount int `json:"execution_count"`

	// Data contains result data in different formats, keyed by format name
	// (for example "text/plain")
	Data map[string]interface{} `json:"data"`

	// Metadata is the metadata related to the result
	Metadata map[string]interface{} `json:"metadata"`
}

// ExecuteReply represents the content of an execute_reply message.
type ExecuteReply struct {
	// ExecutionCount is the execution counter value
	ExecutionCount int `json:"execution_count"`

	// Status is the status reported by the kernel for the request (for example "ok")
	Status string `json:"status"`

	// ErrorOutput is embedded so that ename/evalue/traceback are read from the
	// same JSON object as the fields above.
	// NOTE(review): encoding/json flattens embedded structs by default and does
	// not define an "inline" tag option, so the option looks redundant — confirm.
	ErrorOutput `json:",inline"`
}

// DisplayData represents data to display,
// i.e. the content of a display_data message.
type DisplayData struct {
	// Data contains display data in different formats
	Data map[string]interface{} `json:"data"`

	// Metadata is the metadata related to display data
	Metadata map[string]interface{} `json:"metadata"`
}

// ErrorOutput representserrors during execution
type ErrorOutput struct {
	// EName is the name of the error
	EName string `json:"ename"`

	// EValue is the value of the error
	EValue string `json:"evalue"`

	// Traceback is the traceback of the error
	Traceback []string `json:"traceback"`
}

func (e *ErrorOutput) String() string {
	return fmt.Sprintf(`
Error: %s
Value: %s
Traceback: %s
`, e.EName, e.EValue, strings.Join(e.Traceback, "\n"))
}

// StatusUpdate represents a kernel status update,
// i.e. the content of a status message.
type StatusUpdate struct {
	// ExecutionState is the execution state of the kernel
	ExecutionState ExecutionState `json:"execution_state"`
}

// ExecutionResult represents the complete result of code execution.
//
// The same type is also used for the incremental notifications sent on the
// streaming result channel, in which case only the fields relevant to that
// notification are populated.
type ExecutionResult struct {
	// Status represents the status of execution ("ok" or "error")
	Status string `json:"status"`

	// ExecutionCount is the execution counter value
	ExecutionCount int `json:"execution_count"`

	// Stream contains all stream output
	Stream []*StreamOutput `json:"stream"`

	// Error contains errors during execution (if any)
	Error *ErrorOutput `json:"error"`

	// ExecutionTime is the total time of code execution
	ExecutionTime time.Duration `json:"execution_time"`

	// ExecutionData is the data of an execute_result message, keyed by format
	ExecutionData map[string]interface{} `json:"execution_data"`
}

// CallbackHandler defines callback functions for handling different types of messages.
// Every field is optional: a nil callback means no handler is registered for
// that message type.
type CallbackHandler struct {
	// OnExecuteResult handles execution result messages
	OnExecuteResult func(*ExecuteResult)

	// OnStream handles stream output messages (variadic; the client in this
	// package passes one output per call)
	OnStream func(...*StreamOutput)

	// OnDisplayData handles display data messages
	OnDisplayData func(*DisplayData)

	// OnError handles error messages
	OnError func(*ErrorOutput)

	// OnStatus handles status update messages
	OnStatus func(*StatusUpdate)
}


================================================
FILE: components/execd/pkg/jupyter/execute/zz_generated.deepcopy.go
================================================
//go:build !ignore_autogenerated

/*
Copyright 2022.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Code generated by controller-gen. DO NOT EDIT.

package execute

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
// The Traceback slice is re-allocated so that out does not share its backing array with in.
func (in *ErrorOutput) DeepCopyInto(out *ErrorOutput) {
	*out = *in
	if in.Traceback != nil {
		in, out := &in.Traceback, &out.Traceback
		*out = make([]string, len(*in))
		copy(*out, *in)
	}
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ErrorOutput.
// A nil receiver yields nil.
func (in *ErrorOutput) DeepCopy() *ErrorOutput {
	if in == nil {
		return nil
	}
	out := new(ErrorOutput)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
// The Stream slice (including each element) and the Error value are duplicated.
// NOTE(review): ExecutionData gets no dedicated copy step here; it is only
// carried over by the initial struct assignment, so in and out end up
// referring to the same map.
func (in *ExecutionResult) DeepCopyInto(out *ExecutionResult) {
	*out = *in
	if in.Stream != nil {
		in, out := &in.Stream, &out.Stream
		*out = make([]*StreamOutput, len(*in))
		for i := range *in {
			if (*in)[i] != nil {
				in, out := &(*in)[i], &(*out)[i]
				*out = new(StreamOutput)
				**out = **in
			}
		}
	}
	if in.Error != nil {
		in, out := &in.Error, &out.Error
		*out = new(ErrorOutput)
		(*in).DeepCopyInto(*out)
	}
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExecutionResult.
// A nil receiver yields nil.
func (in *ExecutionResult) DeepCopy() *ExecutionResult {
	if in == nil {
		return nil
	}
	out := new(ExecutionResult)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
// A plain struct assignment is the only copy step performed.
func (in *StreamOutput) DeepCopyInto(out *StreamOutput) {
	*out = *in
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StreamOutput.
// A nil receiver yields nil.
func (in *StreamOutput) DeepCopy() *StreamOutput {
	if in == nil {
		return nil
	}
	out := new(StreamOutput)
	in.DeepCopyInto(out)
	return out
}


================================================
FILE: components/execd/pkg/jupyter/integration_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package jupyter

import (
	"encoding/json"
	"github.com/alibaba/opensandbox/execd/pkg/jupyter/execute"
	"net/http"
	"net/http/httptest"
	"strings"
	"testing"

	"github.com/gorilla/websocket"
)

// TestIntegrationFlow exercises the client end to end against in-process mocks:
// authentication -> get kernel specs -> create session -> execute code -> close session.
//
// Two httptest servers are used: one answers the REST endpoints with canned
// JSON, the other upgrades "/channels" requests to a WebSocket and replies to
// every execute request with a stream, an execute_result and an idle status
// message.
func TestIntegrationFlow(t *testing.T) {
	// Create mock HTTP server
	httpServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Handle authentication validation request
		if r.URL.Path == "/api/status" {
			// Check authentication token
			auth := r.Header.Get("Authorization")
			if auth != "token test-token" {
				w.WriteHeader(http.StatusUnauthorized)
				return
			}

			// Return status information
			w.Header().Set("Content-Type", "application/json")
			w.Write([]byte(`{"status": "ok"}`))
			return
		}

		// Handle kernel specs request
		if r.URL.Path == "/api/kernelspecs" {
			// Return kernel specs
			w.Header().Set("Content-Type", "application/json")
			w.Write([]byte(`{
				"default": "python3",
				"kernelspecs": {
					"python3": {
						"name": "python3",
						"display_name": "Python 3",
						"language": "python"
					}
				}
			}`))
			return
		}

		// Handle session-related requests
		if r.URL.Path == "/api/sessions" {
			if r.Method == http.MethodGet {
				// List sessions
				w.Header().Set("Content-Type", "application/json")
				w.Write([]byte(`[{
					"id": "test-session-id",
					"path": "/path/to/notebook.ipynb",
					"name": "Test Session",
					"type": "notebook",
					"kernel": {
						"id": "test-kernel-id",
						"name": "python3"
					}
				}]`))
				return
			} else if r.Method == http.MethodPost {
				// Create session
				w.Header().Set("Content-Type", "application/json")
				w.WriteHeader(http.StatusCreated)
				w.Write([]byte(`{
					"id": "test-session-id",
					"path": "/path/to/notebook.ipynb",
					"name": "Test Session",
					"type": "notebook",
					"kernel": {
						"id": "test-kernel-id",
						"name": "python3"
					}
				}`))
				return
			}
		}

		// Handle specific session requests
		if strings.HasPrefix(r.URL.Path, "/api/sessions/test-session-id") {
			if r.Method == http.MethodDelete {
				// Delete session
				w.WriteHeader(http.StatusNoContent)
				return
			} else if r.Method == http.MethodPatch {
				// Modify session
				w.Header().Set("Content-Type", "application/json")
				w.Write([]byte(`{
					"id": "test-session-id",
					"path": "/path/to/updated-notebook.ipynb",
					"name": "Updated Test Session",
					"type": "notebook",
					"kernel": {
						"id": "test-kernel-id",
						"name": "python3"
					}
				}`))
				return
			} else if r.Method == http.MethodGet {
				// Get session
				w.Header().Set("Content-Type", "application/json")
				w.Write([]byte(`{
					"id": "test-session-id",
					"path": "/path/to/notebook.ipynb",
					"name": "Test Session",
					"type": "notebook",
					"kernel": {
						"id": "test-kernel-id",
						"name": "python3"
					}
				}`))
				return
			}
		}

		// Handle kernel requests
		if r.URL.Path == "/api/kernels" {
			if r.Method == http.MethodGet {
				// List kernels
				w.Header().Set("Content-Type", "application/json")
				w.Write([]byte(`[{
					"id": "test-kernel-id",
					"name": "python3",
					"execution_state": "idle"
				}]`))
				return
			}
		}

		// Handle specific kernel requests
		if strings.HasPrefix(r.URL.Path, "/api/kernels/test-kernel-id") {
			if r.Method == http.MethodGet {
				// Get kernel
				w.Header().Set("Content-Type", "application/json")
				w.Write([]byte(`{
					"id": "test-kernel-id",
					"name": "python3",
					"execution_state": "idle"
				}`))
				return
			} else if r.Method == http.MethodPost && strings.HasSuffix(r.URL.Path, "/restart") {
				// Restart kernel
				w.Header().Set("Content-Type", "application/json")
				w.Write([]byte(`{
					"id": "test-kernel-id",
					"name": "python3",
					"restarted": true
				}`))
				return
			}
		}

		// WebSocket channel requests are not served here; they are handled by
		// the dedicated WebSocket server below, so answer 404.
		if strings.HasSuffix(r.URL.Path, "/channels") {
			w.WriteHeader(http.StatusNotFound)
			return
		}

		// For other requests, return 404
		w.WriteHeader(http.StatusNotFound)
	}))
	defer httpServer.Close()

	// Create mock WebSocket server for code execution
	wsServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if !strings.HasSuffix(r.URL.Path, "/channels") {
			w.WriteHeader(http.StatusNotFound)
			return
		}

		// Upgrade HTTP connection to WebSocket
		upgrader := websocket.Upgrader{
			CheckOrigin: func(r *http.Request) bool { return true },
		}
		conn, err := upgrader.Upgrade(w, r, nil)
		if err != nil {
			// This handler runs on the server's goroutine, not the test
			// goroutine, so FailNow-style calls such as t.Fatalf must not be
			// used here. Record the failure and leave the handler instead.
			t.Errorf("Failed to upgrade connection to WebSocket: %v", err)
			return
		}
		defer conn.Close()

		// Continuously handle WebSocket messages
		for {
			// Read request message; any read error (including the peer
			// closing the connection) ends the loop.
			var msg execute.Message
			err := conn.ReadJSON(&msg)
			if err != nil {
				break
			}

			// If it's an execute request, send mock response
			if msg.Header.MessageType == string(execute.MsgExecuteRequest) {
				// Send stream output
				streamContent, _ := json.Marshal(execute.StreamOutput{
					Name: execute.StreamStdout,
					Text: "Hello from test WebSocket!\n",
				})

				streamMsg := execute.Message{
					Header: execute.Header{
						MessageID:   "stream-msg-id",
						Session:     msg.Header.Session,
						MessageType: string(execute.MsgStream),
					},
					ParentHeader: msg.Header,
					Content:      json.RawMessage(streamContent),
				}
				conn.WriteJSON(streamMsg)

				// Send execution result
				resultContent, _ := json.Marshal(execute.ExecuteResult{
					ExecutionCount: 1,
					Data: map[string]interface{}{
						"text/plain": "Integration test result",
					},
					Metadata: map[string]interface{}{},
				})

				executeResultMsg := execute.Message{
					Header: execute.Header{
						MessageID:   "result-msg-id",
						Session:     msg.Header.Session,
						MessageType: string(execute.MsgExecuteResult),
					},
					ParentHeader: msg.Header,
					Content:      json.RawMessage(resultContent),
				}
				conn.WriteJSON(executeResultMsg)

				// Send status message
				statusContent, _ := json.Marshal(execute.StatusUpdate{
					ExecutionState: execute.StateIdle,
				})

				statusMsg := execute.Message{
					Header: execute.Header{
						MessageID:   "status-msg-id",
						Session:     msg.Header.Session,
						MessageType: string(execute.MsgStatus),
					},
					ParentHeader: msg.Header,
					Content:      json.RawMessage(statusContent),
				}
				conn.WriteJSON(statusMsg)
			}
		}
	}))
	defer wsServer.Close()

	// Create Jupyter client
	client := NewClient(httpServer.URL)
	client.SetToken("test-token")

	// Test 1: Validate authentication
	status, err := client.ValidateAuth()
	if err != nil {
		t.Fatalf("Authentication validation failed: %v", err)
	}
	if status != "ok" {
		t.Errorf("Authentication status incorrect, expected 'ok', got '%s'", status)
	}

	// Test 2: Get kernel specs
	specs, err := client.GetKernelSpecs()
	if err != nil {
		t.Fatalf("Failed to get kernel specs: %v", err)
	}
	if specs.Default != "python3" {
		t.Errorf("Default kernel incorrect, expected 'python3', got '%s'", specs.Default)
	}
	if len(specs.Kernelspecs) != 1 {
		t.Errorf("Kernel count incorrect, expected 1, got %d", len(specs.Kernelspecs))
	}

	// Test 3: Create session
	session, err := client.CreateSession("Test Session", "/path/to/notebook.ipynb", "python3")
	if err != nil {
		t.Fatalf("Failed to create session: %v", err)
	}
	if session.ID != "test-session-id" {
		t.Errorf("Session ID incorrect, expected 'test-session-id', got '%s'", session.ID)
	}
	if session.Kernel.ID != "test-kernel-id" {
		t.Errorf("Kernel ID incorrect, expected 'test-kernel-id', got '%s'", session.Kernel.ID)
	}

	// Modify WebSocket URL to point to WebSocket test server
	wsURL := "ws" + strings.TrimPrefix(wsServer.URL, "http") + "/api/kernels/test-kernel-id/channels"

	// Test 4: Connect to kernel and execute code
	executor := execute.NewExecutor(wsURL, nil)
	err = executor.Connect()
	if err != nil {
		t.Fatalf("Failed to connect to kernel: %v", err)
	}
	defer executor.Disconnect()

	// Execute code
	err = executor.ExecuteCodeWithCallback("print('Hello from integration test!')", execute.CallbackHandler{})
	if err != nil {
		t.Fatalf("Failed to execute code: %v", err)
	}

	// Test 5: Delete session
	err = client.DeleteSession(session.ID)
	if err != nil {
		t.Fatalf("Failed to delete session: %v", err)
	}
}


================================================
FILE: components/execd/pkg/jupyter/kernel/kernel.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package kernel provides functionality for managing Jupyter kernels
package kernel

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

// Client is the client for kernel management
type Client struct {
	// baseURL is the base URL of the Jupyter server
	baseURL string

	// httpClient is the client for sending HTTP requests, with authentication support
	httpClient *http.Client
}

// NewClient creates a new kernel management client
func NewClient(baseURL string, httpClient *http.Client) *Client {
	return &Client{
		baseURL:    baseURL,
		httpClient: httpClient,
	}
}

// GetKernelSpecs fetches the available kernel specifications.
//
// It sends GET {baseURL}/api/kernelspecs and decodes the JSON body into a
// KernelSpecs value. An error is returned if the request cannot be sent, the
// status code is not 200 OK, the body cannot be read, or the JSON is invalid.
func (c *Client) GetKernelSpecs() (*KernelSpecs, error) {
	endpoint := c.baseURL + "/api/kernelspecs"

	res, err := c.httpClient.Get(endpoint)
	if err != nil {
		return nil, fmt.Errorf("failed to send request: %w", err)
	}
	defer res.Body.Close()

	// Anything other than 200 OK is treated as a failure.
	if code := res.StatusCode; code != http.StatusOK {
		return nil, fmt.Errorf("server returned error status code: %d", code)
	}

	payload, err := io.ReadAll(res.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response: %w", err)
	}

	result := new(KernelSpecs)
	if err = json.Unmarshal(payload, result); err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}
	return result, nil
}

// ListKernels fetches the kernels reported by the server.
//
// It sends GET {baseURL}/api/kernels and decodes the JSON array into a slice
// of *Kernel. An error is returned if the request cannot be sent, the status
// code is not 200 OK, the body cannot be read, or the JSON is invalid.
func (c *Client) ListKernels() ([]*Kernel, error) {
	endpoint := c.baseURL + "/api/kernels"

	res, err := c.httpClient.Get(endpoint)
	if err != nil {
		return nil, fmt.Errorf("failed to send request: %w", err)
	}
	defer res.Body.Close()

	// Anything other than 200 OK is treated as a failure.
	if code := res.StatusCode; code != http.StatusOK {
		return nil, fmt.Errorf("server returned error status code: %d", code)
	}

	payload, err := io.ReadAll(res.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response: %w", err)
	}

	var list []*Kernel
	if err = json.Unmarshal(payload, &list); err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}
	return list, nil
}

// GetKernel fetches a single kernel by its ID.
//
// It sends GET {baseURL}/api/kernels/{kernelId} and decodes the JSON body into
// a Kernel. kernelId is inserted into the path verbatim. An error is returned
// if the request cannot be sent, the status code is not 200 OK, the body
// cannot be read, or the JSON is invalid.
func (c *Client) GetKernel(kernelId string) (*Kernel, error) {
	endpoint := c.baseURL + "/api/kernels/" + kernelId

	res, err := c.httpClient.Get(endpoint)
	if err != nil {
		return nil, fmt.Errorf("failed to send request: %w", err)
	}
	defer res.Body.Close()

	// Anything other than 200 OK is treated as a failure.
	if code := res.StatusCode; code != http.StatusOK {
		return nil, fmt.Errorf("server returned error status code: %d", code)
	}

	payload, err := io.ReadAll(res.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response: %w", err)
	}

	result := new(Kernel)
	if err = json.Unmarshal(payload, result); err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}
	return result, nil
}

// StartKernel asks the server to start a kernel with the given spec name.
//
// It sends POST {baseURL}/api/kernels with a JSON-encoded KernelStartRequest
// and decodes the JSON reply into a Kernel. Both 201 Created and 200 OK are
// accepted; any other status is returned as an error, as are failures to
// build, send, read or parse the exchange.
func (c *Client) StartKernel(name string) (*Kernel, error) {
	endpoint := c.baseURL + "/api/kernels"

	encoded, err := json.Marshal(&KernelStartRequest{Name: name})
	if err != nil {
		return nil, fmt.Errorf("failed to serialize request: %w", err)
	}

	request, err := http.NewRequest(http.MethodPost, endpoint, bytes.NewBuffer(encoded))
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}
	request.Header.Set("Content-Type", "application/json")

	res, err := c.httpClient.Do(request)
	if err != nil {
		return nil, fmt.Errorf("failed to send request: %w", err)
	}
	defer res.Body.Close()

	switch res.StatusCode {
	case http.StatusCreated, http.StatusOK:
		// Accepted success codes; continue with decoding.
	default:
		return nil, fmt.Errorf("server returned error status code: %d", res.StatusCode)
	}

	payload, err := io.ReadAll(res.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response: %w", err)
	}

	started := new(Kernel)
	if err = json.Unmarshal(payload, started); err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}
	return started, nil
}

// RestartKernel asks the server to restart the kernel with the given ID.
//
// It sends a body-less POST to {baseURL}/api/kernels/{kernelId}/restart and
// returns the "restarted" flag decoded from the JSON reply. Only 200 OK is
// accepted; any other status, or a failure to build, send, read or parse the
// exchange, yields (false, error).
func (c *Client) RestartKernel(kernelId string) (bool, error) {
	endpoint := c.baseURL + "/api/kernels/" + kernelId + "/restart"

	request, err := http.NewRequest(http.MethodPost, endpoint, nil)
	if err != nil {
		return false, fmt.Errorf("failed to create request: %w", err)
	}
	request.Header.Set("Content-Type", "application/json")

	res, err := c.httpClient.Do(request)
	if err != nil {
		return false, fmt.Errorf("failed to send request: %w", err)
	}
	defer res.Body.Close()

	if code := res.StatusCode; code != http.StatusOK {
		return false, fmt.Errorf("server returned error status code: %d", code)
	}

	payload, err := io.ReadAll(res.Body)
	if err != nil {
		return false, fmt.Errorf("failed to read response: %w", err)
	}

	var reply KernelRestartResponse
	if err = json.Unmarshal(payload, &reply); err != nil {
		return false, fmt.Errorf("failed to parse response: %w", err)
	}
	return reply.Restarted, nil
}

// InterruptKernel asks the server to interrupt the kernel with the given ID.
//
// It sends a body-less POST to {baseURL}/api/kernels/{kernelId}/interrupt.
// 204 No Content and 200 OK are treated as success; any other status, or a
// failure to build or send the request, is returned as an error. The response
// body is not read.
func (c *Client) InterruptKernel(kernelId string) error {
	endpoint := c.baseURL + "/api/kernels/" + kernelId + "/interrupt"

	request, err := http.NewRequest(http.MethodPost, endpoint, nil)
	if err != nil {
		return fmt.Errorf("failed to create request: %w", err)
	}
	request.Header.Set("Content-Type", "application/json")

	res, err := c.httpClient.Do(request)
	if err != nil {
		return fmt.Errorf("failed to send request: %w", err)
	}
	defer res.Body.Close()

	switch res.StatusCode {
	case http.StatusNoContent, http.StatusOK:
		return nil
	default:
		return fmt.Errorf("server returned error status code: %d", res.StatusCode)
	}
}

// ShutdownKernel asks the server to shut down the kernel with the given ID.
//
// It sends DELETE {baseURL}/api/kernels/{kernelId} with a JSON-encoded
// KernelShutdownRequest carrying the restart flag. 204 No Content and 200 OK
// are treated as success; any other status, or a failure to serialize, build
// or send the request, is returned as an error. The response body is not read.
func (c *Client) ShutdownKernel(kernelId string, restart bool) error {
	endpoint := c.baseURL + "/api/kernels/" + kernelId

	encoded, err := json.Marshal(&KernelShutdownRequest{Restart: restart})
	if err != nil {
		return fmt.Errorf("failed to serialize request: %w", err)
	}

	request, err := http.NewRequest(http.MethodDelete, endpoint, bytes.NewBuffer(encoded))
	if err != nil {
		return fmt.Errorf("failed to create request: %w", err)
	}
	request.Header.Set("Content-Type", "application/json")

	res, err := c.httpClient.Do(request)
	if err != nil {
		return fmt.Errorf("failed to send request: %w", err)
	}
	defer res.Body.Close()

	switch res.StatusCode {
	case http.StatusNoContent, http.StatusOK:
		return nil
	default:
		return fmt.Errorf("server returned error status code: %d", res.StatusCode)
	}
}


================================================
FILE: components/execd/pkg/jupyter/kernel/kernelspecs.json
================================================
{
  "default" : "python3",
  "kernelspecs" : {
    "python3" : {
      "name" : "python3",
      "spec" : {
        "argv" : [ "/opt/conda/bin/python", "-m", "ipykernel_launcher", "-f", "{connection_file}" ],
        "env" : { },
        "display_name" : "Python 3 (ipykernel)",
        "language" : "python",
        "interrupt_mode" : "signal",
        "metadata" : {
          "debugger" : true
        }
      },
      "resources" : {
        "logo-svg" : "/kernelspecs/python3/logo-svg.svg",
        "logo-64x64" : "/kernelspecs/python3/logo-64x64.png",
        "logo-32x32" : "/kernelspecs/python3/logo-32x32.png"
      }
    }
  }
}


================================================
FILE: components/execd/pkg/jupyter/kernel/types.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package kernel provides functionality for managing Jupyter kernels
package kernel

import (
	"time"
)

// KernelSpecs contains available kernel specification information.
// It is the JSON shape decoded from the "default" and "kernelspecs" keys.
type KernelSpecs struct {
	// Default is the name of the default kernel
	Default string `json:"default"`

	// Kernelspecs is a mapping from kernel names to kernel specifications
	Kernelspecs map[string]*KernelSpecInfo `json:"kernelspecs"`
}

// KernelSpecInfo contains detailed kernel specification information
type KernelSpecInfo struct {
	// Name is the name of the kernel
	Name string `json:"name"`

	// Spec holds the nested specification details decoded from the "spec" key
	Spec KernelSpecDetail `json:"spec"`

	// Resources contains resource paths related to the kernel
	Resources map[string]string `json:"resources,omitempty"`
}

// KernelSpecDetail is the nested "spec" object of a kernel specification
type KernelSpecDetail struct {
	// Argv is the list of strings decoded from the "argv" key
	// (presumably the command line used to launch the kernel — TODO confirm)
	Argv []string `json:"argv,omitempty"`

	// DisplayName is the display name of the kernel
	DisplayName string `json:"display_name"`

	// Language is the programming language used by the kernel
	Language string `json:"language,omitempty"`

	// InterruptMode is the interrupt mode of the kernel
	InterruptMode string `json:"interrupt_mode,omitempty"`
}

// Kernel represents a running kernel instance
type Kernel struct {
	// ID is the unique identifier of the kernel
	ID string `json:"id"`

	// Name is the name of the kernel
	Name string `json:"name"`

	// LastActivity is the timestamp of the kernel's last activity.
	// NOTE(review): encoding/json's omitempty never omits struct values such
	// as time.Time, so a zero time is still emitted when marshaling.
	LastActivity time.Time `json:"last_activity,omitempty"`

	// Connections is the number of clients currently connected to the kernel
	Connections int `json:"connections,omitempty"`

	// ExecutionState is the execution state of the kernel (e.g., idle, busy)
	ExecutionState string `json:"execution_state,omitempty"`
}

// KernelStartRequest is the JSON request body for starting a new kernel
type KernelStartRequest struct {
	// Name is the name of the kernel to start
	Name string `json:"name"`

	// Path is the optional path for the kernel; omitted from the JSON when empty
	Path string `json:"path,omitempty"`
}

// KernelRestartResponse represents the response to a kernel restart request
type KernelRestartResponse struct {
	// ID is the ID of the restarted kernel
	ID string `json:"id"`

	// Name is the restarted kernel name
	Name string `json:"name"`

	// Restarted represents whether the kernel was successfully restarted
	Restarted bool `json:"restarted"`

	// LastActivity is the timestamp of the kernel's last activity.
	// NOTE(review): encoding/json's omitempty never omits struct values such
	// as time.Time, so a zero time is still emitted when marshaling.
	LastActivity time.Time `json:"last_activity,omitempty"`
}

// KernelInterruptRequest is the request to interrupt a kernel
type KernelInterruptRequest struct {
	// Restart represents whether to restart the kernel after interruption;
	// omitted from the JSON when false
	Restart bool `json:"restart,omitempty"`
}

// KernelShutdownRequest is the request to shut down a kernel
type KernelShutdownRequest struct {
	// Restart represents whether to restart the kernel after shutdown;
	// always present in the JSON (no omitempty)
	Restart bool `json:"restart"`
}

// KernelStatus represents the status of the kernel
type KernelStatus string

const (
	// KernelStatusIdle represents that the kernel is idle
	KernelStatusIdle KernelStatus = "idle"

	// KernelStatusBusy represents that the kernel is busy
	KernelStatusBusy KernelStatus = "busy"

	// KernelStatusStarting represents that the kernel is starting
	KernelStatusStarting KernelStatus = "starting"

	// KernelStatusRestarting represents that the kernel is restarting
	KernelStatusRestarting KernelStatus = "restarting"

	// KernelStatusDead represents that the kernel is dead
	KernelStatusDead KernelStatus = "dead"
)


================================================
FILE: components/execd/pkg/jupyter/live_integration_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package jupyter

import (
	"fmt"
	"net/http"
	"os"
	"testing"
	"time"

	"github.com/alibaba/opensandbox/execd/pkg/jupyter/execute"
)

// authTransport is a custom transport layer for adding authentication headers.
// It wraps another http.RoundTripper and delegates the actual request to it.
type authTransport struct {
	// token is the value sent after "Token " in the Authorization header
	token string
	// base is the underlying transport that performs the request
	base http.RoundTripper
}

// RoundTrip implements http.RoundTripper. It sets an
// "Authorization: Token <token>" header on a clone of the request (the
// caller's request is left untouched) and forwards the clone to the base
// transport, returning whatever the base transport returns.
func (t *authTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	headerValue := "Token " + t.token

	authed := req.Clone(req.Context())
	authed.Header.Set("Authorization", headerValue)

	return t.base.RoundTrip(authed)
}

// TestLiveServerIntegration tests SDK integration with a real Jupyter server.
//
// The subtests run in order and share state through captured variables
// (kernelName, sessionID, kernelID); later subtests skip themselves when an
// earlier one did not populate the value they need.
//
// NOTE(review): the test starts with an unconditional t.Skip(), so none of the
// code below currently runs, regardless of JUPYTER_URL / JUPYTER_TOKEN.
func TestLiveServerIntegration(t *testing.T) {
	// Unconditional skip: everything after this line is not executed.
	t.Skip()
	// Get configuration from environment variables, use default values if not set
	jupyterURL := getEnv("JUPYTER_URL", "")
	jupyterToken := getEnv("JUPYTER_TOKEN", "")
	if jupyterURL == "" || jupyterToken == "" {
		t.Skip("JUPYTER_URL and JUPYTER_TOKEN environment variables must be set to run this test")
	}

	// Output test information
	t.Logf("Connecting to Jupyter server: %s", jupyterURL)

	// Create HTTP client with authentication capability
	httpClient := &http.Client{
		Transport: &authTransport{
			token: jupyterToken,
			base:  http.DefaultTransport,
		},
	}

	// Create client and set authentication
	client := NewClient(jupyterURL,
		WithToken(jupyterToken), // Keep Token setting to support ValidateAuth and WebSocket connections
		WithHTTPClient(httpClient))

	// Test 1: Validate authentication
	t.Run("Validate Authentication", func(t *testing.T) {
		status, err := client.ValidateAuth()
		if err != nil {
			t.Fatalf("Authentication validation failed: %v", err)
		}
		if status != "ok" {
			t.Errorf("Authentication status incorrect, expected 'ok', got '%s'", status)
		}
		t.Logf("Authentication validation successful! Status: %s", status)
	})

	// Test 2: Get kernel specs
	var kernelName string
	t.Run("Get Kernel Specs", func(t *testing.T) {
		specs, err := client.GetKernelSpecs()
		if err != nil {
			t.Fatalf("Failed to get kernel specs: %v", err)
		}
		if specs.Default == "" {
			t.Errorf("No default kernel")
		}
		if len(specs.Kernelspecs) == 0 {
			t.Errorf("No available kernels")
		}

		// Start from the default kernel, then prefer the first kernel found
		// whose language is "python" (map iteration order is unspecified).
		kernelName = specs.Default
		for name, spec := range specs.Kernelspecs {
			if spec.Spec.Language == "python" {
				kernelName = name
				break
			}
		}

		t.Logf("Get kernel specs successful! Default kernel: %s, Selected kernel: %s", specs.Default, kernelName)
		t.Logf("Available kernels: %v", specs.Kernelspecs)
	})

	// Test 3: List sessions
	t.Run("List Sessions", func(t *testing.T) {
		sessions, err := client.ListSessions()
		if err != nil {
			t.Fatalf("Failed to list sessions: %v", err)
		}
		t.Logf("List sessions successful! Number of existing sessions: %d", len(sessions))
		for i, s := range sessions {
			t.Logf("Session %d: ID=%s, Path=%s, Kernel=%s", i+1, s.ID, s.Path, s.Kernel.Name)
		}
	})

	// Test 4: Create new session
	var sessionID string
	t.Run("Create Session", func(t *testing.T) {
		// Generate unique name for test session
		sessionName := fmt.Sprintf("test-session-%d", time.Now().Unix())
		sessionPath := "/test-notebook.ipynb"

		session, err := client.CreateSession(sessionName, sessionPath, kernelName)
		if err != nil {
			t.Fatalf("Failed to create session: %v", err)
		}
		if session.ID == "" {
			t.Errorf("Created session has no ID")
		}
		if session.Kernel.ID == "" {
			t.Errorf("Created session has no kernel ID")
		}

		// Save session ID for subsequent tests
		sessionID = session.ID

		t.Logf("Create session successful! Session ID: %s, Kernel ID: %s", session.ID, session.Kernel.ID)
	})

	// Test 5: Get created session
	var kernelID string
	t.Run("Get Session", func(t *testing.T) {
		if sessionID == "" {
			t.Skip("No session ID, skipping test")
		}

		session, err := client.GetSession(sessionID)
		if err != nil {
			t.Fatalf("Failed to get session: %v", err)
		}
		if session.ID != sessionID {
			t.Errorf("Session ID mismatch, expected '%s', got '%s'", sessionID, session.ID)
		}

		// Save kernel ID for subsequent tests
		kernelID = session.Kernel.ID

		t.Logf("Get session successful! Session name: %s, Kernel name: %s", session.Name, session.Kernel.Name)
	})

	// Test 6: List all kernels
	t.Run("List Kernels", func(t *testing.T) {
		kernels, err := client.ListKernels()
		if err != nil {
			t.Fatalf("Failed to list kernels: %v", err)
		}
		t.Logf("List kernels successful! Number of kernels: %d", len(kernels))
		for i, k := range kernels {
			t.Logf("Kernel %d: ID=%s, Name=%s, State=%s", i+1, k.ID, k.Name, k.ExecutionState)
		}

		// Verify that the created kernel is in the list
		if kernelID != "" {
			found := false
			for _, k := range kernels {
				if k.ID == kernelID {
					found = true
					break
				}
			}
			if !found {
				t.Errorf("Cannot find created kernel in kernel list ID=%s", kernelID)
			}
		}
	})

	// Test 7: Connect to kernel and execute code
	t.Run("Execute Code", func(t *testing.T) {
		if kernelID == "" {
			t.Skip("No kernel ID, skipping test")
		}

		// Connect to kernel
		err := client.ConnectToKernel(kernelID)
		if err != nil {
			t.Fatalf("Failed to connect to kernel: %v", err)
		}
		defer client.DisconnectFromKernel(kernelID)

		// Execute simple code
		code := "print('Hello, Jupyter!')\nresult = 2 + 2\nresult"
		t.Logf("Executing code:\n%s", code)

		err = client.ExecuteCodeWithCallback(code, execute.CallbackHandler{})
		if err != nil {
			t.Fatalf("Failed to execute code: %v", err)
		}
	})

	// Test 7 (follow-up): reconnect and execute code that reads `result`,
	// the variable assigned by the previous subtest's code
	// (presumably relies on the kernel keeping state between connections — TODO confirm).
	// NOTE(review): this subtest reuses the name "Execute Code"; the testing
	// package disambiguates duplicate subtest names with a numeric suffix.
	t.Run("Execute Code", func(t *testing.T) {
		if kernelID == "" {
			t.Skip("No kernel ID, skipping test")
		}

		// Connect to kernel
		err := client.ConnectToKernel(kernelID)
		if err != nil {
			t.Fatalf("Failed to connect to kernel: %v", err)
		}
		defer client.DisconnectFromKernel(kernelID)

		// Execute simple code
		code := "print(f'2 + 2 = {result}')\nresult"
		t.Logf("Executing code:\n%s", code)

		err = client.ExecuteCodeWithCallback(code, execute.CallbackHandler{})
		if err != nil {
			t.Fatalf("Failed to execute code: %v", err)
		}
	})

	// Test 8: Execute complex code with different types of output
	t.Run("Execute Complex Code", func(t *testing.T) {
		if kernelID == "" {
			t.Skip("No kernel ID, skipping test")
		}

		// Connect to kernel
		err := client.ConnectToKernel(kernelID)
		if err != nil {
			t.Fatalf("Failed to connect to kernel: %v", err)
		}
		defer client.DisconnectFromKernel(kernelID)

		// Execute code that generates multiple output types
		code := `
# Display table data
import pandas as pd
import numpy as np
try:
    df = pd.DataFrame({
        'A': np.random.rand(5),
        'B': np.random.rand(5)
    })
    display(df)
    print("DataFrame created successfully")
except Exception as e:
    print(f"Error creating DataFrame: {e}")

# Generate error
try:
    print(undefined_variable)
except Exception as e:
    print(f"Expected error: {e}")

# Return dictionary
{'hello': 'world', 'number': 42}
`

		t.Logf("Executing complex code...")

		err = client.ExecuteCodeWithCallback(code, execute.CallbackHandler{})
		if err != nil {
			t.Fatalf("Failed to execute complex code: %v", err)
		}
	})

	// Test 9: Restart kernel
	t.Run("Restart Kernel", func(t *testing.T) {
		if kernelID == "" {
			t.Skip("No kernel ID, skipping test")
		}

		// Restart kernel
		restarted, err := client.RestartKernel(kernelID)
		if err != nil {
			t.Fatalf("Failed to restart kernel: %v", err)
		}

		// Wait a fixed 2 seconds to give the restart time to complete
		time.Sleep(2 * time.Second)

		// Fetch the kernel again; its state is only logged, not asserted
		kernel, err := client.GetKernel(kernelID)
		if err != nil {
			t.Fatalf("Failed to get kernel: %v", err)
		}

		t.Logf("Restart kernel successful! Restart status: %v, Kernel state: %s", restarted, kernel.ExecutionState)
	})

	// Test 10: Close session
	t.Run("Close Session", func(t *testing.T) {
		if sessionID == "" {
			t.Skip("No session ID, skipping test")
		}

		// Delete session
		err := client.DeleteSession(sessionID)
		if err != nil {
			t.Fatalf("Failed to delete session: %v", err)
		}

		// Verify session is deleted
		sessions, err := client.ListSessions()
		if err != nil {
			t.Fatalf("Failed to list sessions: %v", err)
		}

		for _, s := range sessions {
			if s.ID == sessionID {
				t.Errorf("Session still exists, not properly deleted ID=%s", sessionID)
				break
			}
		}

		t.Logf("Close session successful!")
	})
}

// Helper function: Get environment variable, use default value if not exists
func getEnv(key, defaultValue string) string {
	value := os.Getenv(key)
	if value == "" {
		return defaultValue
	}
	return value
}

// truncateString shortens s to at most maxLen bytes and appends "..." when
// anything was cut off; a string that already fits is returned unchanged.
//
// The cut position is moved back to the nearest rune boundary at or before
// maxLen so that a multi-byte UTF-8 character is never split in half, and a
// negative maxLen is treated like zero instead of panicking on an invalid
// slice bound. For ASCII input the result is the same as plain byte slicing.
func truncateString(s string, maxLen int) string {
	if len(s) <= maxLen {
		return s
	}

	cut := 0
	// Ranging over a string yields the byte offset at which each rune starts,
	// so cut ends up as the last rune start that does not exceed maxLen.
	for offset := range s {
		if offset > maxLen {
			break
		}
		cut = offset
	}
	return s[:cut] + "..."
}

// getKeys returns the keys of m as a non-nil slice. The order follows Go's
// map iteration order and is therefore unspecified.
func getKeys(m map[string]interface{}) []string {
	names := make([]string, len(m))
	idx := 0
	for name := range m {
		names[idx] = name
		idx++
	}
	return names
}


================================================
FILE: components/execd/pkg/jupyter/session/session.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package session provides functionality for managing Jupyter sessions
package session

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

// Client is the client for session management. It talks to the Jupyter
// server's /api/sessions REST endpoints.
type Client struct {
	// baseURL is the base URL of the Jupyter server, stored without a
	// trailing slash so endpoint paths can be appended directly.
	baseURL string

	// httpClient is the client for sending HTTP requests, with authentication support
	httpClient *http.Client
}

// NewClient creates a new session management client.
//
// Trailing slashes are stripped from baseURL so that the "%s/api/..." URL
// templates used by the methods never produce a double slash. A nil httpClient
// falls back to http.DefaultClient instead of causing a nil-pointer panic on
// the first request.
func NewClient(baseURL string, httpClient *http.Client) *Client {
	for len(baseURL) > 0 && baseURL[len(baseURL)-1] == '/' {
		baseURL = baseURL[:len(baseURL)-1]
	}
	if httpClient == nil {
		httpClient = http.DefaultClient
	}
	return &Client{
		baseURL:    baseURL,
		httpClient: httpClient,
	}
}

// ListSessions fetches every session currently known to the Jupyter server.
//
// It issues GET {baseURL}/api/sessions and decodes the JSON array found in the
// response body. Any status other than 200 OK is reported as an error.
func (c *Client) ListSessions() ([]*Session, error) {
	endpoint := fmt.Sprintf("%s/api/sessions", c.baseURL)

	resp, err := c.httpClient.Get(endpoint)
	if err != nil {
		return nil, fmt.Errorf("failed to send request: %w", err)
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code != http.StatusOK {
		return nil, fmt.Errorf("server returned error status code: %d", code)
	}

	payload, readErr := io.ReadAll(resp.Body)
	if readErr != nil {
		return nil, fmt.Errorf("failed to read response: %w", readErr)
	}

	var result []*Session
	if parseErr := json.Unmarshal(payload, &result); parseErr != nil {
		return nil, fmt.Errorf("failed to parse response: %w", parseErr)
	}
	return result, nil
}

// GetSession retrieves information about a specific session.
//
// It issues GET {baseURL}/api/sessions/{sessionId} and decodes the JSON object
// found in the response body. Any status other than 200 OK is reported as an
// error.
//
// An empty sessionId is rejected up front: formatting it into the URL would
// silently turn the request into one against the collection path
// "/api/sessions/" rather than a single session.
func (c *Client) GetSession(sessionId string) (*Session, error) {
	if sessionId == "" {
		return nil, fmt.Errorf("session ID must not be empty")
	}

	// Build request URL
	url := fmt.Sprintf("%s/api/sessions/%s", c.baseURL, sessionId)

	// Send GET request
	resp, err := c.httpClient.Get(url)
	if err != nil {
		return nil, fmt.Errorf("failed to send request: %w", err)
	}
	defer resp.Body.Close()

	// Check response status
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("server returned error status code: %d", resp.StatusCode)
	}

	// Read response
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response: %w", err)
	}

	// Parse JSON response
	var session Session
	if err := json.Unmarshal(body, &session); err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}

	return &session, nil
}

// CreateSession asks the Jupyter server to start a new session.
//
// name is the session name, ipynb is sent as the session path and kernel is
// the name of the kernel to start. The session type is always
// DefaultSessionType. The request is POSTed as JSON to {baseURL}/api/sessions;
// both 201 Created and 200 OK are accepted as success.
func (c *Client) CreateSession(name, ipynb, kernel string) (*Session, error) {
	payload, err := json.Marshal(&SessionCreateRequest{
		Path:   ipynb,
		Name:   name,
		Type:   DefaultSessionType,
		Kernel: &KernelSpec{Name: kernel},
	})
	if err != nil {
		return nil, fmt.Errorf("failed to serialize request: %w", err)
	}

	endpoint := fmt.Sprintf("%s/api/sessions", c.baseURL)
	req, err := http.NewRequest(http.MethodPost, endpoint, bytes.NewBuffer(payload))
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("failed to send request: %w", err)
	}
	defer resp.Body.Close()

	switch resp.StatusCode {
	case http.StatusCreated, http.StatusOK:
		// success, fall through to decoding
	default:
		return nil, fmt.Errorf("server returned error status code: %d", resp.StatusCode)
	}

	raw, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response: %w", err)
	}

	created := new(Session)
	if err := json.Unmarshal(raw, created); err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}
	return created, nil
}

// ModifySession modifies properties of an existing session.
//
// Only the non-empty arguments among name, path and kernel are included in the
// PATCH body sent to {baseURL}/api/sessions/{sessionId}; empty ones are left
// out so the server keeps their current values. Any status other than 200 OK
// is reported as an error.
//
// An empty sessionId is rejected up front: formatting it into the URL would
// silently turn the request into one against the collection path
// "/api/sessions/" rather than a single session.
func (c *Client) ModifySession(sessionId, name, path, kernel string) (*Session, error) {
	if sessionId == "" {
		return nil, fmt.Errorf("session ID must not be empty")
	}

	// Build request URL
	url := fmt.Sprintf("%s/api/sessions/%s", c.baseURL, sessionId)

	// Build request body
	reqBody := &SessionUpdateRequest{}
	if name != "" {
		reqBody.Name = name
	}
	if path != "" {
		reqBody.Path = path
	}
	if kernel != "" {
		reqBody.Kernel = &KernelSpec{
			Name: kernel,
		}
	}

	// Serialize request body to JSON
	jsonData, err := json.Marshal(reqBody)
	if err != nil {
		return nil, fmt.Errorf("failed to serialize request: %w", err)
	}

	// Create PATCH request
	req, err := http.NewRequest(http.MethodPatch, url, bytes.NewBuffer(jsonData))
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")

	// Send request
	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("failed to send request: %w", err)
	}
	defer resp.Body.Close()

	// Check response status
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("server returned error status code: %d", resp.StatusCode)
	}

	// Read response
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response: %w", err)
	}

	// Parse JSON response
	var session Session
	if err := json.Unmarshal(body, &session); err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}

	return &session, nil
}

// DeleteSession deletes the specified session.
//
// It issues DELETE {baseURL}/api/sessions/{sessionId}. Both 204 No Content and
// 200 OK are accepted as success; any other status is reported as an error.
//
// An empty sessionId is rejected up front: formatting it into the URL would
// silently turn the request into one against the collection path
// "/api/sessions/", so a "successful" response would not mean that any
// session was actually deleted.
func (c *Client) DeleteSession(sessionId string) error {
	if sessionId == "" {
		return fmt.Errorf("session ID must not be empty")
	}

	// Build request URL
	url := fmt.Sprintf("%s/api/sessions/%s", c.baseURL, sessionId)

	// Create DELETE request
	req, err := http.NewRequest(http.MethodDelete, url, nil)
	if err != nil {
		return fmt.Errorf("failed to create request: %w", err)
	}

	// Send request
	resp, err := c.httpClient.Do(req)
	if err != nil {
		return fmt.Errorf("failed to send request: %w", err)
	}
	defer resp.Body.Close()

	// Check response status
	if resp.StatusCode != http.StatusNoContent && resp.StatusCode != http.StatusOK {
		return fmt.Errorf("server returned error status code: %d", resp.StatusCode)
	}

	return nil
}

// CreateSessionWithOptions creates a new session using the given options.
//
// The session type defaults to DefaultSessionType when options.Type is empty.
// Kernel selection: a non-empty options.KernelID requests reuse of that
// existing kernel and takes precedence; otherwise a non-empty
// options.KernelName requests a new kernel of that name; if both are empty no
// kernel object is sent at all.
//
// The request is POSTed as JSON to {baseURL}/api/sessions; both 201 Created
// and 200 OK are accepted as success. A nil options pointer is rejected with
// an error instead of causing a nil-pointer panic.
func (c *Client) CreateSessionWithOptions(options *SessionOptions) (*Session, error) {
	if options == nil {
		return nil, fmt.Errorf("session options must not be nil")
	}

	// Build request URL
	url := fmt.Sprintf("%s/api/sessions", c.baseURL)

	// Build request body
	reqBody := &SessionCreateRequest{
		Path: options.Path,
		Name: options.Name,
	}

	// Set session type
	if options.Type != "" {
		reqBody.Type = options.Type
	} else {
		reqBody.Type = DefaultSessionType
	}

	// Set kernel information
	if options.KernelID != "" {
		// If kernel ID is provided, use existing kernel
		reqBody.Kernel = &KernelSpec{
			ID: options.KernelID,
		}
	} else if options.KernelName != "" {
		// If kernel name is provided, start new kernel
		reqBody.Kernel = &KernelSpec{
			Name: options.KernelName,
		}
	}

	// Serialize request body to JSON
	jsonData, err := json.Marshal(reqBody)
	if err != nil {
		return nil, fmt.Errorf("failed to serialize request: %w", err)
	}

	// Create POST request
	req, err := http.NewRequest(http.MethodPost, url, bytes.NewBuffer(jsonData))
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")

	// Send request
	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("failed to send request: %w", err)
	}
	defer resp.Body.Close()

	// Check response status
	if resp.StatusCode != http.StatusCreated && resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("server returned error status code: %d", resp.StatusCode)
	}

	// Read response
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response: %w", err)
	}

	// Parse JSON response
	var session Session
	if err := json.Unmarshal(body, &session); err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}

	return &session, nil
}


================================================
FILE: components/execd/pkg/jupyter/session/session_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package session

import (
	"encoding/json"
	"net/http"
	"net/http/httptest"
	"testing"
)

// TestListSessions verifies that ListSessions issues GET /api/sessions and
// decodes the returned JSON array into Session values.
func TestListSessions(t *testing.T) {
	// Create mock server
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Verify request method and path
		if r.Method != http.MethodGet {
			t.Errorf("expected request method GET, got %s", r.Method)
		}
		if r.URL.Path != "/api/sessions" {
			t.Errorf("expected request path /api/sessions, got %s", r.URL.Path)
		}

		// Return mocked session list
		response := `[
			{
				"id": "session-1",
				"path": "/path/to/notebook1.ipynb",
				"name": "Session 1",
				"type": "notebook",
				"kernel": {
					"id": "kernel-1",
					"name": "python3",
					"last_activity": "2023-01-01T00:00:00Z",
					"execution_state": "idle",
					"connections": 1
				}
			},
			{
				"id": "session-2",
				"path": "/path/to/notebook2.ipynb",
				"name": "Session 2",
				"type": "notebook",
				"kernel": {
					"id": "kernel-2",
					"name": "python3",
					"last_activity": "2023-01-01T00:00:00Z",
					"execution_state": "idle",
					"connections": 1
				}
			}
		]`

		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusOK)
		_, _ = w.Write([]byte(response))
	}))
	defer server.Close()

	// Create client
	client := NewClient(server.URL, &http.Client{})

	// Fetch session list
	sessions, err := client.ListSessions()
	if err != nil {
		t.Fatalf("failed to list sessions: %v", err)
	}

	// Validate session count. This must be fatal: the assertions below index
	// sessions[0] and would panic on an empty result.
	if len(sessions) != 2 {
		t.Fatalf("expected 2 sessions, got %d", len(sessions))
	}

	// Validate first session fields
	if sessions[0].ID != "session-1" {
		t.Errorf("expected session ID 'session-1', got '%s'", sessions[0].ID)
	}
	if sessions[0].Name != "Session 1" {
		t.Errorf("expected session name 'Session 1', got '%s'", sessions[0].Name)
	}
	if sessions[0].Path != "/path/to/notebook1.ipynb" {
		t.Errorf("expected session path '/path/to/notebook1.ipynb', got '%s'", sessions[0].Path)
	}
	if sessions[0].Type != "notebook" {
		t.Errorf("expected session type 'notebook', got '%s'", sessions[0].Type)
	}

	// Validate first session kernel fields. Kernel is a pointer, so guard
	// against nil to get a test failure instead of a panic.
	if sessions[0].Kernel == nil {
		t.Fatalf("expected kernel info on first session, got nil")
	}
	if sessions[0].Kernel.ID != "kernel-1" {
		t.Errorf("expected kernel ID 'kernel-1', got '%s'", sessions[0].Kernel.ID)
	}
	if sessions[0].Kernel.Name != "python3" {
		t.Errorf("expected kernel name 'python3', got '%s'", sessions[0].Kernel.Name)
	}
}

// TestCreateSession verifies that CreateSession POSTs the expected JSON body
// to /api/sessions and decodes the created session from the response.
func TestCreateSession(t *testing.T) {
	// Create mock server
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Verify request method and path
		if r.Method != http.MethodPost {
			t.Errorf("expected request method POST, got %s", r.Method)
		}
		if r.URL.Path != "/api/sessions" {
			t.Errorf("expected request path /api/sessions, got %s", r.URL.Path)
		}

		// Parse request body
		var requestBody SessionCreateRequest
		decoder := json.NewDecoder(r.Body)
		if err := decoder.Decode(&requestBody); err != nil {
			// The handler runs on the server's goroutine, not the test
			// goroutine, so t.Fatalf must not be used here. Record the
			// failure and answer with an error status instead.
			t.Errorf("failed to decode request body: %v", err)
			http.Error(w, "bad request", http.StatusBadRequest)
			return
		}

		// Validate request params
		if requestBody.Name != "Test Session" {
			t.Errorf("expected session name 'Test Session', got '%s'", requestBody.Name)
		}
		if requestBody.Path != "/path/to/notebook.ipynb" {
			t.Errorf("expected session path '/path/to/notebook.ipynb', got '%s'", requestBody.Path)
		}
		if requestBody.Type != "notebook" {
			t.Errorf("expected session type 'notebook', got '%s'", requestBody.Type)
		}
		// Kernel is a pointer; guard against nil so a missing kernel object
		// fails the test instead of panicking inside the handler.
		if requestBody.Kernel == nil {
			t.Errorf("expected kernel spec in request body, got none")
		} else if requestBody.Kernel.Name != "python3" {
			t.Errorf("expected kernel name 'python3', got '%s'", requestBody.Kernel.Name)
		}

		// Return mocked create response
		response := `{
			"id": "new-session-id",
			"path": "/path/to/notebook.ipynb",
			"name": "Test Session",
			"type": "notebook",
			"kernel": {
				"id": "new-kernel-id",
				"name": "python3",
				"last_activity": "2023-01-01T00:00:00Z",
				"execution_state": "idle",
				"connections": 0
			}
		}`

		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusCreated)
		_, _ = w.Write([]byte(response))
	}))
	defer server.Close()

	// Create client
	client := NewClient(server.URL, &http.Client{})

	// Create session
	newSession, err := client.CreateSession("Test Session", "/path/to/notebook.ipynb", "python3")
	if err != nil {
		t.Fatalf("failed to create session: %v", err)
	}

	// Validate created session
	if newSession.ID != "new-session-id" {
		t.Errorf("expected session ID 'new-session-id', got '%s'", newSession.ID)
	}
	if newSession.Name != "Test Session" {
		t.Errorf("expected session name 'Test Session', got '%s'", newSession.Name)
	}
	if newSession.Path != "/path/to/notebook.ipynb" {
		t.Errorf("expected session path '/path/to/notebook.ipynb', got '%s'", newSession.Path)
	}
	if newSession.Kernel == nil {
		t.Fatalf("expected kernel info on created session, got nil")
	}
	if newSession.Kernel.ID != "new-kernel-id" {
		t.Errorf("expected kernel ID 'new-kernel-id', got '%s'", newSession.Kernel.ID)
	}
}

// TestGetSession checks that GetSession requests /api/sessions/{id} with GET
// and decodes the single session object returned by the server.
func TestGetSession(t *testing.T) {
	sessionID := "test-session-id"

	// Handler standing in for the Jupyter server: it validates the incoming
	// request and replies with a canned session document.
	handler := func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodGet {
			t.Errorf("expected request method GET, got %s", r.Method)
		}

		if wantPath := "/api/sessions/" + sessionID; r.URL.Path != wantPath {
			t.Errorf("expected request path '%s', got '%s'", wantPath, r.URL.Path)
		}

		response := `{
			"id": "test-session-id",
			"path": "/path/to/notebook.ipynb",
			"name": "Test Session",
			"type": "notebook",
			"kernel": {
				"id": "test-kernel-id",
				"name": "python3",
				"last_activity": "2023-01-01T00:00:00Z",
				"execution_state": "idle",
				"connections": 1
			}
		}`

		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusOK)
		w.Write([]byte(response))
	}

	server := httptest.NewServer(http.HandlerFunc(handler))
	defer server.Close()

	// Point a client at the mock server and fetch the session.
	got, err := NewClient(server.URL, &http.Client{}).GetSession(sessionID)
	if err != nil {
		t.Fatalf("failed to get session: %v", err)
	}

	// Compare the decoded fields with the canned response.
	if got.ID != sessionID {
		t.Errorf("expected session ID '%s', got '%s'", sessionID, got.ID)
	}
	if got.Name != "Test Session" {
		t.Errorf("expected session name 'Test Session', got '%s'", got.Name)
	}
	if got.Kernel.ID != "test-kernel-id" {
		t.Errorf("expected kernel ID 'test-kernel-id', got '%s'", got.Kernel.ID)
	}
}


================================================
FILE: components/execd/pkg/jupyter/session/sessions.json
================================================
[ {
  "id" : "cb1baca9-a60e-4937-a1d0-18bc1fc45e60",
  "path" : "my_notebook.ipynb",
  "name" : "my_session",
  "type" : "notebook",
  "kernel" : {
    "id" : "d7052326-5c98-4575-bb18-a7902ef5f623",
    "name" : "python3",
    "last_activity" : "2025-06-05T09:09:54.420827Z",
    "execution_state" : "idle",
    "connections" : 0
  },
  "notebook" : {
    "path" : "my_notebook.ipynb",
    "name" : "my_session"
  }
}, {
  "id" : "a3378ca1-ba62-4341-9db5-3bc612fb3517",
  "path" : "Untitled.ipynb",
  "name" : "Untitled.ipynb",
  "type" : "notebook",
  "kernel" : {
    "id" : "d7052326-5c98-4575-bb18-a7902ef5f623",
    "name" : "python3",
    "last_activity" : "2025-06-05T09:09:54.420827Z",
    "execution_state" : "idle",
    "connections" : 0
  },
  "notebook" : {
    "path" : "Untitled.ipynb",
    "name" : "Untitled.ipynb"
  }
}, {
  "id" : "c4829f29-8430-4dce-b1f5-9d2ac6c4f570",
  "path" : "/tmp/example_notebook.ipynb",
  "name" : "example_session",
  "type" : "notebook",
  "kernel" : {
    "id" : "00349e07-3877-4eb0-a676-0df5b886d770",
    "name" : "python3",
    "last_activity" : "2025-06-05T11:51:22.194821Z",
    "execution_state" : "starting",
    "connections" : 0
  },
  "notebook" : {
    "path" : "/tmp/example_notebook.ipynb",
  "name" : "example_session"
  }
}, {
  "id" : "9a8e1857-b737-41a6-8f81-6039f6ae0ac1",
  "path" : "e0ebd37c-578a-443c-8f58-236984aea7ff",
  "name" : "session_5c4e8183-9e8a-4879-93b2-5622518193d7",
  "type" : "notebook",
  "kernel" : {
    "id" : "e8792c3e-3190-4b11-92e8-b7ec9ef44da9",
    "name" : "python3",
    "last_activity" : "2025-06-05T12:26:01.610210Z",
    "execution_state" : "starting",
    "connections" : 0
  },
  "notebook" : {
    "path" : "e0ebd37c-578a-443c-8f58-236984aea7ff",
    "name" : "session_5c4e8183-9e8a-4879-93b2-5622518193d7"
  }
}, {
  "id" : "cc06c06d-4f6b-45a5-a546-11a5b5f246f8",
  "path" : "notebook.ipynb",
  "name" : null,
  "type" : "notebook",
  "kernel" : {
    "id" : "62e7fd9e-ea50-4045-861b-3a5a7073ee22",
    "name" : "python3",
    "last_activity" : "2025-06-05T12:26:51.714871Z",
    "execution_state" : "starting",
    "connections" : 0
  },
  "notebook" : {
    "path" : "notebook.ipynb",
    "name" : null
  }
}, {
  "id" : "db123df4-ec13-4fe0-b3c3-ef85464b8a42",
  "path" : "/tmp/test.ipynb",
  "name" : "",
  "type" : "notebook",
  "kernel" : {
    "id" : "7d3091af-8b0a-474a-be04-f64191a43d0f",
    "name" : "python3",
    "last_activity" : "2025-06-06T01:29:16.712732Z",
    "execution_state" : "starting",
    "connections" : 0
  },
  "notebook" : {
    "path" : "/tmp/test.ipynb",
    "name" : ""
  }
} ]


================================================
FILE: components/execd/pkg/jupyter/session/types.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package session provides functionality for managing Jupyter sessions
package session

import (
	"time"
)

// Session represents a Jupyter session as exchanged with the server's
// /api/sessions endpoints. It is the decode target for the list, get, create
// and modify calls of Client.
type Session struct {
	// ID is the unique identifier of the session
	ID string `json:"id"`

	// Path is the path associated with the session (typically the notebook file path)
	Path string `json:"path"`

	// Name is the name of the session. The server may send null here (the
	// sample sessions.json next to this file contains such entries), which
	// decodes to the empty string.
	Name string `json:"name"`

	// Type is the type of the session (e.g., notebook, console)
	Type string `json:"type"`

	// Kernel contains information about the kernel associated with the session.
	// It is a pointer and stays nil when the response carries no kernel object.
	Kernel *KernelInfo `json:"kernel"`

	// CreatedAt is the timestamp when the session was created
	// NOTE(review): encoding/json's omitempty does not omit struct values such
	// as time.Time, so this tag option likely has no effect when marshalling.
	CreatedAt time.Time `json:"created,omitempty"`

	// LastModified is the timestamp when the session was last modified
	LastModified time.Time `json:"last_modified,omitempty"`
}

// KernelInfo contains basic kernel information as embedded in a Session
// returned by the server.
type KernelInfo struct {
	// ID is the unique identifier of the kernel
	ID string `json:"id"`

	// Name is the name of the kernel (e.g., python3, ir)
	Name string `json:"name"`

	// LastActivity is the timestamp of the kernel's last activity
	// NOTE(review): omitempty likely has no effect on a time.Time value when
	// marshalling; confirm against the encoding/json documentation.
	LastActivity time.Time `json:"last_activity,omitempty"`

	// Connections is the number of clients currently connected to the kernel.
	// A value of 0 is omitted when this struct is marshalled.
	Connections int `json:"connections,omitempty"`

	// ExecutionState is the execution state of the kernel (e.g., idle, busy)
	ExecutionState string `json:"execution_state,omitempty"`
}

// SessionCreateRequest is the JSON body sent when creating a new session
// (POST /api/sessions).
type SessionCreateRequest struct {
	// Path is the path associated with the session (typically the notebook file path).
	// It is always serialized, even when empty.
	Path string `json:"path"`

	// Name is the name of the session; omitted from the JSON when empty
	Name string `json:"name,omitempty"`

	// Type is the type of the session (defaults to "notebook"); omitted from
	// the JSON when empty
	Type string `json:"type,omitempty"`

	// Kernel contains information about the kernel to start; omitted from the
	// JSON when nil
	Kernel *KernelSpec `json:"kernel,omitempty"`
}

// KernelSpec contains kernel specification information used inside session
// create and update requests.
type KernelSpec struct {
	// Name is the name of the kernel (e.g., python3, ir).
	// The tag has no omitempty, so an empty name is still serialized as "".
	Name string `json:"name"`

	// ID is the unique identifier of the kernel (optional, used only when reusing existing kernel)
	ID string `json:"id,omitempty"`
}

// SessionUpdateRequest is the JSON body sent when updating an existing
// session (PATCH /api/sessions/{id}). Every field is omitted from the JSON
// when left at its zero value, so only the fields that were set are sent.
type SessionUpdateRequest struct {
	// Path is the new session path
	Path string `json:"path,omitempty"`

	// Name is the new session name
	Name string `json:"name,omitempty"`

	// Type is the new session type
	Type string `json:"type,omitempty"`

	// Kernel contains the new kernel information
	Kernel *KernelSpec `json:"kernel,omitempty"`
}

// SessionListResponse represents the response for listing sessions: a JSON
// array of session objects.
type SessionListResponse []*Session

// SessionOptions contains options for creating or updating sessions.
// It carries no JSON tags; it is a plain Go-side parameter object that is
// translated into a request body by the client.
type SessionOptions struct {
	// Name is the name of the session
	Name string

	// Path is the path associated with the session
	Path string

	// Type is the type of the session (defaults to "notebook")
	Type string

	// KernelName is the kernel name to use (e.g., python3, ir, etc.)
	KernelName string

	// KernelID is the ID of the existing kernel to reuse (if provided, KernelName will be ignored)
	KernelID string
}

// DefaultSessionType is the default session type, used when a create request
// does not specify a type explicitly.
const DefaultSessionType = "notebook"


================================================
FILE: components/execd/pkg/jupyter/transport.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package jupyter

import "net/http"

type AuthTransport struct {
	Token string
	Base  http.RoundTripper
}

func (t *AuthTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	reqClone := req.Clone(req.Context())
	reqClone.Header.Set("Authorization", "Token "+t.Token)
	return t.Base.RoundTrip(reqClone)
}


================================================
FILE: components/execd/pkg/log/log.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package log

import (
	"os"

	slogger "github.com/alibaba/opensandbox/internal/logger"
)

// logFileEnvKey names the environment variable that, when non-empty, is used
// by newLogger as the output path for both regular and error log output.
const logFileEnvKey = "EXECD_LOG_FILE"

// current is the package-level logger used by Debug/Info/Warn/Error. It is
// set by Init, or lazily by getLogger on first use.
// NOTE(review): access is not synchronized; presumably Init is expected to run
// before any concurrent logging. Confirm against callers.
var current slogger.Logger

// Init constructs the singleton logger. Call once during startup.
// Legacy levels: 0/1/2=fatal, 3=error, 4=warn, 5/6=info, 7+=debug.
func Init(level int) {
	current = newLogger(mapLevel(level))
}

// mapLevel translates a legacy numeric log level into the level name handed
// to the logger configuration: values up to 2 map to "fatal", 3 to "error",
// 4 to "warn", 5 and 6 to "info", and everything above to "debug".
func mapLevel(level int) string {
	if level <= 2 {
		return "fatal"
	}
	if level == 3 {
		return "error"
	}
	if level == 4 {
		return "warn"
	}
	// Only values >= 5 reach this point, so this covers exactly 5 and 6.
	if level <= 6 {
		return "info"
	}
	return "debug"
}

// newLogger creates a logger configured with the given level name. When the
// environment variable named by logFileEnvKey is non-empty, its value is used
// as the sole output path for both regular and error output.
func newLogger(level string) slogger.Logger {
	cfg := slogger.Config{Level: level}

	logFile := os.Getenv(logFileEnvKey)
	if logFile != "" {
		cfg.OutputPaths = []string{logFile}
		cfg.ErrorOutputPaths = cfg.OutputPaths
	}

	return slogger.MustNew(cfg)
}

// getLogger returns the package-level logger, creating an "info"-level one
// on first use when Init has not been called yet.
func getLogger() slogger.Logger {
	if current == nil {
		current = newLogger("info")
	}
	return current
}

// Debug logs a printf-style formatted message at debug level.
func Debug(format string, args ...any) {
	logger := getLogger()
	logger.Debugf(format, args...)
}

// Info logs a printf-style formatted message at info level.
func Info(format string, args ...any) {
	logger := getLogger()
	logger.Infof(format, args...)
}

// Warn logs a printf-style formatted message at warn level.
func Warn(format string, args ...any) {
	logger := getLogger()
	logger.Warnf(format, args...)
}

// Warning forwards to Warn; it exists only so callers using the older name
// keep working.
func Warning(format string, args ...any) {
	Warn(format, args...)
}

// Error logs a printf-style formatted message at error level.
func Error(format string, args ...any) {
	logger := getLogger()
	logger.Errorf(format, args...)
}


================================================
FILE: components/execd/pkg/runtime/bash_session.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package runtime

import (
	"bufio"
	"context"
	"errors"
	"fmt"
	"os"
	"os/exec"
	"sort"
	"strconv"
	"strings"
	"syscall"
	"time"

	"github.com/google/uuid"

	"github.com/alibaba/opensandbox/execd/pkg/jupyter/execute"
	"github.com/alibaba/opensandbox/execd/pkg/log"
)

// Marker strings printed by the trailer that buildWrappedScript appends to
// every user command. run scans the command output for them to separate the
// bookkeeping lines from the user's own output.
const (
	// envDumpStartMarker is printed on its own line right before `export -p`.
	envDumpStartMarker = "__ENV_DUMP_START__"
	// envDumpEndMarker is printed on its own line right after `export -p`.
	envDumpEndMarker   = "__ENV_DUMP_END__"
	// exitMarkerPrefix prefixes the line carrying the user command's exit code.
	exitMarkerPrefix   = "__EXIT_CODE__:"
	// pwdMarkerPrefix prefixes the line carrying the working directory at the
	// end of the script.
	pwdMarkerPrefix    = "__PWD__:"
)

// createBashSession creates a bash session rooted at req.Cwd, marks it as
// started, registers it in the controller's session map under its generated
// ID and returns that ID.
func (c *Controller) createBashSession(req *CreateContextRequest) (string, error) {
	created := newBashSession(req.Cwd)

	startErr := created.start()
	if startErr != nil {
		return "", fmt.Errorf("failed to start bash session: %w", startErr)
	}

	id := created.config.Session
	c.bashSessionClientMap.Store(id, created)
	log.Info("created bash session %s", id)
	return id, nil
}

// runBashSession executes the request inside the bash session identified by
// request.Context. It returns ErrContextNotFound when no such session is
// registered.
func (c *Controller) runBashSession(ctx context.Context, request *ExecuteCodeRequest) error {
	if target := c.getBashSession(request.Context); target != nil {
		return target.run(ctx, request)
	}
	return ErrContextNotFound
}

// getBashSession looks up the bash session registered under sessionId.
// It returns nil when nothing is stored for that ID or when the stored value
// is not a *bashSession.
func (c *Controller) getBashSession(sessionId string) *bashSession {
	stored, found := c.bashSessionClientMap.Load(sessionId)
	if !found {
		return nil
	}
	session, isSession := stored.(*bashSession)
	if !isSession {
		return nil
	}
	return session
}

// closeBashSession closes the bash session registered under sessionId and
// removes it from the controller's session map. It returns ErrContextNotFound
// for an unknown ID; when closing fails the error is returned and the session
// stays registered.
func (c *Controller) closeBashSession(sessionId string) error {
	target := c.getBashSession(sessionId)
	if target == nil {
		return ErrContextNotFound
	}

	if closeErr := target.close(); closeErr != nil {
		return closeErr
	}

	c.bashSessionClientMap.Delete(sessionId)
	return nil
}

// CreateBashSession is the exported entry point for creating a bash session.
// It returns the ID of the new session.
func (c *Controller) CreateBashSession(req *CreateContextRequest) (string, error) {
	sessionID, err := c.createBashSession(req)
	return sessionID, err
}

// RunInBashSession is the exported entry point for executing a request in the
// bash session named by req.Context.
func (c *Controller) RunInBashSession(ctx context.Context, req *ExecuteCodeRequest) error {
	err := c.runBashSession(ctx, req)
	return err
}

// DeleteBashSession is the exported entry point for closing and unregistering
// the bash session with the given ID.
func (c *Controller) DeleteBashSession(sessionID string) error {
	err := c.closeBashSession(sessionID)
	return err
}

// Session implementation (pipe-based, no PTY)

// newBashSession builds an unstarted bash session with a freshly generated ID
// and a 5 second startup timeout. Its environment starts as a snapshot of the
// current process environment and its working directory is cwd.
func newBashSession(cwd string) *bashSession {
	// Snapshot the process environment as a name -> value map, skipping any
	// entry that splitEnvPair cannot split.
	inherited := make(map[string]string)
	for _, entry := range os.Environ() {
		name, value, ok := splitEnvPair(entry)
		if !ok {
			continue
		}
		inherited[name] = value
	}

	return &bashSession{
		config: &bashSessionConfig{
			Session:        uuidString(),
			StartupTimeout: 5 * time.Second,
		},
		env: inherited,
		cwd: cwd,
	}
}

// start marks the session as started. No process is spawned here; it returns
// an error when the session has already been started.
func (s *bashSession) start() error {
	s.mu.Lock()
	defer s.mu.Unlock()

	if !s.started {
		s.started = true
		return nil
	}
	return errors.New("session already started")
}

// trackCurrentProcess records pid as the session's currently running process,
// holding the session mutex while writing.
func (s *bashSession) trackCurrentProcess(pid int) {
	s.mu.Lock()
	s.currentProcessPid = pid
	s.mu.Unlock()
}

// untrackCurrentProcess resets the session's recorded process ID to 0,
// holding the session mutex while writing.
func (s *bashSession) untrackCurrentProcess() {
	s.mu.Lock()
	s.currentProcessPid = 0
	s.mu.Unlock()
}

// run executes request.Code in a fresh bash process that replays the
// session's saved environment and working directory, then captures the
// resulting environment and directory so the next run continues from there.
//
// Flow:
//  1. Snapshot env/cwd under the session mutex (request.Cwd overrides cwd).
//  2. Write a wrapper script (see buildWrappedScript) to a temporary file and
//     run it with `bash --noprofile --norc`, stderr merged into stdout.
//  3. Scan the output line by line: marker lines feed the env dump, the final
//     working directory and the exit code; every other line is passed to
//     request.Hooks.OnExecuteStdout.
//  4. Store the new env/cwd on the session and report the outcome through
//     OnExecuteError (non-zero exit) or OnExecuteComplete (success).
//
// A non-positive request.Timeout is replaced by 24 hours. Timeouts, scan
// failures and non-exit wait failures are returned as errors; a non-zero exit
// code of the user command is reported through OnExecuteError and the method
// returns nil.
//
// The temporary script file is removed when run returns, so repeated
// executions no longer leave script files behind in the temp directory.
//
//nolint:gocognit
func (s *bashSession) run(ctx context.Context, request *ExecuteCodeRequest) error {
	s.mu.Lock()
	if !s.started {
		s.mu.Unlock()
		return errors.New("session not started")
	}

	envSnapshot := copyEnvMap(s.env)

	cwd := s.cwd
	// override original cwd if specified
	if request.Cwd != "" {
		cwd = request.Cwd
	}
	sessionID := s.config.Session
	s.mu.Unlock()

	startAt := time.Now()
	if request.Hooks.OnExecuteInit != nil {
		request.Hooks.OnExecuteInit(sessionID)
	}

	wait := request.Timeout
	if wait <= 0 {
		wait = 24 * 3600 * time.Second // max to 24 hours
	}

	ctx, cancel := context.WithTimeout(ctx, wait)
	defer cancel()

	script := buildWrappedScript(request.Code, envSnapshot, cwd)
	scriptFile, err := os.CreateTemp("", "execd_bash_*.sh")
	if err != nil {
		return fmt.Errorf("create script file: %w", err)
	}
	scriptPath := scriptFile.Name()
	// os.CreateTemp leaves cleanup to the caller. Every return path below is
	// reached either before bash starts or after cmd.Wait has returned, so the
	// file is no longer needed once this function exits.
	defer func() { _ = os.Remove(scriptPath) }()

	if _, err := scriptFile.WriteString(script); err != nil {
		_ = scriptFile.Close()
		return fmt.Errorf("write script file: %w", err)
	}
	if err := scriptFile.Close(); err != nil {
		return fmt.Errorf("close script file: %w", err)
	}

	cmd := exec.CommandContext(ctx, "bash", "--noprofile", "--norc", scriptPath)
	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
	// Do not pass envSnapshot via cmd.Env to avoid "argument list too long" when session env is large.
	// Child inherits parent env (nil => default in Go). The script file already has "export K=V" for
	// all session vars at the top, so the session environment is applied when the script runs.
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return fmt.Errorf("stdout pipe: %w", err)
	}
	// Merge stderr into the same pipe so both streams are scanned in order.
	cmd.Stderr = cmd.Stdout

	if err := cmd.Start(); err != nil {
		log.Error("start bash session failed: %v (command: %q)", err, request.Code)
		return fmt.Errorf("start bash: %w", err)
	}
	defer s.untrackCurrentProcess()
	s.trackCurrentProcess(cmd.Process.Pid)

	scanner := bufio.NewScanner(stdout)
	// Allow single output lines of up to 16 MiB.
	scanner.Buffer(make([]byte, 0, 64*1024), 16*1024*1024)

	var (
		envLines []string
		pwdLine  string
		exitCode *int
		inEnv    bool
	)

	for scanner.Scan() {
		line := scanner.Text()
		switch {
		case line == envDumpStartMarker:
			inEnv = true
		case line == envDumpEndMarker:
			inEnv = false
		case strings.HasPrefix(line, exitMarkerPrefix):
			if code, err := strconv.Atoi(strings.TrimPrefix(line, exitMarkerPrefix)); err == nil {
				exitCode = &code //nolint:ineffassign
			}
		case strings.HasPrefix(line, pwdMarkerPrefix):
			pwdLine = strings.TrimPrefix(line, pwdMarkerPrefix)
		default:
			if inEnv {
				// Lines between the env markers belong to `export -p`.
				envLines = append(envLines, line)
				continue
			}
			if request.Hooks.OnExecuteStdout != nil {
				request.Hooks.OnExecuteStdout(line)
			}
		}
	}

	scanErr := scanner.Err()
	waitErr := cmd.Wait()

	if scanErr != nil {
		log.Error("read stdout failed: %v (command: %q)", scanErr, request.Code)
		return fmt.Errorf("read stdout: %w", scanErr)
	}

	if errors.Is(ctx.Err(), context.DeadlineExceeded) {
		log.Error("timeout after %s while running command: %q", wait, request.Code)
		return fmt.Errorf("timeout after %s while running command %q", wait, request.Code)
	}

	// Fall back to the process exit code when the script never printed the
	// exit marker (for example because the user command ended the shell).
	if exitCode == nil && cmd.ProcessState != nil {
		code := cmd.ProcessState.ExitCode() //nolint:staticcheck
		exitCode = &code                    //nolint:ineffassign
	}

	updatedEnv := parseExportDump(envLines)
	s.mu.Lock()
	if len(updatedEnv) > 0 {
		s.env = updatedEnv
	}
	if pwdLine != "" {
		s.cwd = pwdLine
	}
	s.mu.Unlock()

	// An *exec.ExitError only means "non-zero exit" and is handled below;
	// any other wait error is a real failure.
	var exitErr *exec.ExitError
	if waitErr != nil && !errors.As(waitErr, &exitErr) {
		log.Error("command wait failed: %v (command: %q)", waitErr, request.Code)
		return waitErr
	}

	userExitCode := 0
	if exitCode != nil {
		userExitCode = *exitCode
	}

	if userExitCode != 0 {
		errMsg := fmt.Sprintf("command exited with code %d", userExitCode)
		if waitErr != nil {
			errMsg = waitErr.Error()
		}
		if request.Hooks.OnExecuteError != nil {
			request.Hooks.OnExecuteError(&execute.ErrorOutput{
				EName:     "CommandExecError",
				EValue:    strconv.Itoa(userExitCode),
				Traceback: []string{errMsg},
			})
		}
		log.Error("CommandExecError: %s (command: %q)", errMsg, request.Code)
		return nil
	}

	if request.Hooks.OnExecuteComplete != nil {
		request.Hooks.OnExecuteComplete(time.Since(startAt))
	}

	return nil
}

// buildWrappedScript assembles the bash script that is executed for one run.
//
// The script, in order:
//  1. re-exports the entries of env, sorted by key, skipping invalid keys,
//     keys listed in envKeysNotPersisted and values longer than
//     maxPersistedEnvValueSize;
//  2. changes into cwd when it is non-empty;
//  3. runs command (a trailing newline is added when missing);
//  4. records $? and prints the env dump between its start/end markers, the
//     working directory and the exit code on marker-prefixed lines, then exits
//     with the recorded code.
func buildWrappedScript(command string, env map[string]string, cwd string) string {
	exportable := make([]string, 0, len(env))
	for name, val := range env {
		if !isValidEnvKey(name) || envKeysNotPersisted[name] || len(val) > maxPersistedEnvValueSize {
			continue
		}
		exportable = append(exportable, name)
	}
	// Map iteration order is random; sort so the generated script is stable.
	sort.Strings(exportable)

	var script strings.Builder
	for _, name := range exportable {
		script.WriteString("export " + name + "=" + shellEscape(env[name]) + "\n")
	}
	if cwd != "" {
		script.WriteString("cd " + shellEscape(cwd) + "\n")
	}

	script.WriteString(command)
	if !strings.HasSuffix(command, "\n") {
		script.WriteString("\n")
	}

	// Epilogue: $? must be captured first, before any other command overwrites it.
	epilogue := []string{
		"__USER_EXIT_CODE__=$?\n",
		"printf \"\\n%s\\n\" \"" + envDumpStartMarker + "\"\n",
		"export -p\n",
		"printf \"%s\\n\" \"" + envDumpEndMarker + "\"\n",
		"printf \"" + pwdMarkerPrefix + "%s\\n\" \"$(pwd)\"\n",
		"printf \"" + exitMarkerPrefix + "%s\\n\" \"$__USER_EXIT_CODE__\"\n",
		"exit \"$__USER_EXIT_CODE__\"\n",
	}
	for _, stmt := range epilogue {
		script.WriteString(stmt)
	}

	return script.String()
}

// envKeysNotPersisted is the set of environment variable names that are never
// carried from one run to the next (shell prompt/display variables).
var envKeysNotPersisted = map[string]bool{
	"PS1":            true,
	"PS2":            true,
	"PS3":            true,
	"PS4":            true,
	"PROMPT_COMMAND": true,
}

// maxPersistedEnvValueSize is the maximum length, in bytes, of a single
// environment value that is persisted between runs. Longer values are skipped
// both when the env is re-exported (buildWrappedScript) and when the dump is
// parsed (parseExportDump), as a safeguard.
const maxPersistedEnvValueSize = 8 * 1024

// parseExportDump converts the captured output lines of `export -p` into an
// environment map.
//
// It returns nil when lines is empty. Entries that parseExportLine rejects,
// keys listed in envKeysNotPersisted and values longer than
// maxPersistedEnvValueSize are dropped.
//
// bash prints each value inside double quotes and does not escape newlines, so
// a value containing newlines spans several physical lines. Such continuation
// lines are joined back (with "\n") onto the `declare -x` line that opened the
// quote before the entry is parsed; otherwise the value would be silently
// truncated to its first line.
func parseExportDump(lines []string) map[string]string {
	if len(lines) == 0 {
		return nil
	}

	// quoteClosed reports whether a dump entry is complete: either it has no
	// quoted value at all (e.g. `declare -x NAME`) or its opening double quote
	// is followed by an unescaped closing double quote.
	quoteClosed := func(entry string) bool {
		open := strings.Index(entry, `="`)
		if open < 0 {
			return true
		}
		body := entry[open+2:]
		for i := 0; i < len(body); i++ {
			switch body[i] {
			case '\\':
				i++ // the next byte is escaped and cannot close the quote
			case '"':
				return true
			}
		}
		return false
	}

	env := make(map[string]string, len(lines))
	for i := 0; i < len(lines); i++ {
		entry := lines[i]
		if strings.HasPrefix(entry, "declare -x ") {
			// Re-assemble a value that was split across lines by embedded newlines.
			for !quoteClosed(entry) && i+1 < len(lines) {
				i++
				entry += "\n" + lines[i]
			}
		}
		k, v, ok := parseExportLine(entry)
		if !ok || envKeysNotPersisted[k] || len(v) > maxPersistedEnvValueSize {
			continue
		}
		env[k] = v
	}
	return env
}

func parseExportLine(line string) (string, string, bool) {
	const prefix = "declare -x "
	if !strings.HasPrefix(line, prefix) {
		return "", "", false
	}
	rest := strings.TrimSpace(strings.TrimPrefix(line, prefix))
	if rest == "" {
		return "", "", false
	}
	name, value := rest, ""
	if eq := strings.Index(rest, "="); eq >= 0 {
		name = rest[:eq]
		raw := rest[eq+1:]
		if unquoted, err := strconv.Unquote(raw); err == nil {
			value = unquoted
		} else {
			value = strings.Trim(raw, `"`)
		}
	}
	if !isValidEnvKey(name) {
		return "", "", false
	}
	return name, value, true
}

// shellEscape wraps value in single quotes so it can be used as one shell word.
// Every embedded single quote is emitted as '"'"' (close the quote, add a
// double-quoted single quote, reopen the quote).
func shellEscape(value string) string {
	segments := strings.Split(value, "'")
	return "'" + strings.Join(segments, `'"'"'`) + "'"
}

// isValidEnvKey reports whether key is a usable environment variable name:
// non-empty, made only of ASCII letters, digits and underscores, and not
// starting with a digit.
func isValidEnvKey(key string) bool {
	if len(key) == 0 {
		return false
	}

	for pos, ch := range key {
		switch {
		case ch == '_':
		case ch >= 'A' && ch <= 'Z':
		case ch >= 'a' && ch <= 'z':
		case ch >= '0' && ch <= '9' && pos > 0:
			// Digits are accepted everywhere except in the first position.
		default:
			return false
		}
	}

	return true
}

// copyEnvMap returns an independent copy of src. A nil src yields an empty,
// non-nil map.
func copyEnvMap(src map[string]string) map[string]string {
	// len of a nil map is 0 and ranging over it is a no-op, so the nil case
	// needs no special handling.
	clone := make(map[string]string, len(src))
	for key, val := range src {
		clone[key] = val
	}
	return clone
}

// splitEnvPair splits a "KEY=VALUE" string at the first "=". It returns
// ok == false when there is no "=" or when the key is not a valid env key.
// The value may be empty and may itself contain "=".
func splitEnvPair(kv string) (string, string, bool) {
	sep := strings.Index(kv, "=")
	if sep < 0 {
		return "", "", false
	}
	key, val := kv[:sep], kv[sep+1:]
	if !isValidEnvKey(key) {
		return "", "", false
	}
	return key, val, true
}

// close resets the session state (started flag, env, cwd, tracked pid) and, if
// a process was tracked, sends SIGKILL to its process group. A failed kill is
// only logged as a warning; close always returns nil.
func (s *bashSession) close() error {
	s.mu.Lock()
	defer s.mu.Unlock()

	activePid := s.currentProcessPid
	s.currentProcessPid, s.started = 0, false
	s.env, s.cwd = nil, ""

	if activePid == 0 {
		return nil
	}
	// A negative pid addresses the whole process group.
	if killErr := syscall.Kill(-activePid, syscall.SIGKILL); killErr != nil {
		log.Warning("kill session process group %d: %v (process may have already exited)", activePid, killErr)
	}
	return nil
}

// uuidString returns the string form of a freshly generated UUID.
func uuidString() string {
	id := uuid.New()
	return id.String()
}


================================================
FILE: components/execd/pkg/runtime/bash_session_test.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package runtime

import (
	"context"
	"fmt"
	"os/exec"
	"strings"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/alibaba/opensandbox/execd/pkg/jupyter/execute"
)

// TestBashSession_NonZeroExitEmitsError runs `echo "before"; exit 7` through
// Controller.runBashSession and checks that the error hook receives a
// CommandExecError with EValue "7", that the session id and stdout are still
// reported, and that the completion hook is not invoked.
func TestBashSession_NonZeroExitEmitsError(t *testing.T) {
	if _, err := exec.LookPath("bash"); err != nil {
		t.Skip("bash not found in PATH")
	}

	c := NewController("", "")

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	var (
		sessionID  string
		stdoutLine string
		// Buffered so the hooks never block the run when they fire.
		errCh      = make(chan *execute.ErrorOutput, 1)
		completeCh = make(chan struct{}, 1)
	)

	req := &ExecuteCodeRequest{
		Language: Bash,
		Code:     `echo "before"; exit 7`,
		Cwd:      t.TempDir(),
		Timeout:  5 * time.Second,
		Hooks: ExecuteResultHook{
			OnExecuteInit:   func(s string) { sessionID = s },
			OnExecuteStdout: func(s string) { stdoutLine = s },
			OnExecuteError:  func(err *execute.ErrorOutput) { errCh <- err },
			OnExecuteComplete: func(_ time.Duration) {
				completeCh <- struct{}{}
			},
		},
	}

	session, err := c.createBashSession(&CreateContextRequest{})
	assert.NoError(t, err)
	req.Context = session
	// A non-zero exit is reported through the error hook, not as a returned error.
	require.NoError(t, c.runBashSession(ctx, req))

	var gotErr *execute.ErrorOutput
	select {
	case gotErr = <-errCh:
	case <-time.After(2 * time.Second):
		require.Fail(t, "expected error hook to be called")
	}
	require.NotNil(t, gotErr, "expected non-nil error output")
	require.Equal(t, "CommandExecError", gotErr.EName)
	require.Equal(t, "7", gotErr.EValue)
	require.NotEmpty(t, sessionID, "expected session id to be set")
	require.Equal(t, "before", stdoutLine)

	// Non-blocking check: the completion hook must not have fired.
	select {
	case <-completeCh:
		require.Fail(t, "did not expect completion hook on non-zero exit")
	default:
	}
}

// TestBashSession_envAndExitCode checks that an exported variable is visible
// in a later run of the same session, that `$?` reflects the previous command
// inside a single run, and that the init/complete hooks fire once per run.
func TestBashSession_envAndExitCode(t *testing.T) {
	sess := newBashSession("")
	t.Cleanup(func() { _ = sess.close() })

	require.NoError(t, sess.start())

	var captured []string
	initCount, completeCount := 0, 0

	hooks := ExecuteResultHook{
		OnExecuteInit: func(ctx string) {
			require.Equal(t, sess.config.Session, ctx, "unexpected session in OnExecuteInit")
			initCount++
		},
		OnExecuteStdout: func(text string) {
			t.Log(text)
			captured = append(captured, text)
		},
		OnExecuteComplete: func(_ time.Duration) {
			completeCount++
		},
	}

	// runCode executes code in the session and returns the stdout lines it produced.
	runCode := func(code string) []string {
		offset := len(captured)
		req := &ExecuteCodeRequest{
			Code:    code,
			Hooks:   hooks,
			Timeout: 3 * time.Second,
		}
		require.NoError(t, sess.run(context.Background(), req))
		return captured[offset:]
	}

	// 1) export an env var
	runCode("export FOO=hello")

	// 2) verify env is persisted
	echoLines := runCode("echo $FOO")
	sawHello := false
	for i := 0; i < len(echoLines) && !sawHello; i++ {
		sawHello = strings.TrimSpace(echoLines[i]) == "hello"
	}
	require.True(t, sawHello, "expected echo $FOO to output 'hello', got %v", echoLines)

	// 3) ensure exit code of previous command is reflected in shell state
	exitLines := runCode("false; echo EXIT:$?")
	sawExit := false
	for i := 0; i < len(exitLines) && !sawExit; i++ {
		sawExit = strings.Contains(exitLines[i], "EXIT:1")
	}
	require.True(t, sawExit, "expected exit code output 'EXIT:1', got %v", exitLines)

	require.Equal(t, 3, initCount, "OnExecuteInit expected 3 calls")
	require.Equal(t, 3, completeCount, "OnExecuteComplete expected 3 calls")
}

// TestBashSession_envLargeOutputChained runs three commands that each print
// 60+ lines while modifying exported variables, and checks that every run sees
// the environment left behind by the previous one.
func TestBashSession_envLargeOutputChained(t *testing.T) {
	sess := newBashSession("")
	t.Cleanup(func() { _ = sess.close() })

	require.NoError(t, sess.start())

	var output []string
	initCount, completeCount := 0, 0

	hooks := ExecuteResultHook{
		OnExecuteInit: func(ctx string) {
			require.Equal(t, sess.config.Session, ctx, "unexpected session in OnExecuteInit")
			initCount++
		},
		OnExecuteStdout: func(text string) {
			t.Log(text)
			output = append(output, text)
		},
		OnExecuteComplete: func(_ time.Duration) {
			completeCount++
		},
	}

	// runAndCollect executes cmd and returns a copy of the lines it printed.
	runAndCollect := func(cmd string) []string {
		before := len(output)
		err := sess.run(context.Background(), &ExecuteCodeRequest{
			Code:    cmd,
			Hooks:   hooks,
			Timeout: 10 * time.Second,
		})
		require.NoError(t, err)
		return append([]string(nil), output[before:]...)
	}

	first := runAndCollect("export FOO=hello1; for i in $(seq 1 60); do echo A${i}:$FOO; done")
	require.GreaterOrEqual(t, len(first), 60, "expected >=60 lines for cmd1")
	firstOK := containsLine(first, "A1:hello1") && containsLine(first, "A60:hello1")
	require.True(t, firstOK, "env not reflected in cmd1 output, got %v", first[:3])

	second := runAndCollect("export FOO=${FOO}_next; export BAR=bar1; for i in $(seq 1 60); do echo B${i}:$FOO:$BAR; done")
	require.GreaterOrEqual(t, len(second), 60, "expected >=60 lines for cmd2")
	secondOK := containsLine(second, "B1:hello1_next:bar1") && containsLine(second, "B60:hello1_next:bar1")
	require.True(t, secondOK, "env not propagated to cmd2 output, sample %v", second[:3])

	third := runAndCollect("export BAR=${BAR}_last; for i in $(seq 1 60); do echo C${i}:$FOO:$BAR; done; echo FINAL_FOO=$FOO; echo FINAL_BAR=$BAR")
	require.GreaterOrEqual(t, len(third), 62, "expected >=62 lines for cmd3") // 60 lines + 2 finals
	thirdOK := containsLine(third, "C1:hello1_next:bar1_last") && containsLine(third, "C60:hello1_next:bar1_last")
	require.True(t, thirdOK, "env not propagated to cmd3 output, sample %v", third[:3])
	finalsOK := containsLine(third, "FINAL_FOO=hello1_next") && containsLine(third, "FINAL_BAR=bar1_last")
	require.True(t, finalsOK, "final env lines missing, got %v", third[len(third)-5:])

	require.Equal(t, 3, initCount, "OnExecuteInit expected 3 calls")
	require.Equal(t, 3, completeCount, "OnExecuteComplete expected 3 calls")
}

// TestBashSession_cwdPersistsWithoutOverride checks that a `cd` performed in
// one run becomes the working directory of the next run and is recorded in
// session.cwd when no request Cwd is supplied.
func TestBashSession_cwdPersistsWithoutOverride(t *testing.T) {
	sess := newBashSession("")
	t.Cleanup(func() { _ = sess.close() })

	require.NoError(t, sess.start())

	wantDir := t.TempDir()

	var output []string
	hooks := ExecuteResultHook{
		OnExecuteStdout: func(line string) { output = append(output, line) },
	}

	// runAndCollect executes req and returns a copy of the lines it printed.
	runAndCollect := func(req *ExecuteCodeRequest) []string {
		before := len(output)
		err := sess.run(context.Background(), req)
		require.NoError(t, err)
		return append([]string(nil), output[before:]...)
	}

	cdRequest := &ExecuteCodeRequest{
		Code:    fmt.Sprintf("cd %s\npwd", wantDir),
		Hooks:   hooks,
		Timeout: 3 * time.Second,
	}
	afterCd := runAndCollect(cdRequest)
	require.True(t, containsLine(afterCd, wantDir), "expected cd to update cwd to %q, got %v", wantDir, afterCd)

	pwdRequest := &ExecuteCodeRequest{
		Code:    "pwd",
		Hooks:   hooks,
		Timeout: 3 * time.Second,
	}
	afterPwd := runAndCollect(pwdRequest)
	require.True(t, containsLine(afterPwd, wantDir), "expected subsequent run to inherit cwd %q, got %v", wantDir, afterPwd)

	sess.mu.Lock()
	recordedCwd := sess.cwd
	sess.mu.Unlock()
	require.Equal(t, wantDir, recordedCwd, "expected session cwd to stay at %q", wantDir)
}

// TestBashSession_requestCwdOverridesAfterCd checks that an explicit request
// Cwd wins over the directory remembered from an earlier `cd`, and that the
// session's recorded cwd is updated to the override afterwards.
func TestBashSession_requestCwdOverridesAfterCd(t *testing.T) {
	sess := newBashSession("")
	t.Cleanup(func() { _ = sess.close() })

	require.NoError(t, sess.start())

	firstDir := t.TempDir()
	secondDir := t.TempDir()

	var output []string
	hooks := ExecuteResultHook{
		OnExecuteStdout: func(line string) { output = append(output, line) },
	}

	// runAndCollect executes req and returns a copy of the lines it printed.
	runAndCollect := func(req *ExecuteCodeRequest) []string {
		before := len(output)
		err := sess.run(context.Background(), req)
		require.NoError(t, err)
		return append([]string(nil), output[before:]...)
	}

	// First request: change session cwd via script.
	cdRequest := &ExecuteCodeRequest{
		Code:    fmt.Sprintf("cd %s\npwd", firstDir),
		Hooks:   hooks,
		Timeout: 3 * time.Second,
	}
	afterCd := runAndCollect(cdRequest)
	require.True(t, containsLine(afterCd, firstDir), "expected cd to update cwd to %q, got %v", firstDir, afterCd)

	// Second request: explicit Cwd overrides session cwd.
	overrideRequest := &ExecuteCodeRequest{
		Code:    "pwd",
		Cwd:     secondDir,
		Hooks:   hooks,
		Timeout: 3 * time.Second,
	}
	afterOverride := runAndCollect(overrideRequest)
	require.True(t, containsLine(afterOverride, secondDir), "expected command to run in override cwd %q, got %v", secondDir, afterOverride)

	sess.mu.Lock()
	recordedCwd := sess.cwd
	sess.mu.Unlock()
	require.Equal(t, secondDir, recordedCwd, "expected session cwd updated to override dir %q", secondDir)
}

// TestBashSession_envDumpNotLeakedWhenNoTrailingNewline checks that when the
// user command prints output without a final newline, exactly that output is
// delivered to the stdout hook and none of the internal env dump leaks into it.
func TestBashSession_envDumpNotLeakedWhenNoTrailingNewline(t *testing.T) {
	sess := newBashSession("")
	t.Cleanup(func() { _ = sess.close() })

	require.NoError(t, sess.start())

	var got []string
	req := &ExecuteCodeRequest{
		Code:    `set +x; printf '{"foo":1}'`,
		Timeout: 3 * time.Second,
		Hooks: ExecuteResultHook{
			OnExecuteStdout: func(line string) { got = append(got, line) },
		},
	}
	require.NoError(t, sess.run(context.Background(), req))

	require.Len(t, got, 1, "expected exactly one stdout line")
	require.Equal(t, `{"foo":1}`, strings.TrimSpace(got[0]))
	for i := range got {
		require.NotContains(t, got[i], envDumpStartMarker, "env dump leaked into stdout: %v", got)
		require.NotContains(t, got[i], "declare -x", "env dump leaked into stdout: %v", got)
	}
}

// TestBashSession_envDumpNotLeakedWhenNoOutput checks that a command printing
// nothing produces at most one (blank) stdout line and that none of the
// internal env dump reaches the stdout hook.
func TestBashSession_envDumpNotLeakedWhenNoOutput(t *testing.T) {
	sess := newBashSession("")
	t.Cleanup(func() { _ = sess.close() })

	require.NoError(t, sess.start())

	var got []string
	req := &ExecuteCodeRequest{
		Code:    `set +x; true`,
		Timeout: 3 * time.Second,
		Hooks: ExecuteResultHook{
			OnExecuteStdout: func(line string) { got = append(got, line) },
		},
	}
	require.NoError(t, sess.run(context.Background(), req))

	require.LessOrEqual(t, len(got), 1, "expected at most one stdout line, got %v", got)
	if len(got) == 1 {
		require.Empty(t, strings.TrimSpace(got[0]), "expected empty stdout")
	}
	for i := range got {
		require.NotContains(t, got[i], envDumpStartMarker, "env dump leaked into stdout: %v", got)
		require.NotContains(t, got[i], "declare -x", "env dump leaked into stdout: %v", got)
	}
}

// TestBashSession_heredoc runs a script that creates an executable through a
// quoted heredoc, executes it and writes a reward file, then verifies that the
// same controller session still accepts a follow-up command.
//
// All files are created under the test's temporary directory instead of a
// fixed /tmp path, so the test leaves nothing behind and cannot collide with
// other runs. Hook output goes through the test logger.
func TestBashSession_heredoc(t *testing.T) {
	rewardDir := t.TempDir()
	controller := NewController("", "")

	sessionID, err := controller.CreateBashSession(&CreateContextRequest{})
	require.NoError(t, err)
	t.Cleanup(func() { _ = controller.DeleteBashSession(sessionID) })

	hooks := ExecuteResultHook{
		OnExecuteStdout: func(line string) {
			t.Logf("[stdout] %s", line)
		},
		OnExecuteComplete: func(d time.Duration) {
			t.Logf("[complete] %s", d)
		},
	}

	// First run: heredoc + reward file write.
	script := fmt.Sprintf(`
set -x
reward_dir=%q
mkdir -p "$reward_dir"

cat > "$reward_dir/repro_script.sh" <<'SHEOF'
#!/usr/bin/env sh
echo "hello heredoc"
SHEOF

chmod +x "$reward_dir/repro_script.sh"
"$reward_dir/repro_script.sh"
echo "after heredoc"
echo 1 > "$reward_dir/reward.txt"
cat "$reward_dir/reward.txt"
`, rewardDir)

	ctx := context.Background()
	require.NoError(t, controller.RunInBashSession(ctx, &ExecuteCodeRequest{
		Context:  sessionID,
		Language: Bash,
		Timeout:  10 * time.Second,
		Code:     script,
		Hooks:    hooks,
	}))

	// Second run: ensure the session keeps working.
	require.NoError(t, controller.RunInBashSession(ctx, &ExecuteCodeRequest{
		Context:  sessionID,
		Language: Bash,
		Timeout:  5 * time.Second,
		Code:     "echo 'second command works'",
		Hooks:    hooks,
	}))
}

// TestBashSession_execReplacesShell checks that a script which replaces the
// shell via `exec` still completes, delivers the child's output, and leaves
// the session usable for a following run.
//
// The child script is written under the test's temporary directory instead of
// a fixed /tmp path, so the test leaves nothing behind and cannot collide with
// other runs.
func TestBashSession_execReplacesShell(t *testing.T) {
	session := newBashSession("")
	t.Cleanup(func() { _ = session.close() })

	require.NoError(t, session.start())

	var stdoutLines []string
	hooks := ExecuteResultHook{
		OnExecuteStdout: func(line string) {
			stdoutLines = append(stdoutLines, line)
		},
	}

	childScript := t.TempDir() + "/exec_child.sh"
	script := fmt.Sprintf(`
cat > %[1]q <<'EOF'
echo "child says hi"
EOF
chmod +x %[1]q
exec %[1]q
`, childScript)

	request := &ExecuteCodeRequest{
		Code:    script,
		Hooks:   hooks,
		Timeout: 5 * time.Second,
	}
	require.NoError(t, session.run(context.Background(), request), "expected exec to complete without killing the session")
	require.True(t, containsLine(stdoutLines, "child says hi"), "expected child output, got %v", stdoutLines)

	// Subsequent run should still work because we restart bash per run.
	request = &ExecuteCodeRequest{
		Code:    "echo still-alive",
		Hooks:   hooks,
		Timeout: 2 * time.Second,
	}
	stdoutLines = nil
	require.NoError(t, session.run(context.Background(), request), "expected run to succeed after exec replaced the shell")
	require.True(t, containsLine(stdoutLines, "still-alive"), "expected follow-up output, got %v", stdoutLines)
}

// TestBashSession_complexExec runs a script that redirects stdout/stderr
// through `exec` and process substitution and later restores them, then checks
// that both outputs were captured and that the session still works afterwards.
func TestBashSession_complexExec(t *testing.T) {
	sess := newBashSession("")
	t.Cleanup(func() { _ = sess.close() })

	require.NoError(t, sess.start())

	var output []string
	hooks := ExecuteResultHook{
		OnExecuteStdout: func(line string) { output = append(output, line) },
	}

	script := `
LOG_FILE=$(mktemp)
export LOG_FILE
exec 3>&1 4>&2
exec > >(tee "$LOG_FILE") 2>&1

set -x
echo "from-complex-exec"
exec 1>&3 2>&4 # step record
echo "after-restore"
`

	err := sess.run(context.Background(), &ExecuteCodeRequest{
		Code:    script,
		Hooks:   hooks,
		Timeout: 5 * time.Second,
	})
	require.NoError(t, err, "expected complex exec to finish")
	sawBoth := containsLine(output, "from-complex-exec") && containsLine(output, "after-restore")
	require.True(t, sawBoth, "expected exec outputs, got %v", output)

	// Session should still be usable.
	output = nil
	err = sess.run(context.Background(), &ExecuteCodeRequest{
		Code:    "echo still-alive",
		Hooks:   hooks,
		Timeout: 2 * time.Second,
	})
	require.NoError(t, err, "expected run to succeed after complex exec")
	require.True(t, containsLine(output, "still-alive"), "expected follow-up output, got %v", output)
}

// containsLine reports whether any element of lines equals target once
// surrounding whitespace has been trimmed from the element.
func containsLine(lines []string, target string) bool {
	for i := 0; i < len(lines); i++ {
		if trimmed := strings.TrimSpace(lines[i]); trimmed == target {
			return true
		}
	}
	return false
}

// TestBashSession_CloseKillsRunningProcess verifies that session.close() kills the active
// process group so that a long-running command (e.g. sleep) does not keep running after close.
// The test is skipped when bash is not available in PATH.
func TestBashSession_CloseKillsRunningProcess(t *testing.T) {
	if _, err := exec.LookPath("bash"); err != nil {
		t.Skip("bash not found in PATH")
	}

	session := newBashSession("")
	require.NoError(t, session.start())

	// Buffered so the goroutine can deliver its result even if nobody is receiving yet.
	runDone := make(chan error, 1)
	req := &ExecuteCodeRequest{
		Code:    "sleep 30",
		Timeout: 60 * time.Second,
		Hooks:   ExecuteResultHook{},
	}
	go func() {
		runDone <- session.run(context.Background(), req)
	}()

	// Give the child process time to start.
	time.Sleep(200 * time.Millisecond)

	// Close should kill the process group; run() should return soon (it may return nil
	// because the code path treats non-zero exit as success after calling OnExecuteError).
	require.NoError(t, session.close())

	// The returned error value is intentionally not inspected; only promptness matters.
	select {
	case <-runDone:
		// run() returned; process was killed so we did not wait 30s
	case <-time.After(3 * time.Second):
		require.Fail(t, "run did not return within 3s after close (process was not killed)")
	}
}

// TestBashSession_DeleteBashSessionKillsRunningProcess verifies that DeleteBashSession
// (close path) kills the active run and removes the session from the controller.
// The test is skipped when bash is not available in PATH.
func TestBashSession_DeleteBashSessionKillsRunningProcess(t *testing.T) {
	if _, err := exec.LookPath("bash"); err != nil {
		t.Skip("bash not found in PATH")
	}

	c := NewController("", "")
	sessionID, err := c.CreateBashSession(&CreateContextRequest{})
	require.NoError(t, err)

	// Buffered so the goroutine can deliver its result even if nobody is receiving yet.
	runDone := make(chan error, 1)
	req := &ExecuteCodeRequest{
		Language: Bash,
		Context:  sessionID,
		Code:     "sleep 30",
		Timeout:  60 * time.Second,
		Hooks:    ExecuteResultHook{},
	}
	go func() {
		runDone <- c.RunInBashSession(context.Background(), req)
	}()

	// Give the child process time to start before deleting the session.
	time.Sleep(200 * time.Millisecond)

	require.NoError(t, c.DeleteBashSession(sessionID))

	// The returned error value is intentionally not inspected; only promptness matters.
	select {
	case <-runDone:
		// RunInBashSession returned; process was killed
	case <-time.After(3 * time.Second):
		require.Fail(t, "RunInBashSession did not return within 3s after DeleteBashSession")
	}

	// Session should be gone; deleting again should return ErrContextNotFound.
	err = c.DeleteBashSession(sessionID)
	require.Error(t, err)
	require.ErrorIs(t, err, ErrContextNotFound)
}

// TestBashSession_CloseWithNoActiveRun verifies that close() with no running command
// completes without error and does not hang.
func TestBashSession_CloseWithNoActiveRun(t *testing.T) {
	session := newBashSession("")
	require.NoError(t, session.start())

	// close() runs in its own goroutine so that a hang fails the test via the
	// timeout below instead of blocking it.
	done := make(chan struct{}, 1)
	go func() {
		_ = session.close()
		done <- struct{}{}
	}()

	select {
	case <-done:
		// close() returned
	case <-time.After(2 * time.Second):
		require.Fail(t, "close() did not return within 2s when no run was active")
	}
}


================================================
FILE: components/execd/pkg/runtime/bash_session_windows.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build windows
// +build windows

package runtime

import (
	"context"
	"errors"
)

// errBashSessionNotSupported is returned by every bash-session entry point in
// the Windows build.
var errBashSessionNotSupported = errors.New("bash session is not supported on windows")

// CreateBashSession is not supported on Windows.
// It ignores its argument and always returns an empty session id together
// with errBashSessionNotSupported.
func (c *Controller) CreateBashSession(_ *CreateContextRequest) (string, error) { //nolint:revive
	return "", errBashSessionNotSupported
}

// RunInBashSession is not supported on Windows.
// It ignores its arguments, runs nothing and always returns
// errBashSessionNotSupported.
func (c *Controller) RunInBashSession(_ context.Context, _ *ExecuteCodeRequest) error { //nolint:revive
	return errBashSessionNotSupported
}

// DeleteBashSession is not supported on Windows.
// It ignores the session id and always returns errBashSessionNotSupported.
func (c *Controller) DeleteBashSession(_ string) error { //nolint:revive
	return errBashSessionNotSupported
}


================================================
FILE: components/execd/pkg/runtime/command.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package runtime

import (
	"context"
	"errors"
	"fmt"
	"os"
	"os/exec"
	"os/signal"
	"os/user"
	"strconv"
	"sync"
	"syscall"
	"time"

	"github.com/alibaba/opensandbox/execd/pkg/jupyter/execute"
	"github.com/alibaba/opensandbox/execd/pkg/log"
	"github.com/alibaba/opensandbox/execd/pkg/util/safego"
)

// getShell picks the shell used to run commands: "bash" when it can be found
// in PATH, otherwise "sh". The fallback exists for Alpine-based Docker images,
// which ship only sh by default.
func getShell() string {
	_, lookupErr := exec.LookPath("bash")
	if lookupErr != nil {
		return "sh"
	}
	return "bash"
}

// buildCredential turns the optional uid/gid of a request into a
// syscall.Credential for the child process.
//
// It returns (nil, nil) when neither id is given. When uid is given, the
// account is looked up to fill in the primary gid (only if gid was not given
// explicitly) and the supplemental groups; lookup and parse failures are
// ignored and leave the corresponding fields at their zero values. An explicit
// gid always wins. The returned error is currently always nil.
//
// NOTE(review): when uid is set, gid is nil and the account lookup fails, Gid
// stays 0 — confirm that running with group 0 is the intended fallback.
func buildCredential(uid, gid *uint32) (*syscall.Credential, error) {
	if uid == nil && gid == nil {
		return nil, nil //nolint:nilnil
	}

	cred := &syscall.Credential{}
	if gid != nil {
		cred.Gid = *gid
	}
	if uid == nil {
		return cred, nil
	}
	cred.Uid = *uid

	// Load user info to get primary GID and supplemental groups.
	account, lookupErr := user.LookupId(strconv.FormatUint(uint64(*uid), 10))
	if lookupErr != nil {
		return cred, nil
	}

	// Use the account's primary GID only when none was provided explicitly.
	if gid == nil {
		if primary, parseErr := strconv.ParseUint(account.Gid, 10, 32); parseErr == nil {
			cred.Gid = uint32(primary)
		}
	}

	groupIDs, groupsErr := account.GroupIds()
	if groupsErr != nil {
		return cred, nil
	}
	for _, rawID := range groupIDs {
		if parsed, parseErr := strconv.ParseUint(rawID, 10, 32); parseErr == nil {
			cred.Groups = append(cred.Groups, uint32(parsed))
		}
	}

	return cred, nil
}

// runCommand executes shell commands and streams their output.
//
// The command runs as `<shell> -c request.Code` in its own process group, with
// the optional uid/gid credential, request.Cwd as working directory and the
// process environment merged with the extra envs. stdout and stderr are
// written to per-session log files that are tailed into the OnExecuteStdout /
// OnExecuteStderr hooks. While the command runs, signals received by this
// process are forwarded to the command's process group, except SIGCHLD and
// SIGURG.
//
// Failures of the command itself (start error or non-zero exit) are reported
// through OnExecuteError and the function still returns nil; a non-nil error
// is returned only when the log files or the credential cannot be prepared.
// Hooks that are nil are skipped.
func (c *Controller) runCommand(ctx context.Context, request *ExecuteCodeRequest) error {
	session := c.newContextID()

	// Relay incoming signals to this channel for the duration of the command.
	// signal.Stop removes only this channel's registration; signal.Reset would
	// drop every signal handler installed anywhere in the process.
	signals := make(chan os.Signal, 1)
	signal.Notify(signals)
	defer signal.Stop(signals)

	stdout, stderr, err := c.stdLogDescriptor(session)
	if err != nil {
		return fmt.Errorf("failed to get stdlog descriptor: %w", err)
	}
	defer stdout.Close()
	defer stderr.Close()
	stdoutPath := c.stdoutFileName(session)
	stderrPath := c.stderrFileName(session)

	startAt := time.Now()
	log.Info("received command: %v", request.Code)
	shell := getShell()
	cmd := exec.CommandContext(ctx, shell, "-c", request.Code)

	// Configure credentials and process group
	cred, err := buildCredential(request.Uid, request.Gid)
	if err != nil {
		return fmt.Errorf("failed to build credential: %w", err)
	}
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Setpgid:    true,
		Credential: cred,
	}

	cmd.Stdout = stdout
	cmd.Stderr = stderr
	extraEnv := mergeExtraEnvs(loadExtraEnvFromFile(), request.Envs)
	cmd.Env = mergeEnvs(os.Environ(), extraEnv)
	cmd.Dir = request.Cwd

	// Tail both log files until done is closed.
	done := make(chan struct{}, 1)
	var wg sync.WaitGroup
	wg.Add(2)
	safego.Go(func() {
		defer wg.Done()
		c.tailStdPipe(stdoutPath, request.Hooks.OnExecuteStdout, done)
	})
	safego.Go(func() {
		defer wg.Done()
		c.tailStdPipe(stderrPath, request.Hooks.OnExecuteStderr, done)
	})

	err = cmd.Start()
	if err != nil {
		close(done)
		wg.Wait()
		if request.Hooks.OnExecuteInit != nil {
			request.Hooks.OnExecuteInit(session)
		}
		if request.Hooks.OnExecuteError != nil {
			request.Hooks.OnExecuteError(&execute.ErrorOutput{EName: "CommandExecError", EValue: err.Error()})
		}
		log.Error("CommandExecError: error starting commands: %v", err)
		return nil
	}

	kernel := &commandKernel{
		pid:          cmd.Process.Pid,
		stdoutPath:   stdoutPath,
		stderrPath:   stderrPath,
		startedAt:    startAt,
		running:      true,
		content:      request.Code,
		isBackground: false,
	}
	c.storeCommandKernel(session, kernel)
	if request.Hooks.OnExecuteInit != nil {
		request.Hooks.OnExecuteInit(session)
	}

	// Forward signals to the command's process group until the command is
	// finished (forwardStop) or the context ends, so the goroutine never
	// outlives this call.
	forwardStop := make(chan struct{})
	defer close(forwardStop)
	go func() {
		for {
			select {
			case <-ctx.Done():
				return
			case <-forwardStop:
				return
			case sig := <-signals:
				sysSig, ok := sig.(syscall.Signal)
				if !ok {
					continue
				}
				// DO NOT forward syscall.SIGURG to children processes.
				if sysSig != syscall.SIGCHLD && sysSig != syscall.SIGURG {
					_ = syscall.Kill(-cmd.Process.Pid, sysSig)
				}
			}
		}
	}()

	err = cmd.Wait()
	close(done)
	wg.Wait()
	if err != nil {
		// A real exit status is reported as its number; any other wait failure
		// is reported with its message and exit code 1.
		eValue := err.Error()
		eCode := 1
		var exitError *exec.ExitError
		if errors.As(err, &exitError) {
			eCode = exitError.ExitCode()
			eValue = strconv.Itoa(eCode)
		}

		if request.Hooks.OnExecuteError != nil {
			request.Hooks.OnExecuteError(&execute.ErrorOutput{
				EName:     "CommandExecError",
				EValue:    eValue,
				Traceback: []string{err.Error()},
			})
		}

		log.Error("CommandExecError: error running commands: %v", err)
		c.markCommandFinished(session, eCode, err.Error())
		return nil
	}

	c.markCommandFinished(session, 0, "")
	if request.Hooks.OnExecuteComplete != nil {
		request.Hooks.OnExecuteComplete(time.Since(startAt))
	}
	return nil
}

// runBackgroundCommand executes shell commands in detached mode.
//
// The command is started as `<shell> -c request.Code` in its own process
// group, with stdin connected to the null device and stdout/stderr both
// written to the session's combined output file. The function returns as soon
// as the command has been started: OnExecuteInit is called with the new
// session id at the beginning and OnExecuteComplete right after the start, not
// when the command finishes. The final exit status is recorded asynchronously
// through markCommandFinished.
//
// cancel is invoked when preparation or start fails and after the command has
// exited. When ctx is done, the whole process group is killed (best effort).
// A non-nil error is returned when the output file, the credential or the
// process start fails. Hooks that are nil are skipped.
func (c *Controller) runBackgroundCommand(ctx context.Context, cancel context.CancelFunc, request *ExecuteCodeRequest) error {
	session := c.newContextID()
	if request.Hooks.OnExecuteInit != nil {
		request.Hooks.OnExecuteInit(session)
	}

	pipe, err := c.combinedOutputDescriptor(session)
	if err != nil {
		cancel()
		return fmt.Errorf("failed to get combined output descriptor: %w", err)
	}
	stdoutPath := c.combinedOutputFileName(session)
	stderrPath := c.combinedOutputFileName(session)

	startAt := time.Now()
	log.Info("received command: %v", request.Code)
	shell := getShell()
	cmd := exec.CommandContext(ctx, shell, "-c", request.Code)
	cmd.Dir = request.Cwd
	// Configure credentials and process group
	cred, err := buildCredential(request.Uid, request.Gid)
	if err != nil {
		// Fail closed: never run the command without the requested uid/gid.
		cancel()
		_ = pipe.Close()
		log.Error("failed to build credentials: %v", err)
		return fmt.Errorf("failed to build credential: %w", err)
	}
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Setpgid:    true,
		Credential: cred,
	}

	cmd.Stdout = pipe
	cmd.Stderr = pipe
	extraEnv := mergeExtraEnvs(loadExtraEnvFromFile(), request.Envs)
	cmd.Env = mergeEnvs(os.Environ(), extraEnv)

	// use DevNull as stdin so interactive programs exit immediately.
	devNull, err := os.Open(os.DevNull)
	if err == nil {
		cmd.Stdin = devNull
		defer devNull.Close()
	}

	err = cmd.Start()
	kernel := &commandKernel{
		pid:          -1,
		stdoutPath:   stdoutPath,
		stderrPath:   stderrPath,
		startedAt:    startAt,
		running:      true,
		content:      request.Code,
		isBackground: true,
	}
	if err != nil {
		cancel()
		// Nothing will ever write to the output file; release it here because
		// the waiter goroutine that normally closes it is never started.
		_ = pipe.Close()
		log.Error("CommandExecError: error starting commands: %v", err)
		kernel.running = false
		c.storeCommandKernel(session, kernel)
		c.markCommandFinished(session, 255, err.Error())
		return fmt.Errorf("failed to start commands: %w", err)
	}

	// Register the running command before returning, so the session id already
	// handed out via OnExecuteInit can be resolved immediately.
	kernel.pid = cmd.Process.Pid
	c.storeCommandKernel(session, kernel)

	safego.Go(func() {
		defer pipe.Close()

		waitErr := cmd.Wait()
		cancel()
		if waitErr != nil {
			log.Error("CommandExecError: error running commands: %v", waitErr)
			exitCode := 1
			var exitError *exec.ExitError
			if errors.As(waitErr, &exitError) {
				exitCode = exitError.ExitCode()
			}
			c.markCommandFinished(session, exitCode, waitErr.Error())
			return
		}
		c.markCommandFinished(session, 0, "")
	})

	// ensure we kill the whole process group if the context is cancelled (e.g., timeout).
	safego.Go(func() {
		<-ctx.Done()
		if cmd.Process != nil {
			_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) // best-effort
		}
	})

	if request.Hooks.OnExecuteComplete != nil {
		request.Hooks.OnExecuteComplete(time.Since(startAt))
	}
	return nil
}


================================================
FILE: components/execd/pkg/runtime/command_common.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"sync"
	"time"
)

// tailStdPipe polls file every 100ms and forwards each newly completed line to
// onExecute. When done is closed (or delivers a value) it performs one final
// read that also flushes a trailing line lacking a terminator, then returns.
func (c *Controller) tailStdPipe(file string, onExecute func(text string), done <-chan struct{}) {
	lastPos := int64(0)
	ticker := time.NewTicker(100 * time.Millisecond)
	defer ticker.Stop()

	mutex := &sync.Mutex{}
	for {
		select {
		case <-done:
			c.readFromPos(mutex, file, lastPos, onExecute, true)
			return
		case <-ticker.C:
			// readFromPos reports -1 when it could not take the lock. Keep the
			// previous offset in that case; storing the sentinel would make the
			// next read seek to an invalid position.
			if newPos := c.readFromPos(mutex, file, lastPos, onExecute, false); newPos >= 0 {
				lastPos = newPos
			}
		}
	}
}

// getCommandKernel looks up the command kernel registered for sessionID.
// It returns nil when nothing is stored under that ID or when the stored
// value is not a *commandKernel.
func (c *Controller) getCommandKernel(sessionID string) *commandKernel {
	value, found := c.commandClientMap.Load(sessionID)
	if !found {
		return nil
	}

	kernel, isKernel := value.(*commandKernel)
	if !isKernel {
		return nil
	}
	return kernel
}

// storeCommandKernel registers a command execution context under sessionID,
// replacing any value previously stored for the same ID.
func (c *Controller) storeCommandKernel(sessionID string, kernel *commandKernel) {
	c.commandClientMap.Store(sessionID, kernel)
}

// stdLogDescriptor opens (creating or truncating) the per-session stdout and
// stderr log files and returns them in that order.
// The temp directory is created first when missing, so commands keep working
// even after the temp directory has been removed.
// On failure both writers are nil; a stdout file that was already opened is
// closed before returning.
func (c *Controller) stdLogDescriptor(session string) (io.WriteCloser, io.WriteCloser, error) {
	const openFlags = os.O_RDWR | os.O_CREATE | os.O_TRUNC

	tempDir := os.TempDir()
	if mkErr := os.MkdirAll(tempDir, 0o755); mkErr != nil {
		return nil, nil, fmt.Errorf("failed to create temp dir %s: %w", tempDir, mkErr)
	}

	outFile, outErr := os.OpenFile(c.stdoutFileName(session), openFlags, os.ModePerm)
	if outErr != nil {
		return nil, nil, outErr
	}

	errFile, errErr := os.OpenFile(c.stderrFileName(session), openFlags, os.ModePerm)
	if errErr != nil {
		outFile.Close()
		return nil, nil, errErr
	}

	return outFile, errFile, nil
}

// combinedOutputDescriptor opens (creating or truncating) the per-session
// combined output log file, creating the temp directory first when it is
// missing. On failure the returned writer is a true nil interface.
func (c *Controller) combinedOutputDescriptor(session string) (io.WriteCloser, error) {
	logDir := os.TempDir()
	if err := os.MkdirAll(logDir, 0o755); err != nil {
		return nil, fmt.Errorf("failed to create temp dir %s: %w", logDir, err)
	}

	file, err := os.OpenFile(c.combinedOutputFileName(session), os.O_RDWR|os.O_CREATE|os.O_TRUNC, os.ModePerm)
	if err != nil {
		// Returning the nil *os.File directly would wrap it in a non-nil
		// io.WriteCloser, so a caller's `w != nil` check would wrongly pass.
		return nil, err
	}
	return file, nil
}

// stdoutFileName returns the path of the session's stdout log:
// "<session>.stdout" inside the OS temp directory.
func (c *Controller) stdoutFileName(session string) string {
	name := session + ".stdout"
	return filepath.Join(os.TempDir(), name)
}

// stderrFileName returns the path of the session's stderr log:
// "<session>.stderr" inside the OS temp directory.
func (c *Controller) stderrFileName(session string) string {
	name := session + ".stderr"
	return filepath.Join(os.TempDir(), name)
}

// combinedOutputFileName returns the path of the session's combined output
// log: "<session>.output" inside the OS temp directory.
func (c *Controller) combinedOutputFileName(session string) string {
	name := session + ".output"
	return filepath.Join(os.TempDir(), name)
}

// readFromPos streams new content from a file starting at startPos.
//
// The file is read byte by byte and split on '\n' and '\r'; every non-empty
// segment is passed to onExecute without its terminator. A trailing segment
// that has no terminator yet is only emitted when flushIncomplete is true;
// otherwise the returned offset points at the start of that segment so the
// next call re-reads it once it is complete.
//
// Returns:
//   - -1 when mutex is already held (nothing is read);
//   - startPos when the file cannot be opened or positioned;
//   - otherwise the offset the next call should resume from.
func (c *Controller) readFromPos(mutex *sync.Mutex, filepath string, startPos int64, onExecute func(string), flushIncomplete bool) int64 {
	if !mutex.TryLock() {
		return -1
	}
	defer mutex.Unlock()

	file, err := os.Open(filepath)
	if err != nil {
		return startPos
	}
	defer file.Close()

	// A failed seek would leave the descriptor at offset 0 while the byte
	// counter starts at startPos, re-emitting old data and returning a bogus
	// offset. Treat it like an open failure instead.
	if _, err := file.Seek(startPos, io.SeekStart); err != nil {
		return startPos
	}

	reader := bufio.NewReader(file)
	var pending bytes.Buffer
	currentPos := startPos

	for {
		b, err := reader.ReadByte()
		if err != nil {
			// At EOF, emit the unterminated tail only when asked to; any other
			// read error just stops the scan.
			if err == io.EOF && flushIncomplete && pending.Len() > 0 {
				onExecute(pending.String())
				pending.Reset()
			}
			break
		}
		currentPos++

		// Line terminators ('\n' or '\r') end the current segment and are never
		// forwarded; empty segments (e.g. the "\n" of "\r\n") are dropped.
		if b == '\n' || b == '\r' {
			if pending.Len() > 0 {
				onExecute(pending.String())
				pending.Reset()
			}
			continue
		}

		pending.WriteByte(b)
	}

	// An incomplete segment is left for the next call: resume from its start.
	if !flushIncomplete && pending.Len() > 0 {
		return currentPos - int64(pending.Len())
	}
	// Every byte obtained from the file has been consumed, so the running
	// counter is the resume offset; no second (fallible) Seek is needed.
	return currentPos
}


================================================
FILE: components/execd/pkg/runtime/command_status.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

import (
	"fmt"
	"io"
	"os"
	"time"
)

// CommandStatus describes the lifecycle state of a command.
// GetCommandStatus populates it from a copy of the session's command kernel.
type CommandStatus struct {
	// Session is the session ID the status was requested for.
	Session    string     `json:"session"`
	// Running is false once markCommandFinished has recorded the exit.
	Running    bool       `json:"running"`
	// ExitCode stays nil until the command has been marked finished.
	ExitCode   *int       `json:"exit_code,omitempty"`
	// Error is the error message recorded when the command finished, if any.
	Error      string     `json:"error,omitempty"`
	// StartedAt is a time.Time value; encoding/json's omitempty does not omit
	// struct values, so this field is always serialized.
	StartedAt  time.Time  `json:"started_at,omitempty"`
	// FinishedAt stays nil until the command has been marked finished.
	FinishedAt *time.Time `json:"finished_at,omitempty"`
	// Content is copied from the kernel's content field (the submitted command text).
	Content    string     `json:"content,omitempty"`
}

// CommandOutput contains non-streamed stdout/stderr plus status.
// The embedded CommandStatus has no JSON tag, so its fields are serialized at
// the same level as stdout/stderr.
// NOTE(review): nothing in this file constructs a CommandOutput; confirm it
// is still used by callers elsewhere.
type CommandOutput struct {
	CommandStatus
	Stdout string `json:"stdout"`
	Stderr string `json:"stderr"`
}

// commandSnapshot returns a shallow copy of the command kernel stored for
// session, or nil when no *commandKernel is registered under that ID.
// Pointer fields of the copy still alias the stored kernel's values.
// NOTE(review): the copy is taken without holding c.mu, while
// markCommandFinished mutates the kernel under c.mu — confirm whether this
// read needs the same lock.
func (c *Controller) commandSnapshot(session string) *commandKernel {
	value, found := c.commandClientMap.Load(session)
	if !found {
		return nil
	}

	kernel, _ := value.(*commandKernel)
	if kernel == nil {
		return nil
	}

	snapshot := *kernel
	return &snapshot
}

// GetCommandStatus reports the lifecycle state recorded for a command session.
// It returns a "command not found" error when the session is unknown.
func (c *Controller) GetCommandStatus(session string) (*CommandStatus, error) {
	snapshot := c.commandSnapshot(session)
	if snapshot == nil {
		return nil, fmt.Errorf("command not found: %s", session)
	}

	return &CommandStatus{
		Session:    session,
		Running:    snapshot.running,
		ExitCode:   snapshot.exitCode,
		Error:      snapshot.errMsg,
		StartedAt:  snapshot.startedAt,
		FinishedAt: snapshot.finishedAt,
		Content:    snapshot.content,
	}, nil
}

// SeekBackgroundCommandOutput reads the output file of a background command
// from byte offset cursor to the current end of file.
//
// It returns the bytes read and the file offset after the read, which callers
// can pass back as the next cursor. On any failure (unknown session, command
// not started in background mode, open/seek/read error) it returns nil data
// and a cursor of -1.
func (c *Controller) SeekBackgroundCommandOutput(session string, cursor int64) ([]byte, int64, error) {
	snapshot := c.commandSnapshot(session)
	switch {
	case snapshot == nil:
		return nil, -1, fmt.Errorf("command not found: %s", session)
	case !snapshot.isBackground:
		return nil, -1, fmt.Errorf("command %s is not running in background", session)
	}

	// The kernel's stdoutPath is the file the command's output was written to.
	output, openErr := os.Open(snapshot.stdoutPath)
	if openErr != nil {
		return nil, -1, fmt.Errorf("error open combined output file for command %s: %w", session, openErr)
	}
	defer output.Close()

	// Position the descriptor at the caller's cursor.
	if _, seekErr := output.Seek(cursor, io.SeekStart); seekErr != nil {
		return nil, -1, fmt.Errorf("error seek file: %w", seekErr)
	}

	// Drain everything from the cursor up to the end of the file.
	data, readErr := io.ReadAll(output)
	if readErr != nil {
		return nil, -1, fmt.Errorf("error read file: %w", readErr)
	}

	// The descriptor's offset after the read is the next cursor.
	nextCursor, posErr := output.Seek(0, io.SeekCurrent)
	if posErr != nil {
		return nil, -1, fmt.Errorf("error get current position: %w", posErr)
	}

	return data, nextCursor, nil
}

// markCommandFinished records the outcome of a command: exit code, error
// message, finish time, and running=false. The update happens under c.mu and
// is a no-op when no *commandKernel is stored for session.
func (c *Controller) markCommandFinished(session string, exitCode int, errMsg string) {
	// Capture the finish time before waiting on the lock.
	finishedAt := time.Now()

	c.mu.Lock()
	defer c.mu.Unlock()

	value, found := c.commandClientMap.Load(session)
	if !found {
		return
	}
	kernel, _ := value.(*commandKernel)
	if kernel == nil {
		return
	}

	kernel.running = false
	kernel.exitCode = &exitCode
	kernel.errMsg = errMsg
	kernel.finishedAt = &finishedAt
}


================================================
FILE: components/execd/pkg/runtime/command_status_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

import (
	"context"
	"os"
	"path/filepath"
	"strings"
	"testing"
	"time"

	"github.com/stretchr/testify/require"
)

// TestGetCommandStatus_NotFound checks that querying an unregistered session
// yields an error.
func TestGetCommandStatus_NotFound(t *testing.T) {
	controller := NewController("", "")

	_, lookupErr := controller.GetCommandStatus("missing")

	require.Error(t, lookupErr, "expected error for missing session")
}

// TestGetCommandStatus_Running starts a background `sleep 2` and verifies
// that, while it is still executing, the reported status is running with no
// exit code or finish time and a non-zero start time.
func TestGetCommandStatus_Running(t *testing.T) {
	c := NewController("", "")

	var session string
	req := &ExecuteCodeRequest{
		Language: BackgroundCommand,
		Code:     "sleep 2",
		Hooks: ExecuteResultHook{
			OnExecuteInit:     func(id string) { session = id },
			OnExecuteComplete: func(time.Duration) {},
		},
	}

	ctx, cancel := context.WithCancel(context.Background())
	// runBackgroundCommand kills the command when ctx is cancelled, so
	// cancelling at cleanup keeps the sleep process and its watcher goroutine
	// from outliving the test. Calling cancel more than once is harmless.
	t.Cleanup(cancel)
	require.NoError(t, c.runBackgroundCommand(ctx, cancel, req))
	require.NotEmpty(t, session, "session should be set by OnExecuteInit")

	// Poll until status is registered (runBackgroundCommand stores kernel asynchronously).
	deadline := time.Now().Add(5 * time.Second)
	var (
		status *CommandStatus
		err    error
	)
	for time.Now().Before(deadline) {
		status, err = c.GetCommandStatus(session)
		if err == nil {
			break
		}
		if strings.Contains(err.Error(), "not found") {
			time.Sleep(50 * time.Millisecond)
			continue
		}
		// Any error other than "not found" is unexpected: fail immediately.
		require.NoError(t, err, "GetCommandStatus unexpected error")
	}
	require.NoError(t, err, "GetCommandStatus error after retry")

	require.NotNil(t, status)
	require.True(t, status.Running, "expected running=true")
	require.Nil(t, status.ExitCode, "expected exitCode to be nil while running")
	require.Nil(t, status.FinishedAt, "expected finishedAt to be nil while running")
	require.False(t, status.StartedAt.IsZero(), "expected startedAt to be set")
	t.Log(status)
}

// TestSeekBackgroundCommandOutput_Completed registers an already-finished
// background command whose output file is pre-populated and verifies that
// seeking from offset 0 returns the whole file and a cursor at its end.
func TestSeekBackgroundCommandOutput_Completed(t *testing.T) {
	c := NewController("", "")

	tmpDir := t.TempDir()
	session := "sess-done"
	stdoutPath := filepath.Join(tmpDir, session+".stdout")

	stdoutContent := "hello stdout"
	require.NoError(t, os.WriteFile(stdoutPath, []byte(stdoutContent), 0o644))

	started := time.Now().Add(-2 * time.Second)
	finished := time.Now()
	exitCode := 0
	kernel := &commandKernel{
		pid:          456,
		stdoutPath:   stdoutPath,
		isBackground: true,
		startedAt:    started,
		finishedAt:   &finished,
		exitCode:     &exitCode,
		errMsg:       "",
		running:      false,
	}
	c.storeCommandKernel(session, kernel)

	output, cursor, err := c.SeekBackgroundCommandOutput(session, 0)
	require.NoError(t, err, "SeekBackgroundCommandOutput error")

	// The file is static, so the cursor must land exactly on its length.
	require.Equal(t, int64(len(stdoutContent)), cursor, "cursor should point at end of file")
	require.Equal(t, stdoutContent, string(output))
}

// TestSeekBackgroundCommandOutput_WithRunBackgroundCommand runs a real
// background printf, waits for its output to appear, and checks both the full
// read from offset 0 and an incremental read from the returned cursor.
func TestSeekBackgroundCommandOutput_WithRunBackgroundCommand(t *testing.T) {
	controller := NewController("", "")

	const expected = "line1\nline2\n"

	var session string
	hooks := ExecuteResultHook{
		OnExecuteInit:     func(id string) { session = id },
		OnExecuteComplete: func(executionTime time.Duration) {},
		// remaining hooks are not needed by this test
	}
	req := &ExecuteCodeRequest{
		Language: BackgroundCommand,
		Code:     "printf 'line1\nline2\n'",
		Hooks:    hooks,
	}

	ctx, cancel := context.WithCancel(context.Background())
	require.NoError(t, controller.runBackgroundCommand(ctx, cancel, req))
	require.NotEmpty(t, session, "session should be set by OnExecuteInit")

	var (
		output []byte
		cursor int64
		err    error
	)

	// Retry every 100ms, for at most 5s, until some output is available.
	for deadline := time.Now().Add(5 * time.Second); time.Now().Before(deadline); time.Sleep(100 * time.Millisecond) {
		output, cursor, err = controller.SeekBackgroundCommandOutput(session, 0)
		if err == nil && len(output) > 0 {
			break
		}
	}
	require.NoError(t, err, "SeekBackgroundCommandOutput error")
	require.Equal(t, expected, string(output))
	require.GreaterOrEqual(t, cursor, int64(len(expected)), "cursor should advance to end of file")

	// Reading again from the returned cursor must yield nothing new and must
	// not move the cursor backwards.
	moreOutput, nextCursor, err := controller.SeekBackgroundCommandOutput(session, cursor)
	require.NoError(t, err, "SeekBackgroundCommandOutput (second call) error")
	require.Empty(t, moreOutput, "expected no new output")
	require.GreaterOrEqual(t, nextCursor, cursor, "cursor should not move backwards")
}


================================================
FILE: components/execd/pkg/runtime/command_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

import (
	"context"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"sync"
	"testing"
	"time"

	goruntime "runtime"

	"github.com/alibaba/opensandbox/execd/pkg/jupyter/execute"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// TestReadFromPos_SplitsOnCRAndLF verifies that readFromPos treats both '\r'
// and '\n' as separators and that a follow-up read from the returned offset
// yields only content appended afterwards.
func TestReadFromPos_SplitsOnCRAndLF(t *testing.T) {
	logFile := filepath.Join(t.TempDir(), "stdout.log")
	lock := &sync.Mutex{}

	initial := "line1\nprog 10%\rprog 20%\rprog 30%\nlast\n"
	require.NoError(t, os.WriteFile(logFile, []byte(initial), 0o644))

	var got []string
	collect := func(s string) { got = append(got, s) }

	controller := &Controller{}
	nextPos := controller.readFromPos(lock, logFile, 0, collect, false)

	want := []string{"line1", "prog 10%", "prog 20%", "prog 30%", "last"}
	require.Len(t, got, len(want))
	for i, expected := range want {
		require.Equal(t, expected, got[i], "token[%d] mismatch", i)
	}

	// Append more data; the incremental read must return just the new tokens.
	appendPart := "tail1\r\ntail2\n"
	f, err := os.OpenFile(logFile, os.O_APPEND|os.O_WRONLY, 0o644)
	require.NoError(t, err)
	_, err = f.WriteString(appendPart)
	require.NoError(t, err, "append write")
	_ = f.Close()

	got = got[:0]
	controller.readFromPos(lock, logFile, nextPos, collect, false)

	want = []string{"tail1", "tail2"}
	require.Len(t, got, len(want))
	for i, expected := range want {
		require.Equal(t, expected, got[i], "incremental token[%d] mismatch", i)
	}
}

// TestReadFromPos_LongLine verifies that a single 256KB line is delivered as
// one token, without its trailing newline.
func TestReadFromPos_LongLine(t *testing.T) {
	logFile := filepath.Join(t.TempDir(), "stdout.log")

	// One line well above 64KB: 256KB of 'x' followed by a newline.
	payload := strings.Repeat("x", 256*1024)
	longLine := payload + "\n"
	require.NoError(t, os.WriteFile(logFile, []byte(longLine), 0o644))

	var got []string
	collect := func(s string) { got = append(got, s) }

	controller := &Controller{}
	controller.readFromPos(&sync.Mutex{}, logFile, 0, collect, false)

	require.Len(t, got, 1, "expected one token")
	require.Equal(t, strings.TrimSuffix(longLine, "\n"), got[0], "long line mismatch")
}

// TestReadFromPos_FlushesTrailingLine verifies that an unterminated last line
// is withheld on a normal read and emitted once flushIncomplete is requested.
func TestReadFromPos_FlushesTrailingLine(t *testing.T) {
	logPath := filepath.Join(t.TempDir(), "stdout.log")
	writeErr := os.WriteFile(logPath, []byte("line1\nlastline-without-newline"), 0o644)
	assert.NoError(t, writeErr)

	controller := NewController("", "")
	lock := &sync.Mutex{}

	var lines []string
	collect := func(text string) {
		lines = append(lines, text)
	}

	// Regular read: only the newline-terminated line comes through.
	pos := controller.readFromPos(lock, logPath, 0, collect, false)
	assert.GreaterOrEqual(t, pos, int64(0))
	assert.Equal(t, []string{"line1"}, lines)

	// Final read with flushing enabled: the unterminated tail is emitted too.
	controller.readFromPos(lock, logPath, pos, collect, true)
	assert.Equal(t, []string{"line1", "lastline-without-newline"}, lines)
}

// TestRunCommand_Echo runs a command that writes one line to stdout and one
// to stderr, and checks that each line reaches the matching hook and that the
// completion hook fires. Skipped on Windows or when bash is not in PATH.
func TestRunCommand_Echo(t *testing.T) {
	if goruntime.GOOS == "windows" {
		t.Skip("bash not available on windows")
	}
	if _, lookErr := exec.LookPath("bash"); lookErr != nil {
		t.Skip("bash not found in PATH")
	}

	controller := NewController("", "")

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	var sessionID string
	var stdoutLines, stderrLines []string
	completeCh := make(chan struct{}, 1)

	hooks := ExecuteResultHook{
		OnExecuteInit:   func(s string) { sessionID = s },
		OnExecuteStdout: func(s string) { stdoutLines = append(stdoutLines, s) },
		OnExecuteStderr: func(s string) { stderrLines = append(stderrLines, s) },
		OnExecuteError: func(err *execute.ErrorOutput) {
			require.Failf(t, "unexpected error hook", "%+v", err)
		},
		OnExecuteComplete: func(_ time.Duration) {
			completeCh <- struct{}{}
		},
	}
	req := &ExecuteCodeRequest{
		Code:    `echo "hello"; echo "errline" 1>&2`,
		Cwd:     t.TempDir(),
		Timeout: 5 * time.Second,
		Hooks:   hooks,
	}

	require.NoError(t, controller.runCommand(ctx, req))

	// Allow up to two seconds for the completion hook to signal.
	select {
	case <-completeCh:
	case <-time.After(2 * time.Second):
		require.Fail(t, "timeout waiting for completion hook")
	}

	require.NotEmpty(t, sessionID, "expected session id to be set")
	require.Equal(t, []string{"hello"}, stdoutLines)
	require.Equal(t, []string{"errline"}, stderrLines)
}

// TestRunCommand_Error runs a command that prints one line and exits with
// status 3, and checks that the error hook reports "CommandExecError" with the
// exit code as its value. Skipped on Windows or when bash is not in PATH.
func TestRunCommand_Error(t *testing.T) {
	if goruntime.GOOS == "windows" {
		t.Skip("bash not available on windows")
	}
	if _, lookErr := exec.LookPath("bash"); lookErr != nil {
		t.Skip("bash not found in PATH")
	}

	controller := NewController("", "")

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	var sessionID string
	var gotErr *execute.ErrorOutput
	var stdoutLines, stderrLines []string
	// Capacity 2: both the error hook and the completion hook may signal.
	completeCh := make(chan struct{}, 2)

	hooks := ExecuteResultHook{
		OnExecuteInit:   func(s string) { sessionID = s },
		OnExecuteStdout: func(s string) { stdoutLines = append(stdoutLines, s) },
		OnExecuteStderr: func(s string) { stderrLines = append(stderrLines, s) },
		OnExecuteError: func(err *execute.ErrorOutput) {
			gotErr = err
			completeCh <- struct{}{}
		},
		OnExecuteComplete: func(_ time.Duration) {
			completeCh <- struct{}{}
		},
	}
	req := &ExecuteCodeRequest{
		Code:    `echo "before"; exit 3`,
		Cwd:     t.TempDir(),
		Timeout: 5 * time.Second,
		Hooks:   hooks,
	}

	require.NoError(t, controller.runCommand(ctx, req))

	// Allow up to two seconds for either hook to signal.
	select {
	case <-completeCh:
	case <-time.After(2 * time.Second):
		require.Fail(t, "timeout waiting for completion hook")
	}

	require.NotEmpty(t, sessionID, "expected session id to be set")
	require.Equal(t, []string{"before"}, stdoutLines)
	require.Empty(t, stderrLines, "expected no stderr")
	require.NotNil(t, gotErr, "expected error hook to be called")
	require.Equal(t, "CommandExecError", gotErr.EName)
	require.Equal(t, "3", gotErr.EValue)
}

// TestStdLogDescriptor_AutoCreatesTempDir checks that stdLogDescriptor creates
// a missing temp directory instead of failing.
// Regression test for https://github.com/alibaba/OpenSandbox/issues/400.
func TestStdLogDescriptor_AutoCreatesTempDir(t *testing.T) {
	if goruntime.GOOS == "windows" {
		t.Skip("TMPDIR env var has no effect on Windows")
	}

	// Redirect os.TempDir() to a directory that has not been created yet.
	missingDir := filepath.Join(t.TempDir(), "deleted_tmp")
	t.Setenv("TMPDIR", missingDir)

	controller := NewController("", "")
	outFile, errFile, err := controller.stdLogDescriptor("test-session")
	require.NoError(t, err)
	outFile.Close()
	errFile.Close()

	// Opening the descriptors must have created the directory.
	dirInfo, statErr := os.Stat(missingDir)
	require.NoError(t, statErr, "expected temp dir to be created, stat error")
	require.True(t, dirInfo.IsDir(), "expected %s to be a directory", missingDir)
}

// TestCombinedOutputDescriptor_AutoCreatesTempDir checks that
// combinedOutputDescriptor creates a missing temp directory instead of failing.
// Regression test for https://github.com/alibaba/OpenSandbox/issues/400.
func TestCombinedOutputDescriptor_AutoCreatesTempDir(t *testing.T) {
	if goruntime.GOOS == "windows" {
		t.Skip("TMPDIR env var has no effect on Windows")
	}

	// Redirect os.TempDir() to a directory that has not been created yet.
	missingDir := filepath.Join(t.TempDir(), "deleted_tmp")
	t.Setenv("TMPDIR", missingDir)

	controller := NewController("", "")
	combined, err := controller.combinedOutputDescriptor("test-session")
	require.NoError(t, err)
	combined.Close()

	// Opening the descriptor must have created the directory.
	dirInfo, statErr := os.Stat(missingDir)
	require.NoError(t, statErr, "expected temp dir to be created, stat error")
	require.True(t, dirInfo.IsDir(), "expected %s to be a directory", missingDir)
}


================================================
FILE: components/execd/pkg/runtime/command_windows.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build windows
// +build windows

package runtime

import (
	"context"
	"errors"
	"fmt"
	"os"
	"os/exec"
	"strconv"
	"time"

	"github.com/alibaba/opensandbox/execd/pkg/jupyter/execute"
	"github.com/alibaba/opensandbox/execd/pkg/log"
	"github.com/alibaba/opensandbox/execd/pkg/util/safego"
)

// runCommand executes shell commands and streams their output on Windows.
//
// The command runs through `cmd /C` with stdout and stderr redirected to
// per-session log files; two goroutines tail those files and forward lines to
// the OnExecuteStdout / OnExecuteStderr hooks. The call blocks until the
// command exits.
//
// A non-nil error is returned only when the log files cannot be opened. Start
// and run failures are reported through OnExecuteError and the function
// returns nil; OnExecuteComplete is invoked only on success.
func (c *Controller) runCommand(ctx context.Context, request *ExecuteCodeRequest) error {
	session := c.newContextID()
	request.Hooks.OnExecuteInit(session)

	stdout, stderr, err := c.stdLogDescriptor(session)
	if err != nil {
		return fmt.Errorf("failed to get stdlog descriptor: %w", err)
	}
	// Release the log file handles on every exit path; the tail goroutines
	// open the files by path and do not depend on these handles.
	defer stdout.Close()
	defer stderr.Close()

	startAt := time.Now()
	log.Info("received command: %v", request.Code)
	cmd := exec.CommandContext(ctx, "cmd", "/C", request.Code)

	cmd.Stdout = stdout
	cmd.Stderr = stderr
	cmd.Dir = request.Cwd
	extraEnv := mergeExtraEnvs(loadExtraEnvFromFile(), request.Envs)
	cmd.Env = mergeEnvs(os.Environ(), extraEnv)

	// done is only ever closed; closing it tells both tailers to do a final
	// flush and exit.
	done := make(chan struct{}, 1)
	safego.Go(func() {
		c.tailStdPipe(c.stdoutFileName(session), request.Hooks.OnExecuteStdout, done)
	})
	safego.Go(func() {
		c.tailStdPipe(c.stderrFileName(session), request.Hooks.OnExecuteStderr, done)
	})

	err = cmd.Start()
	if err != nil {
		// Stop the tailers, otherwise they would poll the log files forever.
		close(done)
		request.Hooks.OnExecuteError(&execute.ErrorOutput{EName: "CommandExecError", EValue: err.Error()})
		log.Error("CommandExecError: error starting commands: %v", err)
		return nil
	}

	kernel := &commandKernel{
		pid:          cmd.Process.Pid,
		content:      request.Code,
		isBackground: false,
	}
	c.storeCommandKernel(session, kernel)

	err = cmd.Wait()
	close(done)
	// NOTE(review): the tailers flush asynchronously after done is closed, so
	// the last output lines may reach their hooks after the hooks below run.
	if err != nil {
		var eName, eValue string
		var traceback []string

		// For a non-zero exit the value is the numeric exit code; for any
		// other failure it is the error text.
		var exitError *exec.ExitError
		if errors.As(err, &exitError) {
			exitCode := exitError.ExitCode()
			eName = "CommandExecError"
			eValue = strconv.Itoa(exitCode)
		} else {
			eName = "CommandExecError"
			eValue = err.Error()
		}
		traceback = []string{err.Error()}

		request.Hooks.OnExecuteError(&execute.ErrorOutput{
			EName:     eName,
			EValue:    eValue,
			Traceback: traceback,
		})

		log.Error("CommandExecError: error running commands: %v", err)
		return nil
	}
	request.Hooks.OnExecuteComplete(time.Since(startAt))
	return nil
}

// runBackgroundCommand executes shell commands in detached mode on Windows.
//
// The command runs through `cmd /C` with stdout and stderr both redirected to
// the session's combined output file. Starting and waiting happen on a
// background goroutine, so this function returns right after scheduling it
// (invoking OnExecuteComplete with the elapsed setup time).
//
// A non-nil error is returned only when the output file cannot be opened. The
// outcome of the command is recorded through markCommandFinished: the
// process's exit code on completion, or 255 when the process could not be
// started (matching the non-Windows implementation). cancel is always called
// once the command has finished or failed to start.
func (c *Controller) runBackgroundCommand(ctx context.Context, cancel context.CancelFunc, request *ExecuteCodeRequest) error {
	session := c.newContextID()
	request.Hooks.OnExecuteInit(session)

	pipe, err := c.combinedOutputDescriptor(session)
	if err != nil {
		return fmt.Errorf("failed to get combined output descriptor: %w", err)
	}
	// stdout and stderr share the single combined output file.
	stdoutPath := c.combinedOutputFileName(session)
	stderrPath := c.combinedOutputFileName(session)

	startAt := time.Now()
	log.Info("received command: %v", request.Code)
	cmd := exec.CommandContext(ctx, "cmd", "/C", request.Code)

	cmd.Dir = request.Cwd
	cmd.Stdout = pipe
	cmd.Stderr = pipe
	extraEnv := mergeExtraEnvs(loadExtraEnvFromFile(), request.Envs)
	cmd.Env = mergeEnvs(os.Environ(), extraEnv)

	// Best-effort: attach the null device as stdin. When it cannot be opened,
	// leave cmd.Stdin nil (os/exec then reads from the null device itself)
	// rather than assigning a nil *os.File wrapped in a non-nil io.Reader.
	devNull, devNullErr := os.OpenFile(os.DevNull, os.O_RDWR, 0)
	if devNullErr == nil {
		cmd.Stdin = devNull
	}
	closeDevNull := func() {
		if devNullErr == nil {
			devNull.Close() // best-effort
		}
	}

	safego.Go(func() {
		kernel := &commandKernel{
			pid:          -1,
			content:      request.Code,
			stdoutPath:   stdoutPath,
			stderrPath:   stderrPath,
			startedAt:    startAt,
			running:      true,
			isBackground: true,
		}

		err := cmd.Start()
		if err != nil {
			log.Error("CommandExecError: error starting commands: %v", err)
			pipe.Close() // best-effort
			closeDevNull()
			cancel()
			// Record the failed start so status queries report it instead of
			// answering "command not found" forever.
			kernel.running = false
			c.storeCommandKernel(session, kernel)
			c.markCommandFinished(session, 255, err.Error())
			return
		}

		kernel.pid = cmd.Process.Pid
		c.storeCommandKernel(session, kernel)

		// Kill the process when the context ends (e.g. timeout). cancel() below
		// guarantees this goroutine always terminates.
		safego.Go(func() {
			<-ctx.Done()
			if cmd.Process != nil {
				_ = cmd.Process.Kill() // best-effort
			}
		})

		err = cmd.Wait()
		cancel()
		pipe.Close() // best-effort
		closeDevNull()

		if err != nil {
			log.Error("CommandExecError: error running commands: %v", err)
			// Default to 1 when the failure carries no process exit status.
			exitCode := 1
			var exitError *exec.ExitError
			if errors.As(err, &exitError) {
				exitCode = exitError.ExitCode()
			}
			c.markCommandFinished(session, exitCode, err.Error())
			return
		}
		c.markCommandFinished(session, 0, "")
	})

	request.Hooks.OnExecuteComplete(time.Since(startAt))
	return nil
}


================================================
FILE: components/execd/pkg/runtime/context.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

import (
	"errors"
	"fmt"
	"net/http"
	"os"
	"path/filepath"
	"strings"

	"github.com/google/uuid"
	"k8s.io/client-go/util/retry"

	"github.com/alibaba/opensandbox/execd/pkg/jupyter"
	jupytersession "github.com/alibaba/opensandbox/execd/pkg/jupyter/session"
	"github.com/alibaba/opensandbox/execd/pkg/log"
)

// CreateContext provisions a kernel-backed session and returns its ID.
// Bash language uses Jupyter kernel like other languages; for pipe-based bash sessions use CreateBashSession (session API).
//
// Session creation is retried with kernelWaitingBackoff. After the session
// exists its kernel is registered and the working directory is set up; if that
// setup fails the session is removed again (best-effort) and an error is
// returned together with an empty ID.
func (c *Controller) CreateContext(req *CreateContextRequest) (string, error) {
	// Create a new Jupyter session.
	var (
		client  *jupyter.Client
		session *jupytersession.Session
		err     error
	)

	err = retry.OnError(kernelWaitingBackoff, func(err error) bool {
		// Every error is considered retriable; log each failed attempt.
		log.Error("failed to create session, retrying: %v", err)
		return err != nil
	}, func() error {
		client, session, err = c.createJupyterContext(*req)
		return err
	})
	if err != nil {
		return "", err
	}

	kernel := &jupyterKernel{
		kernelID: session.Kernel.ID,
		client:   client,
		language: req.Language,
	}
	c.storeJupyterKernel(session.ID, kernel)

	err = c.setWorkingDir(kernel, req)
	if err != nil {
		// The ID is never handed to the caller on this path, so nobody else
		// could delete the session; drop it here instead of leaking it.
		if cleanupErr := c.deleteSessionAndCleanup(session.ID); cleanupErr != nil {
			log.Error("failed to clean up session %s after working dir setup failure: %v", session.ID, cleanupErr)
		}
		return "", fmt.Errorf("failed to setup working dir: %w", err)
	}

	return session.ID, nil
}

// DeleteContext removes the context identified by session by delegating to
// deleteSessionAndCleanup, and returns that call's error unchanged.
func (c *Controller) DeleteContext(session string) error {
	return c.deleteSessionAndCleanup(session)
}

// GetContext returns the ID and language of the context registered for
// session, or ErrContextNotFound (with a zero CodeContext) when no kernel is
// registered under that ID.
func (c *Controller) GetContext(session string) (CodeContext, error) {
	if kernel := c.getJupyterKernel(session); kernel != nil {
		return CodeContext{ID: session, Language: kernel.language}, nil
	}
	return CodeContext{}, ErrContextNotFound
}

// ListContext lists known contexts. An empty language lists every context;
// the command, background-command and SQL languages are rejected with an
// error; any other value lists the contexts of that language only.
func (c *Controller) ListContext(language string) ([]CodeContext, error) {
	// Checked first, in the same order as before: languages with no contexts.
	unsupported := language == Command.String() ||
		language == BackgroundCommand.String() ||
		language == SQL.String()
	if unsupported {
		return nil, fmt.Errorf("unsupported language context operation: %s", language)
	}

	if language == "" {
		return c.listAllContexts()
	}
	return c.listLanguageContexts(Language(language))
}

// DeleteLanguageContext deletes every context of the given language. Each
// session ID is deleted at most once even if it is listed more than once.
// Deletion stops at the first failure, which is returned wrapped with the
// offending context ID.
func (c *Controller) DeleteLanguageContext(language Language) error {
	listed, err := c.listLanguageContexts(language)
	if err != nil {
		return err
	}

	handled := make(map[string]struct{})
	for _, item := range listed {
		id := item.ID
		if _, already := handled[id]; already {
			continue
		}
		handled[id] = struct{}{}

		if delErr := c.deleteSessionAndCleanup(id); delErr != nil {
			return fmt.Errorf("error deleting context %s: %w", id, delErr)
		}
	}
	return nil
}

// deleteSessionAndCleanup deletes the Jupyter session and then removes its
// local bookkeeping (kernel map entry and any default-language mapping).
// It returns ErrContextNotFound when no kernel is registered for session, and
// leaves the local bookkeeping untouched when the remote deletion fails.
func (c *Controller) deleteSessionAndCleanup(session string) error {
	registered := c.getJupyterKernel(session)
	if registered == nil {
		return ErrContextNotFound
	}

	if delErr := c.jupyterClient().DeleteSession(session); delErr != nil {
		return delErr
	}

	// Remote deletion succeeded: forget the session locally.
	c.jupyterClientMap.Delete(session)
	c.deleteDefaultSessionByID(session)
	return nil
}

// newContextID generates a fresh identifier: a new UUID string with its
// dashes removed.
func (c *Controller) newContextID() string {
	id := uuid.New().String()
	return strings.ReplaceAll(id, "-", "")
}

// newIpynbPath returns the notebook path "<cwd>/<sessionID>.ipynb". A
// non-empty cwd is created first (including parents); a failure to create it
// is returned with an empty path.
func (c *Controller) newIpynbPath(sessionID, cwd string) (string, error) {
	if cwd != "" {
		if mkErr := os.MkdirAll(cwd, os.ModePerm); mkErr != nil {
			return "", mkErr
		}
	}

	notebook := sessionID + ".ipynb"
	return filepath.Join(cwd, notebook), nil
}

// createDefaultLanguageJupyterContext prewarms a session for stateless
// execution of the given language. It does nothing when a default session is
// already recorded for that language; otherwise it creates one (retrying with
// kernelWaitingBackoff), records it as the language default, and registers
// its kernel.
func (c *Controller) createDefaultLanguageJupyterContext(language Language) error {
	if existing := c.getDefaultLanguageSession(language); existing != "" {
		return nil
	}

	var (
		client  *jupyter.Client
		session *jupytersession.Session
		err     error
	)

	// Every error is treated as retriable; each failed attempt is logged.
	shouldRetry := func(err error) bool {
		log.Error("failed to create context, retrying: %v", err)
		return err != nil
	}
	request := CreateContextRequest{
		Language: language,
		Cwd:      "",
	}
	attempt := func() error {
		client, session, err = c.createJupyterContext(request)
		return err
	}

	if err = retry.OnError(kernelWaitingBackoff, shouldRetry, attempt); err != nil {
		return err
	}

	c.setDefaultLanguageSession(language, session.ID)
	c.storeJupyterKernel(session.ID, &jupyterKernel{
		kernelID: session.Kernel.ID,
		client:   client,
		language: language,
	})
	return nil
}

// createJupyterContext performs the actual context creation workflow: it
// resolves a kernel for the requested language, creates a session bound to a
// new notebook path, and confirms that the session's kernel shows up in the
// server's kernel list. It returns the client used and the created session.
// NOTE(review): when the kernel check fails, the session created just before
// is not deleted here — confirm whether callers rely on that.
func (c *Controller) createJupyterContext(request CreateContextRequest) (*jupyter.Client, *jupytersession.Session, error) {
	client := c.jupyterClient()

	kernel, err := c.searchKernel(client, request.Language)
	if err != nil {
		return nil, nil, err
	}

	sessionID := c.newContextID()
	notebookPath, err := c.newIpynbPath(sessionID, request.Cwd)
	if err != nil {
		return nil, nil, err
	}

	created, err := client.CreateSession(sessionID, notebookPath, kernel)
	if err != nil {
		return nil, nil, err
	}

	running, err := client.ListKernels()
	if err != nil {
		return nil, nil, err
	}

	// Succeed as soon as the session's kernel is found among the listed ones.
	for _, candidate := range running {
		if candidate.ID == created.Kernel.ID {
			return client, created, nil
		}
	}
	return nil, nil, errors.New("kernel not found")
}

// storeJupyterKernel caches a session -> kernel mapping, replacing any kernel
// previously stored for the same session ID.
func (c *Controller) storeJupyterKernel(sessionID string, kernel *jupyterKernel) {
	c.jupyterClientMap.Store(sessionID, kernel)
}

// jupyterClient builds a new Jupyter client for the controller's base URL.
// Its HTTP client sends requests through an AuthTransport carrying the
// controller's token on top of http.DefaultTransport. A new client is built
// on every call.
func (c *Controller) jupyterClient() *jupyter.Client {
	authTransport := &jupyter.AuthTransport{
		Token: c.token,
		Base:  http.DefaultTransport,
	}
	httpClient := &http.Client{Transport: authTransport}

	return jupyter.NewClient(
		c.baseURL,
		jupyter.WithToken(c.token),
		jupyter.WithHTTPClient(httpClient),
	)
}

// getDefaultLanguageSession returns the session ID registered as the default
// for the given language, or "" when no entry exists or the stored value is
// not a string.
func (c *Controller) getDefaultLanguageSession(language Language) string {
	stored, found := c.defaultLanguageSessions.Load(language)
	if !found {
		return ""
	}

	sessionID, isString := stored.(string)
	if !isString {
		return ""
	}
	return sessionID
}

// setDefaultLanguageSession records sessionID as the default session for the
// given language, overwriting any previously stored default.
func (c *Controller) setDefaultLanguageSession(language Language, sessionID string) {
	c.defaultLanguageSessions.Store(language, sessionID)
}

// deleteDefaultSessionByID removes every default-language entry whose stored
// session ID equals sessionID. Entries holding non-string values are left
// untouched.
func (c *Controller) deleteDefaultSessionByID(sessionID string) {
	c.defaultLanguageSessions.Range(func(lang, stored any) bool {
		current, isString := stored.(string)
		if isString && current == sessionID {
			c.defaultLanguageSessions.Delete(lang)
		}
		// Always continue: the whole map is scanned, not just the first match.
		return true
	})
}

// listAllContexts returns every known code context exactly once.
//
// Contexts are collected from two places: the kernel cache
// (jupyterClientMap) and the per-language default sessions
// (defaultLanguageSessions). A default session that is also present in the
// kernel cache is reported only once, using the entry from the kernel cache;
// default sessions with no cache entry are appended afterwards. Default
// entries with an empty session ID are ignored.
//
// The returned error is always nil.
func (c *Controller) listAllContexts() ([]CodeContext, error) {
	contexts := make([]CodeContext, 0)
	// seen tracks session IDs already emitted so that a session registered in
	// both maps does not show up twice.
	seen := make(map[string]struct{})

	c.jupyterClientMap.Range(func(key, value any) bool {
		session, _ := key.(string)
		if kernel, ok := value.(*jupyterKernel); ok && kernel != nil {
			seen[session] = struct{}{}
			contexts = append(contexts, CodeContext{ID: session, Language: kernel.language})
		}
		return true
	})

	c.defaultLanguageSessions.Range(func(key, value any) bool {
		lang, _ := key.(Language)
		session, _ := value.(string)
		if session == "" {
			return true
		}
		if _, duplicate := seen[session]; duplicate {
			return true
		}
		seen[session] = struct{}{}
		contexts = append(contexts, CodeContext{ID: session, Language: lang})
		return true
	})

	return contexts, nil
}

// listLanguageContexts returns the code contexts that belong to language,
// each exactly once.
//
// Cached kernels whose language matches are listed first. The language's
// default session is appended only when it is non-empty and was not already
// listed from the kernel cache, so a default session that is also cached is
// not reported twice.
//
// The returned error is always nil.
func (c *Controller) listLanguageContexts(language Language) ([]CodeContext, error) {
	contexts := make([]CodeContext, 0)
	c.jupyterClientMap.Range(func(key, value any) bool {
		session, _ := key.(string)
		if kernel, ok := value.(*jupyterKernel); ok && kernel != nil && kernel.language == language {
			contexts = append(contexts, CodeContext{ID: session, Language: language})
		}
		return true
	})

	if defaultContext := c.getDefaultLanguageSession(language); defaultContext != "" {
		alreadyListed := false
		for _, existing := range contexts {
			if existing.ID == defaultContext {
				alreadyListed = true
				break
			}
		}
		if !alreadyListed {
			contexts = append(contexts, CodeContext{ID: defaultContext, Language: language})
		}
	}

	return contexts, nil
}


================================================
FILE: components/execd/pkg/runtime/context_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

import (
	"net/http"
	"net/http/httptest"
	"os"
	"path/filepath"
	"strings"
	"testing"

	"github.com/stretchr/testify/require"
)

// TestListContextsAndNewIpynbPath checks per-language and global context
// listing, and that newIpynbPath creates the target directory and returns
// "<cwd>/<id>.ipynb".
func TestListContextsAndNewIpynbPath(t *testing.T) {
	ctrl := NewController("http://example", "token")
	ctrl.jupyterClientMap.Store("session-python", &jupyterKernel{language: Python})
	ctrl.defaultLanguageSessions.Store(Go, "session-go-default")

	// Only the cached Python kernel is expected for the Python language.
	pythonOnly, err := ctrl.listLanguageContexts(Python)
	require.NoError(t, err)
	require.Len(t, pythonOnly, 1)
	require.Equal(t, "session-python", pythonOnly[0].ID)
	require.Equal(t, Python, pythonOnly[0].Language)

	// The global listing adds the Go default session.
	everything, err := ctrl.listAllContexts()
	require.NoError(t, err)
	require.Len(t, everything, 2)

	nestedDir := filepath.Join(t.TempDir(), "nested")
	notebook, err := ctrl.newIpynbPath("abc123", nestedDir)
	require.NoError(t, err)
	_, statErr := os.Stat(nestedDir)
	require.NoError(t, statErr, "expected directory to be created")
	require.Equal(t, filepath.Join(nestedDir, "abc123.ipynb"), notebook)
}

// TestNewContextID_UniqueAndLength checks that two consecutive context IDs
// are non-empty, distinct, and 32 characters long.
func TestNewContextID_UniqueAndLength(t *testing.T) {
	ctrl := NewController("", "")
	first, second := ctrl.newContextID(), ctrl.newContextID()

	require.NotEmpty(t, first)
	require.NotEmpty(t, second)
	require.NotEqual(t, first, second, "expected unique ids")
	require.Len(t, first, 32)
	require.Len(t, second, 32)
}

func TestNewIpynbPath_ErrorWhenCwdIsFile(t *testing.T) {
	c := NewController("", "")
	tmpFile := filepath.Join(t.TempDir(), "file.txt")
	require.NoError(t, os.WriteFile(tmpFile, []byte("x"), 0o644))

	_, err := c.newIpynbPath("abc", tmpFile)
	require.Error(t, err, "expected error when cwd is a file")
}

// TestListContextUnsupportedLanguage checks that ListContext rejects the
// command, background-command and sql languages.
func TestListContextUnsupportedLanguage(t *testing.T) {
	ctrl := NewController("", "")

	cases := []struct {
		language Language
		message  string
	}{
		{Command, "expected error for command language"},
		{BackgroundCommand, "expected error for background-command language"},
		{SQL, "expected error for sql language"},
	}

	for _, tc := range cases {
		_, err := ctrl.ListContext(tc.language.String())
		require.Error(t, err, tc.message)
	}
}

// TestDeleteContext_NotFound checks that deleting an unknown session reports
// ErrContextNotFound.
func TestDeleteContext_NotFound(t *testing.T) {
	deleteErr := NewController("", "").DeleteContext("missing")

	require.Error(t, deleteErr, "expected ErrContextNotFound")
	require.ErrorIs(t, deleteErr, ErrContextNotFound)
}

// TestGetContext_NotFound checks that looking up an unknown session reports
// ErrContextNotFound.
func TestGetContext_NotFound(t *testing.T) {
	_, lookupErr := NewController("", "").GetContext("missing")

	require.Error(t, lookupErr, "expected ErrContextNotFound")
	require.ErrorIs(t, lookupErr, ErrContextNotFound)
}

func TestDeleteContext_RemovesCacheOnSuccess(t *testing.T) {
	sessionID := "sess-123"

	// mock jupyter server that accepts DELETE
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		require.Equal(t, http.MethodDelete, r.Method, "unexpected method")
		require.True(t, strings.HasSuffix(r.URL.Path, "/api/sessions/"+sessionID), "unexpected path: %s", r.URL.Path)
		w.WriteHeader(http.StatusNoContent)
	}))
	defer server.Close()

	c := NewController(server.URL, "token")
	c.jupyterClientMap.Store(sessionID, &jupyterKernel{language: Python})
	c.defaultLanguageSessions.Store(Python, sessionID)

	require.NoError(t, c.DeleteContext(sessionID))

	require.Nil(t, c.getJupyterKernel(sessionID), "expected cache to be cleared")
	_, ok := c.defaultLanguageSessions.Load(Python)
	require.False(t, ok, "expected default session entry to be removed")
}

// TestDeleteLanguageContext_RemovesCacheOnSuccess checks that
// DeleteLanguageContext issues exactly one DELETE per cached session of the
// language and clears both the kernel cache and the default-session entry.
func TestDeleteLanguageContext_RemovesCacheOnSuccess(t *testing.T) {
	lang := Python
	session1 := "sess-1"
	session2 := "sess-2"

	// Mock Jupyter server that counts DELETE calls per session. The handler
	// runs on a server goroutine, so it reports problems with t.Errorf:
	// require.* calls t.FailNow, which must only be called from the test
	// goroutine.
	deleteCalls := make(map[string]int)
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodDelete {
			t.Errorf("unexpected method: %s", r.Method)
		}
		switch {
		case strings.Contains(r.URL.Path, session1):
			deleteCalls[session1]++
		case strings.Contains(r.URL.Path, session2):
			deleteCalls[session2]++
		default:
			t.Errorf("unexpected path: %s", r.URL.Path)
		}
		w.WriteHeader(http.StatusNoContent)
	}))
	defer server.Close()

	c := NewController(server.URL, "token")
	c.jupyterClientMap.Store(session1, &jupyterKernel{language: lang})
	c.jupyterClientMap.Store(session2, &jupyterKernel{language: lang})
	c.defaultLanguageSessions.Store(lang, session2)

	require.NoError(t, c.DeleteLanguageContext(lang))

	_, ok := c.jupyterClientMap.Load(session1)
	require.False(t, ok, "expected session1 removed from cache")
	_, ok = c.jupyterClientMap.Load(session2)
	require.False(t, ok, "expected session2 removed from cache")
	_, ok = c.defaultLanguageSessions.Load(lang)
	require.False(t, ok, "expected default entry removed")
	require.Equal(t, 1, deleteCalls[session1])
	require.Equal(t, 1, deleteCalls[session2])
}


================================================
FILE: components/execd/pkg/runtime/ctrl.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

import (
	"context"
	"database/sql"
	"fmt"
	"sync"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"

	"github.com/alibaba/opensandbox/execd/pkg/jupyter"
)

// kernelWaitingBackoff is the retry schedule for operations that wait on the
// Jupyter kernel: up to 60 steps, starting at 500ms, with the delay
// multiplied by 1.5 after each step and 10% jitter.
//
// NOTE(review): no Cap is set, so the delay keeps growing geometrically
// (0.5s * 1.5^19 is already about 18 minutes by the 20th step). Confirm that
// 60 steps with this factor is intended rather than a bounded total wait.
var kernelWaitingBackoff = wait.Backoff{
	Steps:    60,
	Duration: 500 * time.Millisecond,
	Factor:   1.5,
	Jitter:   0.1,
}

// Controller manages code execution across runtimes.
//
// It holds the Jupyter server connection settings, per-session caches for the
// different execution backends, and the database handle used for SQL.
type Controller struct {
	// baseURL and token address and authenticate the Jupyter server;
	// runJupyter refuses to run when either is empty.
	baseURL                 string
	token                   string
	// NOTE(review): the use of mu is not visible in this part of the package.
	mu                      sync.RWMutex
	// Session caches, keyed as described by the trailing comments.
	jupyterClientMap        sync.Map // map[sessionID]*jupyterKernel
	defaultLanguageSessions sync.Map // map[Language]string
	commandClientMap        sync.Map // map[sessionID]*commandKernel
	bashSessionClientMap    sync.Map // map[sessionID]*bashSession
	// db is the handle runSQL pings and queries after calling initDB.
	db                      *sql.DB
	// dbOnce presumably guards one-time initialization of db — confirm in initDB.
	dbOnce                  sync.Once
}

// jupyterKernel is the cached state for one Jupyter-backed session.
type jupyterKernel struct {
	// mu serializes executions: runJupyterCode uses TryLock and reports
	// "session is busy" when the kernel is already executing.
	mu       sync.Mutex
	// kernelID identifies the kernel on the Jupyter server.
	kernelID string
	// client is the Jupyter client used to connect to, execute on and
	// interrupt the kernel.
	client   *jupyter.Client
	// language is the language this kernel was created for.
	language Language
}

// commandKernel is the cached state for one command session.
//
// NOTE(review): only pid is used in the code visible here (Interrupt passes
// it to killPid); the remaining field descriptions are inferred from their
// names and should be confirmed against the command runner.
type commandKernel struct {
	// pid is the process ID signalled by Interrupt.
	pid          int
	// stdoutPath / stderrPath: presumably files capturing the command output.
	stdoutPath   string
	stderrPath   string
	// startedAt / finishedAt: presumably the command start and end times;
	// finishedAt is a pointer and may be nil.
	startedAt    time.Time
	finishedAt   *time.Time
	// exitCode: presumably nil until the command has exited.
	exitCode     *int
	errMsg       string
	running      bool
	isBackground bool
	content      string
}

// NewController returns a Controller bound to the given Jupyter base URL and
// token. All caches start empty and the database handle is left unset.
func NewController(baseURL, token string) *Controller {
	ctrl := new(Controller)
	ctrl.baseURL = baseURL
	ctrl.token = token
	return ctrl
}

// Execute routes a request to the backend that handles its language.
//
// A context is derived from request.Timeout: a positive timeout yields a
// deadline-bound context, otherwise a plain cancellable one. For
// BackgroundCommand the cancel function is passed on to
// runBackgroundCommand; for every other language it is called when Execute
// returns. Unrecognized languages produce an "unknown language" error.
func (c *Controller) Execute(request *ExecuteCodeRequest) error {
	var (
		ctx    context.Context
		cancel context.CancelFunc
	)
	if request.Timeout > 0 {
		ctx, cancel = context.WithTimeout(context.Background(), request.Timeout)
	} else {
		ctx, cancel = context.WithCancel(context.Background())
	}

	// The background runner receives cancel itself, so it is not deferred here.
	if request.Language == BackgroundCommand {
		return c.runBackgroundCommand(ctx, cancel, request)
	}

	// All remaining paths release the context as soon as Execute returns.
	defer cancel()

	switch request.Language {
	case Command:
		return c.runCommand(ctx, request)
	case Bash, Python, Java, JavaScript, TypeScript, Go:
		return c.runJupyter(ctx, request)
	case SQL:
		return c.runSQL(ctx, request)
	}

	return fmt.Errorf("unknown language: %s", request.Language)
}


================================================
FILE: components/execd/pkg/runtime/env.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

import (
	"fmt"
	"os"
	"sort"
	"strings"

	"github.com/alibaba/opensandbox/execd/pkg/log"
)

// loadExtraEnvFromFile reads key=value lines from the file named by the
// EXECD_ENVS environment variable.
//
// It returns nil when EXECD_ENVS is unset/empty or the file cannot be read
// (the read failure is logged as a warning). Otherwise it returns a map with
// one entry per valid line:
//   - empty lines and lines starting with '#' are ignored;
//   - each line is split at the first '='; whitespace around the key is
//     trimmed;
//   - lines without '=' or with an empty key are skipped with a warning,
//     since an environment variable must have a name;
//   - values are passed through os.ExpandEnv, so $VAR / ${VAR} references
//     are resolved against the current process environment.
func loadExtraEnvFromFile() map[string]string {
	path := os.Getenv("EXECD_ENVS")
	if path == "" {
		return nil
	}

	data, err := os.ReadFile(path)
	if err != nil {
		log.Warn("EXECD_ENVS: failed to read file %s: %v", path, err)
		return nil
	}

	envs := make(map[string]string)
	for _, line := range strings.Split(string(data), "\n") {
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}

		key, value, found := strings.Cut(line, "=")
		key = strings.TrimSpace(key)
		if !found || key == "" {
			log.Warn("EXECD_ENVS: skip malformed line: %s", line)
			continue
		}
		envs[key] = os.ExpandEnv(value)
	}

	return envs
}

// mergeEnvs overlays extra onto base and returns the merged KEY=VALUE slice.
//
// When extra is empty, base is returned unchanged. Otherwise the result is
// built in a deterministic order:
//   - base entries keep their original relative order; entries without '='
//     are dropped, and a key repeated in base keeps its first position with
//     the last value;
//   - keys from extra that already exist in base replace the value in place;
//   - keys that only exist in extra are appended in sorted key order.
//
// The set of resulting variables is the same as a plain map merge where
// extra wins; only the ordering is made stable.
func mergeEnvs(base []string, extra map[string]string) []string {
	if len(extra) == 0 {
		return base
	}

	out := make([]string, 0, len(base)+len(extra))
	// position maps a variable name to its index in out.
	position := make(map[string]int, len(base)+len(extra))

	for _, kv := range base {
		key, _, ok := strings.Cut(kv, "=")
		if !ok {
			continue
		}
		if idx, seen := position[key]; seen {
			out[idx] = kv
			continue
		}
		position[key] = len(out)
		out = append(out, kv)
	}

	// Sort the override keys so that newly appended variables do not depend
	// on map iteration order.
	keys := make([]string, 0, len(extra))
	for k := range extra {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	for _, k := range keys {
		entry := fmt.Sprintf("%s=%s", k, extra[k])
		if idx, seen := position[k]; seen {
			out[idx] = entry
			continue
		}
		position[k] = len(out)
		out = append(out, entry)
	}

	return out
}

// mergeExtraEnvs combines the file-provided variables with the request-level
// ones, letting request values win on conflicting keys.
//
// When fromRequest is empty, fromFile is returned as-is (no copy is made);
// otherwise a new map is allocated and neither input is modified.
func mergeExtraEnvs(fromFile, fromRequest map[string]string) map[string]string {
	if len(fromRequest) == 0 {
		return fromFile
	}

	combined := make(map[string]string, len(fromFile)+len(fromRequest))
	// Apply the layers in precedence order: later layers overwrite earlier ones.
	for _, layer := range []map[string]string{fromFile, fromRequest} {
		for name, value := range layer {
			combined[name] = value
		}
	}

	return combined
}


================================================
FILE: components/execd/pkg/runtime/env_test.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

import (
	"os"
	"path/filepath"
	"strings"
	"testing"

	"github.com/stretchr/testify/require"
)

func TestLoadExtraEnvFromFileUnset(t *testing.T) {
	t.Setenv("EXECD_ENVS", "")
	require.Nil(t, loadExtraEnvFromFile(), "expected nil when EXECD_ENVS unset")
}

// TestLoadExtraEnvFromFileParsesAndExpands checks that comments, blank lines
// and malformed lines are skipped, that $VAR references are expanded, and
// that a key with an empty value is kept.
func TestLoadExtraEnvFromFileParsesAndExpands(t *testing.T) {
	envPath := filepath.Join(t.TempDir(), "env")

	t.Setenv("EXECD_ENVS", envPath)
	t.Setenv("BASE_DIR", "/opt/base")

	fileLines := []string{
		"# comment",
		"FOO=bar",
		"PATH=$BASE_DIR/bin",
		"MALFORMED",
		"EMPTY=",
		"",
	}
	payload := []byte(strings.Join(fileLines, "\n"))
	require.NoError(t, os.WriteFile(envPath, payload, 0o644))

	loaded := loadExtraEnvFromFile()
	require.Len(t, loaded, 3)
	require.Equal(t, "bar", loaded["FOO"])
	require.Equal(t, "/opt/base/bin", loaded["PATH"])

	// EMPTY must be present in the map, not merely read back as a zero value.
	emptyValue, present := loaded["EMPTY"]
	require.True(t, present)
	require.Equal(t, "", emptyValue)
}

func TestLoadExtraEnvFromFileMissingFile(t *testing.T) {
	dir := t.TempDir()
	envFile := filepath.Join(dir, "does-not-exist")
	t.Setenv("EXECD_ENVS", envFile)

	require.Nil(t, loadExtraEnvFromFile(), "expected nil for missing file")
}

// TestMergeEnvsOverlaysExtra checks that mergeEnvs keeps untouched base
// variables, overrides shared keys and adds new ones.
func TestMergeEnvsOverlaysExtra(t *testing.T) {
	merged := mergeEnvs(
		[]string{"A=1", "B=2"},
		map[string]string{"B": "override", "C": "3"},
	)

	// Index the KEY=VALUE pairs so the assertions do not depend on order.
	byName := make(map[string]string, len(merged))
	for _, entry := range merged {
		if fields := strings.SplitN(entry, "=", 2); len(fields) == 2 {
			byName[fields[0]] = fields[1]
		}
	}

	require.Len(t, byName, 3)
	require.Equal(t, "1", byName["A"])
	require.Equal(t, "override", byName["B"])
	require.Equal(t, "3", byName["C"])
}

// TestMergeExtraEnvsMergesAndOverrides checks that request-level variables
// are merged with file variables and win on conflicts.
func TestMergeExtraEnvsMergesAndOverrides(t *testing.T) {
	merged := mergeExtraEnvs(
		map[string]string{"A": "1", "B": "2"},
		map[string]string{"B": "override", "C": "3"},
	)

	require.Len(t, merged, 3)
	require.Equal(t, "1", merged["A"])
	require.Equal(t, "override", merged["B"])
	require.Equal(t, "3", merged["C"])
}

// TestMergeExtraEnvsHandlesNilFromFile checks that a nil file map is accepted
// and the request variables are returned.
func TestMergeExtraEnvsHandlesNilFromFile(t *testing.T) {
	requestOnly := map[string]string{"ONLY": "request"}

	merged := mergeExtraEnvs(nil, requestOnly)

	require.Len(t, merged, 1)
	require.Equal(t, "request", merged["ONLY"])
}


================================================
FILE: components/execd/pkg/runtime/errors.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

import "errors"

// ErrContextNotFound is returned when a session ID has no matching context,
// e.g. by runJupyter when no kernel is cached for the target session.
// Callers can match it with errors.Is.
var ErrContextNotFound = errors.New("context not found")


================================================
FILE: components/execd/pkg/runtime/helpers_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

import (
	"context"
	"database/sql"
	"database/sql/driver"
	"errors"
	"fmt"
	"io"
	"sync/atomic"
	"testing"
	"time"

	"github.com/stretchr/testify/require"
)

// stubDriver holds the canned behaviour and call counters shared by the stub
// database connections used in tests.
type stubDriver struct {
	// columns and rows are the result set returned by QueryContext.
	columns          []string
	rows             [][]driver.Value
	// execRowsAffected is the rows-affected count reported by ExecContext.
	execRowsAffected int64
	// queryErr, execErr and pingErr, when non-nil, are returned by the
	// corresponding connection method.
	queryErr         error
	execErr          error
	pingErr          error
	// execCalled and queryCalled count calls; they are updated atomically.
	execCalled       int32
	queryCalled      int32
}

// stubConn is a test database connection whose behaviour is driven entirely
// by the stubDriver it points to.
type stubConn struct {
	d *stubDriver
}

// Prepare is not supported by the stub.
func (conn *stubConn) Prepare(string) (driver.Stmt, error) {
	return nil, errors.New("not implemented")
}

// Close is a no-op.
func (conn *stubConn) Close() error {
	return nil
}

// Begin is not supported by the stub.
func (conn *stubConn) Begin() (driver.Tx, error) {
	return nil, errors.New("not implemented")
}

// Ping reports the configured ping error, if any.
func (conn *stubConn) Ping(context.Context) error {
	return conn.d.pingErr
}

// ExecContext counts the call, then returns either the configured exec error
// or the configured rows-affected result.
func (conn *stubConn) ExecContext(_ context.Context, _ string, _ []driver.NamedValue) (driver.Result, error) {
	cfg := conn.d
	atomic.AddInt32(&cfg.execCalled, 1)
	if failure := cfg.execErr; failure != nil {
		return nil, failure
	}
	return driver.RowsAffected(cfg.execRowsAffected), nil
}

// QueryContext counts the call, then returns either the configured query
// error or a row iterator over the configured columns and rows.
func (conn *stubConn) QueryContext(_ context.Context, _ string, _ []driver.NamedValue) (driver.Rows, error) {
	cfg := conn.d
	atomic.AddInt32(&cfg.queryCalled, 1)
	if failure := cfg.queryErr; failure != nil {
		return nil, failure
	}
	rows := &stubRows{columns: cfg.columns, rows: cfg.rows}
	return rows, nil
}

type stubRows struct {
	columns []string
	rows    [][]driver.Value
	idx     int
}

func (r *stubRows) Columns() []string { return r.columns }
func (r *stubRows) Close() error      { return nil }
func (r *stubRows) Next(dest []driver.Value) error {
	if r.idx >= len(r.rows) {
		return io.EOF
	}
	row := r.rows[r.idx]
	r.idx++
	for i, v := range row {
		dest[i] = v
	}
	return nil
}

// stubConnector hands out stubConn values backed by a single stubDriver. It
// provides both Connect/Driver and Open, and newStubDB registers it with
// database/sql as a driver.
type stubConnector struct {
	d *stubDriver
}

// Connect returns a new stubConn bound to the shared stubDriver.
func (c *stubConnector) Connect(context.Context) (driver.Conn, error) {
	return &stubConn{d: c.d}, nil
}

// Driver returns the connector itself, which also implements Open.
func (c *stubConnector) Driver() driver.Driver {
	return c
}

// Open ignores the data source name and returns a new stubConn.
func (c *stubConnector) Open(string) (driver.Conn, error) {
	return &stubConn{d: c.d}, nil
}

// stubDriverSeq makes the driver names generated by newStubDB unique even
// when two calls observe the same clock value.
var stubDriverSeq int64

// newStubDB registers a uniquely named stub driver backed by d and returns a
// *sql.DB opened on it.
//
// sql.Register panics when a name is registered twice, so the name combines
// the current time with a process-wide atomic sequence number. The returned
// DB is closed automatically when the test finishes.
func newStubDB(t *testing.T, d *stubDriver) *sql.DB {
	t.Helper()
	driverName := fmt.Sprintf("stub-%d-%d", time.Now().UnixNano(), atomic.AddInt64(&stubDriverSeq, 1))
	sql.Register(driverName, &stubConnector{d: d})
	db, err := sql.Open(driverName, "")
	require.NoError(t, err)
	t.Cleanup(func() {
		_ = db.Close()
	})
	return db
}


================================================
FILE: components/execd/pkg/runtime/interrupt.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package runtime

import (
	"errors"
	"fmt"
	"os"
	"strings"
	"syscall"
	"time"

	"github.com/alibaba/opensandbox/execd/pkg/log"
)

// Interrupt stops execution in the specified session.
//
// The session ID is looked up, in order, among Jupyter kernels, command
// sessions and bash sessions:
//   - a Jupyter kernel is interrupted through its client;
//   - a command session has its process terminated via killPid;
//   - a bash session is closed.
//
// Each cache is read once and the returned value is reused, so a session
// removed concurrently between the check and the use cannot lead to a nil
// dereference. An error is returned when no session matches.
func (c *Controller) Interrupt(sessionID string) error {
	if kernel := c.getJupyterKernel(sessionID); kernel != nil {
		log.Warning("Interrupting Jupyter kernel %s", kernel.kernelID)
		return kernel.client.InterruptKernel(kernel.kernelID)
	}

	if kernel := c.getCommandKernel(sessionID); kernel != nil {
		return c.killPid(kernel.pid)
	}

	if c.getBashSession(sessionID) != nil {
		return c.closeBashSession(sessionID)
	}

	return errors.New("no such session")
}

// killPid sends SIGTERM followed by SIGKILL if needed.
//
// Flow:
//  1. SIGTERM is sent. If the process is already gone, nil is returned.
//  2. If SIGTERM was delivered, the process is waited on for up to 3 seconds;
//     a clean Wait result returns nil. A Wait error or a timeout falls
//     through to SIGKILL.
//  3. SIGKILL is sent. If the process is already gone, nil is returned; any
//     other failure is returned wrapped.
//  4. Signal 0 is probed up to 3 times, 50ms apart, to confirm the process
//     has disappeared; if it never does, an error is returned.
//
// "Process is gone" is detected with errors.Is against os.ErrProcessDone and
// syscall.ESRCH; the message checks are kept as a fallback for errors that
// do not wrap those sentinels.
func (c *Controller) killPid(pid int) error {
	process, err := os.FindProcess(pid)
	if err != nil {
		return err
	}
	log.Warning("Attempting to terminate process %d", pid)

	// processGone reports whether a Signal error means the target no longer exists.
	processGone := func(sigErr error) bool {
		return errors.Is(sigErr, os.ErrProcessDone) ||
			errors.Is(sigErr, syscall.ESRCH) ||
			strings.Contains(sigErr.Error(), "already finished") ||
			strings.Contains(sigErr.Error(), "no such process")
	}

	if err := process.Signal(syscall.SIGTERM); err != nil {
		if processGone(err) {
			return nil
		}
		log.Warning("SIGTERM failed for pid %d: %v, trying SIGKILL", pid, err)
	} else {
		// Buffered so the waiter goroutine can finish even after a timeout.
		done := make(chan error, 1)
		go func() {
			_, err := process.Wait()
			done <- err
		}()

		select {
		case err := <-done:
			if err == nil {
				log.Info("Process %d terminated gracefully", pid)
				return nil
			}
		case <-time.After(3 * time.Second):
			log.Warning("Process %d did not terminate after SIGTERM, using SIGKILL", pid)
		}
	}

	if err := process.Signal(syscall.SIGKILL); err != nil {
		if processGone(err) {
			return nil
		}
		return fmt.Errorf("failed to kill process %d: %w", pid, err)
	}

	for range 3 {
		if err := process.Signal(syscall.Signal(0)); err != nil && processGone(err) {
			log.Info("Process %d confirmed terminated", pid)
			return nil
		}
		time.Sleep(50 * time.Millisecond)
	}

	return fmt.Errorf("process %d might still be running", pid)
}


================================================
FILE: components/execd/pkg/runtime/interrupt_windows.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build windows
// +build windows

package runtime

import (
	"errors"
	"fmt"
	"os"
	"time"

	"github.com/alibaba/opensandbox/execd/pkg/log"
)

// Interrupt stops execution in the specified session.
//
// The session ID is looked up first among Jupyter kernels (interrupted
// through the kernel's client) and then among command sessions (terminated
// via killPid). Each cache is read once and the returned value is reused, so
// a session removed concurrently between the check and the use cannot lead
// to a nil dereference. An error is returned when no session matches.
func (c *Controller) Interrupt(sessionID string) error {
	if kernel := c.getJupyterKernel(sessionID); kernel != nil {
		log.Warning("Interrupting Jupyter kernel %s", kernel.kernelID)
		return kernel.client.InterruptKernel(kernel.kernelID)
	}

	if kernel := c.getCommandKernel(sessionID); kernel != nil {
		return c.killPid(kernel.pid)
	}

	return errors.New("no such session")
}

// killPid force-terminates the process with the given pid on Windows.
//
// The process is killed with os.Process.Kill; a failure to kill is returned
// wrapped. Afterwards the process is waited on for at most 3 seconds; the
// outcome of that wait is ignored and a timeout is only logged.
func (c *Controller) killPid(pid int) error {
	proc, err := os.FindProcess(pid)
	if err != nil {
		return err
	}
	log.Warning("Attempting to terminate process %d", pid)

	if killErr := proc.Kill(); killErr != nil {
		return fmt.Errorf("failed to kill process %d: %w", pid, killErr)
	}

	// Best-effort wait to reduce zombies; os.Process.Wait only works for child processes.
	// The channel is buffered so the goroutine can finish after a timeout.
	waited := make(chan error, 1)
	go func() {
		_, waitErr := proc.Wait()
		waited <- waitErr
	}()

	select {
	case <-time.After(3 * time.Second):
		log.Warning("Process %d kill wait timed out", pid)
	case <-waited:
	}

	return nil
}


================================================
FILE: components/execd/pkg/runtime/jupyter.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

import (
	"context"
	"errors"

	"github.com/alibaba/opensandbox/execd/pkg/jupyter"
	"github.com/alibaba/opensandbox/execd/pkg/jupyter/execute"
	"github.com/alibaba/opensandbox/execd/pkg/log"
)

// runJupyter executes the request's code on a Jupyter kernel.
//
// It fails early when the controller has no base URL or token. When the
// request names no context, the language's default session is used and is
// created first if it does not exist yet. ErrContextNotFound is returned when
// no kernel is cached for the resolved session. Otherwise default hooks are
// installed, OnExecuteInit is fired with the session ID, and execution is
// delegated to runJupyterCode.
func (c *Controller) runJupyter(ctx context.Context, request *ExecuteCodeRequest) error {
	if c.baseURL == "" || c.token == "" {
		return errors.New("language runtime server not configured, please check your image runtime")
	}

	sessionID := request.Context
	if sessionID == "" {
		// No explicit context: fall back to the per-language default session.
		if c.getDefaultLanguageSession(request.Language) == "" {
			if err := c.createDefaultLanguageJupyterContext(request.Language); err != nil {
				return err
			}
		}
		sessionID = c.getDefaultLanguageSession(request.Language)
	}

	kernel := c.getJupyterKernel(sessionID)
	if kernel == nil {
		return ErrContextNotFound
	}

	request.SetDefaultHooks()
	request.Hooks.OnExecuteInit(sessionID)

	return c.runJupyterCode(ctx, kernel, request)
}

// runJupyterCode streams execution results for a single kernel.
//
// Only one execution may run on a kernel at a time: if the kernel's mutex is
// already held, the call fails immediately with "session is busy" instead of
// waiting. The function connects to the kernel, starts a streaming execution
// of request.Code, and forwards every received result to the request hooks
// until the stream ends or ctx is done.
//
// It returns nil when a nil result is received (end of stream), the
// connect/execute error if either of those calls fails, or an error when ctx
// is cancelled or times out.
//
//nolint:gocognit // complex due to hook handling; refactor later
func (c *Controller) runJupyterCode(ctx context.Context, kernel *jupyterKernel, request *ExecuteCodeRequest) error {
	// Non-blocking lock: a busy kernel rejects the request rather than queueing it.
	if !kernel.mu.TryLock() {
		return errors.New("session is busy")
	}
	defer kernel.mu.Unlock()

	err := kernel.client.ConnectToKernel(kernel.kernelID)
	if err != nil {
		return err
	}
	defer kernel.client.DisconnectFromKernel(kernel.kernelID)

	// Buffered channel on which ExecuteCodeStream delivers results.
	results := make(chan *execute.ExecutionResult, 10)

	err = kernel.client.ExecuteCodeStream(kernel.kernelID, request.Code, results)
	if err != nil {
		return err
	}

	for {
		select {
		case result := <-results:
			// A nil result marks the end of the stream (presumably the
			// producer closing the channel — confirm in ExecuteCodeStream).
			if result == nil {
				return nil
			}

			// A single result may carry several kinds of payload; each one
			// present is dispatched to its own hook.
			if result.ExecutionCount > 0 || len(result.ExecutionData) > 0 {
				request.Hooks.OnExecuteResult(result.ExecutionData, result.ExecutionCount)
			}

			if result.Status != "" {
				request.Hooks.OnExecuteStatus(result.Status)
			}

			if result.ExecutionTime > 0 {
				request.Hooks.OnExecuteComplete(result.ExecutionTime)
			}

			if result.Error != nil {
				request.Hooks.OnExecuteError(result.Error)
			}

			if len(result.Stream) > 0 {
				for _, stream := range result.Stream {
					switch stream.Name {
					case execute.StreamStdout:
						request.Hooks.OnExecuteStdout(stream.Text)
					case execute.StreamStderr:
						request.Hooks.OnExecuteStderr(stream.Text)
					default:
						// Other stream names are ignored.
					}
				}
			}

		case <-ctx.Done():
			// Cancellation or timeout: ask the kernel to stop, report the
			// cancellation through the error hook, and return an error. A
			// failed interrupt is only logged.
			log.Warning("context cancelled, try to interrupt kernel")
			err = kernel.client.InterruptKernel(kernel.kernelID)
			if err != nil {
				log.Error("interrupt kernel failed: %v", err)
			}

			request.Hooks.OnExecuteError(&execute.ErrorOutput{
				EName:  "ContextCancelled",
				EValue: "Interrupt kernel",
			})
			return errors.New("context cancelled, interrupt kernel")
		}
	}
}

// setWorkingDir configures the working directory for a kernel session.
//
// The current implementation is a no-op: both arguments are ignored and nil
// is always returned.
func (c *Controller) setWorkingDir(_ *jupyterKernel, _ *CreateContextRequest) error {
	return nil
}

// getJupyterKernel looks up the cached kernel for sessionID. It returns nil
// when the session is unknown or the cached value is not a *jupyterKernel.
func (c *Controller) getJupyterKernel(sessionID string) *jupyterKernel {
	cached, found := c.jupyterClientMap.Load(sessionID)
	if !found {
		return nil
	}

	kernel, isKernel := cached.(*jupyterKernel)
	if !isKernel {
		return nil
	}
	return kernel
}

// searchKernel finds a kernel spec name for the given language.
//
// The kernel specs are fetched from the Jupyter server and filtered by their
// declared language. The spec named "python3" is always skipped. When several
// specs match, the lexicographically smallest name is chosen so that the
// result does not depend on map iteration order.
//
// An error is returned when the specs cannot be fetched, when the server
// reports no specs at all, or when no spec matches the language.
func (c *Controller) searchKernel(client *jupyter.Client, language Language) (string, error) {
	specs, err := client.GetKernelSpecs()
	if err != nil {
		return "", err
	}

	if len(specs.Kernelspecs) == 0 {
		return "", errors.New("no kernel specs found")
	}

	wanted := language.String()

	var kernelName string
	for name, spec := range specs.Kernelspecs {
		if name == "python3" {
			continue
		}
		if spec.Spec.Language != wanted {
			continue
		}
		// Keep the smallest matching name for a stable choice.
		if kernelName == "" || name < kernelName {
			kernelName = name
		}
	}
	if kernelName == "" {
		return "", errors.New("no kernel specs found")
	}

	return kernelName, nil
}


================================================
FILE: components/execd/pkg/runtime/language.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

// Language represents the programming language or execution mode
// of a request. Controller.Execute uses it to select the backend.
type Language string

// Supported languages and execution modes, grouped by the backend that
// Controller.Execute dispatches them to.
const (
	// Command is handled by the synchronous command runner.
	Command           Language = "command"
	// Bash, Python, Java, JavaScript, TypeScript and Go are executed through
	// Jupyter kernels.
	Bash              Language = "bash"
	Python            Language = "python"
	Java              Language = "java"
	JavaScript        Language = "javascript"
	TypeScript        Language = "typescript"
	Go                Language = "go"
	// SQL is handled by the SQL runner.
	SQL               Language = "sql"
	// BackgroundCommand is handled by the background command runner.
	BackgroundCommand Language = "background-command"
)

// String returns the string representation of the language, i.e. the
// underlying string value unchanged (for example "python" for Python).
func (l Language) String() string {
	return string(l)
}


================================================
FILE: components/execd/pkg/runtime/sql.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

import (
	"context"
	"database/sql"
	"encoding/json"
	"errors"
	"fmt"
	"strings"
	"time"

	"github.com/google/uuid"

	_ "github.com/go-sql-driver/mysql"

	"github.com/alibaba/opensandbox/execd/pkg/jupyter/execute"
	"github.com/alibaba/opensandbox/execd/pkg/log"
)

// QueryResult represents a SQL query response.
//
// Every field is omitted from the JSON encoding when empty.
type QueryResult struct {
	// Columns lists the result column names.
	Columns []string `json:"columns,omitempty"`
	// Rows holds one slice of values per result row.
	Rows    [][]any  `json:"rows,omitempty"`
	// Error carries an error message, if any.
	Error   string   `json:"error,omitempty"`
}

// runSQL executes SQL queries based on their type.
//
// Steps:
//  1. Default hooks are installed, as runJupyter does, before any hook is
//     invoked (SetDefaultHooks is assumed to fill in only unset hooks —
//     confirm against its definition).
//  2. OnExecuteInit is fired with a freshly generated UUID.
//  3. The database is initialized and pinged; a failure in either step is
//     reported through OnExecuteError, logged, and returned.
//  4. Statements classified as "SELECT" go to executeSelectSQLQuery; all
//     others go to executeUpdateSQLQuery.
func (c *Controller) runSQL(ctx context.Context, request *ExecuteCodeRequest) error {
	request.SetDefaultHooks()
	request.Hooks.OnExecuteInit(uuid.New().String())

	err := c.initDB()
	if err != nil {
		request.Hooks.OnExecuteError(&execute.ErrorOutput{EName: "DBInitError", EValue: err.Error()})
		log.Error("DBInitError: error initializing db server: %v", err)
		return err
	}

	err = c.db.PingContext(ctx)
	if err != nil {
		request.Hooks.OnExecuteError(&execute.ErrorOutput{EName: "DBPingError", EValue: err.Error()})
		log.Error("DBPingError: error pinging db server: %v", err)
		return err
	}

	switch c.getQueryType(request.Code) {
	case "SELECT":
		return c.executeSelectSQLQuery(ctx, request)
	default:
		return c.executeUpdateSQLQuery(ctx, request)
	}
}

// executeSelectSQLQuery runs request.Code as a row-returning query and emits the
// result set as a JSON-encoded QueryResult under the "text/plain" key.
//
// Every non-NULL value is rendered as a string; NULL is kept as nil. Values the
// driver hands back as []byte (which the MySQL driver does for text-protocol
// columns) are converted with string(...) so that the textual content is
// emitted rather than a list of byte values.
//
// Failures (query, column lookup, scan, iteration, marshal) are reported only
// through OnExecuteError; the function then returns nil without calling
// OnExecuteResult or OnExecuteComplete. On success it calls OnExecuteResult
// followed by OnExecuteComplete with the elapsed time.
func (c *Controller) executeSelectSQLQuery(ctx context.Context, request *ExecuteCodeRequest) error {
	startAt := time.Now()

	rows, err := c.db.QueryContext(ctx, request.Code)
	if err != nil {
		request.Hooks.OnExecuteError(&execute.ErrorOutput{EName: "DBQueryError", EValue: err.Error()})
		return nil
	}
	defer rows.Close()

	columns, err := rows.Columns()
	if err != nil {
		request.Hooks.OnExecuteError(&execute.ErrorOutput{EName: "DBQueryError", EValue: err.Error()})
		return nil
	}

	// values is the reusable scan buffer; scanArgs points at its elements so
	// that Scan can fill a row of arbitrary width.
	var result [][]any
	values := make([]any, len(columns))
	scanArgs := make([]any, len(columns))
	for i := range values {
		scanArgs[i] = &values[i]
	}

	for rows.Next() {
		if err := rows.Scan(scanArgs...); err != nil {
			request.Hooks.OnExecuteError(&execute.ErrorOutput{EName: "RowScanError", EValue: err.Error()})
			return nil
		}
		row := make([]any, len(columns))
		for i, v := range values {
			switch val := v.(type) {
			case nil:
				row[i] = nil
			case []byte:
				// "%v" on a byte slice would print "[97 98 ...]"; emit the text instead.
				row[i] = string(val)
			default:
				row[i] = fmt.Sprintf("%v", val)
			}
		}
		result = append(result, row)
	}
	if err := rows.Err(); err != nil {
		request.Hooks.OnExecuteError(&execute.ErrorOutput{EName: "RowIterationError", EValue: err.Error()})
		return nil
	}

	queryResult := QueryResult{
		Columns: columns,
		Rows:    result,
	}
	bytes, err := json.Marshal(queryResult)
	if err != nil {
		request.Hooks.OnExecuteError(&execute.ErrorOutput{EName: "JSONMarshalError", EValue: err.Error()})
		return nil
	}
	request.Hooks.OnExecuteResult(
		map[string]any{
			"text/plain": string(bytes),
		},
		1,
	)
	request.Hooks.OnExecuteComplete(time.Since(startAt))
	return nil
}

// executeUpdateSQLQuery runs request.Code through ExecContext and reports the
// number of affected rows as a single-cell QueryResult (column "affected_rows"),
// JSON-encoded under the "text/plain" key.
//
// Exec and marshal failures are passed to OnExecuteError and also returned.
func (c *Controller) executeUpdateSQLQuery(ctx context.Context, request *ExecuteCodeRequest) error {
	began := time.Now()

	execResult, execErr := c.db.ExecContext(ctx, request.Code)
	if execErr != nil {
		request.Hooks.OnExecuteError(&execute.ErrorOutput{EName: "DBExecError", EValue: execErr.Error()})
		return execErr
	}

	// The error from RowsAffected is discarded; only the count is used.
	rowCount, _ := execResult.RowsAffected()

	payload, marshalErr := json.Marshal(QueryResult{
		Columns: []string{"affected_rows"},
		Rows:    [][]any{{rowCount}},
	})
	if marshalErr != nil {
		request.Hooks.OnExecuteError(&execute.ErrorOutput{EName: "JSONMarshalError", EValue: marshalErr.Error()})
		return marshalErr
	}

	output := map[string]any{"text/plain": string(payload)}
	request.Hooks.OnExecuteResult(output, 1)
	request.Hooks.OnExecuteComplete(time.Since(began))
	return nil
}

// getQueryType returns the upper-cased first whitespace-separated token of
// query, or "" when query contains no tokens. The result selects the executor.
func (c *Controller) getQueryType(query string) string {
	if tokens := strings.Fields(query); len(tokens) > 0 {
		return strings.ToUpper(tokens[0])
	}
	return ""
}

// initDB lazily opens the local sandbox database.
//
// On the first call it connects to the local MySQL server, makes sure the
// "sandbox" database exists, and stores a handle whose DSN selects that
// database. Selecting the database through the DSN (rather than issuing
// "USE sandbox" once) matters because *sql.DB is a connection pool: a USE
// statement only affects the single connection it happens to run on, leaving
// every other pooled connection without a default database.
//
// The initialization runs at most once (sync.Once). If it fails, the error is
// returned from that first call and c.db stays nil, so later calls return
// "db is not initialized" without retrying.
func (c *Controller) initDB() error {
	var initErr error
	c.dbOnce.Do(func() {
		const serverDSN = "root:@tcp(127.0.0.1:3306)/"

		// The bootstrap handle is only needed to create the database; it is
		// always closed so a failed initialization does not leak connections.
		bootstrap, err := sql.Open("mysql", serverDSN)
		if err != nil {
			initErr = err
			return
		}
		defer func() { _ = bootstrap.Close() }()

		if err = bootstrap.Ping(); err != nil {
			initErr = err
			return
		}

		if _, err = bootstrap.Exec("CREATE DATABASE IF NOT EXISTS sandbox"); err != nil {
			initErr = err
			return
		}

		// Every connection opened by this pool starts with "sandbox" selected.
		db, err := sql.Open("mysql", serverDSN+"sandbox")
		if err != nil {
			initErr = err
			return
		}

		if err = db.Ping(); err != nil {
			_ = db.Close()
			initErr = err
			return
		}

		c.db = db
	})

	if initErr != nil {
		return initErr
	}
	if c.db == nil {
		return errors.New("db is not initialized")
	}
	return nil
}


================================================
FILE: components/execd/pkg/runtime/sql_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

import (
	"context"
	"database/sql/driver"
	"encoding/json"
	"testing"
	"time"

	"github.com/alibaba/opensandbox/execd/pkg/jupyter/execute"
	"github.com/stretchr/testify/require"
)

// TestExecuteSelectSQLQuery_Success runs a SELECT against a stub driver and
// checks that the rows arrive as a JSON QueryResult via the result hook, with
// the completion hook fired and the error hook untouched.
func TestExecuteSelectSQLQuery_Success(t *testing.T) {
	// Named "stub" rather than "driver" to avoid shadowing the imported package.
	stub := &stubDriver{
		columns: []string{"id", "name"},
		rows: [][]driver.Value{
			{int64(1), "alice"},
			{int64(2), "bob"},
		},
	}
	db := newStubDB(t, stub)

	ctrl := NewController("", "")
	ctrl.db = db

	var (
		payload  map[string]any
		hookErr  *execute.ErrorOutput
		finished bool
	)
	hooks := ExecuteResultHook{
		OnExecuteResult:   func(result map[string]any, _ int) { payload = result },
		OnExecuteError:    func(err *execute.ErrorOutput) { hookErr = err },
		OnExecuteComplete: func(time.Duration) { finished = true },
	}
	req := &ExecuteCodeRequest{Code: "SELECT * FROM users", Hooks: hooks}

	runErr := ctrl.executeSelectSQLQuery(context.Background(), req)
	require.NoError(t, runErr)

	require.Nil(t, hookErr, "unexpected error hook")
	require.True(t, finished, "expected completion hook to be triggered")

	text, found := payload["text/plain"]
	require.True(t, found, "expected text/plain payload")

	var decoded QueryResult
	require.NoError(t, json.Unmarshal([]byte(text.(string)), &decoded))

	require.Equal(t, []string{"id", "name"}, decoded.Columns, "unexpected columns")
	require.Len(t, decoded.Rows, 2, "unexpected rows")
	require.Equal(t, "1", decoded.Rows[0][0])
	require.Equal(t, "bob", decoded.Rows[1][1])
}

// TestExecuteUpdateSQLQuery_Success runs an UPDATE against a stub driver that
// reports three affected rows and checks the emitted "affected_rows" result.
func TestExecuteUpdateSQLQuery_Success(t *testing.T) {
	stub := &stubDriver{execRowsAffected: 3}
	db := newStubDB(t, stub)

	ctrl := NewController("", "")
	ctrl.db = db

	var (
		payload  map[string]any
		hookErr  *execute.ErrorOutput
		finished bool
	)
	hooks := ExecuteResultHook{
		OnExecuteResult:   func(result map[string]any, _ int) { payload = result },
		OnExecuteError:    func(err *execute.ErrorOutput) { hookErr = err },
		OnExecuteComplete: func(time.Duration) { finished = true },
	}
	req := &ExecuteCodeRequest{Code: "UPDATE users SET name='alice' WHERE id=1", Hooks: hooks}

	runErr := ctrl.executeUpdateSQLQuery(context.Background(), req)
	require.NoError(t, runErr)

	require.Nil(t, hookErr, "unexpected error hook")
	require.True(t, finished, "expected completion hook to be triggered")

	text, found := payload["text/plain"]
	require.True(t, found, "expected text/plain payload")

	var decoded QueryResult
	require.NoError(t, json.Unmarshal([]byte(text.(string)), &decoded))

	require.Equal(t, []string{"affected_rows"}, decoded.Columns, "unexpected columns")
	require.Len(t, decoded.Rows, 1, "unexpected rows length")
	require.Len(t, decoded.Rows[0], 1, "unexpected row entry length")
	// JSON numbers decode into float64 when the target is `any`.
	require.Equal(t, float64(3), decoded.Rows[0][0])
}


================================================
FILE: components/execd/pkg/runtime/types.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

import (
	"fmt"
	"sync"
	"time"

	"github.com/alibaba/opensandbox/execd/pkg/jupyter/execute"
)

// ExecuteResultHook groups execution callbacks.
//
// Each field is an optional callback invoked by an executor while a request
// runs; ExecuteCodeRequest.SetDefaultHooks replaces any nil field with a
// fallback that prints to stdout. As used by the SQL executor, OnExecuteInit
// receives a generated identifier, OnExecuteResult receives a MIME-keyed result
// map plus a count, OnExecuteError receives the failure details, and
// OnExecuteComplete receives the elapsed execution time. OnExecuteStatus,
// OnExecuteStdout and OnExecuteStderr each receive a string; their exact
// callers are outside this file section.
type ExecuteResultHook struct {
	OnExecuteInit     func(context string)
	OnExecuteResult   func(result map[string]any, count int)
	OnExecuteStatus   func(status string)
	OnExecuteStdout   func(stdout string) //nolint:predeclared
	OnExecuteStderr   func(stderr string) //nolint:predeclared
	OnExecuteError    func(err *execute.ErrorOutput)
	OnExecuteComplete func(executionTime time.Duration)
}

// ExecuteCodeRequest represents a code execution request with context and hooks.
//
// Language selects the runtime and Code carries the source text to run (for
// SQL, the statement itself). Context, Timeout, Cwd, Envs, Uid and Gid are
// execution settings consumed by executors outside this file section
// (presumably session id, time limit, working directory, environment and
// process credentials — confirm against the executors). Hooks carries the
// callbacks used to report progress and results; it has no JSON tag.
type ExecuteCodeRequest struct {
	Language Language          `json:"language"`
	Code     string            `json:"code"`
	Context  string            `json:"context"`
	Timeout  time.Duration     `json:"timeout"`
	Cwd      string            `json:"cwd"`
	Envs     map[string]string `json:"envs"`
	Uid      *uint32           `json:"uid,omitempty"`
	Gid      *uint32           `json:"gid,omitempty"`
	Hooks    ExecuteResultHook
}

// SetDefaultHooks fills every hook that is still nil with a fallback that
// prints the event to stdout. Hooks that are already set are left untouched.
func (req *ExecuteCodeRequest) SetDefaultHooks() {
	hooks := &req.Hooks

	if hooks.OnExecuteInit == nil {
		hooks.OnExecuteInit = func(session string) { fmt.Printf("OnExecuteInit: %s\n", session) }
	}
	if hooks.OnExecuteStatus == nil {
		hooks.OnExecuteStatus = func(status string) { fmt.Printf("OnExecuteStatus: %s\n", status) }
	}
	if hooks.OnExecuteStdout == nil {
		hooks.OnExecuteStdout = func(stdout string) { fmt.Printf("OnExecuteStdout: %s\n", stdout) }
	}
	if hooks.OnExecuteStderr == nil {
		hooks.OnExecuteStderr = func(stderr string) { fmt.Printf("OnExecuteStderr: %s\n", stderr) }
	}
	if hooks.OnExecuteResult == nil {
		hooks.OnExecuteResult = func(result map[string]any, count int) {
			fmt.Printf("OnExecuteResult: %d, %++v\n", count, result)
		}
	}
	if hooks.OnExecuteError == nil {
		hooks.OnExecuteError = func(err *execute.ErrorOutput) { fmt.Printf("OnExecuteError: %++v\n", err) }
	}
	if hooks.OnExecuteComplete == nil {
		hooks.OnExecuteComplete = func(executionTime time.Duration) {
			fmt.Printf("OnExecuteComplete: %v\n", executionTime)
		}
	}
}

// CreateContextRequest represents a stateful session creation request.
// Language names the runtime for the new context and Cwd is the requested
// working directory.
type CreateContextRequest struct {
	Language Language `json:"language"`
	Cwd      string   `json:"cwd"`
}

// CodeContext pairs a context identifier with the Language it belongs to.
// ID is omitted from the JSON encoding when empty.
type CodeContext struct {
	ID       string   `json:"id,omitempty"`
	Language Language `json:"language"`
}

// bashSessionConfig holds bash session configuration.
// It is referenced by bashSession through its config field.
type bashSessionConfig struct {
	// StartupSource is a list of scripts sourced on startup.
	StartupSource []string
	// Session is the session identifier.
	Session string
	// StartupTimeout is the startup timeout.
	StartupTimeout time.Duration
	// Cwd is the working directory.
	Cwd string
}

// bashSession represents a bash session.
//
// config points at the session's configuration. mu is a mutex stored alongside
// the mutable fields (presumably guarding them — confirm against the methods,
// which are outside this section). started, env and cwd hold session state;
// NOTE(review): they look like the "has been started" flag, the tracked
// environment and the tracked working directory, but their use is not visible
// here.
type bashSession struct {
	config  *bashSessionConfig
	mu      sync.Mutex
	started bool
	env     map[string]string
	cwd     string

	// currentProcessPid is the pid of the active run's process group leader (bash).
	// Set after cmd.Start(), cleared when run() returns. Used by close() to kill the process group.
	currentProcessPid int
}


================================================
FILE: components/execd/pkg/runtime/types_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

import (
	"reflect"
	"testing"

	"github.com/stretchr/testify/require"
)

// TestExecuteCodeRequest_SetDefaultHooks verifies that SetDefaultHooks installs
// a fallback for every hook that was nil and leaves a pre-set hook untouched.
func TestExecuteCodeRequest_SetDefaultHooks(t *testing.T) {
	customResult := func(map[string]any, int) {}

	req := &ExecuteCodeRequest{
		Hooks: ExecuteResultHook{
			OnExecuteResult: customResult,
		},
	}

	req.SetDefaultHooks()

	// Every hook that started out nil must now have a default.
	require.NotNil(t, req.Hooks.OnExecuteInit)
	require.NotNil(t, req.Hooks.OnExecuteStatus)
	require.NotNil(t, req.Hooks.OnExecuteStdout)
	require.NotNil(t, req.Hooks.OnExecuteStderr)
	require.NotNil(t, req.Hooks.OnExecuteError)
	require.NotNil(t, req.Hooks.OnExecuteComplete)

	// Functions are not comparable in Go, so compare their code pointers to
	// prove the caller-supplied hook was not replaced.
	require.NotNil(t, req.Hooks.OnExecuteResult, "expected OnExecuteResult to remain set")
	require.Equal(t, reflect.ValueOf(customResult).Pointer(), reflect.ValueOf(req.Hooks.OnExecuteResult).Pointer(),
		"default hooks should not override existing ones")
}


================================================
FILE: components/execd/pkg/util/glob/index.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package glob

// findUnescapedByteIndex returns the index of the first occurrence of c in s,
// or -1 if there is none. When allowEscaping is true, a backslash causes the
// byte that follows it to be skipped, so an escaped c is never reported.
func findUnescapedByteIndex(s string, c byte, allowEscaping bool) int {
	for pos := 0; pos < len(s); pos++ {
		switch {
		case allowEscaping && s[pos] == '\\':
			// Step over the escaped byte; the loop increment then moves past it.
			pos++
		case s[pos] == c:
			return pos
		}
	}
	return -1
}

// findMatchedClosingAltIndex returns the index in s of the `}` that closes an
// already-opened `{`, or -1 if there is none. The search starts at nesting
// depth 1, i.e. s is the text following the opening brace.
func findMatchedClosingAltIndex(s string, allowEscaping bool) int {
	const (
		opening      = '{'
		closing      = '}'
		initialDepth = 1
	)
	return findMatchedClosingSymbolsIndex(s, allowEscaping, opening, closing, initialDepth)
}

// findMatchedClosingBracketIndex returns the index in s of the `)` that brings
// the parenthesis nesting depth back to zero, or -1 if there is none. The
// search starts at depth 0, so the opening `(` must itself be part of s.
func findMatchedClosingBracketIndex(s string, allowEscaping bool) int {
	const (
		opening      = '('
		closing      = ')'
		initialDepth = 0
	)
	return findMatchedClosingSymbolsIndex(s, allowEscaping, opening, closing, initialDepth)
}

// findNextCommaIndex returns the index of the first comma in s that is not
// inside a nested `{...}` group, or -1 if there is none. When allowEscaping is
// true, a backslash causes the byte after it to be skipped.
func findNextCommaIndex(s string, allowEscaping bool) int {
	// depth starts at 1 because s is the inside of an already-open brace group.
	depth := 1
	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]
		switch {
		case allowEscaping && ch == '\\':
			pos++
		case ch == '{':
			depth++
		case ch == '}':
			depth--
		case ch == ',' && depth == 1:
			return pos
		}
	}
	return -1
}

// findMatchedClosingSymbolsIndex scans s while tracking a nesting depth that
// starts at begin: each left byte raises the depth and each right byte lowers
// it. It returns the index of the right byte that lowers the depth to zero, or
// -1 if that never happens. When allowEscaping is true, a backslash causes the
// byte after it to be skipped.
func findMatchedClosingSymbolsIndex(s string, allowEscaping bool, left, right uint8, begin int) int {
	depth := begin
	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]
		switch {
		case allowEscaping && ch == '\\':
			pos++
		case ch == left:
			depth++
		case ch == right:
			depth--
			if depth == 0 {
				return pos
			}
		}
	}
	return -1
}


================================================
FILE: components/execd/pkg/util/glob/match.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This code is based on or derived from doublestar
// Copyright (c) 2014 Bob Matcuk
// Licensed under MIT License
// https://github.com/bmatcuk/doublestar/blob/master/LICENSE

package glob

import (
	"path/filepath"
	"unicode/utf8"

	globutil "github.com/bmatcuk/doublestar/v4"
)

// PathMatch reports whether name matches pattern, in the manner of
// filepath.Match but with doublestar (`**`) semantics. It uses the operating
// system's path separator and validates the pattern.
func PathMatch(pattern, name string) (bool, error) {
	const validate = true
	return matchWithSeparator(pattern, name, filepath.Separator, validate)
}

// matchWithSeparator matches name against pattern from the beginning of both
// strings, with no `*` or `**` backtrack positions recorded yet.
func matchWithSeparator(pattern, name string, separator rune, validate bool) (matched bool, err error) {
	const (
		noBacktrack = -1 // no backtrack position recorded
		start       = 0  // begin at the first byte of pattern and name
	)
	matched, err = doMatchWithSeparator(pattern, name, separator, validate,
		noBacktrack, noBacktrack, noBacktrack, noBacktrack, start, start)
	return matched, err
}

// doMatchWithSeparator is the matching engine behind matchWithSeparator.
//
// It walks pattern and name in lockstep, starting at patIdx and nameIdx, and
// reports whether the rest of name matches the rest of pattern. Supported
// syntax, as handled by the switch below:
//   - `*`      any run of non-separator runes
//   - `**`     any run of path segments, when it forms a whole segment
//     (otherwise it is treated like `*`)
//   - `?`      exactly one non-separator rune
//   - `[...]`  a character class, with `!`/`^` negation and `a-z` ranges
//   - `!(...)` negation: the pattern with the parentheses removed is matched
//     recursively and the result is inverted
//   - `{a,b}`  alternatives, each tried recursively
//   - `\`      escapes the next rune, unless separator is itself `\`
//
// The four *Backtrack parameters carry the positions to resume from when a
// `*` or `**` has to consume more of name; -1 means "no such position". They
// are parameters (rather than locals) so that the recursive calls for `!(...)`
// and `{...}` inherit the backtracking state of the caller.
//
// When validate is true, the unconsumed remainder of the pattern is checked on
// a mismatch and ErrBadPattern is returned if it is malformed.
//
//nolint:gocognit,nestif,gocyclo,maintidx
func doMatchWithSeparator(pattern, name string, separator rune, validate bool, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, patIdx, nameIdx int) (matched bool, err error) {
	patLen := len(pattern)
	nameLen := len(name)
	// startOfSegment is true while patIdx sits directly after a separator (or
	// at the position matching began); `**` is only honored there.
	startOfSegment := true
MATCH:
	for nameIdx < nameLen {
		if patIdx < patLen {
			switch pattern[patIdx] {
			case '*':
				if patIdx++; patIdx < patLen && pattern[patIdx] == '*' {
					// doublestar - must begin with a path separator, otherwise we'll
					// treat it like a single star
					patIdx++
					if startOfSegment {
						if patIdx >= patLen {
							// pattern ends in `/**`: return true
							return true, nil
						}

						// doublestar must also end with a path separator, otherwise we're
						// just going to treat the doublestar as a single star
						patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
						if patRune == separator {
							patIdx += patRuneLen

							// record where to resume if the rest of the pattern fails,
							// and drop any pending single-star backtrack
							doublestarPatternBacktrack = patIdx
							doublestarNameBacktrack = nameIdx
							starPatternBacktrack = -1
							starNameBacktrack = -1
							continue
						}
					}
				}
				startOfSegment = false

				// single star: first try matching zero runes, remembering where to
				// come back to if that fails
				starPatternBacktrack = patIdx
				starNameBacktrack = nameIdx
				continue

			case '?':
				startOfSegment = false
				nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
				if nameRune == separator {
					// `?` cannot match the separator
					break
				}

				patIdx++
				nameIdx += nameRuneLen
				continue

			case '[':
				startOfSegment = false
				if patIdx++; patIdx >= patLen {
					// class didn't end
					return false, globutil.ErrBadPattern
				}
				nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])

				matched := false
				negate := pattern[patIdx] == '!' || pattern[patIdx] == '^'
				if negate {
					patIdx++
				}

				if patIdx >= patLen || pattern[patIdx] == ']' {
					// class didn't end or empty character class
					return false, globutil.ErrBadPattern
				}

				// `last` is the previous class member, used as the lower bound of a
				// range; utf8.MaxRune means "no lower bound available"
				last := utf8.MaxRune
				for patIdx < patLen && pattern[patIdx] != ']' {
					patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
					patIdx += patRuneLen

					// match a range
					if last < utf8.MaxRune && patRune == '-' && patIdx < patLen && pattern[patIdx] != ']' {
						if pattern[patIdx] == '\\' {
							// next character is escaped
							patIdx++
						}
						patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:])
						patIdx += patRuneLen

						if last <= nameRune && nameRune <= patRune {
							matched = true
							break
						}

						// didn't match range - reset `last`
						last = utf8.MaxRune
						continue
					}

					// not a range - check if the next rune is escaped
					if patRune == '\\' {
						patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:])
						patIdx += patRuneLen
					}

					// check if the rune matches
					if patRune == nameRune {
						matched = true
						break
					}

					// no matches yet
					last = patRune
				}

				if matched == negate {
					// failed to match - if we reached the end of the pattern, that means
					// we never found a closing `]`
					if patIdx >= patLen {
						return false, globutil.ErrBadPattern
					}
					break
				}

				// the class matched: skip the rest of it, up to and including `]`
				closingIdx := findUnescapedByteIndex(pattern[patIdx:], ']', true)
				if closingIdx == -1 {
					// no closing `]`
					return false, globutil.ErrBadPattern
				}

				patIdx += closingIdx + 1
				nameIdx += nameRuneLen
				continue
			case '!':
				negateIdx := patIdx
				// begin index of (
				patIdx++
				closingIdx := findMatchedClosingBracketIndex(pattern[patIdx:], separator != '\\')
				if closingIdx == -1 {
					return false, globutil.ErrBadPattern
				}
				closingIdx += patIdx

				// rebuild the pattern without `!(` and `)`, match it from the same
				// position, and invert the outcome
				result, err := doMatchWithSeparator(pattern[:negateIdx]+pattern[patIdx+1:closingIdx]+pattern[closingIdx+1:], name, separator, validate, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, negateIdx, nameIdx)
				if err != nil {
					return false, err
				} else if !result {
					return true, nil
				} else {
					return false, nil
				}
			case '{':
				startOfSegment = false //nolint:ineffassign
				beforeIdx := patIdx
				patIdx++
				closingIdx := findMatchedClosingAltIndex(pattern[patIdx:], separator != '\\')
				if closingIdx == -1 {
					// no closing `}`
					return false, globutil.ErrBadPattern
				}
				closingIdx += patIdx

				// try every comma-separated alternative except the last: splice it
				// into the pattern in place of the whole `{...}` group and recurse
				for {
					commaIdx := findNextCommaIndex(pattern[patIdx:closingIdx], separator != '\\')
					if commaIdx == -1 {
						break
					}
					commaIdx += patIdx

					result, err := doMatchWithSeparator(pattern[:beforeIdx]+pattern[patIdx:commaIdx]+pattern[closingIdx+1:], name, separator, validate, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, beforeIdx, nameIdx)
					if result || err != nil {
						return result, err
					}

					patIdx = commaIdx + 1
				}
				// the last (or only) alternative decides the result
				return doMatchWithSeparator(pattern[:beforeIdx]+pattern[patIdx:closingIdx]+pattern[closingIdx+1:], name, separator, validate, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, beforeIdx, nameIdx)

			case '\\':
				if separator != '\\' {
					// next rune is "escaped" in the pattern - literal match
					if patIdx++; patIdx >= patLen {
						// pattern ended
						return false, globutil.ErrBadPattern
					}
				}
				fallthrough

			default:
				patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
				nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
				if patRune != nameRune {
					if separator != '\\' && patIdx > 0 && pattern[patIdx-1] == '\\' {
						// if this rune was meant to be escaped, we need to move patIdx
						// back to the backslash so the remainder stays well-formed
						patIdx--
					}
					break
				}

				patIdx += patRuneLen
				nameIdx += nameRuneLen
				startOfSegment = patRune == separator
				continue
			}
		}

		// Reached on a mismatch (a `break` out of the switch) or when the pattern
		// is exhausted while name still has input: try to backtrack.
		if starPatternBacktrack >= 0 {
			// `*` backtrack, but only if the `name` rune isn't the separator
			nameRune, nameRuneLen := utf8.DecodeRuneInString(name[starNameBacktrack:])
			if nameRune != separator {
				starNameBacktrack += nameRuneLen
				patIdx = starPatternBacktrack
				nameIdx = starNameBacktrack
				startOfSegment = false
				continue
			}
		}

		if doublestarPatternBacktrack >= 0 {
			// `**` backtrack, advance `name` past next separator
			nameIdx = doublestarNameBacktrack
			for nameIdx < nameLen {
				nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
				nameIdx += nameRuneLen
				if nameRune == separator {
					doublestarNameBacktrack = nameIdx
					patIdx = doublestarPatternBacktrack
					startOfSegment = true
					continue MATCH
				}
			}
		}

		// no backtrack position left: the match failed; optionally report a
		// malformed remainder instead of a plain mismatch
		if validate && patIdx < patLen && !isValidPattern(pattern[patIdx:], separator) {
			return false, globutil.ErrBadPattern
		}
		return false, nil
	}

	if nameIdx < nameLen {
		// we reached the end of `pattern` before the end of `name`
		return false, nil
	}

	// we've reached the end of `name`; we've successfully matched if we've also
	// reached the end of `pattern`, or if the rest of `pattern` can match a
	// zero-length string
	return isZeroLengthPattern(pattern[patIdx:], separator)
}

// nolint:nakedret
func isZeroLengthPattern(pattern string, separator rune) (ret bool, err error) {
	// `/**` is a special case - a pattern such as `path/to/a/**` *should* match
	if pattern == "" || pattern == "*" || pattern == "**" || pattern == string(separator)+"**" {
		return true, nil
	}

	if pattern[0] == '{' {
		closingIdx := findMatchedClosingAltIndex(pattern[1:], separator != '\\')
		if closingIdx == -1 {
			// no closing '}'
			return false, globutil.ErrBadPattern
		}
		closingIdx += 1

		patIdx := 1
		for {
			commaIdx := findNextCommaIndex(pattern[patIdx:closingIdx], separator != '\\')
			if commaIdx == -1 {
				break
			}
			commaIdx += patIdx

			ret, err = isZeroLengthPattern(pattern[patIdx:commaIdx]+pattern[closingIdx+1:], separator)
			if ret || err != nil {
				return
			}

			patIdx = commaIdx + 1
		}
		return isZeroLengthPattern(pattern[patIdx:closingIdx]+pattern[closingIdx+1:], separator)
	}

	// no luck - validate the rest of the pattern
	if !isValidPattern(pattern, separator) {
		return false, globutil.ErrBadPattern
	}
	return false, nil
}


================================================
FILE: components/execd/pkg/util/glob/match_benchmark_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package glob

import (
	"path/filepath"
	"testing"
)

// BenchmarkPathMatch measures PathMatch over every matchTests entry that is
// flagged both isStandard and testOnDisk, converting slashes to the OS
// separator first. Match results are discarded.
func BenchmarkPathMatch(b *testing.B) {
	b.ReportAllocs()
	for iter := 0; iter < b.N; iter++ {
		for _, tc := range matchTests {
			if !tc.isStandard || !tc.testOnDisk {
				continue
			}
			osPattern := filepath.FromSlash(tc.pattern)
			osPath := filepath.FromSlash(tc.testPath)
			PathMatch(osPattern, osPath)
		}
	}
}


================================================
FILE: components/execd/pkg/util/glob/match_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This code is based on or derived from doublestar
// Copyright (c) 2014 Bob Matcuk
// Licensed under MIT License
// https://github.com/bmatcuk/doublestar/blob/master/LICENSE

package glob

import (
	"path/filepath"
	"runtime"
	"strings"
	"testing"

	globutil "github.com/bmatcuk/doublestar/v4"
)

// MatchTest is one row of the matchTests table: a pattern, a path to test it
// against, and the expected outcomes.
//
// The table rows are positional, so the field order here must not change.
// shouldMatch and expectedErr describe the expected match result and error;
// isStandard and testOnDisk are used as filters (BenchmarkPathMatch only runs
// rows where both are set). NOTE(review): shouldMatchGlob, expectIOErr,
// expectPatternNotExist, numResults and winNumResults appear to describe
// expectations for glob/on-disk tests (winNumResults presumably the Windows
// count) — their consumers are not visible in this section; confirm there.
type MatchTest struct {
	pattern, testPath     string
	shouldMatch           bool
	shouldMatchGlob       bool
	expectedErr           error
	expectIOErr           bool
	expectPatternNotExist bool
	isStandard            bool
	testOnDisk            bool
	numResults            int
	winNumResults         int
}

// onWindows is true when the tests run on Windows (runtime.GOOS == "windows").
// Tests which contain escapes and symlinks will not work on Windows.
var onWindows = runtime.GOOS == "windows"

// matchTests is the shared table that drives the tests in this file. Rows are
// positional MatchTest literals, so the columns follow the struct's field
// order: pattern, testPath, shouldMatch, shouldMatchGlob, expectedErr,
// expectIOErr, expectPatternNotExist, isStandard, testOnDisk, numResults,
// winNumResults.
var matchTests = []MatchTest{
	{"", "", true, false, nil, true, false, true, true, 0, 0},
	{"*", "", true, true, nil, false, false, true, false, 0, 0},
	{"*", "/", false, false, nil, false, false, true, false, 0, 0},
	{"/*", "/", true, true, nil, false, false, true, false, 0, 0},
	{"/*", "/debug/", false, false, nil, false, false, true, false, 0, 0},
	{"/*", "//", false, false, nil, false, false, true, false, 0, 0},
	{"abc", "abc", true, true, nil, false, false, true, true, 1, 1},
	{"*", "abc", true, true, nil, false, false, true, true, 22, 17},
	{"*c", "abc", true, true, nil, false, false, true, true, 2, 2},
	{"*/", "a/", true, true, nil, false, false, true, false, 0, 0},
	{"a*", "a", true, true, nil, false, false, true, true, 9, 9},
	{"a*", "abc", true, true, nil, false, false, true, true, 9, 9},
	{"a*", "ab/c", false, false, nil, false, false, true, true, 9, 9},
	{"a*/b", "abc/b", true, true, nil, false, false, true, true, 2, 2},
	{"a*/b", "a/c/b", false, false, nil, false, false, true, true, 2, 2},
	{"a*/c/", "a/b", false, false, nil, false, false, false, true, 1, 1},
	{"a*b*c*d*e*", "axbxcxdxe", true, true, nil, false, false, true, true, 3, 3},
	{"a*b*c*d*e*/f", "axbxcxdxe/f", true, true, nil, false, false, true, true, 2, 2},
	{"a*b*c*d*e*/f", "axbxcxdxexxx/f", true, true, nil, false, false, true, true, 2, 2},
	{"a*b*c*d*e*/f", "axbxcxdxe/xxx/f", false, false, nil, false, false, true, true, 2, 2},
	{"a*b*c*d*e*/f", "axbxcxdxexxx/fff", false, false, nil, false, false, true, true, 2, 2},
	{"a*b?c*x", "abxbbxdbxebxczzx", true, true, nil, false, false, true, true, 2, 2},
	{"a*b?c*x", "abxbbxdbxebxczzy", false, false, nil, false, false, true, true, 2, 2},
	{"ab[c]", "abc", true, true, nil, false, false, true, true, 1, 1},
	{"ab[b-d]", "abc", true, true, nil, false, false, true, true, 1, 1},
	{"ab[e-g]", "abc", false, false, nil, false, false, true, true, 0, 0},
	{"ab[^c]", "abc", false, false, nil, false, false, true, true, 0, 0},
	{"ab[^b-d]", "abc", false, false, nil, false, false, true, true, 0, 0},
	{"ab[^e-g]", "abc", true, true, nil, false, false, true, true, 1, 1},
	{"a\\*b", "ab", false, false, nil, false, true, true, !onWindows, 0, 0},
	{"a?b", "a☺b", true, true, nil, false, false, true, true, 1, 1},
	{"a[^a]b", "a☺b", true, true, nil, false, false, true, true, 1, 1},
	{"a[!a]b", "a☺b", true, true, nil, false, false, false, true, 1, 1},
	{"a???b", "a☺b", false, false, nil, false, false, true, true, 0, 0},
	{"a[^a][^a][^a]b", "a☺b", false, false, nil, false, false, true, true, 0, 0},
	{"[a-ζ]*", "α", true, true, nil, false, false, true, true, 20, 17},
	{"*[a-ζ]", "A", false, false, nil, false, false, true, true, 20, 17},
	{"a?b", "a/b", false, false, nil, false, false, true, true, 1, 1},
	{"a*b", "a/b", false, false, nil, false, false, true, true, 1, 1},
	{"[\\]a]", "]", true, true, nil, false, false, true, !onWindows, 2, 2},
	{"[\\-]", "-", true, true, nil, false, false, true, !onWindows, 1, 1},
	{"[x\\-]", "x", true, true, nil, false, false, true, !onWindows, 2, 2},
	{"[x\\-]", "-", true, true, nil, false, false, true, !onWindows, 2, 2},
	{"[x\\-]", "z", false, false, nil, false, false, true, !onWindows, 2, 2},
	{"[\\-x]", "x", true, true, nil, false, false, true, !onWindows, 2, 2},
	{"[\\-x]", "-", true, true, nil, false, false, true, !onWindows, 2, 2},
	{"[\\-x]", "a", false, false, nil, false, false, true, !onWindows, 2, 2},
	{"[]a]", "]", false, false, globutil.ErrBadPattern, false, false, true, true, 0, 0},
	// doublestar, like bash, allows these when path.Match() does not
	{"[-]", "-", true, true, nil, false, false, false, !onWindows, 1, 0},
	{"[x-]", "x", true, true, nil, false, false, false, true, 2, 1},
	{"[x-]", "-", true, true, nil, false, false, false, !onWindows, 2, 1},
	{"[x-]", "z", false, false, nil, false, false, false, true, 2, 1},
	{"[-x]", "x", true, true, nil, false, false, false, true, 2, 1},
	{"[-x]", "-", true, true, nil, false, false, false, !onWindows, 2, 1},
	{"[-x]", "a", false, false, nil, false, false, false, true, 2, 1},
	{"[a-b-d]", "a", true, true, nil, false, false, false, true, 3, 2},
	{"[a-b-d]", "b", true, true, nil, false, false, false, true, 3, 2},
	{"[a-b-d]", "-", true, true, nil, false, false, false, !onWindows, 3, 2},
	{"[a-b-d]", "c", false, false, nil, false, false, false, true, 3, 2},
	{"[a-b-x]", "x", true, true, nil, false, false, false, true, 4, 3},
	{"\\", "a", false, false, globutil.ErrBadPattern, false, false, true, !onWindows, 0, 0},
	{"[", "a", false, false, globutil.ErrBadPattern, false, false, true, true, 0, 0},
	{"[^", "a", false, false, globutil.ErrBadPattern, false, false, true, true, 0, 0},
	{"[^bc", "a", false, false, globutil.ErrBadPattern, false, false, true, true, 0, 0},
	{"a[", "a", false, false, globutil.ErrBadPattern, false, false, true, true, 0, 0},
	{"a[", "ab", false, false, globutil.ErrBadPattern, false, false, true, true, 0, 0},
	{"ad[", "ab", false, false, globutil.ErrBadPattern, false, false, true, true, 0, 0},
	{"*x", "xxx", true, true, nil, false, false, true, true, 4, 4},
	{"[abc]", "b", true, true, nil, false, false, true, true, 3, 3},
	{"**", "", true, true, nil, false, false, false, false, 38, 38},
	{"a/**", "a", true, false, nil, false, false, false, true, 7, 7},
	{"a/**", "a/", true, true, nil, false, false, false, false, 7, 7},
	{"a/**/", "a/", true, true, nil, false, false, false, false, 4, 4},
	{"a/**", "a/b", true, true, nil, false, false, false, true, 7, 7},
	{"a/**", "a/b/c", true, true, nil, false, false, false, true, 7, 7},
	{"**/c", "c", true, true, nil, !onWindows, false, false, true, 5, 4},
	{"**/c", "b/c", true, true, nil, !onWindows, false, false, true, 5, 4},
	{"**/c", "a/b/c", true, true, nil, !onWindows, false, false, true, 5, 4},
	{"**/c", "a/b", false, false, nil, !onWindows, false, false, true, 5, 4},
	{"**/c", "abcd", false, false, nil, !onWindows, false, false, true, 5, 4},
	{"**/c", "a/abc", false, false, nil, !onWindows, false, false, true, 5, 4},
	{"a/**/b", "a/b", true, true, nil, false, false, false, true, 2, 2},
	{"a/**/c", "a/b/c", true, true, nil, false, false, false, true, 2, 2},
	{"a/**/d", "a/b/c/d", true, true, nil, false, false, false, true, 1, 1},
	{"a/\\**", "a/b/c", false, false, nil, false, false, false, !onWindows, 0, 0},
	{"a/\\[*\\]", "a/bc", false, false, nil, false, false, true, !onWindows, 0, 0},
	// this fails the FilepathGlob test on Windows
	{"a/b/c", "a/b//c", false, false, nil, false, false, true, !onWindows, 1, 1},
	// odd: Glob + filepath.Glob return results
	{"a/", "a", false, false, nil, false, false, true, false, 0, 0},
	{"ab{c,d}", "abc", true, true, nil, false, true, false, true, 1, 1},
	{"ab{c,d,*}", "abcde", true, true, nil, false, true, false, true, 5, 5},
	{"ab{c,d}[", "abcd", false, false, globutil.ErrBadPattern, false, false, false, true, 0, 0},
	{"a{,bc}", "a", true, true, nil, false, false, false, true, 2, 2},
	{"a{,bc}", "abc", true, true, nil, false, false, false, true, 2, 2},
	{"a/{b/c,c/b}", "a/b/c", true, true, nil, false, false, false, true, 2, 2},
	{"a/{b/c,c/b}", "a/c/b", true, true, nil, false, false, false, true, 2, 2},
	{"a/a*{b,c}", "a/abc", true, true, nil, false, false, false, true, 1, 1},
	{"{a/{b,c},abc}", "a/b", true, true, nil, false, false, false, true, 3, 3},
	{"{a/{b,c},abc}", "a/c", true, true, nil, false, false, false, true, 3, 3},
	{"{a/{b,c},abc}", "abc", true, true, nil, false, false, false, true, 3, 3},
	{"{a/{b,c},abc}", "a/b/c", false, false, nil, false, false, false, true, 3, 3},
	{"{a/ab*}", "a/abc", true, true, nil, false, false, false, true, 1, 1},
	{"{a/*}", "a/b", true, true, nil, false, false, false, true, 3, 3},
	{"{a/abc}", "a/abc", true, true, nil, false, false, false, true, 1, 1},
	{"{a/b,a/c}", "a/c", true, true, nil, false, false, false, true, 2, 2},
	{"abc/**", "abc/b", true, true, nil, false, false, false, true, 3, 3},
	{"**/abc", "abc", true, true, nil, !onWindows, false, false, true, 2, 2},
	{"abc**", "abc/b", false, false, nil, false, false, false, true, 3, 3},
	{"**/*.txt", "abc/ßtestß.txt", true, true, nil, !onWindows, false, false, true, 1, 1},
	{"**/ß*", "abc/ßtestß.txt", true, true, nil, !onWindows, false, false, true, 1, 1},
	{"**/{a,b}", "a/b", true, true, nil, !onWindows, false, false, true, 5, 5},
	// unfortunately, io/fs can't handle this, so neither can Glob =(
	{"broken-symlink", "broken-symlink", true, true, nil, false, false, true, false, 1, 1},
	{"broken-symlink/*", "a", false, false, nil, false, true, true, true, 0, 0},
	{"broken*/*", "a", false, false, nil, false, false, true, true, 0, 0},
	{"working-symlink/c/*", "working-symlink/c/d", true, true, nil, false, false, true, !onWindows, 1, 1},
	{"working-sym*/*", "working-symlink/c", true, true, nil, false, false, true, !onWindows, 1, 1},
	{"b/**/f", "b/symlink-dir/f", true, true, nil, false, false, false, !onWindows, 2, 2},
	{"*/symlink-dir/*", "b/symlink-dir/f", true, true, nil, !onWindows, false, true, !onWindows, 2, 2},
	{"e/**", "e/**", true, true, nil, false, false, false, !onWindows, 11, 6},
	{"e/**", "e/*", true, true, nil, false, false, false, !onWindows, 11, 6},
	{"e/**", "e/?", true, true, nil, false, false, false, !onWindows, 11, 6},
	{"e/**", "e/[", true, true, nil, false, false, false, true, 11, 6},
	{"e/**", "e/]", true, true, nil, false, false, false, true, 11, 6},
	{"e/**", "e/[]", true, true, nil, false, false, false, true, 11, 6},
	{"e/**", "e/{", true, true, nil, false, false, false, true, 11, 6},
	{"e/**", "e/}", true, true, nil, false, false, false, true, 11, 6},
	{"e/**", "e/\\", true, true, nil, false, false, false, !onWindows, 11, 6},
	{"e/*", "e/*", true, true, nil, false, false, true, !onWindows, 10, 5},
	{"e/?", "e/?", true, true, nil, false, false, true, !onWindows, 7, 4},
	{"e/?", "e/*", true, true, nil, false, false, true, !onWindows, 7, 4},
	{"e/?", "e/[", true, true, nil, false, false, true, true, 7, 4},
	{"e/?", "e/]", true, true, nil, false, false, true, true, 7, 4},
	{"e/?", "e/{", true, true, nil, false, false, true, true, 7, 4},
	{"e/?", "e/}", true, true, nil, false, false, true, true, 7, 4},
	{"e/\\[", "e/[", true, true, nil, false, false, true, !onWindows, 1, 1},
	{"e/[", "e/[", false, false, globutil.ErrBadPattern, false, false, true, true, 0, 0},
	{"e/]", "e/]", true, true, nil, false, false, true, true, 1, 1},
	{"e/\\]", "e/]", true, true, nil, false, false, true, !onWindows, 1, 1},
	{"e/\\{", "e/{", true, true, nil, false, false, true, !onWindows, 1, 1},
	{"e/\\}", "e/}", true, true, nil, false, false, true, !onWindows, 1, 1},
	{"e/[\\*\\?]", "e/*", true, true, nil, false, false, true, !onWindows, 2, 2},
	{"e/[\\*\\?]", "e/?", true, true, nil, false, false, true, !onWindows, 2, 2},
	{"e/[\\*\\?]", "e/**", false, false, nil, false, false, true, !onWindows, 2, 2},
	{"e/[\\*\\?]?", "e/**", true, true, nil, false, false, true, !onWindows, 1, 1},
	{"e/{\\*,\\?}", "e/*", true, true, nil, false, false, false, !onWindows, 2, 2},
	{"e/{\\*,\\?}", "e/?", true, true, nil, false, false, false, !onWindows, 2, 2},
	{"e/\\*", "e/*", true, true, nil, false, false, true, !onWindows, 1, 1},
	{"e/\\?", "e/?", true, true, nil, false, false, true, !onWindows, 1, 1},
	{"e/\\?", "e/**", false, false, nil, false, false, true, !onWindows, 1, 1},
	{"*\\}", "}", true, true, nil, false, false, true, !onWindows, 1, 1},
	{"nonexistent-path", "a", false, false, nil, false, true, true, true, 0, 0},
	{"nonexistent-path/", "a", false, false, nil, false, true, true, true, 0, 0},
	{"nonexistent-path/file", "a", false, false, nil, false, true, true, true, 0, 0},
	{"nonexistent-path/*", "a", false, false, nil, false, true, true, true, 0, 0},
	{"nonexistent-path/**", "a", false, false, nil, false, true, true, true, 0, 0},
	{"nopermission/*", "nopermission/file", true, false, nil, true, false, true, !onWindows, 0, 0},
	{"nopermission/dir/", "nopermission/dir", false, false, nil, true, false, true, !onWindows, 0, 0},
	{"nopermission/file", "nopermission/file", true, false, nil, true, false, true, !onWindows, 0, 0},
	{"node_modules/!(.cache)/**", "node_modules/others/file.txt", true, true, nil, false, false, false, !onWindows, 0, 0},
	{"node_modules/!(.cache)/**", "node_modules/.cache/file.txt", false, false, nil, false, false, false, !onWindows, 0, 0},
	{"node_modules/!(.cache)/**", "node_modules/file.txt", true, false, nil, false, false, false, !onWindows, 0, 0},
	{"node_modules/!(.cache)/**", "node_modules/others/others/file.txt", true, true, nil, false, false, false, !onWindows, 0, 0},
}

// numResultsFilesOnly memoizes results with WithFilesOnly.
// NOTE(review): nothing in this file reads or writes it — confirm whether
// another test file in the package still needs it.
var numResultsFilesOnly []int

// numResultsNoFollow memoizes results with WithNoFollow.
// NOTE(review): not referenced in this file — see the note on
// numResultsFilesOnly.
var numResultsNoFollow []int

// numResultsAllOpts memoizes counts with every option enabled.
// NOTE(review): not referenced in this file — see the note on
// numResultsFilesOnly.
var numResultsAllOpts []int

// TestValidatePattern runs the pattern validator over every matchTests row.
func TestValidatePattern(t *testing.T) {
	for i := range matchTests {
		testValidatePatternWith(t, i, matchTests[i])
	}
}

// testValidatePatternWith checks that isValidPattern accepts tt.pattern exactly
// when the row expects no error. A panic inside the validator is reported as a
// test failure instead of aborting the run.
func testValidatePatternWith(t *testing.T, idx int, tt MatchTest) {
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		t.Errorf("#%v. Validate(%#q) panicked: %#v", idx, tt.pattern, r)
	}()

	wantValid := tt.expectedErr == nil
	if got := isValidPattern(tt.pattern, '/'); got != wantValid {
		// got and wantValid differ here, so wantValid is the value to report.
		t.Errorf("#%v. ValidatePattern(%#q) = %v want %v", idx, tt.pattern, got, wantValid)
	}
}

// TestPathMatch runs PathMatch against each matchTests row flagged testOnDisk.
func TestPathMatch(t *testing.T) {
	for idx, tt := range matchTests {
		// Nothing is read from disk here, but the inputs are converted to the
		// OS path form before matching, so rows disabled for on-disk runs are
		// skipped as well.
		if !tt.testOnDisk {
			continue
		}
		testPathMatchWith(t, idx, tt)
	}
}

// testPathMatchWith checks one row against PathMatch and, for rows flagged
// isStandard, cross-checks the outcome with filepath.Match. A panic inside the
// matcher is reported as a test failure.
func testPathMatchWith(t *testing.T, idx int, tt MatchTest) {
	defer func() {
		if r := recover(); r != nil {
			t.Errorf("#%v. Match(%#q, %#q) panicked: %#v", idx, tt.pattern, tt.testPath, r)
		}
	}()

	nativePattern := filepath.FromSlash(tt.pattern)
	nativePath := filepath.FromSlash(tt.testPath)

	matched, matchErr := PathMatch(nativePattern, nativePath)
	if matched != tt.shouldMatch || matchErr != tt.expectedErr {
		t.Errorf("#%v. PathMatch(%#q, %#q) = %v, %v want %v, %v", idx, nativePattern, nativePath, matched, matchErr, tt.shouldMatch, tt.expectedErr)
	}

	if !tt.isStandard {
		return
	}

	// Only nil-ness of the two errors is compared, not their identity.
	refMatched, refErr := filepath.Match(nativePattern, nativePath)
	if matched == refMatched && compareErrors(matchErr, refErr) {
		return
	}
	t.Errorf("#%v. PathMatch(%#q, %#q) != filepath.Match(...). Got %v, %v want %v, %v", idx, nativePattern, nativePath, matched, matchErr, refMatched, refErr)
}

// TestPathMatchFake re-runs the table with '\\' as the path separator.
func TestPathMatchFake(t *testing.T) {
	// The separator is faked as `\\`, so the whole test is skipped when
	// already running on Windows.
	if onWindows {
		return
	}

	for idx, tt := range matchTests {
		// Rows disabled for on-disk runs are skipped, as are patterns that
		// already contain a backslash.
		if !tt.testOnDisk || strings.Contains(tt.pattern, "\\") {
			continue
		}
		testPathMatchFakeWith(t, idx, tt)
	}
}

// testPathMatchFakeWith rewrites the row's pattern and path to use '\\' instead
// of '/' and checks matchWithSeparator against the row's expectations. A panic
// inside the matcher is reported as a test failure.
func testPathMatchFakeWith(t *testing.T, idx int, tt MatchTest) {
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		t.Errorf("#%v. Match(%#q, %#q) panicked: %#v", idx, tt.pattern, tt.testPath, r)
	}()

	toBackslash := func(s string) string { return strings.ReplaceAll(s, "/", "\\") }
	fakePattern := toBackslash(tt.pattern)
	fakePath := toBackslash(tt.testPath)

	matched, matchErr := matchWithSeparator(fakePattern, fakePath, '\\', true)
	if matched == tt.shouldMatch && matchErr == tt.expectedErr {
		return
	}
	t.Errorf("#%v. PathMatch(%#q, %#q) = %v, %v want %v, %v", idx, fakePattern, fakePath, matched, matchErr, tt.shouldMatch, tt.expectedErr)
}

func compareErrors(a, b error) bool {
	if a == nil {
		return b == nil
	}
	return b != nil
}


================================================
FILE: components/execd/pkg/util/glob/pattern.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package glob

// isValidPattern reports whether the glob pattern s is well-formed.
//
// A pattern is rejected when it ends in a dangling escape, contains an empty
// or unterminated character class, or has unbalanced '{' / '}'. Backslash is
// treated as an escape character unless separator itself is '\\'.
//
//nolint:gocognit
func isValidPattern(s string, separator rune) bool {
	var (
		n          = len(s)
		escapeable = separator != '\\'
		openAlts   = 0
	)

	for pos := 0; pos < n; pos++ {
		c := s[pos]

		if c == '\\' {
			if escapeable {
				// Skip the escaped byte; an escape with nothing after it is invalid.
				pos++
				if pos >= n {
					return false
				}
			}
			continue
		}

		if c == '{' {
			openAlts++
			continue
		}

		if c == '}' {
			// A closing brace needs a matching opener.
			if openAlts == 0 {
				return false
			}
			openAlts--
			continue
		}

		if c != '[' {
			continue
		}

		// Character class: step past '[' and an optional negation marker.
		pos++
		if pos >= n {
			return false
		}
		if s[pos] == '^' || s[pos] == '!' {
			pos++
		}
		// The class must contain at least one byte before its closing ']'.
		if pos >= n || s[pos] == ']' {
			return false
		}

		closed := false
		for pos < n && !closed {
			switch {
			case escapeable && s[pos] == '\\':
				// Skip the backslash and the byte it escapes.
				pos += 2
			case s[pos] == ']':
				// Leave pos on ']'; the outer loop increment steps past it.
				closed = true
			default:
				pos++
			}
		}
		if !closed {
			return false
		}
	}

	// Every alternation that was opened must have been closed.
	return openAlts == 0
}


================================================
FILE: components/execd/pkg/util/safego/safe.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package safego

import (
	"context"
	"log"
	"net/http"
	"runtime"

	runtimeutil "k8s.io/apimachinery/pkg/util/runtime"
)

// InitPanicLogger replaces runtimeutil.PanicHandlers with a single handler
// that logs the recovered value together with the current goroutine's stack.
// The context argument is unused.
func InitPanicLogger(_ context.Context) {
	logPanic := func(_ context.Context, r any) {
		// http.ErrAbortHandler is deliberately not logged.
		if r == http.ErrAbortHandler { // nolint:errorlint
			return
		}

		// The stack is captured into a fixed 64 KiB buffer.
		const size = 64 << 10
		buf := make([]byte, size)
		written := runtime.Stack(buf, false)
		trace := buf[:written]

		switch r.(type) {
		case string:
			log.Printf("Observed a panic: %s\n%s", r, trace)
		default:
			log.Printf("Observed a panic: %#v (%v)\n%s", r, r, trace)
		}
	}

	runtimeutil.PanicHandlers = []func(context.Context, any){logPanic}
}

// init sets runtimeutil.ReallyCrash to false when the package is loaded.
// NOTE(review): in k8s.io/apimachinery this flag is documented as controlling
// whether HandleCrash re-panics after running the panic handlers — confirm
// against the vendored version.
func init() {
	runtimeutil.ReallyCrash = false
}

// Go runs f on a new goroutine with runtimeutil.HandleCrash deferred around
// it, so a panic in f is passed to the registered panic handlers.
func Go(f func()) {
	guarded := func() {
		defer runtimeutil.HandleCrash()

		f()
	}
	go guarded()
}


================================================
FILE: components/execd/pkg/util/safego/safe_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package safego

import (
	"context"
	"sync"
	"testing"
)

// Test_Go starts a panicking function through Go and waits for it to finish;
// the test passes as long as the panic does not take the process down.
func Test_Go(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	InitPanicLogger(ctx)

	pending := new(sync.WaitGroup)
	pending.Add(1)

	panicking := func() {
		// Done is deferred so the wait below is released despite the panic.
		defer pending.Done()
		panic("I'm done")
	}
	Go(panicking)

	pending.Wait()
}


================================================
FILE: components/execd/pkg/web/controller/basic.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"encoding/json"
	"net/http"
	"strconv"

	"github.com/gin-gonic/gin"

	"github.com/alibaba/opensandbox/execd/pkg/web/model"
)

// basicController holds the gin request context used by the response and
// request-parsing helpers defined on it.
type basicController struct {
	ctx *gin.Context
}

// newBasicController returns a basicController bound to ctx.
func newBasicController(ctx *gin.Context) *basicController {
	ctrl := new(basicController)
	ctrl.ctx = ctx
	return ctrl
}

// RespondError writes a JSON model.ErrorResponse with the given HTTP status
// and error code. Only the first element of message is used; when no message
// is supplied the Message field is left empty.
func (c *basicController) RespondError(status int, code model.ErrorCode, message ...string) {
	text := ""
	if len(message) > 0 {
		text = message[0]
	}

	c.ctx.JSON(status, model.ErrorResponse{
		Code:    code,
		Message: text,
	})
}

// RespondSuccess replies with HTTP 200. A non-nil data value is serialized as
// the JSON body; a nil value sets only the status.
func (c *basicController) RespondSuccess(data any) {
	if data != nil {
		c.ctx.JSON(http.StatusOK, data)
		return
	}
	c.ctx.Status(http.StatusOK)
}

// QueryInt64 parses query as a base-10 int64 and returns defaultValue when
// parsing fails. query is the raw string value to parse, not a parameter name.
func (c *basicController) QueryInt64(query string, defaultValue int64) int64 {
	if parsed, err := strconv.ParseInt(query, 10, 64); err == nil {
		return parsed
	}
	return defaultValue
}

// bindJSON decodes a JSON value from the request body into target and returns
// the decoder's error, if any.
func (c *basicController) bindJSON(target any) error {
	return json.NewDecoder(c.ctx.Request.Body).Decode(target)
}


================================================
FILE: components/execd/pkg/web/controller/basic_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"encoding/json"
	"net/http"
	"net/http/httptest"
	"testing"

	"github.com/alibaba/opensandbox/execd/pkg/web/model"
	"github.com/stretchr/testify/require"
)

// TestBasicControllerRespondSuccess verifies that RespondSuccess answers 200
// and serializes the payload as JSON.
func TestBasicControllerRespondSuccess(t *testing.T) {
	ginCtx, recorder := newTestContext(http.MethodGet, "/", nil)
	controller := &basicController{ctx: ginCtx}

	controller.RespondSuccess(map[string]string{"status": "ok"})

	require.Equal(t, http.StatusOK, recorder.Code)

	decoded := map[string]string{}
	require.NoError(t, json.Unmarshal(recorder.Body.Bytes(), &decoded))
	require.Equal(t, "ok", decoded["status"])
}

// TestBasicControllerRespondError verifies that RespondError writes the given
// status together with the error code and message.
func TestBasicControllerRespondError(t *testing.T) {
	ginCtx, recorder := newTestContext(http.MethodGet, "/", nil)
	controller := &basicController{ctx: ginCtx}

	controller.RespondError(http.StatusBadRequest, model.ErrorCodeInvalidRequest, "boom")

	require.Equal(t, http.StatusBadRequest, recorder.Code)

	decoded := model.ErrorResponse{}
	require.NoError(t, json.Unmarshal(recorder.Body.Bytes(), &decoded))
	require.Equal(t, model.ErrorCodeInvalidRequest, decoded.Code)
	require.Equal(t, "boom", decoded.Message)
}

// setupBasicController builds a basicController for a request to "/" with the
// given method and returns it along with the response recorder.
func setupBasicController(method string) (*basicController, *httptest.ResponseRecorder) {
	ginCtx, recorder := newTestContext(method, "/", nil)
	return &basicController{ctx: ginCtx}, recorder
}

// TestRespondSuccessWritesPayload verifies, via setupBasicController, that
// RespondSuccess answers 200 with the JSON-encoded payload.
func TestRespondSuccessWritesPayload(t *testing.T) {
	controller, recorder := setupBasicController(http.MethodGet)

	controller.RespondSuccess(map[string]string{"status": "ok"})

	require.Equal(t, http.StatusOK, recorder.Code)

	decoded := map[string]string{}
	require.NoError(t, json.Unmarshal(recorder.Body.Bytes(), &decoded))
	require.Equal(t, "ok", decoded["status"])
}

// TestRespondErrorAddsCodeAndMessage verifies, via setupBasicController, that
// RespondError writes the status, error code and message.
func TestRespondErrorAddsCodeAndMessage(t *testing.T) {
	controller, recorder := setupBasicController(http.MethodGet)

	controller.RespondError(http.StatusBadRequest, model.ErrorCodeInvalidRequest, "invalid payload")

	require.Equal(t, http.StatusBadRequest, recorder.Code)

	decoded := model.ErrorResponse{}
	require.NoError(t, json.Unmarshal(recorder.Body.Bytes(), &decoded))
	require.Equal(t, model.ErrorCodeInvalidRequest, decoded.Code)
	require.Equal(t, "invalid payload", decoded.Message)
}

// TestQueryInt64 covers valid, empty, malformed and negative inputs.
func TestQueryInt64(t *testing.T) {
	type queryCase struct {
		name     string
		query    string
		def      int64
		expected int64
	}

	cases := []queryCase{
		{"valid number", "42", 0, 42},
		{"empty uses default", "", 5, 5},
		{"invalid uses default", "not-a-number", -1, -1},
		{"negative number", "-10", 0, -10},
	}

	// QueryInt64 does not touch the gin context, so a zero controller suffices.
	controller := &basicController{}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			require.Equalf(t, tc.expected, controller.QueryInt64(tc.query, tc.def), "QueryInt64(%q, %d)", tc.query, tc.def)
		})
	}
}


================================================
FILE: components/execd/pkg/web/controller/codeinterpreting.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"context"
	"errors"
	"fmt"
	"io"
	"net/http"
	"sync"
	"time"

	"github.com/gin-gonic/gin"

	"github.com/alibaba/opensandbox/execd/pkg/flag"
	"github.com/alibaba/opensandbox/execd/pkg/runtime"
	"github.com/alibaba/opensandbox/execd/pkg/web/model"
)

// codeRunner is the package-level runtime controller used by the code
// interpreting handlers. It is nil until InitCodeRunner is called.
var codeRunner *runtime.Controller

// InitCodeRunner assigns codeRunner a new runtime controller built from the
// Jupyter server host and token flags.
func InitCodeRunner() {
	codeRunner = runtime.NewController(flag.JupyterServerHost, flag.JupyterServerToken)
}

// CodeInterpretingController handles code execution entrypoints: code
// contexts, code runs and bash sessions. It embeds basicController for the
// shared request/response helpers.
type CodeInterpretingController struct {
	*basicController

	// chunkWriter serializes SSE event writes to prevent interleaved output.
	chunkWriter sync.Mutex
}

// NewCodeInterpretingController returns a controller bound to the given gin
// request context.
func NewCodeInterpretingController(ctx *gin.Context) *CodeInterpretingController {
	ctrl := &CodeInterpretingController{}
	ctrl.basicController = newBasicController(ctx)
	return ctrl
}

// CreateContext creates a new code execution context from the JSON request
// body and responds with the new context id alongside the submitted request.
//
// Responds 400 when the body cannot be decoded and 500 when the runtime fails
// to create the context.
func (c *CodeInterpretingController) CreateContext() {
	var payload model.CodeContextRequest
	bindErr := c.bindJSON(&payload)
	if bindErr != nil {
		msg := fmt.Sprintf("error parsing request, MAYBE invalid body format. %v", bindErr)
		c.RespondError(http.StatusBadRequest, model.ErrorCodeInvalidRequest, msg)
		return
	}

	createReq := runtime.CreateContextRequest{
		Language: runtime.Language(payload.Language),
		Cwd:      payload.Cwd,
	}
	contextID, createErr := codeRunner.CreateContext(&createReq)
	if createErr != nil {
		msg := fmt.Sprintf("error creating code context. %v", createErr)
		c.RespondError(http.StatusInternalServerError, model.ErrorCodeRuntimeError, msg)
		return
	}

	c.RespondSuccess(model.CodeContext{
		ID:                 contextID,
		CodeContextRequest: payload,
	})
}

// InterruptCode interrupts the execution of running code in a session.
// It delegates entirely to c.interrupt, which is defined outside this part of
// the file.
func (c *CodeInterpretingController) InterruptCode() {
	c.interrupt()
}

// RunCode executes code in a context and streams output via SSE.
//
// Responds 400 when the body cannot be decoded or fails validation, and 500
// when the runtime returns an error from Execute.
func (c *CodeInterpretingController) RunCode() {
	var payload model.RunCodeRequest
	if bindErr := c.bindJSON(&payload); bindErr != nil {
		msg := fmt.Sprintf("error parsing request, MAYBE invalid body format. %v", bindErr)
		c.RespondError(http.StatusBadRequest, model.ErrorCodeInvalidRequest, msg)
		return
	}

	if validationErr := payload.Validate(); validationErr != nil {
		msg := fmt.Sprintf("invalid request, validation error %v", validationErr)
		c.RespondError(http.StatusBadRequest, model.ErrorCodeInvalidRequest, msg)
		return
	}

	// The derived context is cancelled when the handler returns.
	streamCtx, stop := context.WithCancel(c.ctx.Request.Context())
	defer stop()

	execReq := c.buildExecuteCodeRequest(payload)
	execReq.Hooks = c.setServerEventsHandler(streamCtx)

	c.setupSSEResponse()
	if execErr := codeRunner.Execute(execReq); execErr != nil {
		msg := fmt.Sprintf("error running codes %v", execErr)
		c.RespondError(http.StatusInternalServerError, model.ErrorCodeRuntimeError, msg)
		return
	}

	// NOTE(review): presumably gives in-flight SSE events time to be written
	// before the handler returns — confirm.
	time.Sleep(flag.ApiGracefulShutdownTimeout)
}

// GetContext returns a specific code context by id.
//
// Responds 400 when the contextId path parameter is empty, 404 when the
// runtime reports runtime.ErrContextNotFound, and 500 for any other error.
func (c *CodeInterpretingController) GetContext() {
	contextID := c.ctx.Param("contextId")
	if contextID == "" {
		c.RespondError(http.StatusBadRequest, model.ErrorCodeMissingQuery, "missing path parameter 'contextId'")
		return
	}

	codeContext, err := codeRunner.GetContext(contextID)
	switch {
	case err == nil:
		c.RespondSuccess(codeContext)
	case errors.Is(err, runtime.ErrContextNotFound):
		c.RespondError(
			http.StatusNotFound,
			model.ErrorCodeContextNotFound,
			fmt.Sprintf("context %s not found", contextID),
		)
	default:
		c.RespondError(
			http.StatusInternalServerError,
			model.ErrorCodeRuntimeError,
			fmt.Sprintf("error getting code context %s. %v", contextID, err),
		)
	}
}

// ListContexts returns active code contexts, optionally filtered by the
// "language" query parameter. Responds 500 with the runtime's error text when
// listing fails.
func (c *CodeInterpretingController) ListContexts() {
	contexts, err := codeRunner.ListContext(c.ctx.Query("language"))
	if err == nil {
		c.RespondSuccess(contexts)
		return
	}

	c.RespondError(http.StatusInternalServerError, model.ErrorCodeRuntimeError, err.Error())
}

// DeleteContextsByLanguage deletes all contexts for a given language.
//
// Responds 400 when the "language" query parameter is empty and 500 when the
// runtime fails to delete; otherwise responds 200 with no body.
func (c *CodeInterpretingController) DeleteContextsByLanguage() {
	language := c.ctx.Query("language")
	if language == "" {
		c.RespondError(http.StatusBadRequest, model.ErrorCodeMissingQuery, "missing query parameter 'language'")
		return
	}

	if err := codeRunner.DeleteLanguageContext(runtime.Language(language)); err != nil {
		msg := fmt.Sprintf("error deleting code context %s. %v", language, err)
		c.RespondError(http.StatusInternalServerError, model.ErrorCodeRuntimeError, msg)
		return
	}

	c.RespondSuccess(nil)
}

// DeleteContext deletes a specific code context by id.
//
// Responds 400 when the contextId path parameter is empty, 404 when the
// runtime reports runtime.ErrContextNotFound, 500 for any other error, and
// 200 with no body on success.
func (c *CodeInterpretingController) DeleteContext() {
	contextID := c.ctx.Param("contextId")
	if contextID == "" {
		c.RespondError(
			http.StatusBadRequest,
			model.ErrorCodeMissingQuery,
			"missing path parameter 'contextId'",
		)
		return
	}

	err := codeRunner.DeleteContext(contextID)
	if err != nil {
		// Flat error flow (no else after return), matching GetContext and
		// DeleteSession.
		if errors.Is(err, runtime.ErrContextNotFound) {
			c.RespondError(
				http.StatusNotFound,
				model.ErrorCodeContextNotFound,
				fmt.Sprintf("context %s not found", contextID),
			)
			return
		}
		c.RespondError(
			http.StatusInternalServerError,
			model.ErrorCodeRuntimeError,
			fmt.Sprintf("error deleting code context %s. %v", contextID, err),
		)
		return
	}

	c.RespondSuccess(nil)
}

// CreateSession creates a new bash session (create_session API).
// An empty body is allowed and is treated as default options (no cwd override):
// an io.EOF from decoding is not treated as an error.
//
// Responds 400 for any other decode error and 500 when the runtime fails to
// create the session.
func (c *CodeInterpretingController) CreateSession() {
	var payload model.CreateSessionRequest
	bindErr := c.bindJSON(&payload)
	if bindErr != nil && !errors.Is(bindErr, io.EOF) {
		msg := fmt.Sprintf("error parsing request. %v", bindErr)
		c.RespondError(http.StatusBadRequest, model.ErrorCodeInvalidRequest, msg)
		return
	}

	createReq := runtime.CreateContextRequest{Cwd: payload.Cwd}
	sessionID, createErr := codeRunner.CreateBashSession(&createReq)
	if createErr != nil {
		msg := fmt.Sprintf("error creating session. %v", createErr)
		c.RespondError(http.StatusInternalServerError, model.ErrorCodeRuntimeError, msg)
		return
	}

	c.RespondSuccess(model.CreateSessionResponse{SessionID: sessionID})
}

// RunInSession runs code in an existing bash session and streams output via SSE (run_in_session API).
//
// Responds 400 when the sessionId path parameter is empty or the body cannot
// be decoded or fails validation, and 500 when the runtime returns an error.
func (c *CodeInterpretingController) RunInSession() {
	sessionID := c.ctx.Param("sessionId")
	if sessionID == "" {
		c.RespondError(http.StatusBadRequest, model.ErrorCodeMissingQuery, "missing path parameter 'sessionId'")
		return
	}

	var payload model.RunInSessionRequest
	if bindErr := c.bindJSON(&payload); bindErr != nil {
		msg := fmt.Sprintf("error parsing request. %v", bindErr)
		c.RespondError(http.StatusBadRequest, model.ErrorCodeInvalidRequest, msg)
		return
	}
	if validationErr := payload.Validate(); validationErr != nil {
		msg := fmt.Sprintf("invalid request. %v", validationErr)
		c.RespondError(http.StatusBadRequest, model.ErrorCodeInvalidRequest, msg)
		return
	}

	// The derived context is cancelled when the handler returns.
	streamCtx, stop := context.WithCancel(c.ctx.Request.Context())
	defer stop()

	execReq := &runtime.ExecuteCodeRequest{
		Language: runtime.Bash,
		Context:  sessionID,
		Code:     payload.Code,
		Cwd:      payload.Cwd,
		// TimeoutMs is given in milliseconds.
		Timeout: time.Duration(payload.TimeoutMs) * time.Millisecond,
	}
	execReq.Hooks = c.setServerEventsHandler(streamCtx)

	c.setupSSEResponse()
	if runErr := codeRunner.RunInBashSession(streamCtx, execReq); runErr != nil {
		msg := fmt.Sprintf("error running in session. %v", runErr)
		c.RespondError(http.StatusInternalServerError, model.ErrorCodeRuntimeError, msg)
		return
	}

	// NOTE(review): presumably gives in-flight SSE events time to be written
	// before the handler returns — confirm.
	time.Sleep(flag.ApiGracefulShutdownTimeout)
}

// DeleteSession tears down the bash session named by the "sessionId" path
// parameter (delete_session API). It answers 400 when the parameter is empty,
// 404 when the runner reports runtime.ErrContextNotFound, and 500 otherwise.
func (c *CodeInterpretingController) DeleteSession() {
	id := c.ctx.Param("sessionId")
	if id == "" {
		c.RespondError(
			http.StatusBadRequest,
			model.ErrorCodeMissingQuery,
			"missing path parameter 'sessionId'",
		)
		return
	}

	switch err := codeRunner.DeleteBashSession(id); {
	case err == nil:
		c.RespondSuccess(nil)
	case errors.Is(err, runtime.ErrContextNotFound):
		c.RespondError(
			http.StatusNotFound,
			model.ErrorCodeContextNotFound,
			fmt.Sprintf("session %s not found", id),
		)
	default:
		c.RespondError(
			http.StatusInternalServerError,
			model.ErrorCodeRuntimeError,
			fmt.Sprintf("error deleting session %s. %v", id, err),
		)
	}
}

// buildExecuteCodeRequest maps an API-level RunCodeRequest onto the runtime's
// ExecuteCodeRequest. When the request's context carries no language, the
// language falls back to runtime.Command.
func (c *CodeInterpretingController) buildExecuteCodeRequest(request model.RunCodeRequest) *runtime.ExecuteCodeRequest {
	language := runtime.Language(request.Context.Language)
	if language == "" {
		language = runtime.Command
	}

	return &runtime.ExecuteCodeRequest{
		Language: language,
		Code:     request.Code,
		Context:  request.Context.ID,
	}
}

// interrupt asks the runner to interrupt the execution identified by the "id"
// query parameter. It answers 400 when the parameter is missing and 500 when
// the runner returns an error.
func (c *CodeInterpretingController) interrupt() {
	session := c.ctx.Query("id")
	if session == "" {
		c.RespondError(
			http.StatusBadRequest,
			model.ErrorCodeMissingQuery,
			"missing query parameter 'id'",
		)
		return
	}

	if err := codeRunner.Interrupt(session); err != nil {
		c.RespondError(
			http.StatusInternalServerError,
			model.ErrorCodeRuntimeError,
			// Spelling fixed: the message previously read "interruptting".
			fmt.Sprintf("error interrupting code context. %v", err),
		)
		return
	}

	c.RespondSuccess(nil)
}


================================================
FILE: components/execd/pkg/web/controller/codeinterpreting_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"encoding/json"
	"net/http"
	"testing"

	"github.com/gin-gonic/gin"

	"github.com/alibaba/opensandbox/execd/pkg/runtime"
	"github.com/alibaba/opensandbox/execd/pkg/web/model"
	"github.com/stretchr/testify/require"
)

func TestBuildExecuteCodeRequestDefaultsToCommand(t *testing.T) {
	ctrl := &CodeInterpretingController{}
	req := model.RunCodeRequest{
		Code: "echo 1",
		Context: model.CodeContext{
			ID:                 "session-1",
			CodeContextRequest: model.CodeContextRequest{},
		},
	}

	execReq := ctrl.buildExecuteCodeRequest(req)

	require.Equal(t, runtime.Command, execReq.Language, "expected default language")
	require.Equal(t, "session-1", execReq.Context)
	require.Equal(t, "echo 1", execReq.Code)
}

func TestBuildExecuteCodeRequestRespectsLanguage(t *testing.T) {
	ctrl := &CodeInterpretingController{}
	req := model.RunCodeRequest{
		Code: "print(1)",
		Context: model.CodeContext{
			ID: "session-2",
			CodeContextRequest: model.CodeContextRequest{
				Language: "python",
			},
		},
	}

	execReq := ctrl.buildExecuteCodeRequest(req)

	require.Equal(t, runtime.Language("python"), execReq.Language)
}

// TestGetContext_NotFoundReturns404 checks that looking up an unknown context
// id yields a 404 with the context-not-found error code and message.
func TestGetContext_NotFoundReturns404(t *testing.T) {
	ginCtx, recorder := newTestContext(http.MethodGet, "/code/contexts/missing", nil)
	ginCtx.Params = append(ginCtx.Params, gin.Param{Key: "contextId", Value: "missing"})
	controller := NewCodeInterpretingController(ginCtx)

	// Swap in a fresh runner for the duration of the test and restore it afterwards.
	saved := codeRunner
	codeRunner = runtime.NewController("", "")
	t.Cleanup(func() { codeRunner = saved })

	controller.GetContext()

	require.Equal(t, http.StatusNotFound, recorder.Code)

	var body model.ErrorResponse
	require.NoError(t, json.Unmarshal(recorder.Body.Bytes(), &body))
	require.Equal(t, model.ErrorCodeContextNotFound, body.Code)
	require.Equal(t, "context missing not found", body.Message)
}

// TestGetContext_MissingIDReturns400 checks that GetContext rejects a request
// that carries no "contextId" path parameter.
func TestGetContext_MissingIDReturns400(t *testing.T) {
	ginCtx, recorder := newTestContext(http.MethodGet, "/code/contexts/", nil)

	NewCodeInterpretingController(ginCtx).GetContext()

	require.Equal(t, http.StatusBadRequest, recorder.Code)

	var body model.ErrorResponse
	require.NoError(t, json.Unmarshal(recorder.Body.Bytes(), &body))
	require.Equal(t, model.ErrorCodeMissingQuery, body.Code)
	require.Equal(t, "missing path parameter 'contextId'", body.Message)
}


================================================
FILE: components/execd/pkg/web/controller/command.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"context"
	"fmt"
	"net/http"
	"strconv"
	"time"

	"github.com/alibaba/opensandbox/execd/pkg/flag"
	"github.com/alibaba/opensandbox/execd/pkg/runtime"
	"github.com/alibaba/opensandbox/execd/pkg/web/model"
)

// RunCommand binds and validates a RunCommandRequest, executes the command
// through the runner, and streams its output to the client via SSE.
// Bind and validation failures answer 400; runner failures answer 500.
func (c *CodeInterpretingController) RunCommand() {
	var body model.RunCommandRequest
	if bindErr := c.bindJSON(&body); bindErr != nil {
		c.RespondError(
			http.StatusBadRequest,
			model.ErrorCodeInvalidRequest,
			fmt.Sprintf("error parsing request, MAYBE invalid body format. %v", bindErr),
		)
		return
	}

	if validationErr := body.Validate(); validationErr != nil {
		c.RespondError(
			http.StatusBadRequest,
			model.ErrorCodeInvalidRequest,
			fmt.Sprintf("invalid request, validation error %v", validationErr),
		)
		return
	}

	// The events handler is bound to a context derived from the HTTP request.
	ctx, cancel := context.WithCancel(c.ctx.Request.Context())
	defer cancel()

	execReq := c.buildExecuteCommandRequest(body)
	execReq.Hooks = c.setServerEventsHandler(ctx)

	c.setupSSEResponse()
	if runErr := codeRunner.Execute(execReq); runErr != nil {
		c.RespondError(
			http.StatusInternalServerError,
			model.ErrorCodeRuntimeError,
			fmt.Sprintf("error running commands %v", runErr),
		)
		return
	}

	time.Sleep(flag.ApiGracefulShutdownTimeout)
}

// InterruptCommand stops a running shell command session.
// It delegates to the shared interrupt helper, which reads the target from
// the "id" query parameter.
func (c *CodeInterpretingController) InterruptCommand() {
	c.interrupt()
}

// GetCommandStatus reports the status of the command named by the "id" path
// parameter. It answers 400 when the id is empty and 404 when the runner
// cannot return a status for it.
func (c *CodeInterpretingController) GetCommandStatus() {
	id := c.ctx.Param("id")
	if id == "" {
		c.RespondError(http.StatusBadRequest, model.ErrorCodeInvalidRequest, "missing command execution id")
		return
	}

	st, err := codeRunner.GetCommandStatus(id)
	if err != nil {
		c.RespondError(http.StatusNotFound, model.ErrorCodeInvalidRequest, err.Error())
		return
	}

	var out model.CommandStatusResponse
	out.ID = st.Session
	out.Running = st.Running
	out.ExitCode = st.ExitCode
	out.Error = st.Error
	out.Content = st.Content

	// Timestamps are copied only when the runner actually recorded them.
	if started := st.StartedAt; !started.IsZero() {
		out.StartedAt = started
	}
	if finished := st.FinishedAt; finished != nil {
		out.FinishedAt = finished
	}

	c.RespondSuccess(out)
}

// GetBackgroundCommandOutput writes the output of the command named by the
// "id" path parameter as plain text, starting at the optional "cursor" query
// parameter (default 0). The cursor returned by the runner is exposed in the
// EXECD-COMMANDS-TAIL-CURSOR response header.
func (c *CodeInterpretingController) GetBackgroundCommandOutput() {
	commandID := c.ctx.Param("id")
	if commandID == "" {
		c.RespondError(http.StatusBadRequest, model.ErrorCodeMissingQuery, "missing command execution id")
		return
	}

	from := c.QueryInt64(c.ctx.Query("cursor"), 0)
	text, next, seekErr := codeRunner.SeekBackgroundCommandOutput(commandID, from)
	if seekErr != nil {
		c.RespondError(http.StatusBadRequest, model.ErrorCodeInvalidRequest, seekErr.Error())
		return
	}

	nextCursor := strconv.FormatInt(next, 10)
	c.ctx.Header("EXECD-COMMANDS-TAIL-CURSOR", nextCursor)
	c.ctx.Header("Content-Type", "text/plain; charset=utf-8")
	c.ctx.String(http.StatusOK, "%s", text)
}

// buildExecuteCommandRequest maps an API-level RunCommandRequest onto the
// runtime's ExecuteCodeRequest. The language is runtime.BackgroundCommand when
// request.Background is set and runtime.Command otherwise; every other field
// is copied identically in both cases, with TimeoutMs converted to a Duration.
func (c *CodeInterpretingController) buildExecuteCommandRequest(request model.RunCommandRequest) *runtime.ExecuteCodeRequest {
	execReq := &runtime.ExecuteCodeRequest{
		Language: runtime.Command,
		Code:     request.Command,
		Cwd:      request.Cwd,
		Timeout:  time.Duration(request.TimeoutMs) * time.Millisecond,
		Gid:      request.Gid,
		Uid:      request.Uid,
		Envs:     request.Envs,
	}
	if request.Background {
		execReq.Language = runtime.BackgroundCommand
	}
	return execReq
}


================================================
FILE: components/execd/pkg/web/controller/command_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"encoding/json"
	"net/http"
	"net/http/httptest"
	"reflect"
	"testing"

	"github.com/alibaba/opensandbox/execd/pkg/runtime"
	"github.com/alibaba/opensandbox/execd/pkg/web/model"
	"github.com/stretchr/testify/require"
)

func TestBuildExecuteCommandRequestForwardsEnvs(t *testing.T) {
	ctrl := &CodeInterpretingController{}
	envs := map[string]string{"FOO": "bar", "BAZ": "qux"}
	req := model.RunCommandRequest{
		Command: "echo hi",
		Cwd:     "/tmp",
		Envs:    envs,
	}

	execReq := ctrl.buildExecuteCommandRequest(req)

	require.Equal(t, runtime.Command, execReq.Language)
	require.True(t, reflect.DeepEqual(execReq.Envs, envs), "expected envs to be forwarded")
	require.Equal(t, "/tmp", execReq.Cwd)
}

func TestBuildExecuteCommandRequestForwardsEnvsBackground(t *testing.T) {
	ctrl := &CodeInterpretingController{}
	envs := map[string]string{"FOO": "bar"}
	req := model.RunCommandRequest{
		Command:    "echo hi",
		Background: true,
		Envs:       envs,
	}

	execReq := ctrl.buildExecuteCommandRequest(req)

	require.Equal(t, runtime.BackgroundCommand, execReq.Language)
	require.True(t, reflect.DeepEqual(execReq.Envs, envs), "expected envs to be forwarded")
}

// setupCommandController builds a CodeInterpretingController bound to a
// body-less test request, together with the recorder that captures its response.
func setupCommandController(method, path string) (*CodeInterpretingController, *httptest.ResponseRecorder) {
	ginCtx, recorder := newTestContext(method, path, nil)
	return NewCodeInterpretingController(ginCtx), recorder
}

// TestGetCommandStatus_MissingID checks that GetCommandStatus answers 400 with
// the invalid-request code when no "id" path parameter is present.
func TestGetCommandStatus_MissingID(t *testing.T) {
	controller, recorder := setupCommandController(http.MethodGet, "/command/status/")

	controller.GetCommandStatus()

	require.Equal(t, http.StatusBadRequest, recorder.Code)

	var body model.ErrorResponse
	require.NoError(t, json.Unmarshal(recorder.Body.Bytes(), &body))
	require.Equal(t, model.ErrorCodeInvalidRequest, body.Code)
	require.Equal(t, "missing command execution id", body.Message)
}

// TestGetBackgroundCommandOutput_MissingID checks that
// GetBackgroundCommandOutput answers 400 with the missing-query code when no
// "id" path parameter is present.
func TestGetBackgroundCommandOutput_MissingID(t *testing.T) {
	controller, recorder := setupCommandController(http.MethodGet, "/command/logs/")

	controller.GetBackgroundCommandOutput()

	require.Equal(t, http.StatusBadRequest, recorder.Code)

	var body model.ErrorResponse
	require.NoError(t, json.Unmarshal(recorder.Body.Bytes(), &body))
	require.Equal(t, model.ErrorCodeMissingQuery, body.Code)
	require.Equal(t, "missing command execution id", body.Message)
}


================================================
FILE: components/execd/pkg/web/controller/filesystem.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package controller

import (
	"fmt"
	"net/http"
	"os"
	"os/user"
	"path/filepath"
	"strconv"
	"strings"
	"syscall"

	"github.com/gin-gonic/gin"

	"github.com/alibaba/opensandbox/execd/pkg/util/glob"
	"github.com/alibaba/opensandbox/execd/pkg/web/model"
)

// FilesystemController handles file system operations.
// It has no fields of its own: the request context and the bind/respond
// helpers its methods use are promoted from the embedded *basicController.
type FilesystemController struct {
	*basicController
}

// NewFilesystemController returns a FilesystemController whose embedded basic
// controller is created from ctx.
func NewFilesystemController(ctx *gin.Context) *FilesystemController {
	controller := new(FilesystemController)
	controller.basicController = newBasicController(ctx)
	return controller
}

// handleFileError writes the error response for a failed file operation:
// 404 with ErrorCodeFileNotFound when os.IsNotExist(err) holds, otherwise
// 500 with ErrorCodeRuntimeError.
func (c *FilesystemController) handleFileError(err error) {
	if !os.IsNotExist(err) {
		c.RespondError(
			http.StatusInternalServerError,
			model.ErrorCodeRuntimeError,
			fmt.Sprintf("error accessing file: %v", err),
		)
		return
	}

	c.RespondError(
		http.StatusNotFound,
		model.ErrorCodeFileNotFound,
		fmt.Sprintf("file not found. %v", err),
	)
}

// GetFilesInfo responds with a map from each "path" query value to that
// file's metadata. With no paths the map is empty; the first lookup failure
// ends the request with a file error response.
func (c *FilesystemController) GetFilesInfo() {
	requested := c.ctx.QueryArray("path")

	// With zero paths the loop is skipped and the empty map is returned as-is.
	infos := make(map[string]model.FileInfo, len(requested))
	for i := range requested {
		info, err := GetFileInfo(requested[i])
		if err != nil {
			c.handleFileError(err)
			return
		}
		infos[requested[i]] = info
	}

	c.RespondSuccess(infos)
}

// RemoveFiles deletes every file listed in the "path" query values, in order.
// The first failure ends the request with a 500; files deleted before it stay
// deleted.
func (c *FilesystemController) RemoveFiles() {
	targets := c.ctx.QueryArray("path")
	for i := range targets {
		target := targets[i]
		err := DeleteFile(target)
		if err == nil {
			continue
		}
		c.RespondError(
			http.StatusInternalServerError,
			model.ErrorCodeRuntimeError,
			fmt.Sprintf("error removing file %s. %v", target, err),
		)
		return
	}

	c.RespondSuccess(nil)
}

// ChmodFiles applies the permissions in the JSON body, a map from file path to
// model.Permission. A body that fails to bind answers 400; the first file
// whose permissions cannot be changed answers 500.
func (c *FilesystemController) ChmodFiles() {
	var permsByPath map[string]model.Permission
	if bindErr := c.bindJSON(&permsByPath); bindErr != nil {
		c.RespondError(
			http.StatusBadRequest,
			model.ErrorCodeInvalidRequest,
			fmt.Sprintf("error parsing request, MAYBE invalid body format. %v", bindErr),
		)
		return
	}

	for target, perm := range permsByPath {
		if chmodErr := ChmodFile(target, perm); chmodErr != nil {
			c.RespondError(
				http.StatusInternalServerError,
				model.ErrorCodeRuntimeError,
				fmt.Sprintf("error changing permissions for %s. %v", target, chmodErr),
			)
			return
		}
	}

	c.RespondSuccess(nil)
}

// RenameFiles applies the rename items in the JSON body in order. A body that
// fails to bind answers 400; the first failing rename ends the request with a
// file error response.
func (c *FilesystemController) RenameFiles() {
	var items []model.RenameFileItem
	if bindErr := c.bindJSON(&items); bindErr != nil {
		c.RespondError(
			http.StatusBadRequest,
			model.ErrorCodeInvalidRequest,
			fmt.Sprintf("error parsing request, MAYBE invalid body format. %v", bindErr),
		)
		return
	}

	for i := range items {
		renameErr := RenameFile(items[i])
		if renameErr != nil {
			c.handleFileError(renameErr)
			return
		}
	}

	c.RespondSuccess(nil)
}

// MakeDirs creates the directories in the JSON body, a map from directory path
// to the model.Permission to apply. A body that fails to bind answers 400; the
// first directory that cannot be created ends the request with a file error
// response.
func (c *FilesystemController) MakeDirs() {
	var permsByDir map[string]model.Permission
	if bindErr := c.bindJSON(&permsByDir); bindErr != nil {
		c.RespondError(
			http.StatusBadRequest,
			model.ErrorCodeInvalidRequest,
			fmt.Sprintf("error parsing request, MAYBE invalid body format. %v", bindErr),
		)
		return
	}

	for target, perm := range permsByDir {
		mkdirErr := MakeDir(target, perm)
		if mkdirErr != nil {
			c.handleFileError(mkdirErr)
			return
		}
	}

	c.RespondSuccess(nil)
}

// RemoveDirs removes each "path" query value with os.RemoveAll, in order.
// The first failure ends the request with a 500.
func (c *FilesystemController) RemoveDirs() {
	targets := c.ctx.QueryArray("path")
	for i := range targets {
		target := targets[i]
		err := os.RemoveAll(target)
		if err == nil {
			continue
		}
		c.RespondError(
			http.StatusInternalServerError,
			model.ErrorCodeRuntimeError,
			fmt.Sprintf("error removing directory %s. %v", target, err),
		)
		return
	}

	c.RespondSuccess(nil)
}

// SearchFiles walks the directory given by the "path" query parameter and
// responds with metadata for every non-directory entry whose base name matches
// the "pattern" query parameter (default "**").
//
// Responses: 400 when "path" is missing; a file error response (404 or 500)
// when the root cannot be stat'ed; 500 when the walk fails, including an
// invalid pattern.
func (c *FilesystemController) SearchFiles() {
	queryPath := c.ctx.Query("path")
	if queryPath == "" {
		c.RespondError(
			http.StatusBadRequest,
			model.ErrorCodeMissingQuery,
			"missing query parameter 'path'",
		)
		return
	}

	// Keep the caller's value in its own variable: filepath.Abs returns an
	// empty string on failure, which would otherwise blank out the path in
	// the error message.
	root, err := filepath.Abs(queryPath)
	if err != nil {
		c.RespondError(
			http.StatusInternalServerError,
			model.ErrorCodeRuntimeError,
			fmt.Sprintf("error converting path %s to absolute. %v", queryPath, err),
		)
		return
	}

	if _, err = os.Stat(root); err != nil {
		c.handleFileError(err)
		return
	}

	pattern := c.ctx.Query("pattern")
	if pattern == "" {
		pattern = "**"
	}

	files := make([]model.FileInfo, 0, 16)
	err = filepath.Walk(root, func(filePath string, info os.FileInfo, err error) error {
		// An entry that no longer exists is skipped rather than failing the search.
		if os.IsNotExist(err) {
			return nil
		}
		if err != nil {
			return fmt.Errorf("error accessing path %s: %w", filePath, err)
		}
		if info.IsDir() {
			return nil
		}

		// The pattern is matched against the base name only, not the full path.
		match, err := glob.PathMatch(pattern, info.Name())
		if err != nil {
			return fmt.Errorf("invalid pattern %s: %w", pattern, err)
		}
		if !match {
			return nil
		}

		// Checked assertion: report an error instead of panicking if the
		// platform hands back a different Sys() type.
		sys, ok := info.Sys().(*syscall.Stat_t)
		if !ok {
			return fmt.Errorf("error reading ownership for file %s: unexpected stat type %T", filePath, info.Sys())
		}

		// A file may be owned by an id that has no passwd/group entry. Fall
		// back to the numeric id for that case so one such file does not fail
		// the whole search; any other lookup error is still reported.
		uid := strconv.FormatUint(uint64(sys.Uid), 10)
		ownerName := uid
		if owner, lookupErr := user.LookupId(uid); lookupErr == nil {
			ownerName = owner.Username
		} else if _, unknown := lookupErr.(user.UnknownUserIdError); !unknown {
			return fmt.Errorf("error lookup owner for file %s: %w", filePath, lookupErr)
		}

		gid := strconv.FormatUint(uint64(sys.Gid), 10)
		groupName := gid
		if group, lookupErr := user.LookupGroupId(gid); lookupErr == nil {
			groupName = group.Name
		} else if _, unknown := lookupErr.(user.UnknownGroupIdError); !unknown {
			return fmt.Errorf("error lookup group for file %s: %w", filePath, lookupErr)
		}

		// Mode is reported as the octal digits read as a decimal int
		// (permission bits 0o644 become 644).
		modeDigits := strconv.FormatInt(int64(info.Mode().Perm()), 8)
		mode, _ := strconv.Atoi(modeDigits)

		files = append(files, model.FileInfo{
			Path:       filePath,
			Size:       info.Size(),
			ModifiedAt: info.ModTime(),
			CreatedAt:  getFileCreateTime(info),
			Permission: model.Permission{
				Owner: ownerName,
				Group: groupName,
				Mode:  mode,
			},
		})

		return nil
	})

	if err != nil {
		c.RespondError(
			http.StatusInternalServerError,
			model.ErrorCodeRuntimeError,
			fmt.Sprintf("error searching files. %v", err),
		)
		return
	}

	c.RespondSuccess(files)
}

// ReplaceContent rewrites each file named in the JSON body, a map from file
// path to a model.ReplaceFileContentItem, replacing every occurrence of
// item.Old with item.New and writing the result back with the file's existing
// mode.
//
// A body that fails to bind answers 400. Files are processed in map iteration
// order; the first failure ends the request with a file error response, and
// files rewritten before it stay rewritten.
func (c *FilesystemController) ReplaceContent() {
	var request map[string]model.ReplaceFileContentItem
	if err := c.bindJSON(&request); err != nil {
		c.RespondError(
			http.StatusBadRequest,
			model.ErrorCodeInvalidRequest,
			fmt.Sprintf("error parsing request, MAYBE invalid body format. %v", err),
		)
		return
	}

	for file, item := range request {
		absPath, err := filepath.Abs(file)
		if err != nil {
			c.handleFileError(err)
			return
		}

		// Stat once: the result both proves the file exists and supplies the
		// mode reused for the write below.
		fileInfo, err := os.Stat(absPath)
		if err != nil {
			c.handleFileError(err)
			return
		}

		content, err := os.ReadFile(absPath)
		if err != nil {
			c.handleFileError(err)
			return
		}

		newContent := strings.ReplaceAll(string(content), item.Old, item.New)

		if err = os.WriteFile(absPath, []byte(newContent), fileInfo.Mode()); err != nil {
			c.handleFileError(err)
			return
		}
	}

	c.RespondSuccess(nil)
}


================================================
FILE: components/execd/pkg/web/controller/filesystem_download.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"path/filepath"
	"strconv"

	"github.com/alibaba/opensandbox/execd/pkg/web/model"
)

// DownloadFile serves the file named by the "path" query parameter as an
// attachment, with support for range requests.
//
// Responses: 400 when "path" is missing; a file error response when the file
// cannot be opened; 500 when it cannot be stat'ed or positioned; 416 when the
// Range header cannot be parsed; 206 with the first requested range when one
// is present; otherwise the response is produced by http.ServeContent.
func (c *FilesystemController) DownloadFile() {
	filePath := c.ctx.Query("path")
	if filePath == "" {
		c.RespondError(
			http.StatusBadRequest,
			model.ErrorCodeMissingQuery,
			"missing query parameter 'path'",
		)
		return
	}

	file, err := os.Open(filePath)
	if err != nil {
		c.handleFileError(err)
		return
	}
	defer file.Close()

	fileInfo, err := file.Stat()
	if err != nil {
		c.RespondError(
			http.StatusInternalServerError,
			model.ErrorCodeRuntimeError,
			fmt.Sprintf("error getting file stat info: %s. %v", filePath, err),
		)
		return
	}
	size := fileInfo.Size()
	baseName := filepath.Base(filePath)

	// The download headers are applied only once the body to send is known.
	// Setting them up front would leave the file's Content-Length and the
	// attachment disposition on the JSON error responses below.
	setDownloadHeaders := func(contentLength int64) {
		c.ctx.Header("Content-Type", "application/octet-stream")
		c.ctx.Header("Content-Disposition", formatContentDisposition(baseName))
		c.ctx.Header("Content-Length", strconv.FormatInt(contentLength, 10))
	}

	if rangeHeader := c.ctx.GetHeader("Range"); rangeHeader != "" {
		ranges, err := ParseRange(rangeHeader, size)
		if err != nil {
			// Tell the client the full length so it can retry with a valid range.
			c.ctx.Header("Content-Range", fmt.Sprintf("bytes */%d", size))
			c.RespondError(
				http.StatusRequestedRangeNotSatisfiable,
				model.ErrorCodeUnknown,
			)
			return
		}
		if len(ranges) > 0 {
			// Only the first range is served.
			r := ranges[0]

			// Position the file before committing to a 206, so a seek failure
			// is reported instead of streaming bytes from the wrong offset.
			if _, err := file.Seek(r.start, io.SeekStart); err != nil {
				c.RespondError(
					http.StatusInternalServerError,
					model.ErrorCodeRuntimeError,
					fmt.Sprintf("error seeking file: %s. %v", filePath, err),
				)
				return
			}

			setDownloadHeaders(r.length)
			c.ctx.Header("Content-Range", fmt.Sprintf("bytes %d-%d/%d", r.start, r.start+r.length-1, size))
			c.ctx.Status(http.StatusPartialContent)

			// Best effort: once streaming has begun the status can no longer change.
			_, _ = io.CopyN(c.ctx.Writer, file, r.length)
			return
		}
	}

	setDownloadHeaders(size)
	http.ServeContent(c.ctx.Writer, c.ctx.Request, baseName, fileInfo.ModTime(), file)
}

// formatContentDisposition formats the Content-Disposition header value for an
// attachment named filename, following RFC 6266 and RFC 5987.
//
//   - Printable ASCII names are emitted as filename="...", with any double
//     quote or backslash escaped by a backslash so the quoted-string stays
//     well formed.
//   - Names containing non-ASCII or control characters are percent-encoded
//     and emitted both as the quoted fallback and as filename*=UTF-8''....
func formatContentDisposition(filename string) string {
	needsEncoding := false
	needsEscaping := false
	for _, r := range filename {
		switch {
		case r > 127 || r < 0x20 || r == 0x7f:
			// Non-ASCII (including invalid UTF-8, which ranges as U+FFFD) and
			// control characters cannot appear raw in the header value.
			needsEncoding = true
		case r == '"' || r == '\\':
			needsEscaping = true
		}
	}

	if needsEncoding {
		// Format: attachment; filename="fallback"; filename*=UTF-8''encoded_name
		encodedFilename := url.PathEscape(filename)
		return "attachment; filename=\"" + encodedFilename + "\"; filename*=UTF-8''" + encodedFilename
	}

	if !needsEscaping {
		return "attachment; filename=\"" + filename + "\""
	}

	// Only ASCII remains here, so escaping byte by byte is safe.
	escaped := make([]byte, 0, len(filename)+2)
	for i := 0; i < len(filename); i++ {
		if ch := filename[i]; ch == '"' || ch == '\\' {
			escaped = append(escaped, '\\')
		}
		escaped = append(escaped, filename[i])
	}
	return "attachment; filename=\"" + string(escaped) + "\""
}


================================================
FILE: components/execd/pkg/web/controller/filesystem_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"encoding/json"
	"fmt"
	"net/http"
	"net/http/httptest"
	"net/url"
	"os"
	"path/filepath"
	"testing"

	"github.com/alibaba/opensandbox/execd/pkg/web/model"
	"github.com/stretchr/testify/require"
)

// newFilesystemController is a test helper that builds a FilesystemController
// for the given request, together with the recorder capturing its response.
func newFilesystemController(t *testing.T, method, rawURL string, body []byte) (*FilesystemController, *httptest.ResponseRecorder) {
	t.Helper()
	ginCtx, recorder := newTestContext(method, rawURL, body)
	return NewFilesystemController(ginCtx), recorder
}

// TestFilesystemControllerGetFilesInfo checks that GetFilesInfo returns an
// entry, keyed by the requested path, with a path and non-zero size.
func TestFilesystemControllerGetFilesInfo(t *testing.T) {
	filePath := filepath.Join(t.TempDir(), "foo.txt")
	require.NoError(t, os.WriteFile(filePath, []byte("demo"), 0o644))

	requestURL := fmt.Sprintf("/files/info?path=%s", url.QueryEscape(filePath))
	controller, recorder := newFilesystemController(t, http.MethodGet, requestURL, nil)

	controller.GetFilesInfo()

	require.Equal(t, http.StatusOK, recorder.Code)

	var infos map[string]model.FileInfo
	require.NoError(t, json.Unmarshal(recorder.Body.Bytes(), &infos))

	entry, found := infos[filePath]
	require.True(t, found, "response missing entry for %s", filePath)
	require.NotEmpty(t, entry.Path)
	require.NotZero(t, entry.Size)
}

// TestFilesystemControllerSearchFiles checks that SearchFiles returns only the
// file matching the "*.txt" pattern.
func TestFilesystemControllerSearchFiles(t *testing.T) {
	root := t.TempDir()
	txtFile := filepath.Join(root, "alpha.txt")
	logFile := filepath.Join(root, "beta.log")
	require.NoError(t, os.WriteFile(txtFile, []byte("alpha"), 0o644))
	require.NoError(t, os.WriteFile(logFile, []byte("beta"), 0o644))

	requestURL := fmt.Sprintf(
		"/files/search?path=%s&pattern=%s",
		url.QueryEscape(root),
		url.QueryEscape("*.txt"),
	)
	controller, recorder := newFilesystemController(t, http.MethodGet, requestURL, nil)

	controller.SearchFiles()

	require.Equal(t, http.StatusOK, recorder.Code)

	var matches []model.FileInfo
	require.NoError(t, json.Unmarshal(recorder.Body.Bytes(), &matches))
	require.Len(t, matches, 1)
	require.Equal(t, txtFile, matches[0].Path)
}

// TestFilesystemControllerReplaceContent checks that ReplaceContent rewrites
// the requested substring in the file on disk.
func TestFilesystemControllerReplaceContent(t *testing.T) {
	filePath := filepath.Join(t.TempDir(), "content.txt")
	require.NoError(t, os.WriteFile(filePath, []byte("hello world"), 0o644))

	replacements := map[string]model.ReplaceFileContentItem{
		filePath: {
			Old: "world",
			New: "universe",
		},
	}
	payload, err := json.Marshal(replacements)
	require.NoError(t, err)

	controller, recorder := newFilesystemController(t, http.MethodPost, "/files/replace", payload)

	controller.ReplaceContent()

	require.Equal(t, http.StatusOK, recorder.Code)

	written, err := os.ReadFile(filePath)
	require.NoError(t, err)
	require.Equal(t, "hello universe", string(written))
}

// TestFilesystemControllerSearchFilesHandlesAbsentDir checks that searching a
// directory that does not exist answers 404.
func TestFilesystemControllerSearchFilesHandlesAbsentDir(t *testing.T) {
	const requestURL = "/files/search?path=/not/exists"
	controller, recorder := newFilesystemController(t, http.MethodGet, requestURL, nil)

	controller.SearchFiles()

	require.Equal(t, http.StatusNotFound, recorder.Code)
}

// TestReplaceContentFailsUnknownFile checks that ReplaceContent fails with 404
// or 500 when the target file does not exist.
func TestReplaceContentFailsUnknownFile(t *testing.T) {
	missing := filepath.Join(t.TempDir(), "missing.txt")
	replacements := map[string]model.ReplaceFileContentItem{
		missing: {
			Old: "old",
			New: "new",
		},
	}
	body, _ := json.Marshal(replacements)
	controller, recorder := newFilesystemController(t, http.MethodPost, "/files/replace", body)

	controller.ReplaceContent()

	acceptable := []int{http.StatusNotFound, http.StatusInternalServerError}
	require.Contains(t, acceptable, recorder.Code, "expected failure status")
}

func TestFormatContentDisposition(t *testing.T) {
	tests := []struct {
		name     string
		filename string
		want     string
	}{
		{
			name:     "ASCII filename",
			filename: "test.txt",
			want:     "attachment; filename=\"test.txt\"",
		},
		{
			name:     "Chinese filename",
			filename: "测试文件.txt",
			want:     "attachment; filename=\"%E6%B5%8B%E8%AF%95%E6%96%87%E4%BB%B6.txt\"; filename*=UTF-8''%E6%B5%8B%E8%AF%95%E6%96%87%E4%BB%B6.txt",
		},
		{
			name:     "Japanese filename",
			filename: "テスト.txt",
			want:     "attachment; filename=\"%E3%83%86%E3%82%B9%E3%83%88.txt\"; filename*=UTF-8''%E3%83%86%E3%82%B9%E3%83%88.txt",
		},
		{
			name:     "Special characters in filename",
			filename: "file with spaces.txt",
			want:     "attachment; filename=\"file with spaces.txt\"",
		},
		{
			name:     "Mixed ASCII and non-ASCII",
			filename: "report-报告.pdf",
			want:     "attachment; filename=\"report-%E6%8A%A5%E5%91%8A.pdf\"; filename*=UTF-8''report-%E6%8A%A5%E5%91%8A.pdf",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := formatContentDisposition(tt.filename)
			require.Equal(t, tt.want, got)
		})
	}
}


================================================
FILE: components/execd/pkg/web/controller/filesystem_upload.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"path/filepath"

	"github.com/alibaba/opensandbox/execd/pkg/log"
	"github.com/alibaba/opensandbox/execd/pkg/web/model"
)

// UploadFile stores uploaded files at the paths named by their metadata parts.
//
// The multipart form must contain the same number of "metadata" and "file"
// parts. The i-th metadata part is decoded as a JSON model.FileMetadata and
// describes the i-th file part. For each pair the parent directory of the
// target path is created, the content is written to the target (truncating an
// existing file), and the requested permission is applied through ChmodFile.
//
// Processing stops at the first failure and an error response is sent; files
// written earlier in the same request are left in place. Sync and close
// failures on the destination are only logged.
func (c *FilesystemController) UploadFile() {
	form, err := c.ctx.MultipartForm()
	if err != nil || form == nil {
		c.RespondError(http.StatusBadRequest, model.ErrorCodeInvalidFile, "multipart form is empty")
		return
	}

	metadataParts, fileParts := form.File["metadata"], form.File["file"]

	// Validate the part counts before touching the filesystem.
	switch {
	case len(metadataParts) == 0:
		c.RespondError(http.StatusBadRequest, model.ErrorCodeInvalidFileMetadata, "metadata file is missing")
		return
	case len(fileParts) == 0:
		c.RespondError(http.StatusBadRequest, model.ErrorCodeInvalidFileContent, "file is missing")
		return
	case len(metadataParts) != len(fileParts):
		c.RespondError(
			http.StatusBadRequest,
			model.ErrorCodeInvalidFile,
			fmt.Sprintf("metadata and file count mismatch: %d vs %d", len(metadataParts), len(fileParts)),
		)
		return
	}

	for idx, metaHeader := range metadataParts {
		// Decode the metadata describing where this file goes.
		metaReader, openErr := metaHeader.Open()
		if openErr != nil {
			c.RespondError(
				http.StatusBadRequest,
				model.ErrorCodeInvalidFileMetadata,
				fmt.Sprintf("error opening metadata file. %v", openErr),
			)
			return
		}

		rawMeta, readErr := io.ReadAll(metaReader)
		metaReader.Close()
		if readErr != nil {
			c.RespondError(
				http.StatusBadRequest,
				model.ErrorCodeInvalidFileMetadata,
				fmt.Sprintf("error reading metadata content. %v", readErr),
			)
			return
		}

		var meta model.FileMetadata
		if decodeErr := json.Unmarshal(rawMeta, &meta); decodeErr != nil {
			c.RespondError(
				http.StatusBadRequest,
				model.ErrorCodeInvalidFileMetadata,
				fmt.Sprintf("invalid metadata format. %v", decodeErr),
			)
			return
		}

		destination := meta.Path
		if destination == "" {
			c.RespondError(http.StatusBadRequest, model.ErrorCodeInvalidFileMetadata, "metadata path is empty")
			return
		}

		// Make sure the parent directory exists before creating the file.
		parentDir := filepath.Dir(destination)
		if mkdirErr := os.MkdirAll(parentDir, os.ModePerm); mkdirErr != nil {
			c.RespondError(
				http.StatusInternalServerError,
				model.ErrorCodeRuntimeError,
				fmt.Sprintf("error creating target directory %s. %v", parentDir, mkdirErr),
			)
			return
		}

		uploadHeader := fileParts[idx]
		upload, uploadErr := uploadHeader.Open()
		if uploadErr != nil {
			c.RespondError(
				http.StatusInternalServerError,
				model.ErrorCodeRuntimeError,
				fmt.Sprintf("error opening file %s. %v", uploadHeader.Filename, uploadErr),
			)
			return
		}

		out, createErr := os.OpenFile(destination, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.ModePerm)
		if createErr != nil {
			upload.Close()
			c.RespondError(
				http.StatusInternalServerError,
				model.ErrorCodeRuntimeError,
				fmt.Sprintf("error opening destination file %s. %v", destination, createErr),
			)
			return
		}

		_, copyErr := io.Copy(out, upload)
		if copyErr != nil {
			out.Close()
			upload.Close()
			c.RespondError(
				http.StatusInternalServerError,
				model.ErrorCodeRuntimeError,
				fmt.Sprintf("error copying file %s. %v", destination, copyErr),
			)
			return
		}

		// Flush and release the destination; failures here are logged, not fatal.
		if syncErr := out.Sync(); syncErr != nil {
			log.Error("failed to sync target file: %v", syncErr)
		}
		if closeErr := out.Close(); closeErr != nil {
			log.Error("failed to close target file: %v", closeErr)
		}
		upload.Close()

		if chmodErr := ChmodFile(destination, meta.Permission); chmodErr != nil {
			c.RespondError(
				http.StatusInternalServerError,
				model.ErrorCodeRuntimeError,
				fmt.Sprintf("error chmoding file %s. %v", destination, chmodErr),
			)
			return
		}
	}

	c.RespondSuccess(nil)
}


================================================
FILE: components/execd/pkg/web/controller/filesystem_windows.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build windows
// +build windows

package controller

import (
	"fmt"
	"net/http"
	"os"
	"path/filepath"
	"strconv"
	"strings"

	"github.com/gin-gonic/gin"

	"github.com/alibaba/opensandbox/execd/pkg/util/glob"
	"github.com/alibaba/opensandbox/execd/pkg/web/model"
)

// FilesystemController serves the file system endpoints of this package
// (file info, removal, chmod, rename, directory management, search and content
// replacement). It embeds basicController, whose request context and response
// helpers the handlers use.
type FilesystemController struct {
	*basicController
}

// NewFilesystemController returns a FilesystemController bound to the given
// Gin request context.
func NewFilesystemController(ctx *gin.Context) *FilesystemController {
	base := newBasicController(ctx)
	return &FilesystemController{basicController: base}
}

// handleFileError writes the error response for a failed file operation:
// 404 with ErrorCodeFileNotFound when os.IsNotExist reports the error as a
// missing file, otherwise 500 with ErrorCodeRuntimeError.
func (c *FilesystemController) handleFileError(err error) {
	if !os.IsNotExist(err) {
		c.RespondError(
			http.StatusInternalServerError,
			model.ErrorCodeRuntimeError,
			fmt.Sprintf("error accessing file: %v", err),
		)
		return
	}

	c.RespondError(
		http.StatusNotFound,
		model.ErrorCodeFileNotFound,
		fmt.Sprintf("file not found. %v", err),
	)
}

// GetFilesInfo responds with a map from each requested "path" query value to
// its file metadata. With no paths the response is an empty map. The first
// lookup failure aborts the request through handleFileError.
func (c *FilesystemController) GetFilesInfo() {
	requested := c.ctx.QueryArray("path")
	infoByPath := make(map[string]model.FileInfo, len(requested))

	for idx := range requested {
		target := requested[idx]
		info, err := GetFileInfo(target)
		if err != nil {
			c.handleFileError(err)
			return
		}
		infoByPath[target] = info
	}

	c.RespondSuccess(infoByPath)
}

// RemoveFiles deletes every file named by a "path" query value, stopping with
// a 500 response at the first DeleteFile failure.
func (c *FilesystemController) RemoveFiles() {
	for _, target := range c.ctx.QueryArray("path") {
		err := DeleteFile(target)
		if err == nil {
			continue
		}
		c.RespondError(
			http.StatusInternalServerError,
			model.ErrorCodeRuntimeError,
			fmt.Sprintf("error removing file %s. %v", target, err),
		)
		return
	}

	c.RespondSuccess(nil)
}

// ChmodFiles applies permissions to files. The JSON request body maps each
// file path to the model.Permission to apply via ChmodFile. A malformed body
// yields 400; the first ChmodFile failure yields 500 and stops processing.
func (c *FilesystemController) ChmodFiles() {
	var request map[string]model.Permission
	if bindErr := c.bindJSON(&request); bindErr != nil {
		c.RespondError(
			http.StatusBadRequest,
			model.ErrorCodeInvalidRequest,
			fmt.Sprintf("error parsing request, MAYBE invalid body format. %v", bindErr),
		)
		return
	}

	for target, perm := range request {
		if chmodErr := ChmodFile(target, perm); chmodErr != nil {
			c.RespondError(
				http.StatusInternalServerError,
				model.ErrorCodeRuntimeError,
				fmt.Sprintf("error changing permissions for %s. %v", target, chmodErr),
			)
			return
		}
	}

	c.RespondSuccess(nil)
}

// RenameFiles renames or moves each item of the JSON request body (a list of
// model.RenameFileItem) in order. A malformed body yields 400; the first
// RenameFile failure is reported through handleFileError and stops processing.
func (c *FilesystemController) RenameFiles() {
	var items []model.RenameFileItem
	bindErr := c.bindJSON(&items)
	if bindErr != nil {
		c.RespondError(
			http.StatusBadRequest,
			model.ErrorCodeInvalidRequest,
			fmt.Sprintf("error parsing request, MAYBE invalid body format. %v", bindErr),
		)
		return
	}

	for idx := range items {
		if renameErr := RenameFile(items[idx]); renameErr != nil {
			c.handleFileError(renameErr)
			return
		}
	}

	c.RespondSuccess(nil)
}

// MakeDirs creates directories. The JSON request body maps each directory path
// to the model.Permission passed to MakeDir. A malformed body yields 400; the
// first MakeDir failure is reported through handleFileError.
func (c *FilesystemController) MakeDirs() {
	var request map[string]model.Permission
	if bindErr := c.bindJSON(&request); bindErr != nil {
		c.RespondError(
			http.StatusBadRequest,
			model.ErrorCodeInvalidRequest,
			fmt.Sprintf("error parsing request, MAYBE invalid body format. %v", bindErr),
		)
		return
	}

	for target, perm := range request {
		mkErr := MakeDir(target, perm)
		if mkErr == nil {
			continue
		}
		c.handleFileError(mkErr)
		return
	}

	c.RespondSuccess(nil)
}

// RemoveDirs removes every path given as a "path" query value together with
// its contents (os.RemoveAll). The first failure yields a 500 response and
// stops processing.
func (c *FilesystemController) RemoveDirs() {
	for _, target := range c.ctx.QueryArray("path") {
		removeErr := os.RemoveAll(target)
		if removeErr == nil {
			continue
		}
		c.RespondError(
			http.StatusInternalServerError,
			model.ErrorCodeRuntimeError,
			fmt.Sprintf("error removing directory %s. %v", target, removeErr),
		)
		return
	}

	c.RespondSuccess(nil)
}

// SearchFiles walks the directory tree rooted at the "path" query parameter
// and responds with metadata for every non-directory entry whose base name
// matches the "pattern" query parameter (default "**").
//
// Responses:
//   - 400 when "path" is missing;
//   - 404/500 via handleFileError when the root cannot be stat'ed;
//   - 500 when the path cannot be made absolute, an entry cannot be accessed,
//     or the pattern is rejected by glob.PathMatch.
//
// Entries that disappear while the walk is in progress are skipped.
func (c *FilesystemController) SearchFiles() {
	requested := c.ctx.Query("path")
	if requested == "" {
		c.RespondError(
			http.StatusBadRequest,
			model.ErrorCodeMissingQuery,
			"missing query parameter 'path'",
		)
		return
	}

	// Keep the requested value separate from the resolved one: filepath.Abs
	// returns an empty string on failure, which would otherwise blank out the
	// path in the error message below.
	root, err := filepath.Abs(requested)
	if err != nil {
		c.RespondError(
			http.StatusInternalServerError,
			model.ErrorCodeRuntimeError,
			fmt.Sprintf("error converting path %s to absolute. %v", requested, err),
		)
		return
	}

	if _, err = os.Stat(root); err != nil {
		c.handleFileError(err)
		return
	}

	pattern := c.ctx.Query("pattern")
	if pattern == "" {
		pattern = "**"
	}

	files := make([]model.FileInfo, 0, 16)
	err = filepath.Walk(root, func(filePath string, info os.FileInfo, err error) error {
		// An entry removed between listing and stat is not an error for a search.
		if os.IsNotExist(err) {
			return nil
		}
		if err != nil {
			return fmt.Errorf("error accessing path %s: %w", filePath, err)
		}
		if info.IsDir() {
			return nil
		}

		// Only the base name is matched, not the full path.
		match, err := glob.PathMatch(pattern, info.Name())
		if err != nil {
			return fmt.Errorf("invalid pattern %s: %w", pattern, err)
		}
		if !match {
			return nil
		}

		// Report the permission bits as the decimal number that reads like the
		// octal notation (e.g. 0644 is reported as 644).
		mode, _ := strconv.Atoi(strconv.FormatInt(int64(info.Mode().Perm()), 8))

		files = append(files, model.FileInfo{
			Path:       filePath,
			Size:       info.Size(),
			ModifiedAt: info.ModTime(),
			CreatedAt:  getFileCreateTime(info),
			Permission: model.Permission{
				Owner: "",
				Group: "",
				Mode:  mode,
			},
		})

		return nil
	})

	if err != nil {
		c.RespondError(
			http.StatusInternalServerError,
			model.ErrorCodeRuntimeError,
			fmt.Sprintf("error searching files. %v", err),
		)
		return
	}

	c.RespondSuccess(files)
}

// ReplaceContent rewrites files in place, replacing every occurrence of a
// substring. The JSON request body maps each file path to a
// model.ReplaceFileContentItem whose Old text is replaced by its New text.
//
// A malformed body yields 400. Any filesystem failure is reported through
// handleFileError and stops processing; files handled earlier in the same
// request stay modified. The file is rewritten even when nothing matched.
//
// NOTE(review): strings.ReplaceAll with an empty Old inserts New at every rune
// boundary; callers are not protected against that here.
func (c *FilesystemController) ReplaceContent() {
	var request map[string]model.ReplaceFileContentItem
	if err := c.bindJSON(&request); err != nil {
		c.RespondError(
			http.StatusBadRequest,
			model.ErrorCodeInvalidRequest,
			fmt.Sprintf("error parsing request, MAYBE invalid body format. %v", err),
		)
		return
	}

	for file, item := range request {
		absPath, err := filepath.Abs(file)
		if err != nil {
			c.handleFileError(err)
			return
		}

		// A single Stat both verifies that the file exists and captures the
		// mode that is handed back to WriteFile.
		fileInfo, err := os.Stat(absPath)
		if err != nil {
			c.handleFileError(err)
			return
		}

		content, err := os.ReadFile(absPath)
		if err != nil {
			c.handleFileError(err)
			return
		}

		newContent := strings.ReplaceAll(string(content), item.Old, item.New)

		if err := os.WriteFile(absPath, []byte(newContent), fileInfo.Mode()); err != nil {
			c.handleFileError(err)
			return
		}
	}

	c.RespondSuccess(nil)
}


================================================
FILE: components/execd/pkg/web/controller/metric.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"encoding/json"
	"fmt"
	"net/http"
	"runtime"
	"time"

	"github.com/gin-gonic/gin"
	"github.com/shirou/gopsutil/cpu"
	"github.com/shirou/gopsutil/mem"

	"github.com/alibaba/opensandbox/execd/pkg/log"
	"github.com/alibaba/opensandbox/execd/pkg/web/model"
)

// MetricController serves system metrics, either as a single snapshot
// (GetMetrics) or as a continuous stream (WatchMetrics). It embeds
// basicController, whose request context and response helpers the handlers use.
type MetricController struct {
	*basicController
}

// NewMetricController returns a MetricController bound to the given Gin
// request context.
func NewMetricController(ctx *gin.Context) *MetricController {
	base := newBasicController(ctx)
	return &MetricController{basicController: base}
}

// GetMetrics responds with one metrics snapshot from readMetrics, or with a
// 500 runtime error when the snapshot cannot be collected.
func (c *MetricController) GetMetrics() {
	snapshot, err := c.readMetrics()
	if err == nil {
		c.RespondSuccess(snapshot)
		return
	}

	c.RespondError(
		http.StatusInternalServerError,
		model.ErrorCodeRuntimeError,
		fmt.Sprintf("error reading runtime metrics. %v", err),
	)
}

// WatchMetrics streams metrics until the request context is cancelled.
//
// After sending the SSE headers it waits one second, collects a snapshot with
// readMetrics, writes it as one JSON document followed by a newline, flushes,
// and repeats. When collection fails, a JSON object of the form
// {"error": "..."} is written instead. Write failures are logged and the loop
// keeps running.
func (c *MetricController) WatchMetrics() {
	c.setupSSEResponse()

	for {
		select {
		case <-c.ctx.Request.Context().Done():
			return
		case <-time.After(time.Second * 1):
			func() {
				// Flush once the frame below has been written.
				if flusher, ok := c.ctx.Writer.(http.Flusher); ok {
					defer flusher.Flush()
				}

				var frame []byte
				if snapshot, readErr := c.readMetrics(); readErr == nil {
					frame, _ = json.Marshal(snapshot) //nolint:errchkjson
				} else {
					frame, _ = json.Marshal(map[string]string{ //nolint:errchkjson
						"error": readErr.Error(),
					})
				}

				if _, writeErr := c.ctx.Writer.Write(append(frame, '\n')); writeErr != nil {
					log.Error("WatchMetrics write data %s error: %v", string(frame), writeErr)
				}
			}()
		}
	}
}

// readMetrics builds a metrics snapshot from runtime.GOMAXPROCS (CPU count),
// a one-second cpu.Percent sample (aggregate CPU usage) and mem.VirtualMemory
// (total and used memory, converted from bytes to MiB).
//
// The call blocks for the duration of the CPU sample. It returns a wrapped
// error when either the CPU or the memory reading fails.
func (c *MetricController) readMetrics() (*model.Metrics, error) {
	snapshot := model.NewMetrics()
	snapshot.CpuCount = float64(runtime.GOMAXPROCS(-1))

	percents, cpuErr := cpu.Percent(time.Second, false)
	if cpuErr != nil {
		return nil, fmt.Errorf("failed to get CPU percent: %w", cpuErr)
	}
	if len(percents) != 0 {
		snapshot.CpuUsedPct = percents[0]
	}

	memory, memErr := mem.VirtualMemory()
	if memErr != nil {
		return nil, fmt.Errorf("failed to get memory info: %w", memErr)
	}
	snapshot.MemTotalMiB = float64(memory.Total) / 1024 / 1024
	snapshot.MemUsedMiB = float64(memory.Used) / 1024 / 1024

	return snapshot, nil
}


================================================
FILE: components/execd/pkg/web/controller/metric_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"encoding/json"
	"net/http"
	"net/http/httptest"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"

	"github.com/alibaba/opensandbox/execd/pkg/web/model"
)

// setupMetricController returns a MetricController wired to a test request
// with the given method and path (no body), plus the recorder that captures
// its response.
func setupMetricController(method, path string) (*MetricController, *httptest.ResponseRecorder) {
	ctx, recorder := newTestContext(method, path, nil)
	return NewMetricController(ctx), recorder
}

// TestReadMetrics calls readMetrics against the real host and checks that
// every field of the snapshot falls in a plausible range.
func TestReadMetrics(t *testing.T) {
	controller := &MetricController{}

	got, err := controller.readMetrics()
	assert.NoError(t, err)
	assert.NotNil(t, got)

	// CPU: at least one processor, usage within [0, 100] plus float tolerance.
	assert.Greater(t, got.CpuCount, 0.0)
	assert.GreaterOrEqual(t, got.CpuUsedPct, 0.0)
	assert.Less(t, got.CpuUsedPct, 100.1)

	// Memory: a positive total, and used memory never above it.
	assert.Greater(t, got.MemTotalMiB, 0.0)
	assert.GreaterOrEqual(t, got.MemUsedMiB, 0.0)
	assert.LessOrEqual(t, got.MemUsedMiB, got.MemTotalMiB)

	// Timestamp: taken within the last minute and not in the future.
	now := time.Now().UnixMilli()
	earliest := now - 60*1000
	assert.GreaterOrEqual(t, got.Timestamp, earliest)
	assert.LessOrEqual(t, got.Timestamp, now)
}

// TestGetMetricsEndpoint checks that GetMetrics answers 200 with a JSON body
// that decodes into a populated model.Metrics.
func TestGetMetricsEndpoint(t *testing.T) {
	controller, recorder := setupMetricController("GET", "/api/metrics")

	controller.GetMetrics()
	assert.Equal(t, http.StatusOK, recorder.Code)

	var decoded model.Metrics
	assert.NoError(t, json.Unmarshal(recorder.Body.Bytes(), &decoded))

	assert.Greater(t, decoded.CpuCount, 0.0)
	assert.GreaterOrEqual(t, decoded.CpuUsedPct, 0.0)
	assert.Greater(t, decoded.MemTotalMiB, 0.0)
	assert.GreaterOrEqual(t, decoded.MemUsedMiB, 0.0)
	assert.NotZero(t, decoded.Timestamp)
}

// TestWatchMetricsHeaders checks that setupSSEResponse sets each of the
// expected streaming headers on the response.
func TestWatchMetricsHeaders(t *testing.T) {
	controller, recorder := setupMetricController("GET", "/api/watch-metrics")

	controller.setupSSEResponse()

	expectations := []struct {
		header string
		want   string
	}{
		{header: "Content-Type", want: "text/event-stream"},
		{header: "Cache-Control", want: "no-cache"},
		{header: "Connection", want: "keep-alive"},
		{header: "X-Accel-Buffering", want: "no"},
	}

	for _, expectation := range expectations {
		assert.Equal(t, expectation.want, recorder.Header().Get(expectation.header))
	}
}

// TestMetricSerialization round-trips a model.Metrics value and an error
// payload through encoding/json and checks that nothing is lost.
func TestMetricSerialization(t *testing.T) {
	original := &model.Metrics{
		CpuCount:    4,
		CpuUsedPct:  25.5,
		MemTotalMiB: 8192,
		MemUsedMiB:  4096,
		Timestamp:   time.Now().UnixMilli(),
	}

	encoded, err := json.Marshal(original)
	assert.NoError(t, err)

	var roundTripped model.Metrics
	assert.NoError(t, json.Unmarshal(encoded, &roundTripped))
	assert.Equal(t, original.CpuCount, roundTripped.CpuCount)
	assert.Equal(t, original.CpuUsedPct, roundTripped.CpuUsedPct)
	assert.Equal(t, original.MemTotalMiB, roundTripped.MemTotalMiB)
	assert.Equal(t, original.MemUsedMiB, roundTripped.MemUsedMiB)
	assert.Equal(t, original.Timestamp, roundTripped.Timestamp)

	// The error frame is a plain string map; make sure it survives as well.
	encodedErr, err := json.Marshal(map[string]string{"error": "test error"})
	assert.NoError(t, err)

	var roundTrippedErr map[string]string
	assert.NoError(t, json.Unmarshal(encodedErr, &roundTrippedErr))
	assert.Equal(t, "test error", roundTrippedErr["error"])
}


================================================
FILE: components/execd/pkg/web/controller/mock_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"bytes"
	"net/http"
)

// mockOutput is an in-memory response sink for tests. It records the headers,
// the status code passed to WriteHeader and every byte written to it.
//
// The zero value is ready to use: the header map and the body buffer are both
// created on first use.
type mockOutput struct {
	buffer     *bytes.Buffer
	statusCode int
	header     http.Header
}

// Header returns the recorded header map, creating it on first use.
func (m *mockOutput) Header() http.Header {
	if m.header == nil {
		m.header = make(http.Header)
	}
	return m.header
}

// Write appends b to the recorded body, creating the buffer on first use so
// that a zero-value mockOutput does not panic.
func (m *mockOutput) Write(b []byte) (int, error) {
	if m.buffer == nil {
		m.buffer = &bytes.Buffer{}
	}
	return m.buffer.Write(b)
}

// WriteHeader records the status code.
func (m *mockOutput) WriteHeader(code int) {
	m.statusCode = code
}

// Status returns the last code passed to WriteHeader, or 0 if it was never
// called.
func (m *mockOutput) Status() int {
	return m.statusCode
}

// Body returns the bytes written so far, or nil when nothing has been written
// and no buffer was supplied.
func (m *mockOutput) Body() []byte {
	if m.buffer == nil {
		return nil
	}
	return m.buffer.Bytes()
}


================================================
FILE: components/execd/pkg/web/controller/ping.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import "github.com/gin-gonic/gin"

// MainController serves the basic liveness endpoint (Ping). It embeds
// basicController, whose response helpers the handler uses.
type MainController struct {
	*basicController
}

// NewMainController returns a MainController bound to the given Gin request
// context.
func NewMainController(ctx *gin.Context) *MainController {
	base := newBasicController(ctx)
	return &MainController{basicController: base}
}

// Ping is the liveness probe: it performs no work and always sends a success
// response with no payload.
func (c *MainController) Ping() {
	c.RespondSuccess(nil)
}

// PingHandler adapts MainController.Ping to a Gin handler function by
// creating a controller for the request and invoking Ping on it.
func PingHandler(ctx *gin.Context) {
	controller := NewMainController(ctx)
	controller.Ping()
}


================================================
FILE: components/execd/pkg/web/controller/sse.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"context"
	"io"
	"net/http"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"

	"github.com/alibaba/opensandbox/execd/pkg/jupyter/execute"
	"github.com/alibaba/opensandbox/execd/pkg/log"
	"github.com/alibaba/opensandbox/execd/pkg/runtime"
	"github.com/alibaba/opensandbox/execd/pkg/util/safego"
	"github.com/alibaba/opensandbox/execd/pkg/web/model"
)

// sseHeaders lists the response headers that setupSSEResponse sets on every
// streaming response: the event-stream content type, no caching, a kept-alive
// connection, and "X-Accel-Buffering: no" (presumably to stop buffering in
// nginx-style reverse proxies — confirm against the deployment).
var sseHeaders = map[string]string{
	"Content-Type":      "text/event-stream",
	"Cache-Control":     "no-cache",
	"Connection":        "keep-alive",
	"X-Accel-Buffering": "no",
}

// setupSSEResponse copies every entry of sseHeaders onto the response and,
// when the writer supports flushing, flushes immediately so the headers reach
// the client before the first event.
func (c *basicController) setupSSEResponse() {
	for name := range sseHeaders {
		c.ctx.Writer.Header().Set(name, sseHeaders[name])
	}

	flusher, canFlush := c.ctx.Writer.(http.Flusher)
	if !canFlush {
		return
	}
	flusher.Flush()
}

// setServerEventsHandler returns the runtime hooks that turn execution
// callbacks into stream events written with writeSingleEvent.
//
// Every hook stamps its event with the current time in milliseconds. In
// addition:
//   - OnExecuteInit also starts the keep-alive ping goroutine, which runs
//     until ctx is done;
//   - OnExecuteResult renames the "text/plain" key to "text", emits a count
//     event when count is positive, then a result event when there is data;
//   - OnExecuteError ignores a nil error;
//   - OnExecuteStdout and OnExecuteStderr ignore empty text.
func (c *CodeInterpretingController) setServerEventsHandler(ctx context.Context) runtime.ExecuteResultHook {
	// emit serializes the event and writes it as one verbose frame.
	emit := func(handler string, event model.ServerStreamEvent) {
		c.writeSingleEvent(handler, event.ToJSON(), true, event.Summary())
	}

	// emitText sends a text-carrying event unless the text is empty.
	emitText := func(handler string, eventType model.StreamEventType, text string) {
		if text == "" {
			return
		}
		emit(handler, model.ServerStreamEvent{
			Type:      eventType,
			Text:      text,
			Timestamp: time.Now().UnixMilli(),
		})
	}

	var hooks runtime.ExecuteResultHook

	hooks.OnExecuteInit = func(session string) {
		emit("OnExecuteInit", model.ServerStreamEvent{
			Type:      model.StreamEventTypeInit,
			Text:      session,
			Timestamp: time.Now().UnixMilli(),
		})

		safego.Go(func() { c.ping(ctx) })
	}

	hooks.OnExecuteResult = func(result map[string]any, count int) {
		var renamed map[string]any
		if len(result) > 0 {
			renamed = make(map[string]any)
			for key, value := range result {
				if key == "text/plain" {
					renamed["text"] = value
					continue
				}
				renamed[key] = value
			}
		}

		if count > 0 {
			emit("OnExecuteResult", model.ServerStreamEvent{
				Type:           model.StreamEventTypeCount,
				ExecutionCount: count,
				Timestamp:      time.Now().UnixMilli(),
			})
		}
		if len(renamed) > 0 {
			emit("OnExecuteResult", model.ServerStreamEvent{
				Type:      model.StreamEventTypeResult,
				Results:   renamed,
				Timestamp: time.Now().UnixMilli(),
			})
		}
	}

	hooks.OnExecuteComplete = func(executionTime time.Duration) {
		emit("OnExecuteComplete", model.ServerStreamEvent{
			Type:          model.StreamEventTypeComplete,
			ExecutionTime: executionTime.Milliseconds(),
			Timestamp:     time.Now().UnixMilli(),
		})
	}

	hooks.OnExecuteError = func(err *execute.ErrorOutput) {
		if err == nil {
			return
		}
		emit("OnExecuteError", model.ServerStreamEvent{
			Type:      model.StreamEventTypeError,
			Error:     err,
			Timestamp: time.Now().UnixMilli(),
		})
	}

	hooks.OnExecuteStatus = func(status string) {
		emit("OnExecuteStatus", model.ServerStreamEvent{
			Type:      model.StreamEventTypeStatus,
			Text:      status,
			Timestamp: time.Now().UnixMilli(),
		})
	}

	hooks.OnExecuteStdout = func(text string) {
		emitText("OnExecuteStdout", model.StreamEventTypeStdout, text)
	}

	hooks.OnExecuteStderr = func(text string) {
		emitText("OnExecuteStderr", model.StreamEventTypeStderr, text)
	}

	return hooks
}

// writeSingleEvent writes data followed by a blank line to the response and
// flushes it.
//
// The call is a no-op when the controller, its context or its writer is nil,
// and is dropped (with an error log) when the request context is already
// done. Writes are serialized with chunkWriter. A short write is treated as
// io.ErrShortWrite. Failures are logged with summary; successes are logged
// only when verbose is true.
func (c *CodeInterpretingController) writeSingleEvent(handler string, data []byte, verbose bool, summary string) {
	if c == nil || c.ctx == nil || c.ctx.Writer == nil {
		return
	}

	// Do not write to a client that has already gone away.
	select {
	case <-c.ctx.Request.Context().Done():
		log.Error("StreamEvent.%s: client disconnected", handler)
		return
	default:
	}

	c.chunkWriter.Lock()
	defer c.chunkWriter.Unlock()
	// Registered second so that the flush runs before the lock is released.
	defer func() {
		flusher, canFlush := c.ctx.Writer.(http.Flusher)
		if canFlush {
			flusher.Flush()
		}
	}()

	frame := append(data, '\n', '\n')
	written, err := c.ctx.Writer.Write(frame)
	if err == nil && written != len(frame) {
		err = io.ErrShortWrite
	}

	switch {
	case err != nil:
		log.Error("StreamEvent.%s write data %s error: %v", handler, summary, err)
	case verbose:
		log.Info("StreamEvent.%s write data %s", handler, summary)
	}
}

// ping sends a keep-alive event (type ping, text "pong") every three seconds
// until ctx is done. It blocks for that whole time, so callers run it in its
// own goroutine. Ping events are written without verbose logging.
func (c *CodeInterpretingController) ping(ctx context.Context) {
	heartbeat := func() {
		if c.ctx.Writer == nil {
			return
		}
		event := model.ServerStreamEvent{
			Type:      model.StreamEventTypePing,
			Text:      "pong",
			Timestamp: time.Now().UnixMilli(),
		}
		c.writeSingleEvent("Ping", event.ToJSON(), false, event.Summary())
	}

	wait.Until(heartbeat, 3*time.Second, ctx.Done())
}


================================================
FILE: components/execd/pkg/web/controller/syscall_linux.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build linux
// +build linux

package controller

import (
	"os"
	"syscall"
	"time"
)

func getFileCreateTime(fileInfo os.FileInfo) time.Time {
	stat, ok := fileInfo.Sys().(*syscall.Stat_t)
	if !ok || stat == nil {
		return fileInfo.ModTime()
	}

	return time.Unix(stat.Ctim.Sec, stat.Ctim.Nsec)
}


================================================
FILE: components/execd/pkg/web/controller/syscall_others.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !linux
// +build !linux

package controller

import (
	"os"
	"time"
)

func getFileCreateTime(_ os.FileInfo) time.Time {
	return time.Now()
}


================================================
FILE: components/execd/pkg/web/controller/test_helpers.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"bytes"
	"net/http/httptest"

	"github.com/gin-gonic/gin"
)

// newTestContext puts Gin in test mode and returns a Gin context carrying a
// request with the given method, path and body, together with the recorder
// that captures the response.
//
// nolint:unused
func newTestContext(method, path string, body []byte) (*gin.Context, *httptest.ResponseRecorder) {
	gin.SetMode(gin.TestMode)

	recorder := httptest.NewRecorder()
	ginCtx, _ := gin.CreateTestContext(recorder)
	ginCtx.Request = httptest.NewRequest(method, path, bytes.NewReader(body))

	return ginCtx, recorder
}


================================================
FILE: components/execd/pkg/web/controller/utils.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package controller

import (
	"errors"
	"fmt"
	"os"
	"os/user"
	"path/filepath"
	"strconv"
	"strings"
	"syscall"

	"github.com/alibaba/opensandbox/execd/pkg/log"
	"github.com/alibaba/opensandbox/execd/pkg/web/model"
)

func DeleteFile(filePath string) error {
	absPath, err := filepath.Abs(filePath)
	if err != nil {
		return fmt.Errorf("invalid path: %w", err)
	}

	fileInfo, err := os.Stat(absPath)
	if err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}

	if fileInfo.IsDir() {
		return fmt.Errorf("path is a directory: %s", filePath)
	}

	if err := os.Remove(absPath); err != nil {
		return fmt.Errorf("failed to remove file: %w", err)
	}

	return nil
}

// ChmodFile applies perms to file.
//
// perms.Mode is written the way octal permissions are read: its decimal
// digits are reinterpreted as an octal number (644 means 0644). A Mode of 0
// leaves the permission bits untouched. Ownership is then set through
// SetFileOwnership using perms.Owner and perms.Group.
func ChmodFile(file string, perms model.Permission) error {
	abs, err := filepath.Abs(file)
	if err != nil {
		return err
	}

	if perms.Mode == 0 {
		return SetFileOwnership(abs, perms.Owner, perms.Group)
	}

	bits, parseErr := strconv.ParseUint(strconv.Itoa(perms.Mode), 8, 32)
	if parseErr != nil {
		return parseErr
	}
	if chmodErr := os.Chmod(abs, os.FileMode(bits)); chmodErr != nil {
		return chmodErr
	}

	return SetFileOwnership(abs, perms.Owner, perms.Group)
}

// SetFileOwnership changes the owner and/or group of absPath.
//
// owner and group are account names resolved through os/user. A name that is
// empty, cannot be looked up, or has a non-numeric id leaves that side at -1;
// lookup problems are logged as warnings rather than returned. When neither
// side resolves, the file is chowned to the current process uid and gid.
// Only a failure of the chown call itself is returned.
func SetFileOwnership(absPath string, owner string, group string) error {
	const unresolved = -1
	uid, gid := unresolved, unresolved

	if owner != "" {
		if account, lookupErr := user.Lookup(owner); lookupErr != nil {
			log.Warning("Failed to lookup user %s: %v", owner, lookupErr)
		} else if parsed, convErr := strconv.Atoi(account.Uid); convErr != nil {
			log.Warning("Failed to convert uid for user %s: %v", owner, convErr)
		} else {
			uid = parsed
		}
	}

	if group != "" {
		if grp, lookupErr := user.LookupGroup(group); lookupErr != nil {
			log.Warning("Failed to lookup group %s: %v", group, lookupErr)
		} else if parsed, convErr := strconv.Atoi(grp.Gid); convErr != nil {
			log.Warning("Failed to convert gid for group %s: %v", group, convErr)
		} else {
			gid = parsed
		}
	}

	// Nothing resolved: fall back to the identity of the running process.
	if uid == unresolved && gid == unresolved {
		uid, gid = os.Getuid(), os.Getgid()
	}

	if chownErr := os.Chown(absPath, uid, gid); chownErr != nil {
		return fmt.Errorf("failed to set owner/group for %s: %w", absPath, chownErr)
	}
	return nil
}

// RenameFile moves item.Src to item.Dest.
//
// Both paths are made absolute. The source must exist, missing parent
// directories of the destination are created with mode 0755, and an existing
// destination is refused rather than overwritten.
func RenameFile(item model.RenameFileItem) error {
	from, err := filepath.Abs(item.Src)
	if err != nil {
		return fmt.Errorf("invalid source path: %w", err)
	}
	to, err := filepath.Abs(item.Dest)
	if err != nil {
		return fmt.Errorf("invalid destination path: %w", err)
	}

	_, srcErr := os.Stat(from)
	if os.IsNotExist(srcErr) {
		return fmt.Errorf("source path not found: %s", item.Src)
	}

	// Parent directories are created before the destination check.
	if mkErr := os.MkdirAll(filepath.Dir(to), 0755); mkErr != nil {
		return fmt.Errorf("failed to create destination directory: %w", mkErr)
	}

	_, dstErr := os.Stat(to)
	if dstErr == nil {
		return fmt.Errorf("destination path already exists: %s", item.Dest)
	}

	if mvErr := os.Rename(from, to); mvErr != nil {
		return fmt.Errorf("failed to rename file: %w", mvErr)
	}
	return nil
}

// MakeDir creates dir and any missing parents, then applies perm to the leaf
// directory through ChmodFile. Directories are first created with
// os.ModePerm; perm is only applied to dir itself.
func MakeDir(dir string, perm model.Permission) error {
	target, absErr := filepath.Abs(dir)
	if absErr != nil {
		return absErr
	}
	if mkErr := os.MkdirAll(target, os.ModePerm); mkErr != nil {
		return mkErr
	}
	return ChmodFile(target, perm)
}

// GetFileInfo stats filePath and returns its metadata.
//
// Owner and group are reported by name when the id can be resolved, otherwise
// as the numeric id in string form. Mode holds the permission bits written as
// octal digits in a decimal int (0o644 is reported as 644).
func GetFileInfo(filePath string) (model.FileInfo, error) {
	var none model.FileInfo

	absPath, err := filepath.Abs(filePath)
	if err != nil {
		return none, fmt.Errorf("invalid path %s: %w", filePath, err)
	}

	fileInfo, err := os.Stat(absPath)
	switch {
	case err == nil:
		// fall out of the switch and continue below
	case os.IsNotExist(err):
		return none, fmt.Errorf("file not found: %s", filePath)
	default:
		return none, fmt.Errorf("error accessing file %s: %w", filePath, err)
	}

	// Unchecked assertion: the stat result is expected to carry *syscall.Stat_t.
	sysStat := fileInfo.Sys().(*syscall.Stat_t)

	ownerName := strconv.FormatUint(uint64(sysStat.Uid), 10)
	if account, lookupErr := user.LookupId(ownerName); lookupErr == nil {
		ownerName = account.Username
	}

	groupName := strconv.FormatUint(uint64(sysStat.Gid), 10)
	if grp, lookupErr := user.LookupGroupId(groupName); lookupErr == nil {
		groupName = grp.Name
	}

	// Format the permission bits in base 8, then read those digits back as decimal.
	octalDigits := strconv.FormatInt(int64(fileInfo.Mode().Perm()), 8)
	modeValue, _ := strconv.Atoi(octalDigits)

	return model.FileInfo{
		Path:       absPath,
		Size:       fileInfo.Size(),
		ModifiedAt: fileInfo.ModTime(),
		CreatedAt:  getFileCreateTime(fileInfo),
		Permission: model.Permission{
			Owner: ownerName,
			Group: groupName,
			Mode:  modeValue,
		},
	}, nil
}

// SearchFileMetadata looks for an entry in metadata whose key has the same
// base name as filePath. It returns the matching key, its value and true, or
// zero values and false when nothing matches.
//
// Only the final path element is compared. Go map iteration order is
// unspecified, so if several keys share the base name, which one is returned
// is not deterministic.
func SearchFileMetadata(metadata map[string]model.FileMetadata, filePath string) (string, model.FileMetadata, bool) {
	wanted := filepath.Base(filePath)
	for candidate, meta := range metadata {
		if filepath.Base(candidate) != wanted {
			continue
		}
		return candidate, meta, true
	}

	return "", model.FileMetadata{}, false
}

// httpRange is one resolved byte range: length bytes beginning at offset start.
type httpRange struct {
	start, length int64
}

// ParseRange parses an HTTP Range header value of the form "bytes=..." against
// a resource of size bytes.
//
// Supported specs are "start-end" (inclusive), "start-" (to the end) and
// "-n" (last n bytes). Empty specs between commas are ignored. Ranges that
// start at or beyond size are dropped, and ranges running past the end are
// shortened to fit, so the result may be empty even when err is nil.
//
// An error ("invalid range") is returned when the unit prefix is missing, a
// spec has no "-", a number does not parse or is negative, or end < start.
func ParseRange(s string, size int64) ([]httpRange, error) {
	if !strings.HasPrefix(s, "bytes=") {
		return nil, errors.New("invalid range")
	}

	ranges := strings.Split(s[6:], ",")
	result := make([]httpRange, 0, len(ranges))

	for _, ra := range ranges {
		ra = strings.TrimSpace(ra)
		if ra == "" {
			continue
		}
		i := strings.Index(ra, "-")
		if i < 0 {
			return nil, errors.New("invalid range")
		}
		start, end := strings.TrimSpace(ra[:i]), strings.TrimSpace(ra[i+1:])
		var r httpRange

		if start == "" {
			// suffix-length: the last n bytes of the resource
			n, err := strconv.ParseInt(end, 10, 64)
			if err != nil || n < 0 {
				return nil, errors.New("invalid range")
			}
			if n > size {
				n = size
			}
			r.start = size - n
			r.length = size - r.start
		} else {
			i, err := strconv.ParseInt(start, 10, 64)
			if err != nil || i < 0 {
				return nil, errors.New("invalid range")
			}
			if end == "" {
				// start-: everything from start to the end
				r.start = i
				r.length = size - i
			} else {
				// start-end, both inclusive
				j, err := strconv.ParseInt(end, 10, 64)
				if err != nil || j < i {
					return nil, errors.New("invalid range")
				}
				// Clamp the end BEFORE computing the length: with a huge end
				// offset (e.g. math.MaxInt64) j-i+1 would overflow int64 and
				// produce a negative length that the check below cannot catch.
				if j >= size {
					j = size - 1
				}
				r.start = i
				r.length = j - i + 1
			}
		}
		if r.start >= size {
			// Range lies entirely beyond the resource; skip it.
			continue
		}
		if r.start+r.length > size {
			r.length = size - r.start
		}
		result = append(result, r)
	}
	return result, nil
}


================================================
FILE: components/execd/pkg/web/controller/utils_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"os"
	"path/filepath"
	"reflect"
	"testing"

	"github.com/alibaba/opensandbox/execd/pkg/web/model"
	"github.com/stretchr/testify/require"
)

// TestDeleteFile checks that DeleteFile removes an existing file and that a
// second call on the now-missing path succeeds.
func TestDeleteFile(t *testing.T) {
	target := filepath.Join(t.TempDir(), "sample.txt")
	writeErr := os.WriteFile(target, []byte("hello"), 0o644)
	require.NoError(t, writeErr)

	require.NoError(t, DeleteFile(target))
	_, statErr := os.Stat(target)
	require.True(t, os.IsNotExist(statErr), "expected file removed, got err=%v", statErr)

	// Deleting again must be a no-op, not an error.
	require.NoError(t, DeleteFile(target), "expected no error deleting missing file")
}

func TestRenameFile(t *testing.T) {
	tmp := t.TempDir()
	src := filepath.Join(tmp, "src.txt")
	require.NoError(t, os.WriteFile(src, []byte("data"), 0o644))

	dst := filepath.Join(tmp, "nested", "renamed.txt")
	require.NoError(t, RenameFile(model.RenameFileItem{Src: src, Dest: dst}))

	_, err := os.Stat(dst)
	require.NoError(t, err)
	_, err = os.Stat(src)
	require.True(t, os.IsNotExist(err), "expected source removed, got err=%v", err)

	// destination exists -> expect error
	require.NoError(t, os.WriteFile(src, []byte("data"), 0o644))
	require.Error(t, RenameFile(model.RenameFileItem{Src: src, Dest: dst}), "expected error when destination already exists")
}

// TestSearchFileMetadata checks base-name matching for a hit and a miss.
func TestSearchFileMetadata(t *testing.T) {
	const notes, readme = "/tmp/a/notes.txt", "/tmp/b/readme.md"
	index := map[string]model.FileMetadata{
		notes:  {Path: notes},
		readme: {Path: readme},
	}

	// The directory differs; only the base name has to match.
	foundPath, foundMeta, found := SearchFileMetadata(index, "/any/notes.txt")
	require.True(t, found, "expected metadata entry")
	require.Equal(t, "/tmp/a/notes.txt", foundPath)
	require.Equal(t, "/tmp/a/notes.txt", foundMeta.Path)

	_, _, found = SearchFileMetadata(index, "/foo/unknown.txt")
	require.False(t, found, "expected no match")
}

// TestParseRange runs ParseRange over a start-end range, a suffix range and a
// malformed header.
func TestParseRange(t *testing.T) {
	type rangeCase struct {
		name      string
		header    string
		size      int64
		want      []httpRange
		expectErr bool
	}

	cases := []rangeCase{
		{name: "start-end", header: "bytes=0-9", size: 20, want: []httpRange{{start: 0, length: 10}}},
		{name: "suffix", header: "bytes=-5", size: 10, want: []httpRange{{start: 5, length: 5}}},
		{name: "invalid", header: "bytes=foo", size: 10, expectErr: true},
	}

	for idx := range cases {
		tc := cases[idx]
		t.Run(tc.name, func(t *testing.T) {
			got, err := ParseRange(tc.header, tc.size)
			if !tc.expectErr {
				require.NoError(t, err)
				require.True(t, reflect.DeepEqual(got, tc.want), "got %+v want %+v", got, tc.want)
				return
			}
			require.Error(t, err)
		})
	}
}


================================================
FILE: components/execd/pkg/web/controller/utils_windows.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build windows
// +build windows

package controller

import (
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"syscall"
	"time"

	"github.com/alibaba/opensandbox/execd/pkg/web/model"
)

func DeleteFile(filePath string) error {
	absPath, err := filepath.Abs(filePath)
	if err != nil {
		return fmt.Errorf("invalid path: %w", err)
	}

	fileInfo, err := os.Stat(absPath)
	if err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}

	if fileInfo.IsDir() {
		return fmt.Errorf("path is a directory: %s", filePath)
	}

	if err := os.Remove(absPath); err != nil {
		return fmt.Errorf("failed to remove file: %w", err)
	}

	return nil
}

// ChmodFile applies perms to file.
//
// perms.Mode is read as octal digits written in decimal (755 means 0o755); a
// Mode of 0 skips the chmod step. perms.Owner and perms.Group are then passed
// to SetFileOwnership and its result is returned.
func ChmodFile(file string, perms model.Permission) error {
	resolved, absErr := filepath.Abs(file)
	if absErr != nil {
		return absErr
	}

	if wantsChmod := perms.Mode != 0; wantsChmod {
		// Re-interpret the decimal digits as an octal number.
		parsed, parseErr := strconv.ParseUint(strconv.Itoa(perms.Mode), 8, 32)
		if parseErr != nil {
			return parseErr
		}
		if chmodErr := os.Chmod(resolved, os.FileMode(parsed)); chmodErr != nil {
			return chmodErr
		}
	}

	return SetFileOwnership(resolved, perms.Owner, perms.Group)
}

// SetFileOwnership is a placeholder on Windows where POSIX ownership is not supported.
//
// All three arguments (path, owner name, group name) are ignored and nil is
// always returned, so callers such as ChmodFile succeed without changing
// ownership.
func SetFileOwnership(_ string, _ string, _ string) error {
	// TODO: add Windows ACL support if needed.
	return nil
}

// RenameFile moves item.Src to item.Dest.
//
// Both paths are made absolute. The source must exist, missing parent
// directories of the destination are created with mode 0755, and an existing
// destination is refused rather than overwritten.
func RenameFile(item model.RenameFileItem) error {
	source, err := filepath.Abs(item.Src)
	if err != nil {
		return fmt.Errorf("invalid source path: %w", err)
	}
	destination, err := filepath.Abs(item.Dest)
	if err != nil {
		return fmt.Errorf("invalid destination path: %w", err)
	}

	_, sourceErr := os.Stat(source)
	if os.IsNotExist(sourceErr) {
		return fmt.Errorf("source path not found: %s", item.Src)
	}

	parent := filepath.Dir(destination)
	if mkErr := os.MkdirAll(parent, 0755); mkErr != nil {
		return fmt.Errorf("failed to create destination directory: %w", mkErr)
	}

	// A successful stat means something already lives at the destination.
	_, destErr := os.Stat(destination)
	if destErr == nil {
		return fmt.Errorf("destination path already exists: %s", item.Dest)
	}

	if moveErr := os.Rename(source, destination); moveErr != nil {
		return fmt.Errorf("failed to rename file: %w", moveErr)
	}
	return nil
}

// MakeDir creates dir and any missing parents with os.ModePerm, then hands
// the leaf directory and perm to ChmodFile.
func MakeDir(dir string, perm model.Permission) error {
	resolved, absErr := filepath.Abs(dir)
	if absErr != nil {
		return absErr
	}
	if mkErr := os.MkdirAll(resolved, os.ModePerm); mkErr != nil {
		return mkErr
	}
	return ChmodFile(resolved, perm)
}

// GetFileInfo stats filePath and returns its metadata.
//
// Owner and Group are always empty in this build. CreatedAt comes from
// getFileCreateTime and is replaced by the Win32 creation time when the stat
// result exposes *syscall.Win32FileAttributeData. Mode holds the permission
// bits written as octal digits in a decimal int.
func GetFileInfo(filePath string) (model.FileInfo, error) {
	absPath, err := filepath.Abs(filePath)
	if err != nil {
		return model.FileInfo{}, fmt.Errorf("invalid path %s: %w", filePath, err)
	}

	fileInfo, err := os.Stat(absPath)
	if os.IsNotExist(err) {
		return model.FileInfo{}, fmt.Errorf("file not found: %s", filePath)
	}
	if err != nil {
		return model.FileInfo{}, fmt.Errorf("error accessing file %s: %w", filePath, err)
	}

	created := getFileCreateTime(fileInfo)
	attrs, isWin32 := fileInfo.Sys().(*syscall.Win32FileAttributeData)
	if isWin32 && attrs != nil {
		created = time.Unix(0, attrs.CreationTime.Nanoseconds())
	}

	// Format the permission bits in base 8, then read those digits back as decimal.
	octalDigits := strconv.FormatInt(int64(fileInfo.Mode().Perm()), 8)
	modeValue, _ := strconv.Atoi(octalDigits)

	return model.FileInfo{
		Path:       absPath,
		Size:       fileInfo.Size(),
		ModifiedAt: fileInfo.ModTime(),
		CreatedAt:  created,
		Permission: model.Permission{
			Owner: "",
			Group: "",
			Mode:  modeValue,
		},
	}, nil
}

// SearchFileMetadata looks for an entry in metadata whose key has the same
// base name as filePath and returns that key, its value and true; otherwise
// it returns zero values and false.
//
// Only the last path element is compared. Go map iteration order is
// unspecified, so with several keys sharing a base name the returned entry is
// not deterministic.
func SearchFileMetadata(metadata map[string]model.FileMetadata, filePath string) (string, model.FileMetadata, bool) {
	target := filepath.Base(filePath)
	for key, entry := range metadata {
		if filepath.Base(key) != target {
			continue
		}
		return key, entry, true
	}

	return "", model.FileMetadata{}, false
}

// httpRange is one resolved byte range: length bytes beginning at offset start.
type httpRange struct {
	start, length int64
}

// ParseRange parses an HTTP Range header value of the form "bytes=..." against
// a resource of size bytes.
//
// Supported specs are "start-end" (inclusive), "start-" (to the end) and
// "-n" (last n bytes). Empty specs between commas are ignored. Ranges that
// start at or beyond size are dropped, and ranges running past the end are
// shortened to fit, so the result may be empty even when err is nil.
//
// An error ("invalid range") is returned when the unit prefix is missing, a
// spec has no "-", a number does not parse or is negative, or end < start.
func ParseRange(s string, size int64) ([]httpRange, error) {
	if !strings.HasPrefix(s, "bytes=") {
		return nil, errors.New("invalid range")
	}

	ranges := strings.Split(s[6:], ",")
	result := make([]httpRange, 0, len(ranges))

	for _, ra := range ranges {
		ra = strings.TrimSpace(ra)
		if ra == "" {
			continue
		}
		i := strings.Index(ra, "-")
		if i < 0 {
			return nil, errors.New("invalid range")
		}
		start, end := strings.TrimSpace(ra[:i]), strings.TrimSpace(ra[i+1:])
		var r httpRange

		if start == "" {
			// suffix-length: the last n bytes of the resource
			n, err := strconv.ParseInt(end, 10, 64)
			if err != nil || n < 0 {
				return nil, errors.New("invalid range")
			}
			if n > size {
				n = size
			}
			r.start = size - n
			r.length = size - r.start
		} else {
			i, err := strconv.ParseInt(start, 10, 64)
			if err != nil || i < 0 {
				return nil, errors.New("invalid range")
			}
			if end == "" {
				// start-: everything from start to the end
				r.start = i
				r.length = size - i
			} else {
				// start-end, both inclusive
				j, err := strconv.ParseInt(end, 10, 64)
				if err != nil || j < i {
					return nil, errors.New("invalid range")
				}
				// Clamp the end BEFORE computing the length: with a huge end
				// offset (e.g. math.MaxInt64) j-i+1 would overflow int64 and
				// produce a negative length that the check below cannot catch.
				if j >= size {
					j = size - 1
				}
				r.start = i
				r.length = j - i + 1
			}
		}
		if r.start >= size {
			// Range lies entirely beyond the resource; skip it.
			continue
		}
		if r.start+r.length > size {
			r.length = size - r.start
		}
		result = append(result, r)
	}
	return result, nil
}


================================================
FILE: components/execd/pkg/web/model/codeinterpreting.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package model

import (
	"encoding/json"
	"errors"
	"fmt"
	"strings"

	"github.com/go-playground/validator/v10"

	"github.com/alibaba/opensandbox/execd/pkg/jupyter/execute"
)

// RunCodeRequest represents a code execution request.
//
// Code is mandatory (enforced by Validate through the "required" tag).
// Context is optional in the JSON body; note that "omitempty" on a struct
// value does not suppress it when marshalling with encoding/json.
type RunCodeRequest struct {
	Context CodeContext `json:"context,omitempty"`
	Code    string      `json:"code" validate:"required"`
}

// Validate checks the request against its struct validation tags and returns
// the validator's error, if any. A new validator instance is built per call.
func (r *RunCodeRequest) Validate() error {
	return validator.New().Struct(r)
}

// CodeContext tracks session metadata.
//
// It is a CodeContextRequest plus an ID. The embedded struct's fields appear
// at the same JSON level as ID because encoding/json flattens embedded
// structs that have no name in their tag.
// NOTE(review): ",inline" is not an option defined by encoding/json; it
// appears to be ignored there — confirm no other encoder relies on it.
type CodeContext struct {
	ID                 string `json:"id,omitempty"`
	CodeContextRequest `json:",inline"`
}

// CodeContextRequest carries the optional language and working directory
// for a code context.
type CodeContextRequest struct {
	Language string `json:"language,omitempty"`
	Cwd      string `json:"cwd,omitempty"`
}

// RunCommandRequest represents a shell command execution request.
//
// Command is mandatory. Uid and Gid are pointers so that "not provided" can
// be told apart from id 0; Validate rejects a Gid supplied without a Uid.
type RunCommandRequest struct {
	Command    string `json:"command" validate:"required"`
	Cwd        string `json:"cwd,omitempty"`
	Background bool   `json:"background,omitempty"`
	// TimeoutMs caps execution duration; 0 uses server default.
	// It is sent under the JSON key "timeout"; any non-zero value must be >= 1.
	TimeoutMs int64 `json:"timeout,omitempty" validate:"omitempty,gte=1"`

	Uid  *uint32           `json:"uid,omitempty"`
	Gid  *uint32           `json:"gid,omitempty"`
	Envs map[string]string `json:"envs,omitempty"`
}

// Validate runs the struct-tag validation first and then enforces the
// cross-field rule that a gid may only be given together with a uid.
func (r *RunCommandRequest) Validate() error {
	if tagErr := validator.New().Struct(r); tagErr != nil {
		return tagErr
	}

	gidWithoutUid := r.Uid == nil && r.Gid != nil
	if gidWithoutUid {
		return errors.New("uid is required when gid is provided")
	}
	return nil
}

// ServerStreamEventType names the kind of a ServerStreamEvent; the string
// value is what appears in the event's JSON "type" field.
type ServerStreamEventType string

// Known stream event types and their wire values.
const (
	StreamEventTypeInit     ServerStreamEventType = "init"
	StreamEventTypeStatus   ServerStreamEventType = "status"
	StreamEventTypeError    ServerStreamEventType = "error"
	StreamEventTypeStdout   ServerStreamEventType = "stdout"
	StreamEventTypeStderr   ServerStreamEventType = "stderr"
	StreamEventTypeResult   ServerStreamEventType = "result"
	StreamEventTypeComplete ServerStreamEventType = "execution_complete"
	StreamEventTypeCount    ServerStreamEventType = "execution_count"
	StreamEventTypePing     ServerStreamEventType = "ping"
)

// ServerStreamEvent is emitted to clients over SSE.
//
// Every field is tagged omitempty, so zero values (empty text, 0 counts,
// nil error, empty results) are left out of the serialized event.
type ServerStreamEvent struct {
	Type           ServerStreamEventType `json:"type,omitempty"`
	Text           string                `json:"text,omitempty"`
	ExecutionCount int                   `json:"execution_count,omitempty"`
	ExecutionTime  int64                 `json:"execution_time,omitempty"`
	Timestamp      int64                 `json:"timestamp,omitempty"`
	Results        map[string]any        `json:"results,omitempty"`
	Error          *execute.ErrorOutput  `json:"error,omitempty"`
}

// ToJSON serializes the event for streaming.
//
// A marshalling error is discarded; in that case the returned slice is
// whatever json.Marshal produced alongside the error.
func (s ServerStreamEvent) ToJSON() []byte {
	payload, _ := json.Marshal(s)
	return payload
}

// Summary renders a lightweight, log-friendly string without JSON.
//
// The result is a space-separated list of key=value fields. "type" is always
// present; text (cut to 100), elapsed_ms, results (entry count) and error
// (name plus value cut to 80) are added only when set.
func (s ServerStreamEvent) Summary() string {
	fields := make([]string, 0, 5)
	fields = append(fields, fmt.Sprintf("type=%s", s.Type))

	if s.Text != "" {
		fields = append(fields, fmt.Sprintf("text=%s", truncateString(s.Text, 100)))
	}
	if s.ExecutionTime > 0 {
		fields = append(fields, fmt.Sprintf("elapsed_ms=%d", s.ExecutionTime))
	}
	if resultCount := len(s.Results); resultCount > 0 {
		fields = append(fields, fmt.Sprintf("results=%d", resultCount))
	}
	if failure := s.Error; failure != nil {
		// Fall back to a generic label when the error carries no name.
		label := failure.EName
		if label == "" {
			label = "error"
		}
		fields = append(fields, fmt.Sprintf("error=%s: %s", label, truncateString(failure.EValue, 80)))
	}

	return strings.Join(fields, " ")
}

// truncateString shortens value to at most maxCount bytes and appends "..."
// when anything was cut. A non-positive maxCount or a value that already fits
// is returned unchanged.
//
// The cut is moved back to the start of a rune so that a multi-byte UTF-8
// character is never split; the kept prefix can therefore be a few bytes
// shorter than maxCount for non-ASCII text.
func truncateString(value string, maxCount int) string {
	if maxCount <= 0 || len(value) <= maxCount {
		return value
	}

	// Ranging over a string yields the byte offset at which each rune starts;
	// keep the last such offset that does not exceed maxCount.
	cut := 0
	for offset := range value {
		if offset > maxCount {
			break
		}
		cut = offset
	}
	return value[:cut] + "..."
}


================================================
FILE: components/execd/pkg/web/model/codeinterpreting_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package model

import (
	"encoding/json"
	"strings"
	"testing"

	"github.com/alibaba/opensandbox/execd/pkg/jupyter/execute"
	"github.com/stretchr/testify/require"
)

// TestRunCodeRequestValidate checks that code is required.
func TestRunCodeRequestValidate(t *testing.T) {
	withCode := RunCodeRequest{Code: "print('hi')"}
	require.NoError(t, withCode.Validate())

	withoutCode := withCode
	withoutCode.Code = ""
	require.Error(t, withoutCode.Validate(), "expected validation error when code is empty")
}

// TestRunCommandRequestValidate covers the command and timeout tag rules.
func TestRunCommandRequestValidate(t *testing.T) {
	steps := []struct {
		req     RunCommandRequest
		wantErr bool
		msg     string
	}{
		{RunCommandRequest{Command: "ls"}, false, "expected command validation success"},
		{RunCommandRequest{Command: "ls", TimeoutMs: -100}, true, "expected validation error when timeout is negative"},
		{RunCommandRequest{Command: "ls", TimeoutMs: 0}, false, "expected success when timeout is omitted/zero"},
		{RunCommandRequest{Command: "", TimeoutMs: 10}, true, "expected validation error when command is empty"},
	}

	for idx := range steps {
		step := &steps[idx]
		err := step.req.Validate()
		if step.wantErr {
			require.Error(t, err, step.msg)
			continue
		}
		require.NoError(t, err, step.msg)
	}
}

// ptr32 returns a pointer to a fresh uint32 holding v.
func ptr32(v uint32) *uint32 {
	p := new(uint32)
	*p = v
	return p
}

// TestRunCommandRequestValidateUidGid checks the uid/gid cross-field rule:
// uid alone and uid+gid are accepted, gid alone is rejected.
func TestRunCommandRequestValidateUidGid(t *testing.T) {
	uidOnly := RunCommandRequest{Command: "id", Uid: ptr32(1000)}
	require.NoError(t, uidOnly.Validate(), "expected success with uid only")

	uidAndGid := RunCommandRequest{Command: "id", Uid: ptr32(1000), Gid: ptr32(1000)}
	require.NoError(t, uidAndGid.Validate(), "expected success with uid and gid")

	gidOnly := RunCommandRequest{Command: "id", Gid: ptr32(1000)}
	require.Error(t, gidOnly.Validate(), "expected validation error when gid is set without uid")
}

// TestServerStreamEventToJSON round-trips an event through ToJSON and
// json.Unmarshal and compares the fields that were set.
func TestServerStreamEventToJSON(t *testing.T) {
	original := ServerStreamEvent{
		Type:           StreamEventTypeStdout,
		Text:           "hello",
		ExecutionCount: 3,
	}

	var roundTripped ServerStreamEvent
	require.NoError(t, json.Unmarshal(original.ToJSON(), &roundTripped))

	require.Equal(t, original.Type, roundTripped.Type)
	require.Equal(t, original.Text, roundTripped.Text)
	require.Equal(t, original.ExecutionCount, roundTripped.ExecutionCount)
}

// TestServerStreamEventSummary checks that Summary contains the expected
// fragments for a plain stdout event and for an error event with long text.
func TestServerStreamEventSummary(t *testing.T) {
	type summaryCase struct {
		name     string
		event    ServerStreamEvent
		contains []string
	}

	oversized := strings.Repeat("a", 120)
	truncated := "text=" + strings.Repeat("a", 100) + "..."

	cases := []summaryCase{
		{
			name:     "basic stdout",
			event:    ServerStreamEvent{Type: StreamEventTypeStdout, Text: "hello", ExecutionCount: 2},
			contains: []string{"type=stdout", "text=hello"},
		},
		{
			name: "truncated text and error",
			event: ServerStreamEvent{
				Type:  StreamEventTypeError,
				Text:  oversized,
				Error: &execute.ErrorOutput{EName: "ValueError", EValue: "boom"},
			},
			contains: []string{"type=error", truncated, "error=ValueError: boom"},
		},
	}

	for idx := range cases {
		tc := cases[idx]
		t.Run(tc.name, func(t *testing.T) {
			got := tc.event.Summary()
			for _, fragment := range tc.contains {
				require.Containsf(t, got, fragment, "summary missing %q", fragment)
			}
		})
	}
}


================================================
FILE: components/execd/pkg/web/model/command.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package model

import "time"

// CommandStatusResponse represents command status for REST APIs.
//
// ExitCode and FinishedAt are pointers and are omitted from the JSON while
// nil. ID and Running are always serialized.
// NOTE(review): StartedAt is a time.Time value; encoding/json's "omitempty"
// does not omit struct values, so a zero StartedAt is still emitted — confirm
// whether that is intended.
type CommandStatusResponse struct {
	ID         string     `json:"id"`
	Content    string     `json:"content,omitempty"`
	Running    bool       `json:"running"`
	ExitCode   *int       `json:"exit_code,omitempty"`
	Error      string     `json:"error,omitempty"`
	StartedAt  time.Time  `json:"started_at,omitempty"`
	FinishedAt *time.Time `json:"finished_at,omitempty"`
}


================================================
FILE: components/execd/pkg/web/model/error.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package model

// ErrorCode is the machine-readable code carried in an ErrorResponse.
type ErrorCode string

// Known error codes and their wire values. Note that ErrorCodeInvalidRequest
// serializes as "INVALID_REQUEST_BODY".
const (
	ErrorCodeInvalidRequest      ErrorCode = "INVALID_REQUEST_BODY"
	ErrorCodeMissingQuery        ErrorCode = "MISSING_QUERY"
	ErrorCodeRuntimeError        ErrorCode = "RUNTIME_ERROR"
	ErrorCodeInvalidFile         ErrorCode = "INVALID_FILE"
	ErrorCodeInvalidFileContent  ErrorCode = "INVALID_FILE_CONTENT"
	ErrorCodeInvalidFileMetadata ErrorCode = "INVALID_FILE_METADATA"
	ErrorCodeFileNotFound        ErrorCode = "FILE_NOT_FOUND"
	ErrorCodeUnknown             ErrorCode = "UNKNOWN"
	ErrorCodeContextNotFound     ErrorCode = "CONTEXT_NOT_FOUND"
)

// ErrorResponse is the JSON error body: a machine-readable code plus a
// human-readable message. Both fields are omitted when empty.
type ErrorResponse struct {
	Code    ErrorCode `json:"code,omitempty"`
	Message string    `json:"message,omitempty"`
}


================================================
FILE: components/execd/pkg/web/model/filesystem.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package model

import "time"

// FileInfo represents file metadata including path and permissions
//
// Permission is embedded, so owner, group and mode are serialized at the
// same JSON level as the other fields (encoding/json flattens embedded
// structs whose tag carries no name).
// NOTE(review): ",inline" is not an option defined by encoding/json, and
// "omitempty" does not omit time.Time values — confirm both are intended.
type FileInfo struct {
	Path       string    `json:"path,omitempty"`
	Size       int64     `json:"size"`
	ModifiedAt time.Time `json:"modified_at,omitempty"`
	CreatedAt  time.Time `json:"created_at,omitempty"`
	Permission `json:",inline"`
}

// FileMetadata pairs a path with the Permission to apply to it. The embedded
// Permission fields are serialized alongside "path".
type FileMetadata struct {
	Path       string `json:"path,omitempty"`
	Permission `json:",inline"`
}

// Permission represents file ownership and mode
//
// All three fields are always serialized (no omitempty), so an unset
// permission appears as empty owner/group and mode 0.
type Permission struct {
	Owner string `json:"owner"`
	Group string `json:"group"`
	Mode  int    `json:"mode"`
}

// RenameFileItem represents a file rename operation
// from the Src path to the Dest path.
type RenameFileItem struct {
	Src  string `json:"src,omitempty"`
	Dest string `json:"dest,omitempty"`
}

// ReplaceFileContentItem represents a content replacement operation
// carrying the old text and the new text that should take its place.
type ReplaceFileContentItem struct {
	Old string `json:"old,omitempty"`
	New string `json:"new,omitempty"`
}


================================================
FILE: components/execd/pkg/web/model/header.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package model

const (
	// ApiAccessTokenHeader carries the auth token.
	// It is the name of the HTTP header, not the token value itself.
	ApiAccessTokenHeader = "X-EXECD-ACCESS-TOKEN"
)


================================================
FILE: components/execd/pkg/web/model/metric.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package model

import "time"

// Metrics represents system resource usage metrics
type Metrics struct {
	CpuCount    float64 `json:"cpu_count"`
	CpuUsedPct  float64 `json:"cpu_used_pct"`
	MemTotalMiB float64 `json:"mem_total_mib"`
	MemUsedMiB  float64 `json:"mem_used_mib"`
	Timestamp   int64   `json:"timestamp"`
}

func NewMetrics() *Metrics {
	return &Metrics{
		CpuCount:    0,
		CpuUsedPct:  0,
		MemTotalMiB: 0,
		MemUsedMiB:  0,
		Timestamp:   time.Now().UnixMilli(),
	}
}


================================================
FILE: components/execd/pkg/web/model/session.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package model

import (
	"github.com/go-playground/validator/v10"
)

// CreateSessionRequest is the request body for creating a bash session.
// Cwd is optional and omitted from the JSON when empty.
type CreateSessionRequest struct {
	Cwd string `json:"cwd,omitempty"`
}

// CreateSessionResponse is the response for create_session.
// SessionID is always serialized, under the key "session_id".
type CreateSessionResponse struct {
	SessionID string `json:"session_id"`
}

// RunInSessionRequest is the request body for running code in an existing session.
//
// Code is mandatory. TimeoutMs is sent under the JSON key "timeout_ms" and
// must not be negative.
type RunInSessionRequest struct {
	Code      string `json:"code" validate:"required"`
	Cwd       string `json:"cwd,omitempty"`
	TimeoutMs int64  `json:"timeout_ms,omitempty" validate:"omitempty,gte=0"`
}

// Validate checks the request against its struct validation tags and returns
// the validator's error, if any. A new validator instance is built per call.
func (r *RunInSessionRequest) Validate() error {
	return validator.New().Struct(r)
}


================================================
FILE: components/execd/pkg/web/proxy.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package web

import (
	"net"
	"net/http"
	"net/http/httputil"
	"net/url"
	"strings"
	"time"

	"github.com/gin-gonic/gin"

	"github.com/alibaba/opensandbox/execd/pkg/log"
)

// proxyTransport is the single transport used for every proxied request.
//
// net/http transports keep their own pool of idle connections and are meant
// to be reused; creating one per request leaves each pool (and its idle
// connections, held for IdleConnTimeout) behind and prevents any reuse.
var proxyTransport = &http.Transport{
	DialContext: (&net.Dialer{
		Timeout:   600 * time.Second,
		KeepAlive: 30 * time.Second,
	}).DialContext,
	MaxIdleConns:        100,
	MaxIdleConnsPerHost: 100,
	IdleConnTimeout:     600 * time.Second,
}

// ProxyMiddleware returns a Gin handler that forwards requests of the form
// /proxy/<port>/<path> to http://127.0.0.1:<port>/<path>.
//
// Requests whose path does not start with "/proxy/" are passed to the next
// handler untouched. For proxied requests the query string is preserved,
// X-Forwarded-For and X-Forwarded-Proto are set, X-Forwarded-Host is removed,
// and WebSocket upgrade headers are re-applied when the client asked for an
// upgrade. A missing port yields 400; an upstream failure yields 502. The Gin
// chain is aborted once the proxy has answered.
func ProxyMiddleware() gin.HandlerFunc {
	return func(c *gin.Context) {
		if !strings.HasPrefix(c.Request.URL.Path, "/proxy/") {
			c.Next()
			return
		}

		r := c.Request
		w := c.Writer

		// Split "<port>/<rest>" once; SplitN always returns at least one element.
		rest := strings.TrimPrefix(r.URL.Path, "/proxy/")
		parts := strings.SplitN(rest, "/", 2)
		if parts[0] == "" {
			http.Error(w, "port is required", http.StatusBadRequest)
			c.Abort()
			return
		}

		port := parts[0]
		path := "/"
		if len(parts) == 2 && parts[1] != "" {
			path += parts[1]
		}

		target := &url.URL{
			Scheme: "http",
			Host:   "127.0.0.1:" + port,
			Path:   path,
		}

		isWebSocket := strings.ToLower(r.Header.Get("Upgrade")) == "websocket"

		proxy := httputil.NewSingleHostReverseProxy(target)
		// Flush periodically so SSE and other chunked streams are not held
		// back in the proxy's buffer.
		proxy.FlushInterval = 200 * time.Millisecond

		proxy.Director = func(req *http.Request) {
			req.URL.Scheme = "http"
			req.URL.Host = "127.0.0.1:" + port
			req.URL.Path = path
			req.URL.RawQuery = r.URL.RawQuery
			req.URL.RawPath = ""
			req.RequestURI = ""

			req.Header.Set("X-Forwarded-For", getClientIP(r))
			req.Header.Set("X-Forwarded-Proto", "http")
			req.Header.Del("X-Forwarded-Host")

			if isWebSocket {
				req.Header.Set("Connection", "Upgrade")
				req.Header.Set("Upgrade", "websocket")
				req.Header.Set("Sec-WebSocket-Version", "13")
				if key := r.Header.Get("Sec-WebSocket-Key"); key != "" {
					req.Header.Set("Sec-WebSocket-Key", key)
				}
			}
		}

		// Shared transport: connections to the same upstream are reused
		// across requests instead of being leaked with a per-request pool.
		proxy.Transport = proxyTransport

		proxy.ErrorHandler = func(rw http.ResponseWriter, req *http.Request, err error) {
			// Log the inbound URI: the outbound request's RequestURI was
			// cleared by the Director above and would print as empty.
			log.Error("Proxy error: %v, request: %s %s", err, req.Method, r.RequestURI)
			http.Error(rw, "Bad Gateway", http.StatusBadGateway)
		}

		log.Info("Proxy: %s %s -> %s (WebSocket: %v)", r.Method, r.RequestURI, target.Host, isWebSocket)

		proxy.ServeHTTP(w, r)
		c.Abort()
	}
}

func getClientIP(r *http.Request) string {
	if ip := r.Header.Get("X-Forwarded-For"); ip != "" {
		return strings.Split(ip, ",")[0]
	}
	if ip := r.Header.Get("X-Real-IP"); ip != "" {
		return ip
	}
	return r.RemoteAddr
}


================================================
FILE: components/execd/pkg/web/router.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package web

import (
	"crypto/subtle"
	"net/http"

	"github.com/gin-gonic/gin"

	"github.com/alibaba/opensandbox/execd/pkg/log"
	"github.com/alibaba/opensandbox/execd/pkg/web/controller"
	"github.com/alibaba/opensandbox/execd/pkg/web/model"
)

// NewRouter builds a Gin engine with all execd routes.
// NewRouter assembles the execd HTTP API on a fresh Gin engine running in
// release mode.
//
// Every request passes through, in order: panic recovery, request logging,
// the access-token check for accessToken, and the proxy middleware. The
// route groups registered afterwards are /ping, /files, /directories, /code,
// /session, /command and /metrics.
func NewRouter(accessToken string) *gin.Engine {
	// Local aliases keep the route table below compact.
	type (
		fsCtl     = controller.FilesystemController
		codeCtl   = controller.CodeInterpretingController
		metricCtl = controller.MetricController
	)

	gin.SetMode(gin.ReleaseMode)

	engine := gin.New()
	engine.Use(gin.Recovery())
	engine.Use(logMiddleware(), accessTokenMiddleware(accessToken), ProxyMiddleware())

	engine.GET("/ping", controller.PingHandler)

	// File operations.
	fileRoutes := engine.Group("/files")
	fileRoutes.DELETE("", withFilesystem(func(fc *fsCtl) { fc.RemoveFiles() }))
	fileRoutes.GET("/info", withFilesystem(func(fc *fsCtl) { fc.GetFilesInfo() }))
	fileRoutes.POST("/mv", withFilesystem(func(fc *fsCtl) { fc.RenameFiles() }))
	fileRoutes.POST("/permissions", withFilesystem(func(fc *fsCtl) { fc.ChmodFiles() }))
	fileRoutes.GET("/search", withFilesystem(func(fc *fsCtl) { fc.SearchFiles() }))
	fileRoutes.POST("/replace", withFilesystem(func(fc *fsCtl) { fc.ReplaceContent() }))
	fileRoutes.POST("/upload", withFilesystem(func(fc *fsCtl) { fc.UploadFile() }))
	fileRoutes.GET("/download", withFilesystem(func(fc *fsCtl) { fc.DownloadFile() }))

	// Directory operations.
	dirRoutes := engine.Group("/directories")
	dirRoutes.POST("", withFilesystem(func(fc *fsCtl) { fc.MakeDirs() }))
	dirRoutes.DELETE("", withFilesystem(func(fc *fsCtl) { fc.RemoveDirs() }))

	// Code execution and execution contexts.
	codeRoutes := engine.Group("/code")
	codeRoutes.POST("", withCode(func(cc *codeCtl) { cc.RunCode() }))
	codeRoutes.DELETE("", withCode(func(cc *codeCtl) { cc.InterruptCode() }))
	codeRoutes.POST("/context", withCode(func(cc *codeCtl) { cc.CreateContext() }))
	codeRoutes.GET("/contexts", withCode(func(cc *codeCtl) { cc.ListContexts() }))
	codeRoutes.DELETE("/contexts", withCode(func(cc *codeCtl) { cc.DeleteContextsByLanguage() }))
	codeRoutes.DELETE("/contexts/:contextId", withCode(func(cc *codeCtl) { cc.DeleteContext() }))
	codeRoutes.GET("/contexts/:contextId", withCode(func(cc *codeCtl) { cc.GetContext() }))

	// Sessions.
	sessionRoutes := engine.Group("/session")
	sessionRoutes.POST("", withCode(func(cc *codeCtl) { cc.CreateSession() }))
	sessionRoutes.POST("/:sessionId/run", withCode(func(cc *codeCtl) { cc.RunInSession() }))
	sessionRoutes.DELETE("/:sessionId", withCode(func(cc *codeCtl) { cc.DeleteSession() }))

	// Shell commands.
	commandRoutes := engine.Group("/command")
	commandRoutes.POST("", withCode(func(cc *codeCtl) { cc.RunCommand() }))
	commandRoutes.DELETE("", withCode(func(cc *codeCtl) { cc.InterruptCommand() }))
	commandRoutes.GET("/status/:id", withCode(func(cc *codeCtl) { cc.GetCommandStatus() }))
	commandRoutes.GET("/:id/logs", withCode(func(cc *codeCtl) { cc.GetBackgroundCommandOutput() }))

	// Metrics.
	metricRoutes := engine.Group("/metrics")
	metricRoutes.GET("", withMetric(func(mc *metricCtl) { mc.GetMetrics() }))
	metricRoutes.GET("/watch", withMetric(func(mc *metricCtl) { mc.WatchMetrics() }))

	return engine
}

// withFilesystem adapts fn into a gin.HandlerFunc. For each request it builds
// a controller with controller.NewFilesystemController from the request's
// gin.Context and passes that controller to fn.
func withFilesystem(fn func(*controller.FilesystemController)) gin.HandlerFunc {
	return func(c *gin.Context) {
		fsController := controller.NewFilesystemController(c)
		fn(fsController)
	}
}

// withCode adapts fn into a gin.HandlerFunc. For each request it builds a
// controller with controller.NewCodeInterpretingController from the request's
// gin.Context and passes that controller to fn.
func withCode(fn func(*controller.CodeInterpretingController)) gin.HandlerFunc {
	return func(c *gin.Context) {
		codeController := controller.NewCodeInterpretingController(c)
		fn(codeController)
	}
}

// withMetric adapts fn into a gin.HandlerFunc. For each request it builds a
// controller with controller.NewMetricController from the request's
// gin.Context and passes that controller to fn.
func withMetric(fn func(*controller.MetricController)) gin.HandlerFunc {
	return func(c *gin.Context) {
		metricController := controller.NewMetricController(c)
		fn(metricController)
	}
}

// accessTokenMiddleware returns a Gin middleware that enforces the execd
// access token.
//
// When token is empty the check is disabled and every request is passed on.
// Otherwise the request must carry the same value in the
// model.ApiAccessTokenHeader header; a missing or different value aborts the
// request with 401 Unauthorized and a JSON body containing an "error" field.
func accessTokenMiddleware(token string) gin.HandlerFunc {
	expected := []byte(token)

	return func(ctx *gin.Context) {
		if token == "" {
			ctx.Next()
			return
		}

		// Compare in constant time so that response timing does not reveal how
		// much of a guessed token matched. An absent header yields "" and is
		// rejected because its length differs from the non-empty expected token.
		requestedToken := ctx.GetHeader(model.ApiAccessTokenHeader)
		if subtle.ConstantTimeCompare([]byte(requestedToken), expected) != 1 {
			ctx.AbortWithStatusJSON(http.StatusUnauthorized, map[string]any{
				"error": "Unauthorized: invalid or missing header " + model.ApiAccessTokenHeader,
			})
			return
		}

		ctx.Next()
	}
}

// logMiddleware returns a Gin middleware that writes one info-level log line
// with the HTTP method and the request URL, then hands the request to the
// next handler in the chain.
func logMiddleware() gin.HandlerFunc {
	return func(c *gin.Context) {
		req := c.Request
		log.Info("Requested: %v - %v", req.Method, req.URL.String())
		c.Next()
	}
}


================================================
FILE: components/execd/tests/jupyter.sh
================================================
#!/bin/bash
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Port and token for the Jupyter notebook that install_jupyter starts below.
# They are exported so that a script sourcing this file, and the processes it
# launches, can read the same values.
export JUPYTER_PORT=54321
export JUPYTER_TOKEN=opensandboxexecdintegrationtest

# install_jupyter installs Jupyter with pip and starts a notebook server in the
# background for integration testing.
#
# Reads:  JUPYTER_PORT, JUPYTER_TOKEN
# Output: the server's stdout/stderr are written to /tmp/jupyter.log
#
# The function returns after a fixed 3 second pause; it does not verify that
# the server is actually accepting connections.
install_jupyter() {
	# install jupyter notebook for integration testing
	python --version
	pip install ipykernel jupyter

	echo "Starting jupyter notebook ..."
	# Expansions are quoted so the values are passed as single words even if
	# they ever contain whitespace or glob characters.
	jupyter notebook --ip=0.0.0.0 --port="$JUPYTER_PORT" --allow-root --no-browser --NotebookApp.token="$JUPYTER_TOKEN" >/tmp/jupyter.log 2>&1 &

	sleep 3
}


================================================
FILE: components/execd/tests/smoke.sh
================================================
#!/bin/bash
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Fail fast: exit on command errors (-e), on use of unset variables (-u) and on
# failures inside pipelines (-o pipefail); -x echoes each command as it runs.
set -euxo pipefail

# Load install_jupyter plus the JUPYTER_PORT/JUPYTER_TOKEN exports, then install
# and start Jupyter. The path is relative, so the script presumably has to be
# run from the execd component directory -- confirm against the CI workflow.
source tests/jupyter.sh
install_jupyter

# Environment read by execd.
# NOTE(review): presumably the graceful-shutdown window and the log file path;
# confirm against execd's configuration handling.
export EXECD_API_GRACE_SHUTDOWN=500ms
export EXECD_LOG_FILE=execd.log
# Start execd in the background, pointed at the local Jupyter instance; its
# stdout and stderr are captured in startup.log.
./bin/execd -jupyter-host=http://127.0.0.1:${JUPYTER_PORT} --jupyter-token=${JUPYTER_TOKEN} --log-level=7 >startup.log 2>&1 &


================================================
FILE: components/execd/tests/smoke_api.py
================================================
#!/usr/bin/env python3

# Copyright 2025 Alibaba Group Holding Ltd.
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Simple smoke tests for execd APIs.

Prerequisites:
- execd server running locally (default http://localhost:44772)
- Optional: set env BASE_URL to override
- Optional: set env API_TOKEN if server expects X-EXECD-ACCESS-TOKEN
"""

import json
import os
import sys
import time
import uuid
import tempfile
import pathlib

import requests

# Server under test. BASE_URL overrides the default; trailing slashes are
# stripped so request paths can always be appended with a leading "/".
BASE_URL = os.environ.get("BASE_URL", "http://localhost:44772").rstrip("/")
# Optional access token; when set it is sent as X-EXECD-ACCESS-TOKEN.
API_TOKEN = os.environ.get("API_TOKEN")

HEADERS = {"X-EXECD-ACCESS-TOKEN": API_TOKEN} if API_TOKEN else {}

# One shared session so every request carries the same headers.
session = requests.Session()
session.headers.update(HEADERS)


def expect(cond: bool, msg: str):
    """Abort the smoke run with ``msg`` unless ``cond`` is truthy.

    Raises:
        SystemExit: carrying ``msg`` when ``cond`` is falsy.
    """
    if cond:
        return
    raise SystemExit(msg)


def sse_get_command_id() -> str:
    """Start a background command and return the id announced by the server.

    Posts to ``/command`` with streaming enabled and scans the response line by
    line for the first event whose ``type`` is ``"init"``; that event's
    ``text`` field is returned as the command id.

    Lines are accepted both with an SSE ``data:`` prefix and as raw JSON (the
    controller emits raw JSON lines without the prefix). Blank lines, lines
    that are not valid JSON, and JSON values that are not objects are skipped.

    Raises:
        SystemExit: if the request does not return 200, if the init event has
            no command id, or if the stream ends without an init event.
    """
    url = f"{BASE_URL}/command"
    payload = {"command": "echo smoke-command && sleep 1", "background": True}
    prefix = b"data:"
    with session.post(url, json=payload, stream=True, timeout=15) as resp:
        expect(resp.status_code == 200, f"SSE start failed: {resp.status_code} {resp.text}")
        for line in resp.iter_lines():
            if not line:
                continue
            raw = line[len(prefix):] if line.startswith(prefix) else line
            try:
                data = json.loads(raw.decode())
            except ValueError:
                # Covers both undecodable bytes and malformed JSON.
                continue
            if not isinstance(data, dict):
                # e.g. a bare number or string; cannot be an event object.
                continue
            if data.get("type") == "init":
                cmd_id = data.get("text")
                expect(cmd_id, "missing command id in init event")
                return cmd_id
    raise SystemExit("Failed to obtain command id from SSE")


def wait_status(cmd_id: str, timeout: float = 15.0) -> dict:
    """Poll ``/command/status/<cmd_id>`` until the command stops running.

    The status is fetched every 0.3 seconds. As soon as a response reports
    ``running`` as falsy, that response body is returned. If ``timeout``
    seconds pass first, the most recent status is returned instead (``None``
    when no poll happened at all).

    Raises:
        SystemExit: if any status request does not return 200.
    """
    status_url = f"{BASE_URL}/command/status/{cmd_id}"
    give_up_at = time.time() + timeout
    latest = None
    while True:
        if time.time() >= give_up_at:
            break
        response = session.get(status_url, timeout=5)
        expect(response.status_code == 200, f"status failed: {response.status_code} {response.text}")
        latest = response.json()
        # A missing "running" key is treated as still running.
        if latest.get("running", True):
            time.sleep(0.3)
            continue
        return latest
    return latest


def fetch_logs(cmd_id: str, cursor: int = 0):
    """Fetch the output of a background command starting at ``cursor``.

    Returns:
        A ``(text, next_cursor)`` tuple: the response body and the value of
        the ``EXECD-COMMANDS-TAIL-CURSOR`` response header (``None`` if the
        header is absent).

    Raises:
        SystemExit: if the request does not return 200.
    """
    response = session.get(
        f"{BASE_URL}/command/{cmd_id}/logs",
        params={"cursor": cursor},
        timeout=10,
    )
    expect(response.status_code == 200, f"logs failed: {response.status_code} {response.text}")
    body = response.text
    next_cursor = response.headers.get("EXECD-COMMANDS-TAIL-CURSOR")
    return body, next_cursor


def sse_disconnect_should_stop_ping():
    """
    Check that the server survives a client dropping an SSE stream early.

    A long-running foreground command is started, the stream is read until the
    init event arrives, and the connection is then closed from the client side
    (the server's ping loop should stop). Afterwards the server must still
    answer ``/ping``.
    """
    prefix = b"data:"
    request_body = {
        # long command so the server would keep pinging if not cancelled
        "command": "sh -c 'echo long-run-start && sleep 20 && echo long-run-end'",
        "background": False,
    }

    with session.post(f"{BASE_URL}/command", json=request_body, stream=True, timeout=10) as resp:
        expect(resp.status_code == 200, f"SSE start failed: {resp.status_code} {resp.text}")
        for chunk in resp.iter_lines():
            if not chunk:
                continue
            # Accept both "data:"-prefixed SSE lines and raw JSON lines.
            raw = chunk[len(prefix):] if chunk.startswith(prefix) else chunk
            try:
                event = json.loads(raw.decode())
            except Exception:
                continue
            if event.get("type") == "init":
                break
        # explicitly close to simulate client drop
        resp.close()

    # Give server a moment to observe disconnect and ensure API remains healthy
    time.sleep(1)
    health = session.get(f"{BASE_URL}/ping", timeout=5)
    expect(health.status_code == 200, "ping failed after SSE disconnect")


def upload_and_download():
    """Upload a small file to a fresh /tmp path and download it again.

    Raises:
        SystemExit: if either request does not return 200 or the downloaded
            bytes differ from what was uploaded.
    """
    target_path = f"/tmp/execd-smoke-{uuid.uuid4().hex}/hello.txt"

    # Multipart body: a JSON "metadata" part naming the destination, plus the content.
    upload_resp = session.post(
        f"{BASE_URL}/files/upload",
        files={
            "metadata": ("metadata", json.dumps({"path": target_path}), "application/json"),
            "file": ("file", b"hello execd\n", "application/octet-stream"),
        },
        timeout=10,
    )
    expect(upload_resp.status_code == 200, f"upload failed: {upload_resp.status_code} {upload_resp.text}")

    download_resp = session.get(f"{BASE_URL}/files/download", params={"path": target_path}, timeout=10)
    expect(download_resp.status_code == 200, f"download failed: {download_resp.status_code} {download_resp.text}")
    expect(download_resp.content == b"hello execd\n", "downloaded content mismatch")


def filesystem_smoke():
    """Exercise the filesystem endpoints end to end inside a fresh temp directory.

    Steps, in order: create directory, upload, info, search, replace content,
    download (verifying the replacement), chmod, rename, remove file, remove
    directory. Every step must answer 200.

    Raises:
        SystemExit: on the first step that fails.
    """
    root = os.path.join(tempfile.gettempdir(), f"execd-smoke-{uuid.uuid4().hex}")
    nested = os.path.join(root, "sub")
    original_file = os.path.join(nested, "hello.txt")
    moved_file = os.path.join(nested, "hello_renamed.txt")

    # create dirs
    mkdir_resp = session.post(f"{BASE_URL}/directories", json={nested: {"mode": 0}}, timeout=10)
    expect(mkdir_resp.status_code == 200, f"mkdir failed: {mkdir_resp.status_code} {mkdir_resp.text}")

    # upload a file
    upload_resp = session.post(
        f"{BASE_URL}/files/upload",
        files={
            "metadata": ("metadata", json.dumps({"path": original_file}), "application/json"),
            "file": ("file", b"hello execd\n", "application/octet-stream"),
        },
        timeout=10,
    )
    expect(upload_resp.status_code == 200, f"upload failed: {upload_resp.status_code} {upload_resp.text}")

    # get info
    info_resp = session.get(f"{BASE_URL}/files/info", params={"path": [original_file]}, timeout=10)
    expect(info_resp.status_code == 200, f"info failed: {info_resp.status_code} {info_resp.text}")

    # search
    search_resp = session.get(
        f"{BASE_URL}/files/search",
        params={"path": root, "pattern": "*.txt"},
        timeout=10,
    )
    expect(search_resp.status_code == 200, f"search failed: {search_resp.status_code} {search_resp.text}")

    def _is_uploaded_file(entry) -> bool:
        # Compare resolved paths so differently spelled but equivalent paths still match.
        candidate = entry.get("path")
        if not candidate:
            return False
        return pathlib.Path(candidate).resolve() == pathlib.Path(original_file).resolve()

    located = any(_is_uploaded_file(entry) for entry in search_resp.json())
    expect(located, "search did not find file")

    # replace content
    replace_resp = session.post(
        f"{BASE_URL}/files/replace",
        json={original_file: {"old": "hello", "new": "hi"}},
        timeout=10,
    )
    expect(replace_resp.status_code == 200, f"replace failed: {replace_resp.status_code} {replace_resp.text}")

    # download to verify replace
    download_resp = session.get(f"{BASE_URL}/files/download", params={"path": original_file}, timeout=10)
    expect(download_resp.status_code == 200, f"download failed: {download_resp.status_code} {download_resp.text}")
    expect(download_resp.content == b"hi execd\n", "replace content mismatch")

    # chmod (mode only)
    chmod_resp = session.post(
        f"{BASE_URL}/files/permissions",
        json={original_file: {"mode": 644}},
        timeout=10,
    )
    expect(chmod_resp.status_code == 200, f"chmod failed: {chmod_resp.status_code} {chmod_resp.text}")

    # rename
    rename_resp = session.post(
        f"{BASE_URL}/files/mv",
        json=[{"src": original_file, "dest": moved_file}],
        timeout=10,
    )
    expect(rename_resp.status_code == 200, f"rename failed: {rename_resp.status_code} {rename_resp.text}")

    # remove file
    rm_file_resp = session.delete(f"{BASE_URL}/files", params={"path": [moved_file]}, timeout=10)
    expect(
        rm_file_resp.status_code == 200,
        f"remove file failed: {rm_file_resp.status_code} {rm_file_resp.text}",
    )

    # remove dir
    rm_dir_resp = session.delete(f"{BASE_URL}/directories", params={"path": [root]}, timeout=10)
    expect(
        rm_dir_resp.status_code == 200,
        f"remove dir failed: {rm_dir_resp.status_code} {rm_dir_resp.text}",
    )


def main():
    """Run the smoke checks in order, printing one progress line per step.

    Order: ping, SSE disconnect handling, background command (id, status,
    logs), filesystem APIs. A failing check raises SystemExit via expect().
    """
    print(f"[+] base: {BASE_URL}")
    ping = session.get(f"{BASE_URL}/ping", timeout=5)
    expect(ping.status_code == 200, "ping failed")
    print("[+] ping ok")

    sse_disconnect_should_stop_ping()
    print("[+] SSE disconnect handled")

    command_id = sse_get_command_id()
    print(f"[+] command id: {command_id}")

    final_status = wait_status(command_id)
    print(f"[+] status: {final_status}")

    log_text, next_cursor = fetch_logs(command_id, cursor=0)
    print(f"[+] logs (cursor={next_cursor}):\n{log_text}")

    filesystem_smoke()
    print("[+] filesystem APIs ok")

    print("[+] smoke tests PASS")


if __name__ == "__main__":
    # expect() signals failures with SystemExit(message); report the message on
    # stderr and exit with status 1.
    try:
        main()
    except SystemExit as failure:
        print(f"[!] smoke tests FAIL: {failure}", file=sys.stderr)
        raise SystemExit(1)

================================================
FILE: components/ingress/.golangci.yml
================================================
run:
  skip-dirs:
    - vendor
    - tests
    - scripts
  skip-files:
    - .*/zz_generated.deepcopy.go
    - .*/mock/*.go
  tests: false
  timeout: 10m
linters-settings:
  funlen:
    lines: 500
    statements: 200
  gocyclo:
    min-complexity: 40
  gosimple:
    checks: ["S1019", "S1002"]
  staticcheck:
    checks: ["SA4006"]
  govet:
    enable:
      - asmdecl
      - assign
      - atomic
      - atomicalign
      - bools
      - buildtag
      - cgocall
      - copylocks
      - deepequalerrors
      - errorsas
      - findcall
      - framepointer
      - httpresponse
      - ifaceassert
      - lostcancel
      - nilfunc
      - nilness
      - reflectvaluecompare
      - shift
      - sigchanyzer
      - sortslice
      - stdmethods
      - stringintconv
      - testinggoroutine
      - tests
      - unmarshal
      - unreachable
      - unsafeptr
      - unusedresult
      - printf
    disable:
      - composites
      - loopclosure
      - fieldalignment
      - shadow
      - structtag
      - unusedwrite
  errcheck:
    exclude-functions:
    - flag.Set
    - os.Setenv
    - os.Unsetenv
    - logger.Sync
    - fmt.Fprintf
    - fmt.Fprintln
    - (io.Closer).Close
    - (io.ReadCloser).Close
    - (k8s.io/client-go/tools/cache.SharedInformer).AddEventHandler
  nestif:
    # Nesting complexity greater than 32 is treated as blocking.
    min-complexity: 32
  goconst:
    # Minimal length of string constant.
    # Default: 3
    min-len: 3
    # Minimum occurrences of constant string count to trigger issue.
    # Default: 3
    min-occurrences: 3
    # Ignore test files.
    # Default: false
    ignore-tests: true
    match-constant: false
    numbers: true
    min: 2
    max: 10
    ignore-calls: true
  gosec:
    includes:
      - G101 # Look for hard coded credentials
      - G102 # Bind to all interfaces
      - G103 # Audit the use of unsafe block
      - G104 # Audit errors not checked
      - G106 # Audit the use of ssh.InsecureIgnoreHostKey
      - G107 # Url provided to HTTP request as taint input
      - G108 # Profiling endpoint automatically exposed on /debug/pprof
      - G109 # Potential Integer overflow made by strconv.Atoi result conversion to int16/32
      - G110 # Potential DoS vulnerability via decompression bomb
      - G111 # Potential directory traversal
      - G112 # Potential slowloris attack
      - G113 # Usage of Rat.SetString in math/big with an overflow (CVE-2022-23772)
      # - G114 # Use of net/http serve function that has no support for setting timeouts
      - G201 # SQL query construction using format string
      - G202 # SQL query construction using string concatenation
      - G203 # Use of unescaped data in HTML templates
      #- G204 # Audit use of command execution
      - G301 # Poor file permissions used when creating a directory
      - G302 # Poor file permissions used with chmod
      - G303 # Creating tempfile using a predictable path
      - G304 # File path provided as taint input
      - G305 # File traversal when extracting zip/tar archive
      - G306 # Poor file permissions used when writing to a new file
      - G307 # Deferring a method which returns an error
      #- G401 # Detect the usage of DES, RC4, MD5 or SHA1
      - G402 # Look for bad TLS connection settings
      - G403 # Ensure minimum RSA key length of 2048 bits
      - G404 # Insecure random number source (rand)
      #- G501 # Import blocklist: crypto/md5
      - G502 # Import blocklist: crypto/des
      - G503 # Import blocklist: crypto/rc4
      - G504 # Import blocklist: net/http/cgi
      - G505 # Import blocklist: crypto/sha1
      - G601 # Implicit memory aliasing of items from a range statement
    # Exclude generated files
    # Default: false
    exclude-generated: true
    # Filter out the issues with a lower severity than the given value.
    # Valid options are: low, medium, high.
    # Default: low
    severity: medium
    # Filter out the issues with a lower confidence than the given value.
    # Valid options are: low, medium, high.
    # Default: low
    confidence: medium
    # Concurrency value.
    # Default: the number of logical CPUs usable by the current process.
    concurrency: 12
    # To specify the configuration of rules.
    config:
      # Globals are applicable to all rules.
      global:
        nosec: true
        show-ignored: true
        audit: true
      G101:
        # Regexp pattern for variables and constants to find.
        # Default: "(?i)passwd|pass|password|pwd|secret|token|pw|apiKey|bearer|cred"
        pattern: "(?i)example"
        # If true, complain about all cases (even with low entropy).
        # Default: false
        ignore_entropy: false
        # Maximum allowed entropy of the string.
        # Default: "80.0"
        entropy_threshold: "80.0"
        per_char_threshold: "3.0"
        truncate: "32"
      G104:
        fmt:
          - Fscanf
      G111:
        # Regexp pattern to find potential directory traversal.
        # Default: "http\\.Dir\\(\"\\/\"\\)|http\\.Dir\\('\\/'\\)"
        pattern: "custom\\.Dir\\(\\)"
      # Maximum allowed permissions mode for os.Mkdir and os.MkdirAll
      # Default: "0750"
      G301: "0750"
      # Maximum allowed permissions mode for os.OpenFile and os.Chmod
      # Default: "0600"
      G302: "0600"
      # Maximum allowed permissions mode for os.WriteFile and ioutil.WriteFile
      # Default: "0600"
      G306: "0600"
  nilnil:
    checked-types:
      - ptr
      - map
      - chan
  depguard:
    rules:
      prevent_unmaintained_packages:
        list-mode: lax # allow unless explicitly denied
        files:
          - $all
          - "!$test"
        allow:
          - $gostd
          - path/filepath
        deny:
          - pkg: io/ioutil
            desc: "replaced by io and os packages since Go 1.16: https://tip.golang.org/doc/go1.16#ioutil"
          - pkg: path
            desc: "replaced by cross-platform package path/filepath"
  gci:
    # Section configuration to compare against.
    # Section names are case-insensitive and may contain parameters in ().
    # The default order of sections is `standard > default > custom > blank > dot > alias > localmodule`,
    # If `custom-order` is `true`, it follows the order of `sections` option.
    # Default: ["standard", "default"]
    sections:
      - standard # Standard section: captures all standard packages.
      - default # Default section: contains all imports that could not be matched to another section type.
      - prefix(github.com/org/project) # Custom section: groups all imports with the specified Prefix.
      - blank # Blank section: contains all blank imports. This section is not present unless explicitly enabled.
      - dot # Dot section: contains all dot imports. This section is not present unless explicitly enabled.
      - localmodule # Local module section: contains all local packages. This section is not present unless explicitly enabled.
    # Skip generated files.
    # Default: true
    skip-generated: true
    # Enable custom order of sections.
    # If `true`, make the section order the same as the order of `sections`.
    # Default: false
    custom-order: true
    # Drops lexical ordering for custom sections.
    # Default: false
    no-lex-order: true
  forbidigo:
    forbid:
      # Forbid spew Dump, whether it is called as function or method.
      # Depends on analyze-types below.
      - ^spew\.(ConfigState\.)?Dump$
      # The package name might be ambiguous.
      # The full import path can be used as additional criteria.
      # Depends on analyze-types below.
      - p: ^v1.Dump$
        pkg: ^example.com/pkg/api/v1$

linters:
  enable:
    - asasalint
    - asciicheck
    - bidichk
    - bodyclose
    # - cyclop
    - decorder
    - depguard
    - errcheck
    # - errchkjson
    - errorlint
    - forbidigo
    # - forcetypeassert
    - funlen
    - ineffassign
    - gocognit
    - gocyclo
    - goheader
    - gomodguard
    - goprintffuncname
    - gosimple
    - gosec
    - grouper
    - importas
    - maintidx
    - misspell
    - nakedret
    - nilerr
    - nilnil
    # - noctx
    - nosprintfhostport
    - paralleltest
    - predeclared
    # - promlinter
    - reassign
    - sqlclosecheck
    - staticcheck
    - tenv
    - testpackage
    - tparallel
    # del
    # - typecheck
    - usestdlibvars
    - nestif
    - unused
    - makezero
    - govet
    - goconst
    - gci
    # - rowserrcheck
    # 1.59 version no new lints
    # 1.58 version new lints
    # - fatcontext
    - canonicalheader
    # 1.57 version new lints
    - copyloopvar
    - intrange
    # 1.56 version new lints
    - spancheck
    # 1.55 version new lints
    - gochecksumtype
    - perfsprint
    - sloglint
    - testifylint
    - mirror
    - zerologlint
    # 1.51 version new lints
    - gocheckcompilerdirectives
    # 1.50 version new lints
    - testableexamples

issues:
  # Note: path identifiers are regular expressions, hence the \.go suffixes.
  exclude-rules:
    - path: main\.go
      linters:
        - forbidigo
    - path: _test\.go
      linters:
        - dogsled
        - errcheck
        - goconst
        - gosec
        - ineffassign
        - maintidx
        - typecheck
    - path: \.go$
      text: "should have a package comment"
    - path: \.go$
      text: 'exported (.+) should have comment( \(or a comment on this block\))? or be unexported'
    - path: \.go$
      text: "fmt.Sprintf can be replaced with string concatenation"


================================================
FILE: components/ingress/DEVELOPMENT.md
================================================
# Development Guide (Quick)

## Prerequisites
- Go 1.24+
- Docker (optional, for image build)
- Access to a Kubernetes cluster with BatchSandbox CRD installed.

## Install deps
```bash
cd components/ingress
go mod tidy && go mod vendor
```

## Build & Run
```bash
make build          # binary at bin/router with ldflags version info
./bin/router \
  --namespace <target-namespace> \
  --port 28888 \
  --log-level info
```

## Tests & Lint
```bash
make test           # runs the vet target, then go test -v -coverpkg=./... ./pkg/...
go vet ./...        # also run by make build and make test (via the vet target)
```

## Docker (with build args)
```bash
docker build \
  --build-arg VERSION=$(git describe --tags --always --dirty) \
  --build-arg GIT_COMMIT=$(git rev-parse HEAD) \
  --build-arg BUILD_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ") \
  -t opensandbox/ingress:dev .
```

## Key Paths
- `main.go` — entrypoint, HTTP routes, provider initialization.
- `pkg/proxy/` — HTTP/WebSocket reverse proxy logic.
- `pkg/sandbox/` — Sandbox provider abstraction and BatchSandbox implementation.
- `version/` — build metadata (ldflags).

## Tips
- Health check: `/status.ok`
- Proxy endpoint: `/` (header mode routes on the `OpenSandbox-Ingress-To` header or Host; URI mode routes on the `/<sandbox-id>/<sandbox-port>/...` path)
- Env overrides: `VERSION/GIT_COMMIT/BUILD_TIME` usable via Makefile and build.sh.
- BatchSandbox must have `sandbox.opensandbox.io/endpoints` annotation with JSON array of IPs.


================================================
FILE: components/ingress/Dockerfile
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ---- Build stage: compile the ingress binary with the Go toolchain. ----
FROM golang:1.24.0 AS builder

WORKDIR /build

# Build metadata injected into the binary through -ldflags below; override
# with --build-arg.
ARG VERSION=dev
ARG GIT_COMMIT=unknown
ARG BUILD_TIME=unknown

# The COPY sources below (kubernetes/, components/...) are relative to the
# build context, so the context is presumably the repository root rather than
# components/ingress -- confirm against the build scripts.
COPY kubernetes ./kubernetes
# Prepare local modules to satisfy replace directives.
COPY components/internal/go.mod components/internal/go.sum ./components/internal/
COPY components/ingress/go.mod components/ingress/go.sum ./components/ingress/

WORKDIR /build

# Download dependencies while only go.mod/go.sum are present, so these layers
# stay cached until the module files change.
RUN cd components/internal && go mod download
RUN cd components/ingress && go mod download

# Copy sources.
COPY components/internal ./components/internal
COPY components/ingress/. ./components/ingress

WORKDIR /build/components/ingress

# Build with cgo disabled and stamp version, build time and commit into the
# internal/version package.
RUN CGO_ENABLED=0 go build \
    -ldflags "-X 'github.com/alibaba/opensandbox/internal/version.Version=${VERSION}' \
              -X 'github.com/alibaba/opensandbox/internal/version.BuildTime=${BUILD_TIME}' \
              -X 'github.com/alibaba/opensandbox/internal/version.GitCommit=${GIT_COMMIT}'" \
    -o /build/ingress ./main.go

# ---- Runtime stage: only the compiled binary is carried over. ----
FROM alpine:latest

# No WORKDIR is set in this stage, so the binary lands in the image's default
# working directory and the relative ENTRYPOINT path resolves against it.
COPY --from=builder /build/ingress .

ENTRYPOINT ["./ingress"]


================================================
FILE: components/ingress/Makefile
================================================
# Format every Go package in this module.
.PHONY: fmt
fmt: ## Run go fmt against code.
	go fmt ./...

# Refreshes go.mod/go.sum and the vendor/ directory before vetting, so running
# this target (or anything depending on it) can modify the working tree.
.PHONY: vet
vet: ## Run go vet against code.
	go mod tidy && go mod vendor
	go vet ./...

# Depends on vet. Runs the tests under ./pkg/... verbosely, with coverage
# measured across all packages of the module (-coverpkg=./...).
.PHONY: test
test: vet ## Run tests
	go test -v -coverpkg=./... ./pkg/...

##@ Linter

# Install golangci-lint with `go install` unless it is already on PATH.
#
# The lookup uses the POSIX redirection `>/dev/null 2>&1`. The bash-only `&>`
# is not understood by a plain POSIX sh (make's default shell is /bin/sh):
# there it is parsed as "run in the background" followed by a separate
# redirection, so the presence check no longer reflects whether the tool is
# installed.
.PHONY: install-golint
install-golint:
	@if ! command -v golangci-lint >/dev/null 2>&1; then \
		echo "installing golangci-lint..."; \
		go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest; \
	else \
		echo "golangci-lint already installed"; \
	fi

# Formats the code and makes sure golangci-lint is available, then lints all
# packages. --fix lets the linter rewrite files in place where it can.
.PHONY: golint
golint: fmt install-golint
	golangci-lint run -v --fix ./...

# Build metadata. Each value uses ?= so it can be overridden from the
# environment or the make command line; VERSION and GIT_COMMIT fall back to
# "dev" / "unknown" when the git command fails.
VERSION ?= $(shell git describe --tags --always --dirty 2>/dev/null || echo "dev")
GIT_COMMIT ?= $(shell git rev-parse HEAD 2>/dev/null || echo "unknown")
BUILD_TIME ?= $(shell date -u +"%Y-%m-%dT%H:%M:%SZ")
# Linker flags that stamp the metadata into the internal/version package.
LDFLAGS := -X 'github.com/alibaba/opensandbox/internal/version.Version=$(VERSION)' \
	-X 'github.com/alibaba/opensandbox/internal/version.BuildTime=$(BUILD_TIME)' \
	-X 'github.com/alibaba/opensandbox/internal/version.GitCommit=$(GIT_COMMIT)'

# Depends on vet. The binary is written to bin/router.
# NOTE(review): the component's Dockerfile names its binary "ingress"; confirm
# whether the different output name here is intentional.
.PHONY: build
build: vet ## Build the binary.
	@mkdir -p bin
	go build -ldflags "$(LDFLAGS)" -o bin/router main.go

# Removes the build output and the vendor/ directory created by the vet target.
.PHONY: clean
clean: ## Clean build artifacts.
	rm -rf bin/ vendor/


================================================
FILE: components/ingress/README.md
================================================
# OpenSandbox Ingress

## Overview
- HTTP/WebSocket reverse proxy that routes to sandbox instances.
- Watches sandbox CRs (BatchSandbox or AgentSandbox, chosen by `--provider-type`) in a target Namespace:
  - BatchSandbox: reads endpoints from `sandbox.opensandbox.io/endpoints` annotation.
  - AgentSandbox: reads `status.serviceFQDN`.
- Exposes `/status.ok` health check; prints build metadata (version, commit, time, Go/platform) at startup.

## Quick Start
```bash
go run main.go \
  --namespace <target-namespace> \
  --provider-type <batchsandbox|agent-sandbox> \
  --mode <header|uri> \
  --port 28888 \
  --log-level info
```
Endpoints: `/` (proxy), `/status.ok` (health).

## Routing Modes

The ingress supports two routing modes for discovering sandbox instances:

### Header Mode (default: `--mode header`)

Routes requests based on the `OpenSandbox-Ingress-To` header or the `Host` header.

**Format:**
- Header: `OpenSandbox-Ingress-To: <sandbox-id>-<port>`
- Host: `<sandbox-id>-<port>.<domain>`

**Example:**
```bash
# Using OpenSandbox-Ingress-To header
curl -H "OpenSandbox-Ingress-To: my-sandbox-8080" https://ingress.opensandbox.io/api/users

# Using Host header
curl -H "Host: my-sandbox-8080.example.com" https://ingress.opensandbox.io/api/users
```

**Parsing logic:**
- Extracts sandbox ID and port from the format `<sandbox-id>-<port>`
- The segment after the last `-` is treated as the port
- Everything before the last `-` is treated as the sandbox ID, so the ID itself may contain `-` (e.g. `my-sandbox-8080` → sandbox ID `my-sandbox`, port `8080`)

### URI Mode (`--mode uri`)

Routes requests based on the URI path structure.

**Format:**

`/<sandbox-id>/<sandbox-port>/<path-to-request>`

**Example:**
```bash
# Request to sandbox "my-sandbox" on port 8080, forwarding to /api/users
curl https://ingress.opensandbox.io/my-sandbox/8080/api/users

# WebSocket example
wss://ingress.opensandbox.io/my-sandbox/8080/ws
```

**Parsing logic:**
- First path segment: sandbox ID
- Second path segment: sandbox port
- Remaining path: forwarded to the target sandbox as the request URI
- If no remaining path is provided, defaults to `/`

**Use cases:**
- When you cannot modify HTTP headers
- When you need path-based routing
- For simpler client configuration without custom headers

## Auto-Renew on Ingress Access (OSEP-0009)

When enabled, the ingress publishes **renew-intent** events to a Redis list on each proxied request (after resolving the sandbox). The OpenSandbox server consumes these events and may extend sandbox expiration for sandboxes that opted in at creation time. See [OSEP-0009](https://github.com/alibaba/opensandbox/blob/main/oseps/0009-auto-renew-sandbox-on-ingress-access.md) for the full design.

**Requirements:** The server must have auto-renew and the Redis consumer enabled, and the sandbox must be created with `extensions["auto_renew_on_access"]="true"`. This feature is best-effort and disabled by default.

| Flag | Default | Description |
|------|---------|-------------|
| `--renew-intent-enabled` | `false` | Enable publishing renew-intent events to Redis |
| `--renew-intent-redis-dsn` | `redis://127.0.0.1:6379/0` | Redis DSN (may include `user:password@`) |
| `--renew-intent-queue-key` | `opensandbox:renew:intent` | Redis List key for intent payloads |
| `--renew-intent-queue-max-len` | `0` | Max list length (0 = no cap); LTRIM is applied when > 0 |
| `--renew-intent-min-interval` | `60` | Min seconds between intents per sandbox (client-side throttle) |

**Example (with Redis):**
```bash
go run main.go \
  --namespace opensandbox \
  --renew-intent-enabled \
  --renew-intent-redis-dsn "redis://user:pass@redis:6379/0" \
  --renew-intent-min-interval 120
```

## Build
```bash
cd components/ingress
make build
# override build metadata if needed
VERSION=1.2.3 GIT_COMMIT=$(git rev-parse HEAD) BUILD_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ") make build
```

## Docker Build
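The Dockerfile already wires ldflags via build args. Run the build from the repository root (as `build.sh` does), because the module depends on sibling directories outside `components/ingress`:
```bash
docker build \
  -f components/ingress/Dockerfile \
  --build-arg VERSION=$(git describe --tags --always --dirty) \
  --build-arg GIT_COMMIT=$(git rev-parse HEAD) \
  --build-arg BUILD_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ") \
  -t opensandbox/ingress:local .
```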

## Multi-arch Publish Script
`build.sh` uses buildx to build/push linux/amd64 and linux/arm64:
```bash
cd components/ingress
TAG=local VERSION=1.2.3 GIT_COMMIT=abc BUILD_TIME=2025-01-01T00:00:00Z bash build.sh
```

## Runtime Requirements
- Access to Kubernetes API (in-cluster or via KUBECONFIG).
- If `--provider-type=batchsandbox`: BatchSandbox CRs in the specified Namespace with `sandbox.opensandbox.io/endpoints` annotation containing Pod IPs.
- If `--provider-type=agent-sandbox`: AgentSandbox CRs with `status.serviceFQDN` populated.

## Implementation Notes

### Header Mode Behavior
- Routing key priority: `OpenSandbox-Ingress-To` header first, otherwise Host parsing `<sandbox-name>-<port>.*`.
- Sandbox name extracted from request is used to query the sandbox CR (BatchSandbox or AgentSandbox) via informer cache:
  - BatchSandbox → endpoints annotation.
  - AgentSandbox → `status.serviceFQDN`.
- The original request path is preserved and forwarded to the target sandbox.

### URI Mode Behavior
- Routing information is extracted from the URI path: `/<sandbox-id>/<sandbox-port>/<path-to-request>`.
- The sandbox ID and port are extracted from the first two path segments.
- The remaining path (`/<path-to-request>`) is forwarded to the target sandbox as the request URI.
- If no remaining path is provided, the request URI defaults to `/`.

### Commons
- Error handling:
  - `ErrSandboxNotFound` (sandbox resource does not exist) → HTTP 404
  - `ErrSandboxNotReady` (not enough replicas, missing endpoints, invalid config) → HTTP 503
  - Other errors (K8s API errors, etc.) → HTTP 502
- WebSocket path forwards essential headers and X-Forwarded-*; HTTP path strips `OpenSandbox-Ingress-To` before proxying (header mode only).

## Development & Tests
```bash
cd components/ingress
go test ./...
```
Key code:
- `main.go`: entrypoint and handlers.
- `pkg/proxy/`: HTTP/WebSocket proxy logic, sandbox endpoint resolution.
- `pkg/sandbox/`: Sandbox provider abstraction and BatchSandbox implementation.
- `../internal/version/` (shared `github.com/alibaba/opensandbox/internal` module): build metadata output (populated via ldflags).


================================================
FILE: components/ingress/build.sh
================================================
#!/bin/bash
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# -e: abort on the first failing command; -x: echo every command as it runs.
set -ex

# Image tag and build metadata. Every value can be overridden from the
# environment; the fallbacks are derived from git and the current UTC time.
TAG=${TAG:-latest}
VERSION=${VERSION:-$(git describe --tags --always --dirty 2>/dev/null || echo "dev")}
GIT_COMMIT=${GIT_COMMIT:-$(git rev-parse HEAD 2>/dev/null || echo "unknown")}
BUILD_TIME=${BUILD_TIME:-$(date -u +"%Y-%m-%dT%H:%M:%SZ")}

# The image is built with the repository root as the build context (the
# Dockerfile is referenced by its repo-relative path below). Outside a git
# checkout, fall back to two directories above this script.
REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || realpath "$(dirname "$0")/../..")
cd "${REPO_ROOT}"

# Recreate the buildx builder from scratch. Removal is allowed to fail because
# the builder does not exist on a first run.
docker buildx rm ingress-builder || true

docker buildx create --use --name ingress-builder

docker buildx inspect --bootstrap

docker buildx ls

# Tags starting with "v" are additionally published as :latest to both
# registries.
LATEST_TAGS=()
if [[ "${TAG}" == v* ]]; then
  LATEST_TAGS+=(-t opensandbox/ingress:latest -t sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/ingress:latest)
fi

# Build for amd64 and arm64 and push to both registries. The image references
# are quoted so an unusual TAG value cannot be word-split or glob-expanded.
docker buildx build \
  -t "opensandbox/ingress:${TAG}" \
  -t "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/ingress:${TAG}" \
  "${LATEST_TAGS[@]}" \
  -f components/ingress/Dockerfile \
  --build-arg VERSION="${VERSION}" \
  --build-arg GIT_COMMIT="${GIT_COMMIT}" \
  --build-arg BUILD_TIME="${BUILD_TIME}" \
  --platform linux/amd64,linux/arm64 \
  --push \
  .


================================================
FILE: components/ingress/go.mod
================================================
module github.com/alibaba/opensandbox/ingress

go 1.24.0

require (
	github.com/alibaba/OpenSandbox/sandbox-k8s v0.0.0
	github.com/alibaba/opensandbox/internal v0.0.0
	github.com/alicebob/miniredis/v2 v2.37.0
	github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674
	github.com/redis/go-redis/v9 v9.18.0
	github.com/stretchr/testify v1.11.1
	k8s.io/apimachinery v0.34.3
	k8s.io/client-go v0.34.3
	knative.dev/pkg v0.0.0-20260120122510-4a022ed9999a
)

require (
	github.com/blendle/zapdriver v1.3.1 // indirect
	github.com/cespare/xxhash/v2 v2.3.0 // indirect
	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
	github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
	github.com/emicklei/go-restful/v3 v3.12.2 // indirect
	github.com/evanphx/json-patch/v5 v5.9.11 // indirect
	github.com/fxamacker/cbor/v2 v2.9.0 // indirect
	github.com/go-logr/logr v1.4.3 // indirect
	github.com/go-logr/stdr v1.2.2 // indirect
	github.com/go-openapi/jsonpointer v0.21.0 // indirect
	github.com/go-openapi/jsonreference v0.21.0 // indirect
	github.com/go-openapi/swag v0.23.0 // indirect
	github.com/gogo/protobuf v1.3.2 // indirect
	github.com/google/gnostic-models v0.7.0 // indirect
	github.com/google/go-cmp v0.7.0 // indirect
	github.com/google/uuid v1.6.0 // indirect
	github.com/hashicorp/golang-lru v1.0.2 // indirect
	github.com/josharian/intern v1.0.0 // indirect
	github.com/json-iterator/go v1.1.12 // indirect
	github.com/kelseyhightower/envconfig v1.4.0 // indirect
	github.com/mailru/easyjson v0.9.0 // indirect
	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
	github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
	github.com/pkg/errors v0.9.1 // indirect
	github.com/pmezard/go-difflib v1.0.0 // indirect
	github.com/spf13/pflag v1.0.10 // indirect
	github.com/x448/float16 v0.8.4 // indirect
	github.com/yuin/gopher-lua v1.1.1 // indirect
	go.opentelemetry.io/auto/sdk v1.2.1 // indirect
	go.opentelemetry.io/otel v1.40.0 // indirect
	go.opentelemetry.io/otel/metric v1.40.0 // indirect
	go.opentelemetry.io/otel/trace v1.40.0 // indirect
	go.uber.org/atomic v1.11.0 // indirect
	go.uber.org/multierr v1.11.0 // indirect
	go.uber.org/zap v1.27.1 // indirect
	go.yaml.in/yaml/v2 v2.4.3 // indirect
	go.yaml.in/yaml/v3 v3.0.4 // indirect
	golang.org/x/net v0.49.0 // indirect
	golang.org/x/oauth2 v0.32.0 // indirect
	golang.org/x/sync v0.19.0 // indirect
	golang.org/x/sys v0.40.0 // indirect
	golang.org/x/term v0.39.0 // indirect
	golang.org/x/text v0.33.0 // indirect
	golang.org/x/time v0.10.0 // indirect
	gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect
	google.golang.org/protobuf v1.36.10 // indirect
	gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
	gopkg.in/inf.v0 v0.9.1 // indirect
	gopkg.in/yaml.v3 v3.0.1 // indirect
	k8s.io/api v0.34.3 // indirect
	k8s.io/klog/v2 v2.130.1 // indirect
	k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect
	k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect
	sigs.k8s.io/controller-runtime v0.21.0 // indirect
	sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect
	sigs.k8s.io/randfill v1.0.0 // indirect
	sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect
	sigs.k8s.io/yaml v1.6.0 // indirect
)

replace github.com/alibaba/OpenSandbox/sandbox-k8s => ../../kubernetes

replace github.com/alibaba/opensandbox/internal => ../internal


================================================
FILE: components/ingress/go.sum
================================================
github.com/alicebob/miniredis/v2 v2.37.0 h1:RheObYW32G1aiJIj81XVt78ZHJpHonHLHW7OLIshq68=
github.com/alicebob/miniredis/v2 v2.37.0/go.mod h1:TcL7YfarKPGDAthEtl5NBeHZfeUQj6OXMm/+iu5cLMM=
github.com/blendle/zapdriver v1.3.1 h1:C3dydBOWYRiOk+B8X9IVZ5IOe+7cl+tGOexN4QqHfpE=
github.com/blendle/zapdriver v1.3.1/go.mod h1:mdXfREi6u5MArG4j9fewC+FGnXaBR+T4Ox4J2u4eHCc=
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU=
github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
github.com/evanphx/json-patch v5.9.0+incompatible h1:fBXyNpNMuTTDdquAq/uisOr2lShz4oaXpDTX2bLe7ls=
github.com/evanphx/json-patch v5.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU=
github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM=
github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM=
github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ=
github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY=
github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ=
github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4=
github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE=
github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo=
github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo=
github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5THxAzdVpqr6/geYxZytqFMBCOtn/ujyeo=
github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674/go.mod h1:r4w70xmWCQKmi1ONH4KIaBptdivuRPyosB9RmPlGEwA=
github.com/hashicorp/golang-lru v1.0.2 h1:dV3g9Z/unq5DpblPpw+Oqcv4dU/1omnb4Ok8iPY6p1c=
github.com/hashicorp/golang-lru v1.0.2/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/kelseyhightower/envconfig v1.4.0 h1:Im6hONhd3pLkfDFsbRgu68RDNkGF1r3dvMUtDTo2cv8=
github.com/kelseyhightower/envconfig v1.4.0/go.mod h1:cccZRl6mQpaq41TPp5QxidR+Sa3axMbJDNb//FQX6Gg=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4=
github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8=
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg=
github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo=
github.com/onsi/gomega v1.36.1 h1:bJDPBO7ibjxcbHMgSCoo4Yj18UWbKDlLwX1x9sybDcw=
github.com/onsi/gomega v1.36.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/redis/go-redis/v9 v9.18.0 h1:pMkxYPkEbMPwRdenAzUNyFNrDgHx9U+DrBabWNfSRQs=
github.com/redis/go-redis/v9 v9.18.0/go.mod h1:k3ufPphLU5YXwNTUcCRXGxUoF1fqxnhFQmscfkCoDA0=
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk=
github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M=
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0=
github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA=
go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
go.opentelemetry.io/otel v1.40.0 h1:oA5YeOcpRTXq6NN7frwmwFR0Cn3RhTVZvXsP4duvCms=
go.opentelemetry.io/otel v1.40.0/go.mod h1:IMb+uXZUKkMXdPddhwAHm6UfOwJyh4ct1ybIlV14J0g=
go.opentelemetry.io/otel/metric v1.40.0 h1:rcZe317KPftE2rstWIBitCdVp89A2HqjkxR3c11+p9g=
go.opentelemetry.io/otel/metric v1.40.0/go.mod h1:ib/crwQH7N3r5kfiBZQbwrTge743UDc7DTFVZrrXnqc=
go.opentelemetry.io/otel/sdk v1.40.0 h1:KHW/jUzgo6wsPh9At46+h4upjtccTmuZCFAc9OJ71f8=
go.opentelemetry.io/otel/sdk v1.40.0/go.mod h1:Ph7EFdYvxq72Y8Li9q8KebuYUr2KoeyHx0DRMKrYBUE=
go.opentelemetry.io/otel/sdk/metric v1.39.0 h1:cXMVVFVgsIf2YL6QkRF4Urbr/aMInf+2WKg+sEJTtB8=
go.opentelemetry.io/otel/sdk/metric v1.39.0/go.mod h1:xq9HEVH7qeX69/JnwEfp6fVq5wosJsY1mt4lLfYdVew=
go.opentelemetry.io/otel/trace v1.40.0 h1:WA4etStDttCSYuhwvEa8OP8I5EWu24lkOzp+ZYblVjw=
go.opentelemetry.io/otel/trace v1.40.0/go.mod h1:zeAhriXecNGP/s2SEG3+Y8X9ujcJOTqQ5RgdEJcawiA=
go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
go.uber.org/zap v1.27.1 h1:08RqriUEv8+ArZRYSTXy1LeBScaMpVSTBhCeaZYfMYc=
go.uber.org/zap v1.27.1/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0=
go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8=
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.32.0 h1:9F4d3PHLljb6x//jOyokMv3eX+YDeepZSEo3mFJy93c=
golang.org/x/mod v0.32.0/go.mod h1:SgipZ/3h2Ci89DlEtEXWUk/HteuRin+HHhN+WbNhguU=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o=
golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8=
golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY=
golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ=
golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.39.0 h1:RclSuaJf32jOqZz74CkPA9qFuVTX7vhLlpfj/IGWlqY=
golang.org/x/term v0.39.0/go.mod h1:yxzUCTP/U+FzoxfdKmLaA0RV1WgE0VY7hXBwKtY/4ww=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE=
golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8=
golang.org/x/time v0.10.0 h1:3usCWA8tQn0L8+hFJQNgzpWbd89begxN66o1Ojdn5L4=
golang.org/x/time v0.10.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.41.0 h1:a9b8iMweWG+S0OBnlU36rzLp20z1Rp10w+IY2czHTQc=
golang.org/x/tools v0.41.0/go.mod h1:XSY6eDqxVNiYgezAVqqCeihT4j1U2CCsqvH3WhQpnlg=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gomodules.xyz/jsonpatch/v2 v2.5.0 h1:JELs8RLM12qJGXU4u/TO3V25KW8GreMKl9pdkk14RM0=
gomodules.xyz/jsonpatch/v2 v2.5.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY=
google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE=
google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4=
gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
k8s.io/api v0.34.3 h1:D12sTP257/jSH2vHV2EDYrb16bS7ULlHpdNdNhEw2S4=
k8s.io/api v0.34.3/go.mod h1:PyVQBF886Q5RSQZOim7DybQjAbVs8g7gwJNhGtY5MBk=
k8s.io/apimachinery v0.34.3 h1:/TB+SFEiQvN9HPldtlWOTp0hWbJ+fjU+wkxysf/aQnE=
k8s.io/apimachinery v0.34.3/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw=
k8s.io/client-go v0.34.3 h1:wtYtpzy/OPNYf7WyNBTj3iUA0XaBHVqhv4Iv3tbrF5A=
k8s.io/client-go v0.34.3/go.mod h1:OxxeYagaP9Kdf78UrKLa3YZixMCfP6bgPwPwNBQBzpM=
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b h1:MloQ9/bdJyIu9lb1PzujOPolHyvO06MXG5TUIj2mNAA=
k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts=
k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y=
k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
knative.dev/pkg v0.0.0-20260120122510-4a022ed9999a h1:9f29OTA7w/iVIX6PS6yveVVzNbcUS74eQfchVe8o2/4=
knative.dev/pkg v0.0.0-20260120122510-4a022ed9999a/go.mod h1:Tz3GoxcNC5vH3Zo//cW3mnHL474u+Y1wbsUIZ11p8No=
sigs.k8s.io/controller-runtime v0.21.0 h1:CYfjpEuicjUecRk+KAeyYh+ouUBn4llGyDYytIGcJS8=
sigs.k8s.io/controller-runtime v0.21.0/go.mod h1:OSg14+F65eWqIu4DceX7k/+QRAbTTvxeQSNSOQpukWM=
sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE=
sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco=
sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE=
sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs=
sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4=


================================================
FILE: components/ingress/main.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"context"
	"fmt"
	"log"
	"net/http"
	"time"

	"k8s.io/apimachinery/pkg/runtime"
	"knative.dev/pkg/injection"
	"knative.dev/pkg/signals"

	"github.com/alibaba/opensandbox/ingress/pkg/flag"
	"github.com/alibaba/opensandbox/ingress/pkg/proxy"
	"github.com/alibaba/opensandbox/ingress/pkg/renewintent"
	"github.com/alibaba/opensandbox/ingress/pkg/sandbox"
	slogger "github.com/alibaba/opensandbox/internal/logger"
	"github.com/alibaba/opensandbox/internal/version"
)

// main wires up and runs the OpenSandbox ingress.
//
// Startup order: print build metadata, parse flags, build the Kubernetes REST
// config, start the sandbox provider, optionally create the renew-intent Redis
// publisher, then serve the reverse proxy on "/" and the health check on
// "/status.ok".
//
// Any startup failure, or a listener failure, panics through log.Panicf. When
// the signal context is cancelled the HTTP server is shut down gracefully and
// main returns normally.
func main() {
	// Upper bound on how long in-flight requests may drain after a termination
	// signal before the process exits anyway.
	const shutdownTimeout = 15 * time.Second

	version.EchoVersion("OpenSandbox Ingress")

	flag.InitFlags()
	if flag.Namespace == "" {
		log.Panicf("'-namespace' not set.")
	}

	cfg := injection.ParseAndGetRESTConfigOrDie()
	cfg.ContentType = runtime.ContentTypeProtobuf
	cfg.UserAgent = "opensandbox-ingress/" + version.GitCommit

	// ctx is cancelled when the process receives a termination signal; it is
	// shared by the provider, the publisher, the proxy and the shutdown logic.
	ctx := signals.NewContext()
	ctx = withLogger(ctx, flag.LogLevel)

	// Create sandbox provider factory
	providerFactory := sandbox.NewProviderFactory(
		cfg,
		flag.Namespace,
		time.Second*30, // resync period
	)

	// Create sandbox provider based on provider type
	sandboxProvider, err := providerFactory.CreateProvider(sandbox.ProviderType(flag.ProviderType))
	if err != nil {
		log.Panicf("Failed to create sandbox provider: %v", err)
	}

	// Start provider (includes cache sync)
	if err := sandboxProvider.Start(ctx); err != nil {
		log.Panicf("Failed to start sandbox provider: %v", err)
	}

	// renewPublisher stays nil unless renew-intent publishing is enabled.
	var renewPublisher renewintent.Publisher
	if flag.RenewIntentEnabled {
		redisClient, err := renewintent.RedisClientFromDSN(flag.RenewIntentRedisDSN)
		if err != nil {
			log.Panicf("Failed to create Redis client for renew-intent: %v", err)
		}
		renewPublisher = renewintent.NewRedisPublisher(ctx, redisClient, renewintent.RedisPublisherConfig{
			QueueKey:    flag.RenewIntentQueueKey,
			QueueMaxLen: flag.RenewIntentQueueMaxLen,
			MinInterval: time.Duration(flag.RenewIntentMinIntervalSec) * time.Second,
			Logger:      proxy.Logger,
		})
	}

	// Create reverse proxy with sandbox provider
	reverseProxy := proxy.NewProxy(ctx, sandboxProvider, proxy.Mode(flag.Mode), renewPublisher)
	http.Handle("/", reverseProxy)
	http.HandleFunc("/status.ok", proxy.Healthz)

	// Handler is left nil so the server uses http.DefaultServeMux, where the
	// routes above were registered.
	server := &http.Server{Addr: fmt.Sprintf(":%v", flag.Port)}

	// Serve in the background so main can react to whichever happens first: a
	// listener failure or a termination signal. The channel is buffered so the
	// goroutine never blocks after main has moved on.
	serveErr := make(chan error, 1)
	go func() {
		serveErr <- server.ListenAndServe()
	}()

	select {
	case err := <-serveErr:
		// Shutdown has not been requested on this path, so any return from
		// ListenAndServe is a real failure (e.g. the port is already in use).
		log.Panicf("Error starting http server: %v", err)
	case <-ctx.Done():
		// A fresh context is required here: ctx is already cancelled and would
		// make Shutdown return immediately.
		drainCtx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
		defer cancel()
		if err := server.Shutdown(drainCtx); err != nil {
			log.Printf("Graceful shutdown of http server failed: %v", err)
		}
	}
}

// withLogger builds the ingress logger at the requested level, names it
// "opensandbox.ingress", and registers it through proxy.WithLogger, whose
// returned context is passed back to the caller.
func withLogger(ctx context.Context, logLevel string) context.Context {
	loggerConfig := slogger.Config{Level: logLevel}
	ingressLogger := slogger.MustNew(loggerConfig).Named("opensandbox.ingress")

	return proxy.WithLogger(ctx, ingressLogger)
}


================================================
FILE: components/ingress/pkg/flag/flags.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package flag

// Package-level flag values. They are populated by InitFlags, which registers
// one command-line flag per variable and then parses the arguments.
var (
	// LogLevel controls the router log verbosity.
	LogLevel string

	// Port controls the HTTP listener port.
	Port int

	// Namespace filters the target sandbox instances.
	Namespace string

	// ProviderType specifies the sandbox provider type (e.g., batchsandbox).
	ProviderType string

	// Mode specifies the sandbox service discovery mode (e.g., header, uri).
	Mode string

	// Renew-intent publishing options (see the matching "renew-intent-*"
	// flags registered in InitFlags):
	//   - RenewIntentEnabled turns publishing of renew-intent events to Redis on.
	//   - RenewIntentRedisDSN is the Redis DSN of the renew-intent queue.
	//   - RenewIntentQueueKey is the Redis List key that receives the payloads.
	//   - RenewIntentQueueMaxLen caps the queue length; 0 means no cap.
	//   - RenewIntentMinIntervalSec is the minimum number of seconds between
	//     intents published for the same sandbox (client-side throttle).
	RenewIntentEnabled        bool
	RenewIntentRedisDSN       string
	RenewIntentQueueKey       string
	RenewIntentQueueMaxLen    int
	RenewIntentMinIntervalSec int
)


================================================
FILE: components/ingress/pkg/flag/parser.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package flag

import (
	"flag"
)

// InitFlags registers every ingress command-line flag on the process-wide
// default flag set and then parses the command line, filling the package-level
// variables declared alongside it.
func InitFlags() {
	// flag.CommandLine is the set the package-level flag.XxxVar helpers
	// delegate to, so registering on it directly is equivalent.
	cmdline := flag.CommandLine

	// Core server options.
	cmdline.StringVar(&LogLevel, "log-level", "info", "Server log level")
	cmdline.IntVar(&Port, "port", 28888, "Server listening port (default: 28888)")
	cmdline.StringVar(&Namespace, "namespace", "opensandbox", "The Kubernetes namespace to watch for sandbox resources")
	cmdline.StringVar(&ProviderType, "provider-type", "batchsandbox", "The sandbox provider type (default: batchsandbox)")
	cmdline.StringVar(&Mode, "mode", "header", "The sandbox service discovery mode (default: header)")

	// Renew-intent publishing options.
	cmdline.BoolVar(&RenewIntentEnabled, "renew-intent-enabled", false, "Enable publishing renew-intent events to Redis (OSEP-0009)")
	cmdline.StringVar(&RenewIntentRedisDSN, "renew-intent-redis-dsn", "redis://127.0.0.1:6379/0", "Redis DSN for renew-intent queue")
	cmdline.StringVar(&RenewIntentQueueKey, "renew-intent-queue-key", "opensandbox:renew:intent", "Redis List key for renew-intent payloads")
	cmdline.IntVar(&RenewIntentQueueMaxLen, "renew-intent-queue-max-len", 0, "Max renew-intent queue length (0 = no cap)")
	cmdline.IntVar(&RenewIntentMinIntervalSec, "renew-intent-min-interval", 60, "Min seconds between publishing intents for the same sandbox (client-side throttle)")

	flag.Parse()
}


================================================
FILE: components/ingress/pkg/proxy/header.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package proxy

import "net/http"

// HTTP header names used by the ingress proxy. Every value is passed through
// http.CanonicalHeaderKey, so it can be used directly as a key into an
// http.Header map.
var (
	// Client-address and protocol forwarding headers.
	XRealIP         = http.CanonicalHeaderKey("X-Real-IP")
	XForwardedFor   = http.CanonicalHeaderKey("X-Forwarded-For")
	XForwardedProto = http.CanonicalHeaderKey("X-Forwarded-Proto")

	// SandboxIngress is the request header that carries the routing target in
	// header mode; it takes precedence over the request Host.
	SandboxIngress = http.CanonicalHeaderKey("OpenSandbox-Ingress-To")
	// DeprecatedSandboxIngress is the previous name of the routing header. It
	// is still read as a fallback when SandboxIngress is absent.
	//
	// Deprecated: send SandboxIngress instead.
	DeprecatedSandboxIngress = http.CanonicalHeaderKey("OPEN-SANDBOX-INGRESS")

	// AccessControlAllowOrigin is the CORS response header name.
	// ReverseProxyServerPowerBy is the response header the HTTP reverse proxy
	// adds to mark responses that passed through the ingress.
	AccessControlAllowOrigin  = http.CanonicalHeaderKey("Access-Control-Allow-Origin")
	ReverseProxyServerPowerBy = http.CanonicalHeaderKey("Reverse-Proxy-Server-PowerBy")

	// Headers referenced by the WebSocket proxy when building the backend
	// handshake request.
	SecWebSocketProtocol = http.CanonicalHeaderKey("Sec-WebSocket-Protocol")
	Cookie               = http.CanonicalHeaderKey("Cookie")
	SetCookie            = http.CanonicalHeaderKey("Set-Cookie")
	Host                 = http.CanonicalHeaderKey("Host")
	Origin               = http.CanonicalHeaderKey("Origin")
)


================================================
FILE: components/ingress/pkg/proxy/healthz.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package proxy

import "net/http"

func Healthz(w http.ResponseWriter, _ *http.Request) {
	w.WriteHeader(http.StatusOK)
	_, _ = w.Write([]byte("OK"))
}


================================================
FILE: components/ingress/pkg/proxy/healthz_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package proxy

import (
	"net/http"
	"net/http/httptest"
	"testing"

	"github.com/stretchr/testify/assert"
)

// TestHealthz checks that the health handler answers 200 with the body "OK".
func TestHealthz(t *testing.T) {
	recorder := httptest.NewRecorder()

	Healthz(recorder, httptest.NewRequest(http.MethodGet, "/healthz", nil))

	assert.Equal(t, http.StatusOK, recorder.Code)
	assert.Equal(t, "OK", recorder.Body.String())
}


================================================
FILE: components/ingress/pkg/proxy/host.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package proxy

import (
	"errors"
	"fmt"
	"net/http"
	"strconv"
	"strings"
)

// Mode selects how the proxy extracts the target sandbox and port from an
// incoming request. Any value other than the constants below is rejected by
// getSandboxHostDefinition with an "unknown ingress mode" error.
type Mode string

const (
	// ModeHeader is the mode that uses the Host or SandboxIngress header
	// to determine the sandbox instance.
	ModeHeader Mode = "header"

	// ModeURI is the mode that uses the URI path to determine the
	// sandbox instance.
	//
	// Pattern is 'hostname/<sandbox-id>/<sandbox-port>/<path-to-request>'.
	ModeURI Mode = "uri"
)

// getSandboxHostDefinition extracts the routing target of r according to the
// proxy mode.
//
// In URI mode the work is delegated to parseSandboxURI. In header mode the
// target string comes from parseTargetHostByHeader and is parsed with
// parseSandboxHost; a parse failure, an empty ingress key or a zero port all
// yield the same "invalid host" error. Any other mode is an error.
func (p *Proxy) getSandboxHostDefinition(r *http.Request) (*sandboxHost, error) {
	if p.mode == ModeURI {
		return p.parseSandboxURI(r)
	}
	if p.mode != ModeHeader {
		return nil, fmt.Errorf("unknown ingress mode: %s", p.mode)
	}

	target := p.parseTargetHostByHeader(r)
	if target == "" {
		return nil, fmt.Errorf("missing header '%s' or 'Host'", SandboxIngress)
	}

	parsed, parseErr := p.parseSandboxHost(target)
	// parsed is only inspected when parsing succeeded (short-circuit).
	usable := parseErr == nil && parsed.ingressKey != "" && parsed.port != 0
	if !usable {
		return nil, fmt.Errorf("invalid host: %s", target)
	}
	return parsed, nil
}

// parseTargetHostByHeader returns the routing target string for r: the first
// non-empty value among the SandboxIngress header and the deprecated header,
// in that order, falling back to the request Host.
func (p *Proxy) parseTargetHostByHeader(r *http.Request) string {
	for _, headerName := range []string{SandboxIngress, DeprecatedSandboxIngress} {
		if value := r.Header.Get(headerName); value != "" {
			return value
		}
	}
	return r.Host
}

// sandboxHost is the routing target parsed from an incoming request.
//
// NOTE: parseSandboxHost builds this struct with an unkeyed composite literal,
// so the field order below must not change without updating it.
type sandboxHost struct {
	// ingressKey identifies the sandbox; it is the key handed to the sandbox
	// provider's GetEndpoint.
	ingressKey string
	// port is the port inside the sandbox the request is forwarded to.
	port int
	// requestURI is the path to forward to the backend. It is filled in URI
	// mode only; header mode leaves it empty and the request path is kept.
	requestURI string
}

// parseSandboxHost parses a header-mode routing target of the form
// "<ingress-key>-<port>[.<domain>...]" into a sandboxHost.
//
// An optional "http://" or "https://" prefix is stripped. Only the first DNS
// label is inspected; a trailing ":<listener-port>" or "/<path>" on that label
// is ignored so that a dot-less Host value such as "sandbox-8080:28888" is
// accepted as well. The ingress key is everything before the last '-' of the
// label (it may itself contain dashes) and the port is the decimal number
// after it.
//
// On failure a non-nil empty sandboxHost is returned together with the error,
// matching what existing callers expect. Errors are returned when the label
// has no '-', starts with '-', the port is not a number, or the port is
// outside 1-65535.
func (p *Proxy) parseSandboxHost(s string) (*sandboxHost, error) {
	host := strings.TrimPrefix(strings.TrimPrefix(s, "https://"), "http://")

	// Keep only the first label, dropping the domain, any listener port and
	// any path component.
	label := host
	if end := strings.IndexAny(label, ".:/"); end >= 0 {
		label = label[:end]
	}

	sep := strings.LastIndex(label, "-")
	if sep < 0 || strings.HasPrefix(label, "-") {
		return &sandboxHost{}, fmt.Errorf("invalid host: %s", s)
	}

	port, err := strconv.Atoi(label[sep+1:])
	if err != nil {
		return &sandboxHost{}, fmt.Errorf("invalid port format: %w", err)
	}
	if port < 1 || port > 65535 {
		return &sandboxHost{}, fmt.Errorf("invalid port: %d", port)
	}
	return &sandboxHost{ingressKey: label[:sep], port: port}, nil
}

// parseSandboxURI parses a URI-mode request path of the form
// "/<sandbox-id>/<sandbox-port>/<path-to-request>" into a sandboxHost.
//
// The first two path segments are the sandbox id and the port; whatever
// follows becomes requestURI (with a leading slash), defaulting to "/" when
// nothing follows. Errors are returned for an empty path, fewer than two
// segments, a non-numeric port, an empty sandbox id, or a port <= 0.
func (p *Proxy) parseSandboxURI(r *http.Request) (*sandboxHost, error) {
	fullPath := r.URL.Path
	if fullPath == "" {
		return nil, errors.New("missing URI path")
	}

	// Drop the leading slash, then cut into at most id / port / remainder.
	segments := strings.SplitN(strings.TrimPrefix(fullPath, "/"), "/", 3)
	if len(segments) < 2 {
		return nil, fmt.Errorf("invalid URI path format: expected '/<sandbox-id>/<sandbox-port>/<path-to-request>', got: %s", fullPath)
	}

	id := segments[0]
	portNumber, convErr := strconv.Atoi(segments[1])
	if convErr != nil {
		return nil, fmt.Errorf("invalid port format: %w", convErr)
	}
	if id == "" || portNumber <= 0 {
		return nil, errors.New("missing sandbox-id or sandbox-port in URI path")
	}

	// The remainder, if any, is the path the backend should see.
	forwardPath := "/"
	if len(segments) == 3 && segments[2] != "" {
		forwardPath += segments[2]
	}

	return &sandboxHost{ingressKey: id, port: portNumber, requestURI: forwardPath}, nil
}


================================================
FILE: components/ingress/pkg/proxy/http.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package proxy

import (
	"net/http"
	"net/http/httputil"
	"net/url"
)

// HTTPProxy is a stateless http.Handler that forwards a request to the URL
// already stored in the request itself.
type HTTPProxy struct{}

// NewHTTPProxy returns a ready-to-use HTTPProxy.
func NewHTTPProxy() *HTTPProxy {
	return new(HTTPProxy)
}

// ServeHTTP forwards r to the URL held in r.URL. A reverse proxy is built for
// that URL on every call; if the URL cannot be turned into one, the error text
// is sent back with 502 Bad Gateway.
func (hp *HTTPProxy) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	reverse, buildErr := hp.newReverseProxy(r.URL.String())
	if buildErr == nil {
		reverse.ServeHTTP(w, r)
		return
	}

	http.Error(w, buildErr.Error(), http.StatusBadGateway)
}

// newReverseProxy builds a single-host reverse proxy for targetHost, which
// must be a URL string accepted by url.Parse (the parse error is returned
// otherwise).
//
// The stock Director is replaced by one that only rewrites the scheme and
// host of the outgoing request (URL and Host header) and removes the
// SandboxIngress routing header; the path and query of the request are left
// as they are. Every response gets the ReverseProxyServerPowerBy header added.
func (hp *HTTPProxy) newReverseProxy(targetHost string) (*httputil.ReverseProxy, error) {
	target, parseErr := url.Parse(targetHost)
	if parseErr != nil {
		return nil, parseErr
	}

	// Outgoing-request rewrite.
	rewrite := func(req *http.Request) {
		req.URL.Scheme = target.Scheme
		req.URL.Host = target.Host
		req.Host = target.Host
		req.Header.Del(SandboxIngress)
	}
	// Response tagging.
	stamp := func(response *http.Response) error {
		response.Header.Add(ReverseProxyServerPowerBy, "OpenSandbox-ingress")
		return nil
	}

	reverse := httputil.NewSingleHostReverseProxy(target)
	reverse.Director = rewrite
	reverse.ModifyResponse = stamp
	return reverse, nil
}


================================================
FILE: components/ingress/pkg/proxy/http_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package proxy

import (
	"context"
	"fmt"
	"io"
	"net/http"
	"net/http/httptest"
	"strconv"
	"testing"
	"time"

	"github.com/alibaba/opensandbox/ingress/pkg/sandbox"
	slogger "github.com/alibaba/opensandbox/internal/logger"
	"github.com/stretchr/testify/assert"
)

// mockProvider is an in-memory sandbox.Provider used by the proxy tests.
type mockProvider struct {
	endpoints map[string]string // sandboxName -> IP
	notReady  map[string]bool   // sandboxName -> notReady flag
}

// GetEndpoint reports ErrSandboxNotReady for ids flagged in notReady, the
// configured IP for known ids, and ErrSandboxNotFound for everything else.
func (m *mockProvider) GetEndpoint(sandboxId string) (string, error) {
	// Indexing a nil map yields false, so no explicit nil guard is required.
	if m.notReady[sandboxId] {
		return "", fmt.Errorf("%w: %s", sandbox.ErrSandboxNotReady, sandboxId)
	}

	ip, known := m.endpoints[sandboxId]
	if !known {
		return "", fmt.Errorf("%w: %s", sandbox.ErrSandboxNotFound, sandboxId)
	}
	return ip, nil
}

// Start is a no-op: the mock has nothing to synchronize.
func (m *mockProvider) Start(_ context.Context) error { return nil }

// Test_HTTPProxy runs the end-to-end HTTP proxy scenario once per routing
// mode, each as its own subtest.
func Test_HTTPProxy(t *testing.T) {
	t.Run("with header mode", httpProxyWithHeaderMode)
	t.Run("with uri mode", httpProxyWithURIMode)
}

// httpProxyWithHeaderMode exercises the proxy end to end in header mode
// against a local backend: a request without a routing header (400), a request
// for an unknown sandbox (404), a valid request routed through the
// SandboxIngress header (200) and a valid request routed through the Host
// header (200).
//
// Every response body is drained and closed, and every request-construction
// error is checked, so the test neither leaks connections nor dereferences a
// nil request or response.
func httpProxyWithHeaderMode(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(realBackendHTTPHandler))
	defer server.Close()
	serverPort := server.URL[len("http://127.0.0.1:"):]

	// Create mock provider with sandbox endpoint
	provider := &mockProvider{
		endpoints: map[string]string{
			"test-sandbox": "127.0.0.1",
		},
	}

	ctx := context.Background()
	Logger = slogger.MustNew(slogger.Config{Level: "debug"})
	proxy := NewProxy(ctx, provider, ModeHeader, nil)

	mux := http.NewServeMux()
	mux.Handle("/", proxy)
	port, err := findAvailablePort()
	assert.Nil(t, err)

	go func() {
		assert.NoError(t, http.ListenAndServe(":"+strconv.Itoa(port), mux))
	}()

	// Give the listener goroutine time to start accepting connections.
	time.Sleep(2 * time.Second)

	// newRequest builds a GET request against the ingress listener.
	newRequest := func(pathAndQuery string) *http.Request {
		request, err := http.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("http://127.0.0.1:%v%s", port, pathAndQuery), nil)
		if err != nil {
			t.Fatalf("failed to build request: %v", err)
		}
		return request
	}
	// send performs the request and returns the status code and the fully
	// read body; the body is always closed.
	send := func(request *http.Request) (int, string) {
		response, err := http.DefaultClient.Do(request)
		if err != nil {
			t.Fatalf("request to ingress failed: %v", err)
		}
		defer response.Body.Close()
		body, err := io.ReadAll(response.Body)
		assert.Nil(t, err)
		return response.StatusCode, string(body)
	}

	// no header
	status, body := send(newRequest("/hello"))
	assert.Equal(t, http.StatusBadRequest, status)
	t.Log(body)

	// no sandbox backend
	request := newRequest("/hello")
	request.Header.Set(SandboxIngress, fmt.Sprintf("non-existent-%v", port))
	status, body = send(request)
	assert.Equal(t, http.StatusNotFound, status) // ErrSandboxNotFound -> 404
	t.Log(body)

	// valid sandbox request
	request = newRequest("/hello?a=1&b=2")
	request.Header.Set(SandboxIngress, fmt.Sprintf("test-sandbox-%v", serverPort))
	status, body = send(request)
	if status != http.StatusOK {
		t.Log(body)
	}
	assert.Equal(t, http.StatusOK, status)

	// Compatible Host parsing for reverse proxy mode
	request = newRequest("/hello?a=1&b=2")
	request.Host = fmt.Sprintf("test-sandbox-%v.sandbox.alibaba-inc.com", serverPort)
	status, body = send(request)
	if status != http.StatusOK {
		t.Log(body)
	}
	assert.Equal(t, http.StatusOK, status)
}

// httpProxyWithURIMode exercises the proxy end to end in URI mode against a
// local backend: a request with an empty path (400), a request for an unknown
// sandbox (404) and a valid "/<sandbox-id>/<port>/<path>" request (200).
//
// Every response body is drained and closed, and every request-construction
// error is checked, so the test neither leaks connections nor dereferences a
// nil request or response.
func httpProxyWithURIMode(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(realBackendHTTPHandler))
	defer server.Close()
	serverPort := server.URL[len("http://127.0.0.1:"):]

	// Create mock provider with sandbox endpoint
	provider := &mockProvider{
		endpoints: map[string]string{
			"test-sandbox": "127.0.0.1",
		},
	}

	ctx := context.Background()
	Logger = slogger.MustNew(slogger.Config{Level: "debug"})
	proxy := NewProxy(ctx, provider, ModeURI, nil)

	mux := http.NewServeMux()
	mux.Handle("/", proxy)
	port, err := findAvailablePort()
	assert.Nil(t, err)

	go func() {
		assert.NoError(t, http.ListenAndServe(":"+strconv.Itoa(port), mux))
	}()

	// Give the listener goroutine time to start accepting connections.
	time.Sleep(2 * time.Second)

	// get issues a GET against the ingress listener and returns the status
	// code and the fully read body; the body is always closed.
	get := func(pathAndQuery string) (int, string) {
		request, err := http.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("http://127.0.0.1:%v%s", port, pathAndQuery), nil)
		if err != nil {
			t.Fatalf("failed to build request: %v", err)
		}
		response, err := http.DefaultClient.Do(request)
		if err != nil {
			t.Fatalf("request to ingress failed: %v", err)
		}
		defer response.Body.Close()
		body, err := io.ReadAll(response.Body)
		assert.Nil(t, err)
		return response.StatusCode, string(body)
	}

	// uri is empty
	status, body := get("")
	assert.Equal(t, http.StatusBadRequest, status)
	t.Log(body)

	// no sandbox backend
	status, body = get("/non-existent-xxx/80/hello")
	assert.Equal(t, http.StatusNotFound, status) // ErrSandboxNotFound -> 404
	t.Log(body)

	// valid sandbox request
	status, body = get(fmt.Sprintf("/test-sandbox/%v/hello?a=1&b=2", serverPort))
	if status != http.StatusOK {
		t.Log(body)
	}
	assert.Equal(t, http.StatusOK, status)
}

func realBackendHTTPHandler(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodGet {
		http.Error(w, http.StatusText(http.StatusMethodNotAllowed), http.StatusMethodNotAllowed)
		return
	}

	if r.URL.Path != "/hello" {
		http.Error(w, fmt.Sprintf("path is not /hello, but %s", r.URL.Path), http.StatusBadRequest)
	}
	if r.URL.RawQuery != "a=1&b=2" {
		http.Error(w, fmt.Sprintf("query is not a=1&b=2, but %s", r.URL.RawQuery), http.StatusBadRequest)
	}

	w.WriteHeader(http.StatusOK)
	_, _ = w.Write([]byte("hello world"))
}


================================================
FILE: components/ingress/pkg/proxy/logger.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package proxy

import (
	"context"

	slogger "github.com/alibaba/opensandbox/internal/logger"
)

// Logger is the package-level logger used by the proxy handlers. It has no
// initial value here; it is assigned by WithLogger (the tests in this package
// also assign it directly).
var Logger slogger.Logger

// WithLogger stores logger in the package-level Logger variable and returns
// ctx unchanged. Despite the name, the logger is not attached to the context.
func WithLogger(ctx context.Context, logger slogger.Logger) context.Context {
	Logger = logger
	return ctx
}


================================================
FILE: components/ingress/pkg/proxy/proxy.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package proxy

import (
	"context"
	"errors"
	"fmt"
	"net"
	"net/http"
	"strings"

	"github.com/alibaba/opensandbox/ingress/pkg/renewintent"
	"github.com/alibaba/opensandbox/ingress/pkg/sandbox"
	slogger "github.com/alibaba/opensandbox/internal/logger"
)

// Proxy is the ingress http.Handler: it resolves the sandbox targeted by each
// request and forwards the request to it over HTTP or WebSocket.
type Proxy struct {
	// sandboxProvider maps an ingress key to the sandbox endpoint address.
	sandboxProvider sandbox.Provider
	// mode selects how the target is extracted from a request (header or URI).
	mode Mode
	// renewIntentPublisher, when non-nil, is notified once per request after
	// the target sandbox has been resolved.
	renewIntentPublisher renewintent.Publisher
}

// NewProxy returns a Proxy that resolves targets through sandboxProvider using
// the given mode. renewIntentPublisher may be nil, in which case no renew
// intents are published. The context argument is currently unused.
func NewProxy(_ context.Context, sandboxProvider sandbox.Provider, mode Mode, renewIntentPublisher renewintent.Publisher) *Proxy {
	ingress := new(Proxy)
	ingress.sandboxProvider = sandboxProvider
	ingress.mode = mode
	ingress.renewIntentPublisher = renewIntentPublisher
	return ingress
}

// ServeHTTP routes one request to its sandbox.
//
// Steps: extract the routing target from the request (400 on failure), resolve
// it to "<endpoint>:<port>" through the sandbox provider (status chosen by
// resolveRealHost on failure), publish a renew intent when a publisher is
// configured, rewrite the request (path in URI mode, Host and URL host
// always), strip the routing headers and hand the request to serve.
//
// A panic raised while serving is logged and answered with 502, with one
// exception: http.ErrAbortHandler is re-panicked untouched. The standard
// reverse proxy uses that sentinel to abort a response whose body copy failed
// after the headers were sent; writing an error into such a response would
// append garbage to a partially delivered body, whereas letting the sentinel
// reach net/http makes the server drop the connection as intended.
func (p *Proxy) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	defer func() {
		err := recover()
		if err == nil {
			return
		}
		if err == http.ErrAbortHandler {
			panic(err)
		}

		Logger.With(slogger.Field{Key: "error", Value: err}).Errorf("Proxy: proxy causes panic")
		var errMsg string
		if e, ok := err.(error); ok {
			errMsg = e.Error()
		} else {
			errMsg = fmt.Sprintf("%v", err)
		}
		http.Error(w, errMsg, http.StatusBadGateway)
	}()

	host, err := p.getSandboxHostDefinition(r)
	if err != nil {
		http.Error(w, fmt.Sprintf("OpenSandbox Ingress: %v", err), http.StatusBadRequest)
		return
	}

	targetHost, err, code := p.resolveRealHost(host)
	if err != nil {
		http.Error(w, fmt.Sprintf("OpenSandbox Ingress: %v", err), code)
		return
	}

	if p.renewIntentPublisher != nil {
		p.renewIntentPublisher.PublishIntent(host.ingressKey, host.port, host.requestURI)
	}

	// modify if requestURI is not empty
	if host.requestURI != "" {
		r.URL.Path = host.requestURI
	}

	r.Host = targetHost
	r.URL.Host = targetHost
	// The routing headers are ingress-internal; neither the current nor the
	// deprecated name is forwarded to the sandbox.
	r.Header.Del(SandboxIngress)
	r.Header.Del(DeprecatedSandboxIngress)

	Logger.With(
		slogger.Field{Key: "target", Value: targetHost},
		slogger.Field{Key: "client", Value: p.getClientIP(r)},
		slogger.Field{Key: "uri", Value: r.RequestURI},
		slogger.Field{Key: "method", Value: r.Method},
	).Infof("ingress requested")
	p.serve(w, r)
}

// serve dispatches an already-rewritten request to the WebSocket proxy or the
// plain HTTP proxy. When the request URL carries no scheme, one is filled in
// from the inbound connection: "wss"/"https" if it used TLS, "ws"/"http"
// otherwise.
func (p *Proxy) serve(w http.ResponseWriter, r *http.Request) {
	overTLS := r.TLS != nil

	if !p.isWebSocketRequest(r) {
		if r.URL.Scheme == "" {
			r.URL.Scheme = "http"
			if overTLS {
				r.URL.Scheme = "https"
			}
		}
		NewHTTPProxy().ServeHTTP(w, r)
		return
	}

	// WebSocket path.
	if r.URL == nil {
		http.Error(w, "invalid request URL", http.StatusBadRequest)
		return
	}
	if r.URL.Scheme == "" {
		r.URL.Scheme = "ws"
		if overTLS {
			r.URL.Scheme = "wss"
		}
	}
	NewWebSocketProxy(r.URL).ServeHTTP(w, r)
}

// isWebSocketRequest reports whether r is a WebSocket opening handshake: a GET
// request whose Upgrade header contains the token "websocket" and whose
// Connection header contains the token "upgrade".
//
// Per RFC 6455 both header values are comma-separated token lists compared
// case-insensitively, so requests such as "Connection: keep-alive, Upgrade"
// (sent by some browsers) or "Upgrade: WebSocket" are recognized.
func (p *Proxy) isWebSocketRequest(r *http.Request) bool {
	if r.Method != http.MethodGet {
		return false
	}

	// hasToken scans every value of the named header (given in canonical
	// form) for a case-insensitive match of want.
	hasToken := func(canonicalName, want string) bool {
		for _, line := range r.Header[canonicalName] {
			for _, token := range strings.Split(line, ",") {
				if strings.EqualFold(strings.TrimSpace(token), want) {
					return true
				}
			}
		}
		return false
	}

	return hasToken("Upgrade", "websocket") && hasToken("Connection", "upgrade")
}

// resolveRealHost looks up the endpoint of host.ingressKey through the sandbox
// provider and returns it joined with host.port as a dialable "host:port".
//
// On failure it returns the provider error together with the HTTP status to
// send: 404 for sandbox.ErrSandboxNotFound, 503 for sandbox.ErrSandboxNotReady
// and 502 for anything else. On success the status is 0.
//
// The address is assembled with net.JoinHostPort so that an IPv6 endpoint is
// bracketed ("[fd00::1]:8080"); plain string concatenation would produce an
// address that cannot be parsed. IPv4 addresses and host names come out
// exactly as before.
func (p *Proxy) resolveRealHost(host *sandboxHost) (string, error, int) {
	// Get endpoint IP from sandbox provider
	endpointIP, err := p.sandboxProvider.GetEndpoint(host.ingressKey)
	if err != nil {
		// Map sandbox errors to HTTP status codes
		switch {
		case errors.Is(err, sandbox.ErrSandboxNotFound):
			return "", err, http.StatusNotFound
		case errors.Is(err, sandbox.ErrSandboxNotReady):
			return "", err, http.StatusServiceUnavailable
		default:
			return "", err, http.StatusBadGateway
		}
	}

	// Tolerate an endpoint that is already bracketed so it is not wrapped twice.
	bareHost := strings.TrimSuffix(strings.TrimPrefix(endpointIP, "["), "]")

	// Construct target host with port
	targetHost := net.JoinHostPort(bareHost, fmt.Sprint(host.port))
	return targetHost, nil, 0
}

// getClientIP returns the best-effort client address of r for logging.
//
// Precedence: the first entry of X-Forwarded-For, then X-Real-IP, then the
// host part of r.RemoteAddr. X-Forwarded-For entries are split on ',' and
// trimmed, so both "a, b" and "a,b" yield "a". If RemoteAddr is not in
// "host:port" form it is returned as is rather than being dropped.
//
// NOTE: both headers are supplied by the client or an upstream proxy and can
// be forged; the result must not be used for access decisions.
func (p *Proxy) getClientIP(r *http.Request) string {
	if xff := r.Header.Get(XForwardedFor); xff != "" {
		first := xff
		if comma := strings.IndexByte(xff, ','); comma >= 0 {
			first = xff[:comma]
		}
		if first = strings.TrimSpace(first); first != "" {
			return first
		}
	}

	if realIP := r.Header.Get(XRealIP); realIP != "" {
		return realIP
	}

	clientIP, _, err := net.SplitHostPort(r.RemoteAddr)
	if err != nil {
		return r.RemoteAddr
	}
	return clientIP
}


================================================
FILE: components/ingress/pkg/proxy/proxy_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package proxy

import (
	"net"
	"net/http"
	"net/http/httptest"
	"testing"

	"github.com/stretchr/testify/assert"
)

// Test_WatchPods is removed as we now use BatchSandbox Provider instead of direct Pod watching

// TestIsWebSocketRequest checks the handshake detection for a valid upgrade
// request, a request without upgrade headers and a non-GET upgrade request.
func TestIsWebSocketRequest(t *testing.T) {
	proxy := &Proxy{}

	cases := []struct {
		method      string
		withUpgrade bool
		want        bool
	}{
		{method: http.MethodGet, withUpgrade: true, want: true},   // valid websocket request
		{method: http.MethodGet, withUpgrade: false, want: false}, // missing upgrade headers
		{method: http.MethodPost, withUpgrade: true, want: false}, // wrong method
	}

	for _, tc := range cases {
		req := httptest.NewRequest(tc.method, "/ws", nil)
		if tc.withUpgrade {
			req.Header.Set("Upgrade", "websocket")
			req.Header.Set("Connection", "Upgrade")
		}
		assert.Equal(t, tc.want, proxy.isWebSocketRequest(req))
	}
}

// TestParseSandboxHost checks that well-formed "<key>-<port>.<domain>" hosts
// are split into key and port, and that hosts without a port suffix or without
// a key are rejected.
func TestParseSandboxHost(t *testing.T) {
	proxy := &Proxy{}

	accepted := []struct {
		input string
		key   string
		port  int
	}{
		{input: "sandbox-1234.example.com", key: "sandbox", port: 1234},
		{input: "https://alpha-beta-8080.sandbox.test", key: "alpha-beta", port: 8080},
	}
	for _, tc := range accepted {
		host, err := proxy.parseSandboxHost(tc.input)
		assert.NoError(t, err)
		assert.Equal(t, tc.key, host.ingressKey)
		assert.Equal(t, tc.port, host.port)
	}

	for _, input := range []string{"invalidhost", "-1234.example.com"} {
		_, err := proxy.parseSandboxHost(input)
		assert.Error(t, err)
	}
}

// TestGetClientIP checks the client address resolution for a bare RemoteAddr,
// an X-Real-IP header and a multi-entry X-Forwarded-For header.
func TestGetClientIP(t *testing.T) {
	proxy := &Proxy{}

	cases := []struct {
		header string // header to set; empty means none
		value  string
		want   string
	}{
		{want: "192.0.2.1"},
		{header: XRealIP, value: "203.0.113.5", want: "203.0.113.5"},
		{header: XForwardedFor, value: "10.0.0.1, 198.51.100.2", want: "10.0.0.1"},
	}

	for _, tc := range cases {
		req := httptest.NewRequest(http.MethodGet, "/", nil)
		req.RemoteAddr = "192.0.2.1:12345"
		if tc.header != "" {
			req.Header.Set(tc.header, tc.value)
		}
		assert.Equal(t, tc.want, proxy.getClientIP(req))
	}
}

// findAvailablePort asks the OS for a free TCP port on 127.0.0.1 by listening
// on port 0, records the assigned port and closes the listener again. The port
// is therefore only likely, not guaranteed, to still be free when used.
func findAvailablePort() (int, error) {
	probe, listenErr := net.Listen("tcp", "127.0.0.1:0")
	if listenErr != nil {
		return 0, listenErr
	}

	tcpAddr := probe.Addr().(*net.TCPAddr)
	chosen := tcpAddr.Port
	_ = probe.Close()

	return chosen, nil
}


================================================
FILE: components/ingress/pkg/proxy/websocket.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package proxy

import (
	"fmt"
	"io"
	"net"
	"net/http"
	"net/url"
	"strings"

	slogger "github.com/alibaba/opensandbox/internal/logger"
	"github.com/gorilla/websocket"
)

var (
	// defaultWebSocketDialer is the dialer used when a WebSocketProxy has no
	// dialer of its own. It is the same *websocket.Dialer value as
	// websocket.DefaultDialer (an alias, not a copy), so it carries that
	// dialer's settings rather than zero values.
	defaultWebSocketDialer = websocket.DefaultDialer

	// defaultUpgrader specifies the parameters for upgrading an HTTP
	// connection to a WebSocket connection.
	defaultUpgrader = &websocket.Upgrader{
		ReadBufferSize:  1024,
		WriteBufferSize: 1024,
	}
)

// WebSocketProxy is an HTTP Handler that takes an incoming WebSocket
// connection and proxies it to another server.
type WebSocketProxy struct {
	// director, if non-nil, is a function that may copy additional request
	// headers from the incoming WebSocket connection into the output headers
	// which will be forwarded to another server.
	director func(incoming *http.Request, out http.Header)

	// backend returns the backend URL which the proxy uses to reverse proxy
	// the incoming WebSocket connection. Request is the initial incoming and
	// unmodified request.
	backend func(*http.Request) *url.URL

	// dialer contains options for connecting to the backend WebSocket server.
	// If nil, defaultWebSocketDialer is used.
	dialer *websocket.Dialer

	// upgrader specifies the parameters for upgrading an incoming HTTP
	// connection to a WebSocket connection.
	// NOTE(review): presumably defaultUpgrader is used when nil; confirm in ServeHTTP.
	upgrader *websocket.Upgrader
}

// ProxyHandler returns an http.Handler that reverse proxies WebSocket
// requests to the given target. It is NewWebSocketProxy typed as an
// http.Handler.
func ProxyHandler(target *url.URL) http.Handler {
	var handler http.Handler = NewWebSocketProxy(target)
	return handler
}

// NewWebSocketProxy returns a WebSocket reverse proxy whose backend URL is a
// copy of target with the path, raw query and fragment replaced by those of
// the incoming request. Scheme, host and every other field come from target.
func NewWebSocketProxy(target *url.URL) *WebSocketProxy {
	proxy := &WebSocketProxy{}
	proxy.backend = func(r *http.Request) *url.URL {
		// Copy by value so the shared target is never mutated.
		rewritten := *target
		rewritten.Path = r.URL.Path
		rewritten.RawQuery = r.URL.RawQuery
		rewritten.Fragment = r.URL.Fragment
		return &rewritten
	}
	return proxy
}

// ServeHTTP implements http.Handler. It dials the backend WebSocket endpoint
// returned by w.backend, upgrades the incoming request to a WebSocket
// connection, and then relays messages in both directions until either side
// returns an error.
//
// The backend is dialed before the client is upgraded, so a failed backend
// handshake can still be answered with a regular HTTP response.
//
//nolint:gocognit
func (w *WebSocketProxy) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
	if w.backend == nil {
		http.Error(rw, "WebSocketProxy: backend is not defined", http.StatusInternalServerError)
		return
	}

	backendURL := w.backend(r)
	if backendURL == nil {
		http.Error(rw, "WebSocketProxy: backend URL is nil", http.StatusInternalServerError)
		return
	}

	dialer := w.dialer
	if w.dialer == nil {
		dialer = defaultWebSocketDialer
	}

	// Pass headers from the incoming request to the dialer to forward them to
	// the final destinations.
	requestHeader := http.Header{}
	if origin := r.Header.Get(Origin); origin != "" {
		requestHeader.Add(Origin, origin)
	}
	for _, prot := range r.Header[SecWebSocketProtocol] {
		requestHeader.Add(SecWebSocketProtocol, prot)
	}
	for _, cookie := range r.Header[Cookie] {
		requestHeader.Add(Cookie, cookie)
	}
	if r.Host != "" {
		requestHeader.Set(Host, r.Host)
	}

	// Pass X-Forwarded-For headers too, code below is a part of
	// httputil.ReverseProxy. See http://en.wikipedia.org/wiki/X-Forwarded-For
	// for more information
	if clientIP, _, err := net.SplitHostPort(r.RemoteAddr); err == nil {
		// If we aren't the first proxy retain prior
		// X-Forwarded-For information as a comma+space
		// separated list and fold multiple headers into one.
		if prior, ok := r.Header[XForwardedFor]; ok {
			clientIP = strings.Join(prior, ", ") + ", " + clientIP
		}
		requestHeader.Set(XForwardedFor, clientIP)
	}

	// Set the originating protocol of the incoming HTTP request. TLS might
	// be terminated at this proxy, so the backend cannot detect it on its own.
	requestHeader.Set(XForwardedProto, "http")
	if r.TLS != nil {
		requestHeader.Set(XForwardedProto, "https")
	}

	// Enable the director to copy any additional headers it desires for
	// forwarding to the remote server.
	if w.director != nil {
		w.director(r, requestHeader)
	}

	// Connect to the backend URL, also pass the headers we get from the request
	// together with the Forwarded headers we prepared above.
	connBackend, resp, err := dialer.Dial(backendURL.String(), requestHeader)
	if err != nil {
		Logger.With(slogger.Field{Key: "error", Value: err}).Errorf("WebSocketProxy: couldn't dial to remote backend")
		if resp != nil {
			// If the WebSocket handshake fails, ErrBadHandshake is returned
			// along with a non-nil *http.Response so that callers can handle
			// redirects, authentication, etcetera.
			if err := copyResponse(rw, resp); err != nil {
				Logger.With(slogger.Field{Key: "error", Value: err}).Errorf("WebSocketProxy: couldn't write response after failed remote backend handshake")
			}
		} else {
			http.Error(rw, http.StatusText(http.StatusServiceUnavailable), http.StatusServiceUnavailable)
		}
		return
	}
	defer connBackend.Close()

	upgrader := w.upgrader
	if w.upgrader == nil {
		upgrader = defaultUpgrader
	}

	// Only pass those headers to the upgrader.
	upgradeHeader := http.Header{}
	if hdr := resp.Header.Get(SecWebSocketProtocol); hdr != "" {
		upgradeHeader.Set(SecWebSocketProtocol, hdr)
	}
	// A backend may send several Set-Cookie headers during the handshake;
	// forward every one of them instead of only the first.
	for _, setCookie := range resp.Header.Values(SetCookie) {
		upgradeHeader.Add(SetCookie, setCookie)
	}

	// Now upgrade the existing incoming request to a WebSocket connection.
	// Also pass the header that we gathered from the Dial handshake.
	connPub, err := upgrader.Upgrade(rw, r, upgradeHeader)
	if err != nil {
		Logger.With(slogger.Field{Key: "error", Value: err}).Errorf("WebSocketProxy: couldn't upgrade websocket connection")
		return
	}
	defer connPub.Close()

	// Both channels are buffered so that the goroutine whose error is never
	// received by the select below can still send and exit.
	errClient := make(chan error, 1)
	errBackend := make(chan error, 1)
	replicateWebsocketConn := func(dst, src *websocket.Conn, errc chan error) {
		for {
			msgType, msg, err := src.ReadMessage()
			if err != nil {
				// Relay the close to the other side, preserving the peer's
				// close code and text when it sent one.
				m := websocket.FormatCloseMessage(websocket.CloseNormalClosure, fmt.Sprintf("%v", err))
				if e, ok := err.(*websocket.CloseError); ok { //nolint:errorlint
					if e.Code != websocket.CloseNoStatusReceived {
						m = websocket.FormatCloseMessage(e.Code, e.Text)
					}
				}
				errc <- err
				_ = dst.WriteMessage(websocket.CloseMessage, m)
				break
			}
			err = dst.WriteMessage(msgType, msg)
			if err != nil {
				errc <- err
				break
			}
		}
	}

	go replicateWebsocketConn(connPub, connBackend, errClient)
	go replicateWebsocketConn(connBackend, connPub, errBackend)

	// Wait for the first direction to fail; the deferred Close calls then tear
	// down both connections, which unblocks the other goroutine.
	var message string
	select {
	case err = <-errClient:
		message = "WebSocketProxy: Error when copying from backend to client: %v"
	case err = <-errBackend:
		message = "WebSocketProxy: Error when copying from client to backend: %v"

	}
	// Close errors other than an abnormal closure are not logged.
	if e, ok := err.(*websocket.CloseError); !ok || e.Code == websocket.CloseAbnormalClosure { //nolint:errorlint
		Logger.With(slogger.Field{Key: "error", Value: err}).Errorf(message, err)
	}
}

// copyResponse replays resp onto rw: headers first, then the status code,
// then the body. The response body is closed before returning, and any error
// from streaming the body is returned to the caller.
func copyResponse(rw http.ResponseWriter, resp *http.Response) error {
	copyHeader(rw.Header(), resp.Header)
	rw.WriteHeader(resp.StatusCode)
	defer resp.Body.Close()

	if _, copyErr := io.Copy(rw, resp.Body); copyErr != nil {
		return copyErr
	}
	return nil
}

func copyHeader(dst, src http.Header) {
	for k, vv := range src {
		for _, v := range vv {
			dst.Add(k, v)
		}
	}
}


================================================
FILE: components/ingress/pkg/proxy/websocket_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package proxy

import (
	"context"
	"fmt"
	"log"
	"net/http"
	"strconv"
	"strings"
	"testing"
	"time"

	slogger "github.com/alibaba/opensandbox/internal/logger"
	"github.com/gorilla/websocket"
	"github.com/stretchr/testify/assert"
)

// Test_WebSocketProxy runs the WebSocket echo scenario once per proxy mode.
func Test_WebSocketProxy(t *testing.T) {
	scenarios := []struct {
		name string
		run  func(*testing.T)
	}{
		{name: "with header mode", run: webSocketProxyWithHeaderMode},
		{name: "with uri mode", run: webSocketProxyWithURIMode},
	}
	for _, scenario := range scenarios {
		t.Run(scenario.name, scenario.run)
	}
}

// webSocketProxyWithHeaderMode checks that a text message is echoed back
// through a proxy created with ModeHeader. The client dials the backend path
// directly and identifies the sandbox through the SandboxIngress header.
func webSocketProxyWithHeaderMode(t *testing.T) {
	// Create mock provider
	provider := &mockProvider{
		endpoints: map[string]string{
			"test-sandbox": "127.0.0.1",
		},
	}

	Logger = slogger.MustNew(slogger.Config{Level: "debug"})
	proxy := NewProxy(context.Background(), provider, ModeHeader, nil)

	runWebSocketEchoThroughProxy(t, proxy, func(int) string { return "/ws" })
}

// webSocketProxyWithURIMode checks that a text message is echoed back through
// a proxy created with ModeURI. The client prefixes the backend path with
// "/test-sandbox/<backend port>".
func webSocketProxyWithURIMode(t *testing.T) {
	// Create mock provider
	provider := &mockProvider{
		endpoints: map[string]string{
			"test-sandbox": "127.0.0.1",
		},
	}

	Logger = slogger.MustNew(slogger.Config{Level: "debug"})
	proxy := NewProxy(context.Background(), provider, ModeURI, nil)

	runWebSocketEchoThroughProxy(t, proxy, func(backendPort int) string {
		return fmt.Sprintf("/test-sandbox/%v", backendPort) + "/ws"
	})
}

// runWebSocketEchoThroughProxy serves proxy on a free local port, starts a
// one-shot WebSocket echo backend on another free port, and asserts that a
// text message sent through the proxy comes back unchanged.
//
// dialPath receives the backend port and returns the request path the client
// should dial on the proxy.
func runWebSocketEchoThroughProxy(t *testing.T, proxy http.Handler, dialPath func(backendPort int) string) {
	t.Helper()

	// Fail before the port is used if none could be found.
	proxyPort, err := findAvailablePort()
	if err != nil {
		t.Fatalf("find available port for proxy: %v", err)
	}
	proxyURL := "ws://127.0.0.1:" + strconv.Itoa(proxyPort)

	mux := http.NewServeMux()
	mux.Handle("/", proxy)
	go func() {
		assert.NoError(t, http.ListenAndServe(":"+strconv.Itoa(proxyPort), mux))
	}()

	// Give the proxy listener time to come up.
	time.Sleep(2 * time.Second)

	backendPort, err := findAvailablePort()
	if err != nil {
		t.Fatalf("find available port for backend: %v", err)
	}

	// backend echo server
	go func() {
		mux2 := http.NewServeMux()
		mux2.HandleFunc("/ws", func(w http.ResponseWriter, r *http.Request) {
			t.Logf("r.URL.Path: %s", r.URL.Path)
			t.Logf("r.URL.RawPath: %s", r.URL.RawPath)
			t.Logf("r.Host: %s", r.Host)
			// The original host header must be preserved by the proxy.
			assert.True(t, strings.HasPrefix(r.Host, "127.0.0.1"))

			conn, err := defaultUpgrader.Upgrade(w, r, nil)
			if err != nil {
				log.Println(err)
				return
			}
			// The upgraded connection is owned by this handler; release it
			// once the single echo is done.
			defer conn.Close()

			messageType, p, err := conn.ReadMessage()
			if err != nil {
				return
			}

			if err = conn.WriteMessage(messageType, p); err != nil {
				return
			}
		})

		if err := http.ListenAndServe(":"+strconv.Itoa(backendPort), mux2); err != nil {
			t.Error("ListenAndServe: ", err)
		}
	}()

	time.Sleep(time.Millisecond * 100)

	// frontend client, dial now our proxy, which will reverse proxy our
	// message to the backend websocket server.
	h := http.Header{}
	h.Set(SandboxIngress, "test-sandbox-"+strconv.Itoa(backendPort))
	conn, _, err := websocket.DefaultDialer.Dial(proxyURL+dialPath(backendPort), h)
	if err != nil {
		t.Fatal(err)
	}
	defer conn.Close()

	// write a message and send it to the backend server
	msg := "hello kite"
	if err := conn.WriteMessage(websocket.TextMessage, []byte(msg)); err != nil {
		// Nothing can be read back if the write failed.
		t.Fatalf("write message through proxy: %v", err)
	}

	messageType, p, err := conn.ReadMessage()
	if err != nil {
		// messageType and p are meaningless after a failed read.
		t.Fatalf("read echoed message through proxy: %v", err)
	}

	if messageType != websocket.TextMessage {
		t.Error("incoming message type is not Text")
	}

	if msg != string(p) {
		t.Errorf("expecting: %s, got: %s", msg, string(p))
	}
}


================================================
FILE: components/ingress/pkg/renewintent/intent.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package renewintent

import "time"

// Intent is the JSON payload describing one observed request for a sandbox.
type Intent struct {
	// SandboxID identifies the sandbox the request was observed for.
	SandboxID string `json:"sandbox_id"`
	// ObservedAt is the observation time; NewIntent fills it with the current
	// UTC time formatted as time.RFC3339Nano.
	ObservedAt string `json:"observed_at"`
	// Port is omitted from the JSON output when zero.
	Port int `json:"port,omitempty"`
	// RequestURI is omitted from the JSON output when empty.
	RequestURI string `json:"request_uri,omitempty"`
}

// NewIntent builds an Intent for the given sandbox, port and request URI and
// stamps it with the current UTC time in time.RFC3339Nano format.
func NewIntent(sandboxID string, port int, requestURI string) Intent {
	var intent Intent
	intent.SandboxID = sandboxID
	intent.Port = port
	intent.RequestURI = requestURI
	intent.ObservedAt = time.Now().UTC().Format(time.RFC3339Nano)
	return intent
}


================================================
FILE: components/ingress/pkg/renewintent/intent_test.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package renewintent

import (
	"encoding/json"
	"testing"

	"github.com/stretchr/testify/assert"
)

// TestNewIntent checks that NewIntent copies its arguments into the Intent
// and sets a non-empty observation timestamp.
func TestNewIntent(t *testing.T) {
	const (
		wantID   = "sb-123"
		wantPort = 8080
		wantURI  = "/api/foo"
	)

	got := NewIntent(wantID, wantPort, wantURI)

	assert.Equal(t, wantID, got.SandboxID)
	assert.Equal(t, wantPort, got.Port)
	assert.Equal(t, wantURI, got.RequestURI)
	assert.NotEmpty(t, got.ObservedAt)
}

// TestIntent_JSONRoundTrip checks that an Intent survives a JSON
// marshal/unmarshal cycle with every field intact, including ObservedAt.
func TestIntent_JSONRoundTrip(t *testing.T) {
	intent := NewIntent("my-sandbox", 80, "/")
	data, err := json.Marshal(intent)
	assert.NoError(t, err)

	var decoded Intent
	err = json.Unmarshal(data, &decoded)
	assert.NoError(t, err)

	// Compare the whole struct so the ObservedAt timestamp is covered too.
	assert.Equal(t, intent, decoded)
}

// TestIntent_JSONHasRequiredFields checks that sandbox_id and observed_at are
// always present in the JSON output, even when port and request URI are zero.
func TestIntent_JSONHasRequiredFields(t *testing.T) {
	encoded, err := json.Marshal(NewIntent("id", 0, ""))
	assert.NoError(t, err)

	var fields map[string]interface{}
	assert.NoError(t, json.Unmarshal(encoded, &fields))

	required := []string{"sandbox_id", "observed_at"}
	for i := range required {
		_, present := fields[required[i]]
		assert.True(t, present, "missing required JSON field %q", required[i])
	}
}


================================================
FILE: components/ingress/pkg/renewintent/publisher.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package renewintent

// Publisher publishes renew intents for sandboxes.
type Publisher interface {
	// PublishIntent reports that a request for requestURI on the given port
	// was observed for sandboxID. It returns nothing, so callers cannot
	// observe whether the intent was actually delivered.
	PublishIntent(sandboxID string, port int, requestURI string)
}


================================================
FILE: components/ingress/pkg/renewintent/redis.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package renewintent

import (
	"context"
	"encoding/json"
	"errors"
	"sync"
	"sync/atomic"
	"time"

	"github.com/alibaba/opensandbox/internal/logger"
	"github.com/redis/go-redis/v9"
	"k8s.io/apimachinery/pkg/util/wait"
)

const (
	// redisOpTimeout bounds each Redis pipeline execution in doPublish.
	redisOpTimeout = 5 * time.Second
	// publishWorkers is the number of goroutines NewRedisPublisher starts to
	// drain the intent channel.
	publishWorkers = 4
	// publishChanCap is the buffer size of the intent channel; PublishIntent
	// drops intents while the buffer is full.
	publishChanCap = 8192
)

// RedisPublisherConfig configures a RedisPublisher.
type RedisPublisherConfig struct {
	// QueueKey is the Redis list key intents are LPUSHed onto.
	QueueKey string
	// QueueMaxLen, when greater than zero, makes every publish also LTRIM the
	// list to its first QueueMaxLen entries.
	QueueMaxLen int
	// MinInterval is the minimum time between two intents for the same
	// sandbox ID. Zero or negative disables throttling.
	MinInterval time.Duration
	// Logger receives error and debug output from publishing. It is used
	// without a nil check.
	Logger logger.Logger
}

// intentReq carries the arguments of one PublishIntent call from the caller
// to the worker goroutines.
type intentReq struct {
	sandboxID  string
	port       int
	requestURI string
}

// RedisPublisher is a Publisher that queues intents in memory and has
// background workers push them onto a Redis list.
type RedisPublisher struct {
	// client is the Redis client used for the LPUSH/LTRIM pipeline.
	client *redis.Client
	cfg    RedisPublisherConfig
	// lastSent maps a sandbox ID (string) to the time.Time at which an intent
	// for it last passed the throttle.
	lastSent sync.Map
	// ch buffers intents between PublishIntent and the workers.
	ch chan intentReq
	// stopped is set once the context given to NewRedisPublisher is done;
	// PublishIntent then ignores further intents.
	stopped atomic.Bool
}

// NewRedisPublisher creates a RedisPublisher and starts its background
// goroutines: publishWorkers workers that drain the intent channel, a watcher
// that marks the publisher stopped once ctx is done, and, when
// cfg.MinInterval is positive, a periodic cleanup of the throttle map.
// All goroutines end when ctx is done.
func NewRedisPublisher(ctx context.Context, client *redis.Client, cfg RedisPublisherConfig) *RedisPublisher {
	pub := &RedisPublisher{
		client: client,
		cfg:    cfg,
		ch:     make(chan intentReq, publishChanCap),
	}

	worker := func() {
		for {
			select {
			case <-ctx.Done():
				return
			case req := <-pub.ch:
				pub.doPublish(req.sandboxID, req.port, req.requestURI)
			}
		}
	}
	for remaining := publishWorkers; remaining > 0; remaining-- {
		go worker()
	}

	go func() {
		<-ctx.Done()
		pub.stopped.Store(true)
	}()

	if cfg.MinInterval > 0 {
		go wait.UntilWithContext(ctx, pub.runCleanupThrottle, cfg.MinInterval*2)
	}
	return pub
}

// shouldSendIntent reports whether an intent for sandboxID may be published
// now, and records the current time as the last-sent time when it may.
//
// With cfg.MinInterval <= 0 throttling is disabled and it always returns
// true. Otherwise it returns true at most once per MinInterval for a given
// sandbox ID, even when several workers ask concurrently: the timestamp is
// claimed with an atomic LoadOrStore/CompareAndSwap, so only one caller wins.
func (p *RedisPublisher) shouldSendIntent(sandboxID string) bool {
	if p.cfg.MinInterval <= 0 {
		return true
	}
	now := time.Now()
	for {
		prev, loaded := p.lastSent.LoadOrStore(sandboxID, now)
		if !loaded {
			// No entry existed; this caller stored the first timestamp.
			return true
		}
		if now.Sub(prev.(time.Time)) < p.cfg.MinInterval {
			return false
		}
		if p.lastSent.CompareAndSwap(sandboxID, prev, now) {
			return true
		}
		// The entry changed underneath us: either another worker claimed the
		// slot (the next pass sees its fresh timestamp and returns false) or
		// the cleanup removed it (the next pass stores and returns true).
	}
}

// PublishIntent queues an intent for asynchronous publishing and never
// blocks. The intent is silently discarded when the publisher has been
// stopped or when the internal queue is full.
func (p *RedisPublisher) PublishIntent(sandboxID string, port int, requestURI string) {
	if p.stopped.Load() {
		return
	}

	req := intentReq{
		sandboxID:  sandboxID,
		port:       port,
		requestURI: requestURI,
	}
	select {
	case p.ch <- req:
	default:
		// Queue is full: drop the intent rather than block the caller.
	}
}

// doPublish pushes one intent onto the Redis list unless the throttle rejects
// it. The LPUSH and the optional LTRIM run in a single pipeline bounded by
// redisOpTimeout. Failures are logged and not returned.
func (p *RedisPublisher) doPublish(sandboxID string, port int, requestURI string) {
	if !p.shouldSendIntent(sandboxID) {
		return
	}

	sandboxField := logger.Field{Key: "sandbox_id", Value: sandboxID}

	payload, err := json.Marshal(NewIntent(sandboxID, port, requestURI))
	if err != nil {
		p.cfg.Logger.With(sandboxField).Errorf("renewintent: marshal intent: %v", err)
		return
	}

	ctx, cancel := context.WithTimeout(context.Background(), redisOpTimeout)
	defer cancel()

	queueKey := p.cfg.QueueKey
	pipe := p.client.Pipeline()
	pipe.LPush(ctx, queueKey, string(payload))
	if maxLen := p.cfg.QueueMaxLen; maxLen > 0 {
		// Keep only the newest maxLen entries.
		pipe.LTrim(ctx, queueKey, 0, int64(maxLen-1))
	}

	queueField := logger.Field{Key: "queue_key", Value: queueKey}
	if _, err = pipe.Exec(ctx); err != nil {
		p.cfg.Logger.With(
			sandboxField,
			queueField,
			logger.Field{Key: "error", Value: err},
		).Errorf("renewintent: redis publish failed")
		return
	}
	p.cfg.Logger.With(sandboxField, queueField).Debugf("renewintent: published")
}

// RedisClientFromDSN parses dsn with redis.ParseURL and returns a client
// built from the resulting options. It returns the parse error unchanged, or
// a dedicated error if parsing yields nil options.
func RedisClientFromDSN(dsn string) (*redis.Client, error) {
	opts, err := redis.ParseURL(dsn)
	switch {
	case err != nil:
		return nil, err
	case opts == nil:
		return nil, errors.New("renewintent: redis DSN produced nil options")
	}
	return redis.NewClient(opts), nil
}

// runCleanupThrottle removes throttle entries whose last-sent time is older
// than twice cfg.MinInterval, so the map does not keep growing with sandboxes
// that are no longer seen. The context argument is unused.
func (p *RedisPublisher) runCleanupThrottle(_ context.Context) {
	cutoff := time.Now().Add(-p.cfg.MinInterval * 2)
	p.lastSent.Range(func(key, value any) bool {
		if value.(time.Time).Before(cutoff) {
			// Delete only if the entry still holds the stale timestamp we
			// inspected; a worker may have refreshed it in the meantime, and
			// removing that fresh value would let the next intent bypass the
			// throttle.
			p.lastSent.CompareAndDelete(key, value)
		}
		return true
	})
}


================================================
FILE: components/ingress/pkg/renewintent/redis_bench_test.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package renewintent

import (
	"context"
	"fmt"
	"testing"

	"github.com/alibaba/opensandbox/internal/logger"
	"github.com/alicebob/miniredis/v2"
	"github.com/redis/go-redis/v9"
)

// nopLogger is a logger.Logger for benchmarks that discards every message.
type nopLogger struct{}

// The logging methods do nothing; With and Named return the receiver
// unchanged.
func (nopLogger) Debugf(string, ...any)                {}
func (nopLogger) Infof(string, ...any)                 {}
func (nopLogger) Warnf(string, ...any)                 {}
func (nopLogger) Errorf(string, ...any)                {}
func (n nopLogger) With(...logger.Field) logger.Logger { return n }
func (n nopLogger) Named(string) logger.Logger         { return n }
func (nopLogger) Sync() error                          { return nil }

// Benchmarks use miniredis (in-memory Redis) so timing excludes real network I/O.

// BenchmarkRedisPublisher_PublishIntent measures PublishIntent for a single
// sandbox with throttling and queue trimming disabled.
func BenchmarkRedisPublisher_PublishIntent(b *testing.B) {
	server, err := miniredis.Run()
	if err != nil {
		b.Fatal(err)
	}
	defer server.Close()

	rdb := redis.NewClient(&redis.Options{Addr: server.Addr()})
	defer rdb.Close()

	ctx, stop := context.WithCancel(context.Background())
	defer stop()

	publisher := NewRedisPublisher(ctx, rdb, RedisPublisherConfig{
		QueueKey:    "opensandbox:renew:intent",
		QueueMaxLen: 0,
		MinInterval: 0,
		Logger:      nopLogger{},
	})

	const (
		sandboxID  = "bench-sandbox"
		port       = 8080
		requestURI = "/api/health"
	)

	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		publisher.PublishIntent(sandboxID, port, requestURI)
	}
}

// BenchmarkRedisPublisher_PublishIntent_Throttled measures PublishIntent for
// a single sandbox with a MinInterval large enough that the throttle rejects
// most intents.
func BenchmarkRedisPublisher_PublishIntent_Throttled(b *testing.B) {
	server, err := miniredis.Run()
	if err != nil {
		b.Fatal(err)
	}
	defer server.Close()

	rdb := redis.NewClient(&redis.Options{Addr: server.Addr()})
	defer rdb.Close()

	ctx, stop := context.WithCancel(context.Background())
	defer stop()

	publisher := NewRedisPublisher(ctx, rdb, RedisPublisherConfig{
		QueueKey:    "opensandbox:renew:intent",
		QueueMaxLen: 0,
		MinInterval: 1 << 30, // large so throttle skips most
		Logger:      nopLogger{},
	})

	const (
		sandboxID  = "bench-sandbox"
		port       = 8080
		requestURI = "/api/health"
	)

	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		publisher.PublishIntent(sandboxID, port, requestURI)
	}
}

// BenchmarkRedisPublisher_PublishIntent_ManySandboxes measures PublishIntent
// while cycling through 1000 distinct sandbox IDs, with throttling and queue
// trimming disabled.
func BenchmarkRedisPublisher_PublishIntent_ManySandboxes(b *testing.B) {
	server, err := miniredis.Run()
	if err != nil {
		b.Fatal(err)
	}
	defer server.Close()

	rdb := redis.NewClient(&redis.Options{Addr: server.Addr()})
	defer rdb.Close()

	ctx, stop := context.WithCancel(context.Background())
	defer stop()

	publisher := NewRedisPublisher(ctx, rdb, RedisPublisherConfig{
		QueueKey:    "opensandbox:renew:intent",
		QueueMaxLen: 0,
		MinInterval: 0,
		Logger:      nopLogger{},
	})

	const (
		port       = 8080
		requestURI = "/api/health"
	)

	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		publisher.PublishIntent(fmt.Sprintf("sandbox-%d", iter%1000), port, requestURI)
	}
}


================================================
FILE: components/ingress/pkg/sandbox/agent_sandbox_provider.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package sandbox

import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"errors"
	"fmt"
	"regexp"
	"strings"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/util/validation"
	"k8s.io/client-go/dynamic"
	"k8s.io/client-go/dynamic/dynamicinformer"
	"k8s.io/client-go/rest"
	"k8s.io/client-go/tools/cache"
)

const (
	// Group, version and resource of the Sandbox custom resource watched by
	// AgentSandboxProvider.
	agentSandboxGroup    = "agents.x-k8s.io"
	agentSandboxVersion  = "v1alpha1"
	agentSandboxResource = "sandboxes"

	// agentSandboxConditionReady is the status condition type that must be
	// True before an endpoint is returned.
	agentSandboxConditionReady = "Ready"
	// agentSandboxNamePrefix is prepended when deriving resource names from
	// sandbox IDs (see toDNS1035Label and legacyAgentSandboxName).
	agentSandboxNamePrefix     = "sandbox"
)

var (
	// dns1035InvalidChars matches runs of characters other than lowercase
	// ASCII letters, digits and hyphens.
	dns1035InvalidChars     = regexp.MustCompile(`[^a-z0-9-]+`)
	// dns1035DuplicateHyphens matches runs of one or more hyphens so they can
	// be collapsed into a single hyphen.
	dns1035DuplicateHyphens = regexp.MustCompile(`-+`)
)

// AgentSandboxProvider implements Provider for agents.x-k8s.io Sandbox CR.
// It uses a dynamic informer to watch resources in the target namespace.
type AgentSandboxProvider struct {
	// informerFactory owns the informer and is started by Start.
	informerFactory dynamicinformer.DynamicSharedInformerFactory
	// informer is the Sandbox informer whose store GetEndpoint reads.
	informer        cache.SharedIndexInformer
	// namespace is the namespace used to build store lookup keys.
	namespace       string
	// gvr identifies the Sandbox resource the informer was created for.
	gvr             schema.GroupVersionResource
}

// NewAgentSandboxProvider builds a dynamic client from config and returns a
// Provider backed by a dynamic informer for namespace. It panics when the
// dynamic client cannot be created.
func NewAgentSandboxProvider(config *rest.Config, namespace string, resyncPeriod time.Duration) *AgentSandboxProvider {
	client, err := dynamic.NewForConfig(config)
	if err == nil {
		return newAgentSandboxProviderWithClient(client, namespace, resyncPeriod)
	}
	panic(fmt.Sprintf("failed to create dynamic client: %v", err))
}

// newAgentSandboxProviderWithClient wires an AgentSandboxProvider around the
// given dynamic client. Tests use it to inject a fake client. The informer is
// created here but not started; see Start.
func newAgentSandboxProviderWithClient(dyn dynamic.Interface, namespace string, resyncPeriod time.Duration) *AgentSandboxProvider {
	provider := &AgentSandboxProvider{
		namespace: namespace,
		gvr: schema.GroupVersionResource{
			Group:    agentSandboxGroup,
			Version:  agentSandboxVersion,
			Resource: agentSandboxResource,
		},
	}

	// No extra list options are applied on top of the namespace filter.
	provider.informerFactory = dynamicinformer.NewFilteredDynamicSharedInformerFactory(dyn, resyncPeriod, namespace, nil)
	provider.informer = provider.informerFactory.ForResource(provider.gvr).Informer()

	return provider
}

// agentSandboxResourceName derives the primary resource name for sandboxId by
// normalizing it with toDNS1035Label and the agentSandboxNamePrefix fallback.
func agentSandboxResourceName(sandboxId string) string {
	resourceName := toDNS1035Label(sandboxId, agentSandboxNamePrefix)
	return resourceName
}

// toDNS1035Label normalizes value into a DNS-1035 style label.
//
// The value is trimmed and lowercased, runs of characters outside [a-z0-9-]
// become a single hyphen, and leading/trailing hyphens are removed. A result
// that is empty is replaced by "<prefix>-<hash>"; a result that does not start
// with a letter gets "<prefix>-" prepended. Labels longer than
// validation.DNS1035LabelMaxLength are truncated and suffixed with "-<hash>",
// where <hash> is the first 8 hex characters of the SHA-256 of the original
// value.
func toDNS1035Label(value, prefix string) string {
	digest := sha256.Sum256([]byte(value))
	suffix := hex.EncodeToString(digest[:])[:8]

	label := strings.TrimSpace(value)
	label = strings.ToLower(label)
	label = dns1035InvalidChars.ReplaceAllString(label, "-")
	label = dns1035DuplicateHyphens.ReplaceAllString(label, "-")
	label = strings.Trim(label, "-")

	switch {
	case label == "":
		label = prefix + "-" + suffix
	case !startsWithLetter(label):
		label = prefix + "-" + label
	}

	if len(label) <= validation.DNS1035LabelMaxLength {
		return strings.Trim(label, "-")
	}

	// Too long: keep as much of the label as fits next to "-<suffix>".
	maxBase := validation.DNS1035LabelMaxLength - len(suffix) - 1
	base := label
	if len(base) > maxBase {
		base = base[:maxBase]
	}
	base = strings.Trim(base, "-")
	if !startsWithLetter(base) {
		base = prefix
	}
	return strings.Trim(base+"-"+suffix, "-")
}

// startsWithLetter reports whether the first byte of value is a lowercase
// ASCII letter. It returns false for the empty string.
func startsWithLetter(value string) bool {
	if len(value) == 0 {
		return false
	}
	switch c := value[0]; {
	case c < 'a', c > 'z':
		return false
	default:
		return true
	}
}

// legacyAgentSandboxName returns sandboxId with the "<prefix>-" legacy prefix
// applied, leaving IDs that already carry the prefix untouched.
func legacyAgentSandboxName(sandboxId string) string {
	const legacyPrefix = agentSandboxNamePrefix + "-"
	if !strings.HasPrefix(sandboxId, legacyPrefix) {
		return legacyPrefix + sandboxId
	}
	return sandboxId
}

// resourceNameCandidates lists the resource names to try for sandboxId, in
// lookup order and without duplicates: the normalized primary name, the raw
// sandbox ID, and the legacy prefixed name.
func resourceNameCandidates(sandboxId string) []string {
	primary := agentSandboxResourceName(sandboxId)
	legacy := legacyAgentSandboxName(sandboxId)

	names := make([]string, 0, 3)
	names = append(names, primary)
	if primary != sandboxId {
		names = append(names, sandboxId)
	}
	if legacy != primary && legacy != sandboxId {
		names = append(names, legacy)
	}
	return names
}

// GetEndpoint returns status.serviceFQDN of the Sandbox resource matching
// sandboxId, looked up in the informer store under each candidate name from
// resourceNameCandidates; the first existing object is used.
//
// It returns an error wrapping ErrSandboxNotFound when no candidate exists,
// and one wrapping ErrSandboxNotReady when the object has no status, its
// Ready condition is not True, or serviceFQDN is empty.
func (a *AgentSandboxProvider) GetEndpoint(sandboxId string) (string, error) {
	store := a.informer.GetStore()

	var (
		found any
		hit   bool
	)
	for _, name := range resourceNameCandidates(sandboxId) {
		key := fmt.Sprintf("%s/%s", a.namespace, name)
		item, exists, err := store.GetByKey(key)
		if err != nil {
			return "", fmt.Errorf("failed to get AgentSandbox %s: %w", key, err)
		}
		if exists {
			found, hit = item, true
			break
		}
	}
	if !hit {
		return "", fmt.Errorf("%w: %s/%s", ErrSandboxNotFound, a.namespace, sandboxId)
	}

	sandbox, isUnstructured := found.(*unstructured.Unstructured)
	if !isUnstructured {
		return "", fmt.Errorf("unexpected object type for sandbox %s: %T", sandboxId, found)
	}

	status, hasStatus := sandbox.Object["status"].(map[string]any)
	if !hasStatus {
		return "", fmt.Errorf("%w: sandbox %s missing status", ErrSandboxNotReady, sandboxId)
	}

	// The Ready condition is checked before the FQDN; it must be True.
	ready, reason, message := a.checkSandboxReadyCondition(status)
	if !ready {
		return "", fmt.Errorf("%w: sandbox %s not ready (%s: %s)", ErrSandboxNotReady, sandboxId, reason, message)
	}

	if serviceFQDN, _ := status["serviceFQDN"].(string); serviceFQDN != "" {
		return serviceFQDN, nil
	}
	return "", fmt.Errorf("%w: sandbox %s has no serviceFQDN", ErrSandboxNotReady, sandboxId)
}

// Start launches the informer factory and blocks until the Sandbox informer
// cache has synced. Both are tied to ctx; an error is returned when the cache
// does not sync.
func (a *AgentSandboxProvider) Start(ctx context.Context) error {
	stopCh := ctx.Done()
	a.informerFactory.Start(stopCh)

	if synced := cache.WaitForCacheSync(stopCh, a.informer.HasSynced); synced {
		return nil
	}
	return errors.New("failed to sync AgentSandbox informer cache")
}

// checkSandboxReadyCondition looks for the Ready condition in
// status.conditions and returns (isReady, reason, message).
//
// Only the first condition of type Ready is considered. When it is not True,
// empty reason/message values are replaced by defaults. Missing or malformed
// conditions yield isReady=false with a descriptive reason.
//
// https://github.com/kubernetes-sigs/agent-sandbox/blob/main/controllers/sandbox_controller.go#L195
func (a *AgentSandboxProvider) checkSandboxReadyCondition(status map[string]any) (bool, string, string) {
	conditions, ok := status["conditions"].([]any)
	if !ok {
		return false, "NoConditions", "no sandbox conditions reported"
	}

	for _, raw := range conditions {
		cond, isMap := raw.(map[string]any)
		if !isMap {
			continue
		}
		condType, _ := cond["type"].(string)
		if condType != agentSandboxConditionReady {
			continue
		}

		condStatus, _ := cond["status"].(string)
		if condStatus == string(metav1.ConditionTrue) {
			return true, agentSandboxConditionReady, ""
		}

		reason, _ := cond["reason"].(string)
		if reason == "" {
			reason = "DependenciesNotReady"
		}
		message, _ := cond["message"].(string)
		if message == "" {
			message = "Ready condition is not True"
		}
		return false, reason, message
	}

	return false, "ReadyConditionMissing", "ready condition missing"
}

// Compile-time check that *AgentSandboxProvider satisfies Provider.
var _ Provider = (*AgentSandboxProvider)(nil)


================================================
FILE: components/ingress/pkg/sandbox/agent_sandbox_provider_test.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package sandbox

import (
	"context"
	"errors"
	"strings"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	dynamicfake "k8s.io/client-go/dynamic/fake"
)

// buildUnstructuredSandbox returns a minimal unstructured Sandbox object with
// the given name and namespace, an empty pod template and no status.
func buildUnstructuredSandbox(name, namespace string) *unstructured.Unstructured {
	metadata := map[string]any{
		"name":      name,
		"namespace": namespace,
	}
	podTemplate := map[string]any{
		"spec": map[string]any{
			"containers": []any{},
		},
		"metadata": map[string]any{},
	}

	sandbox := &unstructured.Unstructured{}
	sandbox.Object = map[string]any{
		"apiVersion": agentSandboxGroup + "/" + agentSandboxVersion,
		"kind":       "Sandbox",
		"metadata":   metadata,
		"spec": map[string]any{
			"podTemplate": podTemplate,
		},
	}
	return sandbox
}

// TestAgentSandboxProvider_Start_Success checks that Start succeeds against a
// fake dynamic client and that the informer store accepts objects afterwards.
func TestAgentSandboxProvider_Start_Success(t *testing.T) {
	const ns = "test-ns"
	sandbox := buildUnstructuredSandbox("demo", ns)

	listKinds := map[schema.GroupVersionResource]string{
		{
			Group:    agentSandboxGroup,
			Version:  agentSandboxVersion,
			Resource: agentSandboxResource,
		}: "SandboxList",
	}
	client := dynamicfake.NewSimpleDynamicClientWithCustomListKinds(runtime.NewScheme(), listKinds, sandbox)
	provider := newAgentSandboxProviderWithClient(client, ns, 30*time.Second)

	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	assert.NoError(t, provider.Start(ctx), "Start should succeed with fake dynamic informer")

	// Seed the store by hand so the lookup below does not rely on the
	// informer having populated it.
	store := provider.informer.GetStore()
	assert.NoError(t, store.Add(sandbox))

	_, exists, _ := store.GetByKey(sandbox.GetNamespace() + "/" + sandbox.GetName())
	assert.True(t, exists, "informer cache should accept added object after start")
}

func TestAgentSandboxProvider_Start_ContextCancelled(t *testing.T) {
	namespace := "test-ns"

	scheme := runtime.NewScheme()
	gvr := schema.GroupVersionResource{
		Group:    agentSandboxGroup,
		Version:  agentSandboxVersion,
		Resource: agentSandboxResource,
	}
	fakeDyn := dynamicfake.NewSimpleDynamicClientWithCustomListKinds(
		scheme,
		map[schema.GroupVersionResource]string{
			gvr: "SandboxList",
		},
	)

	provider := newAgentSandboxProviderWithClient(fakeDyn, namespace, 30*time.Second)

	ctx, cancel := context.WithCancel(context.Background())
	cancel() // cancel before start

	err := provider.Start(ctx)
	assert.Error(t, err, "Start should fail when context already cancelled")
}

// TestAgentSandboxProvider_GetEndpoint_ServiceFQDN checks that GetEndpoint
// returns status.serviceFQDN for a sandbox whose Ready condition is "True".
func TestAgentSandboxProvider_GetEndpoint_ServiceFQDN(t *testing.T) {
	ns := "test-ns"

	readyCondition := map[string]any{
		"type":   "Ready",
		"status": "True",
	}
	sandboxObj := buildUnstructuredSandbox("demo", ns)
	sandboxObj.Object["status"] = map[string]any{
		"serviceFQDN": "sandbox.demo.svc.cluster.local",
		"conditions":  []any{readyCondition},
	}

	listKinds := map[schema.GroupVersionResource]string{
		{
			Group:    agentSandboxGroup,
			Version:  agentSandboxVersion,
			Resource: agentSandboxResource,
		}: "SandboxList",
	}
	client := dynamicfake.NewSimpleDynamicClientWithCustomListKinds(runtime.NewScheme(), listKinds)
	provider := newAgentSandboxProviderWithClient(client, ns, 30*time.Second)

	startCtx, stop := context.WithTimeout(context.Background(), 2*time.Second)
	defer stop()
	startErr := provider.Start(startCtx)
	assert.NoError(t, startErr)

	// Seed store
	addErr := provider.informer.GetStore().Add(sandboxObj)
	assert.NoError(t, addErr)

	got, err := provider.GetEndpoint("demo")
	assert.NoError(t, err)
	assert.Equal(t, "sandbox.demo.svc.cluster.local", got)
}

func TestAgentSandboxProvider_GetEndpoint_NotFound(t *testing.T) {
	namespace := "test-ns"

	scheme := runtime.NewScheme()
	gvr := schema.GroupVersionResource{
		Group:    agentSandboxGroup,
		Version:  agentSandboxVersion,
		Resource: agentSandboxResource,
	}
	fakeDyn := dynamicfake.NewSimpleDynamicClientWithCustomListKinds(
		scheme,
		map[schema.GroupVersionResource]string{
			gvr: "SandboxList",
		},
	)

	provider := newAgentSandboxProviderWithClient(fakeDyn, namespace, 30*time.Second)

	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()
	err := provider.Start(ctx)
	assert.NoError(t, err)

	_, err = provider.GetEndpoint("missing")
	assert.Error(t, err)
	assert.True(t, errors.Is(err, ErrSandboxNotFound))
}

// TestAgentSandboxProvider_GetEndpoint_NoServiceFQDN expects
// ErrSandboxNotReady for a sandbox whose status map is empty.
func TestAgentSandboxProvider_GetEndpoint_NoServiceFQDN(t *testing.T) {
	ns := "test-ns"

	sandboxObj := buildUnstructuredSandbox("demo", ns)
	sandboxObj.Object["status"] = map[string]any{}

	listKinds := map[schema.GroupVersionResource]string{
		{
			Group:    agentSandboxGroup,
			Version:  agentSandboxVersion,
			Resource: agentSandboxResource,
		}: "SandboxList",
	}
	client := dynamicfake.NewSimpleDynamicClientWithCustomListKinds(runtime.NewScheme(), listKinds)
	provider := newAgentSandboxProviderWithClient(client, ns, 30*time.Second)

	startCtx, stop := context.WithTimeout(context.Background(), 2*time.Second)
	defer stop()
	startErr := provider.Start(startCtx)
	assert.NoError(t, startErr)

	// Seed store
	addErr := provider.informer.GetStore().Add(sandboxObj)
	assert.NoError(t, addErr)

	_, lookupErr := provider.GetEndpoint("demo")
	assert.Error(t, lookupErr)
	matchesNotReady := errors.Is(lookupErr, ErrSandboxNotReady)
	assert.True(t, matchesNotReady)
}

// TestAgentSandboxProvider_GetEndpoint_NotReadyCondition expects
// ErrSandboxNotReady when serviceFQDN is present but the Ready condition
// reports "False".
func TestAgentSandboxProvider_GetEndpoint_NotReadyCondition(t *testing.T) {
	ns := "test-ns"

	notReadyCondition := map[string]any{
		"type":    "Ready",
		"status":  "False",
		"reason":  "DependenciesNotReady",
		"message": "Pod not ready",
	}
	sandboxObj := buildUnstructuredSandbox("demo", ns)
	sandboxObj.Object["status"] = map[string]any{
		"serviceFQDN": "sandbox.demo.svc.cluster.local",
		"conditions":  []any{notReadyCondition},
	}

	listKinds := map[schema.GroupVersionResource]string{
		{
			Group:    agentSandboxGroup,
			Version:  agentSandboxVersion,
			Resource: agentSandboxResource,
		}: "SandboxList",
	}
	client := dynamicfake.NewSimpleDynamicClientWithCustomListKinds(runtime.NewScheme(), listKinds)
	provider := newAgentSandboxProviderWithClient(client, ns, 30*time.Second)

	startCtx, stop := context.WithTimeout(context.Background(), 2*time.Second)
	defer stop()
	startErr := provider.Start(startCtx)
	assert.NoError(t, startErr)

	// Seed store
	addErr := provider.informer.GetStore().Add(sandboxObj)
	assert.NoError(t, addErr)

	_, lookupErr := provider.GetEndpoint("demo")
	assert.Error(t, lookupErr)
	matchesNotReady := errors.Is(lookupErr, ErrSandboxNotReady)
	assert.True(t, matchesNotReady)
}

// TestToDNS1035Label_HashOnSymbolOnlyIDs checks that two different
// symbol-only IDs map to distinct names, each shaped like
// "sandbox-<8 hex chars>".
func TestToDNS1035Label_HashOnSymbolOnlyIDs(t *testing.T) {
	fromBangs := toDNS1035Label("!!!", agentSandboxNamePrefix)
	fromQuestions := toDNS1035Label("???", agentSandboxNamePrefix)

	assert.NotEqual(t, fromBangs, fromQuestions)
	for _, label := range []string{fromBangs, fromQuestions} {
		assert.Regexp(t, `^sandbox-[0-9a-f]{8}$`, label)
	}
}

// TestToDNS1035Label_PrefixesDigitStart checks that the digit-leading ID
// "1234" is converted to "sandbox-1234".
func TestToDNS1035Label_PrefixesDigitStart(t *testing.T) {
	got := toDNS1035Label("1234", agentSandboxNamePrefix)

	assert.Equal(t, "sandbox-1234", got)
}

// TestToDNS1035Label_TruncatesWithHashSuffix feeds a 101-character ID and
// checks the result is at most 63 characters, starts with a lowercase letter,
// uses only [a-z0-9-], and ends in eight hex characters.
func TestToDNS1035Label_TruncatesWithHashSuffix(t *testing.T) {
	longID := "A" + strings.Repeat("b", 100)

	label := toDNS1035Label(longID, agentSandboxNamePrefix)

	assert.LessOrEqual(t, len(label), 63)
	for _, pattern := range []string{`^[a-z][a-z0-9-]*$`, `[0-9a-f]{8}$`} {
		assert.Regexp(t, pattern, label)
	}
}


================================================
FILE: components/ingress/pkg/sandbox/batchsandbox_provider.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package sandbox

import (
	"context"
	"errors"
	"fmt"
	"time"

	kerrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/client-go/rest"
	"k8s.io/client-go/tools/cache"

	clientset "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/clientset/versioned"
	informers "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/informers/externalversions"
	listers "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/listers/sandbox/v1alpha1"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/utils"
)

// BatchSandboxProvider implements Provider interface for BatchSandbox resources.
// Lookups are served through the lister and are scoped to a single namespace.
type BatchSandboxProvider struct {
	// informerFactory is started by Start.
	informerFactory informers.SharedInformerFactory
	// lister is queried by GetEndpoint.
	lister          listers.BatchSandboxLister
	// informerSynced is what Start waits on via cache.WaitForCacheSync.
	informerSynced  cache.InformerSynced
	// namespace is passed to the lister on every lookup.
	namespace       string
}

// NewBatchSandboxProvider builds a BatchSandboxProvider whose shared informer
// factory is restricted to namespace and uses resyncPeriod.
//
// It panics if the sandbox clientset cannot be created from config. The
// informer is not started here; call Start before GetEndpoint.
func NewBatchSandboxProvider(
	config *rest.Config,
	namespace string,
	resyncPeriod time.Duration,
) *BatchSandboxProvider {
	// Named "client" so the local does not shadow the clientset import alias.
	client, err := clientset.NewForConfig(config)
	if err != nil {
		panic(fmt.Sprintf("failed to create sandbox clientset: %v", err))
	}

	factory := informers.NewSharedInformerFactoryWithOptions(
		client,
		resyncPeriod,
		informers.WithNamespace(namespace),
	)
	informer := factory.Sandbox().V1alpha1().BatchSandboxes()

	provider := &BatchSandboxProvider{
		informerFactory: factory,
		namespace:       namespace,
	}
	provider.lister = informer.Lister()
	provider.informerSynced = informer.Informer().HasSynced
	return provider
}

// Start launches the informer factory and blocks until the BatchSandbox
// informer cache has synced. It returns an error if ctx is done before the
// sync completes.
func (p *BatchSandboxProvider) Start(ctx context.Context) error {
	stopCh := ctx.Done()
	p.informerFactory.Start(stopCh)

	// WaitForCacheSync reports false when stopCh closes before the sync.
	if synced := cache.WaitForCacheSync(stopCh, p.informerSynced); synced {
		return nil
	}
	return errors.New("failed to sync BatchSandbox informer cache")
}

// GetEndpoint retrieves the first endpoint recorded for a BatchSandbox.
//
// The sandbox is looked up by name through the lister, in the provider's
// namespace.
//
// Errors:
//   - wraps ErrSandboxNotFound when the lister reports a NotFound error;
//   - wraps ErrSandboxNotReady when Status.Ready is below 1, when
//     utils.GetEndpoints fails, or when it yields no endpoints;
//   - any other lister error is wrapped and returned as-is.
func (p *BatchSandboxProvider) GetEndpoint(sandboxId string) (string, error) {
	// Get BatchSandbox from cache using lister with provider's namespace
	batchSandbox, err := p.lister.BatchSandboxes(p.namespace).Get(sandboxId)
	if err != nil {
		if kerrors.IsNotFound(err) {
			return "", fmt.Errorf("%w: %s/%s", ErrSandboxNotFound, p.namespace, sandboxId)
		}
		return "", fmt.Errorf("failed to get BatchSandbox %s/%s: %w", p.namespace, sandboxId, err)
	}

	// Check if BatchSandbox is ready
	if batchSandbox.Status.Ready < 1 {
		return "", fmt.Errorf("%w: %s/%s (ready: %d/%d)",
			ErrSandboxNotReady, p.namespace, sandboxId, batchSandbox.Status.Ready, batchSandbox.Status.Replicas)
	}

	// Get endpoints from BatchSandbox using kubernetes utils
	endpoints, err := utils.GetEndpoints(batchSandbox)
	if err != nil {
		return "", fmt.Errorf("%w: %s/%s: %w", ErrSandboxNotReady, p.namespace, sandboxId, err)
	}

	// Guard the index below: an empty list with a nil error would otherwise
	// panic in the request path.
	// NOTE(review): whether utils.GetEndpoints can return an empty slice without
	// an error is not visible from here — confirm against its implementation.
	if len(endpoints) == 0 {
		return "", fmt.Errorf("%w: %s/%s: no endpoints available", ErrSandboxNotReady, p.namespace, sandboxId)
	}

	// Return the first available endpoint
	return endpoints[0], nil
}

// Compile-time assertion that *BatchSandboxProvider satisfies Provider.
var _ Provider = (*BatchSandboxProvider)(nil)


================================================
FILE: components/ingress/pkg/sandbox/batchsandbox_provider_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package sandbox

import (
	"context"
	"errors"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	fakeclientset "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/clientset/versioned/fake"
	informers "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/informers/externalversions"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/utils"
)

// Note: Integration tests with real informers are in e2e tests
// Unit tests here focus on provider behavior

// TestBatchSandboxProvider_WithFakeInformer tests the provider using fake clientset and informer.
// It covers three lookups: a ready sandbox, a sandbox with zero ready
// replicas, and a name that is absent from the store.
func TestBatchSandboxProvider_WithFakeInformer(t *testing.T) {
	namespace := "test-namespace"

	// Create a ready BatchSandbox with valid endpoints
	readyBatchSandbox := &sandboxv1alpha1.BatchSandbox{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "ready-sandbox",
			Namespace: namespace,
			Annotations: map[string]string{
				utils.AnnotationEndpoints: `["10.0.0.1", "10.0.0.2"]`,
			},
		},
		Spec: sandboxv1alpha1.BatchSandboxSpec{
			Replicas: ptr(int32(2)),
		},
		Status: sandboxv1alpha1.BatchSandboxStatus{
			Replicas: 2,
			Ready:    2,
		},
	}

	// Create a not ready BatchSandbox
	notReadyBatchSandbox := &sandboxv1alpha1.BatchSandbox{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "not-ready-sandbox",
			Namespace: namespace,
		},
		Spec: sandboxv1alpha1.BatchSandboxSpec{
			Replicas: ptr(int32(1)),
		},
		Status: sandboxv1alpha1.BatchSandboxStatus{
			Replicas: 1,
			Ready:    0,
		},
	}

	// Create fake clientset with test objects
	fakeClient := fakeclientset.NewSimpleClientset(readyBatchSandbox, notReadyBatchSandbox)

	// Create informer factory
	informerFactory := informers.NewSharedInformerFactoryWithOptions(
		fakeClient,
		time.Second*30,
		informers.WithNamespace(namespace),
	)

	batchSandboxInformer := informerFactory.Sandbox().V1alpha1().BatchSandboxes()

	// Create provider
	provider := &BatchSandboxProvider{
		informerFactory: informerFactory,
		lister:          batchSandboxInformer.Lister(),
		informerSynced:  batchSandboxInformer.Informer().HasSynced,
		namespace:       namespace,
	}

	// Start informer and wait for cache sync
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	err := provider.Start(ctx)
	assert.NoError(t, err, "Provider should start successfully")

	// Seed the informer store directly so the lookups below find both objects.
	err = batchSandboxInformer.Informer().GetStore().Add(readyBatchSandbox)
	assert.NoError(t, err)
	err = batchSandboxInformer.Informer().GetStore().Add(notReadyBatchSandbox)
	assert.NoError(t, err)

	// Test 1: Get endpoint from ready sandbox
	t.Run("GetEndpoint from ready sandbox", func(t *testing.T) {
		endpoint, err := provider.GetEndpoint("ready-sandbox")
		assert.NoError(t, err)
		assert.Equal(t, "10.0.0.1", endpoint, "Should return first endpoint IP")
	})

	// Test 2: Get endpoint from not ready sandbox
	t.Run("GetEndpoint from not ready sandbox", func(t *testing.T) {
		_, err := provider.GetEndpoint("not-ready-sandbox")
		// assert.Error does not abort the test, so only touch err.Error() when
		// err is non-nil; a nil error is then a failure, not a panic.
		if assert.Error(t, err) {
			assert.True(t, errors.Is(err, ErrSandboxNotReady), "Should return ErrSandboxNotReady")
			assert.Contains(t, err.Error(), "not ready")
		}
	})

	// Test 3: Get endpoint from non-existent sandbox
	t.Run("GetEndpoint from non-existent sandbox", func(t *testing.T) {
		_, err := provider.GetEndpoint("non-existent")
		// Same guard as above: avoid a nil dereference on an unexpected success.
		if assert.Error(t, err) {
			assert.True(t, errors.Is(err, ErrSandboxNotFound), "Should return ErrSandboxNotFound")
			assert.Contains(t, err.Error(), "not found")
		}
	})
}

// TestBatchSandboxProvider_MissingAnnotation tests sandbox without endpoints annotation.
// The sandbox reports one ready replica, so the failure must come from the
// endpoint lookup and be wrapped in ErrSandboxNotReady.
func TestBatchSandboxProvider_MissingAnnotation(t *testing.T) {
	namespace := "test-namespace"

	// Create BatchSandbox without endpoints annotation
	batchSandbox := &sandboxv1alpha1.BatchSandbox{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "no-annotation-sandbox",
			Namespace: namespace,
		},
		Spec: sandboxv1alpha1.BatchSandboxSpec{
			Replicas: ptr(int32(1)),
		},
		Status: sandboxv1alpha1.BatchSandboxStatus{
			Replicas: 1,
			Ready:    1,
		},
	}

	fakeClient := fakeclientset.NewSimpleClientset(batchSandbox)
	informerFactory := informers.NewSharedInformerFactoryWithOptions(
		fakeClient,
		time.Second*30,
		informers.WithNamespace(namespace),
	)

	batchSandboxInformer := informerFactory.Sandbox().V1alpha1().BatchSandboxes()

	provider := &BatchSandboxProvider{
		informerFactory: informerFactory,
		lister:          batchSandboxInformer.Lister(),
		informerSynced:  batchSandboxInformer.Informer().HasSynced,
		namespace:       namespace,
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	err := provider.Start(ctx)
	assert.NoError(t, err)

	// Manually add object to informer cache
	err = batchSandboxInformer.Informer().GetStore().Add(batchSandbox)
	assert.NoError(t, err)

	_, err = provider.GetEndpoint("no-annotation-sandbox")
	// assert.Error does not abort the test; guard err.Error() so an unexpected
	// nil error is reported as a failure instead of a nil-dereference panic.
	if assert.Error(t, err) {
		assert.True(t, errors.Is(err, ErrSandboxNotReady), "Should return ErrSandboxNotReady")
		assert.Contains(t, err.Error(), "has no annotations")
	}
}

// TestBatchSandboxProvider_InvalidAnnotation tests sandbox with invalid annotation format.
// The endpoints annotation holds the literal text "invalid-json"; the lookup
// must fail with an error wrapping ErrSandboxNotReady.
func TestBatchSandboxProvider_InvalidAnnotation(t *testing.T) {
	namespace := "test-namespace"

	batchSandbox := &sandboxv1alpha1.BatchSandbox{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "invalid-annotation-sandbox",
			Namespace: namespace,
			Annotations: map[string]string{
				utils.AnnotationEndpoints: `invalid-json`,
			},
		},
		Spec: sandboxv1alpha1.BatchSandboxSpec{
			Replicas: ptr(int32(1)),
		},
		Status: sandboxv1alpha1.BatchSandboxStatus{
			Replicas: 1,
			Ready:    1,
		},
	}

	fakeClient := fakeclientset.NewSimpleClientset(batchSandbox)
	informerFactory := informers.NewSharedInformerFactoryWithOptions(
		fakeClient,
		time.Second*30,
		informers.WithNamespace(namespace),
	)

	batchSandboxInformer := informerFactory.Sandbox().V1alpha1().BatchSandboxes()

	provider := &BatchSandboxProvider{
		informerFactory: informerFactory,
		lister:          batchSandboxInformer.Lister(),
		informerSynced:  batchSandboxInformer.Informer().HasSynced,
		namespace:       namespace,
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	err := provider.Start(ctx)
	assert.NoError(t, err)

	// Manually add object to informer cache
	err = batchSandboxInformer.Informer().GetStore().Add(batchSandbox)
	assert.NoError(t, err)

	_, err = provider.GetEndpoint("invalid-annotation-sandbox")
	// assert.Error does not abort the test; guard err.Error() so an unexpected
	// nil error is reported as a failure instead of a nil-dereference panic.
	if assert.Error(t, err) {
		assert.True(t, errors.Is(err, ErrSandboxNotReady), "Should return ErrSandboxNotReady")
		assert.Contains(t, err.Error(), "failed to parse")
	}
}

// TestBatchSandboxProvider_DynamicUpdate tests adding object after informer starts.
// The sandbox is created through the fake clientset (not by seeding the
// store), and the test polls until the provider can resolve its endpoint.
func TestBatchSandboxProvider_DynamicUpdate(t *testing.T) {
	namespace := "test-namespace"

	fakeClient := fakeclientset.NewSimpleClientset()
	informerFactory := informers.NewSharedInformerFactoryWithOptions(
		fakeClient,
		time.Second*30,
		informers.WithNamespace(namespace),
	)

	batchSandboxInformer := informerFactory.Sandbox().V1alpha1().BatchSandboxes()

	provider := &BatchSandboxProvider{
		informerFactory: informerFactory,
		lister:          batchSandboxInformer.Lister(),
		informerSynced:  batchSandboxInformer.Informer().HasSynced,
		namespace:       namespace,
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	err := provider.Start(ctx)
	assert.NoError(t, err)

	// Initially no sandbox exists
	_, err = provider.GetEndpoint("dynamic-sandbox")
	// assert.Error does not abort the test; guard err.Error() so an unexpected
	// nil error is reported as a failure instead of a nil-dereference panic.
	if assert.Error(t, err) {
		assert.True(t, errors.Is(err, ErrSandboxNotFound), "Should return ErrSandboxNotFound")
		assert.Contains(t, err.Error(), "not found")
	}

	// Add a new BatchSandbox
	newBatchSandbox := &sandboxv1alpha1.BatchSandbox{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "dynamic-sandbox",
			Namespace: namespace,
			Annotations: map[string]string{
				utils.AnnotationEndpoints: `["10.0.0.100"]`,
			},
		},
		Spec: sandboxv1alpha1.BatchSandboxSpec{
			Replicas: ptr(int32(1)),
		},
		Status: sandboxv1alpha1.BatchSandboxStatus{
			Replicas: 1,
			Ready:    1,
		},
	}

	_, err = fakeClient.SandboxV1alpha1().BatchSandboxes(namespace).Create(
		context.Background(), newBatchSandbox, metav1.CreateOptions{})
	assert.NoError(t, err)

	// Wait for informer to pick up the change
	assert.Eventually(t, func() bool {
		endpoint, err := provider.GetEndpoint("dynamic-sandbox")
		return err == nil && endpoint == "10.0.0.100"
	}, 3*time.Second, 100*time.Millisecond, "Informer should eventually sync the new object")
}

// TestBatchSandboxProvider_StartCacheSyncFailure tests cache sync timeout.
// Start is handed a context whose deadline has already passed and must return
// the "failed to sync" error.
func TestBatchSandboxProvider_StartCacheSyncFailure(t *testing.T) {
	namespace := "test-namespace"

	fakeClient := fakeclientset.NewSimpleClientset()
	informerFactory := informers.NewSharedInformerFactoryWithOptions(
		fakeClient,
		time.Second*30,
		informers.WithNamespace(namespace),
	)

	batchSandboxInformer := informerFactory.Sandbox().V1alpha1().BatchSandboxes()

	provider := &BatchSandboxProvider{
		informerFactory: informerFactory,
		lister:          batchSandboxInformer.Lister(),
		informerSynced:  batchSandboxInformer.Informer().HasSynced,
		namespace:       namespace,
	}

	// Create a context that expires immediately
	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Nanosecond)
	defer cancel()

	// Block until the deadline has actually fired rather than sleeping for a
	// guessed duration; this is deterministic and does not slow the test.
	<-ctx.Done()

	err := provider.Start(ctx)
	// assert.Error does not abort the test; guard err.Error() so an unexpected
	// nil error is reported as a failure instead of a nil-dereference panic.
	if assert.Error(t, err, "Should fail when cache sync times out") {
		assert.Contains(t, err.Error(), "failed to sync")
	}
}

// TestBatchSandboxProvider_GetEndpointNonNotFoundError exercises the success
// path of GetEndpoint: a ready sandbox with a valid single-entry endpoints
// annotation must resolve to that endpoint.
// NOTE(review): despite the name, no non-IsNotFound lister error is provoked
// here; consider renaming the test or adding a case that injects such an error.
func TestBatchSandboxProvider_GetEndpointNonNotFoundError(t *testing.T) {
	namespace := "test-namespace"

	// Create a ready sandbox whose endpoints annotation holds one address
	batchSandbox := &sandboxv1alpha1.BatchSandbox{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "missing-endpoint-sandbox",
			Namespace: namespace,
			Annotations: map[string]string{
				utils.AnnotationEndpoints: `["10.0.0.1"]`,
			},
		},
		Spec: sandboxv1alpha1.BatchSandboxSpec{
			Replicas: ptr(int32(1)),
		},
		Status: sandboxv1alpha1.BatchSandboxStatus{
			Replicas: 1,
			Ready:    1,
		},
	}

	fakeClient := fakeclientset.NewSimpleClientset(batchSandbox)
	informerFactory := informers.NewSharedInformerFactoryWithOptions(
		fakeClient,
		time.Second*30,
		informers.WithNamespace(namespace),
	)

	batchSandboxInformer := informerFactory.Sandbox().V1alpha1().BatchSandboxes()

	provider := &BatchSandboxProvider{
		informerFactory: informerFactory,
		lister:          batchSandboxInformer.Lister(),
		informerSynced:  batchSandboxInformer.Informer().HasSynced,
		namespace:       namespace,
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	err := provider.Start(ctx)
	assert.NoError(t, err)

	// Manually add object to informer cache
	err = batchSandboxInformer.Informer().GetStore().Add(batchSandbox)
	assert.NoError(t, err)

	// Should successfully get endpoint
	endpoint, err := provider.GetEndpoint("missing-endpoint-sandbox")
	assert.NoError(t, err)
	assert.Equal(t, "10.0.0.1", endpoint)
}

// ptr returns a pointer to a fresh int32 holding the value i.
func ptr(i int32) *int32 {
	boxed := new(int32)
	*boxed = i
	return boxed
}


================================================
FILE: components/ingress/pkg/sandbox/errors_test.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package sandbox

import (
	"errors"
	"fmt"
	"testing"
)

// TestErrSandboxNotReadyWrapping verifies that an error built with %w around
// ErrSandboxNotReady is still matched by errors.Is.
func TestErrSandboxNotReadyWrapping(t *testing.T) {
	err := fmt.Errorf("%w: custom detail", ErrSandboxNotReady)

	if errors.Is(err, ErrSandboxNotReady) {
		return
	}
	t.Fatalf("expected errors.Is to match ErrSandboxNotReady, got false; err=%v", err)
}


================================================
FILE: components/ingress/pkg/sandbox/factory.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package sandbox

import (
	"fmt"
	"time"

	"k8s.io/client-go/rest"
)

// DefaultProviderFactory is the default implementation of ProviderFactory.
// It stores the settings that CreateProvider forwards unchanged to the
// concrete provider constructors.
type DefaultProviderFactory struct {
	// config is the Kubernetes REST config handed to each provider constructor.
	config       *rest.Config
	// namespace is the namespace each created provider is configured with.
	namespace    string
	// resyncPeriod is the informer resync period handed to each provider constructor.
	resyncPeriod time.Duration
}

// NewProviderFactory returns a DefaultProviderFactory that remembers config,
// namespace and resyncPeriod for later CreateProvider calls.
func NewProviderFactory(config *rest.Config, namespace string, resyncPeriod time.Duration) *DefaultProviderFactory {
	factory := new(DefaultProviderFactory)
	factory.config = config
	factory.namespace = namespace
	factory.resyncPeriod = resyncPeriod
	return factory
}

// CreateProvider builds the Provider matching providerType from the factory's
// stored config, namespace and resync period.
//
// It returns an error for any provider type other than
// ProviderTypeBatchSandbox and ProviderTypeAgentSandbox.
func (f *DefaultProviderFactory) CreateProvider(providerType ProviderType) (Provider, error) {
	if providerType == ProviderTypeBatchSandbox {
		return NewBatchSandboxProvider(f.config, f.namespace, f.resyncPeriod), nil
	}
	if providerType == ProviderTypeAgentSandbox {
		return NewAgentSandboxProvider(f.config, f.namespace, f.resyncPeriod), nil
	}
	return nil, fmt.Errorf("unsupported provider type: %s", providerType)
}


================================================
FILE: components/ingress/pkg/sandbox/provider.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package sandbox

import (
	"context"
	"errors"
)

// ProviderType names a sandbox provider implementation; it is the selector
// accepted by ProviderFactory.CreateProvider.
type ProviderType string

// Provider types known to this package.
const (
	ProviderTypeBatchSandbox ProviderType = "batchsandbox"
	ProviderTypeAgentSandbox ProviderType = "agent-sandbox"
)

// String returns the provider type as a plain string.
func (tpy ProviderType) String() string { return string(tpy) }

// Standard errors for Provider operations.
// Providers wrap these sentinels with %w, so match them with errors.Is rather
// than by direct comparison.
var (
	// ErrSandboxNotFound indicates the sandbox resource does not exist
	ErrSandboxNotFound = errors.New("sandbox not found")

	// ErrSandboxNotReady indicates the sandbox exists but is not ready
	// This includes: not enough ready replicas, missing endpoints, invalid configuration
	ErrSandboxNotReady = errors.New("sandbox not ready")
)

// Provider defines the interface for sandbox resource providers
// Implementations include BatchSandboxProvider, AgentSandboxProvider, etc.
type Provider interface {
	// GetEndpoint retrieves the network endpoint for a sandbox by its id/name
	// The namespace is determined by the provider's configuration
	// The form of the endpoint depends on the implementation:
	// BatchSandboxProvider returns the first entry of the endpoints it reads
	// from the resource, while the AgentSandboxProvider tests expect the
	// sandbox's status.serviceFQDN
	// Returns an error wrapping ErrSandboxNotFound if the sandbox does not
	// exist, or ErrSandboxNotReady if it exists but has no usable endpoint
	// Note: This is a local cache query, no network I/O involved
	GetEndpoint(sandboxId string) (string, error)

	// Start initializes and starts the provider's informer cache
	// Waits for cache sync before returning
	// Must be called before using GetEndpoint
	Start(ctx context.Context) error
}

// ProviderFactory creates a Provider instance based on the provider type.
// DefaultProviderFactory is the implementation in this package; it returns an
// error for provider types it does not recognise.
type ProviderFactory interface {
	CreateProvider(providerType ProviderType) (Provider, error)
}


================================================
FILE: components/internal/go.mod
================================================
module github.com/alibaba/opensandbox/internal

go 1.24.0

require go.uber.org/zap v1.27.0

require go.uber.org/multierr v1.10.0 // indirect


================================================
FILE: components/internal/go.sum
================================================
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.uber.org/multierr v1.10.0 h1:S0h4aNzvfcFsC3dRF1jLoaov7oRaKqRGC/pUEJ2yvPQ=
go.uber.org/multierr v1.10.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8=
go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=


================================================
FILE: components/internal/logger/logger.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package logger

// Field is a structured logging key/value pair.
// Fields are attached to a derived logger through Logger.With.
type Field struct {
	// Key is the field name.
	Key   string
	// Value is the field value; it may be of any type.
	Value any
}

// Logger defines the minimal logging surface shared by components.
//   - Formatted levels: Debugf/Infof/Warnf/Errorf
//   - With: attach structured fields to derived logger
//   - Named: derive a sub-logger with name
//   - Sync: flush buffers (no-op for implementations that don't buffer)
type Logger interface {
	// Debugf, Infof, Warnf and Errorf log a message built from template and
	// args at the corresponding level.
	Debugf(template string, args ...any)
	Infof(template string, args ...any)
	Warnf(template string, args ...any)
	Errorf(template string, args ...any)
	// With returns a logger that carries the given fields.
	With(fields ...Field) Logger
	// Named returns a sub-logger identified by name.
	Named(name string) Logger
	// Sync flushes any buffered output and reports the flush error, if any.
	Sync() error
}


================================================
FILE: components/internal/logger/zap.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package logger

import (
	"os"
	"strings"

	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
)

// envLogOutput is the environment variable holding a comma-separated list of
// log output paths; it is consulted only when Config.OutputPaths is empty.
const envLogOutput = "OPENSANDBOX_LOG_OUTPUT"

// Config is the minimal configuration to align execd/ingress defaults.
// - JSON encoding, ISO8601 time
// - Caller/stacktrace disabled
// - Stdout as default output
// - Level defaults to info
//
// Level is matched case-insensitively and "warning" is accepted as an alias
// for "warn"; unrecognised values fall back to info. When OutputPaths is
// empty, the OPENSANDBOX_LOG_OUTPUT environment variable (comma-separated) is
// used before falling back to stdout.
type Config struct {
	Level            string   // debug|info|warn|error|fatal (default: info)
	OutputPaths      []string // default: stdout
	ErrorOutputPaths []string // default: OutputPaths
}

// New builds a zap-backed Logger from cfg.
//
// It starts from zap's production config, applies the level and output paths
// resolved from cfg (and the environment), switches timestamps to ISO8601 and
// disables caller and stacktrace output. Any error from building the zap
// logger is returned unchanged.
func New(cfg Config) (Logger, error) {
	resolved := applyEnvOutputs(cfg)

	zc := zap.NewProductionConfig()
	zc.Level = zap.NewAtomicLevelAt(parseLevel(resolved.Level))
	zc.OutputPaths = resolved.OutputPaths
	zc.ErrorOutputPaths = resolved.ErrorOutputPaths

	// Caller and stacktrace are switched off both as features and as keys.
	zc.DisableCaller = true
	zc.DisableStacktrace = true
	enc := &zc.EncoderConfig
	enc.EncodeTime = zapcore.ISO8601TimeEncoder
	enc.CallerKey = ""
	enc.StacktraceKey = ""

	built, err := zc.Build()
	if err != nil {
		return nil, err
	}
	return &zapLogger{base: built, sugar: built.Sugar()}, nil
}

// MustNew wraps New and panics with the returned error instead of handing it
// back to the caller.
func MustNew(cfg Config) Logger {
	lg, err := New(cfg)
	if err == nil {
		return lg
	}
	panic(err)
}

// AsZapSugared exposes the zap SugaredLogger behind l.
// The boolean is false (and the logger nil) when l is not the zap-backed
// implementation from this package.
func AsZapSugared(l Logger) (*zap.SugaredLogger, bool) {
	if impl, isZap := l.(*zapLogger); isZap {
		return impl.sugar, true
	}
	return nil, false
}

// zapLogger is the zap-backed Logger implementation.
type zapLogger struct {
	// base is used to derive child loggers (With, Named) and to Sync.
	base  *zap.Logger
	// sugar is base.Sugar(); the formatted level methods log through it.
	sugar *zap.SugaredLogger
}

func (l *zapLogger) Debugf(template string, args ...any) {
	l.sugar.Debugf(template, args...)
}

func (l *zapLogger) Infof(template string, args ...any) {
	l.sugar.Infof(template, args...)
}

func (l *zapLogger) Warnf(template string, args ...any) {
	l.sugar.Warnf(template, args...)
}

func (l *zapLogger) Errorf(template string, args ...any) {
	l.sugar.Errorf(template, args...)
}

// With returns a child logger that attaches the given fields to every entry.
// With no fields the receiver itself is returned, so no new logger is built.
func (l *zapLogger) With(fields ...Field) Logger {
	if len(fields) == 0 {
		return l
	}
	converted := make([]zap.Field, len(fields))
	for i := range fields {
		converted[i] = zap.Any(fields[i].Key, fields[i].Value)
	}
	child := l.base.With(converted...)
	return &zapLogger{base: child, sugar: child.Sugar()}
}

// Named returns a child logger produced by zap's Named(name); the receiver is
// left untouched.
func (l *zapLogger) Named(name string) Logger {
	nb := l.base.Named(name)
	return &zapLogger{base: nb, sugar: nb.Sugar()}
}

// Sync delegates to the underlying zap logger's Sync and returns its error.
func (l *zapLogger) Sync() error {
	return l.base.Sync()
}

// parseLevel maps a case-insensitive level name to its zapcore level.
// "warning" is accepted as an alias of "warn". Anything else, including the
// empty string and "info" itself, yields InfoLevel.
func parseLevel(level string) zapcore.Level {
	known := map[string]zapcore.Level{
		"debug":   zapcore.DebugLevel,
		"warn":    zapcore.WarnLevel,
		"warning": zapcore.WarnLevel,
		"error":   zapcore.ErrorLevel,
		"fatal":   zapcore.FatalLevel,
	}
	if parsed, found := known[strings.ToLower(level)]; found {
		return parsed
	}
	return zapcore.InfoLevel
}

// applyEnvOutputs fills in the output path defaults of cfg and returns the
// updated copy.
//
// OutputPaths resolution order:
//  1. the value already present in cfg,
//  2. the comma-separated list in the OPENSANDBOX_LOG_OUTPUT environment variable,
//  3. stdout.
//
// ErrorOutputPaths defaults to the resolved OutputPaths.
func applyEnvOutputs(cfg Config) Config {
	if len(cfg.OutputPaths) == 0 {
		// splitAndTrim drops blank entries, so an unset variable and a value
		// made only of separators/whitespace (e.g. "," or " , ") both produce
		// an empty list. Falling through to stdout in that case keeps the
		// logger from being built with no output sink at all.
		cfg.OutputPaths = splitAndTrim(os.Getenv(envLogOutput))
		if len(cfg.OutputPaths) == 0 {
			cfg.OutputPaths = []string{"stdout"}
		}
	}
	if len(cfg.ErrorOutputPaths) == 0 {
		// Default error output matches output paths. Copy instead of aliasing
		// so a later change to one slice cannot leak into the other.
		cfg.ErrorOutputPaths = append([]string(nil), cfg.OutputPaths...)
	}
	return cfg
}

// splitAndTrim breaks s on commas, trims surrounding whitespace from every
// piece, and discards pieces that end up empty. The result is never nil.
func splitAndTrim(s string) []string {
	pieces := strings.Split(s, ",")
	kept := make([]string, 0, len(pieces))
	for i := range pieces {
		trimmed := strings.TrimSpace(pieces[i])
		if trimmed == "" {
			continue
		}
		kept = append(kept, trimmed)
	}
	return kept
}


================================================
FILE: components/internal/version/version.go
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package version

import (
	"fmt"
	"runtime"
)

// Package values are typically overridden at build time via -ldflags.
// The literals below are placeholders that show up in EchoVersion output when
// no override was supplied.
var (
	// Version is the component version.
	Version = "dirty"
	// BuildTime is when the binary was built.
	BuildTime = "assigned-at-build-time"
	// GitCommit is the commit id used to build the binary.
	GitCommit = "assigned-at-build-time"
)

// EchoVersion writes a build-info banner for componentName to stdout
// (e.g. "OpenSandbox Ingress", "OpenSandbox Execd").
// The banner lists the package-level Version, GitCommit and BuildTime values
// followed by the Go runtime version and the OS/architecture pair.
func EchoVersion(componentName string) {
	const (
		heavyRule = "====================================================="
		lightRule = "-----------------------------------------------------"
	)

	fmt.Println(heavyRule)
	fmt.Printf(" %s\n", componentName)
	fmt.Println(lightRule)

	// Labels are pre-padded so the colons line up in the printed banner.
	rows := []struct{ label, value string }{
		{"Version     ", Version},
		{"Git Commit  ", GitCommit},
		{"Build Time  ", BuildTime},
		{"Go Version  ", runtime.Version()},
		{"Platform    ", runtime.GOOS + "/" + runtime.GOARCH},
	}
	for _, row := range rows {
		fmt.Printf(" %s: %s\n", row.label, row.value)
	}

	fmt.Println(heavyRule)
}


================================================
FILE: docs/.nvmrc
================================================
22


================================================
FILE: docs/.vitepress/config.mts
================================================
import { defineConfig } from "vitepress";
import { loadManifest } from "./scripts/docs-manifest.mjs";

// Navigation, sidebars and route rewrites all come from the generated docs manifest.
const manifest = loadManifest();
// Deployment base path; DOCS_BASE is expected to look like "/" or "/sub-path/".
const docsBase = process.env.DOCS_BASE || "/";
// NOTE(review): per the VitePress site-config docs, URLs inside `head` are not
// prefixed with `base`, so the favicon link has to carry the base itself.
// Confirm against the VitePress version in use. With the default base this is
// still "/favicon.svg".
const faviconHref = `${docsBase.endsWith("/") ? docsBase : `${docsBase}/`}favicon.svg`;

export default defineConfig({
  title: "OpenSandbox",
  description: "OpenSandbox documentation site for users and developers",
  head: [["link", { rel: "icon", type: "image/svg+xml", href: faviconHref }]],
  cleanUrls: true,
  lastUpdated: true,
  base: docsBase,
  ignoreDeadLinks: [/^https?:\/\/localhost/, /\/README$/, /\/index$/, "./contributing"],
  srcExclude: ["node_modules/**", "README_zh.md", "RELEASE_NOTE_TEMPLATE.md"],
  rewrites: manifest.rewrites,
  themeConfig: {
    logo: "/assets/logo.svg",
    search: {
      provider: "local",
    },
    socialLinks: [{ icon: "github", link: "https://github.com/alibaba/OpenSandbox" }],
    nav: manifest.nav.en,
    // Sidebar keys are path prefixes ("/…" for English, "/zh/…" for Chinese),
    // so both locales can share a single map.
    sidebar: {
      ...manifest.sidebar.en,
      ...manifest.sidebar.zh,
    },
    outline: {
      level: [2, 3],
    },
  },
  locales: {
    root: {
      label: "English",
      lang: "en-US",
      themeConfig: {
        nav: manifest.nav.en,
      },
    },
    zh: {
      label: "简体中文",
      lang: "zh-CN",
      themeConfig: {
        nav: manifest.nav.zh,
      },
    },
  },
});


================================================
FILE: docs/.vitepress/scripts/docs-manifest.mjs
================================================
import fs from "node:fs";
import path from "node:path";
import { fileURLToPath } from "node:url";

// ESM has no __filename/__dirname, so derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// This script lives three directory levels below the repository root.
const repoRoot = path.resolve(__dirname, "../../../");
const docsRoot = path.join(repoRoot, "docs");
// Generated markdown pages are written here.
const generatedRoot = path.join(docsRoot, "generated");
// Manifest JSON (nav, sidebar, rewrites) is written here.
const manifestPath = path.join(docsRoot, ".vitepress", "generated", "manifest.json");

// GitHub URL prefixes used when rewriting relative links: blob for files,
// tree for directories, raw for images.
const blobBaseUrl = "https://github.com/alibaba/OpenSandbox/blob/main";
const treeBaseUrl = "https://github.com/alibaba/OpenSandbox/tree/main";
const rawBaseUrl = "https://raw.githubusercontent.com/alibaba/OpenSandbox/main";

// Directory entry names skipped while walking the repository for markdown.
// The check is by name only, so a regular file with one of these names is
// skipped as well.
const ignoredDirNames = new Set([
  ".git",
  ".github",
  "node_modules",
  ".vitepress",
  ".pytest_cache",
  "generated",
  ".venv",
  "venv",
  "__pycache__",
  "dist",
  "build",
  "target",
  "bin",
]);
// Matches README.md as well as the Chinese variants README-zh.md,
// README_zh.md, README-zh-cn.md, README_zh_cn.md, ... (case-insensitive).
// The locale suffix is optional, which is why plain README.md matches too.
const zhReadmePattern = /^README(?:[-_](?:zh|zh-cn|zh_cn))?\.md$/i;
// Matches only the plain README.md (case-insensitive).
const standardReadmePattern = /^README\.md$/i;

// Sections whose pages are discovered automatically.
// - id: section the discovered entries are assigned to
// - scanRoots: repo-relative directories that are walked for markdown files
// - includeDevelopment: whether DEVELOPMENT.md files become pages in addition
//   to README.md files
const sectionDefinitions = [
  {
    id: "modules",
    scanRoots: ["server", "components", "sandboxes", "kubernetes", "specs", "sdks"],
    includeDevelopment: true,
  },
  {
    id: "examples",
    scanRoots: ["examples"],
    includeDevelopment: false,
  },
  {
    id: "community",
    scanRoots: ["oseps"],
    includeDevelopment: false,
  },
];

// Hand-curated pages. These are listed ahead of auto-discovered entries when
// entries are merged, so on a slug collision the manual entry wins.
// - slug: route of the generated page (without locale prefix)
// - enPath / zhPath: repo-relative source files; a null zhPath means the
//   English source is reused for the Chinese page
// - titleEn / titleZh: page titles per locale
const manualEntries = [
  {
    key: "guide-home",
    sectionId: "overview",
    slug: "overview/home",
    enPath: "README.md",
    zhPath: "docs/README_zh.md",
    titleEn: "OpenSandbox",
    titleZh: "OpenSandbox",
  },
  {
    key: "guide-architecture",
    sectionId: "overview",
    slug: "overview/architecture",
    enPath: "docs/architecture.md",
    zhPath: null,
    titleEn: "Architecture",
    titleZh: "架构设计",
  },
  {
    key: "guide-network",
    sectionId: "modules",
    slug: "design/single-host-network",
    enPath: "docs/single_host_network.md",
    zhPath: null,
    titleEn: "Single Host Network",
    titleZh: "单机场景网络设计",
  },
  {
    key: "community-contributing",
    sectionId: "community",
    slug: "community/contributing",
    enPath: "CONTRIBUTING.md",
    zhPath: null,
    titleEn: "Contributing",
    titleZh: "参与贡献",
  },
  {
    key: "community-code-of-conduct",
    sectionId: "community",
    slug: "community/code-of-conduct",
    enPath: "CODE_OF_CONDUCT.md",
    zhPath: null,
    titleEn: "Code of Conduct",
    titleZh: "行为准则",
  },
];

// Sidebar group headings for the "modules" section, keyed by locale and then
// by the first path segment of the page slug.
const moduleGroupLabels = {
  en: {
    sdks: "SDKs",
    specs: "Specs & API",
    server: "Server",
    components: "Components",
    sandboxes: "Sandboxes",
    kubernetes: "Kubernetes",
    design: "Design",
  },
  zh: {
    sdks: "SDKs",
    specs: "Specs & API",
    server: "Server",
    components: "Components",
    sandboxes: "Sandboxes",
    kubernetes: "Kubernetes",
    design: "设计",
  },
};

// Sidebar group headings for the "community" section, keyed by locale.
const communityGroupLabels = {
  en: {
    community: "Community",
    oseps: "OSEPs",
  },
  zh: {
    community: "社区",
    oseps: "OSEPs",
  },
};

// English sidebar titles that override whatever heading the source file has,
// keyed by repo-relative path.
// NOTE(review): "cli/README.md" is not under any scan root and is not a manual
// entry, and the two ".../build/generated/..." paths sit below a directory
// named "build", which the markdown walk skips — these keys look unreachable;
// confirm before relying on them.
const shortTitleByPath = {
  "sdks/code-interpreter/javascript/README.md": "Code Interpreter JS SDK",
  "sdks/code-interpreter/kotlin/README.md": "Code Interpreter Kotlin SDK",
  "sdks/code-interpreter/python/README.md": "Code Interpreter Python SDK",
  "sdks/code-interpreter/csharp/README.md": "Code Interpreter C# SDK",
  "sdks/sandbox/javascript/README.md": "Sandbox JS SDK",
  "sdks/sandbox/kotlin/README.md": "Sandbox Kotlin SDK",
  "sdks/sandbox/python/README.md": "Sandbox Python SDK",
  "sdks/sandbox/csharp/README.md": "Sandbox C# SDK",
  "sdks/mcp/sandbox/python/README.md": "MCP Sandbox Python SDK",
  "cli/README.md": "CLI (Python)",
  "sdks/sandbox/kotlin/sandbox-api/build/generated/api/execd/README.md": "Sandbox Execd API (Kotlin)",
  "sdks/sandbox/kotlin/sandbox-api/build/generated/api/lifecycle/README.md": "Sandbox Lifecycle API (Kotlin)",

  "examples/agent-sandbox/README.md": "Agent Sandbox",
  "examples/aio-sandbox/README.md": "AIO Sandbox",
  "examples/chrome/README.md": "Chrome",
  "examples/claude-code/README.md": "Claude Code",
  "examples/code-interpreter/README.md": "Code Interpreter",
  "examples/codex-cli/README.md": "Codex CLI",
  "examples/desktop/README.md": "Desktop (VNC)",
  "examples/gemini-cli/README.md": "Gemini CLI",
  "examples/google-adk/README.md": "Google ADK",
  "examples/host-volume-mount/README.md": "Host Volume Mount",
  "examples/langgraph/README.md": "LangGraph",
  "examples/playwright/README.md": "Playwright",
  "examples/README.md": "Examples Overview",
  "examples/rl-training/README.md": "RL Training",
  "examples/vscode/README.md": "VS Code",

  "server/README.md": "Server",
  "server/DEVELOPMENT.md": "Server Development",
  "components/ingress/README.md": "Ingress",
  "components/ingress/DEVELOPMENT.md": "Ingress Development",
  "components/egress/README.md": "Egress Sidecar",
  "components/execd/README.md": "execd",
  "components/execd/DEVELOPMENT.md": "execd Development",
  "sandboxes/code-interpreter/README.md": "Code Interpreter Runtime",
  "kubernetes/README.md": "Kubernetes Controller",
  "kubernetes/examples/task-executor/README.md": "Task Executor",
  "kubernetes/examples/controller/README.md": "Controller Example",
  "oseps/README.md": "OSEP Overview",
};

// Chinese sidebar titles, keyed by the repo-relative path of the ENGLISH
// source file (lookups use the English path even when a Chinese README exists).
// NOTE(review): as with the English table, "cli/README.md" and the
// ".../build/generated/..." keys look unreachable — confirm.
const shortTitleByPathZh = {
  "sdks/code-interpreter/javascript/README.md": "代码解释器 JS SDK",
  "sdks/code-interpreter/kotlin/README.md": "代码解释器 Kotlin SDK",
  "sdks/code-interpreter/python/README.md": "代码解释器 Python SDK",
  "sdks/code-interpreter/csharp/README.md": "代码解释器 C# SDK",
  "sdks/sandbox/javascript/README.md": "沙箱 JS SDK",
  "sdks/sandbox/kotlin/README.md": "沙箱 Kotlin SDK",
  "sdks/sandbox/python/README.md": "沙箱 Python SDK",
  "sdks/sandbox/csharp/README.md": "沙箱 C# SDK",
  "sdks/mcp/sandbox/python/README.md": "MCP 沙箱 Python SDK",
  "cli/README.md": "CLI（Python）",
  "sdks/sandbox/kotlin/sandbox-api/build/generated/api/execd/README.md": "沙箱 Execd API（Kotlin）",
  "sdks/sandbox/kotlin/sandbox-api/build/generated/api/lifecycle/README.md": "沙箱生命周期 API（Kotlin）",

  "examples/agent-sandbox/README.md": "Agent Sandbox",
  "examples/aio-sandbox/README.md": "AIO 沙箱",
  "examples/chrome/README.md": "Chrome",
  "examples/claude-code/README.md": "Claude Code",
  "examples/code-interpreter/README.md": "代码解释器",
  "examples/codex-cli/README.md": "Codex CLI",
  "examples/desktop/README.md": "桌面环境（VNC）",
  "examples/gemini-cli/README.md": "Gemini CLI",
  "examples/google-adk/README.md": "Google ADK",
  "examples/host-volume-mount/README.md": "宿主机目录挂载",
  "examples/langgraph/README.md": "LangGraph",
  "examples/playwright/README.md": "Playwright",
  "examples/README.md": "示例总览",
  "examples/rl-training/README.md": "强化学习训练",
  "examples/vscode/README.md": "VS Code",

  "server/README.md": "Server",
  "server/DEVELOPMENT.md": "Server 开发指南",
  "components/ingress/README.md": "Ingress",
  "components/ingress/DEVELOPMENT.md": "Ingress 开发指南",
  "components/egress/README.md": "Egress Sidecar",
  "components/execd/README.md": "execd",
  "components/execd/DEVELOPMENT.md": "execd 开发指南",
  "sandboxes/code-interpreter/README.md": "代码解释器运行时",
  "kubernetes/README.md": "Kubernetes 控制器",
  "kubernetes/examples/task-executor/README.md": "Task Executor",
  "kubernetes/examples/controller/README.md": "Controller 示例",
  "oseps/README.md": "OSEP 总览",
};

// Creates dirPath together with any missing parent directories.
function ensureDir(dirPath) {
  fs.mkdirSync(dirPath, { recursive: true });
}

// Recursively removes targetPath when it exists; does nothing otherwise.
// The removal is retried up to 5 times, 80 ms apart.
function rmIfExists(targetPath) {
  if (fs.existsSync(targetPath)) {
    fs.rmSync(targetPath, { recursive: true, force: true, maxRetries: 5, retryDelay: 80 });
  }
}

/**
 * Recursively collects the absolute paths of all `.md` files below a directory.
 * Entries whose name is in `ignoredDirNames` are skipped, as is anything that
 * is neither a directory nor a regular file.
 *
 * @param {string} absDirPath Absolute directory to walk.
 * @param {string[]} [acc] Accumulator the paths are appended to.
 * @returns {string[]} The accumulator.
 */
function walkMarkdownFiles(absDirPath, acc = []) {
  for (const dirent of fs.readdirSync(absDirPath, { withFileTypes: true })) {
    const { name } = dirent;
    if (ignoredDirNames.has(name)) {
      continue;
    }
    const childPath = path.join(absDirPath, name);
    if (dirent.isDirectory()) {
      walkMarkdownFiles(childPath, acc);
    } else if (dirent.isFile() && name.endsWith(".md")) {
      acc.push(childPath);
    }
  }
  return acc;
}

/**
 * Tells whether a repo-relative path lies inside a directory that must never
 * be turned into a docs page (virtualenvs, dependency folders, build output,
 * the docs site's own generated content).
 *
 * @param {string} repoRelPath Repo-relative path; "/" or "\\" separators.
 * @returns {boolean} True when any path segment is a denied directory.
 */
function shouldIgnoreRepoPath(repoRelPath) {
  // Every fragment below is written as "/name/". Repo-relative paths have no
  // leading slash, so anchor the path with one; otherwise a denied directory
  // in the FIRST segment (e.g. "node_modules/x.md", "docs/generated/x.md")
  // would never match.
  const normalized = `/${repoRelPath.replaceAll("\\", "/")}`;
  const denylistFragments = [
    "/.venv/",
    "/venv/",
    "/node_modules/",
    "/docs/.vitepress/",
    "/docs/generated/",
    "/.pytest_cache/",
    "/__pycache__/",
    "/dist/",
    "/build/",
    "/target/",
    "/bin/",
  ];
  return denylistFragments.some((fragment) => normalized.includes(fragment));
}

// Converts an absolute path into a repo-relative one that always uses "/"
// as the separator, regardless of platform.
function toRepoRelative(absPath) {
  const relative = path.relative(repoRoot, absPath);
  return relative.split(path.sep).join("/");
}

/**
 * Returns the text of the first level-1..3 ATX heading ("#", "##", "###") in
 * a markdown file, ignoring anything inside ``` fenced blocks.
 *
 * @param {string} absPath Absolute path of the markdown file.
 * @param {string} fallbackTitle Returned when the file is missing or has no
 *   such heading.
 * @returns {string} Trimmed heading text, or the fallback.
 */
function readHeadingTitle(absPath, fallbackTitle) {
  if (!fs.existsSync(absPath)) {
    return fallbackTitle;
  }
  const headingPattern = /^#{1,3}\s+(.+)$/;
  let insideFence = false;
  for (const rawLine of fs.readFileSync(absPath, "utf8").split(/\r?\n/)) {
    const text = rawLine.trimStart();
    if (text.startsWith("```")) {
      // A fence marker toggles code-block state and is never a heading itself.
      insideFence = !insideFence;
      continue;
    }
    if (insideFence) {
      continue;
    }
    const hit = headingPattern.exec(text);
    if (hit) {
      return hit[1].trim();
    }
  }
  return fallbackTitle;
}

// Collapses every run of whitespace into a single space and strips leading
// and trailing whitespace.
function normalizeTitleWhitespace(title) {
  return title.split(/\s+/).filter(Boolean).join(" ");
}

/**
 * Builds a compact "OSEP-NNNN: …" sidebar title for an OSEP document.
 *
 * @param {string} repoRelPath Repo-relative path; only paths shaped like
 *   `oseps/0NNN-some-slug.md` are rewritten.
 * @param {string} title Title returned unchanged for any other path.
 * @param {string} [locale] "zh" selects the Chinese wording for known topics.
 * @returns {string} The shortened title.
 */
function shortenOsepTitle(repoRelPath, title, locale = "en") {
  const parsed = /^oseps\/(0\d{3})-(.+)\.md$/i.exec(repoRelPath);
  if (parsed === null) {
    return title;
  }
  const [, osepNumber, rawSlug] = parsed;
  const slug = rawSlug.toLowerCase();

  // Known topics, checked in priority order; the first hit wins.
  const knownTopics = [
    {
      hit: slug.includes("fqdn") && slug.includes("egress"),
      en: "FQDN Egress Control",
      zh: "FQDN 出口访问控制",
    },
    {
      hit: slug.includes("agent-sandbox") || slug.includes("kubernetes-sigs"),
      en: "Agent Sandbox on Kubernetes",
      zh: "Kubernetes Agent Sandbox 支持",
    },
    {
      hit: slug.includes("volume"),
      en: "Volume & VolumeBinding Support",
      zh: "Volume 与 VolumeBinding 支持",
    },
  ];
  const topic = knownTopics.find((candidate) => candidate.hit);
  if (topic) {
    return `OSEP-${osepNumber}: ${locale === "zh" ? topic.zh : topic.en}`;
  }

  // Unknown topic: humanize the slug. Words of up to three characters are
  // treated as acronyms and fully upper-cased; longer words are capitalized.
  const words = [];
  for (const word of slug.split("-")) {
    words.push(word.length <= 3 ? word.toUpperCase() : word.charAt(0).toUpperCase() + word.slice(1));
  }
  return `OSEP-${osepNumber}: ${words.join(" ")}`;
}

// Shortens an English heading for sidebar use: drops a leading "Alibaba" /
// "OpenSandbox", abbreviates language pairs, removes a trailing "Example",
// and collapses the first " SDK for " into a single space.
function shortenTitleByRule(title) {
  // Applied in order; each rule sees the output of the previous one.
  const rewrites = [
    [/^Alibaba\s+/i, ""],
    [/^OpenSandbox\s+/i, ""],
    [/\bJavaScript\/TypeScript\b/g, "JS"],
    [/\bJava\/Kotlin\b/g, "Kotlin"],
    [/\s+Example$/i, ""],
    [/\s+SDK for /i, " "],
  ];
  const shortened = rewrites.reduce(
    (text, [pattern, replacement]) => text.replace(pattern, replacement),
    normalizeTitleWhitespace(title),
  );
  return normalizeTitleWhitespace(shortened);
}

// Chinese-locale counterpart of the English title shortener: same rules,
// except a trailing "Example" is replaced with " 示例" instead of removed.
function shortenTitleByRuleZh(title) {
  // Applied in order; each rule sees the output of the previous one.
  const rewrites = [
    [/^Alibaba\s+/i, ""],
    [/^OpenSandbox\s+/i, ""],
    [/\bJavaScript\/TypeScript\b/g, "JS"],
    [/\bJava\/Kotlin\b/g, "Kotlin"],
    [/\s+Example$/i, " 示例"],
    [/\s+SDK for /i, " "],
  ];
  const shortened = rewrites.reduce(
    (text, [pattern, replacement]) => text.replace(pattern, replacement),
    normalizeTitleWhitespace(title),
  );
  return normalizeTitleWhitespace(shortened);
}

/**
 * Picks the sidebar title for a page.
 * Precedence: per-path override for the locale, then the OSEP naming scheme
 * for `oseps/0NNN-*.md`, then the locale's generic shortening rules.
 *
 * @param {string} repoRelPath Repo-relative path of the English source.
 * @param {string} currentTitle Title taken from the document.
 * @param {string} [locale] "zh" for Chinese; anything else is English.
 * @returns {string} The title to display.
 */
function getShortTitle(repoRelPath, currentTitle, locale = "en") {
  const isZh = locale === "zh";
  const override = (isZh ? shortTitleByPathZh : shortTitleByPath)[repoRelPath];
  if (override) {
    return override;
  }
  if (/^oseps\/0\d{3}-.+\.md$/i.test(repoRelPath)) {
    return shortenOsepTitle(repoRelPath, currentTitle, locale);
  }
  return isZh ? shortenTitleByRuleZh(currentTitle) : shortenTitleByRule(currentTitle);
}

// Renders a value as a double-quoted YAML scalar, backslash-escaping every
// backslash and double quote in its string form.
function toYamlString(value) {
  const escaped = String(value).replace(/[\\"]/g, (ch) => `\\${ch}`);
  return `"${escaped}"`;
}

/**
 * Derives the route slug of a page from its repo-relative markdown path.
 * - README (any recognized variant) at the root  -> "overview/home"
 * - README elsewhere                             -> "<dir>/readme"
 * - DEVELOPMENT.md (any case)                    -> "<dir>/development"
 * - anything else                                -> the path without ".md"
 *
 * @param {string} relPath Repo-relative path; backslashes are accepted.
 * @returns {string} The slug.
 */
function normalizeSlugFromPath(relPath) {
  const posixPath = relPath.replaceAll("\\", "/");
  const parentDir = path.posix.dirname(posixPath);
  const fileName = path.posix.basename(posixPath);
  const lowered = fileName.toLowerCase();

  const isReadmeLike = lowered === "readme.md" || zhReadmePattern.test(fileName);
  if (isReadmeLike) {
    if (parentDir === ".") {
      return "overview/home";
    }
    return `${parentDir}/readme`;
  }
  return lowered === "development.md" ? `${parentDir}/development` : posixPath.replace(/\.md$/i, "");
}

/**
 * Finds the localized README that sits next to a given README.
 *
 * @param {string} repoRelPath Repo-relative path of the README.
 * @param {Map<string, string[]>} readmeCandidatesByDir README-like file names
 *   grouped by directory.
 * @returns {string|null} Path of the first candidate in the same directory
 *   that is not plain "README.md" (case-insensitive), or null.
 */
function resolveZhCandidate(repoRelPath, readmeCandidatesByDir) {
  const dir = path.posix.dirname(repoRelPath);
  const siblings = readmeCandidatesByDir.get(dir) ?? [];
  const localized = siblings.find((name) => name.toLowerCase() !== "readme.md");
  return localized === undefined ? null : `${dir}/${localized}`;
}

/**
 * Computes the relative path from a generated page to an asset stored under
 * `docs/assets/`.
 *
 * @param {string} locale Locale folder of the generated page ("en" / "zh").
 * @param {string} routeSlug Slug of the generated page.
 * @param {string} resolvedRepoPath Repo-relative path of the referenced file.
 * @returns {string|null} A relative path starting with "." or "/", or null
 *   when the file is not under `docs/assets/`.
 */
function buildGeneratedAssetPath(locale, routeSlug, resolvedRepoPath) {
  const repoPath = resolvedRepoPath.replaceAll("\\", "/");
  if (!repoPath.startsWith("docs/assets/")) {
    return null;
  }
  // Both sides are expressed relative to the docs root.
  const pageDir = path.posix.dirname(`generated/${locale}/${routeSlug}.md`);
  const assetFromDocsRoot = repoPath.replace(/^docs\//, "");
  const relative = path.posix.relative(pageDir, assetFromDocsRoot) || "./";
  return relative.startsWith(".") || relative.startsWith("/") ? relative : `./${relative}`;
}

/**
 * Rewrites one link or image target found in a source markdown file so that
 * it still resolves from the generated docs page.
 *
 * - Targets with a URI scheme (http:, https:, mailto:, data:, tel:, ftp:, …),
 *   pure "#fragment" targets and absolute "/…" targets are returned unchanged.
 * - Images under `docs/assets/` become paths relative to the generated page.
 * - Other images point at the raw GitHub URL of the file.
 * - Other links point at the GitHub tree URL (existing directories) or blob
 *   URL (everything else).
 *
 * @param {string} target Link target as written in the markdown.
 * @param {string} sourceDirRel Repo-relative directory of the source file.
 * @param {boolean} isImage Whether the target belongs to an image.
 * @param {string} routeSlug Slug of the generated page.
 * @param {string} locale Locale of the generated page.
 * @returns {string} The rewritten target, fragment preserved.
 */
function normalizeLinkTarget(target, sourceDirRel, isImage, routeSlug, locale) {
  // Any "scheme:" prefix marks an external/opaque URI. Two or more scheme
  // characters are required so a stray drive-letter-like "C:" is not mistaken
  // for a scheme.
  const hasUriScheme = /^[a-z][a-z0-9+.-]+:/i.test(target);
  if (hasUriScheme || target.startsWith("#") || target.startsWith("/")) {
    return target;
  }

  // Split on the FIRST "#" only, so a fragment that itself contains "#" is
  // kept whole. An empty fragment ("file.md#") is dropped.
  const hashIndex = target.indexOf("#");
  const rawPath = hashIndex === -1 ? target : target.slice(0, hashIndex);
  const hashFragment = hashIndex === -1 ? "" : target.slice(hashIndex + 1);
  const hashSuffix = hashFragment ? `#${hashFragment}` : "";

  const resolvedPath = path.posix.normalize(path.posix.join(sourceDirRel, rawPath));
  const localAssetPath = isImage ? buildGeneratedAssetPath(locale, routeSlug, resolvedPath) : null;
  if (localAssetPath) {
    return `${localAssetPath}${hashSuffix}`;
  }

  let urlBase;
  if (isImage) {
    urlBase = `${rawBaseUrl}/${resolvedPath}`;
  } else {
    const absTarget = path.join(repoRoot, resolvedPath);
    const isDirectory = fs.existsSync(absTarget) && fs.statSync(absTarget).isDirectory();
    urlBase = `${isDirectory ? treeBaseUrl : blobBaseUrl}/${resolvedPath}`;
  }
  return `${urlBase}${hashSuffix}`;
}

/**
 * Rewrites every markdown link/image (`[text](target)`, `![alt](target)`) and
 * every HTML `<img src="…">` in a document through the link-target normalizer.
 *
 * @param {string} markdown Source markdown.
 * @param {string} sourceRelPath Repo-relative path of the source file.
 * @param {string} routeSlug Slug of the generated page.
 * @param {string} locale Locale of the generated page.
 * @returns {string} Markdown with rewritten targets.
 */
function rewriteRelativeLinks(markdown, sourceRelPath, routeSlug, locale) {
  const baseDir = path.posix.dirname(sourceRelPath);
  const resolve = (target, isImage) => normalizeLinkTarget(target, baseDir, isImage, routeSlug, locale);

  const rewriteMarkdownLink = (whole, bang, label, inner) => {
    const content = inner.trim();
    if (content === "") {
      return whole;
    }
    // The target ends at the first whitespace; anything after it (typically a
    // quoted link title) is carried over untouched.
    const splitAt = content.search(/\s/);
    const target = splitAt < 0 ? content : content.slice(0, splitAt);
    const rest = splitAt < 0 ? "" : content.slice(splitAt);
    return `${bang}[${label}](${resolve(target, bang === "!")}${rest})`;
  };

  const rewriteImgTag = (_whole, before, quote, src, after) =>
    `<img${before}src=${quote}${resolve(src, true)}${quote}${after}>`;

  return markdown
    .replace(/(!?)\[([^\]]*?)\]\(([^)]+)\)/g, rewriteMarkdownLink)
    .replace(/<img([^>]*?)src=(["'])([^"']+)\2([^>]*)>/gi, rewriteImgTag);
}

/**
 * Produces the full text of a generated page: YAML front matter with the
 * title, the (optionally link-rewritten) source markdown, and a footer that
 * links back to the source file on GitHub.
 *
 * @param {object} options
 * @param {string} options.locale "zh" selects the Chinese footer text.
 * @param {string} [options.title] Page title; when empty, the first heading
 *   of the source (or its base file name) is used.
 * @param {string} options.sourceRelPath Repo-relative source markdown path.
 * @param {string} options.routeSlug Slug of the generated page.
 * @param {boolean} [options.passthrough] Skip link rewriting when true.
 * @returns {string} The page content.
 */
function renderPageSource({ locale, title, sourceRelPath, routeSlug, passthrough = false }) {
  const absSource = path.join(repoRoot, sourceRelPath);
  const rawMarkdown = fs.readFileSync(absSource, "utf8");
  const pageTitle = title || readHeadingTitle(absSource, path.posix.basename(sourceRelPath, ".md"));
  const body = passthrough ? rawMarkdown : rewriteRelativeLinks(rawMarkdown, sourceRelPath, routeSlug, locale);

  const sourceUrl = `${blobBaseUrl}/${sourceRelPath}`;
  const sourceLink = `[\`${sourceRelPath}\`](${sourceUrl})`;
  const sourceNotice =
    locale === "zh" ? `> 此页内容来自仓库源文件：${sourceLink}` : `> This page is sourced from: ${sourceLink}`;

  return ["---", `title: ${toYamlString(pageTitle)}`, "---", "", body, "", "---", "", sourceNotice, ""].join("\n");
}

/**
 * Builds a human-readable title from the directory part of a path, e.g.
 * "kubernetes/examples/task-executor/README.md" -> "Kubernetes / Examples / Task Executor".
 * Files at the repository root or directly under "docs" yield "Overview".
 *
 * @param {string} repoRelPath Repo-relative file path.
 * @returns {string} The title.
 */
function prettifyPathTitle(repoRelPath) {
  const dirPath = path.posix.dirname(repoRelPath);
  if ([".", "docs"].includes(dirPath)) {
    return "Overview";
  }
  // Dashes/underscores become spaces first, then every word is capitalized.
  const toTitleCase = (segment) => segment.replace(/[-_]/g, " ").replace(/\b\w/g, (ch) => ch.toUpperCase());
  return dirPath.split("/").map(toTitleCase).join(" / ");
}

/**
 * Discovers page entries by walking every scan root of every section.
 *
 * Pass 1 records, per directory, the README-like file names found there
 * (plain README.md and the Chinese variants). Pass 2 creates one entry for
 * each plain README.md, for each DEVELOPMENT.md (only in sections with
 * includeDevelopment), and for each numbered OSEP document in the community
 * section.
 *
 * @returns {object[]} Entries de-duplicated by key and sorted by slug.
 */
function collectAutoEntries() {
  const readmeCandidatesByDir = new Map();
  const entries = [];

  // Pass 1: index README-like files by directory so pass 2 can pair an
  // English README with its Chinese sibling.
  for (const section of sectionDefinitions) {
    for (const scanRoot of section.scanRoots) {
      const absScanRoot = path.join(repoRoot, scanRoot);
      if (!fs.existsSync(absScanRoot)) {
        continue;
      }
      const files = walkMarkdownFiles(absScanRoot);
      for (const absPath of files) {
        const repoRelPath = toRepoRelative(absPath);
        if (shouldIgnoreRepoPath(repoRelPath)) {
          continue;
        }
        const fileName = path.posix.basename(repoRelPath);
        const dirName = path.posix.dirname(repoRelPath);

        if (zhReadmePattern.test(fileName)) {
          const arr = readmeCandidatesByDir.get(dirName) ?? [];
          arr.push(fileName);
          readmeCandidatesByDir.set(dirName, arr);
        }
      }
    }
  }

  // Pass 2: build the entries. Each scan root is walked a second time.
  for (const section of sectionDefinitions) {
    for (const scanRoot of section.scanRoots) {
      const absScanRoot = path.join(repoRoot, scanRoot);
      if (!fs.existsSync(absScanRoot)) {
        continue;
      }
      const files = walkMarkdownFiles(absScanRoot);
      for (const absPath of files) {
        const repoRelPath = toRepoRelative(absPath);
        if (shouldIgnoreRepoPath(repoRelPath)) {
          continue;
        }
        const fileName = path.posix.basename(repoRelPath);
        // Chinese README variants never get their own entry; they are attached
        // to the English README's entry as zhPath instead.
        if (zhReadmePattern.test(fileName) && !standardReadmePattern.test(fileName)) {
          continue;
        }

        const isReadme = standardReadmePattern.test(fileName);
        // Exact, case-sensitive match (unlike the README patterns).
        const isDevelopment = fileName === "DEVELOPMENT.md";
        const isOsepDoc = section.id === "community" && /^0\d{3}-.+\.md$/i.test(fileName);
        if (!isReadme && !(section.includeDevelopment && isDevelopment) && !isOsepDoc) {
          continue;
        }

        const zhCandidate = isReadme ? resolveZhCandidate(repoRelPath, readmeCandidatesByDir) : null;
        const entryKey = `auto:${section.id}:${repoRelPath}`;
        const slug = normalizeSlugFromPath(repoRelPath);
        const titleFallback = isDevelopment ? `${prettifyPathTitle(repoRelPath)} Development` : prettifyPathTitle(repoRelPath);
        entries.push({
          key: entryKey,
          sectionId: section.id,
          slug,
          enPath: repoRelPath,
          zhPath: zhCandidate,
          titleEn: getShortTitle(repoRelPath, readHeadingTitle(absPath, titleFallback), "en"),
          // Chinese title: heading of the Chinese README when one exists,
          // otherwise the English heading, then shortened with zh rules.
          titleZh: getShortTitle(
            repoRelPath,
            readHeadingTitle(
            zhCandidate ? path.join(repoRoot, zhCandidate) : absPath,
            readHeadingTitle(absPath, titleFallback),
            ),
            "zh",
          ),
        });
      }
    }
  }

  // Keep the first entry seen for each key.
  const unique = new Map();
  for (const item of entries) {
    if (!unique.has(item.key)) {
      unique.set(item.key, item);
    }
  }
  return [...unique.values()].sort((a, b) => a.slug.localeCompare(b.slug));
}

// Merges manual and auto-discovered entries into one list with unique slugs.
// Manual entries come first, so on a slug clash the manual entry is kept.
function buildEntries() {
  const firstBySlug = new Map();
  for (const entry of [...manualEntries, ...collectAutoEntries()]) {
    if (!firstBySlug.has(entry.slug)) {
      firstBySlug.set(entry.slug, entry);
    }
  }
  return Array.from(firstBySlug.values());
}

/**
 * Converts entries into sidebar items (`{ text, link }`) sorted by link.
 * For "zh" the Chinese title is used (falling back to the English one) and
 * links get the "/zh" prefix.
 *
 * @param {object[]} entries Entries with `slug`, `titleEn`, `titleZh`.
 * @param {string} locale "zh" or anything else for English.
 * @returns {{text: string, link: string}[]} Sorted sidebar items.
 */
function toSidebarItems(entries, locale) {
  const isZh = locale === "zh";
  const items = [];
  for (const { slug, titleEn, titleZh } of entries) {
    items.push({
      text: isZh ? titleZh || titleEn : titleEn,
      link: isZh ? `/zh/${slug}` : `/${slug}`,
    });
  }
  items.sort((left, right) => left.link.localeCompare(right.link));
  return items;
}

/**
 * Builds the sidebar block for the "overview" section.
 * Home and architecture are pinned to the top in that order; any other
 * overview page follows, sorted by slug. The group heading is "Overview" in
 * every locale.
 *
 * @param {object[]} entries All entries.
 * @param {string} locale "zh" or anything else for English.
 * @returns {object[]} A single-group sidebar, or [] when there is no page.
 */
function buildOverviewSidebar(entries, locale) {
  const isZh = locale === "zh";
  const pinnedOrder = ["overview/home", "overview/architecture"];

  const byPinnedOrder = (a, b) => {
    const rankA = pinnedOrder.indexOf(a.slug);
    const rankB = pinnedOrder.indexOf(b.slug);
    const aPinned = rankA !== -1;
    const bPinned = rankB !== -1;
    if (aPinned && bPinned) return rankA - rankB;
    if (aPinned) return -1;
    if (bPinned) return 1;
    return a.slug.localeCompare(b.slug);
  };

  const items = [];
  const overviewEntries = entries.filter(({ sectionId }) => sectionId === "overview");
  for (const entry of overviewEntries.sort(byPinnedOrder)) {
    items.push({
      text: isZh ? entry.titleZh || entry.titleEn : entry.titleEn,
      link: isZh ? `/zh/${entry.slug}` : `/${entry.slug}`,
    });
  }
  return items.length === 0 ? [] : [{ text: "Overview", items }];
}

/**
 * Builds the sidebar for the "modules" section: one group per top-level slug
 * segment, emitted in a fixed display order. Groups without pages (and
 * segments outside the display order) are left out.
 *
 * @param {object[]} entries All entries.
 * @param {string} locale Locale used for group labels and links.
 * @returns {object[]} Sidebar groups.
 */
function buildModulesSidebar(entries, locale) {
  const grouped = new Map();
  for (const entry of entries) {
    if (entry.sectionId !== "modules") {
      continue;
    }
    const [group] = entry.slug.split("/");
    if (!grouped.has(group)) {
      grouped.set(group, []);
    }
    grouped.get(group).push(entry);
  }

  const displayOrder = ["sdks", "specs", "design", "server", "components", "sandboxes", "kubernetes"];
  return displayOrder
    .filter((group) => (grouped.get(group)?.length ?? 0) > 0)
    .map((group) => ({
      text: moduleGroupLabels[locale][group],
      items: toSidebarItems(grouped.get(group), locale),
    }));
}

// Builds the single-group sidebar for the "examples" section, or [] when the
// section has no pages.
function buildExamplesSidebar(entries, locale) {
  const exampleEntries = entries.filter(({ sectionId }) => sectionId === "examples");
  const items = toSidebarItems(exampleEntries, locale);
  return items.length > 0 ? [{ text: locale === "zh" ? "示例" : "Examples", items }] : [];
}

/**
 * Builds the sidebar for the "community" section. It has up to two groups:
 * general community pages (slugs under "community/") and OSEPs (slugs under
 * "oseps/", with the OSEP README listed first).
 *
 * @param {object[]} entries All entries.
 * @param {string} locale Locale used for group labels and links.
 * @returns {object[]} Sidebar groups; empty groups are omitted.
 */
function buildCommunitySidebar(entries, locale) {
  const labels = communityGroupLabels[locale];
  const community = entries.filter(({ sectionId }) => sectionId === "community");

  const generalPages = community.filter(({ slug }) => slug.startsWith("community/"));
  const osepReadme = community.filter(({ slug }) => slug === "oseps/readme");
  const osepDocs = community
    .filter(({ slug }) => slug.startsWith("oseps/") && slug !== "oseps/readme")
    .sort((left, right) => left.slug.localeCompare(right.slug));

  const groups = [];
  if (generalPages.length > 0) {
    groups.push({ text: labels.community, items: toSidebarItems(generalPages, locale) });
  }

  const osepItems = toSidebarItems(osepReadme, locale).concat(toSidebarItems(osepDocs, locale));
  if (osepItems.length > 0) {
    groups.push({ text: labels.oseps, items: osepItems });
  }
  return groups;
}

/**
 * Builds the path-prefix -> sidebar map for one locale. Every module prefix
 * shares the same modules sidebar, and "/community/" and "/oseps/" share the
 * community sidebar. Chinese prefixes start with "/zh".
 *
 * @param {object[]} entries All entries.
 * @param {string} locale "zh" or anything else for English.
 * @returns {Record<string, object[]>} Sidebar map keyed by path prefix.
 */
function buildSidebarByPath(entries, locale) {
  const base = locale === "zh" ? "/zh" : "";
  const overview = buildOverviewSidebar(entries, locale);
  const modules = buildModulesSidebar(entries, locale);
  const examples = buildExamplesSidebar(entries, locale);
  const community = buildCommunitySidebar(entries, locale);

  const modulePrefixes = ["server", "components", "sandboxes", "kubernetes", "specs", "sdks", "design"];
  // Insertion order is kept as-is because it determines key order in the
  // serialized manifest.
  return Object.fromEntries([
    [`${base}/`, overview],
    [`${base}/overview/`, overview],
    [`${base}/examples/`, examples],
    [`${base}/community/`, community],
    [`${base}/oseps/`, community],
    ...modulePrefixes.map((modulePrefix) => [`${base}/${modulePrefix}/`, modules]),
  ]);
}

/**
 * Regenerates `docs/generated/{en,zh}/<slug>.md` for every entry.
 * The generated directory is wiped first, so pages of removed entries
 * disappear.
 *
 * @param {object[]} entries Page entries.
 * @returns {{rewrites: Record<string, string>, pages: object[]}} Route
 *   rewrites (generated file -> public route) and a summary of the pages.
 */
function writeGeneratedPages(entries) {
  rmIfExists(generatedRoot);
  ensureDir(path.join(generatedRoot, "en"));
  ensureDir(path.join(generatedRoot, "zh"));

  const rewrites = {};
  const pages = [];

  for (const entry of entries) {
    const enSourcePath = entry.enPath;
    // Without a Chinese source the Chinese page is rendered from the English file.
    const zhSourcePath = entry.zhPath || entry.enPath;
    const enGeneratedRel = `generated/en/${entry.slug}.md`;
    const zhGeneratedRel = `generated/zh/${entry.slug}.md`;
    const enGeneratedAbs = path.join(docsRoot, enGeneratedRel);
    const zhGeneratedAbs = path.join(docsRoot, zhGeneratedRel);
    // Slugs contain "/" so the page may live in a nested directory.
    ensureDir(path.dirname(enGeneratedAbs));
    ensureDir(path.dirname(zhGeneratedAbs));

    fs.writeFileSync(
      enGeneratedAbs,
      renderPageSource({
        locale: "en",
        title: entry.titleEn,
        sourceRelPath: enSourcePath,
        routeSlug: entry.slug,
        passthrough: entry.passthrough === true,
      }),
      "utf8",
    );

    fs.writeFileSync(
      zhGeneratedAbs,
      renderPageSource({
        locale: "zh",
        title: entry.titleZh || entry.titleEn,
        sourceRelPath: zhSourcePath,
        routeSlug: entry.slug,
        passthrough: entry.passthrough === true,
      }),
      "utf8",
    );

    // Map the generated files onto their public routes.
    rewrites[enGeneratedRel] = `${entry.slug}.md`;
    rewrites[zhGeneratedRel] = `zh/${entry.slug}.md`;

    pages.push({
      key: entry.key,
      slug: entry.slug,
      en: enSourcePath,
      zh: zhSourcePath,
    });
  }

  return { rewrites, pages };
}

/**
 * Regenerates all docs pages and writes the manifest JSON (pages, nav,
 * sidebars, rewrites) to `manifestPath`.
 *
 * @returns {object} The manifest that was written.
 */
export function buildManifest() {
  const entries = buildEntries();
  const { rewrites, pages } = writeGeneratedPages(entries);
  const manifest = {
    generatedAt: new Date().toISOString(),
    pages,
    nav: {
      en: [
        { text: "Overview", link: "/overview/home" },
        { text: "Project", link: "/sdks/sandbox/python/readme" },
        { text: "Examples", link: "/examples/readme" },
        { text: "Community", link: "/community/contributing" },
      ],
      // NOTE(review): the Chinese nav reuses the English labels — confirm
      // whether this is intentional.
      zh: [
        { text: "Overview", link: "/zh/overview/home" },
        { text: "Project", link: "/zh/sdks/sandbox/python/readme" },
        { text: "Examples", link: "/zh/examples/readme" },
        { text: "Community", link: "/zh/community/contributing" },
      ],
    },
    sidebar: {
      en: buildSidebarByPath(entries, "en"),
      zh: buildSidebarByPath(entries, "zh"),
    },
    rewrites,
  };

  ensureDir(path.dirname(manifestPath));
  fs.writeFileSync(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`, "utf8");
  return manifest;
}

/**
 * Returns the docs manifest for the VitePress config.
 *
 * NOTE(review): every branch below ends in buildManifest(), so the manifest
 * on disk is read and validated but never returned — the site content is
 * regenerated on each call. If buildManifest() throws inside the try block it
 * is attempted once more from the catch. Confirm whether the cached manifest
 * was meant to be reused when valid.
 *
 * @returns {object} A freshly built manifest.
 */
export function loadManifest() {
  try {
    if (!fs.existsSync(manifestPath)) {
      return buildManifest();
    }
    const data = JSON.parse(fs.readFileSync(manifestPath, "utf8"));
    if (!data || !data.generatedAt || !data.nav || !data.sidebar || !data.rewrites) {
      return buildManifest();
    }
    return buildManifest();
  } catch (_error) {
    return buildManifest();
  }
}

// Run the generator only when this file is the script passed to node, not
// when it is imported as a module.
if (process.argv[1] === fileURLToPath(import.meta.url)) {
  const manifest = buildManifest();
  // Keep logging terse for CI output.
  console.log(`docs manifest generated (${manifest.pages.length} pages)`);
}


================================================
FILE: docs/.vitepress/theme/index.ts
================================================
// Theme entry point: re-exports the stock VitePress theme unchanged and
// imports the project stylesheet purely for its side effects.
import DefaultTheme from "vitepress/theme";
import "./styles.css";

export default DefaultTheme;


================================================
FILE: docs/.vitepress/theme/styles.css
================================================
/* Brand color custom properties, overridden with a blue palette
   (lightest to darkest). */
:root {
  --vp-c-brand-1: #2563eb;
  --vp-c-brand-2: #1d4ed8;
  --vp-c-brand-3: #1e40af;
}

/* Outline .VPFeature elements with a divider-colored, rounded border. */
.VPFeature {
  border: 1px solid var(--vp-c-divider);
  border-radius: 14px;
}

/* Accent blockquotes inside doc content with the primary brand color. */
.vp-doc blockquote {
  border-left: 3px solid var(--vp-c-brand-1);
}

/* Keep README badge rows inline in VitePress docs pages.
   Targets <p align="center"> blocks: the paragraph is centered, each link
   becomes an inline-flex item with a small gap, and badge images are
   rendered inline and vertically centered without extra margins. */
.vp-doc p[align="center"] {
  text-align: center;
}

.vp-doc p[align="center"] a {
  display: inline-flex;
  align-items: center;
  text-decoration: none;
  margin: 2px 4px;
}

.vp-doc p[align="center"] a img {
  display: inline-block;
  margin: 0;
  vertical-align: middle;
}

/* Responsive grid for scenario cards: as many columns as fit, each at
   least 220px wide and sharing the remaining space equally. */
.scenario-grid {
  display: grid;
  grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
  gap: 14px;
  margin: 16px 0 6px;
}

/* Card styling for .scenario-card inside doc content: block-level, bordered,
   softly tinted, with link underline and link color suppressed. */
.vp-doc .scenario-card {
  display: block;
  border: 1px solid var(--vp-c-divider);
  border-radius: 12px;
  padding: 14px;
  text-decoration: none;
  color: inherit;
  background: var(--vp-c-bg-soft);
  transition: border-color 0.2s ease, transform 0.2s ease;
}

/* Hover feedback: brand-colored border and a 1px lift. */
.vp-doc .scenario-card:hover {
  border-color: var(--vp-c-brand-1);
  transform: translateY(-1px);
  text-decoration: none;
}

/* Card title: compact heading with no top margin. */
.vp-doc .scenario-card h3 {
  margin: 0 0 8px;
  font-size: 16px;
  text-decoration: none;
}

/* Card description: smaller, secondary-colored body text. */
.vp-doc .scenario-card p {
  margin: 0;
  font-size: 14px;
  line-height: 1.5;
  color: var(--vp-c-text-2);
  text-decoration: none;
}


================================================
FILE: docs/README.md
================================================
# OpenSandbox Docs Site

This directory hosts the VitePress site for OpenSandbox.

## Local development

```bash
nvm use 22
cd docs
pnpm install
pnpm docs:dev
```

## Build

```bash
nvm use 22
cd docs
pnpm install
pnpm docs:build
```

## Notes

- Site content is generated from repository README and docs markdown files.
- Run `pnpm docs:sync` to regenerate the manifest and routed pages.
- Run `pnpm docs:spec` to regenerate `docs/public/api/spec-inline.js` from `specs/sandbox-lifecycle.yml`.


================================================
FILE: docs/README_zh.md
================================================
<div align="center">
  <img src="assets/logo.svg" alt="OpenSandbox logo" width="150" />

  <h1>OpenSandbox</h1>

<p align="center">
  <a href="https://github.com/alibaba/OpenSandbox">
    <img src="https://img.shields.io/github/stars/alibaba/OpenSandbox.svg?style=social" alt="GitHub stars" />
  </a>
  <a href="https://deepwiki.com/alibaba/OpenSandbox">
    <img src="https://deepwiki.com/badge.svg" alt="Ask DeepWiki" />
  </a>
  <a href="https://www.apache.org/licenses/LICENSE-2.0.html">
    <img src="https://img.shields.io/badge/license-Apache%202.0-blue.svg" alt="license" />
  </a>
  <a href="https://badge.fury.io/py/opensandbox">
    <img src="https://badge.fury.io/py/opensandbox.svg" alt="PyPI version" />
  </a>
  <a href="https://badge.fury.io/js/@alibaba-group%2Fopensandbox">
    <img src="https://badge.fury.io/js/@alibaba-group%2Fopensandbox.svg" alt="npm version" />
  </a>
  <a href="https://landscape.cncf.io/?item=orchestration-management--scheduling-orchestration--opensandbox">
    <img src="https://img.shields.io/badge/CNCF-Landscape-0C66E4" alt="CNCF Landscape" />
  </a>
  <a href="https://qr.dingtalk.com/action/joingroup?code=v1,k1,A4Bgl5q1I1eNU/r33D18YFNrMY108aFF38V+r19RJOM=&_dt_no_comment=1&origin=11">
    <img src="https://img.shields.io/badge/DingTalk-Join-0089FF?logo=dingtalk&logoColor=white" alt="DingTalk" />
  </a>
  <a href="https://github.com/alibaba/OpenSandbox/actions">
    <img src="https://github.com/alibaba/OpenSandbox/actions/workflows/real-e2e.yml/badge.svg?branch=main" alt="E2E Status" />
  </a>
</p>

  <hr />
</div>

中文 | [English](../README.md)

OpenSandbox 是一个面向 AI 应用场景设计的「通用沙箱平台」，为LLM相关的能力（命令执行、文件操作、代码执行、浏览器操作、Agent 运行等）提供 **多语言 SDK、沙箱接口协议和沙箱运行时**。

OpenSandbox 已进入 [CNCF Landscape](https://landscape.cncf.io/?item=orchestration-management--scheduling-orchestration--opensandbox)。

## 核心特性

- **多语言 SDK**：提供 Python、Java/Kotlin、JavaScript/TypeScript、C#/.NET 等语言的客户端 SDK，Go SDK 仍在规划中。
- **沙箱协议**：定义了沙箱生命周期管理 API 和沙箱执行 API。你可以通过这些沙箱协议扩展自己的沙箱运行时。
- **沙箱运行时**：沙箱全生命周期管理，支持 Docker 和[自研高性能 Kubernetes 运行时](../kubernetes)，实现本地运行、企业级大规模分布式沙箱调度。
- **沙箱环境**：内置 Command、Filesystem、Code Interpreter 实现。并提供 Coding Agent（Claude Code 等）、浏览器自动化（Chrome、Playwright）和桌面环境（VNC、VS Code）等示例。
- **网络策略**：提供统一的 [Ingress Gateway](../components/ingress) 实现，并支持多种路由策略；提供单实例级别的沙箱[出口网络限制](../components/egress)。
- **强隔离安全**：支持 gVisor、Kata Containers 和 Firecracker 微虚拟机等安全容器运行时，为沙箱工作负载与宿主机之间提供增强的安全隔离。详见 [安全容器运行时指南](secure-container.md)。

## 使用示例

### 沙箱基础操作

环境要求：

- Docker（本地运行必需）
- Python 3.10+（本地 runtime 和快速开始）

#### 1. 安装并配置 Server

```bash
uv pip install opensandbox-server
opensandbox-server init-config ~/.sandbox.toml --example docker-zh
```

> 如果需要开发或使用源码编译，可通过clone仓库进行开发。
> 
> ```bash
> git clone https://github.com/alibaba/OpenSandbox.git
> cd OpenSandbox/server
> uv sync
> cp example.config.toml ~/.sandbox.toml # Copy configuration file
> uv run python -m src.main # Start the service
> ```

#### 2. 启动沙箱 Server

```bash
opensandbox-server

# Show help
opensandbox-server -h
```

#### 3. 创建代码解释器，并在沙箱中执行命令

安装 Code Interpreter SDK

```bash
uv pip install opensandbox-code-interpreter
```

创建沙箱并执行命令

```python
import asyncio
from datetime import timedelta

from code_interpreter import CodeInterpreter, SupportedLanguage
from opensandbox import Sandbox
from opensandbox.models import WriteEntry

async def main() -> None:
    # 1. Create a sandbox
    sandbox = await Sandbox.create(
        "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2",
        entrypoint= ["/opt/opensandbox/code-interpreter.sh"],
        env={"PYTHON_VERSION": "3.11"},
        timeout=timedelta(minutes=10),
    )

    async with sandbox:

        # 2. Execute a shell command
        execution = await sandbox.commands.run("echo 'Hello OpenSandbox!'")
        print(execution.logs.stdout[0].text)

        # 3. Write a file
        await sandbox.files.write_files([
            WriteEntry(path="/tmp/hello.txt", data="Hello World", mode=644)
        ])

        # 4. Read a file
        content = await sandbox.files.read_file("/tmp/hello.txt")
        print(f"Content: {content}") # Content: Hello World

        # 5. Create a code interpreter
        interpreter = await CodeInterpreter.create(sandbox)

        # 6. 执行 Python 代码（单次执行：直接传 language）
        result = await interpreter.codes.run(
              """
                  import sys
                  print(sys.version)
                  result = 2 + 2
                  result
              """,
              language=SupportedLanguage.PYTHON,
        )

        print(result.result[0].text) # 4
        print(result.logs.stdout[0].text) # 3.11.14

    # 7. Cleanup the sandbox
    await sandbox.kill()

if __name__ == "__main__":
    asyncio.run(main())
```

### 更多示例

OpenSandbox 提供了丰富的示例来演示不同场景下的沙箱使用方式。所有示例代码位于 `examples/` 目录下。

#### 🎯 基础示例

- **[code-interpreter](../examples/code-interpreter/README.md)** - Code Interpreter SDK 的端到端沙箱流程示例。
- **[aio-sandbox](../examples/aio-sandbox/README.md)** - 使用 OpenSandbox SDK 与 agent-sandbox 的一体化沙箱示例。
- **[agent-sandbox](../examples/agent-sandbox/README.md)** - 通过 kubernetes-sigs/agent-sandbox 在 Kubernetes 上运行 OpenSandbox。

#### 🤖 Coding Agent 集成

在 OpenSandbox 中，集成各类 Coding Agent，包括 Claude Code、Google Gemini、OpenAI Codex、Kimi CLI 等。

- **[claude-code](../examples/claude-code/README.md)** - 在 OpenSandbox 中运行 Claude Code。
- **[gemini-cli](../examples/gemini-cli/README.md)** - 在 OpenSandbox 中运行 Google Gemini CLI。
- **[codex-cli](../examples/codex-cli/README.md)** - 在 OpenSandbox 中运行 OpenAI Codex CLI。
- **[kimi-cli](../examples/kimi-cli/README.md)** - 在 OpenSandbox 中运行 [Kimi CLI](https://github.com/MoonshotAI/kimi-cli)（Moonshot AI）。
- **[langgraph](../examples/langgraph/README.md)** - 基于 LangGraph 状态机编排沙箱任务与回退重试。
- **[google-adk](../examples/google-adk/README.md)** - 使用 Google ADK 通过 OpenSandbox 工具读写文件并执行命令。
- **[nullclaw](../examples/nullclaw/README.md)** - 在沙箱中启动 Nullclaw Gateway。
- **[openclaw](../examples/openclaw/README_zh.md)** - 在沙箱中启动 OpenClaw Gateway。

#### 🌐 浏览器与桌面环境

- **[chrome](../examples/chrome/README.md)** - 带 VNC 与 DevTools 的无头 Chromium，用于自动化/调试。
- **[playwright](../examples/playwright/README.md)** - Playwright + Chromium 无头抓取与测试示例。
- **[desktop](../examples/desktop/README.md)** - 通过 VNC 访问的完整桌面环境沙箱。
- **[vscode](../examples/vscode/README.md)** - 在沙箱中运行 code-server（VS Code Web）进行远程开发。

#### 🧠 机器学习与训练

- **[rl-training](../examples/rl-training/README.md)** - 在沙箱中运行 DQN CartPole 训练，输出 checkpoint 与训练汇总。

更多详细信息请参考 [examples](../examples/README.md) 和各示例目录下的 README 文件。

## 项目结构

| 目录 | 说明                                                |
|------|---------------------------------------------------|
| [`sdks/`](../sdks/) | 多语言 SDK（Python、Java/Kotlin、TypeScript/JavaScript、C#/.NET）      |
| [`specs/`](../specs/) | OpenAPI 与生命周期规范                                   |
| [`server/`](../server/README_zh.md) | Python FastAPI 沙箱生命周期服务，并集成多种运行时实现                |
| [`kubernetes/`](../kubernetes/README-ZH.md) | Kubernetes 部署与示例                                  |
| [`components/execd/`](../components/execd/README_zh.md) | 沙箱执行守护进程，负责命令和文件操作                                |
| [`components/ingress/`](../components/ingress/README.md) | 沙箱流量入口代理                                          |
| [`components/egress/`](../components/egress/README.md) | 沙箱网络 Egress 访问控制                                  |
| [`sandboxes/`](../sandboxes/) | 沙箱运行时实现与镜像（如 code-interpreter）                    |
| [`examples/`](../examples/README.md) | 集成示例和使用案例                                         |
| [`oseps/`](../oseps/README.md) | OpenSandbox Enhancement Proposals                 |
| [`docs/`](../docs/) | 架构和设计文档                                           |
| [`tests/`](../tests/) | 跨组件端到端测试                                          |
| [`scripts/`](../scripts/) | 开发和维护脚本                                           |

详细架构请参阅 [docs/architecture.md](architecture.md)。

## 文档

- [docs/architecture.md](architecture.md) – 整体架构 & 设计理念
- [oseps/README.md](../oseps/README.md) – OpenSandbox 增强提案 (OSEPs)
- SDK
  - Sandbox 基础 SDK（[Java/Kotlin SDK](../sdks/sandbox/kotlin/README_zh.md)、[Python SDK](../sdks/sandbox/python/README_zh.md)、[JavaScript/TypeScript SDK](../sdks/sandbox/javascript/README_zh.md)、[C#/.NET SDK](../sdks/sandbox/csharp/README_zh.md)）- 包含沙箱生命周期、命令执行、文件操作
  - Code Interpreter SDK（[Java/Kotlin SDK](../sdks/code-interpreter/kotlin/README_zh.md)、[Python SDK](../sdks/code-interpreter/python/README_zh.md)、[JavaScript/TypeScript SDK](../sdks/code-interpreter/javascript/README_zh.md)、[C#/.NET SDK](../sdks/code-interpreter/csharp/README_zh.md)）- 代码解释器
- [specs/README.md](../specs/README_zh.md) - 包含沙箱生命周期 API 和沙箱执行 API 的 OpenAPI 定义
- [server/README.md](../server/README_zh.md) - 包含沙箱 Server 的启动和配置，支持 Docker 与 Kubernetes Runtime

## 许可证

本项目采用 [Apache 2.0 License](../LICENSE) 开源。

你可以在遵守许可条款的前提下，将 OpenSandbox 用于个人或商业项目。

## 路线图 [2026.03]

### SDK

- **沙箱客户端连接池** - 客户端沙箱连接池管理，提供预配置的沙箱实例，以毫秒级速度获取沙箱环境。
- **Go SDK** - Go 客户端 SDK，用于沙箱生命周期管理、命令执行和文件操作。

### Sandbox Runtime

- **持久化存储** - 沙箱的持久化存储挂载（参见 [Proposal 0003](../oseps/0003-volume-and-volumebinding-support.md)）。
- **本地轻量级沙箱** - 为运行在 PC 上的 AI 工具提供轻量级沙箱。
- **安全容器** - 为在容器内运行的 AI Agent 提供安全沙箱。

### Deployment

- **部署指南** - 自托管 Kubernetes 集群的部署指南。

## 联系与讨论

- Issue：通过 GitHub Issues 提交 bug、功能请求或设计讨论
- 钉钉群：加入 [OpenSandbox 技术交流群](https://qr.dingtalk.com/action/joingroup?code=v1,k1,A4Bgl5q1I1eNU/r33D18YFNrMY108aFF38V+r19RJOM=&_dt_no_comment=1&origin=11)

欢迎一起把 OpenSandbox 打造成 AI 场景下的通用沙箱基础设施。

## Star History

[![Star History Chart](https://api.star-history.com/svg?repos=alibaba/OpenSandbox&type=date&legend=top-left)](https://www.star-history.com/#alibaba/OpenSandbox&type=date&legend=top-left)


================================================
FILE: docs/RELEASE_NOTE_TEMPLATE.md
================================================
# [component name] [version]

## What's New

Some Docs if needed 

### ✨ Features
- Feature-1 (#123)
- Feature-2 (#456)

### 🐛 Bug Fixes
- Bug-2 (#456)

### ⚠️ Breaking Changes
- xxx (#789)

### 📦 Misc
- workflow update (#789)
- deps update (#789)
- tests update (#789)

## 👥 Contributors

Thanks to these contributors ❤️

- @alice
- @bob


================================================
FILE: docs/architecture.md
================================================
# OpenSandbox Architecture

OpenSandbox is a universal sandbox platform designed for AI application scenarios, providing a complete solution with multi-language SDKs, standardized sandbox protocols, and flexible runtime implementations. This document describes the overall architecture and design philosophy of OpenSandbox.

## Architecture Overview

![OpenSandbox Architecture](assets/architecture.svg)

The OpenSandbox architecture consists of four main layers:

1. **SDKs Layer** - Client libraries for interacting with sandboxes
2. **Specs Layer** - OpenAPI specifications defining the protocols
3. **Runtime Layer** - Server implementations managing sandbox lifecycle
4. **Sandbox Instances Layer** - Running sandbox containers with injected execution daemons

## 1. OpenSandbox SDKs

The SDK layer provides high-level abstractions for developers to interact with sandboxes. It handles communication with both the Sandbox Lifecycle API and the Sandbox Execution API.

### Core SDK Components

#### 1.1 Sandbox

The `Sandbox` class is the primary entry point for managing sandbox lifecycle:

- **Create**: Provision new sandbox instances from container images
- **Manage**: Monitor sandbox state, renew expiration, retrieve endpoints
- **Destroy**: Terminate sandbox instances when no longer needed

**Key Features:**
- Async/await support for non-blocking operations
- Automatic state polling for provisioning progress
- Resource quota management (CPU, memory, GPU)
- Metadata and environment variable injection
- TTL-based automatic expiration with renewal

#### 1.2 Filesystem

The `Filesystem` component provides comprehensive file operations within sandboxes:

- **CRUD Operations**: Create, read, update, and delete files and directories
- **Bulk Operations**: Upload/download multiple files efficiently
- **Search**: Glob-based file searching with pattern matching
- **Permissions**: Manage file ownership, group, and mode (chmod)
- **Metadata**: Retrieve file info including size, timestamps, permissions

**Use Cases:**
- Uploading code files and dependencies
- Downloading execution results and artifacts
- Managing workspace directories
- Searching for files by pattern

#### 1.3 Commands

The `Commands` component enables shell command execution within sandboxes:

- **Foreground Execution**: Run commands synchronously with real-time output streaming
- **Background Execution**: Launch long-running processes in detached mode
- **Stream Support**: Capture stdout/stderr via Server-Sent Events (SSE)
- **Process Control**: Interrupt running commands via context cancellation
- **Working Directory**: Specify custom working directory for command execution

**Use Cases:**
- Running build commands (e.g., `npm install`, `pip install`)
- Executing system utilities (e.g., `git`, `docker`)
- Starting web servers or services
- Running test suites

#### 1.4 CodeInterpreter

The `CodeInterpreter` component provides stateful code execution across multiple programming languages:

- **Multi-Language Support**: Python, Java, JavaScript, TypeScript, Go, Bash
- **Session Management**: Maintain execution state across multiple code blocks
- **Jupyter Integration**: Built on Jupyter kernel protocol for robust execution
- **Result Streaming**: Real-time output via SSE with execution counts
- **Error Handling**: Structured error responses with tracebacks

**Key Features:**
- Variable persistence across executions within same session
- Display data in multiple MIME types (text, HTML, images)
- Execution interruption support
- Execution timing and performance metrics

**Use Cases:**
- Interactive coding environments (e.g., Jupyter notebooks)
- AI code generation and execution
- Data analysis and visualization
- Educational coding platforms

### SDK Language Support

OpenSandbox provides SDKs in multiple languages:

- **Python SDK** (`sdks/sandbox/python`, `sdks/code-interpreter/python`)
- **Java/Kotlin SDK** (`sdks/sandbox/kotlin`, `sdks/code-interpreter/kotlin`)
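- **JavaScript/TypeScript SDK** (`sdks/sandbox/javascript`, `sdks/code-interpreter/javascript`)
- **C#/.NET SDK** (`sdks/sandbox/csharp`, `sdks/code-interpreter/csharp`)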

All SDKs follow the same design patterns and provide consistent APIs across languages.

## 2. OpenSandbox Specs

The Specs layer defines two core OpenAPI specifications that establish the contract between SDKs and runtime implementations.

### 2.1 Sandbox Lifecycle Spec

**File**: `specs/sandbox-lifecycle.yml`

The Lifecycle Spec defines the API for managing sandbox instances throughout their lifecycle.

#### Core Operations

| Operation | Endpoint | Description |
|-----------|----------|-------------|
| **Create** | `POST /sandboxes` | Create a new sandbox from a container image |
| **List** | `GET /sandboxes` | List sandboxes with filtering and pagination |
| **Get** | `GET /sandboxes/{id}` | Retrieve sandbox details and status |
| **Delete** | `DELETE /sandboxes/{id}` | Terminate a sandbox |
| **Pause** | `POST /sandboxes/{id}/pause` | Pause a running sandbox |
| **Resume** | `POST /sandboxes/{id}/resume` | Resume a paused sandbox |
| **Renew** | `POST /sandboxes/{id}/renew-expiration` | Extend sandbox TTL |
| **Endpoint** | `GET /sandboxes/{id}/endpoints/{port}` | Get public URL for a port |

### 2.2 Sandbox Execution Spec

**File**: `specs/execd-api.yaml`

The Execution Spec defines the API for interacting with running sandbox instances. This API is implemented by the `execd` daemon injected into each sandbox.

#### API Categories

**Health**
- `GET /ping` - Health check

**Code Interpreting**
- `POST /code/context` - Create execution context
- `POST /code` - Execute code with streaming output
- `DELETE /code` - Interrupt code execution

**Command Execution**
- `POST /command` - Execute shell command
- `DELETE /command` - Interrupt command

**Filesystem**
- `GET /files/info` - Get file metadata
- `DELETE /files` - Remove files
- `POST /files/permissions` - Change permissions
- `POST /files/mv` - Rename/move files
- `GET /files/search` - Search files by glob pattern
- `POST /files/replace` - Replace file content
- `POST /files/upload` - Upload files
- `GET /files/download` - Download files
- `POST /directories` - Create directories
- `DELETE /directories` - Remove directories

**Metrics**
- `GET /metrics` - Get system metrics snapshot
- `GET /metrics/watch` - Stream metrics via SSE

## 3. OpenSandbox Runtime

The Runtime layer implements the Sandbox Lifecycle Spec and manages the orchestration of sandbox containers.

### 3.1 Server Architecture

**Location**: `server/`

The OpenSandbox server is a FastAPI-based service providing:

- **Lifecycle Management**: Create, monitor, pause, resume, and terminate sandboxes
- **Pluggable Runtimes**: Docker (production-ready), Kubernetes (production-ready)
- **Async Provisioning**: Background creation to reduce latency
- **Automatic Expiration**: Configurable TTL with renewal support
- **Access Control**: API key authentication
- **Observability**: Unified status tracking with transition logging

### 3.2 Runtime Implementations

#### Docker Runtime (Ready)

**Features:**
- Direct Docker API integration
- Two networking modes:
  - **Host Mode**: Containers share host network (single instance)
  - **Bridge Mode**: Isolated networking with HTTP routing
- Container lifecycle management
- Resource quota enforcement
- Private registry authentication
- Volume mounting for execd injection
- Automatic cleanup on expiration

**Key Responsibilities:**
1. Pull container images (with auth support)
2. Create containers with resource limits
3. Inject execd binary and start script
4. Monitor container state
5. Handle pause/resume operations
6. Clean up terminated containers

#### Kubernetes Runtime (Ready)

**Features:**
- Built-in **[BatchSandbox](https://github.com/alibaba/OpenSandbox/tree/main/kubernetes)** runtime with sandbox pooling, high-throughput batch creation, and heterogeneous task orchestration; also compatible with **[SIG agent-sandbox](https://github.com/kubernetes-sigs/agent-sandbox)** as an alternative runtime
- Support for different secure container runtimes (e.g., kata-containers, gVisor)
- Helm-based deployment for controller and server, see [documentation](https://github.com/alibaba/OpenSandbox/blob/main/kubernetes/charts/opensandbox/README.md)

**Planned Features:**
- Unified network storage mounting (ossfs, NAS, custom PVC) in both pooled and non-pooled modes
- Pause/resume support

#### Custom Runtime

The pluggable architecture allows implementing custom runtimes by:
1. Implementing the Lifecycle Spec APIs
2. Managing sandbox provisioning and cleanup
3. Injecting execd into sandbox instances
4. Reporting sandbox state transitions

### 3.3 Networking and Routing

#### Sandbox Router

**Purpose**: Provides HTTP/HTTPS load balancing to sandbox instance ports.

**Features:**
- Dynamic endpoint generation based on sandbox ID and port
- Supports both domain-based and wildcard routing
- Reverse proxy to sandbox container ports
- Automatic cleanup when sandbox terminates

**Endpoint Format**: `{domain}/sandboxes/{sandboxId}/port/{port}`

**Use Cases:**
- Accessing web applications running in sandboxes
- Connecting to development servers (e.g., VS Code Server)
- Exposing APIs and services
- VNC and remote desktop access

## 4. Sandbox Instances

Sandbox instances are running containers that host user workloads with an injected execution daemon.

### 4.1 Container Structure

Each sandbox instance consists of:

1. **Base Container**: User-specified image (e.g., `ubuntu:22.04`, `python:3.11`)
2. **execd Daemon**: Injected execution agent implementing the Execution Spec
3. **Entrypoint Process**: User-defined main process

### 4.2 execd - Execution Daemon

**Location**: `components/execd/`

execd is a Go-based HTTP daemon built on the Beego framework.

#### Core Responsibilities

1. **Code Execution**: Manage Jupyter kernel sessions for multi-language code execution
2. **Command Execution**: Run shell commands with output streaming
3. **File Operations**: Provide filesystem API for remote file management
4. **Metrics Collection**: Monitor and report CPU, memory usage

#### Architecture

**Technology Stack:**
- **Language**: Go 1.24+
- **Web Framework**: Beego
- **Jupyter Integration**: WebSocket-based Jupyter protocol client
- **Streaming**: Server-Sent Events (SSE)

**Package Structure:**
- `pkg/flag/` - Configuration and CLI flags
- `pkg/web/` - HTTP layer (controllers, models, router)
- `pkg/runtime/` - Execution dispatcher
- `pkg/jupyter/` - Jupyter kernel client
- `pkg/util/` - Utilities and helpers

#### Jupyter Integration

execd integrates with Jupyter Server running inside the container:

1. **Session Management**: Create and maintain kernel sessions
2. **WebSocket Communication**: Real-time bidirectional communication
3. **Message Protocol**: Jupyter message spec implementation
4. **Stream Parsing**: Parse execution results, outputs, errors

**Supported Kernels:**
- Python (IPython)
- Java (IJava)
- JavaScript (IJavaScript)
- TypeScript (ITypeScript)
- Go (gophernotes)
- Bash

### 4.3 Injection Mechanism

The execd daemon is injected into sandbox containers during creation:

**Docker Runtime Injection Process:**

1. **Pull execd Image**: Retrieve the execd container image
2. **Extract Binary**: Copy execd binary from image to temporary location
3. **Volume Mount**: Mount execd binary and startup script into target container
4. **Entrypoint Override**: Modify container entrypoint to start execd first
5. **User Process Launch**: execd forks and executes the user's entrypoint

**Startup Sequence:**

```bash
# Container starts with modified entrypoint
/opt/opensandbox/start.sh
  ↓
# Start Jupyter Server
jupyter notebook --port=54321 --no-browser --ip=0.0.0.0
  ↓
# Start execd daemon
/opt/opensandbox/execd --jupyter-host=http://127.0.0.1:54321 --port=44772
  ↓
# Execute user entrypoint
exec "${USER_ENTRYPOINT[@]}"
```

**Benefits:**
- Transparent to user code
- No image modification required
- Dynamic injection at runtime
- Works with any base image

## 5. Communication Flow

### 5.1 Sandbox Creation Flow

```
User/SDK
   │
   │ 1. POST /sandboxes (image, entrypoint, resources)
   ▼
Server (Lifecycle API)
   │
   │ 2. Pull container image
   │ 3. Inject execd binary
   │ 4. Create container with entrypoint override
   │ 5. Start container
   ▼
Sandbox Instance
   │
   │ 6. Start Jupyter Server
   │ 7. Start execd daemon
   │ 8. Execute user entrypoint
   ▼
Running (State)
```

### 5.2 Code Execution Flow

```
User/SDK
   │
   │ 1. Create sandbox
   │ 2. Get execd endpoint
   ▼
CodeInterpreter SDK
   │
   │ 3. POST /code/context (create session)
   │ 4. POST /code (execute code)
   ▼
execd (Execution API)
   │
   │ 5. Route to Jupyter runtime
   ▼
Jupyter Runtime
   │
   │ 6. WebSocket to Jupyter Server
   │ 7. Send execute_request
   ▼
Jupyter Kernel (Python/Java/etc.)
   │
   │ 8. Execute code
   │ 9. Stream output events
   ▼
execd
   │
   │ 10. Convert to SSE events
   │ 11. Stream to client
   ▼
CodeInterpreter SDK
   │
   │ 12. Parse events
   │ 13. Return result to user
   ▼
User/Application
```

### 5.3 File Operations Flow

```
User/SDK
   │
   │ 1. Upload files
   ▼
Filesystem SDK
   │
   │ 2. POST /files/upload (multipart)
   ▼
execd (Execution API)
   │
   │ 3. Write to filesystem
   │ 4. Set permissions
   ▼
Sandbox Container Filesystem
```

## 6. Design Principles

### 6.1 Protocol-First Design

- All interactions defined by OpenAPI specifications
- Clear contracts between components
- Enables polyglot implementations
- Supports custom runtime implementations

### 6.2 Separation of Concerns

- **SDK**: Client-side abstraction and convenience
- **Specs**: Protocol definition and documentation
- **Runtime**: Sandbox orchestration and lifecycle
- **execd**: In-sandbox execution and operations

### 6.3 Extensibility

- Pluggable runtime implementations
- Custom sandbox images
- Multiple SDK languages
- Additional Jupyter kernels

### 6.4 Security

- API key authentication for lifecycle operations
- Token-based authentication for execution operations
- Isolated sandbox environments
- Resource quota enforcement
- Network isolation options

### 6.5 Observability

- Structured state transitions
- Real-time metrics streaming
- Comprehensive logging
- Health check endpoints

## 7. Use Cases

### 7.1 AI Code Generation and Execution

AI models (like Claude, GPT-4, Gemini) generate code that needs to be executed safely:

- **Isolation**: Run untrusted AI-generated code in sandboxes
- **Multi-Language**: Support various programming languages
- **Iteration**: Maintain state across multiple code generations
- **Feedback**: Capture execution results and errors for AI refinement

**Examples**: [claude-code](../examples/claude-code/), [gemini-cli](../examples/gemini-cli/), [codex-cli](../examples/codex-cli/)

### 7.2 Interactive Coding Environments

Build web-based coding platforms and notebooks:

- **Code Execution**: Run code in isolated environments
- **File Management**: Upload/download project files
- **Terminal Access**: Execute shell commands
- **Collaboration**: Share sandbox instances

**Examples**: [code-interpreter](../examples/code-interpreter/)

### 7.3 Browser Automation and Testing

Automate web browsers for testing and scraping:

- **Headless Browsers**: Chrome, Playwright
- **Remote Debugging**: DevTools protocol
- **VNC Access**: Visual debugging
- **Network Isolation**: Controlled environment

**Examples**: [chrome](../examples/chrome/), [playwright](../examples/playwright/)

### 7.4 Remote Development Environments

Provide cloud-based development workspaces:

- **VS Code Server**: Full IDE in browser
- **Desktop Environments**: VNC-based desktops
- **Tool Pre-installation**: Language runtimes, build tools
- **Port Forwarding**: Access development servers

**Examples**: [vscode](../examples/vscode/), [desktop](../examples/desktop/)

### 7.5 Continuous Integration and Testing

Run build and test pipelines in isolated environments:

- **Reproducible Builds**: Consistent container images
- **Parallel Execution**: Multiple sandbox instances
- **Artifact Collection**: Download build outputs
- **Resource Limits**: Prevent resource exhaustion

## 8. Conclusion

OpenSandbox provides a complete, production-ready platform for building AI-powered applications that require safe code execution, file management, and command execution in isolated environments. The architecture is designed to be:

- **Universal**: Works with any container image
- **Extensible**: Pluggable runtimes and custom implementations
- **Developer-Friendly**: Multi-language SDKs with consistent APIs
- **Production-Ready**: Robust lifecycle management and observability
- **Secure**: Isolated environments with access control

The protocol-first design ensures that all components can evolve independently while maintaining compatibility. Whether you're building AI coding assistants, interactive notebooks, or remote development environments, OpenSandbox provides the foundation you need.

## 9. References

- [Contributing Guide](contributing.md)
- [Sandbox Lifecycle Spec](../specs/sandbox-lifecycle.yml)
- [Sandbox Execution Spec](../specs/execd-api.yaml)
- [Server Documentation](../server/README.md)
- [execd Documentation](../components/execd/README.md)
- [Python SDK](../sdks/sandbox/python/README.md)
- [Java/Kotlin SDK](../sdks/sandbox/kotlin/README.md)
- [Examples](../examples/README.md)


================================================
FILE: docs/index.md
================================================
---
layout: home

hero:
  name: OpenSandbox
  text: Universal Sandbox Infrastructure for AI Applications
  tagline: Securely run commands, filesystems, code interpreters, browsers, and developer tools in isolated runtime environments.
  actions:
    - theme: brand
      text: Quick Start
      link: /overview/home
    - theme: alt
      text: Explore Architecture
      link: /overview/architecture

features:
  - title: Sandbox Lifecycle and Runtime Management
    details: Provision, monitor, renew, and terminate sandbox instances with Docker and Kubernetes-oriented runtime capabilities.
  - title: Multi-Language SDKs and Unified APIs
    details: Build with Python, Java/Kotlin, JavaScript/TypeScript, and C#/.NET SDKs on top of standardized lifecycle and execution protocols.
  - title: Powerful In-Sandbox Execution
    details: Execute shell commands, manage files, run multi-language code interpreters, expose ports, and stream logs/metrics.
  - title: Built for Real AI Workloads
    details: Supports coding agents, browser automation, remote development, AI code execution, and RL training scenarios.
---

## Typical Scenarios

OpenSandbox is now listed in the [CNCF Landscape](https://landscape.cncf.io/?item=orchestration-management--scheduling-orchestration--opensandbox).

<div class="scenario-grid">
  <a class="scenario-card" href="./examples/claude-code/readme">
    <h3>Coding Agents</h3>
    <p>Run Claude Code, Gemini CLI, Codex, and other agent tools in isolated sandboxes.</p>
  </a>
  <a class="scenario-card" href="./examples/playwright/readme">
    <h3>Browser Automation</h3>
    <p>Execute Chrome and Playwright workloads with controlled runtime, filesystem, and networking.</p>
  </a>
  <a class="scenario-card" href="./examples/vscode/readme">
    <h3>Remote Development</h3>
    <p>Host VS Code Web and desktop-like environments for secure cloud development workflows.</p>
  </a>
  <a class="scenario-card" href="./examples/code-interpreter/readme">
    <h3>AI Code Execution</h3>
    <p>Run model-generated code safely, stream outputs, and iterate quickly with reproducible environments.</p>
  </a>
  <a class="scenario-card" href="./examples/rl-training/readme">
    <h3>RL Training</h3>
    <p>Launch reinforcement learning tasks with managed sandbox lifecycle and resource controls.</p>
  </a>
</div>

Explore all scenario references in [Examples](./examples/readme).


================================================
FILE: docs/manual-cleanup-refactor-guide.md
================================================
# Manual Cleanup Refactor Guide

## Background

GitHub issue: `alibaba/OpenSandbox#442`

Issue summary:

- Support non-expiring sandboxes
- Let callers manage cleanup explicitly
- Keep existing TTL-based behavior for current users
- Work across Docker and Kubernetes runtimes where supported

Current implementation does not support this. TTL is a hard requirement in:

- API request/response models
- Docker runtime scheduling and restore logic
- Kubernetes workload creation and renew flows

This document captures the recommended refactor direction before implementation starts.

## Refactor Goal

Introduce a manual cleanup mode without adding a new top-level mode field for now.

Chosen semantic:

- `timeout` present: sandbox uses TTL behavior
- `timeout` omitted or `null`: sandbox uses manual cleanup behavior

Non-goals for this refactor:

- Do not support magic values like `timeout=0` or `timeout=-1`
- Do not redesign the lifecycle API beyond what is required for manual cleanup
- Do not overload `renew_expiration` to switch a sandbox from manual mode back to TTL mode

## Compatibility and Rollout

This refactor is compatible through a controlled upgrade path, not through strict protocol backward compatibility.

Important compatibility fact:

- Once manual cleanup is enabled in an environment, lifecycle responses may contain `expiresAt=null`
- Lifecycle responses may also serialize other nullable fields explicitly as `null` instead of omitting them
- Older SDKs that assume `expiresAt` is always a timestamp may fail when they call `create`, `get`, or `list`
- Older schema-generated clients may also fail if they assume fields such as `metadata`, `status.reason`,
  `status.message`, or `status.lastTransitionAt` are always omitted or always non-null
- Existing TTL-based callers are unaffected as long as they do not encounter manual-cleanup sandboxes

Recommended rollout order:

1. Upgrade all SDKs/clients that read lifecycle API responses
2. Upgrade the server
3. Only then start creating sandboxes with `timeout` omitted or `null`

Operational rule:

- Do not create manual-cleanup sandboxes in a shared environment until all readers of the lifecycle API have been upgraded

This should be called out explicitly in release notes and upgrade documentation.

## Why This Approach

Compared with adding `expirationMode`, using `timeout: Optional[int]` is the smallest compatible change that still maps cleanly to the feature request.

Advantages:

- Smaller API and SDK surface change
- Easier migration from the current TTL-only model
- Preserves current behavior for existing clients that already send `timeout`

Tradeoffs:

- Mode becomes implicit rather than explicit
- `timeout == null` can mean either deliberate manual mode or missing input
- Future expansion beyond `ttl/manual` may require a second API refactor

For the current scope, these tradeoffs are acceptable.

## Current State

### API layer

TTL is currently mandatory.

Relevant files:

- `server/src/api/schema.py`
- `specs/sandbox-lifecycle.yml`

Current constraints:

- `CreateSandboxRequest.timeout` is required and bounded to `60-86400`
- `CreateSandboxResponse.expiresAt` is required
- `Sandbox.expiresAt` is required
- `RenewSandboxExpirationRequest.expiresAt` is required and assumes the sandbox already has TTL semantics

### Docker runtime

Relevant file:

- `server/src/services/docker.py`

Current behavior:

- Creation always computes `expires_at = created_at + timeout`
- Creation always schedules expiration via in-process timer
- Existing sandboxes are restored from the expiration label on server startup
- Sandbox read/list responses always expose `expiresAt`
- `renew_expiration()` only supports extending TTL

### Kubernetes runtime

Relevant files:

- `server/src/services/k8s/kubernetes_service.py`
- `server/src/services/k8s/batchsandbox_provider.py`
- `server/src/services/k8s/agent_sandbox_provider.py`

Current behavior:

- Creation always computes `expires_at = created_at + timeout`
- BatchSandbox writes `spec.expireTime`
- agent-sandbox writes `spec.shutdownTime`
- `renew_expiration()` patches those fields
- Sandbox read/list responses expose `expiresAt`

## Target API Semantics

### Create request

`CreateSandboxRequest.timeout` should become optional.

Rules:

- `timeout` omitted or `null` means manual cleanup mode
- `timeout` present means TTL mode
- If present, `timeout` must still satisfy `60 <= timeout <= 86400`
- `timeout=0` and `timeout<0` remain invalid

Suggested request examples:

TTL mode:

```json
{
  "image": { "uri": "python:3.11" },
  "timeout": 3600,
  "resourceLimits": {},
  "entrypoint": ["sleep", "infinity"]
}
```

Manual cleanup mode:

```json
{
  "image": { "uri": "python:3.11" },
  "resourceLimits": {},
  "entrypoint": ["sleep", "infinity"]
}
```

### Response models

`expiresAt` should become nullable in:

- `CreateSandboxResponse`
- `Sandbox`

Rules:

- TTL sandbox: `expiresAt` contains an RFC 3339 timestamp
- Manual sandbox: `expiresAt` is `null`

### Renew expiration API

Do not use `renew_expiration` as a mode switch.

Recommended behavior:

- TTL sandbox: renew works as it does today
- Manual sandbox: renew fails clearly

Recommended response:

- `409 Conflict` preferred
- `400 Bad Request` acceptable if existing error handling makes that much simpler

Recommended error message:

- `"Sandbox <id> does not have automatic expiration enabled."`

## Implementation Strategy

## 1. API and schema updates

Files to update:

- `server/src/api/schema.py`
- `specs/sandbox-lifecycle.yml`

Required changes:

- Make `CreateSandboxRequest.timeout` optional
- Make `CreateSandboxResponse.expiresAt` optional
- Make `Sandbox.expiresAt` optional
- Update field descriptions to document manual cleanup behavior
- Update request/response examples in the OpenAPI spec

Recommended validation rule:

- No custom mode field
- Validation only enforces bounds when `timeout` is not `None`

## 2. Docker runtime refactor

File to update:

- `server/src/services/docker.py`

### Target behavior

For manual sandboxes:

- No expiration timestamp is computed
- No expiration label is written
- A dedicated runtime marker should be written (for example `opensandbox.io/manual-cleanup=true`)
- No expiration timer is scheduled
- Sandbox survives server restart without restoration warnings
- Read/list responses return `expiresAt=None`

### Concrete refactor points

#### Creation context

Current logic:

- `_prepare_creation_context()` always returns a concrete `expires_at`

Target logic:

- Return `expires_at: Optional[datetime]`
- `None` when `request.timeout is None`

#### Label building

Current logic:

- Expiration label is assumed to exist

Target logic:

- Only write `SANDBOX_EXPIRES_AT_LABEL` when `expires_at is not None`
- Write a dedicated manual-cleanup label/annotation when `expires_at is None`

#### Provisioning

Current logic:

- `_provision_sandbox()` always schedules expiration

Target logic:

- Only call `_schedule_expiration()` when `expires_at is not None`

#### Sandbox reconstruction

Current logic:

- `_container_to_sandbox()` falls back to a concrete `expires_at`

Target logic:

- Manual sandbox should produce `expiresAt=None`
- Avoid fallback behavior that fabricates an expiration timestamp from `created_at`

#### Restore path

Current logic:

- `_restore_existing_sandboxes()` warns when a sandbox is missing the expiration label

Target logic:

- Missing expiration label should only be treated as valid when the manual-cleanup marker is present
- Continue warning on sandboxes that have neither an expiration label nor a manual-cleanup marker
- Only restore timers for TTL sandboxes that actually carry expiration metadata

#### Renew path

Current logic:

- `renew_expiration()` assumes every sandbox has TTL enabled

Target logic:

- Reject renewal if the manual-cleanup marker is present
- Continue treating "missing expiration metadata without manual marker" as malformed state rather than silently converting it to manual mode

## 3. Kubernetes service refactor

Files to update:

- `server/src/services/k8s/kubernetes_service.py`
- `server/src/services/k8s/workload_provider.py`
- `server/src/services/k8s/batchsandbox_provider.py`
- `server/src/services/k8s/agent_sandbox_provider.py`

### Key risk

Kubernetes support depends on the underlying CRDs.

Open question:

- Can BatchSandbox omit `spec.expireTime`?
- Can agent-sandbox omit `spec.shutdownTime`?

This must be confirmed before claiming end-to-end support.

### Recommended capability design

Add a provider capability check:

- `supports_manual_cleanup() -> bool`

Persist the chosen mode on workload metadata as well:

- TTL sandbox: keep expiration field populated
- Manual sandbox: omit expiration field and write a provider-neutral marker (label or annotation)

Rationale:

- Docker can support manual cleanup immediately
- Kubernetes providers may differ based on CRD semantics
- The server should fail clearly when the selected provider cannot represent a non-expiring sandbox

### Service-layer behavior

In `KubernetesSandboxService.create_sandbox()`:

- Compute `expires_at: Optional[datetime]`
- If `request.timeout is None` and provider does not support manual cleanup, fail early with a clear message

Suggested message:

- `"Manual cleanup mode is not supported by the current Kubernetes workload provider."`

### BatchSandbox provider behavior

If supported by the CRD:

- Make `expires_at` optional in provider interfaces
- Omit `spec.expireTime` when `expires_at is None`
- `get_expiration()` should return `None` when the field is absent
- `update_expiration()` should reject manual sandboxes instead of silently enabling TTL

If not supported by the CRD:

- Return `False` from `supports_manual_cleanup()`
- Keep current `expireTime` behavior unchanged

### agent-sandbox provider behavior

If supported by the CRD:

- Make `expires_at` optional in provider interfaces
- Omit `spec.shutdownTime` when `expires_at is None`
- `get_expiration()` should return `None` when the field is absent
- `update_expiration()` should reject manual sandboxes

If not supported by the CRD:

- Return `False` from `supports_manual_cleanup()`
- Keep current `shutdownTime` behavior unchanged

## 4. Interface changes

Files likely affected:

- `server/src/services/sandbox_service.py`
- `server/src/services/k8s/workload_provider.py`

Required updates:

- Any method signature currently assuming `expires_at: datetime` should be reviewed
- Provider creation/update/get-expiration flows should allow `Optional[datetime]` where needed
- Abstract service docs should describe manual cleanup semantics

## Error Handling Guidance

Recommended failure cases:

### Unsupported runtime/provider

Case:

- User omits `timeout`
- Provider cannot represent non-expiring sandbox

Response:

- HTTP 400

Message:

- `"Manual cleanup mode is not supported by the current runtime/provider."`

### Renew called for manual sandbox

Response:

- HTTP 409 preferred

Message:

- `"Sandbox <id> does not have automatic expiration enabled."`

### Invalid timeout values

Keep current behavior:

- Reject `timeout=0`
- Reject negative values
- Reject values above max bound

## Compatibility Plan

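This refactor should preserve existing behavior for current TTL-based users; it is not strictly backward compatible at the protocol level once manual-cleanup sandboxes exist (see "Compatibility and Rollout").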

Expected compatibility behavior:

- Existing clients sending `timeout` continue to work unchanged
- Existing responses for TTL sandboxes remain unchanged
- New manual-cleanup behavior is opt-in via omission of `timeout`

Compatibility caveat:

- Any generated SDKs may need regeneration because `timeout` and `expiresAt` types change from required to optional
- Generated SDKs should also tolerate explicit `null` values in optional lifecycle fields, not only missing fields
- Cross-SDK request shapes do not need to be byte-for-byte identical if language constraints differ. In particular, the
  C# SDK may use an explicit `ManualCleanup` flag instead of `timeout=null` so it can keep "unset means use default TTL"
  distinct from "explicitly request manual cleanup".

## Testing Plan

### API/schema tests

Files likely affected:

- `server/tests/test_schema.py`
- route tests covering create/get/list/renew

Add coverage for:

- Create request without `timeout`
- Create request with valid `timeout`
- Reject `timeout=0`
- Create response with `expiresAt=null`
- Sandbox model with `expiresAt=null`

### Docker tests

File likely affected:

- `server/tests/test_docker_service.py`

Add coverage for:

- Manual sandbox creation does not schedule expiration
- Manual sandbox creation does not write expiration label
- Manual sandbox get/list returns `expiresAt=None`
- Server restart restore path ignores manual sandboxes without warning
- Renew expiration on manual sandbox fails clearly
- TTL sandbox behavior remains unchanged

### Kubernetes service tests

Files likely affected:

- `server/tests/k8s/test_kubernetes_service.py`
- `server/tests/k8s/test_batchsandbox_provider.py`
- `server/tests/k8s/test_agent_sandbox_provider.py`

Add coverage for:

- Manual mode rejected when provider capability is false
- Manual mode omits expiration fields when provider capability is true
- Manual mode writes the runtime marker when provider capability is true
- `get_expiration()` returns `None` when expiration field is absent
- Renew expiration fails for manual sandboxes
- TTL sandbox behavior remains unchanged

### Spec/SDK validation

Follow-up checks:

- Regenerate or validate OpenAPI docs if needed
- Verify generated SDKs handle optional `timeout` and nullable `expiresAt`

## Suggested Implementation Order

1. Update schema models in `server/src/api/schema.py`
2. Update OpenAPI spec in `specs/sandbox-lifecycle.yml`
3. Refactor Docker runtime to support `expires_at: Optional[datetime]`
4. Add Kubernetes provider capability plumbing
5. Implement Kubernetes manual mode only where confirmed supported
6. Add and update tests
7. Regenerate SDK/spec artifacts if required by repo workflow

## Open Questions Before Coding

These should be resolved early in the branch:

1. Does BatchSandbox allow `spec.expireTime` to be omitted?
2. Does agent-sandbox allow `spec.shutdownTime` to be omitted?
3. Should renew-on-manual return `400` or `409`?
4. Should list/get expose any explicit hint that a sandbox is manual, or is `expiresAt=null` sufficient?

Recommended implementation default for questions 1 and 2 until confirmed:

- Return `False` from `supports_manual_cleanup()` for both Kubernetes providers
- Enable Kubernetes manual mode only after CRD behavior is verified by tests or upstream documentation

Recommended answer for question 4:

- `expiresAt=null` is sufficient for the first iteration

## Summary

The smallest practical refactor is:

- Make `timeout` optional
- Treat missing `timeout` as manual cleanup mode
- Make `expiresAt` nullable
- Support manual mode in Docker immediately
- Gate Kubernetes support behind provider capability and CRD validation
- Keep `renew_expiration()` TTL-only

This preserves current behavior while creating a clear path to non-expiring sandboxes with limited API churn.


================================================
FILE: docs/package.json
================================================
{
  "name": "opensandbox-docs",
  "private": true,
  "packageManager": "pnpm@9.15.0",
  "scripts": {
    "docs:sync": "node .vitepress/scripts/docs-manifest.mjs",
    "docs:spec": "node ../scripts/spec-doc/generate-spec.js --output docs/public/api/spec-inline.js",
    "docs:dev": "pnpm docs:sync && pnpm docs:spec && vitepress dev",
    "docs:build": "pnpm docs:sync && pnpm docs:spec && vitepress build",
    "docs:preview": "vitepress preview"
  },
  "devDependencies": {
    "vitepress": "^1.6.4"
  }
}


================================================
FILE: docs/secure-container.md
================================================
# Secure Container Runtime Guide

This guide explains how to use secure container runtimes with OpenSandbox to provide hardware-level isolation for executing untrusted AI-generated code.

## Table of Contents

- [Overview](#overview)
- [Server Configuration](#server-configuration)
- [Docker Mode](#docker-mode)
- [Kubernetes Mode](#kubernetes-mode)
- [User Guide](#user-guide)
- [Administrator Guide](#administrator-guide)
- [Troubleshooting and Best Practices](#troubleshooting-and-best-practices)

---

## Overview

### What are Secure Container Runtimes?

Secure container runtimes provide stronger isolation than the standard runc runtime used by Docker and containerd. They add additional security layers through different mechanisms:

| Runtime | Isolation Mechanism | Startup Overhead | Memory Overhead | Best For |
|---------|---------------------|------------------|-----------------|----------|
| **runc** (default) | Process-level (Linux namespaces and cgroups, shared host kernel) | ~0ms | Minimal | Trusted workloads, local development |
| **gVisor** | User-space kernel (syscall interception) | ~10-50ms | ~50MB | General workloads with low overhead |
| **Kata (QEMU)** | Full VM with QEMU hypervisor | ~500ms | ~20-50MB | Maximum compatibility and isolation |
| **Kata (Firecracker)** | MicroVM with Firecracker hypervisor | ~125ms | ~5MB | High density, minimal footprint |
| **Kata (CLH)** | Cloud Hypervisor | ~200ms | ~10-20MB | Balanced performance and isolation |

### Why Use Secure Runtimes?

OpenSandbox is designed to execute untrusted code generated by AI models (Claude, GPT-4, Gemini, etc.). Secure runtimes provide:

1. **Container Escape Protection**: Prevents malicious code from breaking out of the container
2. **Kernel-Level Isolation**: Each sandbox gets its own kernel context
3. **Multi-Tenant Safety**: Different users' sandboxes are strongly isolated
4. **Compliance**: Meets security requirements for regulated industries

### Supported Runtime Types

OpenSandbox supports the following secure runtime types through server-level configuration:

- `"gvisor"` - Google gVisor with runsc
- `"kata"` - Kata Containers with QEMU hypervisor (Kata's default hypervisor)
- `"firecracker"` - Kata Containers with Firecracker hypervisor
- `""` (empty) - Standard runc (default, no secure runtime)

### Key Design Principle

**Server-Level Configuration**: The secure runtime is configured once at the server level by administrators. All sandboxes on that server transparently use the configured runtime. SDK users and API callers require **no code changes**.

---

## Server Configuration

Secure runtimes are configured through the `~/.sandbox.toml` configuration file. The server validates the configured runtime at startup and will refuse to start if the runtime is unavailable.

### Configuration File

Edit `~/.sandbox.toml`:

```toml
[runtime]
type = "docker"  # or "kubernetes"
execd_image = "opensandbox/execd:latest"

# Secure container runtime configuration
# When enabled, ALL sandboxes on this server use the specified runtime
[secure_runtime]
# Runtime type: "", "gvisor", "kata", "firecracker"
type = ""

# Docker mode: OCI runtime name as registered in /etc/docker/daemon.json (e.g., "runsc" for gVisor, "kata" for Kata)
# Required when runtime.type = "docker" and type is not empty
docker_runtime = "runsc"

# Kubernetes mode: RuntimeClass name (e.g., "gvisor", "kata-qemu", "kata-fc")
# Required when runtime.type = "kubernetes" and type is not empty
k8s_runtime_class = "gvisor"
```

### Configuration Examples

#### Example 1: gVisor on Docker

```toml
[runtime]
type = "docker"
execd_image = "opensandbox/execd:latest"

[secure_runtime]
type = "gvisor"
docker_runtime = "runsc"
k8s_runtime_class = "gvisor"
```

#### Example 2: Kata Containers on Kubernetes

```toml
[runtime]
type = "kubernetes"
execd_image = "opensandbox/execd:latest"

[secure_runtime]
type = "kata"
docker_runtime = "kata"  # Ignored in Kubernetes mode
k8s_runtime_class = "kata-qemu"
```

#### Example 3: Kata + Firecracker on Kubernetes

```toml
[runtime]
type = "kubernetes"
execd_image = "opensandbox/execd:latest"

[secure_runtime]
type = "firecracker"
docker_runtime = ""  # Not supported in Docker mode
k8s_runtime_class = "kata-fc"
```

### Startup Validation

When the server starts, it automatically validates that the configured secure runtime is available:

```bash
$ opensandbox-server
INFO     Validating secure runtime for Docker backend
INFO     Docker OCI runtime 'runsc' is available: {...}
INFO     Application startup complete.
```

If the runtime is not available, the server will refuse to start with a clear error message:

```
ERROR    Configured Docker runtime 'runsc' is not available.
        Available runtimes: runc.
        Please install and configure it in /etc/docker/daemon.json.
```

---

## Docker Mode

Docker mode is fully supported for secure container runtimes.

### Prerequisites

- Docker daemon installed and running
- Secure runtime installed on the host

### gVisor Setup for Docker

#### Step 1: Install gVisor runsc

For Docker mode, you only need to install the **runsc** OCI runtime:

```bash
# Ubuntu/Debian
curl -fsSL https://gvisor.dev/archive.key | sudo gpg --dearmor -o /usr/share/keyrings/gvisor-archive-keyring.gpg
echo "deb [signed-by=/usr/share/keyrings/gvisor-archive-keyring.gpg] https://storage.googleapis.com/gvisor/releases release main" | \
  sudo tee /etc/apt/sources.list.d/gvisor.list
sudo apt-get update && sudo apt-get install -y runsc

# Verify installation
runsc --version
```

> **Note**: For Docker mode, only `runsc` is required. The `containerd-shim-runsc-v1` is only needed for Kubernetes/containerd.
>
> **Reference**: See [gVisor Installation Guide](https://gvisor.dev/docs/user_guide/install/) for other distributions and installation methods.

#### Step 2: Configure Docker daemon

Use the `runsc install` command to automatically configure Docker daemon:

```bash
sudo runsc install
```

Or manually edit `/etc/docker/daemon.json`:

```json
{
  "runtimes": {
    "runsc": {
      "path": "/usr/bin/runsc",
      "runtimeArgs": [
        "--platform=systrap",
        "--network=host"
      ]
    }
  }
}
```

Restart Docker:

```bash
sudo systemctl restart docker
```

> **Reference**: See [gVisor Docker Quick Start](https://gvisor.dev/docs/user_guide/quick_start/docker/) for more details.

#### Step 3: Configure OpenSandbox Server

Edit `~/.sandbox.toml`:

```toml
[runtime]
type = "docker"
execd_image = "opensandbox/execd:latest"

[secure_runtime]
type = "gvisor"
docker_runtime = "runsc"
```

#### Step 4: Start Server and Verify

```bash
opensandbox-server
```

Create a test sandbox:

```bash
curl -X POST http://localhost:8080/v1/sandboxes \
  -H "Content-Type: application/json" \
  -d '{
    "image": {"uri": "python:3.11"},
    "timeout": 3600,
    "resourceLimits": {"cpu": "500m", "memory": "512Mi"},
    "entrypoint": ["python", "-u", "-c", "import time\nwhile True: print('hello from gVisor!'); time.sleep(1)"],
    "metadata": {
      "name": "gvisor-docker-sandbox"
    }
  }'
```

Verify the runtime:

```bash
docker ps --format "{{.ID}}\t{{.Image}}\t{{.Names}}"
docker inspect <container_id> | grep -A2 Runtime
# Expected output:
# "Runtime": "runsc",
```

### Kata Containers Setup for Docker

#### System Requirements

Kata Containers requires hardware virtualization support. Verify your system meets the following requirements:

**Hardware Virtualization Support:**
```bash
# Check if CPU supports hardware virtualization (VT-x for Intel, AMD-V for AMD)
lscpu | grep Virtualization
# Expected output: Virtualization: VT-x (Intel) or AMD-V (AMD)

# Alternatively, check the CPU flags directly (works for both Intel and AMD)
grep -E --color=auto 'vmx|svm' /proc/cpuinfo
# Expected: vmx (Intel) or svm (AMD) flags present
```

**KVM Module:**
```bash
# Check if KVM module is loaded
lsmod | grep kvm
# Expected: kvm_intel (Intel) or kvm_amd (AMD)

# If not loaded, load KVM module
sudo modprobe kvm_intel  # For Intel
# or
sudo modprobe kvm_amd    # For AMD
```

**Kernel Requirements:**
- Linux kernel 5.10 or later recommended
- KVM enabled in kernel config

**Docker Requirements:**
- Docker 20.10 or later
- `/etc/docker/daemon.json` configured for Kata runtime

#### Installation

Download and install Kata Containers static binaries from GitHub releases:

```bash
# Find the latest release at https://github.com/kata-containers/kata-containers/releases
KATA_VERSION="3.27.0"
wget https://github.com/kata-containers/kata-containers/releases/download/${KATA_VERSION}/kata-static-${KATA_VERSION}-amd64.tar.zst

# Extract to root directory - Kata will be installed in /opt/kata
zstd -d kata-static-${KATA_VERSION}-amd64.tar.zst
sudo tar -xvf kata-static-${KATA_VERSION}-amd64.tar -C /

# Create symbolic links for PATH access
sudo ln -sf /opt/kata/bin/kata-runtime /usr/local/bin/kata-runtime
sudo ln -sf /opt/kata/bin/containerd-shim-kata-v2 /usr/local/bin/containerd-shim-kata-v2

# Verify installation
kata-runtime --version
```

#### Configure Docker Daemon

Edit `/etc/docker/daemon.json` to register Kata as a runtime:

```json
{
  "default-runtime": "runc",
  "runtimes": {
    "kata": {
      "runtimeType": "io.containerd.kata.v2"
    }
  }
}
```

Restart Docker to apply changes:

```bash
sudo systemctl restart docker

# Verify Kata is available in Docker
docker info | grep -A5 Runtimes
# Expected output should include "io.containerd.runc.v2 kata"
```

#### Configure OpenSandbox Server

Edit `~/.sandbox.toml`:

```toml
[runtime]
type = "docker"
execd_image = "opensandbox/execd:latest"

[secure_runtime]
type = "kata"
docker_runtime = "kata"
```

#### Verify Installation

**Test with OpenSandbox API**

Create a sandbox and verify it's running in a VM by checking the kernel:

```bash
# Create a test sandbox
curl --location 'http://127.0.0.1:8080/v1/sandboxes' \
  --header 'Content-Type: application/json' \
  --data '{
    "image": {"uri": "ubuntu:latest"},
    "timeout": 3600,
    "resourceLimits": {"cpu": "500m", "memory": "512Mi"},
    "entrypoint": ["/bin/bash", "-c", "while true; do uname -a; sleep 1; done"],
    "metadata": {
      "name": "kata-sandbox"
    }
  }'
```

Check the container's kernel to verify VM isolation:

```bash
# Get the container ID
docker ps | grep kata-sandbox

# Check the kernel inside the container (should be different from host)
docker exec <container_id> uname -a
# Expected output: Linux <hostname> 5.10.x-generic #x86_64 ... (Kata VM kernel)

# Compare with host kernel
uname -a
# Host kernel might be different version or have different hostname
```

**Key Indicators of Kata VM:**
- Container runs in a separate kernel with different hostname
- Kernel version is typically `5.10.x` (Kata's guest kernel)
- Host process list shows `qemu-system-x86_64` or similar hypervisor process

---

## Kubernetes Mode

Kubernetes mode supports secure runtimes through RuntimeClass resources.

### Prerequisites

- Kubernetes cluster with containerd runtime
- Secure runtime installed on all nodes
- RuntimeClass resources created

### gVisor Setup for Kubernetes

#### Step 1: Install gVisor Components on All Nodes

For Kubernetes with containerd, you need to install **two** components:

1. **runsc** - the gVisor OCI runtime
2. **containerd-shim-runsc-v1** - the containerd shim for gVisor

```bash
# On each node - Ubuntu/Debian
curl -fsSL https://gvisor.dev/archive.key | sudo gpg --dearmor -o /usr/share/keyrings/gvisor-archive-keyring.gpg
echo "deb [signed-by=/usr/share/keyrings/gvisor-archive-keyring.gpg] https://storage.googleapis.com/gvisor/releases release main" | \
  sudo tee /etc/apt/sources.list.d/gvisor.list
sudo apt-get update

# Install both gVisor components
sudo apt-get install -y runsc containerd-shim-runsc-v1

# Verify installation
runsc --version
containerd-shim-runsc-v1 --version
```
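> **Reference**: See the [gVisor Installation Guide](https://gvisor.dev/docs/user_guide/install/) for other distributions and installation methods, and the [gVisor containerd configuration guide](https://gvisor.dev/docs/user_guide/containerd/configuration/) for shim configuration options.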

#### Step 2: Configure containerd

Edit `/etc/containerd/config.toml`:

```toml
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runsc]
          runtime_type = "io.containerd.runsc.v1"
          [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runsc.options]
            TypeUrl = "io.containerd.runsc.v1.options"
            ConfigPath = "/etc/containerd/runsc.toml"
```

```bash
sudo tee /etc/containerd/runsc.toml > /dev/null <<'EOF'
[runsc]
  platform = "ptrace"
EOF
```

Restart containerd:

```bash
sudo systemctl restart containerd
```

#### Step 3: Create RuntimeClass Resource

```yaml
# gvisor-runtimeclass.yaml
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  name: gvisor
handler: runsc
scheduling:
  nodeSelector:
    kubernetes.io/arch: amd64
```

```bash
kubectl apply -f gvisor-runtimeclass.yaml
```

#### Step 4: Configure OpenSandbox Server

Edit `~/.sandbox.toml`:

```toml
[runtime]
type = "kubernetes"
execd_image = "opensandbox/execd:latest"

[secure_runtime]
type = "gvisor"
k8s_runtime_class = "gvisor"
```

#### Step 5: Verify Installation

```bash
# Test the RuntimeClass
kubectl run test-gvisor --restart=Never --image=hello-world --overrides='{"apiVersion":"v1","spec":{"runtimeClassName":"gvisor"}}'
kubectl logs test-gvisor
kubectl delete pod test-gvisor
```

### Kata Containers Setup for Kubernetes

#### Step 1: Install Kata Containers

Follow the [official Kata Containers installation guide](https://github.com/kata-containers/kata-containers/blob/main/tools/packaging/kata-deploy/helm-chart/README.md).

Quick installation using Helm:

```bash
# Install kata-deploy which will set up Kata Containers via DaemonSet
helm install kata-deploy "oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy" --version "3.27.0" --namespace kube-system --create-namespace

# Wait for kata-deploy pods to be ready
kubectl wait --for=condition=ready pod -l name=kata-deploy -n kube-system --timeout=300s
```

> **Note**: The `kata-deploy` DaemonSet will automatically configure containerd on all nodes. Manual containerd configuration is not required when using kata-deploy.

#### Step 2: Verify Installation

Check that Kata Containers is installed and RuntimeClasses are created:

```bash
# Check RuntimeClasses
kubectl get runtimeclass

# Expected output:
# NAME         HANDLER     AGE
# kata         kata-qemu   10m
# kata-qemu    kata-qemu   10m
# kata-clh     kata-clh    10m
# kata-fc      kata-fc     10m

# Test Kata with a simple pod
kubectl run test-kata --restart=Never --image=hello-world --overrides='{"apiVersion":"v1","spec":{"runtimeClassName":"kata-qemu"}}'
kubectl logs test-kata
kubectl delete pod test-kata
```

### Creating Pools for Different Runtimes (Optional)

When using Pool CRDs for pre-warmed sandboxes, create separate pools for each runtime type:

```yaml
# gvisor-pool.yaml
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: Pool
metadata:
  name: gvisor-pool
  labels:
    runtime: gvisor
spec:
  template:
    spec:
      runtimeClassName: gvisor
      containers:
        - name: sandbox-container
          image: opensandbox/code-interpreter:v1.0.2
  capacitySpec:
    bufferMax: 10
    bufferMin: 2
    poolMax: 20
    poolMin: 5
```

---

## User Guide

This section is for AI application developers using OpenSandbox.

### No Code Changes Required

**Important**: The secure runtime is configured at the server level. Your code does not need to change.

Simply create a sandbox using the OpenSandbox Lifecycle API - the server automatically applies the configured secure runtime:

**Create a test sandbox:**

```bash
curl -X POST http://localhost:8080/v1/sandboxes \
  -H "Content-Type: application/json" \
  -d '{
    "image": {"uri": "python:3.11"},
    "timeout": 3600,
    "resourceLimits": {"cpu": "500m", "memory": "512Mi"},
    "entrypoint": ["python", "-u", "-c", "import time\nwhile True: print(\"hello from secure sandbox!\"); time.sleep(1)"],
    "metadata": {
      "name": "my-secure-sandbox"
    }
  }'
```

**Response:**

```json
{
  "id": "550e8400-e29b-41d4-a716-446655440000",
  "status": "running"
}
```

The sandbox will automatically use the secure runtime configured on the server (gVisor, Kata, or runc).

### How It Works

1. **Administrator** configures the secure runtime in `~/.sandbox.toml`
2. **Server** validates the runtime at startup
3. **Server** automatically injects the runtime into each sandbox:
   - Docker mode: Adds `runtime` to HostConfig
   - Kubernetes mode: Adds `runtimeClassName` to Pod spec
4. **User** creates sandboxes via API - no runtime parameter needed

### Verifying Runtime Isolation

After creating a sandbox, verify the runtime being used:

**Docker mode:**
```bash
docker ps --format "{{.ID}}\t{{.Image}}\t{{.Names}}"
docker inspect <container_id> | grep -A2 Runtime
# Expected output for gVisor:
# "Runtime": "runsc",
```

**Kubernetes mode:**
```bash
kubectl get pod <pod-name> -o jsonpath='{.spec.runtimeClassName}'
# Expected output for gVisor:
# gvisor
```

---

## Administrator Guide

This section is for platform operators and SREs managing secure runtime infrastructure.

### Prerequisites

Secure runtimes must be installed and configured on your infrastructure **before** configuring OpenSandbox. OpenSandbox does not install runtimes automatically.

### Installation Summary

| Runtime | Docker | Kubernetes |
|---------|--------|------------|
| gVisor | Install runsc → Configure daemon.json | Install runsc → Configure containerd → Create RuntimeClass |
| Kata (QEMU) | Install kata-runtime → Configure daemon.json | Install Kata → Configure containerd → Create RuntimeClass |
| Kata (Firecracker) | Not supported | Install Kata → Configure containerd → Create RuntimeClass |

### Configuration Validation

The server validates secure runtime configuration at startup:

1. **Docker mode**: Checks if the runtime exists in Docker daemon's runtime list
2. **Kubernetes mode**: Checks if the RuntimeClass exists in the cluster

If validation fails, the server refuses to start with a clear error message.

### Security Best Practices

1. **Default to gVisor**: Provides good security with acceptable performance for most workloads
2. **Use Kata for Untrusted Code**: Maximum isolation for completely unknown code
3. **Regular Updates**: Keep runtimes updated for security patches
4. **Test Compatibility**: Validate your workloads with the chosen runtime before production
5. **Monitor Resources**: Secure runtimes have higher memory overhead

### Runtime Selection Guidelines

| Use Case | Recommended Runtime | Reasoning |
|----------|---------------------|-----------|
| Development/Testing | runc (default) | Fastest startup, lowest overhead |
| Production AI Code Execution | gVisor | Good balance of security and performance |
| High-Security Requirements | Kata (QEMU) | Maximum isolation, full compatibility |
| High-Density Multi-Tenant | Kata (Firecracker) | Minimal memory overhead per sandbox |
| Untrusted Network Code | gVisor or Kata | Syscall filtering prevents network attacks |

---

## Troubleshooting and Best Practices

### Common Issues

#### 1. Runtime Not Found (Docker)

**Error**: `Configured Docker runtime 'runsc' is not available.`

**Solution**: Ensure the runtime is configured in `/etc/docker/daemon.json` and Docker has been restarted:

```bash
sudo systemctl restart docker
docker info | grep -A5 Runtimes
```

#### 2. RuntimeClass Not Found (Kubernetes)

**Error**: `RuntimeClass 'gvisor' does not exist.`

**Solution**: Check which RuntimeClasses exist, then create the missing RuntimeClass resource:

```bash
kubectl get runtimeclass
kubectl apply -f gvisor-runtimeclass.yaml
```

#### 3. Syscall Compatibility Issues

**Error**: Container exits with code 1, no logs

**Cause**: gVisor doesn't implement all syscalls. Some applications may not be compatible.

**Solution**: Check the [gVisor compatibility guide](https://gvisor.dev/docs/user_guide/compatibility/). Try using Kata (QEMU) which has better compatibility.

#### 4. Pod Stuck in ContainerCreating

**Cause**: RuntimeClass handler not configured on the node.

**Solution**: Verify containerd configuration:

```bash
# On the node
sudo containerd config dump
sudo systemctl restart containerd
```

### Compatibility Matrix

| Feature | runc | gVisor | Kata (QEMU) | Kata (CLH) | Kata (FC) |
|---------|------|--------|-------------|------------|-----------|
| Syscall Compatibility | Full | Partial | Full | Full | Limited |
| GPU Support | Yes | No | Yes | Yes | No |
| IPv6 | Yes | Yes | Yes | Yes | Yes |
| Privileged Mode | Yes | No | Yes | Yes | No |
| Docker Volume | Yes | Yes | Yes | Yes | Yes |
| Systemd | Yes | No | Yes | Yes | No |

### Getting Help

- **Documentation**: [OpenSandbox GitHub](https://github.com/alibaba/OpenSandbox)
- **Issues**: Report bugs via [GitHub Issues](https://github.com/alibaba/OpenSandbox/issues)
- **Design Document**: See [OSEP-0004](/oseps/0004-secure-container-runtime) for complete design details


================================================
FILE: docs/single_host_network.md
================================================
# Single-host network in OpenSandbox

Detailed routing for a single-host deployment: how execd’s proxy gives every sandbox access to HTTP and WebSocket ports through one exposed host port.

![Single-host sandbox routing](assets/single_host_network.png)

## Single-host routing model
- Every sandbox container starts `execd` listening on container port `44772`. `execd` bundles a lightweight reverse proxy that intercepts requests with the `/proxy/{port}` prefix and forwards them to `127.0.0.1:{port}` inside the same container.
- The Docker runtime binds only the host side of the execd proxy port (labeled `opensandbox.io/embedding-proxy-port`). Callers use `get_endpoint(..., port=X)` to receive `{public_host}:{host_proxy_port}/proxy/{X}`, and execd transparently routes the request back to the sandbox service on port `X`.
- Because the proxy preserves `Upgrade`, `Connection`, and other HTTP headers, HTTP, Server-Sent Events, and WebSocket traffic share the same mapped host port without additional configuration.
- With this setup, a single host port per sandbox suffices to reach **all** container ports. You can safely run many sandboxes on one machine without worrying about overlapping host port allocations.
- When the caller lives inside the same Docker network (e.g., another container or Kubernetes pod), use `get_endpoint(..., resolve_internal=True)` to bypass the host mapping and return the sandbox IP (e.g., `172.17.0.3:5900`) instead.
- The diagram above shows the routing path: host traffic hits the proxy port, execd rewrites the request towards the target container port, and upstream services remain isolated within the sandbox.

## Network modes

### Host network mode (single-host constraints)
- Containers share the host network stack (`network_mode=host`) so sandbox ports are directly accessible on the host.
- Because each sandbox binds its ports on the host, this mode practically limits you to one sandbox instance per host unless you reserve dedicated ports per sandbox.
- `get_endpoint(..., port=X)` returns `{public_host}:{X}` with no `/proxy/` prefix, so the caller needs to know the exact host port and the host must manage firewall rules for each sandbox port.

### Bridge network mode (default for single-host deployments)
- Docker places sandboxes on an isolated bridge network, preventing container ports from being reachable without explicit mapping.
- For single-host scaling, OpenSandbox maps only execd’s proxy port (`44772`) and, optionally, port `8080`. Any other container port stays private and is reached via the proxy.
- The reverse proxy label (`opensandbox.io/embedding-proxy-port`) identifies a host port that fronts `execd`. `get_endpoint(..., port=X)` returns `{public_host}:{host_proxy_port}/proxy/{X}`, so all internal ports can share the same host binding.
- Port `8080` may also receive a direct host binding (`opensandbox.io/http-port`), providing a conventional HTTP endpoint without the proxy path when required.
- This bridge setup lets a single machine host many sandboxes without port conflicts, because the same host proxy port can multiplex requests for HTTP, SSE, WebSocket, VNC, etc.

## Operational notes
- If execd’s proxy port (`44772`) or the optional `8080` host mapping is missing, `get_endpoint` responds with HTTP 500 and a message stating which mapping was unavailable.
- Always keep the `/proxy/{port}` prefix (including any additional path or query string) when embedding URLs in browser-based clients or SDKs so that execd can correctly dispatch the request.
- This proxy-based approach means additional ports never need to be published on the host, simplifying firewall management and improving security.


================================================
FILE: docs/zh/index.md
================================================
---
layout: home

hero:
  name: OpenSandbox
  text: 面向 AI 应用的通用沙箱基础设施
  tagline: 在隔离运行时中安全执行命令、文件操作、代码解释器、浏览器与开发工具。
  actions:
    - theme: brand
      text: 快速开始
      link: /zh/overview/home
    - theme: alt
      text: 查看架构
      link: /zh/overview/architecture

features:
  - title: 沙箱全生命周期与运行时管理
    details: 支持沙箱实例创建、监控、续期与销毁，覆盖 Docker 与 Kubernetes 场景。
  - title: 多语言 SDK 与统一协议
    details: 提供 Python、Java/Kotlin、JavaScript SDK，并基于统一的生命周期与执行协议进行开发。
  - title: 强大的沙箱内执行能力
    details: 支持命令执行、文件系统操作、多语言代码解释、端口暴露以及日志/指标流式获取。
  - title: 面向真实 AI 工作负载
    details: 适配 Coding Agent、浏览器自动化、远程开发、AI 代码执行与强化学习训练等场景。
---

## 典型落地场景

OpenSandbox 已进入 [CNCF Landscape](https://landscape.cncf.io/?item=orchestration-management--scheduling-orchestration--opensandbox)。

<div class="scenario-grid">
  <a class="scenario-card" href="./examples/claude-code/readme">
    <h3>Coding Agent</h3>
    <p>在隔离沙箱中运行 Claude Code、Gemini CLI、Codex 等工具链。</p>
  </a>
  <a class="scenario-card" href="./examples/playwright/readme">
    <h3>浏览器自动化</h3>
    <p>运行 Chrome、Playwright 等工作负载，结合可控运行时、文件系统与网络策略。</p>
  </a>
  <a class="scenario-card" href="./examples/vscode/readme">
    <h3>远程开发环境</h3>
    <p>提供 VS Code Web 与桌面化开发环境，提升云端开发的安全性与一致性。</p>
  </a>
  <a class="scenario-card" href="./examples/code-interpreter/readme">
    <h3>AI 代码执行</h3>
    <p>安全执行模型生成代码，流式采集输出并在可复现环境中快速迭代。</p>
  </a>
  <a class="scenario-card" href="./examples/rl-training/readme">
    <h3>强化学习训练</h3>
    <p>在可控资源下运行 RL 训练任务，并利用沙箱生命周期能力管理训练过程。</p>
  </a>
</div>

更多场景请查看 [示例](./examples/readme)。


================================================
FILE: examples/README.md
================================================
# OpenSandbox Examples

Examples for common OpenSandbox use cases. Each subdirectory contains runnable code and documentation.

## Integrations / Sandboxes
- 🧰 [**aio-sandbox**](aio-sandbox): All-in-one sandbox setup using OpenSandbox SDK and agent-sandbox
- <img src="https://kubernetes.io/icons/favicon-32.png" alt="Kubernetes" width="16" height="16" style="display:inline-block;width:16px;height:16px;vertical-align:middle;margin-right:4px;" /> [**agent-sandbox**](agent-sandbox): Create a kubernetes-sigs/agent-sandbox instance and run a command
- 🧪 [**code-interpreter**](code-interpreter): Code Interpreter SDK singleton example
- 💾 [**host-volume-mount**](host-volume-mount): Mount host directories into sandboxes (read-write, read-only, subpath)
- ☁️ [**docker-ossfs-volume-mount**](docker-ossfs-volume-mount): Mount OSSFS volumes in Docker runtime (inline credentials, subpath, sharing)
- 🎯 [**rl-training**](rl-training): Reinforcement learning training loop inside a sandbox
- <img src="https://img.shields.io/badge/-%20-D97757?logo=claude&logoColor=white&style=flat-square" alt="Claude" width="16" height="16" style="display:inline-block;width:16px;height:16px;vertical-align:middle;margin-right:4px;" /> [**claude-code**](claude-code): Call Claude (Anthropic) API/CLI within the sandbox
- <img src="https://geminicli.com/favicon.ico" alt="Google Gemini" width="16" height="16" style="display:inline-block;width:16px;height:16px;vertical-align:middle;margin-right:4px;" /> [**gemini-cli**](gemini-cli): Call Google Gemini within the sandbox
- <img src="https://developers.openai.com/favicon.png" alt="OpenAI" width="16" height="16" style="display:inline-block;width:16px;height:16px;vertical-align:middle;margin-right:4px;" /> [**codex-cli**](codex-cli): Call OpenAI/Codex-like models within the sandbox
- <img src="https://www.kimi.com/favicon.ico" alt="Kimi" width="16" height="16" style="display:inline-block;width:16px;height:16px;vertical-align:middle;margin-right:4px;" /> [**kimi-cli**](kimi-cli): Call Kimi Code CLI (Moonshot AI) within the sandbox
- <img src="https://img.shields.io/badge/-%20-1C3C3C?logo=langgraph&logoColor=white&style=flat-square" alt="LangGraph" width="16" height="16" style="display:inline-block;width:16px;height:16px;vertical-align:middle;margin-right:4px;" /> [**langgraph**](langgraph): LangGraph agent orchestrating sandbox lifecycle + tools
- <img src="https://google.github.io/adk-docs/assets/agent-development-kit.png" alt="Google ADK" width="16" height="16" style="display:inline-block;width:16px;height:16px;vertical-align:middle;margin-right:4px;" /> [**google-adk**](google-adk): Google ADK agent calling OpenSandbox tools
- 🦞 [**nullclaw**](nullclaw): Launch a Nullclaw Gateway inside a sandbox
- 🦞 [**openclaw**](openclaw): Run an OpenClaw Gateway inside a sandbox
- 🖥️ [**desktop**](desktop): Launch VNC desktop (Xvfb + x11vnc) for VNC client connections
- <img src="https://playwright.dev/img/playwright-logo.svg" alt="Playwright" width="16" height="16" style="display:inline-block;width:16px;height:16px;vertical-align:middle;margin-right:4px;" /> [**playwright**](playwright): Launch headless browser (Playwright + Chromium) to scrape web content
- <img src="https://code.visualstudio.com/assets/favicon.ico" alt="VS Code" width="16" height="16" style="display:inline-block;width:16px;height:16px;vertical-align:middle;margin-right:4px;" /> [**vscode**](vscode): Launch code-server (VS Code Web) to provide browser access
- <img src="https://www.google.com/chrome/static/images/chrome-logo.svg" alt="Google Chrome" width="16" height="16" style="display:inline-block;width:16px;height:16px;vertical-align:middle;margin-right:4px;" /> [**chrome**](chrome): Launch headless Chromium with DevTools port exposed for remote debugging

## How to Run
- Set basic environment variables (e.g., `export SANDBOX_DOMAIN=...`, `export SANDBOX_API_KEY=...`)
- Add provider-specific variables as needed (e.g., `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, `GEMINI_API_KEY`, `KIMI_API_KEY`, etc.; model selection is optional)
- Navigate to the example directory and install dependencies: `pip install -r requirements.txt` (or refer to the Dockerfile in the directory)
- Then execute: `python main.py`
- To run in a container, build and run using the `Dockerfile` in the directory
- Summary: First set required environment variables via `export`, then run `python main.py` in the corresponding directory, or build/run the Docker image for that directory.


================================================
FILE: examples/agent-sandbox/README.md
================================================
# Agent-Sandbox Example

This example creates a sandbox backed by `kubernetes-sigs/agent-sandbox` and
executes `echo hello world` via the OpenSandbox Python SDK.

## Prerequisites

- A Kubernetes cluster with the agent-sandbox controller and CRDs installed.
- OpenSandbox server configured with Kubernetes runtime and `workload_provider = "agent-sandbox"`.
- Sandbox image should include `bash` (default example uses `ubuntu:22.04`).

## Start OpenSandbox server

1. Install the server package and generate a starter config (it is adjusted for agent-sandbox in the next step):

```shell
uv pip install opensandbox-server
opensandbox-server init-config ~/.sandbox.toml --example docker
```

2. Update `~/.sandbox.toml` with the following sections:

```toml
[runtime]
type = "kubernetes"
execd_image = "opensandbox/execd:v1.0.7"

[kubernetes]
namespace = "default"
# kubeconfig_path = "/absolute/path/to/kubeconfig"  # optional if running in-cluster
workload_provider = "agent-sandbox"

[agent_sandbox]
shutdown_policy = "Delete"
```

3. Start the server:

```shell
opensandbox-server
```

## Run the example

```shell
uv pip install opensandbox
uv run python examples/agent-sandbox/main.py
```

## Expected output

```text
command output: hello world
```


================================================
FILE: examples/agent-sandbox/main.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import os
from datetime import timedelta

from opensandbox import Sandbox
from opensandbox.config import ConnectionConfig


async def main() -> None:
    """Create a sandbox, run ``echo hello world`` in it, and print the output.

    Settings are read from the environment:

    - ``SANDBOX_DOMAIN``: server address (default ``localhost:8080``).
    - ``SANDBOX_API_KEY``: API key; passed through as ``None`` when unset.
    - ``SANDBOX_IMAGE``: sandbox image (default ``ubuntu:22.04``).

    The sandbox is killed before returning, whether or not the command
    succeeded.
    """
    domain = os.getenv("SANDBOX_DOMAIN", "localhost:8080")
    api_key = os.getenv("SANDBOX_API_KEY")
    image = os.getenv("SANDBOX_IMAGE", "ubuntu:22.04")

    config = ConnectionConfig(
        domain=domain,
        api_key=api_key,
        request_timeout=timedelta(seconds=60),
    )

    sandbox = await Sandbox.create(
        image,
        connection_config=config,
        timeout=timedelta(minutes=10),
    )

    async with sandbox:
        try:
            execution = await sandbox.commands.run("echo hello world")
            # Only the first stdout entry is shown; fall back to an empty
            # string when the command produced no stdout at all.
            stdout = execution.logs.stdout[0].text if execution.logs.stdout else ""
            print(f"command output: {stdout}")
        finally:
            # Kill in ``finally`` so a failing command does not skip cleanup.
            # NOTE(review): presumably leaving the ``async with`` block only
            # releases local resources and the remote sandbox would otherwise
            # live until its 10-minute timeout -- confirm against the SDK.
            await sandbox.kill()


if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: examples/aio-sandbox/README.md
================================================
# All-in-One (AIO) Sandbox Example

This example demonstrates how to create and access an [All-in-One (AIO) Sandbox](https://github.com/agent-infra/sandbox) via OpenSandbox.

## Start OpenSandbox server [local]

You can find the latest versions of the AIO Sandbox image [here](https://github.com/agent-infra/sandbox/pkgs/container/sandbox).

You can pre-pull the target image which is used in the example.

### Notes (Docker runtime requirement)

The server is configured with `runtime.type = "docker"` by default, so it **must** be able to connect to a running Docker daemon.

- **Docker Desktop**: ensure Docker Desktop is running, then verify with `docker version`.
- **Colima (macOS)**: start it first (`colima start`) and export the socket before starting the server:

```shell
export DOCKER_HOST="unix://${HOME}/.colima/default/docker.sock"
```


```shell
# pre-pull target image
docker pull ghcr.io/agent-infra/sandbox:latest
```

Then start the OpenSandbox server; its logs are printed to stdout in the terminal.

```shell
uv pip install opensandbox-server
opensandbox-server init-config ~/.sandbox.toml --example docker
opensandbox-server
```
> Note: `opensandbox-server` runs in the foreground and will keep the current terminal session busy. The example code lives in this repository—clone it and, in a new terminal window/tab, `cd` into the project root before running the AIO sandbox creation steps below.
If you see errors like `FileNotFoundError: [Errno 2] No such file or directory` from `docker/transport/unixconn.py`, it usually means the Docker unix socket is missing / Docker daemon is not running.

## Create and Access the AIO Sandbox Instance

This example uses a fixed configuration for quick start:
- OpenSandbox server: `http://localhost:8080`
- Image: `ghcr.io/agent-infra/sandbox:latest`
- AIO port: `8080`
- Timeout: `300s`

Install dependencies with uv under project root:
```shell
uv pip install opensandbox agent-sandbox==0.0.18
```

Run the example (it will create a sandbox via OpenSandbox, wait until it's Running, then connect to it via agent-sandbox):
```shell
uv run python examples/aio-sandbox/main.py
```

Running it creates an AIO sandbox, lists the home directory, prints `.bashrc`, takes a browser screenshot, and downloads the screenshot to your local machine. Example output:

```text
Creating AIO sandbox with image=ghcr.io/agent-infra/sandbox:latest on OpenSandbox server http://localhost:8080...
[check] sandbox ready after 7.1s
AIO portal endpoint: 127.0.0.1:56123
total 52
drwxr-x--- 10 gem  gem  4096 Dec 15 13:22 .
drwxr-xr-x  1 root root 4096 Dec 15 13:22 ..
-rw-r--r--  1 gem  gem   220 Jan  7  2022 .bash_logout
-rw-r--r--  1 gem  gem    27 Dec 15 13:22 .bashrc
drwxr-xr-x  5 gem  gem  4096 Dec 15 13:22 .cache
drwxrwxr-x  6 gem  gem  4096 Dec 15 13:22 .config
drwxr-xr-x  2 gem  gem  4096 Dec 15 13:22 .ipython
drwxr-xr-x  4 gem  gem  4096 Dec 15 13:22 .jupyter
drwxrwxr-x  4 gem  gem  4096 Dec 15 13:22 .local
drwxr-xr-x  3 gem  gem  4096 Dec 15 13:22 .npm
drwxrwxr-x  3 gem  gem  4096 Dec 15 13:22 .npm-global
drwx------  3 gem  gem  4096 Dec 15 13:22 .pki
-rw-r--r--  1 gem  gem   807 Jan  7  2022 .profile
-rw-rw-r--  1 gem  gem     0 Dec 15 13:22 .Xauthority
export TERM=xterm-256color

Screenshot saved to sandbox_screenshot.png
```

## More examples

For more examples of using the AIO Sandbox, refer to the agent-infra/sandbox [examples](https://github.com/agent-infra/sandbox/tree/main/examples).

## References
- [AIO Sandbox](https://github.com/agent-infra/sandbox/tree/main)
- [AIO Sandbox Python SDK](https://github.com/agent-infra/sandbox/tree/main/sdk/python)


================================================
FILE: examples/aio-sandbox/main.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Create an AIO sandbox via OpenSandbox SDK, then connect to it with agent-sandbox SDK.

This example is intentionally hard-coded for simplicity:
- OpenSandbox server: http://localhost:8080
- Image: ghcr.io/agent-infra/sandbox:latest
- AIO port: 8080
- Timeout: 300s
"""

import time
from datetime import timedelta

import requests
from agent_sandbox import Sandbox as AioSandboxClient
from opensandbox import SandboxSync
from opensandbox.config import ConnectionConfigSync


def check_aio_process(sbx: SandboxSync) -> bool:
    """
    Health check: poll the AIO service at /v1/shell/sessions until it answers 200.

    Makes up to 150 attempts, each with a 1s request timeout followed by a
    0.2s pause, so the wait is about 30s when requests fail fast and longer
    when they run into the timeout.

    Returns:
        True  once the endpoint responds with HTTP 200
        False when all attempts are used up or any unexpected error occurs
    """
    max_attempts = 150
    try:
        target = sbx.get_endpoint(8080)
        start = time.perf_counter()
        sessions_url = f"http://{target.endpoint}/v1/shell/sessions"

        attempt = 0
        while attempt < max_attempts:
            attempt += 1
            try:
                if requests.get(sessions_url, timeout=1).status_code == 200:
                    elapsed = time.perf_counter() - start
                    print(f"[check] sandbox ready after {elapsed:.1f}s")
                    return True
            except Exception:
                # Service not reachable yet; fall through to the pause and retry.
                pass
            time.sleep(0.2)
        return False
    except Exception as exc:
        print(f"[check] failed: {exc}")
        return False


def main() -> None:
    """Create an AIO sandbox through OpenSandbox and exercise it with agent-sandbox.

    Server URL, image, and timeout are hard-coded for simplicity. After the
    sandbox passes ``check_aio_process``, the function resolves the endpoint
    for port 8080, lists the working directory, prints ``.bashrc`` from the
    sandbox home directory, and saves a browser screenshot to
    ``sandbox_screenshot.png``. The sandbox is killed before returning,
    whether or not those steps succeeded.
    """
    server = "http://localhost:8080"
    image = "ghcr.io/agent-infra/sandbox:latest"
    timeout_seconds = 300

    print(f"Creating AIO sandbox with image={image} on OpenSandbox server {server}...")
    sandbox = SandboxSync.create(
        image=image,
        timeout=timedelta(seconds=timeout_seconds),
        metadata={"example": "aio-sandbox"},
        entrypoint=["/opt/gem/run.sh"],
        connection_config=ConnectionConfigSync(domain=server),
        health_check=check_aio_process,
    )

    with sandbox:
        try:
            endpoint = sandbox.get_endpoint(8080)
            print(f"AIO portal endpoint: {endpoint.endpoint}")

            client = AioSandboxClient(base_url=f"http://{endpoint.endpoint}")
            home_dir = client.sandbox.get_context().home_dir

            result = client.shell.exec_command(command="ls -la", timeout=10)
            print(result.data.output)

            content = client.file.read_file(file=f"{home_dir}/.bashrc")
            print(content.data.content)

            # The screenshot arrives in chunks; stream them straight to disk.
            screenshot_path = "sandbox_screenshot.png"
            with open(screenshot_path, "wb") as f:
                for chunk in client.browser.screenshot():
                    f.write(chunk)
            print(f"Screenshot saved to {screenshot_path}")
        finally:
            # Kill in ``finally`` so a failing step does not skip cleanup.
            # NOTE(review): presumably leaving the ``with`` block only releases
            # local resources and the remote sandbox would otherwise live
            # until its timeout -- confirm against the SDK.
            sandbox.kill()


if __name__ == "__main__":
    main()


================================================
FILE: examples/chrome/Dockerfile
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ---- Build stage: compile the Go entrypoint with cgo disabled ----
FROM golang:1.25.4 AS builder
WORKDIR /build

# Copy the module files and download dependencies before the sources, so the
# download step is its own layer ahead of the source copy.
COPY go.mod go.sum ./
RUN go mod download

COPY . .
RUN CGO_ENABLED=0 go build -o /build/entrypoint main.go

# ---- Runtime stage: slim Debian base ----
FROM debian:13-slim

# Install prerequisites, Chromium, the VNC server, and X11 utilities, then
# remove the apt package lists from the layer.
RUN set -eux; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
      ca-certificates \
      wget \
      xdg-utils \
      chromium \
      tigervnc-standalone-server \
      x11-utils; \
    rm -rf /var/lib/apt/lists/*

# Dedicated non-root account (with a Downloads directory) that runs Chrome.
RUN groupadd -r chrome \
    && useradd -r -g chrome -G audio,video chrome \
    && mkdir -p /home/chrome/Downloads \
    && chown -R chrome:chrome /home/chrome

# Pre-create the X11 socket directory, world-writable with the sticky bit,
# while the build still runs as root.
RUN mkdir -p /tmp/.X11-unix && chmod 1777 /tmp/.X11-unix

# Chrome launcher script and the compiled entrypoint from the build stage.
COPY --chmod=a+rx chrome.sh /chrome.sh
COPY --from=builder --chmod=a+rx /build/entrypoint /entrypoint

WORKDIR /home/chrome
USER chrome

ENTRYPOINT [ "/entrypoint" ]


================================================
FILE: examples/chrome/README.md
================================================
# Chrome Browser in OpenSandbox

This example runs Chrome Browser with OpenSandbox runtime.

The image starts a VNC server (`Xtigervnc :1`) and launches Chromium with remote debugging enabled on port `9222`.

## Getting Chrome image

You can build the image from source or pull it from Docker Hub.

### Build from source

```shell
docker build -t opensandbox/chrome .
```

### Pull an existing image

```shell
docker pull opensandbox/chrome:latest

# use acr from china
# docker pull sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/chrome:latest
```

## Start OpenSandbox server

Start the OpenSandbox server and tail stdout from the terminal:

```shell
uv pip install opensandbox-server
opensandbox-server init-config ~/.sandbox.toml --example docker
opensandbox-server
```

## Create and access a Chrome sandbox

Build/pull the image above, then create a sandbox with image `opensandbox/chrome:latest` and an entrypoint that keeps it
alive (e.g., `["/bin/sh", "-c", "sleep infinity"]`), or reuse `tail -f /dev/null`. Make sure the runtime exposes ports
`5901` and `9222` for VNC/DevTools.

```shell
uv pip install opensandbox
uv run python examples/chrome/main.py
```

Then fetch endpoints for 5901/9222 to connect with a VNC client or DevTools, like:

```text
execd daemon running with endpoint='127.0.0.1:48379/proxy/44772'
VNC running with endpoint='127.0.0.1:48379/proxy/5901'
DevTools running with endpoint='127.0.0.1:48379/proxy/9222'/json
```

```text
[ {
   "description": "",
   "devtoolsFrontendUrl": "https://chrome-devtools-frontend.appspot.com/serve_rev/@71a0dbd6672e2ccb6d1008376cbb7acd315cb8d6/inspector.html?ws=127.0.0.1:52302/devtools/page/2215AF60AC345E4BA6D822389CFC743B",
   "faviconUrl": "https://www.gstatic.com/images/branding/searchlogo/ico/favicon.ico",
   "id": "2215AF60AC345E4BA6D822389CFC743B",
   "title": "Google",
   "type": "page",
   "url": "https://www.google.com.hk/",
   "webSocketDebuggerUrl": "ws://127.0.0.1:52302/devtools/page/2215AF60AC345E4BA6D822389CFC743B"
} ]
```

You can also use it from an MCP client; for more information, see
[chrome-devtools-mcp](https://github.com/ChromeDevTools/chrome-devtools-mcp).

## Reference

- [chrome-devtools-mcp](https://github.com/ChromeDevTools/chrome-devtools-mcp)


================================================
FILE: examples/chrome/build.sh
================================================
#!/bin/bash
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Abort on the first failing command and echo every command as it runs.
set -ex

# Image tag to publish; override with the TAG environment variable.
TAG=${TAG:-latest}

# Remove any builder left over from an earlier run; a missing builder is fine.
docker buildx rm chrome-builder || true

# Create a fresh builder, select it, and bootstrap it.
docker buildx create --use --name chrome-builder

docker buildx inspect --bootstrap

docker buildx ls

# Build for amd64 and arm64 and push under both image names.
# The image references are quoted so an unusual TAG value cannot be
# word-split or glob-expanded into extra arguments.
docker buildx build \
  -t "opensandbox/chrome:${TAG}" \
  -t "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/chrome:${TAG}" \
  --platform linux/amd64,linux/arm64 \
  --push \
  .


================================================
FILE: examples/chrome/chrome.sh
================================================
#!/bin/bash

# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -euo pipefail

# Chromium command-line switches, in the order they are passed.
# There are a lot of other interesting flags we could use here:
# https://github.com/microsoft/playwright/blob/20023ab33a1dc04db2d5a3f753760eef33339e73/packages/playwright-core/src/server/chromium/chromiumSwitches.ts#L47
flags=(
  --no-sandbox                        # Chromium's own sandbox can't be used inside a container
  --disable-gpu                       # There is (normally) no GPU available
  --disable-dev-shm-usage             # There is (normally) no shared memory filesystem
  --no-default-browser-check          # Avoids hanging on a "set chrome as default browser" dialog
  --no-first-run                      # Skips Chromium's first-run experience so startup doesn't block on it
  --start-maximized                   # Chrome is the only application running, so use the whole screen
  --disable-field-trial-config        # Keeps things consistent and a little faster (?)
  --remote-debugging-port=9222        # Enable remote debugging (DevTools) on port 9222
  --user-data-dir=/tmp/chrome-data    # DevTools remote debugging requires a non-default data directory
)

# Replace this shell with Chromium, opening Google as the start page.
exec chromium "${flags[@]}" "https://www.google.com"


================================================
FILE: examples/chrome/go.mod
================================================
module github.com/alibaba/opensandbox/chrome-box

go 1.22


================================================
FILE: examples/chrome/go.sum
================================================


================================================
FILE: examples/chrome/main.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"log"
	"net/http"
	"os"
	"os/exec"
	"strings"
	"time"
)

// main is the container entrypoint: it hands control to run and turns a
// returned error into a message on stderr plus exit status 1.
func main() {
	err := run(context.Background())
	if err == nil {
		return
	}

	fmt.Fprintf(os.Stderr, "Error: %v\n", err)
	os.Exit(1)
}

// run supervises the VNC server and Chrome for the lifetime of ctx.
//
// It starts the VNC server, waits until the display answers, starts Chrome,
// waits until its DevTools endpoint answers, and then blocks until the context
// is cancelled. A child that exits with an error cancels the shared context,
// which in turn stops the other child.
//
// The returned error is the first recorded child failure when there is one;
// otherwise it is the context's error. When a readiness wait is aborted
// because a child failed, the child's error is reported as the cause instead
// of a bare "context canceled".
func run(ctx context.Context) error {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	// Buffered so that a failing child can record its error without blocking.
	errs := make(chan error, 10)

	// supervise runs child in the background. On failure it records the error
	// and cancels the shared context.
	supervise := func(name string, child func(context.Context) error) {
		go func() {
			if err := child(ctx); err != nil {
				log.Println(err, name+" exited with error")
				// Publish the failure before cancelling, so that whoever is
				// woken by the cancellation can already find it in errs.
				errs <- fmt.Errorf("%s exited with error: %w", name, err)
				cancel()
			}
		}()
	}

	// cause returns the first recorded child failure if the context has been
	// cancelled and a failure is available; otherwise it returns fallback.
	cause := func(fallback error) error {
		if ctx.Err() == nil {
			return fallback
		}
		select {
		case err := <-errs:
			return err
		default:
			return fallback
		}
	}

	vnc := &VNCServer{}
	supervise("VNC server", vnc.Run)
	if err := vnc.WaitForReady(ctx); err != nil {
		return fmt.Errorf("failed to wait for VNC server: %w", cause(err))
	}

	chrome := &Chrome{}
	supervise("Chrome", chrome.Run)
	if err := chrome.WaitForReady(ctx); err != nil {
		return fmt.Errorf("failed to wait for Chrome: %w", cause(err))
	}
	log.Println("Chrome and VNC server are running")

	<-ctx.Done()

	// Return the first child error, or the context error when no child failed.
	return cause(ctx.Err())
}

type Chrome struct {
}

func (c *Chrome) Run(ctx context.Context) error {
	cmd := exec.CommandContext(ctx, "/chrome.sh")
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr

	var env []string
	for _, e := range os.Environ() {
		if strings.HasPrefix(e, "DISPLAY=") {
			continue
		}
		env = append(env, e)
	}
	env = append(env, "DISPLAY=:1")
	cmd.Env = env

	return cmd.Run()
}

func (c *Chrome) WaitForReady(ctx context.Context) error {
	u := "http://localhost:9222/json/version"

	httpClient := &http.Client{}
	httpClient.Timeout = 200 * time.Millisecond

	for {
		if ctx.Err() != nil {
			return ctx.Err()
		}

		req, err := http.NewRequestWithContext(ctx, "GET", u, nil)
		if err != nil {
			return fmt.Errorf("failed to create HTTP request: %w", err)
		}

		// Send the HTTP request
		response, err := httpClient.Do(req)
		if err != nil {
			log.Println("Waiting for Chrome to be ready", "url", u, "info", err)
			time.Sleep(100 * time.Millisecond)
			continue
		}
		defer response.Body.Close()

		// Check for HTTP 200 OK
		if response.StatusCode != http.StatusOK {
			log.Println("Waiting for Chrome to be ready", "url", u, "status", response.Status)
			time.Sleep(100 * time.Millisecond)
			continue
		}

		b, err := io.ReadAll(response.Body)
		if err != nil {
			log.Println("Waiting for Chrome to be ready", "url", u, "info", err)
			time.Sleep(100 * time.Millisecond)
			continue
		}

		log.Println("Chrome is ready", "url", u, "response", string(b))
		break
	}
	return nil
}

type VNCServer struct {
}

func (v *VNCServer) Run(ctx context.Context) error {
	cmd := exec.CommandContext(ctx, "Xtigervnc", ":1", "-geometry", "1280x1024")
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr

	log.Println("Starting VNC server", "command", cmd.String())
	if err := cmd.Start(); err != nil {
		return fmt.Errorf("failed to start VNC server: %w", err)
	}

	go func() {
		<-ctx.Done()
		if err := cmd.Process.Kill(); err != nil {
			log.Println(err, "failed to kill VNC server")
		}
	}()

	if err := cmd.Wait(); err != nil {
		return fmt.Errorf("VNC server exited with error: %w", err)
	}

	return nil
}

func (v *VNCServer) WaitForReady(ctx context.Context) error {
	for {
		if ctx.Err() != nil {
			return ctx.Err()
		}

		cmd := exec.CommandContext(ctx, "xdpyinfo", "-display", ":1")
		var stdout bytes.Buffer
		cmd.Stdout = &stdout
		var stderr bytes.Buffer
		cmd.Stderr = &stderr
		if err := cmd.Run(); err != nil {
			log.Println("Waiting for VNC server to be ready", "info", err)
			time.Sleep(100 * time.Millisecond)
			continue
		}

		log.Println("VNC is ready")
		break
	}
	return nil
}


================================================
FILE: examples/chrome/main.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
from datetime import timedelta

from opensandbox.sandbox import Sandbox
from opensandbox.config import ConnectionConfig
from opensandbox.exceptions import SandboxException

async def main():
    """Create a Chrome sandbox and print its execd, VNC and DevTools endpoints.

    The sandbox is created from the ``opensandbox/chrome:latest`` image against
    the server at ``localhost:8080`` with a five-minute timeout. It is not
    killed here. Failures are reported on stdout rather than raised.
    """
    # (sandbox port, message template) pairs, reported in this order.
    exposed = (
        (44772, "execd daemon running with {}"),
        (5901, "VNC running with {}"),
        (9222, "DevTools running with {}/json"),
    )

    try:
        connection = ConnectionConfig(domain="localhost:8080")
        sandbox = await Sandbox.create(
            image="opensandbox/chrome:latest",
            timeout=timedelta(minutes=5),
            entrypoint=["/entrypoint"],
            metadata={"examples.opensandbox.io": "chrome"},
            connection_config=connection,
        )

        for port, template in exposed:
            resolved = await sandbox.get_endpoint(port)
            print(template.format(resolved.endpoint))

    except SandboxException as e:
        # Sandbox-specific failures carry a structured error code and message.
        print(f"Sandbox Error: [{e.error.code}] {e.error.message}")
    except Exception as e:
        print(f"Error: {e}")

if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: examples/claude-code/README.md
================================================
# Claude Code Example

Access Claude via the `@anthropic-ai/claude-code` npm package (the Claude Code CLI) in OpenSandbox.

## Start OpenSandbox server [local]

Pre-pull the code-interpreter image (includes Node.js):

```shell
docker pull sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2

# use docker hub
# docker pull opensandbox/code-interpreter:v1.0.2
```

Then start the local OpenSandbox server, stdout logs will be visible in the terminal:

```shell
uv pip install opensandbox-server
opensandbox-server init-config ~/.sandbox.toml --example docker
opensandbox-server
```

## Create and Access the Claude Sandbox

```shell
# Install OpenSandbox package
uv pip install opensandbox

# Run the example (requires SANDBOX_DOMAIN / SANDBOX_API_KEY / ANTHROPIC_AUTH_TOKEN)
uv run python examples/claude-code/main.py
```

The script installs the Claude CLI (`npm i -g @anthropic-ai/claude-code@latest`) at runtime (Node.js is already in the code-interpreter image), then sends a simple request `claude "Compute 1+1=?."`. Auth is passed via `ANTHROPIC_AUTH_TOKEN`, and you can override endpoint/model with `ANTHROPIC_BASE_URL` / `ANTHROPIC_MODEL`.

![Claude Code screenshot](./screenshot.jpg)

## Environment Variables

- `SANDBOX_DOMAIN`: Sandbox service address (default: `localhost:8080`)
- `SANDBOX_API_KEY`: API key if your server requires authentication (optional for local)
- `SANDBOX_IMAGE`: Sandbox image to use (default: `sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2`)
- `ANTHROPIC_AUTH_TOKEN`: Your Anthropic auth token (required)
- `ANTHROPIC_BASE_URL`: Anthropic API endpoint (optional; e.g., self-hosted proxy)
- `ANTHROPIC_MODEL`: Model name (default: `claude_sonnet4`)

## References
- [@anthropic-ai/claude-code](https://www.npmjs.com/package/@anthropic-ai/claude-code) - NPM package for Claude Code CLI


================================================
FILE: examples/claude-code/main.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import os
from datetime import timedelta

from opensandbox import Sandbox
from opensandbox.config import ConnectionConfig


def _required_env(name: str) -> str:
    """Return the value of environment variable ``name``.

    Raises:
        RuntimeError: if the variable is unset or empty.
    """
    found = os.environ.get(name)
    if found:
        return found
    raise RuntimeError(f"{name} is required")


async def _print_execution_logs(execution) -> None:
    """Print an execution's stdout lines, then its stderr lines, then its error (if any).

    Each line is prefixed with ``[stdout]``, ``[stderr]`` or ``[error]``.
    """
    for channel in ("stdout", "stderr"):
        for entry in getattr(execution.logs, channel):
            print(f"[{channel}] {entry.text}")

    failure = execution.error
    if failure:
        print(f"[error] {failure.name}: {failure.value}")


async def main() -> None:
    """Install the Claude CLI in a sandbox, send it one prompt, and print the logs.

    Configuration is read from the environment: ``SANDBOX_DOMAIN``,
    ``SANDBOX_API_KEY`` and ``SANDBOX_IMAGE`` for the sandbox, and
    ``ANTHROPIC_AUTH_TOKEN`` (required), ``ANTHROPIC_BASE_URL`` and
    ``ANTHROPIC_MODEL`` for the CLI. The sandbox is killed when the commands
    finish, including when one of them raises.

    Raises:
        RuntimeError: if ``ANTHROPIC_AUTH_TOKEN`` is unset or empty.
    """
    domain = os.getenv("SANDBOX_DOMAIN", "localhost:8080")
    api_key = os.getenv("SANDBOX_API_KEY")
    claude_auth_token = _required_env("ANTHROPIC_AUTH_TOKEN")
    claude_base_url = os.getenv("ANTHROPIC_BASE_URL")
    claude_model_name = os.getenv("ANTHROPIC_MODEL", "claude_sonnet4")
    image = os.getenv(
        "SANDBOX_IMAGE",
        "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2",
    )

    config = ConnectionConfig(
        domain=domain,
        api_key=api_key,
        request_timeout=timedelta(seconds=60),
    )

    # Inject Claude settings into container environment for CLI access
    env = {
        "ANTHROPIC_AUTH_TOKEN": claude_auth_token,
        "ANTHROPIC_BASE_URL": claude_base_url,
        "ANTHROPIC_MODEL": claude_model_name,
        "IS_SANDBOX": "1",
    }
    # Drop None values to avoid overriding defaults inside CLI
    env = {k: v for k, v in env.items() if v is not None}

    sandbox = await Sandbox.create(
        image,
        connection_config=config,
        env=env,
    )

    async with sandbox:
        try:
            # Install Claude CLI (Node.js is already in the code-interpreter image)
            install_exec = await sandbox.commands.run(
                "npm i -g @anthropic-ai/claude-code@latest"
            )
            await _print_execution_logs(install_exec)

            # Use Claude CLI to send a message
            run_exec = await sandbox.commands.run(
                'claude "Compute 1+1=?."'
            )
            await _print_execution_logs(run_exec)
        finally:
            # Always terminate the remote sandbox, even if a command above
            # raised; otherwise it would be left running.
            await sandbox.kill()


if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: examples/code-interpreter/README.md
================================================
# Code Interpreter Sandbox

Complete demonstration of running Python, Java, Go, and TypeScript code using the Code Interpreter SDK.

## Getting Code Interpreter image

Pull the prebuilt image from a registry:

```shell
docker pull sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2

# use docker hub
# docker pull opensandbox/code-interpreter:v1.0.2
```

## Start OpenSandbox server [local]

Start the local OpenSandbox server:

```shell
uv pip install opensandbox-server
opensandbox-server init-config ~/.sandbox.toml --example docker
opensandbox-server
```

## Create and access the Code Interpreter Sandbox

```shell
# Install OpenSandbox packages
uv pip install opensandbox opensandbox-code-interpreter

# Run the example (requires SANDBOX_DOMAIN / SANDBOX_API_KEY)
uv run python examples/code-interpreter/main.py
```

The script creates a Sandbox + CodeInterpreter, runs Python, Java, Go, and TypeScript code snippets, prints their stdout/results, then terminates the remote instance.

## Environment variables

- `SANDBOX_DOMAIN`: Sandbox service address (default: `localhost:8080`)
- `SANDBOX_API_KEY`: API key if your server requires authentication
- `SANDBOX_IMAGE`: Sandbox image to use (default: `sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2`)

## Example output

```text
=== Python example ===
[Python stdout] Hello from Python!

[Python result] {'py': '3.14.2', 'sum': 4}

=== Java example ===
[Java stdout] Hello from Java!

[Java stdout] 2 + 3 = 5

[Java result] 5

=== Go example ===
[Go stdout] Hello from Go!
3 + 4 = 7


=== TypeScript example ===
[TypeScript stdout] Hello from TypeScript!

[TypeScript stdout] sum = 6
```

# Code Interpreter Sandbox from pool

## Start OpenSandbox server [k8s]

Install the k8s OpenSandbox operator, and create a pool:
```yaml
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: Pool
metadata:
  labels:
    app.kubernetes.io/name: sandbox-k8s
    app.kubernetes.io/managed-by: kustomize
  name: pool-sample
  namespace: opensandbox
spec:
  template:
    metadata:
      labels:
        app: example
    spec:
      volumes:
        - name: sandbox-storage
          emptyDir: { }
        - name: opensandbox-bin
          emptyDir: { }
      initContainers:
        - name: task-executor-installer
          image: sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/task-executor:v0.1.0
          command: [ "/bin/sh", "-c" ]
          args:
            - |
              cp /workspace/server /opt/opensandbox/bin/task-executor && 
              chmod +x /opt/opensandbox/bin/task-executor
          volumeMounts:
            - name: opensandbox-bin
              mountPath: /opt/opensandbox/bin
        - name: execd-installer
          image: sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.7
          command: [ "/bin/sh", "-c" ]
          args:
            - |
              cp ./execd /opt/opensandbox/bin/execd && 
              cp ./bootstrap.sh /opt/opensandbox/bin/bootstrap.sh &&
              chmod +x /opt/opensandbox/bin/execd &&
              chmod +x /opt/opensandbox/bin/bootstrap.sh
          volumeMounts:
            - name: opensandbox-bin
              mountPath: /opt/opensandbox/bin
      containers:
        - name: sandbox
          image: sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2
          command:
          - "/bin/sh"
          - "-c"
          - |
            /opt/opensandbox/bin/task-executor -listen-addr=0.0.0.0:5758 >/tmp/task-executor.log 2>&1
          env:
          - name: SANDBOX_MAIN_CONTAINER
            value: main
          - name: EXECD_ENVS
            value: /opt/opensandbox/.env
          - name: EXECD
            value: /opt/opensandbox/bin/execd
          volumeMounts:
            - name: sandbox-storage
              mountPath: /var/lib/sandbox
            - name: opensandbox-bin
              mountPath: /opt/opensandbox/bin
      tolerations:
        - operator: "Exists"
  capacitySpec:
    bufferMax: 3
    bufferMin: 1
    poolMax: 5
    poolMin: 0
```

Start the k8s OpenSandbox server:

```shell
uv pip install opensandbox-server

# replace with your k8s cluster config, kubeconfig etc.
opensandbox-server init-config ~/.sandbox.toml --example k8s
curl -o ~/batchsandbox-template.yaml https://raw.githubusercontent.com/alibaba/OpenSandbox/main/server/example.batchsandbox-template.yaml

opensandbox-server
```

## Create and access the Code Interpreter Sandbox

```shell
# Install OpenSandbox packages
uv pip install opensandbox opensandbox-code-interpreter

# Run the example (requires SANDBOX_DOMAIN / SANDBOX_API_KEY)
uv run python examples/code-interpreter/main_use_pool.py
```

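The script creates a Sandbox + CodeInterpreter from the `pool-sample` pool, verifies the injected `TEST_ENV` variable with a Python snippet, runs Java, Go, and TypeScript code snippets, prints their stdout/results, then terminates the remote instance.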

## Environment variables

- `SANDBOX_DOMAIN`: Sandbox service address (default: `localhost:8080`)
- `SANDBOX_API_KEY`: API key if your server requires authentication
- `SANDBOX_IMAGE`: Sandbox image to use (default: `sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2`)

## Example output

```text
=== Verify Environment Variable ===
[ENV Check] TEST_ENV value: test

[ENV Result] 'test'

=== Java example ===
[Java stdout] Hello from Java!

[Java stdout] 2 + 3 = 5

[Java result] 5

=== Go example ===
[Go stdout] Hello from Go!
3 + 4 = 7


=== TypeScript example ===
[TypeScript stdout] Hello from TypeScript!

[TypeScript stdout] sum = 6
```


================================================
FILE: examples/code-interpreter/main.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import os
from datetime import timedelta

from code_interpreter import CodeInterpreter, SupportedLanguage
from opensandbox import Sandbox
from opensandbox.config import ConnectionConfig


async def main() -> None:
    """Run short Python, Java, Go and TypeScript snippets in a code-interpreter sandbox.

    Connection settings are read from ``SANDBOX_DOMAIN``, ``SANDBOX_API_KEY``
    and ``SANDBOX_IMAGE``. For each snippet the captured stdout is printed,
    followed by the result (Python and Java) and the error, if any. The
    sandbox is killed when the snippets finish, including when one of the
    calls raises.
    """
    domain = os.getenv("SANDBOX_DOMAIN", "localhost:8080")
    api_key = os.getenv("SANDBOX_API_KEY")
    image = os.getenv(
        "SANDBOX_IMAGE",
        "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2",
    )

    config = ConnectionConfig(
        domain=domain,
        api_key=api_key,
        request_timeout=timedelta(seconds=60),
    )

    sandbox = await Sandbox.create(
        image,
        connection_config=config,
        entrypoint=["/opt/opensandbox/code-interpreter.sh"]
    )

    async with sandbox:
        try:
            interpreter = await CodeInterpreter.create(sandbox=sandbox)

            # Python example: show runtime info and return a simple calculation.
            py_exec = await interpreter.codes.run(
                "import platform\n"
                "print('Hello from Python!')\n"
                "result = {'py': platform.python_version(), 'sum': 2 + 2}\n"
                "result",
                language=SupportedLanguage.PYTHON,
            )
            print("\n=== Python example ===")
            for msg in py_exec.logs.stdout:
                print(f"[Python stdout] {msg.text}")
            if py_exec.result:
                for res in py_exec.result:
                    print(f"[Python result] {res.text}")
            # Report failures like the other language examples do, instead of
            # dropping them silently.
            if py_exec.error:
                print(f"[Python error] {py_exec.error.name}: {py_exec.error.value}")

            # Java example: print to stdout and return the final result line.
            java_exec = await interpreter.codes.run(
                "System.out.println(\"Hello from Java!\");\n"
                "int result = 2 + 3;\n"
                "System.out.println(\"2 + 3 = \" + result);\n"
                "result",
                language=SupportedLanguage.JAVA,
            )
            print("\n=== Java example ===")
            for msg in java_exec.logs.stdout:
                print(f"[Java stdout] {msg.text}")
            if java_exec.result:
                for res in java_exec.result:
                    print(f"[Java result] {res.text}")
            if java_exec.error:
                print(f"[Java error] {java_exec.error.name}: {java_exec.error.value}")

            # Go example: print logs and demonstrate a main function structure.
            go_exec = await interpreter.codes.run(
                "package main\n"
                "import \"fmt\"\n"
                "func main() {\n"
                "    fmt.Println(\"Hello from Go!\")\n"
                "    sum := 3 + 4\n"
                "    fmt.Println(\"3 + 4 =\", sum)\n"
                "}",
                language=SupportedLanguage.GO,
            )
            print("\n=== Go example ===")
            for msg in go_exec.logs.stdout:
                print(f"[Go stdout] {msg.text}")
            if go_exec.error:
                print(f"[Go error] {go_exec.error.name}: {go_exec.error.value}")

            # TypeScript example: use typing and sum an array.
            ts_exec = await interpreter.codes.run(
                "console.log('Hello from TypeScript!');\n"
                "const nums: number[] = [1, 2, 3];\n"
                "console.log('sum =', nums.reduce((a, b) => a + b, 0));",
                language=SupportedLanguage.TYPESCRIPT,
            )
            print("\n=== TypeScript example ===")
            for msg in ts_exec.logs.stdout:
                print(f"[TypeScript stdout] {msg.text}")
            if ts_exec.error:
                print(f"[TypeScript error] {ts_exec.error.name}: {ts_exec.error.value}")
        finally:
            # Always terminate the remote sandbox, even if a call above
            # raised; otherwise it would be left running.
            await sandbox.kill()


if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: examples/code-interpreter/main_use_pool.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import os
from datetime import timedelta

from code_interpreter import CodeInterpreter, SupportedLanguage
from opensandbox import Sandbox
from opensandbox.config import ConnectionConfig


async def main() -> None:
    """Run code snippets in a code-interpreter sandbox requested with a pool reference.

    Connection settings are read from ``SANDBOX_DOMAIN``, ``SANDBOX_API_KEY``
    and ``SANDBOX_IMAGE``. The sandbox is created with the ``poolRef``
    extension set to ``pool-sample`` and with ``TEST_ENV=test`` in its
    environment. A Python snippet first reads ``TEST_ENV`` back, then Java, Go
    and TypeScript snippets are run. The sandbox is killed when the snippets
    finish, including when one of the calls raises.
    """
    domain = os.getenv("SANDBOX_DOMAIN", "localhost:8080")
    api_key = os.getenv("SANDBOX_API_KEY")
    image = os.getenv(
        "SANDBOX_IMAGE",
        "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2",
    )

    config = ConnectionConfig(
        domain=domain,
        api_key=api_key,
        request_timeout=timedelta(seconds=60),
    )

    sandbox = await Sandbox.create(
        image,
        connection_config=config,
        extensions={"poolRef":"pool-sample"},
        entrypoint=["/opt/opensandbox/code-interpreter.sh"],
        env={
            "TEST_ENV": "test",
        },
    )

    async with sandbox:
        try:
            interpreter = await CodeInterpreter.create(sandbox=sandbox)

            # Verify environment variable is set
            print("\n=== Verify Environment Variable ===")
            env_check = await interpreter.codes.run(
                "import os\n"
                "test_env = os.getenv('TEST_ENV', 'NOT_SET')\n"
                "print(f'TEST_ENV value: {test_env}')\n"
                "test_env",
                language=SupportedLanguage.PYTHON,
            )
            for msg in env_check.logs.stdout:
                print(f"[ENV Check] {msg.text}")
            if env_check.result:
                for res in env_check.result:
                    print(f"[ENV Result] {res.text}")
            # Report failures like the other language examples do, instead of
            # dropping them silently.
            if env_check.error:
                print(f"[ENV error] {env_check.error.name}: {env_check.error.value}")

            # Java example: print to stdout and return the final result line.
            java_exec = await interpreter.codes.run(
                "System.out.println(\"Hello from Java!\");\n"
                "int result = 2 + 3;\n"
                "System.out.println(\"2 + 3 = \" + result);\n"
                "result",
                language=SupportedLanguage.JAVA,
            )
            print("\n=== Java example ===")
            for msg in java_exec.logs.stdout:
                print(f"[Java stdout] {msg.text}")
            if java_exec.result:
                for res in java_exec.result:
                    print(f"[Java result] {res.text}")
            if java_exec.error:
                print(f"[Java error] {java_exec.error.name}: {java_exec.error.value}")

            # Go example: print logs and demonstrate a main function structure.
            go_exec = await interpreter.codes.run(
                "package main\n"
                "import \"fmt\"\n"
                "func main() {\n"
                "    fmt.Println(\"Hello from Go!\")\n"
                "    sum := 3 + 4\n"
                "    fmt.Println(\"3 + 4 =\", sum)\n"
                "}",
                language=SupportedLanguage.GO,
            )
            print("\n=== Go example ===")
            for msg in go_exec.logs.stdout:
                print(f"[Go stdout] {msg.text}")
            if go_exec.error:
                print(f"[Go error] {go_exec.error.name}: {go_exec.error.value}")

            # TypeScript example: use typing and sum an array.
            ts_exec = await interpreter.codes.run(
                "console.log('Hello from TypeScript!');\n"
                "const nums: number[] = [1, 2, 3];\n"
                "console.log('sum =', nums.reduce((a, b) => a + b, 0));",
                language=SupportedLanguage.TYPESCRIPT,
            )
            print("\n=== TypeScript example ===")
            for msg in ts_exec.logs.stdout:
                print(f"[TypeScript stdout] {msg.text}")
            if ts_exec.error:
                print(f"[TypeScript error] {ts_exec.error.name}: {ts_exec.error.value}")
        finally:
            # Always terminate the remote sandbox, even if a call above
            # raised; otherwise it would be left running.
            await sandbox.kill()


if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: examples/codex-cli/README.md
================================================
# Codex/OpenAI CLI Example

Use the official `@openai/codex` npm package to call OpenAI/Codex-like models in OpenSandbox.

## Start OpenSandbox server [local]

Pre-pull the code-interpreter image (includes Node.js):

```shell
docker pull sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2

# use docker hub
# docker pull opensandbox/code-interpreter:v1.0.2
```

Start the local OpenSandbox server, logs will be visible in the terminal:

```shell
uv pip install opensandbox-server
opensandbox-server init-config ~/.sandbox.toml --example docker
opensandbox-server
```

## Create and Access the Codex Sandbox

```shell
# Install OpenSandbox package
uv pip install opensandbox

# Run the example (requires SANDBOX_DOMAIN / SANDBOX_API_KEY / OPENAI_API_KEY)
uv run python examples/codex-cli/main.py
```

The script installs the Codex CLI (`npm install -g @openai/codex@latest`) at runtime (Node.js is already in the code-interpreter image), then executes a simple request `codex exec "Compute 1+1=?." --skip-git-repo-check`. Auth is passed via `OPENAI_API_KEY`; you can override endpoint/model with `OPENAI_BASE_URL` / `OPENAI_MODEL`.

## Environment Variables

- `SANDBOX_DOMAIN`: Sandbox service address (default: `localhost:8080`)
- `SANDBOX_API_KEY`: API key if your server requires authentication (optional for local)
- `SANDBOX_IMAGE`: Sandbox image to use (default: `sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2`)
- `OPENAI_API_KEY`: Your OpenAI API key (required)
- `OPENAI_BASE_URL`: OpenAI API endpoint (default: `https://api.openai.com/v1`)
- `OPENAI_MODEL`: Model to use (default: `gpt-4o-mini`)

## References
- [@openai/codex](https://www.npmjs.com/package/@openai/codex) - Official OpenAI Codex CLI


================================================
FILE: examples/codex-cli/main.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import os
from datetime import timedelta

from opensandbox import Sandbox
from opensandbox.config import ConnectionConfig


def _required_env(name: str) -> str:
    """Look up a mandatory environment variable.

    Returns the variable's value; raises ``RuntimeError`` when it is missing
    or set to an empty string.
    """
    setting = os.environ.get(name)
    missing = not setting
    if missing:
        raise RuntimeError(f"{name} is required")
    return setting


async def _print_execution_logs(execution) -> None:
    """Dump a command execution to stdout.

    Prints every stdout message, then every stderr message, each tagged with
    its stream, and finally the error name and value when an error is set.
    """
    for line in execution.logs.stdout:
        print("[stdout] " + f"{line.text}")
    for line in execution.logs.stderr:
        print("[stderr] " + f"{line.text}")

    if not execution.error:
        return
    print(f"[error] {execution.error.name}: {execution.error.value}")


async def main() -> None:
    """Create a sandbox, install the Codex CLI in it, and run one Codex request.

    Configuration is read from the environment:
      * ``SANDBOX_DOMAIN`` / ``SANDBOX_API_KEY`` / ``SANDBOX_IMAGE`` select the
        OpenSandbox server and image.
      * ``OPENAI_API_KEY`` (required), ``OPENAI_BASE_URL`` and ``OPENAI_MODEL``
        are injected into the sandbox environment for the Codex CLI.

    Raises:
        RuntimeError: if ``OPENAI_API_KEY`` is unset or empty.
    """
    domain = os.getenv("SANDBOX_DOMAIN", "localhost:8080")
    api_key = os.getenv("SANDBOX_API_KEY")
    openai_api_key = _required_env("OPENAI_API_KEY")
    openai_base_url = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")
    openai_model = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
    image = os.getenv(
        "SANDBOX_IMAGE",
        "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2",
    )

    config = ConnectionConfig(
        domain=domain,
        api_key=api_key,
        request_timeout=timedelta(seconds=60),
    )

    # Inject OpenAI settings into container environment for CLI access
    env = {
        "OPENAI_API_KEY": openai_api_key,
        "OPENAI_BASE_URL": openai_base_url,
        "OPENAI_MODEL": openai_model,
    }
    # Defensive filter: every value above currently has a default or is
    # required, but never forward a None into the container environment.
    env = {k: v for k, v in env.items() if v is not None}

    sandbox = await Sandbox.create(
        image,
        connection_config=config,
        env=env,
    )

    async with sandbox:
        # try/finally so the sandbox is killed even when a command raises;
        # otherwise a failure would leave it running on the server.
        try:
            # Install Codex CLI (Node.js is already in the code-interpreter image)
            install_exec = await sandbox.commands.run(
                "npm install -g @openai/codex@latest"
            )
            await _print_execution_logs(install_exec)

            # Use Codex CLI to execute a command
            run_exec = await sandbox.commands.run(
                'codex exec "Compute 1+1=?." --skip-git-repo-check'
            )
            await _print_execution_logs(run_exec)
        finally:
            await sandbox.kill()


# Script entry point: run the async main() coroutine to completion.
if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: examples/desktop/Dockerfile
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

FROM ubuntu:22.04

# Default to English; install locale support first, then other deps
ENV DEBIAN_FRONTEND=noninteractive \
    LANG=en_US.UTF-8 \
    LC_ALL=en_US.UTF-8 \
    LANGUAGE=en_US:en

#----------------------
# Install locales first, then remaining dependencies:
#   - Xvfb, x11vnc, XFCE and dbus-x11 provide the desktop and its VNC server
#   - novnc + python3-websockify expose VNC to browsers
# The sed call sets noVNC's default UI locale to English, and the apt lists
# are removed in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y locales \
    && locale-gen en_US.UTF-8 \
    && update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 LANGUAGE=en_US:en \
    && apt-get install -y \
        python3 \
        python3-pip \
        python3-websockify \
        xvfb \
        x11vnc \
        xfce4 \
        xfce4-terminal \
        dbus-x11 \
        xterm \
        novnc \
        fonts-dejavu-core \
        net-tools \
        ca-certificates \
        --no-install-recommends \
    && sed -i 's/DEFAULT_LOCALE = null;/DEFAULT_LOCALE = "en";/' /usr/share/novnc/app/localization.js \
    && rm -rf /var/lib/apt/lists/*

# Precreate the X11 socket directory as root (world-writable with the sticky
# bit, mode 1777), in a single layer, before switching to the non-root user.
RUN mkdir -p /tmp/.X11-unix \
    && chmod 1777 /tmp/.X11-unix

#----------------------
# Create a non-root user
RUN groupadd -r desktop && useradd -r -g desktop -G audio,video desktop \
    && mkdir -p /home/desktop && chown -R desktop:desktop /home/desktop

#----------------------
# Configure user, etc

WORKDIR /home/desktop

USER desktop

# Default to bash
CMD ["bash"]


================================================
FILE: examples/desktop/README.md
================================================
# Desktop(VNC) Example

Launch Xvfb + x11vnc + XFCE in OpenSandbox to provide a VNC-accessible desktop environment.

## Build the Desktop Sandbox Image

The Dockerfile in this directory builds a sandbox image with desktop and VNC components pre-installed:

```shell
cd examples/desktop
docker build -t opensandbox/desktop:latest .
```

This image includes:
- Xvfb (virtual framebuffer X server)
- x11vnc (VNC server)
- XFCE desktop (panel, file manager, terminal)
- Non-root user (desktop) for security

## Start OpenSandbox server [local]

Pre-pull the desktop image:

```shell
docker pull opensandbox/desktop:latest
```

Start the local OpenSandbox server:

```shell
uv pip install opensandbox-server
opensandbox-server init-config ~/.sandbox.toml --example docker
opensandbox-server
```

## Create and Access the Desktop Sandbox

```shell
# Install OpenSandbox package
uv pip install opensandbox

uv run python examples/desktop/main.py
```

The script starts the desktop stack (Xvfb + XFCE + x11vnc) and also launches noVNC/websockify. It prints:
- VNC endpoint (`endpoint.endpoint`) for native VNC clients, password from `VNC_PASSWORD` (default: `opensandbox`)
- noVNC URL for browsers (`/vnc.html?host=...&port=...&path=...`)

The sandbox stays alive for 5 minutes by default; interrupt sooner with Ctrl+C. Uses the prebuilt desktop image by default.

![Desktop shell](./screenshot_shell.jpg)
![noVNC connect](./screenshot_connect.jpg)
![noVNC password](./screenshot_password.jpg)
![Desktop UI](./screenshot_desktop.jpg)

## Environment Variables

- `SANDBOX_DOMAIN`: Sandbox service address (default: `localhost:8080`)
- `SANDBOX_API_KEY`: API key if your server requires authentication (optional for local)
- `SANDBOX_IMAGE`: Sandbox image to use (default: `opensandbox/desktop:latest`)
- `VNC_PASSWORD`: Password for VNC access (default: `opensandbox`)

## References

- [noVNC](https://github.com/novnc/noVNC)
- [x11vnc](https://github.com/LibVNC/x11vnc)


================================================
FILE: examples/desktop/build.sh
================================================
#!/bin/bash
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# -e: abort on the first failing command; -x: echo each command as it runs.
set -ex

# Image tag to publish; override with TAG=<value> in the environment.
TAG=${TAG:-latest}

# Remove a builder left over from a previous run; ignore failure if none exists.
docker buildx rm desktop-builder || true

# Create a fresh builder and make it the active one.
docker buildx create --use --name desktop-builder

docker buildx inspect --bootstrap

docker buildx ls

# Build for amd64 and arm64 and push under both image names.
# The tag expansions are quoted so an unusual TAG value cannot be word-split.
docker buildx build \
  -t "opensandbox/desktop:${TAG}" \
  -t "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/desktop:${TAG}" \
  --platform linux/amd64,linux/arm64 \
  --push \
  .


================================================
FILE: examples/desktop/main.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import os
from datetime import timedelta

from opensandbox import Sandbox
from opensandbox.config import ConnectionConfig
from opensandbox.models.execd import RunCommandOpts


def _required_env(name: str) -> str:
    value = os.getenv(name)
    if not value:
        raise RuntimeError(f"{name} is required")
    return value


async def _print_logs(label: str, execution) -> None:
    for msg in execution.logs.stdout:
        print(f"[{label} stdout] {msg.text}")
    for msg in execution.logs.stderr:
        print(f"[{label} stderr] {msg.text}")
    if execution.error:
        print(f"[{label} error] {execution.error.name}: {execution.error.value}")


def _novnc_url(endpoint: str) -> str:
    """Build the browser URL for a noVNC endpoint string.

    ``endpoint`` is expected to look like ``host:port/proxy/6080`` but the
    path and the port are both optional; missing parts are simply left out of
    the query string instead of raising.
    """
    host_port, _, path = endpoint.partition("/")
    if ":" in host_port:
        # Split on the last colon so extra colons in the host do not break parsing.
        host, _, port = host_port.rpartition(":")
    else:
        host, port = host_port, ""
    params = (("host", host), ("port", port), ("path", path))
    query = "&".join(f"{key}={value}" for key, value in params if value)
    return f"http://{endpoint}/vnc.html?{query}"


async def main() -> None:
    """Start a desktop sandbox, launch the VNC stack, and print how to connect.

    Reads ``SANDBOX_DOMAIN``, ``SANDBOX_API_KEY``, ``SANDBOX_IMAGE``,
    ``PYTHON_VERSION`` and ``VNC_PASSWORD`` from the environment, starts
    Xvfb, XFCE, x11vnc and websockify in the background, prints the VNC and
    noVNC endpoints, then keeps the sandbox alive for five minutes (or until
    interrupted) before killing it.
    """
    domain = os.getenv("SANDBOX_DOMAIN", "localhost:8080")
    api_key = os.getenv("SANDBOX_API_KEY")
    image = os.getenv(
        "SANDBOX_IMAGE",
        "opensandbox/desktop:latest",
    )
    python_version = os.getenv("PYTHON_VERSION", "3.11")
    vnc_password = os.getenv("VNC_PASSWORD", "opensandbox")

    config = ConnectionConfig(
        domain=domain,
        api_key=api_key,
        request_timeout=timedelta(seconds=60),
    )

    sandbox = await Sandbox.create(
        image,
        connection_config=config,
        env={
            "PYTHON_VERSION": python_version,
            "VNC_PASSWORD": vnc_password,
        },
    )

    async with sandbox:
        # The outer try/finally guarantees the sandbox is killed even when one
        # of the startup commands or the endpoint lookup fails.
        try:
            # Desktop and VNC components are pre-installed in the image, just start them
            # Start virtual display, window manager, and VNC server (in background)
            xvfb_exec = await sandbox.commands.run(
                "Xvfb :0 -screen 0 1280x800x24",
                opts=RunCommandOpts(background=True),
            )
            await _print_logs("xvfb", xvfb_exec)

            # Start XFCE session (provides panel, file manager, terminal)
            xfce_exec = await sandbox.commands.run(
                "DISPLAY=:0 dbus-launch startxfce4",
                opts=RunCommandOpts(background=True),
            )
            await _print_logs("xfce", xfce_exec)

            vnc_exec = await sandbox.commands.run(
                "x11vnc -display :0 "
                "-passwd \"$VNC_PASSWORD\" "
                "-forever -shared -rfbport 5900",
                opts=RunCommandOpts(background=True),
            )
            await _print_logs("x11vnc", vnc_exec)

            # Start noVNC/websockify to expose VNC over WebSocket/HTTP
            novnc_exec = await sandbox.commands.run(
                "/usr/bin/websockify --web=/usr/share/novnc 6080 localhost:5900",
                opts=RunCommandOpts(background=True),
            )
            await _print_logs("novnc", novnc_exec)

            endpoint_vnc = await sandbox.get_endpoint(5900)
            endpoint_novnc = await sandbox.get_endpoint(6080)

            # Build noVNC URL with host/port/path for routed endpoint, e.g., host:port/proxy/6080
            novnc_url = _novnc_url(endpoint_novnc.endpoint)

            print("\nVNC endpoint (native clients):")
            print(f"  {endpoint_vnc.endpoint}")
            print(f"Password: {vnc_password}")

            print("\nnoVNC (browser):")
            print(f"  {novnc_url}")
            print(f"Password: {vnc_password}")

            print("\nKeeping sandbox alive for 5 minutes. Press Ctrl+C to exit sooner.")
            try:
                await asyncio.sleep(300)
            except (KeyboardInterrupt, asyncio.CancelledError):
                # Under asyncio.run(), Ctrl+C reaches the main task as a
                # cancellation rather than KeyboardInterrupt, so handle both
                # and fall through to the cleanup below.
                print("Stopping...")
        finally:
            await sandbox.kill()


# Script entry point: run the async main() coroutine to completion.
if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: examples/docker-ossfs-volume-mount/README.md
================================================
# Docker OSSFS Volume Mount Example

This example demonstrates how to use the new SDK `ossfs` volume model to mount Alibaba Cloud OSS into sandboxes on Docker runtime.

## What this example covers

1. **Basic read-write mount** on an OSSFS backend.
2. **Cross-sandbox sharing** on the same OSSFS backend path.
3. **Two mounts, different OSS prefixes via `subPath`**.

## Prerequisites

### 1) Start OpenSandbox server (Docker runtime)

Make sure your server host has:

- Linux host OS (OSSFS backend is not supported when OpenSandbox Server runs on Windows)
- `ossfs` (for `version` `"1.0"`) or `ossfs2` (for the default `version` `"2.0"`) installed
- FUSE support enabled
- writable local mount root for OSSFS (default `storage.ossfs_mount_root=/mnt/ossfs`)

`storage.ossfs_mount_root` is **optional** if you use the default `/mnt/ossfs`.
Even with on-demand mounting, the runtime still needs a deterministic host-side
base directory to place dynamic mounts (`<mount_root>/<bucket>/<subPath?>`).

Optional config example:

```toml
[runtime]
type = "docker"

[storage]
ossfs_mount_root = "/mnt/ossfs"
```

Then start the server:

```bash
opensandbox-server
```

### 2) Install Python SDK

```bash
uv pip install opensandbox
```

If your PyPI version does not include OSSFS volume models yet, install from source:

```bash
pip install -e sdks/sandbox/python
```

### 3) Prepare OSS credentials and target path

```bash
export SANDBOX_DOMAIN=localhost:8080
export SANDBOX_API_KEY=your-api-key
export SANDBOX_IMAGE=ubuntu

export OSS_BUCKET=your-bucket
export OSS_ENDPOINT=oss-cn-hangzhou.aliyuncs.com
export OSS_ACCESS_KEY_ID=your-ak
export OSS_ACCESS_KEY_SECRET=your-sk
```

## Run

```bash
uv run python examples/docker-ossfs-volume-mount/main.py
```

## Minimal SDK usage snippet

```python
from opensandbox import Sandbox
from opensandbox.models.sandboxes import OSSFS, Volume

sandbox = await Sandbox.create(
    image="ubuntu",
    volumes=[
        Volume(
            name="oss-data",
            ossfs=OSSFS(
                bucket="your-bucket",
                endpoint="oss-cn-hangzhou.aliyuncs.com",
                # version="2.0",   # optional, default is "2.0"
                accessKeyId="your-ak",
                accessKeySecret="your-sk",
            ),
            mountPath="/mnt/data",
            subPath="train",      # optional
            readOnly=False,       # optional
        )
    ],
)
```

## Notes

- Current implementation supports **inline credentials only** (`accessKeyId`/`accessKeySecret`).
- Mounting is **on-demand** in Docker runtime (mount-or-reuse), not pre-mounted for all buckets.
- `ossfs.version` exists in API/SDK with enum `"1.0" | "2.0"`, and defaults to `"2.0"` when omitted.
- Docker runtime now applies **version-specific mount argument encoding**:
  - `1.0`: mounts via `ossfs ... -o <option>`.
  - `2.0`: mounts via `ossfs2 mount ... -c <config-file>` where options are written as `--<option>` config lines.
- `options` values must be **raw payloads** without leading `-` (for example: `allow_other`, `umask=0022`).

## References

- [OSEP-0003: Volume and VolumeBinding Support](../../oseps/0003-volume-and-volumebinding-support.md)
- [Sandbox Lifecycle API Spec](../../specs/sandbox-lifecycle.yml)


================================================
FILE: examples/docker-ossfs-volume-mount/README_zh.md
================================================
# Docker OSSFS 挂载示例

本示例演示如何使用新版 SDK 的 `ossfs` volume 模型，在 Docker 运行时将阿里云 OSS 挂载到沙箱容器。

## 覆盖场景

1. **基础读写挂载**（OSSFS backend）。
2. **跨沙箱共享数据**（同一 OSSFS backend path）。
3. **通过 `subPath` 挂载不同 OSS prefix**。

## 前置条件

### 1) 启动 OpenSandbox 服务（Docker runtime）

请确保服务端主机满足：

- Linux 主机系统（OpenSandbox Server 运行在 Windows 时不支持 OSSFS backend）
- 已安装 `ossfs`（`version` 为 `"1.0"` 时）或 `ossfs2`（默认的 `"2.0"` 版本）
- 已启用 FUSE
- 已有可写的 OSSFS 本地挂载根目录（默认 `storage.ossfs_mount_root=/mnt/ossfs`）

`storage.ossfs_mount_root` 是**可选配置**（使用默认值时可不写）。
即使是按需动态挂载，运行时仍需要一个确定的宿主机根目录来放置挂载点：
`<mount_root>/<bucket>/<subPath?>`。

可选配置示例：

```toml
[runtime]
type = "docker"

[storage]
ossfs_mount_root = "/mnt/ossfs"
```

启动服务：

```bash
opensandbox-server
```

### 2) 安装 Python SDK

```bash
uv pip install opensandbox
```

如果当前 PyPI 版本还不包含 OSSFS 相关模型，可从源码安装：

```bash
pip install -e sdks/sandbox/python
```

### 3) 配置 OSS 参数

```bash
export SANDBOX_DOMAIN=localhost:8080
export SANDBOX_API_KEY=your-api-key
export SANDBOX_IMAGE=ubuntu

export OSS_BUCKET=your-bucket
export OSS_ENDPOINT=oss-cn-hangzhou.aliyuncs.com
export OSS_ACCESS_KEY_ID=your-ak
export OSS_ACCESS_KEY_SECRET=your-sk
```

## 运行

```bash
uv run python examples/docker-ossfs-volume-mount/main.py
```

## SDK 最小示例

```python
from opensandbox import Sandbox
from opensandbox.models.sandboxes import OSSFS, Volume

sandbox = await Sandbox.create(
    image="ubuntu",
    volumes=[
        Volume(
            name="oss-data",
            ossfs=OSSFS(
                bucket="your-bucket",
                endpoint="oss-cn-hangzhou.aliyuncs.com",
                # version="2.0",   # 可选，默认 "2.0"
                accessKeyId="your-ak",
                accessKeySecret="your-sk",
            ),
            mountPath="/mnt/data",
            subPath="train",      # 可选
            readOnly=False,       # 可选
        )
    ],
)
```

## 说明

- 当前实现仅支持**内联凭据**（`accessKeyId` / `accessKeySecret`）。
- Docker 运行时采用**按需挂载**（mount-or-reuse），不是预挂载所有 bucket。
- API/SDK 中 `ossfs.version` 字段存在，枚举为 `"1.0"` / `"2.0"`，省略时默认 `"2.0"`。
- Docker 运行时已按 `version` 区分挂载参数编码：
  - `1.0`：通过 `ossfs ... -o <option>` 挂载。
  - `2.0`：通过 `ossfs2 mount ... -c <config-file>` 挂载，`options` 以 `--<option>` 配置项写入配置文件。
- `options` 必须是**不带前缀 `-` 的原始参数值**（例如：`allow_other`、`umask=0022`）。

## 参考

- [OSEP-0003: Volume 与 VolumeBinding 支持](../../oseps/0003-volume-and-volumebinding-support.md)
- [Sandbox Lifecycle API 规范](../../specs/sandbox-lifecycle.yml)


================================================
FILE: examples/docker-ossfs-volume-mount/main.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Docker OSSFS Volume Mount Example
=================================

Demonstrates how to create OSSFS volumes with the new SDK model and mount them
into sandboxes on Docker runtime.

Scenarios:
1) Basic read-write mount on OSSFS backend.
2) Cross-sandbox data sharing on same OSSFS backend path.
3) Two volumes use different OSS prefixes via subPath.
"""

import asyncio
import os
from datetime import timedelta
from uuid import uuid4

from opensandbox import Sandbox
from opensandbox.config import ConnectionConfig

# Guarded import: if the installed SDK lacks the OSSFS/Volume models, print
# install-from-source instructions and exit with status 1 instead of showing
# an ImportError traceback.
try:
    from opensandbox.models.sandboxes import OSSFS, Volume
except ImportError:
    print(
        "ERROR: Your installed opensandbox SDK does not include OSSFS/Volume models.\n"
        "       Please install the latest SDK from source:\n"
        "\n"
        "           pip install -e sdks/sandbox/python\n"
    )
    raise SystemExit(1)


def _required_env(name: str) -> str:
    value = os.getenv(name, "").strip()
    if not value:
        raise RuntimeError(f"Missing required environment variable: {name}")
    return value


def build_ossfs() -> OSSFS:
    """Build an ``OSSFS`` backend description from the ``OSS_*`` environment variables.

    Reads ``OSS_BUCKET``, ``OSS_ENDPOINT``, ``OSS_ACCESS_KEY_ID`` and
    ``OSS_ACCESS_KEY_SECRET`` (in that order) via ``_required_env``, so a
    missing variable raises ``RuntimeError``.
    """
    bucket = _required_env("OSS_BUCKET")
    endpoint = _required_env("OSS_ENDPOINT")
    key_id = _required_env("OSS_ACCESS_KEY_ID")
    key_secret = _required_env("OSS_ACCESS_KEY_SECRET")
    return OSSFS(
        bucket=bucket,
        endpoint=endpoint,
        accessKeyId=key_id,
        accessKeySecret=key_secret,
    )


async def print_exec(sandbox: Sandbox, command: str) -> str:
    """Run ``command`` in ``sandbox``, echo its output, and return its stdout.

    Stdout and stderr messages are each joined with newlines and stripped;
    non-empty results are printed (stdout first).

    Raises:
        RuntimeError: if the execution result carries an error.
    """
    outcome = await sandbox.commands.run(command)

    def _joined(messages) -> str:
        # Collapse a list of log messages into one stripped text block.
        return "\n".join(item.text for item in messages).strip()

    out_text = _joined(outcome.logs.stdout)
    err_text = _joined(outcome.logs.stderr)
    for chunk in (out_text, err_text):
        if chunk:
            print(chunk)

    failure = outcome.error
    if failure:
        raise RuntimeError(f"Command failed: {failure.name}: {failure.value}")
    return out_text


async def demo_basic_mount(config: ConnectionConfig, image: str, run_id: str) -> None:
    """Scenario 1: mount the OSSFS backend read-write, write a file, read it back.

    The sandbox is always killed once the commands finish or fail.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Scenario 1: Basic OSSFS Read-Write Mount")
    print(rule)

    root_volume = Volume(
        name="oss-root",
        ossfs=build_ossfs(),
        mountPath="/mnt/oss",
        readOnly=False,
    )
    sandbox = await Sandbox.create(
        image=image,
        connection_config=config,
        timeout=timedelta(minutes=3),
        volumes=[root_volume],
    )

    demo_dir = "/mnt/oss/opensandbox-demo"
    write_steps = (
        f"mkdir -p {demo_dir}",
        f"echo 'hello-{run_id}' > {demo_dir}/basic.txt",
    )
    async with sandbox:
        try:
            for step in write_steps:
                await print_exec(sandbox, step)
            print("[verify] read file from mounted OSSFS path:")
            await print_exec(sandbox, f"cat {demo_dir}/basic.txt")
        finally:
            await sandbox.kill()


async def demo_cross_sandbox_sharing(config: ConnectionConfig, image: str, run_id: str) -> None:
    """Scenario 2: one sandbox writes a file to OSSFS, a second one reads it.

    The writer mounts the backend read-write and creates
    ``/mnt/oss/opensandbox-demo/shared.txt``; the reader mounts the same
    backend read-only and prints that file. Each sandbox is killed as soon as
    its part is done, even on failure.

    Raises:
        RuntimeError: propagated from ``print_exec`` when a command fails, or
            from ``build_ossfs`` when an ``OSS_*`` variable is missing.
    """
    print("\n" + "=" * 60)
    print("Scenario 2: Cross-Sandbox Sharing")
    print("=" * 60)
    writer = await Sandbox.create(
        image=image,
        connection_config=config,
        timeout=timedelta(minutes=3),
        volumes=[
            Volume(
                name="oss-root-writer",
                ossfs=build_ossfs(),
                mountPath="/mnt/oss",
            )
        ],
    )
    async with writer:
        try:
            # Create the demo directory here as well, so this scenario does
            # not depend on scenario 1 having run first.
            await print_exec(writer, "mkdir -p /mnt/oss/opensandbox-demo")
            await print_exec(
                writer,
                f"echo 'from-writer-{run_id}' > /mnt/oss/opensandbox-demo/shared.txt",
            )
        finally:
            await writer.kill()

    reader = await Sandbox.create(
        image=image,
        connection_config=config,
        timeout=timedelta(minutes=3),
        volumes=[
            Volume(
                name="oss-root-reader",
                ossfs=build_ossfs(),
                mountPath="/mnt/oss",
                readOnly=True,
            )
        ],
    )
    async with reader:
        try:
            print("[verify] sandbox B reads file created by sandbox A:")
            await print_exec(reader, "cat /mnt/oss/opensandbox-demo/shared.txt")
        finally:
            await reader.kill()


async def demo_subpath_mounts(config: ConnectionConfig, image: str, run_id: str) -> None:
    """Scenario 3: mount two different OSS prefixes into one sandbox via ``subPath``.

    A setup sandbox first creates both prefix directories through a full
    mount; a second sandbox then mounts each prefix at ``/mnt/a`` and
    ``/mnt/b``, writes a file into each, and reads both back.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Scenario 3: Different OSS Prefixes via subPath")
    print(rule)

    tags = ("a", "b")
    prefixes = {tag: f"opensandbox-demo/subpath-{tag}" for tag in tags}

    setup_volume = Volume(
        name="oss-root-setup",
        ossfs=build_ossfs(),
        mountPath="/mnt/oss",
    )
    setup = await Sandbox.create(
        image=image,
        connection_config=config,
        timeout=timedelta(minutes=3),
        volumes=[setup_volume],
    )
    async with setup:
        try:
            targets = " ".join(f"/mnt/oss/{prefixes[tag]}" for tag in tags)
            await print_exec(setup, f"mkdir -p {targets}")
        finally:
            await setup.kill()

    # One volume per prefix, each mounted at /mnt/<tag>.
    prefix_volumes = [
        Volume(
            name=f"oss-{tag}",
            ossfs=build_ossfs(),
            mountPath=f"/mnt/{tag}",
            subPath=prefixes[tag],
        )
        for tag in tags
    ]
    sandbox = await Sandbox.create(
        image=image,
        connection_config=config,
        timeout=timedelta(minutes=3),
        volumes=prefix_volumes,
    )
    async with sandbox:
        try:
            for tag in tags:
                await print_exec(
                    sandbox, f"echo '{tag.upper()}-{run_id}' > /mnt/{tag}/file.txt"
                )
            for tag in tags:
                print(f"[verify] subPath {tag.upper()} content:")
                await print_exec(sandbox, f"cat /mnt/{tag}/file.txt")
        finally:
            await sandbox.kill()


async def main() -> None:
    """Run the three OSSFS demo scenarios in order against the configured server.

    Server address, API key and image come from ``SANDBOX_DOMAIN``,
    ``SANDBOX_API_KEY`` and ``SANDBOX_IMAGE``; a short random run id is
    embedded in the files the scenarios write.
    """
    domain = os.getenv("SANDBOX_DOMAIN", "localhost:8080")
    api_key = os.getenv("SANDBOX_API_KEY")
    image = os.getenv("SANDBOX_IMAGE", "ubuntu")
    run_id = uuid4().hex[:8]

    config = ConnectionConfig(
        domain=domain,
        api_key=api_key,
        request_timeout=timedelta(minutes=5),
    )

    print(f"OpenSandbox server : {domain}")
    print(f"Sandbox image      : {image}")
    # Looked up one at a time so a missing variable fails after the lines above.
    print(f"OSS bucket         : {_required_env('OSS_BUCKET')}")
    print(f"OSS endpoint       : {_required_env('OSS_ENDPOINT')}")

    scenarios = (demo_basic_mount, demo_cross_sandbox_sharing, demo_subpath_mounts)
    for scenario in scenarios:
        await scenario(config, image, run_id)

    rule = "=" * 60
    print("\n" + rule)
    print("All OSSFS scenarios completed successfully.")
    print(rule)


# Script entry point: run the async main() coroutine to completion.
if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: examples/docker-pvc-volume-mount/README.md
================================================
# Docker PVC (Named Volume) Mount Example

This example demonstrates how to mount Docker named volumes into sandbox containers using the OpenSandbox `pvc` backend. In Docker runtime, `pvc.claimName` maps to a Docker named volume -- providing a more convenient and secure alternative to host-path bind mounts for sharing data across sandboxes.

> **What is `pvc`?** The `pvc` backend is a runtime-neutral abstraction. In Kubernetes it maps to a PersistentVolumeClaim; in Docker it maps to a named volume. The same API request works on both runtimes. See [OSEP-0003](../../oseps/0003-volume-and-volumebinding-support.md) for the design.

## Why Named Volumes over Host Paths?

| | Host path (`host` backend) | Named volume (`pvc` backend) |
|---|---|---|
| **Security** | Exposes host filesystem paths | Docker manages storage location; no host path exposed |
| **Setup** | Requires `allowed_host_paths` allowlist | No allowlist needed |
| **Cross-sandbox sharing** | All containers must agree on a host path | Reference the same volume name |
| **Portability** | Tied to host directory structure | Works on any Docker host |
| **Lifecycle** | User manages host directories | `docker volume create/rm` |

## Scenarios

| # | Scenario | Description |
|---|----------|-------------|
| 1 | **Read-write mount** | Mount a named volume for bidirectional file I/O |
| 2 | **Read-only mount** | Mount a named volume that sandboxes cannot modify |
| 3 | **Cross-sandbox sharing** | Two sandboxes share data through the same named volume |
| 4 | **SubPath mount** | Mount only a subdirectory of a named volume (consistent with K8s PVC subPath) |

## Prerequisites

### 1. Start OpenSandbox Server

```shell
uv pip install opensandbox-server
opensandbox-server init-config ~/.sandbox.toml --example docker
opensandbox-server
```

### 2. Create a Docker Named Volume

```shell
# Create the named volume
docker volume create opensandbox-pvc-demo

# Seed it with a marker file via a temporary container
docker run --rm -v opensandbox-pvc-demo:/data alpine \
  sh -c "echo 'hello-from-named-volume' > /data/marker.txt"
```

### 3. Install Python SDK

```shell
uv pip install opensandbox
```

### 4. Pull the Sandbox Image

```shell
docker pull ubuntu:latest
```

## Run

```shell
uv run python examples/docker-pvc-volume-mount/main.py
```

The script automatically creates the named volume and seeds it with test data. You can also specify a custom volume name or image:

```shell
SANDBOX_IMAGE=ubuntu SANDBOX_DOMAIN=localhost:8080 uv run python examples/docker-pvc-volume-mount/main.py
```

## Expected Output

```text
OpenSandbox server : localhost:8080
Sandbox image      : ubuntu
Docker volume      : opensandbox-pvc-demo
  Ensuring Docker named volume 'opensandbox-pvc-demo' exists...
  Created volume 'opensandbox-pvc-demo' with marker.txt

============================================================
Scenario 1: Read-Write PVC (Named Volume) Mount
============================================================
  Volume name: opensandbox-pvc-demo
  Mount path : /mnt/data

  [1] Reading marker file from named volume:
  hello-from-named-volume

  [2] Writing a file from inside the sandbox:
  -> Written: /mnt/data/sandbox-output.txt

  [3] Reading back the written file:
  written-by-sandbox

  [4] Listing volume contents:
  ...
  -rw-r--r-- 1 root root   ... marker.txt
  -rw-r--r-- 1 root root   ... sandbox-output.txt

  Scenario 1 completed.

============================================================
Scenario 2: Read-Only PVC (Named Volume) Mount
============================================================
  Volume name: opensandbox-pvc-demo
  Mount path : /mnt/readonly

  [1] Reading marker.txt from read-only mount:
  hello-from-named-volume

  [2] Attempting to write (should fail):
  touch: cannot touch '/mnt/readonly/should-fail.txt': Read-only file system
  Write denied (expected)

  Scenario 2 completed.

============================================================
Scenario 3: Cross-Sandbox Sharing via PVC (Named Volume)
============================================================
  Volume name: opensandbox-pvc-demo

  [Sandbox A] Creating sandbox and writing data...
  [Sandbox A] Wrote /mnt/shared/cross-sandbox.txt

  [Sandbox B] Creating sandbox and reading data...
  [Sandbox B] Reading file written by Sandbox A:
  message-from-sandbox-a

  Cross-sandbox data sharing verified!

  Scenario 3 completed.

============================================================
Scenario 4: SubPath PVC (Named Volume) Mount
============================================================
  Volume name: opensandbox-pvc-demo
  SubPath    : datasets/train
  Mount path : /mnt/training-data

  [1] Listing mounted subpath content:
  ...
  -rw-r--r-- 1 root root   ... data.csv

  [2] Reading data.csv:
  id,value
  1,100
  2,200

  [3] Verifying volume root is NOT visible:
  marker.txt at mount root: NOT-FOUND
  -> Confirmed: subPath isolation is working correctly

  Scenario 4 completed.

============================================================
All scenarios completed successfully!
============================================================
```

## SDK Usage Quick Reference

### Python (async)

```python
from opensandbox import Sandbox
from opensandbox.models.sandboxes import PVC, Volume

sandbox = await Sandbox.create(
    image="ubuntu",
    volumes=[
        Volume(
            name="my-data",
            pvc=PVC(claimName="my-named-volume"),
            mountPath="/mnt/data",
            readOnly=False,       # optional, default is False
            subPath="datasets/train",  # optional, mount a subdirectory
        ),
    ],
)
```

### Python (sync)

```python
from opensandbox import SandboxSync
from opensandbox.models.sandboxes import PVC, Volume

sandbox = SandboxSync.create(
    image="ubuntu",
    volumes=[
        Volume(
            name="my-data",
            pvc=PVC(claimName="my-named-volume"),
            mountPath="/mnt/data",
            subPath="datasets/train",  # optional
        ),
    ],
)
```

### JavaScript / TypeScript

```typescript
import { Sandbox } from "@alibaba-group/opensandbox";

const sandbox = await Sandbox.create({
  image: "ubuntu",
  volumes: [
    {
      name: "my-data",
      pvc: { claimName: "my-named-volume" },
      mountPath: "/mnt/data",
      readOnly: false,
      subPath: "datasets/train",  // optional
    },
  ],
});
```

### Java / Kotlin

```java
Volume volume = Volume.builder()
    .name("my-data")
    .pvc(PVC.of("my-named-volume"))
    .mountPath("/mnt/data")
    .readOnly(false)
    .subPath("datasets/train")  // optional
    .build();

Sandbox sandbox = Sandbox.builder()
    .image("ubuntu")
    .volume(volume)
    .build();
```

## Cleanup

```shell
docker volume rm opensandbox-pvc-demo
```

## References

- [OSEP-0003: Volume and VolumeBinding Support](../../oseps/0003-volume-and-volumebinding-support.md) -- Design proposal
- [Sandbox Lifecycle API Spec](../../specs/sandbox-lifecycle.yml) -- OpenAPI schema for volume definitions
- [Host Volume Mount Example](../host-volume-mount/) -- Host path bind mount example (alternative approach)


================================================
FILE: examples/docker-pvc-volume-mount/README_zh.md
================================================
# Docker PVC（命名卷）挂载示例

本示例演示如何使用 OpenSandbox 的 `pvc` 后端将 Docker 命名卷（named volume）挂载到沙箱容器中。在 Docker 运行时下，`pvc.claimName` 映射为 Docker 命名卷 —— 相比宿主机路径绑定挂载（host path），命名卷更安全、更便于跨沙箱共享数据。

> **什么是 `pvc`？** `pvc` 后端是一个运行时无关的抽象。在 Kubernetes 中它映射为 PersistentVolumeClaim；在 Docker 中它映射为命名卷。同一个 API 请求可在两种运行时上工作。详见 [OSEP-0003](../../oseps/0003-volume-and-volumebinding-support.md) 设计文档。

## 为什么使用命名卷而非宿主机路径？

| | 宿主机路径（`host` 后端） | 命名卷（`pvc` 后端） |
|---|---|---|
| **安全性** | 暴露宿主机文件系统路径 | Docker 管理存储位置，不暴露宿主机路径 |
| **配置** | 需要 `allowed_host_paths` 白名单 | 无需白名单配置 |
| **跨沙箱共享** | 所有容器必须约定同一宿主机路径 | 引用相同的卷名即可 |
| **可移植性** | 依赖宿主机目录结构 | 在任何 Docker 主机上均可使用 |
| **生命周期** | 用户手动管理宿主机目录 | `docker volume create/rm` 管理 |

## 演示场景

| # | 场景 | 说明 |
|---|------|------|
| 1 | **读写挂载** | 挂载命名卷，支持双向文件读写 |
| 2 | **只读挂载** | 挂载命名卷，沙箱不可修改 |
| 3 | **跨沙箱共享** | 两个沙箱通过同一命名卷共享数据，无需暴露宿主机路径 |
| 4 | **SubPath 挂载** | 仅挂载命名卷的子目录（与 K8s PVC subPath 语义一致） |

## 前置条件

### 1. 启动 OpenSandbox 服务

```shell
uv pip install opensandbox-server
opensandbox-server init-config ~/.sandbox.toml --example docker
opensandbox-server
```

### 2. 创建 Docker 命名卷

```shell
# 创建命名卷
docker volume create opensandbox-pvc-demo

# 通过临时容器写入一个标记文件
docker run --rm -v opensandbox-pvc-demo:/data alpine \
  sh -c "echo 'hello-from-named-volume' > /data/marker.txt"
```

### 3. 安装 Python SDK

```shell
uv pip install opensandbox
```

### 4. 拉取沙箱镜像

```shell
docker pull registry.cn-hangzhou.aliyuncs.com/acs/ubuntu:latest
```

## 运行

```shell
SANDBOX_IMAGE=registry.cn-hangzhou.aliyuncs.com/acs/ubuntu:latest \
  uv run python examples/docker-pvc-volume-mount/main.py
```

脚本启动时会尝试删除并重新创建该命名卷，然后写入测试数据（卷内已有的数据会被清除）。也可以通过环境变量自定义镜像和服务地址：

```shell
SANDBOX_IMAGE=ubuntu SANDBOX_DOMAIN=localhost:8080 \
  uv run python examples/docker-pvc-volume-mount/main.py
```

## 预期输出

```text
OpenSandbox server : localhost:8080
Sandbox image      : ubuntu
Docker volume      : opensandbox-pvc-demo
  Ensuring Docker named volume 'opensandbox-pvc-demo' exists...
  Created volume 'opensandbox-pvc-demo' with marker.txt and datasets/train/

============================================================
Scenario 1: Read-Write PVC (Named Volume) Mount
============================================================
  Volume name: opensandbox-pvc-demo
  Mount path : /mnt/data

  [1] Reading marker file from named volume:
  hello-from-named-volume

  [2] Writing a file from inside the sandbox:
  -> Written: /mnt/data/sandbox-output.txt

  [3] Reading back the written file:
  written-by-sandbox

  [4] Listing volume contents:
  ...
  -rw-r--r-- 1 root root   ... marker.txt
  -rw-r--r-- 1 root root   ... sandbox-output.txt

  Scenario 1 completed.

============================================================
Scenario 2: Read-Only PVC (Named Volume) Mount
============================================================
  Volume name: opensandbox-pvc-demo
  Mount path : /mnt/readonly

  [1] Reading marker.txt from read-only mount:
  hello-from-named-volume

  [2] Attempting to write (should fail):
  touch: cannot touch '/mnt/readonly/should-fail.txt': Read-only file system
  Write denied (expected)

  Scenario 2 completed.

============================================================
Scenario 3: Cross-Sandbox Sharing via PVC (Named Volume)
============================================================
  Volume name: opensandbox-pvc-demo

  [Sandbox A] Creating sandbox and writing data...
  [Sandbox A] Wrote /mnt/shared/cross-sandbox.txt

  [Sandbox B] Creating sandbox and reading data...
  [Sandbox B] Reading file written by Sandbox A:
  message-from-sandbox-a

  Cross-sandbox data sharing verified!

  Scenario 3 completed.

============================================================
Scenario 4: SubPath PVC (Named Volume) Mount
============================================================
  Volume name: opensandbox-pvc-demo
  SubPath    : datasets/train
  Mount path : /mnt/training-data

  [1] Listing mounted subpath content:
  ...
  -rw-r--r-- 1 root root   ... data.csv

  [2] Reading data.csv:
  id,value
  1,100
  2,200

  [3] Verifying volume root is NOT visible:
  marker.txt at mount root: NOT-FOUND
  -> Confirmed: subPath isolation is working correctly

  Scenario 4 completed.

============================================================
All scenarios completed successfully!
============================================================
```

## 各 SDK 用法速览

### Python（异步）

```python
from opensandbox import Sandbox
from opensandbox.models.sandboxes import PVC, Volume

sandbox = await Sandbox.create(
    image="ubuntu",
    volumes=[
        Volume(
            name="my-data",
            pvc=PVC(claimName="my-named-volume"),
            mountPath="/mnt/data",
            readOnly=False,       # 可选，默认为 False
            subPath="datasets/train",  # 可选，挂载子目录
        ),
    ],
)
```

### Python（同步）

```python
from opensandbox import SandboxSync
from opensandbox.models.sandboxes import PVC, Volume

sandbox = SandboxSync.create(
    image="ubuntu",
    volumes=[
        Volume(
            name="my-data",
            pvc=PVC(claimName="my-named-volume"),
            mountPath="/mnt/data",
            subPath="datasets/train",  # 可选
        ),
    ],
)
```

### JavaScript / TypeScript

```typescript
import { Sandbox } from "@alibaba-group/opensandbox";

const sandbox = await Sandbox.create({
  image: "ubuntu",
  volumes: [
    {
      name: "my-data",
      pvc: { claimName: "my-named-volume" },
      mountPath: "/mnt/data",
      readOnly: false,
      subPath: "datasets/train",  // 可选
    },
  ],
});
```

### Java / Kotlin

```java
Volume volume = Volume.builder()
    .name("my-data")
    .pvc(PVC.of("my-named-volume"))
    .mountPath("/mnt/data")
    .readOnly(false)
    .subPath("datasets/train")  // 可选
    .build();

Sandbox sandbox = Sandbox.builder()
    .image("ubuntu")
    .volume(volume)
    .build();
```

## 清理

```shell
docker volume rm opensandbox-pvc-demo
```

## 参考资料

- [OSEP-0003: Volume 与 VolumeBinding 支持](../../oseps/0003-volume-and-volumebinding-support.md) — 设计提案
- [Sandbox Lifecycle API 规范](../../specs/sandbox-lifecycle.yml) — Volume 定义的 OpenAPI 规范
- [宿主机目录挂载示例](../host-volume-mount/) — Host path 绑定挂载示例（替代方案）


================================================
FILE: examples/docker-pvc-volume-mount/main.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Docker PVC (Named Volume) Mount Example
========================================

Demonstrates how to mount Docker named volumes into sandbox containers using
the OpenSandbox ``pvc`` backend.  In Docker runtime the ``pvc`` backend maps
``claimName`` to a Docker named volume -- providing a more convenient and
secure alternative to host-path bind mounts for sharing data across sandboxes.

Four scenarios are demonstrated:

1. **Read-write mount**        - Mount a named volume for bidirectional file I/O.
2. **Read-only mount**         - Mount a named volume as read-only.
3. **Cross-sandbox sharing**   - Two sandboxes share data through the same named
   volume without exposing any host path.
4. **SubPath mount**           - Mount only a subdirectory of a named volume,
   keeping the same API as Kubernetes PVC subPath.

Prerequisites:
- OpenSandbox server running with Docker runtime
- Docker CLI available on this machine; the script (re)creates the named
  volume itself and seeds it through a temporary ``alpine`` container
"""

import asyncio
import os
import subprocess
from datetime import timedelta

from opensandbox import Sandbox
from opensandbox.config import ConnectionConfig

try:
    from opensandbox.models.sandboxes import PVC, Volume
except ImportError:
    print(
        "ERROR: Your installed opensandbox SDK does not include Volume/PVC models.\n"
        "       Volume support requires the latest SDK from source.\n"
        "       Please install from the local repository:\n"
        "\n"
        "           pip install -e sdks/sandbox/python\n"
        "\n"
        "       See README.md for details."
    )
    raise SystemExit(1)


# Name of the Docker named volume shared by every scenario in this script;
# it is created by ensure_named_volume() and passed as the PVC ``claimName``.
VOLUME_NAME = "opensandbox-pvc-demo"


async def print_exec(sandbox: Sandbox, command: str) -> str | None:
    """
    Run a shell command in the sandbox, echo its stdout, and return it.

    Args:
        sandbox: Sandbox in which the command is executed.
        command: Shell command line passed to ``sandbox.commands.run``.

    Returns:
        The stdout messages joined with newlines (possibly an empty string),
        or ``None`` when the execution reported an error.
    """
    result = await sandbox.commands.run(command)
    if result.error:
        print(f"  [error] {result.error.name}: {result.error.value}")
        return None
    text = "\n".join(msg.text for msg in result.logs.stdout)
    # Indent every line, not only the first one, so multi-line output (``ls``,
    # ``cat`` of a CSV file) stays aligned with the rest of the scenario log.
    for line in text.splitlines():
        print(f"  {line}")
    return text


def ensure_named_volume() -> None:
    """
    Recreate the Docker named volume and seed it with test data.

    The volume named ``VOLUME_NAME`` is removed on a best-effort basis (the
    result of ``docker volume rm`` is ignored), created again, and then
    populated through a temporary ``alpine`` container with ``marker.txt`` and
    ``datasets/train/data.csv``.

    Raises:
        subprocess.CalledProcessError: If ``docker volume create`` or the
            seeding ``docker run`` exits with a non-zero status.  Docker's
            captured stderr is printed before the exception propagates.
    """
    print(f"  Ensuring Docker named volume '{VOLUME_NAME}' exists...")
    # Best-effort cleanup of a previous run: a failure here is deliberately
    # ignored (no ``check=True``).
    subprocess.run(
        ["docker", "volume", "rm", VOLUME_NAME],
        capture_output=True,
    )
    try:
        subprocess.run(
            ["docker", "volume", "create", VOLUME_NAME],
            check=True,
            capture_output=True,
        )
        # Seed the volume with a marker file and subpath test data
        subprocess.run(
            [
                "docker", "run", "--rm",
                "-v", f"{VOLUME_NAME}:/data",
                "alpine",
                "sh", "-c",
                "echo 'hello-from-named-volume' > /data/marker.txt && "
                "mkdir -p /data/datasets/train && "
                "echo 'id,value' > /data/datasets/train/data.csv && "
                "echo '1,100' >> /data/datasets/train/data.csv && "
                "echo '2,200' >> /data/datasets/train/data.csv",
            ],
            check=True,
            capture_output=True,
        )
    except subprocess.CalledProcessError as exc:
        # capture_output=True hides docker's own diagnostics; surface them so
        # the failure is actionable, then re-raise the original exception.
        details = (exc.stderr or b"").decode(errors="replace").strip()
        if details:
            print(f"  [error] docker command failed: {details}")
        raise
    print(f"  Created volume '{VOLUME_NAME}' with marker.txt and datasets/train/")


async def demo_readwrite_mount(config: ConnectionConfig, image: str) -> None:
    """
    Scenario 1: mount the named volume read-write at /mnt/data.

    Reads the seeded marker file, writes a new file from inside the sandbox,
    reads it back, and lists the mount directory.  The sandbox is killed when
    the scenario ends, whether or not a step raised.

    Args:
        config: Connection settings used to reach the OpenSandbox server.
        image: Container image for the sandbox.
    """
    mount_path = "/mnt/data"
    output_file = f"{mount_path}/sandbox-output.txt"
    rule = "=" * 60

    print("\n" + rule)
    print("Scenario 1: Read-Write PVC (Named Volume) Mount")
    print(rule)
    print(f"  Volume name: {VOLUME_NAME}")
    print(f"  Mount path : {mount_path}")

    rw_volume = Volume(
        name="demo-data",
        pvc=PVC(claimName=VOLUME_NAME),
        mountPath=mount_path,
        readOnly=False,
    )
    sandbox = await Sandbox.create(
        image=image,
        connection_config=config,
        timeout=timedelta(minutes=2),
        volumes=[rw_volume],
    )

    async with sandbox:
        try:
            # Step 1: the marker file seeded into the volume
            print("\n  [1] Reading marker file from named volume:")
            await print_exec(sandbox, f"cat {mount_path}/marker.txt")

            # Step 2: create a file from within the sandbox
            print("\n  [2] Writing a file from inside the sandbox:")
            await print_exec(sandbox, f"echo 'written-by-sandbox' > {output_file}")
            print(f"  -> Written: {output_file}")

            # Step 3: confirm the file content
            print("\n  [3] Reading back the written file:")
            await print_exec(sandbox, f"cat {output_file}")

            # Step 4: directory listing of the mount
            print("\n  [4] Listing volume contents:")
            await print_exec(sandbox, f"ls -la {mount_path}/")
        finally:
            await sandbox.kill()

    print("\n  Scenario 1 completed.")


async def demo_readonly_mount(config: ConnectionConfig, image: str) -> None:
    """
    Scenario 2: mount the named volume read-only at /mnt/readonly.

    Reads the marker file, then attempts a ``touch`` that is expected to fail;
    the shell fallback prints 'Write denied (expected)' when it does.  The
    sandbox is killed when the scenario ends.

    Args:
        config: Connection settings used to reach the OpenSandbox server.
        image: Container image for the sandbox.
    """
    mount_path = "/mnt/readonly"
    rule = "=" * 60

    print("\n" + rule)
    print("Scenario 2: Read-Only PVC (Named Volume) Mount")
    print(rule)
    print(f"  Volume name: {VOLUME_NAME}")
    print(f"  Mount path : {mount_path}")

    ro_volume = Volume(
        name="readonly-vol",
        pvc=PVC(claimName=VOLUME_NAME),
        mountPath=mount_path,
        readOnly=True,
    )
    sandbox = await Sandbox.create(
        image=image,
        connection_config=config,
        timeout=timedelta(minutes=2),
        volumes=[ro_volume],
    )

    async with sandbox:
        try:
            # Step 1: reading must still work
            print("\n  [1] Reading marker.txt from read-only mount:")
            await print_exec(sandbox, f"cat {mount_path}/marker.txt")

            # Step 2: the write attempt; stderr is folded into stdout by 2>&1
            print("\n  [2] Attempting to write (should fail):")
            write_attempt = await sandbox.commands.run(
                f"touch {mount_path}/should-fail.txt 2>&1 || echo 'Write denied (expected)'"
            )
            # Echo stdout first, then whatever was reported on stderr.
            for entry in (*write_attempt.logs.stdout, *write_attempt.logs.stderr):
                print(f"  {entry.text}")
        finally:
            await sandbox.kill()

    print("\n  Scenario 2 completed.")


async def demo_cross_sandbox_sharing(config: ConnectionConfig, image: str) -> None:
    """
    Scenario 3: share data between two sandboxes through one named volume.

    Sandbox A mounts the volume at /mnt/shared and writes a file; after A has
    been killed, Sandbox B mounts the same volume and reads the file back.
    A confirmation line is printed only when B sees A's message.

    Args:
        config: Connection settings used to reach the OpenSandbox server.
        image: Container image for both sandboxes.
    """
    shared_file = "/mnt/shared/cross-sandbox.txt"
    expected_message = "message-from-sandbox-a"
    rule = "=" * 60

    print("\n" + rule)
    print("Scenario 3: Cross-Sandbox Sharing via PVC (Named Volume)")
    print(rule)
    print(f"  Volume name: {VOLUME_NAME}")

    # The same volume definition is handed to both sandboxes.
    shared_volume = Volume(
        name="shared-vol",
        pvc=PVC(claimName=VOLUME_NAME),
        mountPath="/mnt/shared",
        readOnly=False,
    )

    # Writer side
    print("\n  [Sandbox A] Creating sandbox and writing data...")
    writer = await Sandbox.create(
        image=image,
        connection_config=config,
        timeout=timedelta(minutes=2),
        volumes=[shared_volume],
    )
    async with writer:
        try:
            await print_exec(writer, f"echo '{expected_message}' > {shared_file}")
            print(f"  [Sandbox A] Wrote {shared_file}")
        finally:
            await writer.kill()

    # Reader side
    print("\n  [Sandbox B] Creating sandbox and reading data...")
    reader = await Sandbox.create(
        image=image,
        connection_config=config,
        timeout=timedelta(minutes=2),
        volumes=[shared_volume],
    )
    async with reader:
        try:
            print("  [Sandbox B] Reading file written by Sandbox A:")
            received = await print_exec(reader, f"cat {shared_file}")
            # print_exec returns None on error; treat that like empty output.
            if expected_message in (received or ""):
                print("\n  Cross-sandbox data sharing verified!")
        finally:
            await reader.kill()

    print("\n  Scenario 3 completed.")


async def demo_subpath_mount(config: ConnectionConfig, image: str) -> None:
    """
    Scenario 4: mount only the ``datasets/train`` subdirectory of the volume.

    The subdirectory is mounted read-only at /mnt/training-data.  The scenario
    lists it, reads ``data.csv``, and checks that the volume-root
    ``marker.txt`` is not reachable through the mount.

    NOTE(review): per the original author's description, the server resolves
    the volume's host-side Mountpoint via ``docker volume inspect`` and appends
    the subPath to build a bind mount; that happens server-side and is not
    visible in this script.

    Args:
        config: Connection settings used to reach the OpenSandbox server.
        image: Container image for the sandbox.
    """
    sub_path = "datasets/train"
    mount_path = "/mnt/training-data"
    rule = "=" * 60

    print("\n" + rule)
    print("Scenario 4: SubPath PVC (Named Volume) Mount")
    print(rule)
    print(f"  Volume name: {VOLUME_NAME}")
    print(f"  SubPath    : {sub_path}")
    print(f"  Mount path : {mount_path}")

    subpath_volume = Volume(
        name="train-data",
        pvc=PVC(claimName=VOLUME_NAME),
        mountPath=mount_path,
        readOnly=True,
        subPath=sub_path,
    )
    sandbox = await Sandbox.create(
        image=image,
        connection_config=config,
        timeout=timedelta(minutes=2),
        volumes=[subpath_volume],
    )

    async with sandbox:
        try:
            # Step 1: only the subdirectory content should be listed
            print("\n  [1] Listing mounted subpath content:")
            await print_exec(sandbox, f"ls -la {mount_path}/")

            # Step 2: the seeded CSV file
            print("\n  [2] Reading data.csv:")
            await print_exec(sandbox, f"cat {mount_path}/data.csv")

            # Step 3: marker.txt lives at the volume root, outside the subPath
            print("\n  [3] Verifying volume root is NOT visible:")
            probe = await sandbox.commands.run(
                f"test -f {mount_path}/marker.txt && echo FOUND || echo NOT-FOUND"
            )
            verdict = "\n".join(entry.text for entry in probe.logs.stdout)
            print(f"  marker.txt at mount root: {verdict}")
            if "NOT-FOUND" in verdict:
                print("  -> Confirmed: subPath isolation is working correctly")
        finally:
            await sandbox.kill()

    print("\n  Scenario 4 completed.")


async def main() -> None:
    """
    Entry point: read settings from the environment and run all scenarios.

    Environment variables:
        SANDBOX_DOMAIN: Server address (default ``localhost:8080``).
        SANDBOX_API_KEY: API key; may be unset.
        SANDBOX_IMAGE: Sandbox image (default ``ubuntu``).
    """
    image = os.getenv("SANDBOX_IMAGE", "ubuntu")
    config = ConnectionConfig(
        domain=os.getenv("SANDBOX_DOMAIN", "localhost:8080"),
        api_key=os.getenv("SANDBOX_API_KEY"),
        request_timeout=timedelta(minutes=3),
    )

    print(f"OpenSandbox server : {config.domain}")
    print(f"Sandbox image      : {image}")
    print(f"Docker volume      : {VOLUME_NAME}")

    # The volume must exist and contain seed data before any scenario runs.
    ensure_named_volume()

    # Scenarios run strictly one after another, in this order.
    scenarios = (
        demo_readwrite_mount,
        demo_readonly_mount,
        demo_cross_sandbox_sharing,
        demo_subpath_mount,
    )
    for scenario in scenarios:
        await scenario(config, image)

    rule = "=" * 60
    print("\n" + rule)
    print("All scenarios completed successfully!")
    print(rule)

# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: examples/gemini-cli/README.md
================================================
# Gemini CLI Example

Call Google Gemini via the `@google/gemini-cli` npm package in OpenSandbox.

## Start OpenSandbox server [local]

Pre-pull the code-interpreter image (includes Node.js):

```shell
docker pull sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2

# use docker hub
# docker pull opensandbox/code-interpreter:v1.0.2
```

Start the local OpenSandbox server, logs will be visible in the terminal:

```shell
uv pip install opensandbox-server
opensandbox-server init-config ~/.sandbox.toml --example docker
opensandbox-server
```

## Create and Access the Gemini Sandbox

```shell
# Install OpenSandbox package
uv pip install opensandbox

# Run the example (requires GEMINI_API_KEY; SANDBOX_DOMAIN / SANDBOX_API_KEY are optional, see Environment Variables)
uv run python examples/gemini-cli/main.py
```

The script installs the Gemini CLI (`npm install -g @google/gemini-cli@latest`) at runtime (Node.js is already in the code-interpreter image), then sends a simple request `gemini "Compute 1+1=?."`. Auth is passed via `GEMINI_API_KEY`; you can override endpoint/model with `GEMINI_BASE_URL` / `GEMINI_MODEL`.

## Environment Variables

- `SANDBOX_DOMAIN`: Sandbox service address (default: `localhost:8080`)
- `SANDBOX_API_KEY`: API key if your server requires authentication (optional for local)
- `SANDBOX_IMAGE`: Sandbox image to use (default: `sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2`)
- `GEMINI_API_KEY`: Your Google Gemini API key (required)
- `GEMINI_BASE_URL`: Gemini API endpoint (optional; e.g., proxy)
- `GEMINI_MODEL`: Model to use (default: `gemini-2.5-flash`)

## References
- [@google/gemini-cli](https://www.npmjs.com/package/@google/gemini-cli) - Gemini CLI


================================================
FILE: examples/gemini-cli/main.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import os
from datetime import timedelta

from opensandbox import Sandbox
from opensandbox.config import ConnectionConfig


def _required_env(name: str) -> str:
    """
    Return the value of environment variable ``name``.

    Raises:
        RuntimeError: If the variable is unset or set to an empty string.
    """
    found = os.environ.get(name)
    if found:
        return found
    raise RuntimeError(f"{name} is required")


async def _print_execution_logs(execution) -> None:
    """
    Print an execution's stdout messages, then its stderr messages, then its
    error (if any), each line tagged with the stream it came from.
    """
    streams = execution.logs
    for tag, messages in (("stdout", streams.stdout), ("stderr", streams.stderr)):
        for entry in messages:
            print(f"[{tag}] {entry.text}")
    failure = execution.error
    if failure:
        print(f"[error] {failure.name}: {failure.value}")


async def main() -> None:
    """
    Create a sandbox, install the Gemini CLI in it, and send one prompt.

    Environment variables:
        SANDBOX_DOMAIN: Server address (default ``localhost:8080``).
        SANDBOX_API_KEY: API key; may be unset.
        SANDBOX_IMAGE: Sandbox image (defaults to the code-interpreter image).
        GEMINI_API_KEY: Required; forwarded into the sandbox environment.
        GEMINI_BASE_URL: Optional; forwarded only when set.
        GEMINI_MODEL: Forwarded into the sandbox (default ``gemini-2.5-flash``).

    Raises:
        RuntimeError: If ``GEMINI_API_KEY`` is unset or empty.
    """
    domain = os.getenv("SANDBOX_DOMAIN", "localhost:8080")
    api_key = os.getenv("SANDBOX_API_KEY")
    gemini_api_key = _required_env("GEMINI_API_KEY")
    gemini_base_url = os.getenv("GEMINI_BASE_URL")
    gemini_model = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
    image = os.getenv(
        "SANDBOX_IMAGE",
        "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2",
    )

    config = ConnectionConfig(
        domain=domain,
        api_key=api_key,
        request_timeout=timedelta(seconds=60),
    )

    # Inject Gemini settings into container environment for CLI access
    env = {
        "GEMINI_API_KEY": gemini_api_key,
        "GEMINI_BASE_URL": gemini_base_url,
        "GEMINI_MODEL": gemini_model,
    }
    # Drop None values to avoid overriding defaults inside CLI
    env = {k: v for k, v in env.items() if v is not None}

    sandbox = await Sandbox.create(
        image,
        connection_config=config,
        env=env,
    )

    async with sandbox:
        try:
            # Install Gemini CLI (Node.js is already in the code-interpreter image)
            install_exec = await sandbox.commands.run(
                "npm install -g @google/gemini-cli@latest"
            )
            await _print_execution_logs(install_exec)

            # Use Gemini CLI to send a message
            run_exec = await sandbox.commands.run(
                'gemini "Compute 1+1=?."'
            )
            await _print_execution_logs(run_exec)
        finally:
            # Kill the sandbox even when one of the commands raises, so a
            # failed run does not leave a sandbox behind on the server.
            await sandbox.kill()


# Run the example only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: examples/google-adk/README.md
================================================
# Google ADK + OpenSandbox Example

Integrate Google Agent Development Kit (ADK) with OpenSandbox. The ADK agent
drives tool calls that execute inside a sandbox.

## Start OpenSandbox server [local]

Pre-pull the code-interpreter image (includes Python):

```shell
docker pull sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2

# use docker hub
# docker pull opensandbox/code-interpreter:v1.0.2
```

Start the local OpenSandbox server, logs will be visible in the terminal:

```shell
uv pip install opensandbox-server
opensandbox-server init-config ~/.sandbox.toml --example docker
opensandbox-server
```

## Run the example

```shell
# Install OpenSandbox + Google ADK deps
uv pip install opensandbox google-adk

# Run the example (requires GOOGLE_API_KEY; SANDBOX_DOMAIN / SANDBOX_API_KEY are optional, see Environment Variables)
uv run python examples/google-adk/main.py
```

The script uses ADK to create an agent with OpenSandbox tools (`write_file`,
`read_file`, `run_in_sandbox`). It runs a few prompts, prints tool events, and
cleans up the sandbox.

## Environment Variables

- `SANDBOX_DOMAIN`: Sandbox service address (default: `localhost:8080`)
- `SANDBOX_API_KEY`: API key if your server requires authentication (optional for local)
- `SANDBOX_IMAGE`: Sandbox image to use (default: `sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2`)
- `GOOGLE_API_KEY`: Gemini API key (required)
- `GOOGLE_ADK_MODEL`: Gemini model name (default: `gemini-2.5-flash`)

## References
- [Google ADK](https://google.github.io/adk-docs/) - Agent Development Kit


================================================
FILE: examples/google-adk/main.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from datetime import timedelta

from google.adk.agents import Agent
from google.adk.apps import App
from google.adk.runners import Runner
from google.adk.sessions.in_memory_session_service import InMemorySessionService
from google.adk.utils._debug_output import print_event
from google.adk.utils.context_utils import Aclosing
from google.genai import types
from opensandbox import Sandbox
from opensandbox.config import ConnectionConfig


def _required_env(name: str) -> str:
    """
    Look up environment variable ``name`` and insist that it has a value.

    Raises:
        RuntimeError: If the variable is missing or is an empty string.
    """
    setting = os.environ.get(name)
    if setting:
        return setting
    raise RuntimeError(f"{name} is required")


async def main() -> None:
    """
    Drive a Google ADK agent whose tools execute inside an OpenSandbox sandbox.

    A sandbox is created, three tool functions (``run_in_sandbox``,
    ``write_file``, ``read_file``) are exposed to the agent, a fixed list of
    prompts is run, and every event is printed.  The sandbox is killed and
    closed on the way out, including when agent or session setup fails.

    Environment variables:
        GOOGLE_API_KEY: Required (only checked for presence here).
        SANDBOX_DOMAIN: Server address (default ``localhost:8080``).
        SANDBOX_API_KEY: API key; may be unset.
        SANDBOX_IMAGE: Sandbox image (defaults to the code-interpreter image).
        GOOGLE_ADK_MODEL: Model name (default ``gemini-2.5-flash``).

    Raises:
        RuntimeError: If ``GOOGLE_API_KEY`` is unset or empty.
    """
    _required_env("GOOGLE_API_KEY")
    domain = os.getenv("SANDBOX_DOMAIN", "localhost:8080")
    api_key = os.getenv("SANDBOX_API_KEY")
    image = os.getenv(
        "SANDBOX_IMAGE",
        "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2",
    )
    model_name = os.getenv("GOOGLE_ADK_MODEL", "gemini-2.5-flash")

    config = ConnectionConfig(
        domain=domain,
        api_key=api_key,
        request_timeout=timedelta(seconds=120),
    )

    sandbox = await Sandbox.create(
        image,
        connection_config=config,
    )

    # NOTE(review): the tool docstrings below are presumably surfaced to the
    # model as tool descriptions by ADK -- keep their wording stable.
    async def run_in_sandbox(command: str) -> str:
        """Run a shell command in OpenSandbox and return the output."""

        execution = await sandbox.commands.run(command)
        stdout = "\n".join(msg.text for msg in execution.logs.stdout)
        stderr = "\n".join(msg.text for msg in execution.logs.stderr)
        if execution.error:
            # Append the structured error to whatever stderr already holds.
            stderr = "\n".join(
                [
                    stderr,
                    f"[error] {execution.error.name}: {execution.error.value}",
                ]
            ).strip()

        output = stdout.strip()
        if stderr:
            output = "\n".join([output, f"[stderr]\n{stderr}"]).strip()
        return output or "(no output)"

    async def write_file(path: str, content: str) -> str:
        """Write a file inside the sandbox."""

        await sandbox.files.write_file(path, content)
        return f"wrote {len(content)} bytes to {path}"

    async def read_file(path: str) -> str:
        """Read a file from the sandbox."""

        return await sandbox.files.read_file(path)

    # Everything after sandbox creation runs under try/finally so that a
    # failure during agent/app/session setup cannot leak the sandbox.
    try:
        agent = Agent(
            name="opensandbox_adk",
            model=model_name,
            instruction=(
                "You have access to OpenSandbox tools. Use write_file to create or "
                "update files, read_file to read files, and run_in_sandbox to run "
                "commands."
            ),
            tools=[run_in_sandbox, write_file, read_file],
        )

        app = App(name="opensandbox_adk", root_agent=agent)
        session_service = InMemorySessionService()
        runner = Runner(app=app, session_service=session_service)
        session = await session_service.create_session(
            app_name=app.name,
            user_id="local-user",
        )

        prompts = [
            "Use write_file to save /tmp/math.py that prints 137 * 42.",
            "Run the script using run_in_sandbox and report the result.",
            "Write /tmp/notes.txt with 'ADK + OpenSandbox', then read it back.",
        ]

        for prompt in prompts:
            content = types.Content(
                role="user",
                parts=[types.Part(text=prompt)],
            )
            async with Aclosing(
                runner.run_async(
                    user_id=session.user_id,
                    session_id=session.id,
                    new_message=content,
                )
            ) as agen:
                async for event in agen:
                    print_event(event, verbose=True)
    finally:
        # Still close the local client if killing the sandbox raises.
        try:
            await sandbox.kill()
        finally:
            await sandbox.close()


# Run the example only when executed as a script; asyncio is imported here
# because it is needed solely to start the event loop.
if __name__ == "__main__":
    import asyncio

    asyncio.run(main())


================================================
FILE: examples/host-volume-mount/README.md
================================================
# Host Volume Mount Example

This example demonstrates how to mount host directories into sandbox containers using the OpenSandbox Volume API. Host volume mounts enable bidirectional file sharing between the host machine and sandbox environments — ideal for sharing datasets, model checkpoints, configuration files, or collecting sandbox outputs.

## Scenarios

| # | Scenario | Description |
|---|----------|-------------|
| 1 | **Read-write mount** | Mount a host directory for bidirectional file exchange |
| 2 | **Read-only mount** | Provide shared data that sandboxes cannot modify |
| 3 | **SubPath mount** | Mount a specific subdirectory from the host path |

## Prerequisites

### 1. Start OpenSandbox Server

```shell
git clone git@github.com:alibaba/OpenSandbox.git
cd OpenSandbox/server
cp example.config.toml ~/.sandbox.toml
uv sync && uv run python -m src.main
```

### 2. Configure Allowed Host Paths

For security, the server restricts which host paths can be mounted. Add a `[storage]` section to `~/.sandbox.toml`:

```toml
[storage]
# Allowlist of host path prefixes permitted for bind mounts.
# Only paths under these prefixes can be mounted into sandboxes.
# If empty, all host paths are allowed (not recommended for production).
allowed_host_paths = ["/tmp/opensandbox-data", "/data/shared"]
```

> **Security note**: In production, always set explicit `allowed_host_paths` to prevent sandboxes from accessing sensitive host directories. An empty list allows all paths, which is convenient for local development but not safe for shared environments.

### 3. Create Host Directories

```shell
# Create a directory to share with sandboxes
mkdir -p /tmp/opensandbox-data
echo "hello-from-host" > /tmp/opensandbox-data/marker.txt

# Create a subdirectory for the subpath demo
mkdir -p /tmp/opensandbox-data/datasets/train
echo -e "id,value\n1,100\n2,200\n3,300" > /tmp/opensandbox-data/datasets/train/data.csv
```

### 4. Install SDK from Source

Volume support requires the latest SDK built from source (not yet available in the released package):

```shell
# From the project root (recommended: use uv)
uv pip install -e sdks/sandbox/python

# Or use pip inside a virtual environment
# python3 -m venv .venv && source .venv/bin/activate
# pip install -e sdks/sandbox/python
```

### 5. Pull the Sandbox Image

```shell
docker pull ubuntu:latest
```

## Run

```shell
HOST_VOLUME_PATH=/tmp/opensandbox-data uv run python examples/host-volume-mount/main.py
```

## Expected Output

```text
Using HOST_VOLUME_PATH: /tmp/opensandbox-data

OpenSandbox server : localhost:8080
Sandbox image      : ubuntu
Host volume path   : /tmp/opensandbox-data

============================================================
Scenario 1: Read-Write Host Volume Mount
============================================================
  Host path : /tmp/opensandbox-data
  Mount path: /mnt/shared

  [1] Listing files visible from inside the sandbox:
  total 12
  drwxrwxrwx 3 root root 4096 ... .
  drwxr-xr-x 1 root root 4096 ... ..
  -rw-r--r-- 1 root root   16 ... marker.txt
  drwxr-xr-x 3 root root 4096 ... datasets

  [2] Writing a file from inside the sandbox:
  -> Written: /mnt/shared/sandbox-greeting.txt

  [3] Reading back the file:
  Hello from sandbox!

  [4] Verified on host: /tmp/opensandbox-data/sandbox-greeting.txt
      Content: Hello from sandbox!

  Scenario 1 completed.

============================================================
Scenario 2: Read-Only Host Volume Mount
============================================================
  Host path : /tmp/opensandbox-data
  Mount path: /mnt/readonly

  [1] Reading files from read-only mount:
  ...

  [2] Reading marker.txt:
  hello-from-host

  [3] Attempting to write (should fail):
  Write denied (expected)

  Scenario 2 completed.

============================================================
Scenario 3: SubPath Host Volume Mount
============================================================
  Host path : /tmp/opensandbox-data
  SubPath   : datasets/train
  Mount path: /mnt/training-data

  [1] Listing mounted subpath content:
  ...
  -rw-r--r-- 1 root root   28 ... data.csv

  [2] Reading data.csv:
  id,value
  1,100
  2,200
  3,300

  Scenario 3 completed.

============================================================
All scenarios completed successfully!
============================================================
```

## SDK Usage Quick Reference

### Python (async)

```python
from opensandbox import Sandbox
from opensandbox.models.sandboxes import Host, Volume

sandbox = await Sandbox.create(
    image="ubuntu",
    volumes=[
        Volume(
            name="my-data",
            host=Host(path="/data/shared"),
            mountPath="/mnt/data",
            readOnly=False,       # optional, default is False
            subPath="subdir",     # optional, mount a subdirectory
        ),
    ],
)
```

### Python (sync)

```python
from opensandbox import SandboxSync
from opensandbox.models.sandboxes import Host, Volume

sandbox = SandboxSync.create(
    image="ubuntu",
    volumes=[
        Volume(
            name="my-data",
            host=Host(path="/data/shared"),
            mountPath="/mnt/data",
        ),
    ],
)
```

### JavaScript / TypeScript

```typescript
import { Sandbox } from "@alibaba-group/opensandbox";

const sandbox = await Sandbox.create({
  image: "ubuntu",
  volumes: [
    {
      name: "my-data",
      host: { path: "/data/shared" },
      mountPath: "/mnt/data",
      readOnly: false,
    },
  ],
});
```

### Java / Kotlin

```java
Volume volume = Volume.builder()
    .name("my-data")
    .host(Host.of("/data/shared"))
    .mountPath("/mnt/data")
    .readOnly(false)
    .build();

Sandbox sandbox = Sandbox.builder()
    .image("ubuntu")
    .volume(volume)
    .build();
```

## References

- [OSEP-0003: Volume and VolumeBinding Support](../../oseps/0003-volume-and-volumebinding-support.md) — Design proposal
- [Sandbox Lifecycle API Spec](../../specs/sandbox-lifecycle.yml) — OpenAPI schema for volume definitions
- [Server Configuration](../../server/example.config.toml) — `[storage]` section for `allowed_host_paths`


================================================
FILE: examples/host-volume-mount/README_zh.md
================================================
# 宿主机目录挂载示例

本示例演示如何使用 OpenSandbox Volume API 将宿主机目录挂载到沙箱容器中。宿主机目录挂载支持宿主机与沙箱环境之间的双向文件共享，适用于共享数据集、模型检查点、配置文件或收集沙箱输出等场景。

## 演示场景

| # | 场景 | 说明 |
|---|------|------|
| 1 | **读写挂载** | 挂载宿主机目录，支持双向文件读写 |
| 2 | **只读挂载** | 提供沙箱不可修改的共享数据 |
| 3 | **SubPath 挂载** | 仅挂载宿主机路径下的指定子目录 |

## 前置条件

### 1. 启动 OpenSandbox 服务

```shell
git clone git@github.com:alibaba/OpenSandbox.git
cd OpenSandbox/server
cp example.config.zh.toml ~/.sandbox.toml
uv sync && uv run python -m src.main
```

### 2. 配置允许的宿主机路径

出于安全考虑，服务端会限制可挂载的宿主机路径。请在 `~/.sandbox.toml` 中添加 `[storage]` 配置段：

```toml
[storage]
# 允许进行 bind mount 的宿主机路径前缀白名单。
# 仅匹配这些前缀的路径才能被挂载到沙箱中。
# 如果为空，则允许所有路径（不建议在生产环境使用）。
allowed_host_paths = ["/tmp/opensandbox-data", "/data/shared"]
```

> **安全提示**：在生产环境中，请务必设置明确的 `allowed_host_paths`，以防止沙箱访问敏感的宿主机目录。空列表表示允许所有路径，适合本地开发，但不适用于共享环境。

### 3. 创建宿主机目录

```shell
# 创建与沙箱共享的目录
mkdir -p /tmp/opensandbox-data
echo "hello-from-host" > /tmp/opensandbox-data/marker.txt

# 创建用于 subpath 演示的子目录
mkdir -p /tmp/opensandbox-data/datasets/train
echo -e "id,value\n1,100\n2,200\n3,300" > /tmp/opensandbox-data/datasets/train/data.csv
```

### 4. 从源码安装 SDK

Volume 功能需要从源码安装最新版 SDK：

```shell
# 在项目根目录下执行（推荐使用 uv）
uv pip install -e sdks/sandbox/python

# 或者使用 pip（需要在虚拟环境中执行）
# python3 -m venv .venv && source .venv/bin/activate
# pip install -e sdks/sandbox/python
```

### 5. 拉取沙箱镜像

```shell
docker pull registry.cn-hangzhou.aliyuncs.com/acs/ubuntu:latest
```

## 运行

```shell
SANDBOX_IMAGE=registry.cn-hangzhou.aliyuncs.com/acs/ubuntu:latest \
  HOST_VOLUME_PATH=/tmp/opensandbox-data uv run python examples/host-volume-mount/main.py
```

## 预期输出

```text
Using HOST_VOLUME_PATH: /tmp/opensandbox-data

OpenSandbox server : localhost:8080
Sandbox image      : registry.cn-hangzhou.aliyuncs.com/acs/ubuntu:latest
Host volume path   : /tmp/opensandbox-data

============================================================
Scenario 1: Read-Write Host Volume Mount
============================================================
  Host path : /tmp/opensandbox-data
  Mount path: /mnt/shared

  [1] Listing files visible from inside the sandbox:
  total 4
drwxr-xr-x 1 root root 128 Feb  6 09:24 .
drwxr-xr-x 1 root root  12 Feb  6 11:50 ..
drwxr-xr-x 1 root root  96 Feb  6 09:24 datasets
-rw-r--r-- 1 root root  16 Feb  6 09:24 marker.txt

  [2] Writing a file from inside the sandbox:
  -> Written: /mnt/shared/sandbox-greeting.txt

  [3] Reading back the file:
  Hello from sandbox!

  [4] Verified on host: /tmp/opensandbox-data/sandbox-greeting.txt
      Content: Hello from sandbox!

  Scenario 1 completed.

============================================================
Scenario 2: Read-Only Host Volume Mount
============================================================
  Host path : /tmp/opensandbox-data
  Mount path: /mnt/readonly

  [1] Reading files from read-only mount:
  total 8
drwxr-xr-x 1 root root 160 Feb  6 11:50 .
drwxr-xr-x 1 root root  16 Feb  6 11:50 ..
drwxr-xr-x 1 root root  96 Feb  6 09:24 datasets
-rw-r--r-- 1 root root  16 Feb  6 09:24 marker.txt
-rw-r--r-- 1 root root  20 Feb  6 11:50 sandbox-greeting.txt

  [2] Reading marker.txt:
  hello-from-host

  [3] Attempting to write (should fail):
  touch: cannot touch '/mnt/readonly/should-fail.txt': Read-only file system
  Write denied (expected)

  Scenario 2 completed.

============================================================
Scenario 3: SubPath Host Volume Mount
============================================================
  Host path : /tmp/opensandbox-data
  SubPath   : datasets/train
  Mount path: /mnt/training-data

  [1] Listing mounted subpath content:
  total 4
drwxr-xr-x 1 root root 96 Feb  6 09:24 .
drwxr-xr-x 1 root root 26 Feb  6 11:50 ..
-rw-r--r-- 1 root root 27 Feb  6 11:50 data.csv

  [2] Reading data.csv:
  id,value
1,100
2,200
3,300

  Scenario 3 completed.

============================================================
All scenarios completed successfully!
============================================================
```

## 各 SDK 用法速览

### Python（异步）

```python
from opensandbox import Sandbox
from opensandbox.models.sandboxes import Host, Volume

sandbox = await Sandbox.create(
    image="ubuntu",
    volumes=[
        Volume(
            name="my-data",
            host=Host(path="/data/shared"),
            mountPath="/mnt/data",
            readOnly=False,       # 可选，默认为 False
            subPath="subdir",     # 可选，挂载子目录
        ),
    ],
)
```

### Python（同步）

```python
from opensandbox import SandboxSync
from opensandbox.models.sandboxes import Host, Volume

sandbox = SandboxSync.create(
    image="ubuntu",
    volumes=[
        Volume(
            name="my-data",
            host=Host(path="/data/shared"),
            mountPath="/mnt/data",
        ),
    ],
)
```

### JavaScript / TypeScript

```typescript
import { Sandbox } from "@alibaba-group/opensandbox";

const sandbox = await Sandbox.create({
  image: "ubuntu",
  volumes: [
    {
      name: "my-data",
      host: { path: "/data/shared" },
      mountPath: "/mnt/data",
      readOnly: false,
    },
  ],
});
```

### Java / Kotlin

```java
Volume volume = Volume.builder()
    .name("my-data")
    .host(Host.of("/data/shared"))
    .mountPath("/mnt/data")
    .readOnly(false)
    .build();

Sandbox sandbox = Sandbox.builder()
    .image("ubuntu")
    .volume(volume)
    .build();
```

## 参考资料

- [OSEP-0003: Volume 与 VolumeBinding 支持](../../oseps/0003-volume-and-volumebinding-support.md) — 设计提案
- [Sandbox Lifecycle API 规范](../../specs/sandbox-lifecycle.yml) — Volume 定义的 OpenAPI 规范
- [服务端配置示例](../../server/example.config.zh.toml) — `[storage]` 段中的 `allowed_host_paths` 配置


================================================
FILE: examples/host-volume-mount/main.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Host Volume Mount Example
=========================

Demonstrates how to mount a host directory into a sandbox container using
the OpenSandbox Volume API. This enables sharing files, datasets, or model
checkpoints between the host machine and sandbox environments.

Three scenarios are demonstrated:

1. **Read-write mount** - Share a working directory for bidirectional file exchange.
2. **Read-only mount**  - Provide shared datasets or configs that sandboxes should
   not modify.
3. **SubPath mount**    - Mount a specific subdirectory from the host path.

Prerequisites:
- OpenSandbox server running with Docker runtime
- Server config includes `[storage]` section with appropriate `allowed_host_paths`
- Host directories created before running this script (see README.md)
"""

import asyncio
import os
import tempfile
from datetime import timedelta
from pathlib import Path

from opensandbox import Sandbox
from opensandbox.config import ConnectionConfig

try:
    from opensandbox.models.sandboxes import Host, Volume
except ImportError:
    print(
        "ERROR: Your installed opensandbox SDK does not include Volume/Host models.\n"
        "       Volume support requires the latest SDK from source.\n"
        "       Please install from the local repository:\n"
        "\n"
        "           pip install -e sdks/sandbox/python\n"
        "\n"
        "       See README.md for details."
    )
    raise SystemExit(1)


async def print_exec(sandbox: Sandbox, command: str) -> str | None:
    """Run a command in the sandbox, echo its stdout, and return it.

    Args:
        sandbox: Sandbox whose ``commands.run`` executes the command.
        command: Command line to run inside the sandbox.

    Returns:
        The stdout messages joined with newlines (may be an empty string),
        or ``None`` when the execution reports an error.
    """
    result = await sandbox.commands.run(command)
    if result.error:
        print(f"  [error] {result.error.name}: {result.error.value}")
        return None
    text = "\n".join(msg.text for msg in result.logs.stdout)
    if text:
        # Indent every line, not just the first one, so multi-line output
        # (e.g. `ls -la`) stays aligned under the step heading.
        print("\n".join(f"  {line}" for line in text.split("\n")))
    return text


async def demo_readwrite_mount(config: ConnectionConfig, image: str, host_dir: str) -> None:
    """
    Scenario 1: Read-write mount.

    Binds ``host_dir`` into the sandbox at /mnt/shared, lists it, writes a
    greeting file from inside the sandbox, reads it back, and finally checks
    whether the file is visible on the host side.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Scenario 1: Read-Write Host Volume Mount")
    print(rule)
    print(f"  Host path : {host_dir}")
    print("  Mount path: /mnt/shared")

    shared_volume = Volume(
        name="shared-data",
        host=Host(path=host_dir),
        mountPath="/mnt/shared",
        readOnly=False,
    )
    sbx = await Sandbox.create(
        image=image,
        connection_config=config,
        timeout=timedelta(minutes=2),
        volumes=[shared_volume],
    )

    async with sbx:
        try:
            # Step 1: show what the host directory looks like from the sandbox
            print("\n  [1] Listing files visible from inside the sandbox:")
            await print_exec(sbx, "ls -la /mnt/shared/")

            # Step 2: create a file through the mount
            print("\n  [2] Writing a file from inside the sandbox:")
            write_cmd = "echo 'Hello from sandbox!' > /mnt/shared/sandbox-greeting.txt"
            await print_exec(sbx, write_cmd)
            print("  -> Written: /mnt/shared/sandbox-greeting.txt")

            # Step 3: read the new file back from inside the sandbox
            print("\n  [3] Reading back the file:")
            await print_exec(sbx, "cat /mnt/shared/sandbox-greeting.txt")

            # Step 4: look for the same file on the host filesystem
            written_on_host = Path(host_dir) / "sandbox-greeting.txt"
            if not written_on_host.exists():
                print(f"\n  [4] Note: {written_on_host} not directly visible (expected on remote Docker)")
            else:
                print(f"\n  [4] Verified on host: {written_on_host}")
                print(f"      Content: {written_on_host.read_text().strip()}")

        finally:
            await sbx.kill()

    print("\n  Scenario 1 completed.")


async def demo_readonly_mount(config: ConnectionConfig, image: str, host_dir: str) -> None:
    """
    Scenario 2: Read-only mount.

    Binds ``host_dir`` into the sandbox at /mnt/readonly with ``readOnly=True``,
    lists and reads its content, then attempts a write and prints whatever the
    command emitted on stdout and stderr.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Scenario 2: Read-Only Host Volume Mount")
    print(rule)
    print(f"  Host path : {host_dir}")
    print("  Mount path: /mnt/readonly")

    readonly_volume = Volume(
        name="readonly-data",
        host=Host(path=host_dir),
        mountPath="/mnt/readonly",
        readOnly=True,
    )
    sbx = await Sandbox.create(
        image=image,
        connection_config=config,
        timeout=timedelta(minutes=2),
        volumes=[readonly_volume],
    )

    async with sbx:
        try:
            # Step 1: directory listing through the read-only mount
            print("\n  [1] Reading files from read-only mount:")
            await print_exec(sbx, "ls -la /mnt/readonly/")

            # Step 2: content of the marker file
            print("\n  [2] Reading marker.txt:")
            await print_exec(sbx, "cat /mnt/readonly/marker.txt")

            # Step 3: try to create a file; the shell fallback echoes a note
            # when `touch` exits non-zero
            print("\n  [3] Attempting to write (should fail):")
            attempt = await sbx.commands.run(
                "touch /mnt/readonly/should-fail.txt 2>&1 || echo 'Write denied (expected)'"
            )
            for line in attempt.logs.stdout:
                print(f"  {line.text}")
            for line in attempt.logs.stderr:
                print(f"  {line.text}")

        finally:
            await sbx.kill()

    print("\n  Scenario 2 completed.")


async def demo_subpath_mount(config: ConnectionConfig, image: str, host_dir: str) -> None:
    """
    Scenario 3: SubPath mount.

    Exposes only the ``datasets/train`` subdirectory of ``host_dir`` inside the
    sandbox at /mnt/training-data (read-only). The subdirectory and a sample
    ``data.csv`` are (re)created on the local filesystem before the sandbox
    is started.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Scenario 3: SubPath Host Volume Mount")
    print(rule)

    # Prepare the sample dataset under the host directory
    train_dir = Path(host_dir, "datasets", "train")
    train_dir.mkdir(parents=True, exist_ok=True)
    csv_file = train_dir.joinpath("data.csv")
    csv_file.write_text("id,value\n1,100\n2,200\n3,300\n")

    print(f"  Host path : {host_dir}")
    print("  SubPath   : datasets/train")
    print("  Mount path: /mnt/training-data")

    training_volume = Volume(
        name="training-data",
        host=Host(path=host_dir),
        mountPath="/mnt/training-data",
        subPath="datasets/train",
        readOnly=True,
    )
    sbx = await Sandbox.create(
        image=image,
        connection_config=config,
        timeout=timedelta(minutes=2),
        volumes=[training_volume],
    )

    async with sbx:
        try:
            # Step 1: list what the subpath mount exposes
            print("\n  [1] Listing mounted subpath content:")
            await print_exec(sbx, "ls -la /mnt/training-data/")

            # Step 2: dump the CSV from inside the sandbox
            print("\n  [2] Reading data.csv:")
            await print_exec(sbx, "cat /mnt/training-data/data.csv")

        finally:
            await sbx.kill()

    print("\n  Scenario 3 completed.")


async def main() -> None:
    """Read settings from the environment and run the three mount scenarios in order.

    Environment variables: ``SANDBOX_DOMAIN`` (default ``localhost:8080``),
    ``SANDBOX_API_KEY``, ``SANDBOX_IMAGE`` (default ``ubuntu``) and
    ``HOST_VOLUME_PATH``. When ``HOST_VOLUME_PATH`` is unset or empty, a
    temporary directory containing a ``marker.txt`` file is created and used.
    """
    environ = os.environ
    domain = environ.get("SANDBOX_DOMAIN", "localhost:8080")
    api_key = environ.get("SANDBOX_API_KEY")
    image = environ.get("SANDBOX_IMAGE", "ubuntu")
    host_dir = environ.get("HOST_VOLUME_PATH", "")

    if host_dir:
        print(f"Using HOST_VOLUME_PATH: {host_dir}")
    else:
        # No host path given: fall back to a fresh temp directory with sample data
        host_dir = tempfile.mkdtemp(prefix="opensandbox-vol-")
        print(f"No HOST_VOLUME_PATH set, using temporary directory: {host_dir}")
        marker = Path(host_dir) / "marker.txt"
        marker.write_text("hello-from-host\n")
        print(f"Created marker file: {marker}")

    config = ConnectionConfig(
        domain=domain,
        api_key=api_key,
        request_timeout=timedelta(minutes=3),
    )

    print(f"\nOpenSandbox server : {config.domain}")
    print(f"Sandbox image      : {image}")
    print(f"Host volume path   : {host_dir}")

    scenarios = (demo_readwrite_mount, demo_readonly_mount, demo_subpath_mount)
    for scenario in scenarios:
        await scenario(config, image, host_dir)

    rule = "=" * 60
    print("\n" + rule)
    print("All scenarios completed successfully!")
    print(rule)


if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: examples/kimi-cli/README.md
================================================
# Kimi CLI Example

Run [Kimi Code CLI](https://github.com/MoonshotAI/kimi-cli) (Moonshot AI) inside an OpenSandbox container.

## Start OpenSandbox server [local]

Pre-pull the code-interpreter image (includes Python 3.12+):

```shell
docker pull sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2

# use docker hub
# docker pull opensandbox/code-interpreter:v1.0.2
```

Then start the local OpenSandbox server; its stdout logs will be visible in the terminal:

```shell
uv pip install opensandbox-server
opensandbox-server init-config ~/.sandbox.toml --example docker
opensandbox-server
```

## Create and Access the Kimi Sandbox

```shell
# Install OpenSandbox package
uv pip install opensandbox

# Run the example (requires SANDBOX_DOMAIN / SANDBOX_API_KEY / KIMI_API_KEY)
uv run python examples/kimi-cli/main.py
```

The script installs Kimi Code CLI (`pip install kimi-cli`) at runtime (Python 3.12+ is already in the code-interpreter image), then sends a simple request `kimi -p "Compute 1+1=?."`. Auth is passed via `KIMI_API_KEY`, and you can override endpoint/model with `KIMI_BASE_URL` / `KIMI_MODEL_NAME`.

## Environment Variables

- `SANDBOX_DOMAIN`: Sandbox service address (default: `localhost:8080`)
- `SANDBOX_API_KEY`: API key if your server requires authentication (optional for local)
- `SANDBOX_IMAGE`: Sandbox image to use (default: `sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2`)
- `KIMI_API_KEY`: Your Moonshot AI / Kimi API key (required)
- `KIMI_BASE_URL`: Kimi API endpoint (optional; defaults to Kimi's official endpoint)
- `KIMI_MODEL_NAME`: Model to use (default: `kimi-k2.5`)

## References
- [Kimi Code CLI](https://github.com/MoonshotAI/kimi-cli) - Official Kimi Code CLI repository
- [Moonshot AI Platform](https://platform.moonshot.ai/) - API key management and documentation
- [Kimi CLI Documentation](https://moonshotai.github.io/kimi-cli/en/) - Full CLI documentation


================================================
FILE: examples/kimi-cli/main.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import os
from datetime import timedelta

from opensandbox import Sandbox
from opensandbox.config import ConnectionConfig


def _required_env(name: str) -> str:
    value = os.getenv(name)
    if not value:
        raise RuntimeError(f"{name} is required")
    return value


async def _print_execution_logs(execution) -> None:
    for msg in execution.logs.stdout:
        print(f"[stdout] {msg.text}")
    for msg in execution.logs.stderr:
        print(f"[stderr] {msg.text}")
    if execution.error:
        print(f"[error] {execution.error.name}: {execution.error.value}")


async def main() -> None:
    """Create a sandbox, install Kimi CLI in it, and send one prompt.

    Reads ``SANDBOX_DOMAIN``, ``SANDBOX_API_KEY``, ``SANDBOX_IMAGE``,
    ``KIMI_API_KEY`` (required), ``KIMI_BASE_URL`` and ``KIMI_MODEL_NAME``
    from the environment.

    Raises:
        RuntimeError: If ``KIMI_API_KEY`` is unset or empty.
    """
    domain = os.getenv("SANDBOX_DOMAIN", "localhost:8080")
    api_key = os.getenv("SANDBOX_API_KEY")
    kimi_api_key = _required_env("KIMI_API_KEY")
    kimi_base_url = os.getenv("KIMI_BASE_URL")
    kimi_model_name = os.getenv("KIMI_MODEL_NAME", "kimi-k2.5")
    image = os.getenv(
        "SANDBOX_IMAGE",
        "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2",
    )

    config = ConnectionConfig(
        domain=domain,
        api_key=api_key,
        request_timeout=timedelta(seconds=60),
    )

    # Inject Kimi settings into container environment for CLI access
    env = {
        "KIMI_API_KEY": kimi_api_key,
        "KIMI_BASE_URL": kimi_base_url,
        "KIMI_MODEL_NAME": kimi_model_name,
    }
    # Drop None values to avoid overriding defaults inside CLI
    env = {k: v for k, v in env.items() if v is not None}

    sandbox = await Sandbox.create(
        image,
        connection_config=config,
        env=env,
    )

    async with sandbox:
        try:
            # Install Kimi CLI (Python 3.12+ is already in the code-interpreter image)
            install_exec = await sandbox.commands.run(
                "pip install kimi-cli"
            )
            await _print_execution_logs(install_exec)

            # Use Kimi CLI to send a message in non-interactive mode
            run_exec = await sandbox.commands.run(
                'kimi -p "Compute 1+1=?."'
            )
            await _print_execution_logs(run_exec)
        finally:
            # Kill the sandbox even when one of the commands above raises, so a
            # failed run does not leave the sandbox instance behind.
            await sandbox.kill()


if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: examples/kubernetes-pvc-volume-mount/README.md
================================================
# Kubernetes PVC Volume Mount Example

This example demonstrates how to mount Kubernetes PersistentVolumeClaims (PVC) into OpenSandbox containers for persistent storage. Data written to a PVC persists across sandbox lifecycles -- when a sandbox is killed and a new one is created with the same PVC, previously written data is still available.

## Prerequisites

### 1. CSI Driver

Kubernetes PVC requires a [Container Storage Interface (CSI)](https://kubernetes-csi.github.io/docs/drivers.html) driver to provision and manage storage. Install the CSI driver that matches your storage backend. Refer to your storage vendor's documentation for installation instructions.

For example, [Alibaba Cloud CSI Driver](https://github.com/kubernetes-sigs/alibaba-cloud-csi-driver) supports the following storage types:

- **Cloud Disk (EBS)** -- block storage, suitable for high-performance single-node read-write scenarios
- **NAS** -- shared file storage, supports multi-node read-write (ReadWriteMany)
- **OSS** -- object storage, suitable for large-scale data read and shared access scenarios
- **CPFS** -- high-performance parallel file system
- **LVM** -- local volume management

### 2. Create a PersistentVolumeClaim

Create a PVC in the namespace where OpenSandbox runs:

```yaml
# pvc.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: my-pvc
  namespace: opensandbox
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: <your-storage-class>
  resources:
    requests:
      storage: 10Gi
```

```shell
kubectl apply -f pvc.yaml
```

Verify the PVC is bound:

```shell
kubectl get pvc my-pvc -n opensandbox
```

### 3. OpenSandbox Server

Ensure the OpenSandbox server is running with the Kubernetes runtime and the BatchSandbox workload provider.

### 4. Python SDK

```shell
uv pip install opensandbox
```

## Run the Example

```shell
export OPEN_SANDBOX_API_KEY=your-api-key
export OPEN_SANDBOX_BASE_URL=http://localhost:8080
export SANDBOX_PVC_NAME=my-pvc

python examples/kubernetes-pvc-volume-mount/main.py
```

## What the Example Does

1. Creates a sandbox with a PVC mounted at `/mnt/data`
2. Writes a test file to the PVC
3. Reads the file back to verify
4. Kills the sandbox
5. Creates a new sandbox with the same PVC
6. Reads the file again to verify data persistence across sandbox lifecycles

## Usage

```python
from opensandbox import Sandbox
from opensandbox.models.sandboxes import PVC, Volume

sandbox = await Sandbox.create(
    image="python:3.11",
    volumes=[
        Volume(
            name="data-volume",
            pvc=PVC(claimName="my-pvc"),
            mountPath="/mnt/data",
            readOnly=False,
        ),
    ],
)

# Run commands against the mounted volume
result = await sandbox.commands.run("ls -la /mnt/data")
output = "\n".join(msg.text for msg in result.logs.stdout)
print(output)
```

## Important Notes

- **Pool mode does not support volumes.** Use template mode instead.
- PVC must exist before creating the sandbox.
- PVC is not deleted when the sandbox is killed.
- Multiple sandboxes can mount the same PVC if the access mode allows (e.g. `ReadWriteMany`).

## References

- [OSEP-0003: Volume and VolumeBinding Support](../../oseps/0003-volume-and-volumebinding-support.md)
- [Kubernetes CSI Drivers](https://kubernetes-csi.github.io/docs/drivers.html)
- [Alibaba Cloud CSI Driver](https://github.com/kubernetes-sigs/alibaba-cloud-csi-driver)


================================================
FILE: examples/kubernetes-pvc-volume-mount/main.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#!/usr/bin/env python3
"""
Kubernetes PVC Volume Mount Example

This example demonstrates how to use PersistentVolumeClaims (PVC) with OpenSandbox
in a Kubernetes environment. It verifies that data written to a PVC persists across
sandbox lifecycles.

Prerequisites:
1. Kubernetes cluster with CSI driver installed
2. PVC created in the target namespace
3. OpenSandbox server running with Kubernetes runtime

Usage:
    export OPEN_SANDBOX_API_KEY=your-api-key
    export OPEN_SANDBOX_BASE_URL=http://localhost:8080
    export SANDBOX_PVC_NAME=my-pvc   # optional; defaults to "my-pvc"
    python main.py
"""

import asyncio
import os
from datetime import timedelta
from opensandbox import Sandbox
from opensandbox.models.sandboxes import PVC, Volume
from opensandbox.config.connection import ConnectionConfig

# Configuration (each value can be overridden through the environment)
PVC_NAME = os.environ.get("SANDBOX_PVC_NAME", "my-pvc")
IMAGE = os.environ.get("SANDBOX_IMAGE", "python:3.11")

# Connection config with an extended request timeout for sandbox creation
CONNECTION_CONFIG = ConnectionConfig(request_timeout=timedelta(minutes=10))

# One read-write volume backed by the PVC, mounted at /mnt/data
VOLUMES = [
    Volume(
        name="data-volume",
        mountPath="/mnt/data",
        readOnly=False,
        pvc=PVC(claimName=PVC_NAME),
    ),
]


async def basic_pvc_mount():
    """Write a file to the PVC from one sandbox and read it back from a second one.

    Step 1 creates a sandbox with the PVC mounted at /mnt/data, writes
    ``sandbox-test.txt``, reads it back, and kills the sandbox. Step 2 creates
    a new sandbox with the same volume and checks that the file content is
    still there.

    Each sandbox is killed in a ``finally`` block so that a failing step does
    not leave it running.

    Raises:
        RuntimeError: If the second sandbox does not read back the content
            written by the first one.
    """
    expected = "Hello from OpenSandbox!"

    print("\n" + "=" * 60)
    print("Step 1: Create sandbox and write data to PVC")
    print("=" * 60)
    print(f"  PVC name  : {PVC_NAME}")
    print("  Mount path: /mnt/data")
    print()

    sandbox = await Sandbox.create(
        image=IMAGE,
        timeout=timedelta(minutes=10),
        ready_timeout=timedelta(minutes=10),
        volumes=VOLUMES,
        connection_config=CONNECTION_CONFIG,
    )
    try:
        print(f"  Created sandbox: {sandbox.id}")

        # Write a test file to PVC
        await sandbox.commands.run(
            "python -c \"with open('/mnt/data/sandbox-test.txt', 'w') as f: f.write('Hello from OpenSandbox!')\""
        )
        print("  Written test file to /mnt/data/sandbox-test.txt")

        # Read it back
        result = await sandbox.commands.run("cat /mnt/data/sandbox-test.txt")
        content = "\n".join(msg.text for msg in result.logs.stdout)
        print(f"  Read back: {content.strip()}")
    finally:
        # Kill the sandbox even if one of the commands above raised
        await sandbox.kill()
        print("  Sandbox killed.")

    # ---- Verify persistence ----
    print("\n" + "=" * 60)
    print("Step 2: Create new sandbox and verify data persistence")
    print("=" * 60)

    sandbox2 = await Sandbox.create(
        image=IMAGE,
        timeout=timedelta(minutes=10),
        ready_timeout=timedelta(minutes=10),
        volumes=VOLUMES,
        connection_config=CONNECTION_CONFIG,
    )
    try:
        print(f"  Created new sandbox: {sandbox2.id}")

        # Read the file written by the previous sandbox
        result = await sandbox2.commands.run("cat /mnt/data/sandbox-test.txt")
        content = "\n".join(msg.text for msg in result.logs.stdout).strip()
        print(f"  Read back from new sandbox: {content}")

        if content == expected:
            print("  Data persistence verified!")
        else:
            print(f"  ERROR: Expected '{expected}', got '{content}'")
            # Fail the run: otherwise the caller would go on to report that
            # all steps completed successfully.
            raise RuntimeError(
                f"PVC data persistence check failed: expected '{expected}', got '{content}'"
            )
    finally:
        await sandbox2.kill()
        print("  Sandbox killed.")


async def main():
    """Print the run header, execute the PVC demo, and report the outcome.

    Any exception raised by the demo is printed as ``Error: ...`` and then
    re-raised.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("OpenSandbox Kubernetes PVC Volume Mount Example")
    print(rule)
    print(f"PVC Name   : {PVC_NAME}")
    print(f"Image      : {IMAGE}")
    print()

    try:
        await basic_pvc_mount()

        print("\n" + rule)
        print("All steps completed successfully!")
        print(rule)

    except Exception as exc:
        print(f"\nError: {exc}")
        raise


if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: examples/langgraph/README.md
================================================
# LangGraph Agent + OpenSandbox Example

Integrate LangGraph with OpenSandbox using a graph-driven control flow. The example uses
explicit state machine nodes to create, prepare, run, inspect, and clean up a sandbox, plus
a decision node to retry with a fallback command if the run step fails.

## Start OpenSandbox server [local]

Pre-pull the code-interpreter image (includes Python):

```shell
docker pull sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2

# use docker hub
# docker pull opensandbox/code-interpreter:v1.0.2
```

Start the local OpenSandbox server; its logs will be visible in the terminal:

```shell
uv pip install opensandbox-server
opensandbox-server init-config ~/.sandbox.toml --example docker
opensandbox-server
```

## Run the example

```shell
# Install OpenSandbox + LangGraph deps
uv pip install opensandbox langgraph langchain-anthropic

# Run the example (requires SANDBOX_DOMAIN / SANDBOX_API_KEY / ANTHROPIC_API_KEY)
uv run python examples/langgraph/main.py
```

The workflow writes files, executes a job, retries with a fallback command on failure (default
`python` vs `python3`), then summarizes results with Claude and cleans up the sandbox instance.

![LangGraph + OpenSandbox screenshot](./screenshot.jpg)

## Environment Variables

- `SANDBOX_DOMAIN`: Sandbox service address (default: `localhost:8080`)
- `SANDBOX_API_KEY`: API key if your server requires authentication (optional for local)
- `SANDBOX_IMAGE`: Sandbox image to use (default: `sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2`)
- `ANTHROPIC_API_KEY`: Your Anthropic API key (required if `ANTHROPIC_AUTH_TOKEN` is unset)
- `ANTHROPIC_AUTH_TOKEN`: Alternate Anthropic auth token (uses `Authorization` header)
- `ANTHROPIC_API_KEY` and `ANTHROPIC_AUTH_TOKEN` should not be set together (if both are set, `ANTHROPIC_AUTH_TOKEN` takes precedence and `ANTHROPIC_API_KEY` is ignored)
- `ANTHROPIC_BASE_URL`: Anthropic API endpoint override (optional)
- `ANTHROPIC_MODEL`: Model to use (default: `claude-3-5-sonnet-latest`)

## References
- [LangGraph](https://langchain-ai.github.io/langgraph/) - Agent workflow framework


================================================
FILE: examples/langgraph/main.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from datetime import timedelta
from typing import TypedDict

from langchain_anthropic import ChatAnthropic
from langgraph.graph import END, StateGraph
from opensandbox import Sandbox
from opensandbox.config import ConnectionConfig


class WorkflowState(TypedDict):
    """State dictionary passed between the nodes of the workflow graph."""

    # Live sandbox handle; None before creation and after cleanup.
    sandbox: Sandbox | None
    # Formatted stdout/stderr text of the most recent job run.
    run_output: str
    # Summary text produced by the inspect step.
    summary: str
    # "<name>: <value>" of the last execution error, or "" when the run had none.
    last_error: str
    # Number of job runs performed so far.
    attempt: int
    # Maximum number of job runs (first run plus retries).
    max_attempts: int
    # Command the next run will execute.
    command: str
    # Command scheduled after a failed run while attempts remain.
    fallback_command: str
    # True once the cleanup node has released the sandbox.
    cleaned: bool


def _configure_anthropic_env() -> None:
    api_key = os.getenv("ANTHROPIC_API_KEY")
    auth_token = os.getenv("ANTHROPIC_AUTH_TOKEN")

    if auth_token:
        os.environ["ANTHROPIC_AUTH_TOKEN"] = auth_token
        os.environ.pop("ANTHROPIC_API_KEY", None)
        return

    if api_key:
        os.environ["ANTHROPIC_API_KEY"] = api_key
        os.environ.pop("ANTHROPIC_AUTH_TOKEN", None)
        return

    raise RuntimeError("ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN is required")


def _build_llm() -> ChatAnthropic:
    """Create the ChatAnthropic client from environment settings.

    Credentials are normalized first by ``_configure_anthropic_env``. The
    endpoint comes from ``ANTHROPIC_BASE_URL`` (``None`` when unset) and the
    model from ``ANTHROPIC_MODEL`` (default ``claude-3-5-sonnet-latest``).
    """
    _configure_anthropic_env()
    endpoint_override = os.getenv("ANTHROPIC_BASE_URL")
    llm_options = {
        "model": os.getenv("ANTHROPIC_MODEL", "claude-3-5-sonnet-latest"),
        "anthropic_api_url": endpoint_override,
    }
    return ChatAnthropic(**llm_options)


def _format_execution(execution) -> str:
    stdout = "\n".join(msg.text for msg in execution.logs.stdout)
    stderr = "\n".join(msg.text for msg in execution.logs.stderr)

    if execution.error:
        stderr = "\n".join(
            [
                stderr,
                f"[error] {execution.error.name}: {execution.error.value}",
            ]
        ).strip()

    output = stdout.strip()
    if stderr:
        output = "\n".join([output, f"[stderr]\n{stderr}"]).strip()
    return output or "(no output)"


async def create_sandbox(state: WorkflowState) -> WorkflowState:
    """Graph node: create a sandbox and store its handle in the state.

    Reads ``SANDBOX_DOMAIN`` (default ``localhost:8080``), ``SANDBOX_API_KEY``
    and ``SANDBOX_IMAGE`` from the environment; the connection uses a
    120-second request timeout.

    Returns:
        A copy of ``state`` whose ``sandbox`` entry is the new sandbox.
    """
    print("[create] Creating sandbox")

    sandbox_domain = os.getenv("SANDBOX_DOMAIN", "localhost:8080")
    sandbox_api_key = os.getenv("SANDBOX_API_KEY")
    default_image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2"
    sandbox_image = os.getenv("SANDBOX_IMAGE", default_image)

    connection = ConnectionConfig(
        domain=sandbox_domain,
        api_key=sandbox_api_key,
        request_timeout=timedelta(seconds=120),
    )
    handle = await Sandbox.create(sandbox_image, connection_config=connection)
    print(f"[create] Sandbox ready: {handle.id}")

    next_state = dict(state)
    next_state["sandbox"] = handle
    return next_state


async def prepare_workspace(state: WorkflowState) -> WorkflowState:
    """Graph node: write the job script and a notes file into the sandbox.

    Returns:
        The unchanged ``state``.

    Raises:
        RuntimeError: If the state holds no sandbox.
    """
    print("[prepare] Writing job files")
    sandbox = state["sandbox"]
    if sandbox is None:
        raise RuntimeError("Sandbox not initialized")

    # (path, content) pairs, written in this order.
    job_files = (
        ("/tmp/math.py", "result = 137 * 42\nprint(result)\n"),
        ("/tmp/notes.txt", "LangGraph + OpenSandbox\n"),
    )
    for remote_path, content in job_files:
        await sandbox.files.write_file(remote_path, content)

    print("[prepare] Files written")
    return state


async def run_job(state: WorkflowState) -> WorkflowState:
    """Graph node: run the current command and record the outcome.

    The attempt counter is incremented on every call. When the execution
    reports an error, ``last_error`` is set, and if attempts remain the
    fallback command is scheduled for the next run.

    Returns:
        A copy of ``state`` with ``run_output``, ``last_error``, ``attempt``
        and ``command`` updated.

    Raises:
        RuntimeError: If the state holds no sandbox.
    """
    attempt_no = state["attempt"] + 1
    attempt_limit = state["max_attempts"]
    current_command = state.get("command") or "python3 /tmp/math.py"
    print(f"[run] Executing job (attempt {attempt_no}/{attempt_limit})")

    sandbox = state["sandbox"]
    if sandbox is None:
        raise RuntimeError("Sandbox not initialized")

    execution = await sandbox.commands.run(current_command)
    formatted = _format_execution(execution)

    updates = {
        "run_output": formatted,
        "last_error": "",
        "attempt": attempt_no,
        "command": current_command,
    }
    if execution.error:
        updates["last_error"] = f"{execution.error.name}: {execution.error.value}"
        if attempt_no < attempt_limit:
            updates["command"] = state.get("fallback_command", "python /tmp/math.py")
            print(f"[run] Failed, scheduling fallback: {updates['command']}")

    print(f"[run] Output: {formatted}")
    return {**state, **updates}


def decide_next(state: WorkflowState) -> str:
    """Choose the node that follows ``run``.

    Returns:
        ``"run"`` when the last run recorded an error and attempts remain,
        otherwise ``"inspect"``.
    """
    # Guard first: no error, or the attempt budget is exhausted.
    if not state.get("last_error") or state["attempt"] >= state["max_attempts"]:
        print("[decide] Proceeding to inspect")
        return "inspect"

    print("[decide] Retry with fallback command")
    return "run"


async def inspect_results(state: WorkflowState) -> WorkflowState:
    """Graph node: read the notes file and ask the LLM for a summary.

    Returns:
        A copy of ``state`` whose ``summary`` is the LLM response content.

    Raises:
        RuntimeError: If the state holds no sandbox.
    """
    print("[inspect] Reading notes and summarizing")
    sandbox = state["sandbox"]
    if sandbox is None:
        raise RuntimeError("Sandbox not initialized")

    notes_text = await sandbox.files.read_file("/tmp/notes.txt")
    model = _build_llm()
    prompt_parts = [
        "Summarize the sandbox run result and notes in one sentence. ",
        f"Run output: {state.get('run_output', '')}. ",
        f"Notes: {notes_text.strip()}.",
    ]
    reply = await model.ainvoke("".join(prompt_parts))

    print(f"[inspect] Summary: {reply.content}")

    result = dict(state)
    result["summary"] = reply.content
    return result


async def cleanup_sandbox(state: WorkflowState) -> WorkflowState:
    """Graph node: kill and close the sandbox, if there is one.

    Returns:
        A copy of ``state`` with ``sandbox`` set to ``None`` and ``cleaned``
        set to ``True``.
    """
    print("[cleanup] Cleaning up sandbox")

    handle = state.get("sandbox")
    if handle is not None:
        await handle.kill()
        await handle.close()

    print("[cleanup] Done")

    released = dict(state)
    released.update(sandbox=None, cleaned=True)
    return released


async def main() -> None:
    """Build the workflow graph, run it, and print the run output and summary.

    Graph shape: create -> prepare -> run -> (run again | inspect) -> cleanup.
    The latest streamed state is tracked so that, if the graph stops before the
    cleanup node ran, the sandbox can still be released here.
    """
    graph = StateGraph(WorkflowState)
    graph.add_node("create", create_sandbox)
    graph.add_node("prepare", prepare_workspace)
    graph.add_node("run", run_job)
    graph.add_node("inspect", inspect_results)
    graph.add_node("cleanup", cleanup_sandbox)
    graph.set_entry_point("create")
    graph.add_edge("create", "prepare")
    graph.add_edge("prepare", "run")
    # decide_next returns "run" (retry) or "inspect" (continue).
    graph.add_conditional_edges(
        "run",
        decide_next,
        {
            "run": "run",
            "inspect": "inspect",
        },
    )
    graph.add_edge("inspect", "cleanup")
    graph.add_edge("cleanup", END)
    app = graph.compile()

    initial_state = {
        "sandbox": None,
        "run_output": "",
        "summary": "",
        "last_error": "",
        "attempt": 0,
        "max_attempts": 2,
        "command": "python3 /tmp/math.py",
        "fallback_command": "python /tmp/math.py",
        "cleaned": False,
    }

    state = initial_state
    try:
        async for update in app.astream(initial_state, stream_mode="values"):
            state = update
    finally:
        # Safety net for runs that did not reach (or did not finish) the cleanup node.
        if not state.get("cleaned"):
            sandbox = state.get("sandbox")
            if sandbox is not None:
                try:
                    await sandbox.kill()
                finally:
                    # Still close the handle when kill() raises, so it is not leaked.
                    await sandbox.close()

    print(f"Run output: {state['run_output']}")
    print(f"Summary: {state['summary']}")


if __name__ == "__main__":
    import asyncio

    asyncio.run(main())


================================================
FILE: examples/nullclaw/README.md
================================================
# Nullclaw Gateway Example

Launch a [Nullclaw](https://github.com/nullclaw/nullclaw) Gateway inside an OpenSandbox instance and expose its HTTP endpoint. The script polls the gateway health check until it returns HTTP 200, then prints the reachable endpoint.

## Start OpenSandbox server [local]

You can find the latest Nullclaw container image [here](https://github.com/nullclaw/nullclaw/pkgs/container/nullclaw).

### Notes (Docker runtime requirement)

The server uses `runtime.type = "docker"` by default, so it **must** be able to reach a running Docker daemon.

- **Docker Desktop**: ensure Docker Desktop is running, then verify with `docker version`.
- **Colima (macOS)**: start it first (`colima start`) and export the socket before starting the server:

```shell
export DOCKER_HOST="unix://${HOME}/.colima/default/docker.sock"
```

Pre-pull the Nullclaw image:

```shell
docker pull ghcr.io/nullclaw/nullclaw:latest
```

Start the OpenSandbox server (logs will stay in the terminal):

```shell
uv pip install opensandbox-server
opensandbox-server init-config ~/.sandbox.toml --example docker
opensandbox-server
```

If you see errors like `FileNotFoundError: [Errno 2] No such file or directory` from `docker/transport/unixconn.py`, it usually means the Docker unix socket is missing or Docker is not running.

## Create and Access the Nullclaw Sandbox

This example is hard-coded for a quick start:
- OpenSandbox server: `http://localhost:8080`
- Image: `ghcr.io/nullclaw/nullclaw:latest`
- Gateway port: `3000`
- Timeout: `3600s`

Install dependencies from the project root:

```shell
uv pip install opensandbox requests
```

Run the example:

```shell
uv run python examples/nullclaw/main.py
```

You should see output similar to:

```text
Creating nullclaw sandbox with image=ghcr.io/nullclaw/nullclaw:latest on OpenSandbox server http://localhost:8080...
[check] sandbox ready after 0.3s
Nullclaw gateway started. Please refer to 127.0.0.1:56234
```

The endpoint printed at the end (e.g., `127.0.0.1:56234`) is the Nullclaw Gateway address exposed from the sandbox.

By default, Nullclaw requires pairing before authenticated endpoints (for example, `/webhook`) can be used. The `/health` endpoint remains publicly accessible.

## References
- [Nullclaw](https://github.com/nullclaw/nullclaw) — Minimal AI assistant runtime (678 KB static Zig binary)
- [Nullclaw Documentation](https://nullclaw.github.io) — Full documentation
- [OpenSandbox Python SDK](https://pypi.org/project/opensandbox/)


================================================
FILE: examples/nullclaw/main.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import time
from datetime import timedelta

import requests
from opensandbox import SandboxSync
from opensandbox.config import ConnectionConfigSync
from opensandbox.models.sandboxes import NetworkPolicy, NetworkRule


def check_nullclaw(sbx: SandboxSync) -> bool:
    """
    Readiness probe: poll the nullclaw gateway ``/health`` URL on port 3000.

    Makes up to 150 attempts. Each attempt is one GET with a 1s timeout
    followed by a 0.2s pause, so the pauses alone add up to ~30s and slow
    requests can stretch the total further. Failed requests are ignored
    and retried.

    Returns:
        True  as soon as a request returns HTTP 200
        False when every attempt is used up or any other exception occurs
    """
    try:
        gateway = sbx.get_endpoint(3000)
        started = time.perf_counter()
        health_url = f"http://{gateway.endpoint}/health"

        attempts_left = 150
        while attempts_left > 0:
            attempts_left -= 1
            try:
                if requests.get(health_url, timeout=1).status_code == 200:
                    waited = time.perf_counter() - started
                    print(f"[check] sandbox ready after {waited:.1f}s")
                    return True
            except Exception:
                # Gateway not reachable yet; try again after the pause.
                pass
            time.sleep(0.2)
        return False
    except Exception as exc:
        print(f"[check] failed: {exc}")
        return False


def main() -> None:
    """Create the nullclaw sandbox and print the gateway endpoint (port 3000).

    Server address, image and the one-hour sandbox timeout are fixed values.
    """
    server = "http://localhost:8080"
    image = "ghcr.io/nullclaw/nullclaw:latest"
    lifetime = timedelta(seconds=3600)  # 1 hour

    print(f"Creating nullclaw sandbox with image={image} on OpenSandbox server {server}...")
    create_options = dict(
        image=image,
        timeout=lifetime,
        metadata={"example": "nullclaw"},
        connection_config=ConnectionConfigSync(domain=server),
        health_check=check_nullclaw,
        # network policy: deny by default, allow egress to openrouter.ai only
        network_policy=NetworkPolicy(
            defaultAction="deny",
            egress=[NetworkRule(action="allow", target="openrouter.ai")],
        ),
    )
    sandbox = SandboxSync.create(**create_options)

    gateway = sandbox.get_endpoint(3000)
    print(f"Nullclaw gateway started. Please refer to {gateway.endpoint}")


if __name__ == "__main__":
    main()


================================================
FILE: examples/openclaw/README.md
================================================
# OpenClaw Gateway Example

Launch an [OpenClaw](https://github.com/openclaw/openclaw) Gateway inside an OpenSandbox instance and expose its HTTP endpoint. The script polls the gateway until it returns HTTP 200, then prints the reachable endpoint.

## Start OpenSandbox server [local]

You can find the latest OpenClaw container image [here](https://github.com/openclaw/openclaw/pkgs/container/openclaw).

### Notes (Docker runtime requirement)

The server uses `runtime.type = "docker"` by default, so it **must** be able to reach a running Docker daemon.

- **Docker Desktop**: ensure Docker Desktop is running, then verify with `docker version`.
- **Colima (macOS)**: start it first (`colima start`) and export the socket before starting the server:

```shell
export DOCKER_HOST="unix://${HOME}/.colima/default/docker.sock"
```

Pre-pull the OpenClaw image:

```shell
docker pull ghcr.io/openclaw/openclaw:latest
```

Start the OpenSandbox server (logs will stay in the terminal):

```shell
uv pip install opensandbox-server
opensandbox-server init-config ~/.sandbox.toml --example docker
opensandbox-server
```

If you see errors like `FileNotFoundError: [Errno 2] No such file or directory` from `docker/transport/unixconn.py`, it usually means the Docker unix socket is missing or Docker is not running.

## Create and Access the OpenClaw Sandbox

This example is hard-coded for a quick start:
- OpenSandbox server: `http://localhost:8080`
- Image: `ghcr.io/openclaw/openclaw:latest`
- Gateway port: `18789`
- Timeout: `3600s`
- Token: `OPENCLAW_GATEWAY_TOKEN` (default: `dummy-token-for-sandbox`)

Install dependencies from the project root:

```shell
uv pip install opensandbox requests
```

Run the example (set a real token if you need authenticated access):

```shell
export OPENCLAW_GATEWAY_TOKEN="$(openssl rand -hex 32)"
uv run python examples/openclaw/main.py
```

You should see output similar to:

```text
Creating openclaw sandbox with image=ghcr.io/openclaw/openclaw:latest on OpenSandbox server http://localhost:8080...
[check] sandbox ready after 7.1s
Openclaw started finished. Please refer to 127.0.0.1:56123
```

The endpoint printed at the end (e.g., `127.0.0.1:56123`) is the OpenClaw Gateway address exposed from the sandbox.

## References
- [OpenClaw](https://github.com/openclaw/openclaw)
- [OpenSandbox Python SDK](https://pypi.org/project/opensandbox/)


================================================
FILE: examples/openclaw/README_zh.md
================================================
# OpenClaw Gateway 示例

在 OpenSandbox 沙箱实例中启动 [OpenClaw](https://github.com/openclaw/openclaw) Gateway，并暴露 HTTP 访问端点。脚本会轮询 Gateway，直到返回 HTTP 200，然后打印可访问地址。

## 启动 OpenSandbox Server（本地）

最新 OpenClaw 镜像可在这里查看：[OpenClaw Container Registry](https://github.com/openclaw/openclaw/pkgs/container/openclaw)。

### 注意事项（Docker 运行时要求）

默认情况下，OpenSandbox Server 使用 `runtime.type = "docker"`，因此 **必须** 能访问可用的 Docker daemon。

- **Docker Desktop**：确保已启动，然后执行 `docker version` 验证。
- **Colima（macOS）**：先启动 (`colima start`)，再在启动 server 前导出 socket：

```shell
export DOCKER_HOST="unix://${HOME}/.colima/default/docker.sock"
```

预拉取 OpenClaw 镜像：

```shell
docker pull ghcr.io/openclaw/openclaw:latest
```

启动 OpenSandbox Server（日志会持续输出在当前终端）：

```shell
uv pip install opensandbox-server
opensandbox-server init-config ~/.sandbox.toml --example docker
opensandbox-server
```

如果出现 `docker/transport/unixconn.py` 的 `FileNotFoundError: [Errno 2] No such file or directory`，通常表示 Docker unix socket 不存在或 Docker 未启动。

## 创建并访问 OpenClaw Sandbox

该示例为快速体验预置了以下参数：

- OpenSandbox Server：`http://localhost:8080`
- 镜像：`ghcr.io/openclaw/openclaw:latest`
- Gateway 端口：`18789`
- 超时时间：`3600s`
- Token：`OPENCLAW_GATEWAY_TOKEN`（默认：`dummy-token-for-sandbox`）

在项目根目录安装依赖：

```shell
uv pip install opensandbox requests
```

运行示例（如需鉴权访问请设置真实 token）：

```shell
export OPENCLAW_GATEWAY_TOKEN="$(openssl rand -hex 32)"
uv run python examples/openclaw/main.py
```

预期输出类似：

```text
Creating openclaw sandbox with image=ghcr.io/openclaw/openclaw:latest on OpenSandbox server http://localhost:8080...
[check] sandbox ready after 7.1s
Openclaw started finished. Please refer to 127.0.0.1:56123
```

最后打印的地址（如 `127.0.0.1:56123`）就是沙箱中 OpenClaw Gateway 的可访问端点。

## 参考

- [OpenClaw](https://github.com/openclaw/openclaw)
- [OpenSandbox Python SDK](https://pypi.org/project/opensandbox/)


================================================
FILE: examples/openclaw/main.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import time
from datetime import timedelta

from opensandbox import SandboxSync
from opensandbox.config import ConnectionConfigSync
from opensandbox.models.sandboxes import NetworkPolicy, NetworkRule
import requests


def check_openclaw(sbx: SandboxSync) -> bool:
    """
    Readiness probe: poll the openclaw gateway root URL on port 18789.

    Makes up to 150 attempts. Each attempt is one GET with a 1s timeout
    followed by a 0.2s pause, so the pauses alone add up to ~30s and slow
    requests can stretch the total further. Failed requests are ignored
    and retried.

    Returns:
        True  as soon as a request returns HTTP 200
        False when every attempt is used up or any other exception occurs
    """
    try:
        gateway = sbx.get_endpoint(18789)
        started = time.perf_counter()
        gateway_url = f"http://{gateway.endpoint}"

        attempts_left = 150
        while attempts_left > 0:
            attempts_left -= 1
            try:
                if requests.get(gateway_url, timeout=1).status_code == 200:
                    waited = time.perf_counter() - started
                    print(f"[check] sandbox ready after {waited:.1f}s")
                    return True
            except Exception:
                # Gateway not reachable yet; try again after the pause.
                pass
            time.sleep(0.2)
        return False
    except Exception as exc:
        print(f"[check] failed: {exc}")
        return False


def main() -> None:
    """Create the openclaw sandbox and print the gateway endpoint (port 18789).

    Server address, image and the one-hour sandbox timeout are fixed values;
    the gateway token is read from ``OPENCLAW_GATEWAY_TOKEN`` (default
    ``dummy-token-for-sandbox``) and passed into the sandbox environment.
    """
    server = "http://localhost:8080"
    image = "ghcr.io/openclaw/openclaw:latest"
    lifetime = timedelta(seconds=3600)  # 1 hour
    gateway_token = os.getenv("OPENCLAW_GATEWAY_TOKEN", "dummy-token-for-sandbox")

    print(f"Creating openclaw sandbox with image={image} on OpenSandbox server {server}...")
    create_options = dict(
        image=image,
        timeout=lifetime,
        metadata={"example": "openclaw"},
        entrypoint=["node dist/index.js gateway --bind=lan --port 18789 --allow-unconfigured --verbose"],
        connection_config=ConnectionConfigSync(domain=server),
        health_check=check_openclaw,
        # environment handed to openclaw
        env={"OPENCLAW_GATEWAY_TOKEN": gateway_token},
        # network policy: deny by default, allow egress to pypi.org only
        network_policy=NetworkPolicy(
            defaultAction="deny",
            egress=[NetworkRule(action="allow", target="pypi.org")],
        ),
    )
    sandbox = SandboxSync.create(**create_options)

    gateway = sandbox.get_endpoint(18789)
    print(f"Openclaw started finished. Please refer to {gateway.endpoint}")


if __name__ == "__main__":
    main()

================================================
FILE: examples/playwright/Dockerfile
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

FROM debian:12-slim

#----------------------
# Install all prerequisite packages in one layer
RUN apt-get update && apt-get install -y \
    python3 \
    python3-pip \
    python3-venv \
    wget \
    ca-certificates \
    curl \
    git \
    nodejs \
    npm \
    --no-install-recommends \
    && rm -rf /var/lib/apt/lists/*

#----------------------
# Create a non-root user together with its home directory and the browser
# cache directory, and hand both directories to that user.
RUN groupadd -r playwright && useradd -r -g playwright playwright \
    && mkdir -p /home/playwright /ms-playwright \
    && chown -R playwright:playwright /home/playwright /ms-playwright

#----------------------
# Install Playwright and browser binaries
# Use an isolated venv to avoid PEP 668 issues
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright

# `playwright install --with-deps` pulls system packages through apt, so the
# package lists (and the npm download cache) are removed in the same layer to
# keep them out of the final image.
RUN python3 -m venv /venv \
    && /venv/bin/pip install --no-cache-dir --upgrade pip \
    && /venv/bin/pip install --no-cache-dir playwright \
    && npm install -g @playwright/mcp \
    && /venv/bin/playwright install --with-deps chromium \
    && npm cache clean --force \
    && rm -rf /var/lib/apt/lists/*

# Put the venv first on PATH so `python` and `playwright` resolve to it.
ENV PATH="/venv/bin:${PATH}"

#----------------------
# Configure user, etc

WORKDIR /home/playwright

# Run as the non-root user from here on.
USER playwright

# Default to bash
CMD ["bash"]


================================================
FILE: examples/playwright/README.md
================================================
# Playwright Example

Access web pages in headless mode using Playwright + Chromium in OpenSandbox to scrape title/body snippets.

## Build the Playwright Sandbox Image

The Dockerfile in this directory builds a sandbox image with Playwright and Chromium pre-installed:

```shell
cd examples/playwright
docker build -t opensandbox/playwright:latest .
```

This image includes:
- Playwright Python package
- Chromium browser binaries
- Node.js and npm (for Playwright MCP)
- Non-root user (playwright) for security

## Start OpenSandbox server [local]

Pre-pull the Playwright image:

```shell
docker pull sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/playwright:latest
```

Start the local OpenSandbox server:

```shell
uv pip install opensandbox-server
opensandbox-server init-config ~/.sandbox.toml --example docker
opensandbox-server
```

## Create and Access the Playwright Sandbox

```shell
# Install OpenSandbox package
uv pip install opensandbox

uv run python examples/playwright/main.py
```

The script launches Chromium in headless mode to access the target URL, prints title/body snippets, and saves a full-page screenshot to `/home/playwright/screenshot.png` inside the sandbox. It also downloads the screenshot to the local working directory as `./screenshot.png`. By default it uses the prebuilt `opensandbox/playwright:latest` image; set `SANDBOX_IMAGE` to use a different one.

![Playwright screenshot](./screenshot.png)

## References
- [Playwright](https://playwright.dev/)


================================================
FILE: examples/playwright/build.sh
================================================
#!/bin/bash
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Abort on the first failing command and echo every command as it runs.
set -ex

# Image tag to build and push; override with TAG=... in the environment.
TAG=${TAG:-latest}

# Start from a fresh builder; ignore the error when none exists yet.
docker buildx rm playwright-builder || true

docker buildx create --use --name playwright-builder

docker buildx inspect --bootstrap

docker buildx ls

# Build for both platforms and push under both image names.
# The tag expansions are quoted so an unusual TAG value cannot be word-split.
docker buildx build \
  -t "opensandbox/playwright:${TAG}" \
  -t "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/playwright:${TAG}" \
  --platform linux/amd64,linux/arm64 \
  --push \
  .


================================================
FILE: examples/playwright/main.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import os
from datetime import timedelta
from pathlib import Path

from opensandbox import Sandbox
from opensandbox.config import ConnectionConfig


def _required_env(name: str) -> str:
    value = os.getenv(name)
    if not value:
        raise RuntimeError(f"{name} is required")
    return value


async def _print_logs(label: str, execution) -> None:
    for msg in execution.logs.stdout:
        print(f"[{label} stdout] {msg.text}")
    for msg in execution.logs.stderr:
        print(f"[{label} stderr] {msg.text}")
    if execution.error:
        print(f"[{label} error] {execution.error.name}: {execution.error.value}")


async def main() -> None:
    """Run a headless-Chromium browse script in a sandbox and fetch its screenshot.

    Reads ``SANDBOX_DOMAIN`` (default ``localhost:8080``), ``SANDBOX_API_KEY``,
    ``SANDBOX_IMAGE`` (default ``opensandbox/playwright:latest``) and
    ``PYTHON_VERSION`` (default ``3.11``) from the environment. The screenshot
    is copied from ``/home/playwright/screenshot.png`` in the sandbox to
    ``./screenshot.png``; a failed download is reported but does not raise.

    The sandbox is killed in a ``finally`` block so a failing browse step
    does not leave it running.
    """
    domain = os.getenv("SANDBOX_DOMAIN", "localhost:8080")
    api_key = os.getenv("SANDBOX_API_KEY")
    image = os.getenv(
        "SANDBOX_IMAGE",
        "opensandbox/playwright:latest",
    )
    python_version = os.getenv("PYTHON_VERSION", "3.11")

    config = ConnectionConfig(
        domain=domain,
        api_key=api_key,
        request_timeout=timedelta(seconds=60),
    )

    # Inject Python version into container environment
    env = {"PYTHON_VERSION": python_version}
    sandbox = await Sandbox.create(
        image,
        connection_config=config,
        env=env,
    )

    # NOTE(review): presumably leaving the context manager only releases local
    # client resources and does not stop the remote sandbox - confirm in the SDK.
    async with sandbox:
        try:
            # Playwright and Chromium are pre-installed in the image
            # Run browser script
            browse_exec = await sandbox.commands.run(
                "python - <<'PY'\n"
                "import asyncio\n"
                "import os\n"
                "from pathlib import Path\n"
                "from playwright.async_api import async_playwright\n"
                "\n"
                "URL = os.environ.get('TARGET_URL', 'https://example.com')\n"
                "SCREENSHOT_PATH = Path('/home/playwright/screenshot.png')\n"
                "SCREENSHOT_PATH.parent.mkdir(parents=True, exist_ok=True)\n"
                "\n"
                "async def run():\n"
                "    async with async_playwright() as p:\n"
                "        browser = await p.chromium.launch(headless=True)\n"
                "        page = await browser.new_page()\n"
                "        await page.goto(URL, wait_until='networkidle')\n"
                "        title = await page.title()\n"
                "        content = await page.text_content('body')\n"
                "        await page.screenshot(path=str(SCREENSHOT_PATH), full_page=True)\n"
                "        print('title:', title)\n"
                "        print('screenshot saved at:', SCREENSHOT_PATH)\n"
                "        if content:\n"
                "            snippet = content.strip().replace('\\n', ' ')\n"
                "            print('content snippet:', snippet[:300])\n"
                "        await browser.close()\n"
                "\n"
                "asyncio.run(run())\n"
                "PY"
            )
            await _print_logs("browse", browse_exec)

            # Download screenshot from sandbox to local disk
            screenshot_remote = "/home/playwright/screenshot.png"
            screenshot_local = Path("screenshot.png")
            try:
                data = await sandbox.files.read_bytes(screenshot_remote)
                screenshot_local.write_bytes(data)
                print(f"\nDownloaded screenshot to: {screenshot_local.resolve()}")
            except Exception as e:
                print(f"\nFailed to download screenshot from {screenshot_remote}: {e}")
        finally:
            # Kill the sandbox even when the browse step raised.
            await sandbox.kill()


if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: examples/rl-training/README.md
================================================
# Reinforcement Learning Sandbox Example

Demonstrates running a basic RL training loop (CartPole + DQN) inside an isolated OpenSandbox container. The example installs RL dependencies in the sandbox, trains a policy, saves a checkpoint, and returns a training summary.

## Start OpenSandbox server [local]

Start the local OpenSandbox server:

```shell
uv pip install opensandbox-server
opensandbox-server init-config ~/.sandbox.toml --example docker
opensandbox-server
```

## Run the Example

```shell
# Install OpenSandbox package
uv pip install opensandbox

# Run the example
uv run python examples/rl-training/main.py
```

The script provisions a sandbox, installs RL dependencies, trains a DQN agent on CartPole, saves a checkpoint, and prints the JSON training summary.

![RL training screenshot](./screenshot.jpg)

## Environment Variables

- `SANDBOX_DOMAIN`: Sandbox service address (default: `localhost:8080`)
- `SANDBOX_API_KEY`: API key if your server requires authentication
- `SANDBOX_IMAGE`: Docker image to use (default: `sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2`)
- `RL_TIMESTEPS`: Training timesteps to run (default: `5000`)

## TensorBoard

The training script logs to `runs/`. To visualize metrics, open a shell in the sandbox and run:

```shell
tensorboard --logdir runs --host 0.0.0.0 --port 6006
```


================================================
FILE: examples/rl-training/main.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import os
import textwrap
from datetime import timedelta
from pathlib import Path

from opensandbox import Sandbox
from opensandbox.config import ConnectionConfig


def _load_requirements() -> str:
    """Return the text of the ``requirements.txt`` file located beside this script."""
    script_path = Path(__file__)
    with open(script_path.with_name("requirements.txt"), encoding="utf-8") as handle:
        return handle.read()


def _training_script() -> str:
    return textwrap.dedent(
        """
        import json
        import os

        import gymnasium as gym
        from stable_baselines3 import DQN
        from stable_baselines3.common.evaluation import evaluate_policy

        timesteps = int(os.getenv("RL_TIMESTEPS", "5000"))
        tensorboard_log = os.getenv("RL_TENSORBOARD_LOG", "runs")

        env = gym.make("CartPole-v1")
        model = DQN(
            "MlpPolicy",
            env,
            verbose=1,
            tensorboard_log=tensorboard_log,
            learning_rate=1e-3,
            buffer_size=10000,
            learning_starts=1000,
            batch_size=32,
            train_freq=4,
            gradient_steps=1,
        )

        model.learn(total_timesteps=timesteps)

        os.makedirs("checkpoints", exist_ok=True)
        checkpoint_path = "checkpoints/cartpole_dqn"
        model.save(checkpoint_path)

        mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=5)
        summary = {
            "timesteps": timesteps,
            "mean_reward": float(mean_reward),
            "std_reward": float(std_reward),
            "checkpoint_path": f"{checkpoint_path}.zip",
        }
        with open("training_summary.json", "w", encoding="utf-8") as handle:
            json.dump(summary, handle, indent=2)

        print("Training summary:", summary)
        env.close()
        """
    ).lstrip()


async def _print_execution_logs(execution) -> None:
    for msg in execution.logs.stdout:
        print(f"[stdout] {msg.text}")
    for msg in execution.logs.stderr:
        print(f"[stderr] {msg.text}")
    if execution.error:
        print(f"[error] {execution.error.name}: {execution.error.value}")


def _execution_failed(execution) -> bool:
    return execution.error is not None


async def _run_command(sandbox: Sandbox, command: str) -> bool:
    """Run ``command`` in the sandbox, print its logs, and report success.

    Returns:
        ``True`` when the execution carries no error, ``False`` otherwise.
    """
    result = await sandbox.commands.run(command)
    await _print_execution_logs(result)
    failed = _execution_failed(result)
    return not failed


def _with_python_env(command: str) -> str:
    return (
        "bash -lc '"
        "source /opt/opensandbox/code-interpreter-env.sh "
        "python ${PYTHON_VERSION:-3.14} >/dev/null "
        "&& "
        f"{command}"
        "'"
    )


async def _ensure_pip(sandbox: Sandbox) -> bool:
    """Make sure pip is usable inside the sandbox.

    Strategies are attempted in order and the first one that succeeds wins:
    check for an existing pip in the Python environment, bootstrap it with
    ``ensurepip``, then fall back to the ``apt-get`` and ``apk`` packages.

    Returns:
        ``True`` as soon as one strategy succeeds, ``False`` if all fail.
    """
    strategies = (
        _with_python_env("python3 -m pip --version"),
        _with_python_env("python3 -m ensurepip --upgrade"),
        "apt-get update && apt-get install -y python3-pip",
        "apk add --no-cache py3-pip",
    )
    for attempt in strategies:
        succeeded = await _run_command(sandbox, attempt)
        if succeeded:
            return True
    return False


async def _install_requirements(sandbox: Sandbox) -> bool:
    """Install the packages listed in the sandbox's ``requirements.txt``.

    The install is first attempted through ``python3 -m pip`` inside the
    Python environment, then through bare ``pip3`` and ``pip``; later
    variants are only run when the earlier ones fail.

    Returns:
        ``True`` if any variant succeeds, ``False`` otherwise.
    """
    variants = (
        _with_python_env(
            "python3 -m pip install --no-cache-dir --break-system-packages -r requirements.txt"
        ),
        "pip3 install --no-cache-dir -r requirements.txt",
        "pip install --no-cache-dir -r requirements.txt",
    )
    for variant in variants:
        installed = await _run_command(sandbox, variant)
        if installed:
            return True
    return False


async def main() -> None:
    """Train a CartPole DQN agent inside an OpenSandbox sandbox and print the result.

    Configuration comes from the environment: ``SANDBOX_DOMAIN``,
    ``SANDBOX_API_KEY``, ``SANDBOX_IMAGE`` and ``RL_TIMESTEPS``. The sandbox
    is always killed at the end, whether or not the run succeeded.
    """
    connection = ConnectionConfig(
        domain=os.getenv("SANDBOX_DOMAIN", "localhost:8080"),
        api_key=os.getenv("SANDBOX_API_KEY"),
        request_timeout=timedelta(minutes=10),
    )
    sandbox_image = os.getenv(
        "SANDBOX_IMAGE",
        "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2",
    )
    # Forward the step budget so the training script can read it from its environment.
    sandbox_env = {"RL_TIMESTEPS": os.getenv("RL_TIMESTEPS", "5000")}

    sandbox = await Sandbox.create(
        sandbox_image,
        connection_config=connection,
        env=sandbox_env,
    )

    async with sandbox:
        try:
            # Step 1: upload the dependency list and get pip working.
            await sandbox.files.write_file("requirements.txt", _load_requirements())
            pip_ready = await _ensure_pip(sandbox)
            if not pip_ready:
                print("Failed to bootstrap pip inside the sandbox.")
                return

            # Step 2: install the RL dependencies.
            deps_installed = await _install_requirements(sandbox)
            if not deps_installed:
                print("Failed to install RL dependencies inside the sandbox.")
                return

            # Step 3: upload and run the training script.
            await sandbox.files.write_file("train.py", _training_script())
            training = await sandbox.commands.run(_with_python_env("python3 train.py"))
            await _print_execution_logs(training)
            if _execution_failed(training):
                print("Training failed inside the sandbox.")
                return

            # Step 4: fetch the summary written by the training script.
            try:
                summary_text = await sandbox.files.read_file("training_summary.json")
            except Exception as read_error:
                print(f"\nFailed to read training summary: {read_error}")
                return
            print("\n=== Training summary ===")
            print(summary_text)
        finally:
            await sandbox.kill()


# Run the example only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: examples/rl-training/requirements.txt
================================================
# Pinned Python dependencies for the RL training example.
gymnasium==0.29.1
stable-baselines3==2.3.2
tensorboard==2.16.2
torch==2.9.1


================================================
FILE: examples/vscode/Dockerfile
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Base image: Debian 12 (slim variant).
FROM debian:12-slim

#----------------------
# Install all prerequisite packages in one layer.
# The apt package lists are removed in the same layer once installation is done.
RUN apt-get update && apt-get install -y \
    python3 \
    python3-pip \
    curl \
    ca-certificates \
    --no-install-recommends \
    && rm -rf /var/lib/apt/lists/*

#----------------------
# Install code-server using the installer script fetched from code-server.dev
RUN curl -fsSL https://code-server.dev/install.sh | sh

#----------------------
# Create a non-root "vscode" user and group, and give that user ownership of
# its home directory and of /workspace
RUN groupadd -r vscode && useradd -r -g vscode vscode \
    && mkdir -p /home/vscode /workspace && chown -R vscode:vscode /home/vscode /workspace

#----------------------
# Use /workspace as the working directory and run as the non-root user

WORKDIR /workspace

USER vscode

# Default to bash
CMD ["bash"]


================================================
FILE: examples/vscode/README.md
================================================
# VS Code Example

## Build the VS Code Sandbox Image

The Dockerfile in this directory builds a sandbox image with code-server pre-installed:

```shell
cd examples/vscode
docker build -t opensandbox/vscode:latest .
```

This image includes:
- code-server (VS Code Web) pre-installed
- Non-root user (vscode) for security
- Workspace directory at `/workspace`

Launch code-server (VS Code Web) in OpenSandbox to provide browser access.

## Start OpenSandbox server [local]

Pre-pull the VS Code image:

```shell
docker pull sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/vscode:latest
```

Start the local OpenSandbox server:

```shell
uv pip install opensandbox-server
opensandbox-server init-config ~/.sandbox.toml --example docker
opensandbox-server
```

## Create and Access the VS Code Sandbox

```shell
# Install OpenSandbox package
uv pip install opensandbox

uv run python examples/vscode/main.py
```

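The script starts code-server with authentication disabled, binds it to the port given by `CODE_PORT` (default `8443`), and prints the address at which it can be reached. By default it uses the `opensandbox/vscode:latest` image; set `SANDBOX_IMAGE` to use a different one.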

![VS Code screenshot shell](./screenshot_shell.jpg)
![VS Code screenshot vscode](./screenshot_vscode.jpg)

## References
- [code-server (VS Code Web)](https://github.com/coder/code-server)


================================================
FILE: examples/vscode/build.sh
================================================
#!/bin/bash
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Abort on the first failing command and echo every command as it runs.
set -ex

# Image tag to publish; override through the TAG environment variable.
TAG=${TAG:-latest}

# Start from a clean builder; ignore the error if none exists yet.
docker buildx rm vscode-builder || true

docker buildx create --use --name vscode-builder

docker buildx inspect --bootstrap

docker buildx ls

# Build for amd64 and arm64 and push under both image names.
# The image references are quoted so the tag is never word-split or globbed.
docker buildx build \
  -t "opensandbox/vscode:${TAG}" \
  -t "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/vscode:${TAG}" \
  --platform linux/amd64,linux/arm64 \
  --push \
  .


================================================
FILE: examples/vscode/main.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import os
from datetime import timedelta

from opensandbox import Sandbox
from opensandbox.config import ConnectionConfig
from opensandbox.models.execd import RunCommandOpts


def _required_env(name: str) -> str:
    value = os.getenv(name)
    if not value:
        raise RuntimeError(f"{name} is required")
    return value


async def _print_logs(label: str, execution) -> None:
    for msg in execution.logs.stdout:
        print(f"[{label} stdout] {msg.text}")
    for msg in execution.logs.stderr:
        print(f"[{label} stderr] {msg.text}")
    if execution.error:
        print(f"[{label} error] {execution.error.name}: {execution.error.value}")


async def main() -> None:
    """Start code-server in an OpenSandbox sandbox and keep it alive for 10 minutes.

    Configuration comes from the environment: ``SANDBOX_DOMAIN``,
    ``SANDBOX_API_KEY``, ``SANDBOX_IMAGE``, ``PYTHON_VERSION`` and
    ``CODE_PORT``. The sandbox is killed when the wait ends, when the user
    presses Ctrl+C, or when any step after creation fails.
    """
    domain = os.getenv("SANDBOX_DOMAIN", "localhost:8080")
    api_key = os.getenv("SANDBOX_API_KEY")
    image = os.getenv(
        "SANDBOX_IMAGE",
        "opensandbox/vscode:latest",
    )
    python_version = os.getenv("PYTHON_VERSION", "3.11")
    code_port = int(os.getenv("CODE_PORT", "8443"))

    config = ConnectionConfig(
        domain=domain,
        api_key=api_key,
        request_timeout=timedelta(seconds=60),
    )

    # Inject Python version into container environment
    env = {"PYTHON_VERSION": python_version}
    sandbox = await Sandbox.create(
        image,
        connection_config=config,
        env=env,
    )

    async with sandbox:
        # The whole session sits inside try/finally so the sandbox is killed
        # even if starting code-server or resolving the endpoint fails.
        try:
            # code-server is pre-installed in the image
            # Start code-server with authentication disabled
            start_exec = await sandbox.commands.run(
                f"code-server --bind-addr 0.0.0.0:{code_port} --auth none /workspace",
                opts=RunCommandOpts(background=True),
            )
            await _print_logs("code-server", start_exec)

            endpoint = await sandbox.get_endpoint(code_port)
            print("\nVS Code Web endpoint:")
            print(f"  http://{endpoint.endpoint}/")

            print("\nKeeping sandbox alive for 10 minutes. Press Ctrl+C to exit sooner.")
            await asyncio.sleep(600)
        except (KeyboardInterrupt, asyncio.CancelledError):
            # Under asyncio.run(), Ctrl+C normally reaches this coroutine as a
            # task cancellation rather than as KeyboardInterrupt, so both are
            # treated as a request to stop.
            print("Stopping...")
        finally:
            await sandbox.kill()


# Run the example only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: kubernetes/.golangci.yml
================================================
version: "2"
run:
  concurrency: 4
  issues-exit-code: 1
  tests: true

# output configuration options
output:
  formats:
    text:
      path: stdout
      colors: true
linters:
  default: none
  enable:
    - depguard
    - govet
    - ineffassign
    - misspell
    - unconvert
    - unused
  settings:
    misspell:
      # Correct spellings using locale preferences for US or UK.
      # Default is to use a neutral variety of English.
      # Setting locale to US will correct the British spelling of 'colour' to 'color'.
      locale: US
    depguard:
      rules:
        forbid-pkg-errors:
          deny:
          - pkg: "github.com/pkg/errors"
            desc: Should be replaced with standard lib errors or fmt.Errorf
  exclusions:
    generated: lax
    presets:
      - comments
      - common-false-positives
      - legacy
      - std-error-handling
    rules:
      - path: (.+)\.go$
        text: 'SA1019: package github.com/golang/protobuf/proto is deprecated: Use the "google.golang.org/protobuf/proto" package instead'
    paths:
      - third_party$
      - builtin$
      - examples$
      - apis
      - pkg/client
      - vendor
      - test
formatters:
  enable:
  - gofmt
  - goimports
  settings:
    gofmt:
      simplify: true
    goimports:
      # put imports beginning with prefix after 3rd-party packages;
      local-prefixes: 
      - github.com/alibaba/OpenSandbox/sandbox-k8s
  exclusions:
    generated: lax
    paths:
      - third_party$
      - builtin$
      - examples$
      - apis
      - pkg/client
      - vendor
      - test

================================================
FILE: kubernetes/Dockerfile
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Build the manager binary
FROM golang:1.24 AS builder
ARG TARGETOS
ARG TARGETARCH

WORKDIR /workspace
# Copy the Go Modules manifests
COPY go.mod go.mod
COPY go.sum go.sum
# Cache deps before building and copying source so that we don't need to re-download as much
# and so that source changes don't invalidate our downloaded layer.
# Modules are fetched through the goproxy.cn proxy, falling back to direct downloads.
RUN GOPROXY=https://goproxy.cn,direct go mod download

# Copy the go source
COPY cmd/ cmd/
COPY apis/ apis/
COPY pkg/ pkg/
COPY internal/ internal/

# Build
# GOARCH has no default value, so the binary is built for the platform of the host where the command
# is called. For example, if we call make docker-build in a local env on Apple Silicon (M1),
# the docker BUILDPLATFORM arg will be linux/arm64, whereas on Apple x86 it will be linux/amd64. Therefore,
# by leaving it empty we can ensure that the container and the binary shipped in it have the same platform.
RUN echo "Building for $TARGETOS/$TARGETARCH"
# PACKAGE selects which main package is compiled into the "server" binary.
ARG PACKAGE=cmd/controller/main.go
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -o server ${PACKAGE}

# Use the golang image as the runtime base to ensure nsenter (util-linux) is available;
# distroless does not contain a shell or nsenter.
FROM golang:1.24
# USERID is the user the server runs as; override it with --build-arg.
ARG USERID=65532
WORKDIR /workspace
COPY --from=builder /workspace/server .
USER $USERID
ENTRYPOINT ["/workspace/server"]

================================================
FILE: kubernetes/Dockerfile.debug
================================================
FROM golang:1.25

# Install Delve (debugger) and Reflex (file watcher)
RUN go install github.com/go-delve/delve/cmd/dlv@latest && \
    go install github.com/cespare/reflex@latest

WORKDIR /workspace

# Set the cache env vars to fixed paths to ensure they are targeted by our volume mounts
ENV GOCACHE=/go/.cache/go-build
ENV GOMODCACHE=/go/pkg/mod
# Expose ports 5758 and 2345
# NOTE(review): presumably the application port and the Delve port; confirm against the debug script
EXPOSE 5758 2345

# The default command will be overridden by the script, but we can set a safe default
CMD ["bash"]


================================================
FILE: kubernetes/Makefile
================================================
# VERSION defines the project version for the bundle.
# Update this value when you upgrade the version of your project.
# To re-generate a bundle for another specific version without changing the standard setup, you can:
# - use the VERSION as arg of the bundle target (e.g make bundle VERSION=0.0.2)
# - use environment variables to overwrite this value (e.g export VERSION=0.0.2)
VERSION ?= 0.1.0

# CHANNELS define the bundle channels used in the bundle.
# Add a new line here if you would like to change its default config. (E.g CHANNELS = "candidate,fast,stable")
# To re-generate a bundle for other specific channels without changing the standard setup, you can:
# - use the CHANNELS as arg of the bundle target (e.g make bundle CHANNELS=candidate,fast,stable)
# - use environment variables to overwrite this value (e.g export CHANNELS="candidate,fast,stable")
ifneq ($(origin CHANNELS), undefined)
BUNDLE_CHANNELS := --channels=$(CHANNELS)
endif

# DEFAULT_CHANNEL defines the default channel used in the bundle.
# Add a new line here if you would like to change its default config. (E.g DEFAULT_CHANNEL = "stable")
# To re-generate a bundle for any other default channel without changing the default setup, you can:
# - use the DEFAULT_CHANNEL as arg of the bundle target (e.g make bundle DEFAULT_CHANNEL=stable)
# - use environment variables to overwrite this value (e.g export DEFAULT_CHANNEL="stable")
ifneq ($(origin DEFAULT_CHANNEL), undefined)
BUNDLE_DEFAULT_CHANNEL := --default-channel=$(DEFAULT_CHANNEL)
endif
BUNDLE_METADATA_OPTS ?= $(BUNDLE_CHANNELS) $(BUNDLE_DEFAULT_CHANNEL)

# IMAGE_TAG_BASE defines the docker.io namespace and part of the image name for remote images.
# This variable is used to construct full image tags for bundle and catalog images.
#
# For example, running 'make bundle-build bundle-push catalog-build catalog-push' will build and push both
# opensandbox.io/sandbox-k8s-bundle:$VERSION and opensandbox.io/sandbox-k8s-catalog:$VERSION.
IMAGE_TAG_BASE ?= sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/controller

# BUNDLE_IMG defines the image:tag used for the bundle.
# You can use it as an arg. (E.g make bundle-build BUNDLE_IMG=<some-registry>/<project-name-bundle>:<tag>)
BUNDLE_IMG ?= $(IMAGE_TAG_BASE)-bundle:v$(VERSION)

# BUNDLE_GEN_FLAGS are the flags passed to the operator-sdk generate bundle command
BUNDLE_GEN_FLAGS ?= -q --overwrite --version $(VERSION) $(BUNDLE_METADATA_OPTS)

# USE_IMAGE_DIGESTS defines if images are resolved via tags or digests
# You can enable this value if you would like to use SHA Based Digests
# To enable set flag to true
USE_IMAGE_DIGESTS ?= false
ifeq ($(USE_IMAGE_DIGESTS), true)
	BUNDLE_GEN_FLAGS += --use-image-digests
endif

# Set the Operator SDK version to use. By default, what is installed on the system is used.
# This is useful for CI or a project to utilize a specific version of the operator-sdk toolkit.
OPERATOR_SDK_VERSION ?= v1.42.0
# Image URL to use all building/pushing image targets
# CONTROLLER_IMG defines the image for the controller manager.
CONTROLLER_IMG ?= controller:dev
# TASK_EXECUTOR_IMG defines the image for the task-executor service.
TASK_EXECUTOR_IMG ?= task-executor:dev

# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
ifeq (,$(shell go env GOBIN))
GOBIN=$(shell go env GOPATH)/bin
else
GOBIN=$(shell go env GOBIN)
endif

# CONTAINER_TOOL defines the container tool to be used for building images.
# Be aware that the target commands are only tested with Docker which is
# scaffolded by default. However, you might want to replace it to use other
# tools. (i.e. podman)
CONTAINER_TOOL ?= docker

# DOCKER_BUILD_ARGS defines additional arguments to pass to docker build.
# For example, in some environments you may need: DOCKER_BUILD_ARGS=--network=host
DOCKER_BUILD_ARGS ?=

# Setting SHELL to bash allows bash commands to be executed by recipes.
# Options are set to exit when a recipe line exits non-zero or a piped command fails.
SHELL = /usr/bin/env bash -o pipefail
.SHELLFLAGS = -ec

.PHONY: all
all: build

##@ General

# The help target prints out all targets with their descriptions organized
# beneath their categories. The categories are represented by '##@' and the
# target descriptions by '##'. The awk command is responsible for reading the
# entire set of makefiles included in this invocation, looking for lines of the
# file as xyz: ## something, and then pretty-format the target and help. Then,
# if there's a line with ##@ something, that gets pretty-printed as a category.
# More info on the usage of ANSI control characters for terminal formatting:
# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters
# More info on the awk command:
# http://linuxcommand.org/lc3_adv_awk.php

.PHONY: help
help: ## Display this help.
	@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n  make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf "  \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)

##@ Development

.PHONY: manifests
manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
	$(CONTROLLER_GEN) rbac:roleName=manager-role crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases

.PHONY: generate
generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
	$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..."

.PHONY: fmt
fmt: ## Run go fmt against code.
	go fmt ./...

.PHONY: vet
vet: ## Run go vet against code.
	go vet ./...

.PHONY: test
test: manifests generate fmt vet setup-envtest ## Run tests.
	KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $$(go list ./... | grep -v /e2e) -coverprofile cover.out

# To use a different vendor for e2e tests, modify the setup under 'test/e2e'.
# The default setup assumes Kind is available (see the install-kind target) and builds/loads the Manager Docker image locally.
KIND_CLUSTER ?= sandbox-k8s-test-e2e
KIND_K8S_VERSION ?= v1.22.4
GINKGO_ARGS ?=

.PHONY: install-kind
install-kind: ## Install Kind using go install if not already installed
	@if command -v kind >/dev/null 2>&1; then \
		echo "Kind is already installed: $$(kind version)"; \
	else \
		echo "Installing Kind..."; \
		go install sigs.k8s.io/kind@v0.20.0 && \
		echo "Kind installed successfully: $$(kind version)"; \
	fi

.PHONY: setup-test-e2e
setup-test-e2e: install-kind ## Set up a Kind cluster for e2e tests if it does not exist
	@case "$$($(KIND) get clusters 2>/dev/null || echo '')" in \
		*"$(KIND_CLUSTER)"*) \
			echo "Kind cluster '$(KIND_CLUSTER)' already exists. Skipping creation." ;; \
		*) \
			echo "Creating Kind cluster '$(KIND_CLUSTER)' with Kubernetes version $(KIND_K8S_VERSION)..."; \
			$(KIND) create cluster --name $(KIND_CLUSTER) --image kindest/node:$(KIND_K8S_VERSION) ;; \
	esac

.PHONY: test-e2e
test-e2e: setup-test-e2e manifests generate fmt vet ## Run the e2e tests. Expected an isolated environment using Kind. Use GINKGO_ARGS to pass additional arguments.
	CONTROLLER_IMG=$(CONTROLLER_IMG) TASK_EXECUTOR_IMG=$(TASK_EXECUTOR_IMG) \
		KIND_CLUSTER=$(KIND_CLUSTER) go test ./test/e2e/ -v -ginkgo.v $(GINKGO_ARGS)
	CONTROLLER_IMG=$(CONTROLLER_IMG) TASK_EXECUTOR_IMG=$(TASK_EXECUTOR_IMG) \
		KIND_CLUSTER=$(KIND_CLUSTER) go test ./test/e2e_task/ -v -ginkgo.v $(GINKGO_ARGS)
	$(MAKE) cleanup-test-e2e
	$(MAKE) test-gvisor CONTROLLER_IMG=$(CONTROLLER_IMG) TASK_EXECUTOR_IMG=$(TASK_EXECUTOR_IMG)
	$(MAKE) cleanup-gvisor

.PHONY: cleanup-test-e2e
cleanup-test-e2e: ## Tear down the Kind cluster used for e2e tests
	@$(KIND) delete cluster --name $(KIND_CLUSTER)

# Common E2E setup targets - install CRDs and controller for any Kind cluster
.PHONY: install-e2e-deps
# Installs the CRDs, points the manager kustomization at $(CONTROLLER_IMG), applies
# config/default and waits (up to 120s) for the controller deployment to become available.
# Uses $(KUBECTL) like the install/deploy targets so the kubectl binary can be overridden.
install-e2e-deps:
	@echo "Installing OpenSandbox CRDs..."
	@$(MAKE) install
	@echo "Installing OpenSandbox controller..."
	@cd config/manager && $(KUSTOMIZE) edit set image controller=$(CONTROLLER_IMG)
	@$(KUSTOMIZE) build config/default | $(KUBECTL) apply -f -
	@echo "Waiting for controller to be ready..."
	@$(KUBECTL) wait --for=condition=available --timeout=120s deployment -n opensandbox-system opensandbox-controller-manager || \
		{ $(KUBECTL) describe deployment -n opensandbox-system opensandbox-controller-manager; exit 1; }

# RuntimeClass E2E testing - gVisor
GVISOR_KIND_CLUSTER ?= gvisor-test
GVISOR_KIND_IMAGE ?= kindest/node:v1.27.3

# gVisor versions
GVISOR_VERSION ?= 20260112
GVISOR_RUNSC_BIN ?= $(shell pwd)/test/kind/gvisor/runsc
GVISOR_SHIM_BIN ?= $(shell pwd)/test/kind/gvisor/containerd-shim-runsc-v1

.PHONY: download-gvisor
download-gvisor: ## Download gVisor runsc and containerd-shim-runsc-v1 binaries
	@echo "Downloading gVisor runsc (release-$(GVISOR_VERSION))..."
	@mkdir -p $(dir $(GVISOR_RUNSC_BIN))
	@wget -q "https://storage.googleapis.com/gvisor/releases/release/$(GVISOR_VERSION)/$$(uname -m)/runsc" -O $(GVISOR_RUNSC_BIN)
	@chmod +x $(GVISOR_RUNSC_BIN)
	@echo "Downloading containerd-shim-runsc-v1..."
	@wget -q "https://storage.googleapis.com/gvisor/releases/release/$(GVISOR_VERSION)/$$(uname -m)/containerd-shim-runsc-v1" -O $(GVISOR_SHIM_BIN)
	@chmod +x $(GVISOR_SHIM_BIN)
	@echo "gVisor binaries downloaded successfully."

.PHONY: setup-gvisor
setup-gvisor: download-gvisor ## Set up Kind cluster with gVisor (runsc) for e2e tests
	@echo "Creating gVisor Kind cluster with runsc binaries from kubernetes/test/kind/gvisor/..."
	@export GVISOR_KIND_CLUSTER=$(GVISOR_KIND_CLUSTER) && \
		export GVISOR_KIND_IMAGE=$(GVISOR_KIND_IMAGE) && \
		export PWD=$$(pwd) && \
		envsubst < test/e2e_runtime/gvisor/testdata/gvisor.yaml.tmpl | \
		$(KIND) create cluster --config -
	@echo "Creating runsc.toml on Kind nodes..."
	@for node in $$(docker ps --filter "name=$(GVISOR_KIND_CLUSTER)-" --format "{{.Names}}"); do \
		docker exec $$node sh -c 'mkdir -p /etc/containerd && echo "[runsc]" > /etc/containerd/runsc.toml && echo "  platform = \"ptrace\"" >> /etc/containerd/runsc.toml'; \
	done

.PHONY: cleanup-gvisor
cleanup-gvisor: ## Tear down gVisor Kind cluster
	@$(KIND) delete cluster --name $(GVISOR_KIND_CLUSTER) 2>/dev/null || true

# install-gvisor-deps installs CRDs and controller for gVisor tests
.PHONY: install-gvisor-deps
install-gvisor-deps:
	@echo "Building and loading controller image into gVisor Kind cluster..."
	@$(MAKE) docker-build-controller CONTROLLER_IMG=$(CONTROLLER_IMG)
	@$(KIND) load docker-image --name $(GVISOR_KIND_CLUSTER) $(CONTROLLER_IMG)
	@$(MAKE) install-e2e-deps CONTROLLER_IMG=$(CONTROLLER_IMG)

.PHONY: test-gvisor
# Applies the gVisor RuntimeClass manifest, runs the gVisor e2e suite against the
# $(GVISOR_KIND_CLUSTER) cluster and tears the cluster down afterwards.
# Uses $(KUBECTL) like the install/deploy targets so the kubectl binary can be overridden.
test-gvisor: setup-gvisor install-gvisor-deps ## Run gVisor RuntimeClass e2e tests
	@echo "Installing gVisor RuntimeClass resources..."
	@$(KUBECTL) apply -f test/e2e_runtime/gvisor/testdata/runtimeclass.yaml
	@echo "Running gVisor E2E tests..."
	CONTROLLER_IMG=$(CONTROLLER_IMG) TASK_EXECUTOR_IMG=$(TASK_EXECUTOR_IMG) \
		KIND_CLUSTER=$(GVISOR_KIND_CLUSTER) go test ./test/e2e_runtime/gvisor -v -ginkgo.v $(GINKGO_ARGS)
	$(MAKE) cleanup-gvisor

.PHONY: lint
lint: golangci-lint ## Run golangci-lint linter
	$(GOLANGCI_LINT) run

.PHONY: lint-fix
lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes
	$(GOLANGCI_LINT) run --fix

.PHONY: lint-config
lint-config: golangci-lint ## Verify golangci-lint linter configuration
	$(GOLANGCI_LINT) config verify

##@ Build

.PHONY: build
build: manifests generate fmt vet ## Build manager binary.
	go build -o bin/manager cmd/controller/main.go

.PHONY: run
run: manifests generate fmt vet ## Run a controller from your host.
	go run ./cmd/controller/main.go

.PHONY: task-executor-build
task-executor-build: ## Build task-executor binary.
	go build -o bin/task-executor ./cmd/task-executor

.PHONY: task-executor-run
task-executor-run: ## Run task-executor from your host.
	go run ./cmd/task-executor

# If you wish to build the manager image targeting other platforms you can use the --platform flag.
# (i.e. docker build --platform linux/arm64). However, you must enable docker buildKit for it.
# More info: https://docs.docker.com/develop/develop-images/build_enhancements/
.PHONY: docker-build
# docker-build: ## Build docker image with the manager.
#	$(CONTAINER_TOOL) build -t ${CONTROLLER_IMG} .

docker-build: docker-build-controller

.PHONY: docker-build-controller
docker-build-controller: ## Build docker image with the manager.
	$(CONTAINER_TOOL) build $(DOCKER_BUILD_ARGS) --build-arg PACKAGE=cmd/controller/main.go -t ${CONTROLLER_IMG} .

.PHONY: docker-build-task-executor
docker-build-task-executor: ## Build docker image with task-executor.
	$(CONTAINER_TOOL) build $(DOCKER_BUILD_ARGS) --build-arg PACKAGE=cmd/task-executor/main.go --build-arg USERID=0 -t ${TASK_EXECUTOR_IMG} .

.PHONY: docker-push
# docker-push: ## Push docker image with the manager.
#	$(CONTAINER_TOOL) push ${CONTROLLER_IMG}

docker-push: docker-push-controller

.PHONY: docker-push-controller
docker-push-controller: ## Push docker image with the manager.
	$(CONTAINER_TOOL) push ${CONTROLLER_IMG}

.PHONY: docker-push-task-executor
docker-push-task-executor: ## Push docker image with task-executor.
	$(CONTAINER_TOOL) push ${TASK_EXECUTOR_IMG}

.PHONY: docker-run-task-executor
docker-run-task-executor: docker-build-task-executor ## Run task-executor docker image.
	@echo "Running task-executor image: $(TASK_EXECUTOR_IMG) on port 8080"
	@$(CONTAINER_TOOL) run --rm -d -p 8080:8080 --name task-executor-local $(TASK_EXECUTOR_IMG)

.PHONY: docker-stop-task-executor
docker-stop-task-executor: ## Stop task-executor docker container.
	@echo "Stopping task-executor container: task-executor-local"
	-@$(CONTAINER_TOOL) stop task-executor-local || true
	-@$(CONTAINER_TOOL) rm task-executor-local || true

# PLATFORMS defines the target platforms the manager image is built for, to provide support for multiple
# architectures (e.g. make docker-buildx CONTROLLER_IMG=myregistry/myoperator:0.0.1). To use this option you need to:
# - be able to use docker buildx. More info: https://docs.docker.com/build/buildx/
# - have enabled BuildKit. More info: https://docs.docker.com/develop/develop-images/build_enhancements/
# - be able to push the image to your registry (i.e. if you do not set a valid value via CONTROLLER_IMG=<myregistry>/<image>:<tag> then the export will fail)
# To adequately provide solutions that are compatible with multiple platforms, you should consider using this option.
PLATFORMS ?= linux/arm64,linux/amd64,linux/s390x,linux/ppc64le
.PHONY: docker-buildx
docker-buildx: ## Build and push docker image for the manager for cross-platform support
	# copy existing Dockerfile and insert --platform=${BUILDPLATFORM} into Dockerfile.cross, and preserve the original Dockerfile
	sed -e '1 s/\(^FROM\)/FROM --platform=\$$\{BUILDPLATFORM\}/; t' -e ' 1,// s//FROM --platform=\$$\{BUILDPLATFORM\}/' Dockerfile > Dockerfile.cross
	- $(CONTAINER_TOOL) buildx create --name sandbox-k8s-builder
	$(CONTAINER_TOOL) buildx use sandbox-k8s-builder
	- $(CONTAINER_TOOL) buildx build --push --platform=$(PLATFORMS) --tag ${CONTROLLER_IMG} -f Dockerfile.cross .
	- $(CONTAINER_TOOL) buildx rm sandbox-k8s-builder
	rm Dockerfile.cross

.PHONY: build-installer
build-installer: manifests generate kustomize ## Generate a consolidated YAML with CRDs and deployment.
	mkdir -p dist
	cd config/manager && $(KUSTOMIZE) edit set image controller=${CONTROLLER_IMG}
	$(KUSTOMIZE) build config/default > dist/install.yaml

##@ Deployment

ifndef ignore-not-found
  ignore-not-found = false
endif

.PHONY: install
install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config.
	$(KUSTOMIZE) build config/crd | $(KUBECTL) apply -f -

.PHONY: uninstall
uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
	$(KUSTOMIZE) build config/crd | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f -

.PHONY: deploy
deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.
	cd config/manager && $(KUSTOMIZE) edit set image controller=${CONTROLLER_IMG}
	$(KUSTOMIZE) build config/default | $(KUBECTL) apply -f -

.PHONY: undeploy
undeploy: kustomize ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
	$(KUSTOMIZE) build config/default | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f -

##@ Dependencies

## Location to install dependencies to
LOCALBIN ?= $(shell pwd)/bin
$(LOCALBIN):
	mkdir -p $(LOCALBIN)

## Tool Binaries
KUBECTL ?= kubectl
KIND ?= kind
KUSTOMIZE ?= $(LOCALBIN)/kustomize
CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen
ENVTEST ?= $(LOCALBIN)/setup-envtest
GOLANGCI_LINT = $(LOCALBIN)/golangci-lint

## Tool Versions
KUSTOMIZE_VERSION ?= v5.6.0
CONTROLLER_TOOLS_VERSION ?= v0.18.0
# ENVTEST_VERSION is the controller-runtime release branch used to fetch the
# envtest setup script (e.g. release-0.20). It is derived from the
# sigs.k8s.io/controller-runtime module version reported by `go list`: awk splits
# the version on 'v' and '.', and prints "release-<major>.<minor>".
# `$$2`/`$$3` are make-escaped so awk receives `$2`/`$3`.
ENVTEST_VERSION ?= $(shell go list -m -f "{{ .Version }}" sigs.k8s.io/controller-runtime | awk -F'[v.]' '{printf "release-%d.%d", $$2, $$3}')
# ENVTEST_K8S_VERSION is the Kubernetes version used when setting up the ENVTEST
# binaries (e.g. 1.31). It is derived from the k8s.io/api module version: the
# second dotted component becomes the minor version, printed as "1.<minor>".
ENVTEST_K8S_VERSION ?= $(shell go list -m -f "{{ .Version }}" k8s.io/api | awk -F'[v.]' '{printf "1.%d", $$3}')
GOLANGCI_LINT_VERSION ?= v2.7.2

# `kustomize` is a phony alias for the $(KUSTOMIZE) file target, which installs
# sigs.k8s.io/kustomize/kustomize/v5 at $(KUSTOMIZE_VERSION) through the
# go-install-tool macro once $(LOCALBIN) exists.
.PHONY: kustomize
kustomize: $(KUSTOMIZE) ## Download kustomize locally if necessary.
$(KUSTOMIZE): $(LOCALBIN)
	$(call go-install-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/v5,$(KUSTOMIZE_VERSION))

# `controller-gen` is a phony alias for the $(CONTROLLER_GEN) file target, which
# installs sigs.k8s.io/controller-tools/cmd/controller-gen at
# $(CONTROLLER_TOOLS_VERSION) through the go-install-tool macro.
.PHONY: controller-gen
controller-gen: $(CONTROLLER_GEN) ## Download controller-gen locally if necessary.
$(CONTROLLER_GEN): $(LOCALBIN)
	$(call go-install-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen,$(CONTROLLER_TOOLS_VERSION))

# Run `setup-envtest use` for $(ENVTEST_K8S_VERSION) with --bin-dir $(LOCALBIN).
# If that command fails, an error message naming the version is printed and the
# recipe exits with status 1. Both recipe lines are prefixed with `@`, so make
# does not echo the commands themselves.
.PHONY: setup-envtest
setup-envtest: envtest ## Download the binaries required for ENVTEST in the local bin directory.
	@echo "Setting up envtest binaries for Kubernetes version $(ENVTEST_K8S_VERSION)..."
	@$(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path || { \
		echo "Error: Failed to set up envtest binaries for version $(ENVTEST_K8S_VERSION)."; \
		exit 1; \
	}

# `envtest` is a phony alias for the $(ENVTEST) file target, which installs
# sigs.k8s.io/controller-runtime/tools/setup-envtest at $(ENVTEST_VERSION)
# through the go-install-tool macro.
.PHONY: envtest
envtest: $(ENVTEST) ## Download setup-envtest locally if necessary.
$(ENVTEST): $(LOCALBIN)
	$(call go-install-tool,$(ENVTEST),sigs.k8s.io/controller-runtime/tools/setup-envtest,$(ENVTEST_VERSION))

# `golangci-lint` is a phony alias for the $(GOLANGCI_LINT) file target, which
# installs github.com/golangci/golangci-lint/v2/cmd/golangci-lint at
# $(GOLANGCI_LINT_VERSION) through the go-install-tool macro.
.PHONY: golangci-lint
golangci-lint: $(GOLANGCI_LINT) ## Download golangci-lint locally if necessary.
$(GOLANGCI_LINT): $(LOCALBIN)
	$(call go-install-tool,$(GOLANGCI_LINT),github.com/golangci/golangci-lint/v2/cmd/golangci-lint,$(GOLANGCI_LINT_VERSION))

# go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist
# $1 - target path with name of binary
# $2 - package url which can be installed
# $3 - specific version of package
#
# Behavior:
# - The real binary is stored as "$(1)-$(3)" (target path suffixed with the
#   version). If that file already exists, the install step is skipped.
# - Otherwise any existing $(1) is removed, `go install $(2)@$(3)` runs with
#   GOBIN=$(LOCALBIN), and the resulting $(1) is renamed to "$(1)-$(3)".
#   This assumes $(1) is a path directly inside $(LOCALBIN), since that is where
#   `go install` places the binary before the `mv`.
# - In both cases $(1) is (re)created as a symlink to "$(1)-$(3)".
# `$${package}` is make-escaped so the shell sees `${package}`.
define go-install-tool
@[ -f "$(1)-$(3)" ] || { \
set -e; \
package=$(2)@$(3) ;\
echo "Downloading $${package}" ;\
rm -f $(1) || true ;\
GOBIN=$(LOCALBIN) go install $${package} ;\
mv $(1) $(1)-$(3) ;\
} ;\
ln -sf $(1)-$(3) $(1)
endef

# Resolve the operator-sdk binary. The conditionals are evaluated when the
# Makefile is parsed:
# - If $(OPERATOR_SDK) already exists on disk, the target has no recipe.
# - Otherwise, if `which operator-sdk` finds one on PATH, OPERATOR_SDK is
#   re-pointed at that binary and the target has no recipe.
# - Otherwise the recipe downloads the release binary for the host GOOS/GOARCH
#   (from `go env`) into $(OPERATOR_SDK) and marks it executable.
#   OPERATOR_SDK_VERSION selects the release; it is not defined in this part of
#   the Makefile and must be set elsewhere or by the caller.
# curl is invoked with -f so that an HTTP error (for example a 404 caused by an
# unknown version or platform) fails the recipe under `set -e` instead of saving
# the server's error page as the "binary" and marking it executable.
.PHONY: operator-sdk
OPERATOR_SDK ?= $(LOCALBIN)/operator-sdk
operator-sdk: ## Download operator-sdk locally if necessary.
ifeq (,$(wildcard $(OPERATOR_SDK)))
ifeq (, $(shell which operator-sdk 2>/dev/null))
	@{ \
	set -e ;\
	mkdir -p $(dir $(OPERATOR_SDK)) ;\
	OS=$(shell go env GOOS) && ARCH=$(shell go env GOARCH) && \
	curl -fsSLo $(OPERATOR_SDK) https://github.com/operator-framework/operator-sdk/releases/download/$(OPERATOR_SDK_VERSION)/operator-sdk_$${OS}_$${ARCH} ;\
	chmod +x $(OPERATOR_SDK) ;\
	}
else
OPERATOR_SDK = $(shell which operator-sdk)
endif
endif

# Generate and validate the operator bundle. Steps, in recipe order:
#   1. `operator-sdk generate kustomize manifests -q`
#   2. set the `controller` image in config/manager to $(CONTROLLER_IMG)
#   3. build config/manifests with kustomize and pipe it into
#      `operator-sdk generate bundle $(BUNDLE_GEN_FLAGS)`
#   4. `operator-sdk bundle validate ./bundle`
.PHONY: bundle
bundle: manifests kustomize operator-sdk ## Generate bundle manifests and metadata, then validate generated files.
	$(OPERATOR_SDK) generate kustomize manifests -q
	cd config/manager && $(KUSTOMIZE) edit set image controller=$(CONTROLLER_IMG)
	$(KUSTOMIZE) build config/manifests | $(OPERATOR_SDK) generate bundle $(BUNDLE_GEN_FLAGS)
	$(OPERATOR_SDK) bundle validate ./bundle

# Build the bundle image from bundle.Dockerfile in the current directory with
# $(CONTAINER_TOOL), tagging it $(BUNDLE_IMG). $(DOCKER_BUILD_ARGS) is passed
# through to the build command.
.PHONY: bundle-build
bundle-build: ## Build the bundle image.
	$(CONTAINER_TOOL) build $(DOCKER_BUILD_ARGS) -f bundle.Dockerfile -t $(BUNDLE_IMG) .

# Push the bundle image by re-invoking make on the `docker-push` target with
# CONTROLLER_IMG overridden to $(BUNDLE_IMG).
.PHONY: bundle-push
bundle-push: ## Push the bundle image.
	$(MAKE) docker-push CONTROLLER_IMG=$(BUNDLE_IMG)

# Resolve the opm (operator package manager) binary. The conditionals are
# evaluated when the Makefile is parsed:
# - If $(OPM) already exists on disk, the target has no recipe.
# - Otherwise, if `which opm` finds one on PATH, OPM is re-pointed at that
#   binary and the target has no recipe.
# - Otherwise the recipe downloads the v1.55.0 operator-registry release binary
#   for the host GOOS/GOARCH (from `go env`) into $(OPM) and marks it executable.
# curl is invoked with -f so that an HTTP error (for example a 404 for an
# unsupported platform) fails the recipe under `set -e` instead of saving the
# server's error page as the "binary" and marking it executable.
.PHONY: opm
OPM = $(LOCALBIN)/opm
opm: ## Download opm locally if necessary.
ifeq (,$(wildcard $(OPM)))
ifeq (,$(shell which opm 2>/dev/null))
	@{ \
	set -e ;\
	mkdir -p $(dir $(OPM)) ;\
	OS=$(shell go env GOOS) && ARCH=$(shell go env GOARCH) && \
	curl -fsSLo $(OPM) https://github.com/operator-framework/operator-registry/releases/download/v1.55.0/$${OS}-$${ARCH}-opm ;\
	chmod +x $(OPM) ;\
	}
else
OPM = $(shell which opm)
endif
endif

# A comma-separated list of bundle images (e.g. make catalog-build BUNDLE_IMGS=example.com/operator-bundle:v0.1.0,example.com/operator-bundle:v0.2.0).
# These images MUST exist in a registry and be pull-able.
# Defaults to the single image $(BUNDLE_IMG).
BUNDLE_IMGS ?= $(BUNDLE_IMG)

# The image tag given to the resulting catalog image (e.g. make catalog-build CATALOG_IMG=example.com/operator-catalog:v0.2.0).
# Defaults to "$(IMAGE_TAG_BASE)-catalog:v$(VERSION)".
CATALOG_IMG ?= $(IMAGE_TAG_BASE)-catalog:v$(VERSION)

# Set CATALOG_BASE_IMG to an existing catalog image tag to add $BUNDLE_IMGS to that image.
# When CATALOG_BASE_IMG is not defined, FROM_INDEX_OPT is never assigned here and
# therefore expands to nothing where it is used.
ifneq ($(origin CATALOG_BASE_IMG), undefined)
FROM_INDEX_OPT := --from-index $(CATALOG_BASE_IMG)
endif

# Build a catalog image by adding bundle images to an empty catalog using the operator package manager tool, 'opm'.
# This recipe invokes 'opm' in 'semver' bundle add mode. For more information on add modes, see:
# https://github.com/operator-framework/community-operators/blob/7f1438c/docs/packaging-operator.md#updating-your-existing-operator
# The command tags the result as $(CATALOG_IMG), adds the images listed in
# $(BUNDLE_IMGS), uses $(CONTAINER_TOOL) as the container tool, and appends
# $(FROM_INDEX_OPT) (the --from-index flag, when CATALOG_BASE_IMG is set).
.PHONY: catalog-build
catalog-build: opm ## Build a catalog image.
	$(OPM) index add --container-tool $(CONTAINER_TOOL) --mode semver --tag $(CATALOG_IMG) --bundles $(BUNDLE_IMGS) $(FROM_INDEX_OPT)

# Push the catalog image by re-invoking make on the `docker-push` target with
# CONTROLLER_IMG overridden to $(CATALOG_IMG).
.PHONY: catalog-push
catalog-push: ## Push a catalog image.
	$(MAKE) docker-push CONTROLLER_IMG=$(CATALOG_IMG)

##@ Helm

# Helm chart configuration
# HELM_CHART_PATH    - chart directory passed to the helm-* targets.
# HELM_CHART_VERSION - chart version passed to `helm package --version`;
#                      defaults to $(VERSION).
HELM_CHART_PATH ?= charts/opensandbox-controller
HELM_CHART_VERSION ?= $(VERSION)

# Run `helm lint` against the chart at $(HELM_CHART_PATH). `helm` is taken from PATH.
.PHONY: helm-lint
helm-lint: ## Lint the Helm chart
	@echo "Linting Helm chart..."
	helm lint $(HELM_CHART_PATH)

# Render the chart at $(HELM_CHART_PATH) with `helm template`, using release name
# `opensandbox-controller` and setting the controller image repository to
# $(IMAGE_TAG_BASE) and the tag to v$(VERSION). Output goes to stdout.
.PHONY: helm-template
helm-template: ## Generate Kubernetes manifests from Helm chart
	@echo "Generating manifests from Helm chart..."
	helm template opensandbox-controller $(HELM_CHART_PATH) \
		--set controller.image.repository=$(IMAGE_TAG_BASE) \
		--set controller.image.tag=v$(VERSION)

# Same `helm template` invocation as helm-template (release name
# `opensandbox-controller`, image repository $(IMAGE_TAG_BASE), tag v$(VERSION)),
# with `--debug` added.
.PHONY: helm-template-debug
helm-template-debug: ## Generate Kubernetes manifests with debug output
	@echo "Generating manifests from Helm chart with debug..."
	helm template opensandbox-controller $(HELM_CHART_PATH) \
		--set controller.image.repository=$(IMAGE_TAG_BASE) \
		--set controller.image.tag=v$(VERSION) \
		--debug

# Package the chart at $(HELM_CHART_PATH) into dist/ (created if missing), with
# chart version $(HELM_CHART_VERSION) and app version $(VERSION).
.PHONY: helm-package
helm-package: ## Package the Helm chart
	@echo "Packaging Helm chart..."
	@mkdir -p dist
	helm package $(HELM_CHART_PATH) -d dist/ --version $(HELM_CHART_VERSION) --app-version $(VERSION)

# Install the chart at $(HELM_CHART_PATH) as release `opensandbox-controller` in
# namespace `opensandbox-system` (created if absent via --create-namespace), with
# the controller image set to $(IMAGE_TAG_BASE):v$(VERSION).
.PHONY: helm-install
helm-install: ## Install the Helm chart
	@echo "Installing Helm chart..."
	helm install opensandbox-controller $(HELM_CHART_PATH) \
		--set controller.image.repository=$(IMAGE_TAG_BASE) \
		--set controller.image.tag=v$(VERSION) \
		--namespace opensandbox-system \
		--create-namespace

# Upgrade release `opensandbox-controller` in namespace `opensandbox-system` from
# the chart at $(HELM_CHART_PATH), with the controller image set to
# $(IMAGE_TAG_BASE):v$(VERSION). `--install` is not passed, so this targets an
# already-installed release.
.PHONY: helm-upgrade
helm-upgrade: ## Upgrade the Helm chart
	@echo "Upgrading Helm chart..."
	helm upgrade opensandbox-controller $(HELM_CHART_PATH) \
		--set controller.image.repository=$(IMAGE_TAG_BASE) \
		--set controller.image.tag=v$(VERSION) \
		--namespace opensandbox-system

# Uninstall release `opensandbox-controller` from namespace `opensandbox-system`.
.PHONY: helm-uninstall
helm-uninstall: ## Uninstall the Helm chart
	@echo "Uninstalling Helm chart..."
	helm uninstall opensandbox-controller --namespace opensandbox-system

# Run `helm test` for release `opensandbox-controller` in namespace
# `opensandbox-system`.
.PHONY: helm-test
helm-test: ## Run Helm chart tests
	@echo "Running Helm chart tests..."
	helm test opensandbox-controller --namespace opensandbox-system

# Generate chart documentation with the external `helm-docs` tool.
# The recipe checks for `helm-docs` on PATH with `command -v`; if present it runs
# `helm-docs $(HELM_CHART_PATH)`, otherwise it prints an install hint and exits
# with status 1.
.PHONY: helm-docs
helm-docs: ## Generate Helm chart documentation (requires helm-docs)
	@if command -v helm-docs >/dev/null 2>&1; then \
		echo "Generating Helm chart documentation..."; \
		helm-docs $(HELM_CHART_PATH); \
	else \
		echo "helm-docs is not installed. Install it with: go install github.com/norwoodj/helm-docs/cmd/helm-docs@latest"; \
		exit 1; \
	fi

# Same `helm install` invocation as helm-install (release
# `opensandbox-controller`, namespace `opensandbox-system`, image
# $(IMAGE_TAG_BASE):v$(VERSION)), with `--dry-run --debug` added.
.PHONY: helm-dry-run
helm-dry-run: ## Perform a dry-run install of the Helm chart
	@echo "Performing dry-run installation..."
	helm install opensandbox-controller $(HELM_CHART_PATH) \
		--set controller.image.repository=$(IMAGE_TAG_BASE) \
		--set controller.image.tag=v$(VERSION) \
		--namespace opensandbox-system \
		--create-namespace \
		--dry-run --debug

# Aggregate target: depends on helm-lint and helm-package and has no recipe of
# its own.
.PHONY: helm-all
helm-all: helm-lint helm-package ## Run all Helm-related tasks (lint and package)

================================================
FILE: kubernetes/PROJECT
================================================
# Code generated by tool. DO NOT EDIT.
# This file is used to track the info used to scaffold your project
# and allow the plugins properly work.
# More info: https://book.kubebuilder.io/reference/project-config.html
domain: opensandbox.io
layout:
- go.kubebuilder.io/v4
plugins:
  manifests.sdk.operatorframework.io/v2: {}
  scorecard.sdk.operatorframework.io/v2: {}
projectName: sandbox-k8s
repo: github.com/alibaba/OpenSandbox/sandbox-k8s
resources:
- api:
    crdVersion: v1
    namespaced: true
  controller: true
  domain: opensandbox.io
  group: sandbox
  kind: Sandbox
  path: github.com/alibaba/OpenSandbox/sandbox-k8s/api/v1alpha1
  version: v1alpha1
- api:
    crdVersion: v1
    namespaced: true
  controller: true
  domain: opensandbox.io
  group: sandbox
  kind: BatchSandbox
  path: github.com/alibaba/OpenSandbox/sandbox-k8s/api/v1alpha1
  version: v1alpha1
- api:
    crdVersion: v1
    namespaced: true
  controller: true
  domain: opensandbox.io
  group: sandbox
  kind: Pool
  path: github.com/alibaba/OpenSandbox/sandbox-k8s/api/v1alpha1
  version: v1alpha1
version: "3"


================================================
FILE: kubernetes/README-ZH.md
================================================
# OpenSandbox Kubernetes 控制器

[English](README.md) | [中文](README-ZH.md)

OpenSandbox Kubernetes 控制器，通过自定义资源管理沙箱环境。它在 Kubernetes 集群中提供**自动化沙箱生命周期管理**、**资源池化以实现快速供应**、**批处理沙箱创建**和**可选的任务编排**功能。

## 关键特性

- **灵活的沙箱创建**：在池化和非池化沙箱创建模式之间选择
- **批处理和单个交付**：支持单个沙箱（用于真实用户交互）和批处理沙箱交付（用于高吞吐量智能体强化学习场景）
- **可选任务调度**：集成任务编排，支持可选的分片任务模板以实现异构任务分发和定制化沙箱交付（例如，进程注入）
- **资源池化**：维护预热的资源池以实现快速沙箱供应
- **全面监控**：实时跟踪沙箱和任务状态

## 功能特性

### 批处理沙箱管理
BatchSandbox 自定义资源允许您创建和管理多个相同的沙箱环境。主要功能包括：
- **灵活的创建模式**：支持池化（使用资源池）和非池化沙箱创建
- **单个和批处理交付**：根据需要创建单个沙箱（replicas=1）或批处理沙箱（replicas=N）
- **可扩展的副本管理**：通过副本配置轻松控制沙箱实例数量
- **自动过期**：设置 TTL（生存时间）以自动清理过期沙箱
- **可选任务调度**：内置任务执行引擎，支持可选任务模板
- **详细状态报告**：关于副本、分配和任务状态的综合指标

### 资源池化
Pool 自定义资源维护一个预热的计算资源池，以实现快速沙箱供应：
- 可配置的缓冲区大小（最小和最大）以平衡资源可用性和成本
- 池容量限制以控制总体资源消耗
- 基于需求的自动资源分配和释放
- 实时状态监控，显示总数、已分配和可用资源

### 任务编排
集成的任务管理系统，在沙箱内执行自定义工作负载：
- **可选执行**：任务调度完全可选 - 可以在不带任务的情况下创建沙箱
- **基于进程的任务**：支持在沙箱环境中执行基于进程的任务
- **异构任务分发**：使用 shardTaskPatches 为批处理中的每个沙箱定制单独的任务

### 高级调度
智能资源管理功能：
- 最小和最大缓冲区设置，以确保资源可用性同时控制成本
- 池范围的容量限制，防止资源耗尽
- 基于需求的自动扩展

## 运行时 API 支持说明

- Kubernetes 运行时当前**不支持** `pause` / `resume` 生命周期 API。
- 对 Kubernetes 运行时调用这两个 API 会返回 `501 Not Implemented`。
- OpenSandbox 的 pause/resume 语义是保留容器进程内存态后再恢复；当前 Kubernetes provider 主要覆盖 create/get/list/delete/renew 流程。


## 与 [kubernetes-sigs/agent-sandbox](https://github.com/kubernetes-sigs/agent-sandbox) 的关系

BatchSandbox 并非重复实现 Agent-Sandbox 的基础功能，而是作为其补充，提供了额外的增强能力：

1. **批量 Sandbox 语义**：在强化学习（RL）训练等场景下，显著提升 Sandbox 的交付吞吐量
2. **Task 调度能力**：通过 Task 调度实现差异化 Sandbox 交付，例如在交付 Sandbox 之前向容器内注入自定义进程

因此，您可以根据具体应用场景选择合适的项目作为 Sandbox 底层运行时。

### 性能测试

BatchSandbox 与 Sig Agent-Sandbox 在吞吐量方面的性能对比测试。

**测试环境**

**Controller 组件配置**
- 资源规格：request: 12C32G, limit: 16C64G
- 并发配置：
  - **Sig Agent-Sandbox**：共 3 个 controller（sandbox、sandboxclaim、sandboxwarmpool），代码中未提供并发度配置，默认值为 1
  - **BatchSandbox**：共 2 个 controller，batchsandbox controller 并发度为 32，pool controller 并发度为 1

**Pool 配置**
- 镜像：busybox:latest
- 资源规格：0.1C256MB

> **补充说明**：虽然 BatchSandbox 的 batchsandbox-controller 并发度为 32，但测试用例中仅创建了一个 BatchSandbox 对象，实际等价于并发度为 1。因此在并发度方面，BatchSandbox 与 SIG Agent-Sandbox 保持一致。

**性能对比结果**

在都使用资源池的情况下，交付 100 个 Sandbox 的总耗时对比：

| 测试场景 | 总耗时 (秒) |
|---------|------------|
| SIG Agent-Sandbox (创建并发=1) | 76.35 |
| SIG Agent-Sandbox (创建并发=10) | 23.17 |
| SIG Agent-Sandbox (创建并发=50) | 33.85 |
| BatchSandbox | 0.92 |

**原因分析**

核心差异：Sig Agent-Sandbox 和 BatchSandbox 批量交付 N 个 Sandbox 的时间复杂度分别为 O(N) 和 O(1)。

**Sig Agent-Sandbox 原理**
- 每个 Sandbox 的交付流程需要执行以下写操作（写操作总数与 Sandbox 规模成正比）：
  1. 创建一个 SandboxClaim
  2. 创建一个 Sandbox
  3. 更新 Pod 一次（从资源池中接管 Pod）
  4. 更新 Sandbox Status 一次
  5. 更新 SandboxClaim Status 一次

**BatchSandbox 原理**
- 每批 Sandbox 的交付流程需要执行以下写操作（写操作总数与 Sandbox 规模无关）：
  1. 创建一个 BatchSandbox
  2. 更新 BatchSandbox annotation 一次（写入批分配结果）
  3. 更新 BatchSandbox status 一次

## 入门指南

![](images/deploy-example.gif)

### 先决条件
- go 版本 v1.24.0+
- docker 版本 17.03+
- kubectl 版本 v1.11.3+
- 访问 Kubernetes v1.21.1+ 集群

如果您没有 Kubernetes 集群的访问权限，可以使用 [kind](https://kind.sigs.k8s.io/) 创建一个本地 Kubernetes 集群进行测试。Kind 在 Docker 容器中运行 Kubernetes 节点，使得设置本地开发环境变得容易。

安装 kind：
- 从[发布页面](https://github.com/kubernetes-sigs/kind/releases)下载适用于您操作系统的发布二进制文件并将其移动到 `$PATH` 中
- 或使用包管理器：
  - macOS (Homebrew)：`brew install kind`
  - Windows (winget)：`winget install Kubernetes.kind`

安装 kind 后，使用以下命令创建集群：
```sh
kind create cluster
```

此命令默认创建单节点集群。要与其交互，请使用生成的 kubeconfig 运行 `kubectl`。

**Kind 用户的重要说明**：如果您使用的是 kind 集群，在使用 `make docker-build` 构建镜像后，需要将控制器和任务执行器镜像加载到 kind 节点中。这是因为 kind 在 Docker 容器中运行 Kubernetes 节点，无法直接访问本地 Docker 守护进程中的镜像。

使用以下命令将镜像加载到 kind 集群中：
```sh
kind load docker-image <controller-image-name>:<tag>
kind load docker-image <task-executor-image-name>:<tag>
```

例如，如果您使用 `make docker-build CONTROLLER_IMG=my-controller:latest` 构建镜像，则使用以下命令加载：
```sh
kind load docker-image my-controller:latest
```

完成后使用以下命令删除集群：
```sh
kind delete cluster
```

有关使用 kind 的更多详细说明，请参阅[官方 kind 文档](https://kind.sigs.k8s.io/docs/user/quick-start/)。

### 部署

此项目需要两个独立的镜像 - 一个用于控制器，另一个用于任务执行器组件。

#### 方式 1：使用 Helm 部署（推荐）

**从 GitHub Release 安装：**

您可以直接从 GitHub Releases 安装 OpenSandbox Controller。查看 [Releases 页面](https://github.com/alibaba/OpenSandbox/releases?q=helm%2Fopensandbox-controller&expanded=true) 了解所有可用版本。

```sh
# 将 <version> 替换为所需版本（例如：0.1.0）
helm install opensandbox-controller \
  https://github.com/alibaba/OpenSandbox/releases/download/helm/opensandbox-controller/<version>/opensandbox-controller-<version>.tgz \
  --namespace opensandbox-system \
  --create-namespace
```

具体版本示例：
```sh
helm install opensandbox-controller \
  https://github.com/alibaba/OpenSandbox/releases/download/helm/opensandbox-controller/0.1.0/opensandbox-controller-0.1.0.tgz \
  --namespace opensandbox-system \
  --create-namespace
```

您也可以先下载 chart 然后再安装：
```sh
# 下载 chart
wget https://github.com/alibaba/OpenSandbox/releases/download/helm/opensandbox-controller/<version>/opensandbox-controller-<version>.tgz

# 从本地文件安装
helm install opensandbox-controller ./opensandbox-controller-<version>.tgz \
  --namespace opensandbox-system \
  --create-namespace
```

**自定义安装：**

使用 `--set` 参数自定义配置：

```sh
# 示例：自定义资源限制
helm install opensandbox-controller \
  https://github.com/alibaba/OpenSandbox/releases/download/helm/opensandbox-controller/0.1.0/opensandbox-controller-0.1.0.tgz \
  --namespace opensandbox-system \
  --create-namespace \
  --set controller.replicaCount=2 \
  --set controller.resources.limits.cpu=1000m \
  --set controller.resources.limits.memory=512Mi

# 示例：自定义日志级别
helm install opensandbox-controller \
  https://github.com/alibaba/OpenSandbox/releases/download/helm/opensandbox-controller/0.1.0/opensandbox-controller-0.1.0.tgz \
  --namespace opensandbox-system \
  --create-namespace \
  --set controller.logLevel=debug
```

或使用 values 文件进行复杂配置：

```sh
# 创建自定义 values 文件
cat > custom-values.yaml <<EOF
controller:
  replicaCount: 2
  resources:
    limits:
      cpu: 1000m
      memory: 512Mi
    requests:
      cpu: 100m
      memory: 128Mi
  logLevel: debug
EOF

# 使用自定义 values 安装
helm install opensandbox-controller \
  https://github.com/alibaba/OpenSandbox/releases/download/helm/opensandbox-controller/0.1.0/opensandbox-controller-0.1.0.tgz \
  --namespace opensandbox-system \
  --create-namespace \
  -f custom-values.yaml
```

**从源码安装（用于开发）：**

如果您正在进行开发或需要自定义 chart：

1. **构建和推送您的镜像：**
   ```sh
   # 构建和推送控制器镜像
   make docker-build docker-push CONTROLLER_IMG=<some-registry>/opensandbox-controller:tag
   
   # 构建和推送任务执行器镜像
   make docker-build-task-executor docker-push-task-executor TASK_EXECUTOR_IMG=<some-registry>/opensandbox-task-executor:tag
   ```

2. **使用 Helm 安装：**
   ```sh
   helm install opensandbox-controller ./charts/opensandbox-controller \
     --set controller.image.repository=<some-registry>/opensandbox-controller \
     --set controller.image.tag=<tag> \
     --namespace opensandbox-system \
     --create-namespace
   ```

**验证安装：**

检查控制器是否运行：
```sh
kubectl get pods -n opensandbox-system
kubectl get deployment -n opensandbox-system

# 查看日志
kubectl logs -n opensandbox-system -l control-plane=controller-manager -f
```

**升级：**

```sh
# 升级到新版本
helm upgrade opensandbox-controller \
  https://github.com/alibaba/OpenSandbox/releases/download/helm/opensandbox-controller/<new-version>/opensandbox-controller-<new-version>.tgz \
  --namespace opensandbox-system
```

**卸载：**

```sh
helm uninstall opensandbox-controller -n opensandbox-system
```

有关更多配置选项和高级用法，请参阅 [Helm Chart README](charts/opensandbox-controller/README.md)。

#### 方式 2：使用 Kustomize 部署

1. **构建和推送您的镜像：**
   ```sh
   # 构建和推送控制器镜像
   make docker-build docker-push CONTROLLER_IMG=<some-registry>/opensandbox-controller:tag
   
   # 构建和推送任务执行器镜像
   make docker-build-task-executor docker-push-task-executor TASK_EXECUTOR_IMG=<some-registry>/opensandbox-task-executor:tag
   ```

   **注意：** 这些镜像应该发布在您指定的个人注册表中。需要能够从工作环境中拉取镜像。如果上述命令不起作用，请确保您对注册表具有适当的权限。

2. **将 CRD 安装到集群中：**
   ```sh
   make install
   ```

3. **将管理器部署到集群：**
   ```sh
   make deploy CONTROLLER_IMG=<some-registry>/opensandbox-controller:tag TASK_EXECUTOR_IMG=<some-registry>/opensandbox-task-executor:tag
   ```

   **注意**：您可能需要授予自己集群管理员权限或以管理员身份登录以确保您在运行命令之前具有集群管理员权限。

**Kind 用户的重要说明**：如果您使用的是 kind 集群，需要在构建镜像后将两个镜像都加载到 kind 节点中：
```sh
kind load docker-image <controller-image-name>:<tag>
kind load docker-image <task-executor-image-name>:<tag>
```

### 创建 BatchSandbox 和 Pool 资源

#### 基础示例
创建一个简单的非池化沙箱，不带任务调度：

```yaml
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: BatchSandbox
metadata:
  name: basic-batch-sandbox
spec:
  replicas: 2
  template:
    spec:
      containers:
      - name: sandbox-container
        image: nginx:latest
        ports:
        - containerPort: 80
```

应用批处理沙箱配置：
```sh
kubectl apply -f basic-batch-sandbox.yaml
```

检查批处理沙箱状态：
```sh
kubectl get batchsandbox basic-batch-sandbox -o wide
```

示例输出：
```sh
NAME                   DESIRED   TOTAL   ALLOCATED   READY   EXPIRE   AGE
basic-batch-sandbox    2         2       2           2       <none>   5m
```

状态字段说明：
- **DESIRED**：请求的沙箱数量
- **TOTAL**：创建的沙箱总数
- **ALLOCATED**：成功分配的沙箱数量
- **READY**：准备使用的沙箱数量
- **EXPIRE**：过期时间（未设置时为空）
- **AGE**：资源创建以来的时间

沙箱准备好后，您可以在注解中找到端点信息：
```sh
kubectl get batchsandbox basic-batch-sandbox -o jsonpath='{.metadata.annotations.sandbox\.opensandbox\.io/endpoints}'
```

这将显示交付沙箱的 IP 地址。

#### 高级示例

##### 不带任务的池化沙箱
首先，创建一个资源池：

```yaml
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: Pool
metadata:
  name: example-pool
spec:
  template:
    spec:
      containers:
      - name: sandbox-container
        image: nginx:latest
        ports:
        - containerPort: 80
  capacitySpec:
    bufferMax: 10
    bufferMin: 2
    poolMax: 20
    poolMin: 5
```

应用资源池配置：
```sh
kubectl apply -f pool-example.yaml
```

使用资源池创建一批沙箱：

```yaml
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: BatchSandbox
metadata:
  name: pooled-batch-sandbox
spec:
  replicas: 3
  poolRef: example-pool
```

应用批处理沙箱配置：
```sh
kubectl apply -f pooled-batch-sandbox.yaml
```

##### 带异构任务的池化沙箱
创建一批带有基于进程的异构任务的沙箱。为了使任务执行正常工作，任务执行器必须作为 sidecar 容器部署在资源池模板中，并与沙箱容器共享进程命名空间：

首先，创建一个带有任务执行器 sidecar 的资源池：

```yaml
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: Pool
metadata:
  name: task-example-pool
spec:
  template:
    spec:
      shareProcessNamespace: true
      containers:
      - name: sandbox-container
        image: ubuntu:latest
        command: ["sleep", "3600"]
      - name: task-executor
        image: <task-executor-image>:<tag>
        securityContext:
          capabilities:
            add: ["SYS_PTRACE"]
  capacitySpec:
    bufferMax: 10
    bufferMin: 2
    poolMax: 20
    poolMin: 5
```

使用我们刚刚创建的资源池创建一批带有基于进程的异构任务的沙箱：

```yaml
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: BatchSandbox
metadata:
  name: task-batch-sandbox
spec:
  replicas: 2
  poolRef: task-example-pool
  taskTemplate:
    spec:
      process:
        command: ["echo", "Default task"]
  shardTaskPatches:
  - spec:
      process:
        command: ["echo", "Custom task for sandbox 1"]
  - spec:
      process:
        command: ["echo", "Custom task for sandbox 2"]
        args: ["with", "additional", "arguments"]
```

应用批处理沙箱配置：
```sh
kubectl apply -f task-batch-sandbox.yaml
```

检查带任务的批处理沙箱状态：
```sh
kubectl get batchsandbox task-batch-sandbox -o wide
```

示例输出：
```sh
NAME                   DESIRED   TOTAL   ALLOCATED   READY   TASK_RUNNING   TASK_SUCCEED   TASK_FAILED   TASK_UNKNOWN   EXPIRE   AGE
task-batch-sandbox     2         2       2           2       0              2              0             0              <none>   5m
```

任务状态字段说明：
- **TASK_RUNNING**：当前正在执行的任务数
- **TASK_SUCCEED**：成功完成的任务数
- **TASK_FAILED**：失败的任务数
- **TASK_UNKNOWN**：状态未知的任务数

当您删除带有运行任务的 BatchSandbox 时，控制器将首先停止所有任务，然后删除 BatchSandbox 资源。一旦所有任务都成功终止，BatchSandbox 将被完全删除，沙箱将返回到资源池中以供重用。

删除 BatchSandbox：
```sh
kubectl delete batchsandbox task-batch-sandbox
```

您可以通过观察 BatchSandbox 状态来监控删除过程：
```sh
kubectl get batchsandbox task-batch-sandbox -w
```

### 监控资源
检查资源池和批处理沙箱的状态：
```sh
# 查看资源池状态
kubectl get pools

# 查看批处理沙箱状态
kubectl get batchsandboxes

# 获取特定资源的详细信息
kubectl describe pool example-pool
kubectl describe batchsandbox example-batch-sandbox
```

## 项目结构

```
├── api/
│   └── v1alpha1/          # 自定义资源定义（BatchSandbox, Pool）
├── cmd/
│   ├── controller/         # 主控制器管理器入口点
│   └── task-executor/     # 任务执行器二进制文件
├── config/
│   ├── crd/               # 自定义资源定义清单
│   ├── default/           # 控制器部署的默认配置
│   ├── manager/           # 控制器管理器配置
│   ├── rbac/              # 基于角色的访问控制清单
│   └── samples/           # 资源的示例 YAML 清单
├── hack/                  # 开发脚本和工具
├── images/                # 文档图片
├── internal/
│   ├── controller/        # 核心控制器实现
│   ├── scheduler/         # 资源分配和调度逻辑
│   ├── task-executor/     # 任务执行引擎内部实现
│   └── utils/             # 实用函数和助手
├── pkg/
│   └── task-executor/     # 共享的任务执行器包
└── test/                  # 测试套件
```

## 贡献
欢迎为 OpenSandbox Kubernetes 控制器项目做出贡献。请随时提交问题、功能请求和拉取请求。

**注意：** 运行 `make help` 以获取所有潜在 `make` 目标的更多信息

更多信息请参见 [Kubebuilder 文档](https://book.kubebuilder.io/introduction.html)

## 许可证
此项目在 Apache 2.0 许可证下开源。

您可以将 OpenSandbox 用于个人或商业项目，只要遵守许可证条款即可。


================================================
FILE: kubernetes/README.md
================================================
# OpenSandbox Kubernetes Controller

[English](README.md) | [中文](README-ZH.md)

OpenSandbox Kubernetes Controller is a Kubernetes operator that manages sandbox environments through custom resources. It provides **automated sandbox lifecycle management**, **resource pooling for fast provisioning**, **batch sandbox creation**, and **optional task orchestration** capabilities in Kubernetes clusters.

## Key Features

- **Flexible Sandbox Creation**: Choose between pooled and non-pooled sandbox creation modes
- **Batch and Individual Delivery**: Support both single sandbox (for real-user interactions) and batch sandbox delivery (for high-throughput agentic-RL scenarios)
- **Optional Task Scheduling**: Integrated task orchestration with optional shard task templates for heterogeneous task distribution and customized sandbox delivery (e.g., process injection)
- **Resource Pooling**: Maintain pre-warmed resource pools for rapid sandbox provisioning
- **Comprehensive Monitoring**: Real-time status tracking of sandboxes and tasks

## Features

### Batch Sandbox Management
The BatchSandbox custom resource allows you to create and manage multiple identical sandbox environments. Key capabilities include:
- **Flexible Creation Modes**: Support both pooled (using resource pools) and non-pooled sandbox creation
- **Single and Batch Delivery**: Create single sandboxes (replicas=1) or batches of sandboxes (replicas=N) as needed
- **Scalable Replica Management**: Easily control the number of sandbox instances through replica configuration
- **Automatic Expiration**: Set TTL (time-to-live) for automatic cleanup of expired sandboxes
- **Optional Task Scheduling**: Built-in task execution engine with support for optional task templates
- **Detailed Status Reporting**: Comprehensive metrics on replicas, allocations, and task states

### Resource Pooling
The Pool custom resource maintains a pool of pre-warmed compute resources to enable rapid sandbox provisioning:
- Configurable buffer sizes (minimum and maximum) to balance resource availability and cost
- Pool capacity limits to control overall resource consumption
- Automatic resource allocation and deallocation based on demand
- Real-time status monitoring showing total, allocated, and available resources

### Task Orchestration
Integrated task management system that executes custom workloads within sandboxes:
- **Optional Execution**: Task scheduling is completely optional - sandboxes can be created without tasks
- **Process-Based Tasks**: Support for process-based tasks that execute within the sandbox environment
- **Heterogeneous Task Distribution**: Customize individual tasks for each sandbox in a batch using shardTaskPatches

### Advanced Scheduling
Intelligent resource management features:
- Minimum and maximum buffer settings to ensure resource availability while controlling costs
- Pool-wide capacity limits to prevent resource exhaustion
- Automatic scaling based on demand

## Runtime API Support Notes

- `pause` / `resume` lifecycle APIs are currently **not supported** by the Kubernetes runtime.
- Calling these APIs against Kubernetes runtime returns `501 Not Implemented`.
- Pause/resume semantics in OpenSandbox mean preserving in-memory process state (container-level suspend/resume). The Kubernetes provider currently focuses on create/get/list/delete/renew workflows.


## Relationship with [kubernetes-sigs/agent-sandbox](https://github.com/kubernetes-sigs/agent-sandbox)

BatchSandbox does not duplicate the basic functionality of Agent-Sandbox, but rather complements it with additional enhanced capabilities:

1. **Batch Sandbox Semantics**: Significantly improves Sandbox delivery throughput in scenarios such as Reinforcement Learning (RL) training
2. **Task Scheduling Capability**: Enables differentiated Sandbox delivery through Task scheduling, such as injecting custom processes into containers before Sandbox delivery

You can therefore choose whichever project best fits your use case as the underlying runtime for your sandboxes.

### Performance Testing

A throughput comparison between BatchSandbox and SIG Agent-Sandbox.

**Test Environment**

**Controller Component Configuration**
- Resource Specifications: request: 12C32G, limit: 16C64G
- Concurrency Configuration:
  - **Sig Agent-Sandbox**: 3 controllers (sandbox, sandboxclaim, sandboxwarmpool), no concurrency configuration provided in the code, default value is 1
  - **BatchSandbox**: 2 controllers, batchsandbox controller concurrency is 32, pool controller concurrency is 1

**Pool Configuration**
- Image: busybox:latest
- Resource Specifications: 0.1C256MB

> **Additional Note**: Although the batchsandbox-controller of BatchSandbox has a concurrency of 32, only one BatchSandbox object was created in the test cases, which is actually equivalent to a concurrency of 1. Therefore, in terms of concurrency, BatchSandbox is consistent with SIG Agent-Sandbox.

**Performance Comparison Results**

When both use resource pools, the total time comparison for delivering 100 Sandboxes:

| Test Scenario | Total Time (seconds) |
|---------------|---------------------|
| SIG Agent-Sandbox (concurrency=1) | 76.35 |
| SIG Agent-Sandbox (concurrency=10) | 23.17 |
| SIG Agent-Sandbox (concurrency=50) | 33.85 |
| BatchSandbox | 0.92 |

**Analysis**

Core Difference: The time complexity of Sig Agent-Sandbox and BatchSandbox for batch delivery of N Sandboxes is O(N) and O(1) respectively.

**Sig Agent-Sandbox Architecture**
- Each Sandbox delivery process requires the following write operations (total write operations are proportional to Sandbox scale):
  1. Create a SandboxClaim
  2. Create a Sandbox
  3. Update Pod once (adopt Pod from resource pool)
  4. Update Sandbox Status once
  5. Update SandboxClaim Status once

**BatchSandbox Architecture**
- Each batch Sandbox delivery process requires the following write operations (total write operations are independent of Sandbox scale):
  1. Create a BatchSandbox
  2. Update BatchSandbox annotation once (write batch allocation results)
  3. Update BatchSandbox status once

## Getting Started
![](images/deploy-example.gif)

### Prerequisites
- go version v1.24.0+
- docker version 17.03+
- kubectl version v1.11.3+
- Access to a Kubernetes v1.21.1+ cluster

If you don't have access to a Kubernetes cluster, you can use [kind](https://kind.sigs.k8s.io/) to create a local Kubernetes cluster for testing purposes. Kind runs Kubernetes nodes in Docker containers, making it easy to set up a local development environment.

To install kind:
- Download the release binary for your OS from the [releases page](https://github.com/kubernetes-sigs/kind/releases) and move it into your `$PATH`
- Or use a package manager:
  - macOS (Homebrew): `brew install kind`
  - Windows (winget): `winget install Kubernetes.kind`

After installing kind, create a cluster with:
```sh
kind create cluster
```

This command creates a single-node cluster by default. To interact with it, use `kubectl` with the generated kubeconfig.

**Important Note for Kind Users**: If you're using a kind cluster, you need to load the controller and task-executor images into the kind node after building them with `make docker-build`. This is because kind runs Kubernetes nodes in Docker containers and cannot directly access images from your local Docker daemon.

Load the images into the kind cluster with:
```sh
kind load docker-image <controller-image-name>:<tag>
kind load docker-image <task-executor-image-name>:<tag>
```

For example, if you built your images with `make docker-build CONTROLLER_IMG=my-controller:latest`, you would load them with:
```sh
kind load docker-image my-controller:latest
```

Delete the cluster when you're done with:
```sh
kind delete cluster
```

For more detailed instructions on using kind, please refer to the [official kind documentation](https://kind.sigs.k8s.io/docs/user/quick-start/).

### Deployment

This project requires two separate images - one for the controller and another for the task-executor component.

#### Option 1: Deploy with Helm (Recommended)

**Install from GitHub Release:**

You can install OpenSandbox Controller directly from GitHub Releases. Check the [Releases page](https://github.com/alibaba/OpenSandbox/releases?q=helm%2Fopensandbox-controller&expanded=true) for all available versions.

```sh
# Replace <version> with the desired version (e.g., 0.1.0)
helm install opensandbox-controller \
  https://github.com/alibaba/OpenSandbox/releases/download/helm/opensandbox-controller/<version>/opensandbox-controller-<version>.tgz \
  --namespace opensandbox-system \
  --create-namespace
```

Example with specific version:
```sh
helm install opensandbox-controller \
  https://github.com/alibaba/OpenSandbox/releases/download/helm/opensandbox-controller/0.1.0/opensandbox-controller-0.1.0.tgz \
  --namespace opensandbox-system \
  --create-namespace
```

You can also download the chart first and then install:
```sh
# Download the chart
wget https://github.com/alibaba/OpenSandbox/releases/download/helm/opensandbox-controller/<version>/opensandbox-controller-<version>.tgz

# Install from local file
helm install opensandbox-controller ./opensandbox-controller-<version>.tgz \
  --namespace opensandbox-system \
  --create-namespace
```

**Customize Installation:**

Use `--set` flags to customize the configuration:

```sh
# Example: Custom resource limits
helm install opensandbox-controller \
  https://github.com/alibaba/OpenSandbox/releases/download/helm/opensandbox-controller/0.1.0/opensandbox-controller-0.1.0.tgz \
  --namespace opensandbox-system \
  --create-namespace \
  --set controller.replicaCount=2 \
  --set controller.resources.limits.cpu=1000m \
  --set controller.resources.limits.memory=512Mi

# Example: Custom log level
helm install opensandbox-controller \
  https://github.com/alibaba/OpenSandbox/releases/download/helm/opensandbox-controller/0.1.0/opensandbox-controller-0.1.0.tgz \
  --namespace opensandbox-system \
  --create-namespace \
  --set controller.logLevel=debug
```

Or use a values file for complex configurations:

```sh
# Create a custom values file
cat > custom-values.yaml <<EOF
controller:
  replicaCount: 2
  resources:
    limits:
      cpu: 1000m
      memory: 512Mi
    requests:
      cpu: 100m
      memory: 128Mi
  logLevel: debug
EOF

# Install with custom values
helm install opensandbox-controller \
  https://github.com/alibaba/OpenSandbox/releases/download/helm/opensandbox-controller/0.1.0/opensandbox-controller-0.1.0.tgz \
  --namespace opensandbox-system \
  --create-namespace \
  -f custom-values.yaml
```

**Install from source (for development):**

If you're developing or need to customize the chart:

1. **Build and push your images:**
   ```sh
   # Build and push the controller image
   make docker-build docker-push CONTROLLER_IMG=<some-registry>/opensandbox-controller:tag
   
   # Build and push the task-executor image
   make docker-build-task-executor docker-push-task-executor TASK_EXECUTOR_IMG=<some-registry>/opensandbox-task-executor:tag
   ```

2. **Install with Helm:**
   ```sh
   helm install opensandbox-controller ./charts/opensandbox-controller \
     --set controller.image.repository=<some-registry>/opensandbox-controller \
     --set controller.image.tag=<tag> \
     --namespace opensandbox-system \
     --create-namespace
   ```

**Verify Installation:**

Check the controller is running:
```sh
kubectl get pods -n opensandbox-system
kubectl get deployment -n opensandbox-system

# Check logs
kubectl logs -n opensandbox-system -l control-plane=controller-manager -f
```

**Upgrade:**

```sh
# Upgrade to a new version
helm upgrade opensandbox-controller \
  https://github.com/alibaba/OpenSandbox/releases/download/helm/opensandbox-controller/<new-version>/opensandbox-controller-<new-version>.tgz \
  --namespace opensandbox-system
```

**Uninstall:**

```sh
helm uninstall opensandbox-controller -n opensandbox-system
```

For more configuration options and advanced usage, see the [Helm Chart README](charts/opensandbox-controller/README.md).

#### Option 2: Deploy with Kustomize

1. **Build and push your images:**
   ```sh
   # Build and push the controller image
   make docker-build docker-push CONTROLLER_IMG=<some-registry>/opensandbox-controller:tag
   
   # Build and push the task-executor image
   make docker-build-task-executor docker-push-task-executor TASK_EXECUTOR_IMG=<some-registry>/opensandbox-task-executor:tag
   ```

   **NOTE:** These images must be published to the registry you specified, and your working environment must have access to pull them. If the commands above fail, make sure you have the proper permissions for the registry.

2. **Install the CRDs into the cluster:**
   ```sh
   make install
   ```

3. **Deploy the Manager to the cluster:**
   ```sh
   make deploy CONTROLLER_IMG=<some-registry>/opensandbox-controller:tag TASK_EXECUTOR_IMG=<some-registry>/opensandbox-task-executor:tag
   ```

   **NOTE:** These commands may require cluster-admin privileges. If they fail, grant yourself cluster-admin privileges or log in as an admin before running them again.

**Important Note for Kind Users**: If you're using a kind cluster, you need to load both images into the kind node after building them:
```sh
kind load docker-image <controller-image-name>:<tag>
kind load docker-image <task-executor-image-name>:<tag>
```

### Creating BatchSandbox and Pool Resources

#### Basic Example
Create a simple non-pooled sandbox without task scheduling:

```yaml
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: BatchSandbox
metadata:
  name: basic-batch-sandbox
spec:
  replicas: 2
  template:
    spec:
      containers:
      - name: sandbox-container
        image: nginx:latest
        ports:
        - containerPort: 80
```

Apply the batch sandbox configuration:
```sh
kubectl apply -f basic-batch-sandbox.yaml
```

Check the status of your batch sandbox:
```sh
kubectl get batchsandbox basic-batch-sandbox -o wide
```

Example output:
```sh
NAME                   DESIRED   TOTAL   ALLOCATED   READY   EXPIRE   AGE
basic-batch-sandbox    2         2       2           2       <none>   5m
```

Status field explanations:
- **DESIRED**: The number of sandboxes requested
- **TOTAL**: The total number of sandboxes created
- **ALLOCATED**: The number of sandboxes successfully allocated
- **READY**: The number of sandboxes ready for use
- **EXPIRE**: Expiration time (empty if not set)
- **AGE**: Time since the resource was created

After the sandboxes are ready, you can find the endpoint information in the annotations:
```sh
kubectl get batchsandbox basic-batch-sandbox -o jsonpath='{.metadata.annotations.sandbox\.opensandbox\.io/endpoints}'
```

This will show the IP addresses of the delivered sandboxes.

#### Advanced Examples

##### Pooled Sandbox Without Task
First, create a resource pool:

```yaml
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: Pool
metadata:
  name: example-pool
spec:
  template:
    spec:
      containers:
      - name: sandbox-container
        image: nginx:latest
        ports:
        - containerPort: 80
  capacitySpec:
    bufferMax: 10
    bufferMin: 2
    poolMax: 20
    poolMin: 5
```

Apply the pool configuration:
```sh
kubectl apply -f pool-example.yaml
```

Create a batch of sandboxes using the pool:

```yaml
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: BatchSandbox
metadata:
  name: pooled-batch-sandbox
spec:
  replicas: 3
  poolRef: example-pool
```

Apply the batch sandbox configuration:
```sh
kubectl apply -f pooled-batch-sandbox.yaml
```

##### Pooled Sandbox With Heterogeneous Tasks
Create a batch of sandboxes with process-based heterogeneous tasks. For task execution to work properly, the task-executor must be deployed as a sidecar container in the pool template and must share the process namespace with the sandbox container.

First, create a resource pool with the task-executor sidecar:

```yaml
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: Pool
metadata:
  name: task-example-pool
spec:
  template:
    spec:
      shareProcessNamespace: true
      containers:
      - name: sandbox-container
        image: ubuntu:latest
        command: ["sleep", "3600"]
      - name: task-executor
        image: <task-executor-image>:<tag>
        securityContext:
          capabilities:
            add: ["SYS_PTRACE"]
  capacitySpec:
    bufferMax: 10
    bufferMin: 2
    poolMax: 20
    poolMin: 5
```

Create a batch of sandboxes with process-based heterogeneous tasks using the pool we just created:

```yaml
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: BatchSandbox
metadata:
  name: task-batch-sandbox
spec:
  replicas: 2
  poolRef: task-example-pool
  taskTemplate:
    spec:
      process:
        command: ["echo", "Default task"]
  shardTaskPatches:
  - spec:
      process:
        command: ["echo", "Custom task for sandbox 1"]
  - spec:
      process:
        command: ["echo", "Custom task for sandbox 2"]
        args: ["with", "additional", "arguments"]
```

Apply the batch sandbox configuration:
```sh
kubectl apply -f task-batch-sandbox.yaml
```

Check the status of your batch sandbox with tasks:
```sh
kubectl get batchsandbox task-batch-sandbox -o wide
```

Example output:
```sh
NAME                   DESIRED   TOTAL   ALLOCATED   READY   TASK_RUNNING   TASK_SUCCEED   TASK_FAILED   TASK_UNKNOWN   EXPIRE   AGE
task-batch-sandbox     2         2       2           2       0              2              0             0              <none>   5m
```

Task status field explanations:
- **TASK_RUNNING**: The number of tasks currently executing
- **TASK_SUCCEED**: The number of tasks that have completed successfully
- **TASK_FAILED**: The number of tasks that have failed
- **TASK_UNKNOWN**: The number of tasks with unknown status

When you delete a BatchSandbox with running tasks, the controller will first stop all tasks before deleting the BatchSandbox resource. Once all tasks are successfully terminated, the BatchSandbox will be completely removed, and the sandboxes will be returned to the pool for reuse.

To delete the BatchSandbox:
```sh
kubectl delete batchsandbox task-batch-sandbox
```

You can monitor the deletion process by watching the BatchSandbox status:
```sh
kubectl get batchsandbox task-batch-sandbox -w
```

### Monitoring Resources
Check the status of your pools and batch sandboxes:

```sh
# View pool status
kubectl get pools

# View batch sandbox status
kubectl get batchsandboxes

# Get detailed information about a specific resource
kubectl describe pool example-pool
kubectl describe batchsandbox example-batch-sandbox
```

## Project Structure

```
├── api/
│   └── v1alpha1/          # Custom resource definitions (BatchSandbox, Pool)
├── cmd/
│   ├── controller/         # Main controller manager entry point
│   └── task-executor/     # Task executor binary
├── config/
│   ├── crd/               # Custom resource definitions manifests
│   ├── default/           # Default configuration for controller deployment
│   ├── manager/           # Controller manager configuration
│   ├── rbac/              # Role-based access control manifests
│   └── samples/           # Sample YAML manifests for resources
├── hack/                  # Development scripts and tools
├── images/                # Documentation images
├── internal/
│   ├── controller/        # Core controller implementations
│   ├── scheduler/         # Resource allocation and scheduling logic
│   ├── task-executor/     # Task execution engine internals
│   └── utils/             # Utility functions and helpers
├── pkg/
│   └── task-executor/     # Shared task executor packages
└── test/                  # Test suites and utilities
```

## Contributing
We welcome contributions to the OpenSandbox Kubernetes Controller project. Please feel free to submit issues, feature requests, and pull requests.

**NOTE:** Run `make help` for more information on all available `make` targets.

More information can be found via the [Kubebuilder Documentation](https://book.kubebuilder.io/introduction.html)

## License
This project is open source under the Apache 2.0 License.

You can use OpenSandbox for personal or commercial projects in compliance with the license terms.


================================================
FILE: kubernetes/apis/sandbox/v1alpha1/batchsandbox_types.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package v1alpha1

import (
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	runtime "k8s.io/apimachinery/pkg/runtime"
)

// BatchSandboxSpec defines the desired state of BatchSandbox.
type BatchSandboxSpec struct {
	// Replicas is the desired number of sandbox replicas. Defaults to 1.
	// +kubebuilder:validation:Required
	// +kubebuilder:validation:Minimum=0
	// +kubebuilder:default=1
	Replicas *int32 `json:"replicas,omitempty"`
	// PoolRef is the name of the Pool resource used for pooled sandbox creation.
	// Mutually exclusive with Template: use PoolRef for pool-based allocation or Template for direct sandbox creation.
	// +optional
	// +kubebuilder:validation:Optional
	PoolRef string `json:"poolRef,omitempty"`
	// +optional
	// Template describes the pods that will be created.
	// Mutually exclusive with PoolRef.
	// +kubebuilder:pruning:PreserveUnknownFields
	// +kubebuilder:validation:Schemaless
	// +kubebuilder:validation:Optional
	Template *corev1.PodTemplateSpec `json:"template"`
	// ShardPatches holds patches that are applied to the Template for BatchSandbox.
	// +kubebuilder:pruning:PreserveUnknownFields
	// +kubebuilder:validation:Schemaless
	// +optional
	// +kubebuilder:validation:Optional
	ShardPatches []runtime.RawExtension `json:"shardPatches,omitempty"`
	// ExpireTime is the absolute time at which the batch-sandbox is deleted.
	// If a time in the past is provided, the batch-sandbox will be deleted immediately.
	// +optional
	// +kubebuilder:validation:Format="date-time"
	// +kubebuilder:validation:Optional
	ExpireTime *metav1.Time `json:"expireTime,omitempty"`
	// TaskTemplate is a custom task spec that is automatically dispatched after the sandbox is successfully created.
	// The Sandbox is responsible for managing the lifecycle of the task.
	// +optional
	// +kubebuilder:pruning:PreserveUnknownFields
	// +kubebuilder:validation:Schemaless
	// +kubebuilder:validation:Optional
	TaskTemplate *TaskTemplateSpec `json:"taskTemplate,omitempty"`
	// ShardTaskPatches holds patches that are applied to the TaskTemplate for individual tasks.
	// +kubebuilder:pruning:PreserveUnknownFields
	// +kubebuilder:validation:Schemaless
	// +optional
	// +kubebuilder:validation:Optional
	ShardTaskPatches []runtime.RawExtension `json:"shardTaskPatches,omitempty"`
	// TaskResourcePolicyWhenCompleted specifies how resources should be handled once a task reaches a completed state (SUCCEEDED or FAILED).
	// - Retain: Keep the resources until the BatchSandbox is deleted.
	// - Release: Free the resources immediately when the task completes.
	// Defaults to Retain.
	// +optional
	// +kubebuilder:default=Retain
	// +kubebuilder:validation:Optional
	TaskResourcePolicyWhenCompleted *TaskResourcePolicy `json:"taskResourcePolicyWhenCompleted,omitempty"`
}

// TaskResourcePolicy names how resources are handled once a task reaches a completed state.
// It is the value type of BatchSandboxSpec.TaskResourcePolicyWhenCompleted.
type TaskResourcePolicy string

const (
	// TaskResourcePolicyRetain keeps the resources until the BatchSandbox is deleted.
	TaskResourcePolicyRetain  TaskResourcePolicy = "Retain"
	// TaskResourcePolicyRelease frees the resources immediately when the task completes.
	TaskResourcePolicyRelease TaskResourcePolicy = "Release"
)

// BatchSandboxStatus defines the observed state of BatchSandbox.
type BatchSandboxStatus struct {
	// ObservedGeneration is the most recent generation observed for this BatchSandbox. It corresponds to the
	// BatchSandbox's generation, which is updated on mutation by the API Server.
	ObservedGeneration int64 `json:"observedGeneration,omitempty"`
	// Replicas is the number of actual Pods.
	Replicas int32 `json:"replicas"`
	// Allocated is the number of Pods that are actually scheduled.
	Allocated int32 `json:"allocated"`
	// Ready is the number of Pods that are actually Ready.
	Ready int32 `json:"ready"`
	// TaskRunning is the number of tasks in the Running state.
	TaskRunning int32 `json:"taskRunning"`
	// TaskSucceed is the number of tasks in the Succeed state.
	TaskSucceed int32 `json:"taskSucceed"`
	// TaskFailed is the number of tasks in the Failed state.
	TaskFailed int32 `json:"taskFailed"`
	// TaskPending is the number of Pending tasks, i.e. tasks that are still unassigned.
	TaskPending int32 `json:"taskPending"`
	// TaskUnknown is the number of tasks in the Unknown state.
	TaskUnknown int32 `json:"taskUnknown"`
}

// +genclient
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:resource:shortName=bsbx
// +kubebuilder:printcolumn:name="DESIRED",type="integer",JSONPath=".spec.replicas",description="The desired number of pods."
// +kubebuilder:printcolumn:name="TOTAL",type="integer",JSONPath=".status.replicas",description="The number of currently all pods."
// +kubebuilder:printcolumn:name="ALLOCATED",type="integer",JSONPath=".status.allocated",description="The number of currently all allocated pods."
// +kubebuilder:printcolumn:name="Ready",type="integer",JSONPath=".status.ready",description="The number of currently all ready pods."
// +kubebuilder:printcolumn:name="TASK_RUNNING",type="integer",priority=1,JSONPath=".status.taskRunning",description="The number of currently all running tasks."
// +kubebuilder:printcolumn:name="TASK_SUCCEED",type="integer",priority=1,JSONPath=".status.taskSucceed",description="The number of currently all succeed tasks."
// +kubebuilder:printcolumn:name="TASK_FAILED",type="integer",priority=1,JSONPath=".status.taskFailed",description="The number of currently all failed tasks."
// +kubebuilder:printcolumn:name="TASK_UNKNOWN",type="integer",priority=1,JSONPath=".status.taskUnknown",description="The number of currently all unknown tasks."
// +kubebuilder:printcolumn:name="EXPIRE",type="string",JSONPath=".spec.expireTime",description="sandbox expire time"
// +kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp",description="CreationTimestamp is a timestamp representing the server time when this object was created. It is not guaranteed to be set in happens-before order across separate operations. Clients may not set this value. It is represented in RFC3339 form and is in UTC."
// BatchSandbox is the Schema for the batchsandboxes API.
// Spec holds the desired state and Status holds the observed state.
type BatchSandbox struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	Spec   BatchSandboxSpec   `json:"spec,omitempty"`
	Status BatchSandboxStatus `json:"status,omitempty"`
}

// +kubebuilder:object:root=true

// BatchSandboxList contains a list of BatchSandbox.
type BatchSandboxList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	// Items is the list of BatchSandbox objects.
	Items           []BatchSandbox `json:"items"`
}

// init registers the BatchSandbox object and list types with the package SchemeBuilder.
func init() {
	SchemeBuilder.Register(
		new(BatchSandbox),
		new(BatchSandboxList),
	)
}

// TaskTemplateSpec is the template wrapper around the TaskSpec of a task.
type TaskTemplateSpec struct {
	// Spec is the specification of the task.
	// +optional
	Spec TaskSpec `json:"spec,omitempty"`
}

// TaskSpec describes a task and its execution limits.
type TaskSpec struct {
	// Process describes a process-based task.
	// +optional
	Process *ProcessTask `json:"process,omitempty"`
	// TimeoutSeconds specifies the maximum duration in seconds for task execution.
	// If exceeded, the task executor should terminate the task.
	// +optional
	TimeoutSeconds *int64 `json:"timeoutSeconds,omitempty"`
}

// ProcessTask describes a task that is executed as a process.
type ProcessTask struct {
	// Command is the entrypoint command of the task.
	// +kubebuilder:validation:Required
	Command []string `json:"command"`
	// Args are the arguments to the entrypoint.
	// +optional
	Args []string `json:"args,omitempty"`
	// Env is the list of environment variables to set in the task.
	// +optional
	// +patchMergeKey=name
	// +patchStrategy=merge
	Env []corev1.EnvVar `json:"env,omitempty"`
	// WorkingDir is the working directory of the task.
	// +optional
	WorkingDir string `json:"workingDir,omitempty"`
}

// TaskStatus reports the current state and the last termination state of a task.
type TaskStatus struct {
	// State holds details about the task's current condition.
	// +optional
	State TaskState `json:"state,omitempty"`
	// LastTerminationState holds details about the task's last termination condition.
	// It is serialized under the JSON key "lastState".
	// +optional
	LastTerminationState TaskState `json:"lastState,omitempty"`
}

// TaskState holds a possible state of a task.
// Only one of its members may be specified.
// If none of them is specified, the default one is TaskStateWaiting.
type TaskState struct {
	// Waiting holds details about a waiting task.
	// +optional
	Waiting *TaskStateWaiting `json:"waiting,omitempty"`
	// Running holds details about a running task.
	// +optional
	Running *TaskStateRunning `json:"running,omitempty"`
	// Terminated holds details about a terminated task.
	// +optional
	Terminated *TaskStateTerminated `json:"terminated,omitempty"`
}

// TaskStateWaiting is the waiting state of a task.
type TaskStateWaiting struct {
	// Reason is a brief reason why the task is not yet running.
	// +optional
	Reason string `json:"reason,omitempty"`
	// Message is a message regarding why the task is not yet running.
	// +optional
	Message string `json:"message,omitempty"`
}

// TaskStateRunning is the running state of a task.
type TaskStateRunning struct {
	// StartedAt is the time at which the task was last (re-)started.
	// +optional
	StartedAt metav1.Time `json:"startedAt,omitempty"`
}

// TaskStateTerminated is the terminated state of a task.
type TaskStateTerminated struct {
	// ExitCode is the exit status from the last termination of the task.
	// Unlike the other fields it has no omitempty tag, so it is always serialized.
	ExitCode int32 `json:"exitCode"`
	// Signal is the signal from the last termination of the task.
	// +optional
	Signal int32 `json:"signal,omitempty"`
	// Reason is a brief reason from the last termination of the task.
	// +optional
	Reason string `json:"reason,omitempty"`
	// Message is a message regarding the last termination of the task.
	// +optional
	Message string `json:"message,omitempty"`
	// StartedAt is the time at which the previous execution of the task started.
	// +optional
	StartedAt metav1.Time `json:"startedAt,omitempty"`
	// FinishedAt is the time at which the task last terminated.
	// +optional
	FinishedAt metav1.Time `json:"finishedAt,omitempty"`
}


================================================
FILE: kubernetes/apis/sandbox/v1alpha1/doc.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +k8s:openapi-gen=true
// +groupName=sandbox.opensandbox.io
package v1alpha1


================================================
FILE: kubernetes/apis/sandbox/v1alpha1/groupversion_info.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package v1alpha1 contains API Schema definitions for the sandbox v1alpha1 API group.
// +kubebuilder:object:generate=true
// +groupName=sandbox.opensandbox.io
package v1alpha1

import (
	"k8s.io/apimachinery/pkg/runtime/schema"
	"sigs.k8s.io/controller-runtime/pkg/scheme"
)

// GroupVersion is the group/version under which the objects of this package are registered.
var GroupVersion = schema.GroupVersion{
	Group:   "sandbox.opensandbox.io",
	Version: "v1alpha1",
}

// SchemeGroupVersion is an alias for GroupVersion, kept to match code-generator expectations.
var SchemeGroupVersion = GroupVersion

// SchemeBuilder is used to add Go types to the GroupVersionKind scheme.
var SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}

// AddToScheme adds the types in this group-version to the given scheme.
var AddToScheme = SchemeBuilder.AddToScheme

// Resource qualifies the given bare resource name with this package's API group
// and returns the resulting GroupResource.
func Resource(resource string) schema.GroupResource {
	// Build the fully versioned resource first, then drop the version part.
	versioned := SchemeGroupVersion.WithResource(resource)
	return versioned.GroupResource()
}


================================================
FILE: kubernetes/apis/sandbox/v1alpha1/pool_types.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package v1alpha1

import (
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// EDIT THIS FILE!  THIS IS SCAFFOLDING FOR YOU TO OWN!
// NOTE: json tags are required.  Any new fields you add must have json tags for the fields to be serialized.

// PoolSpec defines the desired state of Pool.
type PoolSpec struct {
	// Template is the Pod template used to create pre-warmed nodes in the pool.
	// +kubebuilder:pruning:PreserveUnknownFields
	// +kubebuilder:validation:Schemaless
	// +kubebuilder:validation:Optional
	Template *corev1.PodTemplateSpec `json:"template"`
	// CapacitySpec controls the size of the resource pool.
	// +kubebuilder:validation:Required
	CapacitySpec CapacitySpec `json:"capacitySpec"`
}

// CapacitySpec controls the size of the resource pool through buffer and pool bounds.
// All four bounds are required and must be at least 0.
type CapacitySpec struct {
	// BufferMax is the maximum number of nodes kept in the warm buffer.
	// +kubebuilder:validation:Minimum=0
	// +kubebuilder:validation:Required
	BufferMax int32 `json:"bufferMax"`
	// BufferMin is the minimum number of nodes that must remain in the buffer.
	// +kubebuilder:validation:Minimum=0
	// +kubebuilder:validation:Required
	BufferMin int32 `json:"bufferMin"`
	// PoolMax is the maximum total number of nodes allowed in the entire pool.
	// +kubebuilder:validation:Minimum=0
	// +kubebuilder:validation:Required
	PoolMax int32 `json:"poolMax"`
	// PoolMin is the minimum total size of the pool.
	// +kubebuilder:validation:Minimum=0
	// +kubebuilder:validation:Required
	PoolMin int32 `json:"poolMin"`
}

// PoolStatus defines the observed state of Pool.
type PoolStatus struct {
	// ObservedGeneration is the most recent generation observed for this Pool. It corresponds to the
	// Pool's generation, which is updated on mutation by the API Server.
	ObservedGeneration int64 `json:"observedGeneration,omitempty"`
	// Revision is the latest version of the pool.
	Revision string `json:"revision"`
	// Total is the total number of nodes in the pool.
	Total int32 `json:"total"`
	// Allocated is the number of nodes currently allocated to sandboxes.
	Allocated int32 `json:"allocated"`
	// Available is the number of nodes currently available in the pool.
	Available int32 `json:"available"`
}

// +genclient
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:printcolumn:name="TOTAL",type="integer",JSONPath=".status.total",description="The number of all nodes in pool."
// +kubebuilder:printcolumn:name="ALLOCATED",type="integer",JSONPath=".status.allocated",description="The number of allocated nodes in pool."
// +kubebuilder:printcolumn:name="AVAILABLE",type="integer",JSONPath=".status.available",description="The number of available nodes in pool."
// Pool is the Schema for the pools API.
// Spec holds the desired state and Status holds the observed state.
type Pool struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	Spec   PoolSpec   `json:"spec,omitempty"`
	Status PoolStatus `json:"status,omitempty"`
}

// +kubebuilder:object:root=true

// PoolList contains a list of Pool.
type PoolList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	// Items is the list of Pool objects.
	Items           []Pool `json:"items"`
}

// init registers the Pool object and list types with the package SchemeBuilder.
func init() {
	SchemeBuilder.Register(
		new(Pool),
		new(PoolList),
	)
}


================================================
FILE: kubernetes/apis/sandbox/v1alpha1/zz_generated.deepcopy.go
================================================
//go:build !ignore_autogenerated

// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Code generated by controller-gen. DO NOT EDIT.

package v1alpha1

import (
	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/runtime"
)

// DeepCopyInto writes a deep copy of the receiver into out. in must be non-nil.
func (in *BatchSandbox) DeepCopyInto(out *BatchSandbox) {
	// The whole-struct assignment already carries TypeMeta and Status over by value,
	// so they need no separate assignment afterwards.
	*out = *in
	// ObjectMeta and Spec are copied again through their own DeepCopyInto methods.
	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
	in.Spec.DeepCopyInto(&out.Spec)
}

// DeepCopy allocates a new BatchSandbox and fills it with a deep copy of the receiver.
// A nil receiver yields nil.
func (in *BatchSandbox) DeepCopy() *BatchSandbox {
	if in != nil {
		clone := &BatchSandbox{}
		in.DeepCopyInto(clone)
		return clone
	}
	return nil
}

// DeepCopyObject returns a deep copy of the receiver as a runtime.Object.
func (in *BatchSandbox) DeepCopyObject() runtime.Object {
	clone := in.DeepCopy()
	if clone == nil {
		// Return an untyped nil so the resulting interface value itself is nil.
		return nil
	}
	return clone
}

// DeepCopyInto writes a deep copy of the receiver into out. in must be non-nil.
func (in *BatchSandboxList) DeepCopyInto(out *BatchSandboxList) {
	// The whole-struct assignment already carries TypeMeta over by value.
	*out = *in
	in.ListMeta.DeepCopyInto(&out.ListMeta)
	if in.Items == nil {
		return
	}
	// Give out its own backing array, then deep-copy element by element.
	out.Items = make([]BatchSandbox, len(in.Items))
	for idx := range in.Items {
		in.Items[idx].DeepCopyInto(&out.Items[idx])
	}
}

// DeepCopy allocates a new BatchSandboxList and fills it with a deep copy of the receiver.
// A nil receiver yields nil.
func (in *BatchSandboxList) DeepCopy() *BatchSandboxList {
	if in != nil {
		clone := &BatchSandboxList{}
		in.DeepCopyInto(clone)
		return clone
	}
	return nil
}

// DeepCopyObject returns a deep copy of the receiver as a runtime.Object.
func (in *BatchSandboxList) DeepCopyObject() runtime.Object {
	clone := in.DeepCopy()
	if clone == nil {
		// Return an untyped nil so the resulting interface value itself is nil.
		return nil
	}
	return clone
}

// DeepCopyInto writes a deep copy of the receiver into out. in must be non-nil.
func (in *BatchSandboxSpec) DeepCopyInto(out *BatchSandboxSpec) {
	// Start from a shallow copy; every pointer and slice field that is set is
	// then replaced with freshly allocated storage below.
	*out = *in

	if in.Replicas != nil {
		out.Replicas = new(int32)
		*out.Replicas = *in.Replicas
	}

	if in.Template != nil {
		out.Template = new(v1.PodTemplateSpec)
		in.Template.DeepCopyInto(out.Template)
	}

	if in.ShardPatches != nil {
		out.ShardPatches = make([]runtime.RawExtension, len(in.ShardPatches))
		for idx := range in.ShardPatches {
			in.ShardPatches[idx].DeepCopyInto(&out.ShardPatches[idx])
		}
	}

	if in.ExpireTime != nil {
		out.ExpireTime = in.ExpireTime.DeepCopy()
	}

	if in.TaskTemplate != nil {
		out.TaskTemplate = new(TaskTemplateSpec)
		in.TaskTemplate.DeepCopyInto(out.TaskTemplate)
	}

	if in.ShardTaskPatches != nil {
		out.ShardTaskPatches = make([]runtime.RawExtension, len(in.ShardTaskPatches))
		for idx := range in.ShardTaskPatches {
			in.ShardTaskPatches[idx].DeepCopyInto(&out.ShardTaskPatches[idx])
		}
	}

	if in.TaskResourcePolicyWhenCompleted != nil {
		out.TaskResourcePolicyWhenCompleted = new(TaskResourcePolicy)
		*out.TaskResourcePolicyWhenCompleted = *in.TaskResourcePolicyWhenCompleted
	}
}

// DeepCopy allocates a new BatchSandboxSpec and fills it with a deep copy of the receiver.
// A nil receiver yields nil.
func (in *BatchSandboxSpec) DeepCopy() *BatchSandboxSpec {
	if in != nil {
		clone := &BatchSandboxSpec{}
		in.DeepCopyInto(clone)
		return clone
	}
	return nil
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
// BatchSandboxStatus has only integer fields, so a plain struct assignment is a complete copy.
func (in *BatchSandboxStatus) DeepCopyInto(out *BatchSandboxStatus) {
	*out = *in
}

// DeepCopy returns a newly allocated BatchSandboxStatus copied from the receiver,
// or nil when the receiver is nil.
func (in *BatchSandboxStatus) DeepCopy() *BatchSandboxStatus {
	if in != nil {
		clone := &BatchSandboxStatus{}
		in.DeepCopyInto(clone)
		return clone
	}
	return nil
}

// DeepCopyInto copies the receiver into out with a single struct assignment.
// in must be non-nil.
func (in *CapacitySpec) DeepCopyInto(out *CapacitySpec) {
	snapshot := *in
	*out = snapshot
}

// DeepCopy returns a newly allocated CapacitySpec copied from the receiver,
// or nil when the receiver is nil.
func (in *CapacitySpec) DeepCopy() *CapacitySpec {
	var clone *CapacitySpec
	if in != nil {
		clone = new(CapacitySpec)
		in.DeepCopyInto(clone)
	}
	return clone
}

// DeepCopyInto writes a deep copy of the receiver into out. in must be non-nil.
func (in *Pool) DeepCopyInto(out *Pool) {
	// Begin with a shallow copy, then redo the members individually.
	*out = *in
	out.TypeMeta, out.Status = in.TypeMeta, in.Status
	// ObjectMeta and Spec provide their own deep-copy routines.
	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
	in.Spec.DeepCopyInto(&out.Spec)
}

// DeepCopy returns a newly allocated Pool that is a deep copy of the receiver,
// or nil when the receiver is nil.
func (in *Pool) DeepCopy() *Pool {
	if in != nil {
		clone := &Pool{}
		in.DeepCopyInto(clone)
		return clone
	}
	return nil
}

// DeepCopyObject returns a deep copy of the receiver typed as runtime.Object.
// A nil receiver yields an untyped nil interface rather than a typed nil pointer.
func (in *Pool) DeepCopyObject() runtime.Object {
	clone := in.DeepCopy()
	if clone == nil {
		// Return a literal nil so the interface value itself compares equal to nil.
		return nil
	}
	return clone
}

// DeepCopyInto writes a deep copy of the receiver into out. in must be non-nil.
func (in *PoolList) DeepCopyInto(out *PoolList) {
	*out = *in
	out.TypeMeta = in.TypeMeta
	in.ListMeta.DeepCopyInto(&out.ListMeta)
	if in.Items == nil {
		// A nil Items slice stays nil in the copy.
		return
	}
	items := make([]Pool, len(in.Items))
	for idx := range in.Items {
		in.Items[idx].DeepCopyInto(&items[idx])
	}
	out.Items = items
}

// DeepCopy returns a newly allocated PoolList that is a deep copy of the receiver,
// or nil when the receiver is nil.
func (in *PoolList) DeepCopy() *PoolList {
	var clone *PoolList
	if in != nil {
		clone = new(PoolList)
		in.DeepCopyInto(clone)
	}
	return clone
}

// DeepCopyObject returns a deep copy of the receiver typed as runtime.Object.
// A nil receiver yields an untyped nil interface rather than a typed nil pointer.
func (in *PoolList) DeepCopyObject() runtime.Object {
	clone := in.DeepCopy()
	if clone == nil {
		// Return a literal nil so the interface value itself compares equal to nil.
		return nil
	}
	return clone
}

// DeepCopyInto writes a deep copy of the receiver into out. in must be non-nil.
func (in *PoolSpec) DeepCopyInto(out *PoolSpec) {
	*out = *in
	out.CapacitySpec = in.CapacitySpec
	// The pod template is held by pointer, so it needs its own allocation.
	if tmpl := in.Template; tmpl != nil {
		fresh := new(v1.PodTemplateSpec)
		tmpl.DeepCopyInto(fresh)
		out.Template = fresh
	}
}

// DeepCopy returns a newly allocated PoolSpec that is a deep copy of the receiver,
// or nil when the receiver is nil.
func (in *PoolSpec) DeepCopy() *PoolSpec {
	if in != nil {
		clone := &PoolSpec{}
		in.DeepCopyInto(clone)
		return clone
	}
	return nil
}

// DeepCopyInto copies the receiver into out with a single struct assignment.
// in must be non-nil.
func (in *PoolStatus) DeepCopyInto(out *PoolStatus) {
	snapshot := *in
	*out = snapshot
}

// DeepCopy returns a newly allocated PoolStatus copied from the receiver,
// or nil when the receiver is nil.
func (in *PoolStatus) DeepCopy() *PoolStatus {
	var clone *PoolStatus
	if in != nil {
		clone = new(PoolStatus)
		in.DeepCopyInto(clone)
	}
	return clone
}

// DeepCopyInto writes a deep copy of the receiver into out. in must be non-nil.
// Each non-nil slice is duplicated so the copy shares no backing array with in.
func (in *ProcessTask) DeepCopyInto(out *ProcessTask) {
	*out = *in
	if in.Command != nil {
		command := make([]string, len(in.Command))
		copy(command, in.Command)
		out.Command = command
	}
	if in.Args != nil {
		args := make([]string, len(in.Args))
		copy(args, in.Args)
		out.Args = args
	}
	if in.Env != nil {
		env := make([]v1.EnvVar, len(in.Env))
		for idx := range in.Env {
			in.Env[idx].DeepCopyInto(&env[idx])
		}
		out.Env = env
	}
}

// DeepCopy returns a newly allocated ProcessTask that is a deep copy of the
// receiver, or nil when the receiver is nil.
func (in *ProcessTask) DeepCopy() *ProcessTask {
	if in != nil {
		clone := &ProcessTask{}
		in.DeepCopyInto(clone)
		return clone
	}
	return nil
}

// DeepCopyInto writes a deep copy of the receiver into out. in must be non-nil.
func (in *TaskSpec) DeepCopyInto(out *TaskSpec) {
	*out = *in
	if proc := in.Process; proc != nil {
		fresh := new(ProcessTask)
		proc.DeepCopyInto(fresh)
		out.Process = fresh
	}
	if timeout := in.TimeoutSeconds; timeout != nil {
		// Copy the pointed-to value so the two specs do not share one int64.
		val := *timeout
		out.TimeoutSeconds = &val
	}
}

// DeepCopy returns a newly allocated TaskSpec that is a deep copy of the receiver,
// or nil when the receiver is nil.
func (in *TaskSpec) DeepCopy() *TaskSpec {
	var clone *TaskSpec
	if in != nil {
		clone = new(TaskSpec)
		in.DeepCopyInto(clone)
	}
	return clone
}

// DeepCopyInto writes a deep copy of the receiver into out. in must be non-nil.
// Each non-nil state pointer is replaced by a separately allocated copy.
func (in *TaskState) DeepCopyInto(out *TaskState) {
	*out = *in
	if waiting := in.Waiting; waiting != nil {
		val := *waiting
		out.Waiting = &val
	}
	if running := in.Running; running != nil {
		fresh := new(TaskStateRunning)
		running.DeepCopyInto(fresh)
		out.Running = fresh
	}
	if terminated := in.Terminated; terminated != nil {
		fresh := new(TaskStateTerminated)
		terminated.DeepCopyInto(fresh)
		out.Terminated = fresh
	}
}

// DeepCopy returns a newly allocated TaskState that is a deep copy of the receiver,
// or nil when the receiver is nil.
func (in *TaskState) DeepCopy() *TaskState {
	if in != nil {
		clone := &TaskState{}
		in.DeepCopyInto(clone)
		return clone
	}
	return nil
}

// DeepCopyInto writes a deep copy of the receiver into out. in must be non-nil.
func (in *TaskStateRunning) DeepCopyInto(out *TaskStateRunning) {
	src, dst := in, out
	*dst = *src
	// StartedAt supplies its own deep-copy routine.
	src.StartedAt.DeepCopyInto(&dst.StartedAt)
}

// DeepCopy returns a newly allocated TaskStateRunning that is a deep copy of the
// receiver, or nil when the receiver is nil.
func (in *TaskStateRunning) DeepCopy() *TaskStateRunning {
	var clone *TaskStateRunning
	if in != nil {
		clone = new(TaskStateRunning)
		in.DeepCopyInto(clone)
	}
	return clone
}

// DeepCopyInto writes a deep copy of the receiver into out. in must be non-nil.
func (in *TaskStateTerminated) DeepCopyInto(out *TaskStateTerminated) {
	src, dst := in, out
	*dst = *src
	// Both timestamps supply their own deep-copy routine.
	src.StartedAt.DeepCopyInto(&dst.StartedAt)
	src.FinishedAt.DeepCopyInto(&dst.FinishedAt)
}

// DeepCopy returns a newly allocated TaskStateTerminated that is a deep copy of the
// receiver, or nil when the receiver is nil.
func (in *TaskStateTerminated) DeepCopy() *TaskStateTerminated {
	if in != nil {
		clone := &TaskStateTerminated{}
		in.DeepCopyInto(clone)
		return clone
	}
	return nil
}

// DeepCopyInto copies the receiver into out with a single struct assignment.
// in must be non-nil.
func (in *TaskStateWaiting) DeepCopyInto(out *TaskStateWaiting) {
	snapshot := *in
	*out = snapshot
}

// DeepCopy returns a newly allocated TaskStateWaiting copied from the receiver,
// or nil when the receiver is nil.
func (in *TaskStateWaiting) DeepCopy() *TaskStateWaiting {
	var clone *TaskStateWaiting
	if in != nil {
		clone = new(TaskStateWaiting)
		in.DeepCopyInto(clone)
	}
	return clone
}

// DeepCopyInto writes a deep copy of the receiver into out. in must be non-nil.
func (in *TaskStatus) DeepCopyInto(out *TaskStatus) {
	src, dst := in, out
	*dst = *src
	// Both embedded task states are deep-copied through their own routine.
	src.State.DeepCopyInto(&dst.State)
	src.LastTerminationState.DeepCopyInto(&dst.LastTerminationState)
}

// DeepCopy returns a newly allocated TaskStatus that is a deep copy of the receiver,
// or nil when the receiver is nil.
func (in *TaskStatus) DeepCopy() *TaskStatus {
	if in != nil {
		clone := &TaskStatus{}
		in.DeepCopyInto(clone)
		return clone
	}
	return nil
}

// DeepCopyInto writes a deep copy of the receiver into out. in must be non-nil.
func (in *TaskTemplateSpec) DeepCopyInto(out *TaskTemplateSpec) {
	src, dst := in, out
	*dst = *src
	// Spec is deep-copied through its own routine.
	src.Spec.DeepCopyInto(&dst.Spec)
}

// DeepCopy returns a newly allocated TaskTemplateSpec that is a deep copy of the
// receiver, or nil when the receiver is nil.
func (in *TaskTemplateSpec) DeepCopy() *TaskTemplateSpec {
	var clone *TaskTemplateSpec
	if in != nil {
		clone = new(TaskTemplateSpec)
		in.DeepCopyInto(clone)
	}
	return clone
}


================================================
FILE: kubernetes/build.sh
================================================
#!/bin/bash
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Builds the container image for one Kubernetes component and optionally pushes it.
#
# Inputs (environment variables):
#   TAG        image tag; defaults to "latest"
#   COMPONENT  "controller" or "task-executor"; defaults to "controller"
#   PUSH       "true" builds for linux/amd64 and linux/arm64 and pushes to ACR_REPO;
#              any other value builds linux/amd64 only and loads the image locally.
#              Defaults to "true".
#
# The Dockerfile and the build context are resolved relative to the current directory.
set -e

# Default values
TAG=${TAG:-latest}
COMPONENT=${COMPONENT:-controller}
PUSH=${PUSH:-true}

# Image repository
ACR_REPO="sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox"

# Component specific settings.
# BUILD_ARGS is an array so each flag/value reaches docker as its own argument
# without depending on unquoted word splitting.
case "$COMPONENT" in
    controller)
        IMAGE_NAME="controller"
        BUILD_ARGS=(--build-arg "PACKAGE=cmd/controller/main.go")
        ;;
    task-executor)
        IMAGE_NAME="task-executor"
        BUILD_ARGS=(--build-arg "PACKAGE=cmd/task-executor/main.go" --build-arg "USERID=0")
        ;;
    *)
        # Diagnostics go to stderr so they are not mixed into captured stdout.
        echo "Error: Unknown component: $COMPONENT" >&2
        echo "Available components: controller, task-executor" >&2
        exit 1
        ;;
esac

echo "========================================="
echo "Building $COMPONENT"
echo "Image: $IMAGE_NAME"
echo "Tag: $TAG"
echo "Push: $PUSH"
echo "========================================="

# Build for multiple platforms
PLATFORMS="linux/amd64,linux/arm64"

if [[ "$PUSH" == "true" ]]; then
    # Build and push to ACR registry
    docker buildx build \
        --platform "$PLATFORMS" \
        "${BUILD_ARGS[@]}" \
        -t "${ACR_REPO}/${IMAGE_NAME}:${TAG}" \
        --push \
        -f Dockerfile \
        .

    echo "========================================="
    echo "Successfully built and pushed:"
    echo "  ${ACR_REPO}/${IMAGE_NAME}:${TAG}"
    echo "========================================="
else
    # Build only (for local testing); this path targets linux/amd64 alone.
    docker buildx build \
        --platform linux/amd64 \
        "${BUILD_ARGS[@]}" \
        -t "${IMAGE_NAME}:${TAG}" \
        -f Dockerfile \
        --load \
        .

    echo "========================================="
    echo "Successfully built (local only):"
    echo "  ${IMAGE_NAME}:${TAG}"
    echo "========================================="
fi


================================================
FILE: kubernetes/charts/opensandbox-controller/.helmignore
================================================
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
# OWNERS file
OWNERS
# Make files
Makefile


================================================
FILE: kubernetes/charts/opensandbox-controller/Chart.yaml
================================================
# Helm chart metadata for the OpenSandbox controller chart.
apiVersion: v2
name: opensandbox-controller
description: A Kubernetes operator for managing sandbox environments with resource pooling and batch delivery
type: application
version: 0.1.0
# appVersion is exposed to templates as .Chart.AppVersion.
appVersion: "0.1.0"

keywords:
  - sandbox
  - kubernetes
  - operator
  - resource-pool
  - batch-sandbox
  - task-orchestration

home: https://github.com/alibaba/OpenSandbox
sources:
  - https://github.com/alibaba/OpenSandbox/tree/main/kubernetes

maintainers:
  - name: OpenSandbox Team
    # NOTE(review): example.com looks like a placeholder address - confirm the real contact before publishing.
    email: opensandbox@example.com

icon: https://raw.githubusercontent.com/alibaba/OpenSandbox/main/kubernetes/images/logo.png

# Kubernetes version constraints
kubeVersion: ">=1.21.1-0"

annotations:
  # Category for Artifact Hub
  artifacthub.io/category: integration-delivery
  artifacthub.io/license: Apache-2.0
  # NOTE(review): the fingerprint below is still a template placeholder; supply the real
  # signing key details (or drop the annotation) before the chart is published.
  artifacthub.io/signKey: |
    fingerprint: [your-gpg-fingerprint]
  artifacthub.io/prerelease: "false"
  artifacthub.io/operator: "true"
  artifacthub.io/operatorCapabilities: Full Lifecycle
  artifacthub.io/recommendations: |
    - url: https://github.com/kubernetes-sigs/kind
  artifacthub.io/links: |
    - name: Documentation
      url: https://github.com/alibaba/OpenSandbox/blob/main/kubernetes/README.md
    - name: Support
      url: https://github.com/alibaba/OpenSandbox/issues


================================================
FILE: kubernetes/charts/opensandbox-controller/README.md
================================================
# OpenSandbox Controller Helm Chart

A Helm chart for deploying the OpenSandbox Kubernetes Controller, which manages sandbox environments with resource pooling and batch delivery capabilities.

## Introduction

This chart bootstraps an OpenSandbox Controller deployment on a Kubernetes cluster using the Helm package manager. The controller provides:

- **Batch Sandbox Management**: Create and manage multiple identical sandbox environments
- **Resource Pooling**: Maintain pre-warmed resource pools for rapid sandbox provisioning
- **Task Orchestration**: Optional task execution within sandboxes
- **High Performance**: O(1) time complexity for batch sandbox delivery

## Prerequisites

- Kubernetes 1.21.1+
- Helm 3.0+
- Container runtime (Docker, containerd, etc.)

## Installing the Chart

To install the chart with the release name `opensandbox-controller`:

```bash
helm install opensandbox-controller ./opensandbox-controller \
  --set controller.image.repository=<your-registry>/opensandbox-controller \
  --set controller.image.tag=v0.1.0 \
  --namespace opensandbox-system \
  --create-namespace
```

The command deploys OpenSandbox Controller on the Kubernetes cluster with default configuration. The [Parameters](#parameters) section lists the parameters that can be configured during installation.

## Uninstalling the Chart

To uninstall/delete the `opensandbox-controller` deployment:

```bash
helm delete opensandbox-controller -n opensandbox-system
```

The command removes all the Kubernetes components associated with the chart. Note that CRDs are kept by default (can be changed via `crds.keep`).

To also remove the CRDs:

```bash
kubectl delete crd batchsandboxes.sandbox.opensandbox.io
kubectl delete crd pools.sandbox.opensandbox.io
```

## Parameters

### Global Parameters

| Name | Description | Value |
|------|-------------|-------|
| `nameOverride` | Override the name of the chart | `""` |
| `fullnameOverride` | Override the full name of the chart | `""` |
| `namespaceOverride` | Override the namespace where resources will be created (the chart templates use `opensandbox-system` when this is empty) | `""` |

### Controller Parameters

| Name | Description | Value |
|------|-------------|-------|
| `controller.image.repository` | Controller image repository | `opensandbox.io/opensandbox-controller` |
| `controller.image.pullPolicy` | Image pull policy | `IfNotPresent` |
| `controller.image.tag` | Overrides the image tag (default is chart appVersion) | `""` |
| `controller.replicaCount` | Number of controller replicas | `1` |
| `controller.resources.limits.cpu` | CPU resource limits | `500m` |
| `controller.resources.limits.memory` | Memory resource limits | `128Mi` |
| `controller.resources.requests.cpu` | CPU resource requests | `10m` |
| `controller.resources.requests.memory` | Memory resource requests | `64Mi` |
| `controller.logLevel` | Can be one of 'debug', 'info', 'error' | `info` |
| `controller.kubeClient.qps` | QPS for Kubernetes client rate limiter | `100` |
| `controller.kubeClient.burst` | Burst for Kubernetes client rate limiter | `200` |
| `controller.leaderElection.enabled` | Enable leader election | `true` |
| `controller.nodeSelector` | Node labels for pod assignment | `{}` |
| `controller.tolerations` | Tolerations for pod assignment | `[]` |
| `controller.affinity` | Affinity for pod assignment | `{}` |
| `controller.podLabels` | Additional labels for controller pods | `{}` |
| `controller.podAnnotations` | Additional annotations for controller pods | `{}` |
| `controller.priorityClassName` | Priority class name for controller pods | `""` |

### RBAC Parameters

| Name | Description | Value |
|------|-------------|-------|
| `rbac.create` | Specifies whether RBAC resources should be created | `true` |
| `serviceAccount.create` | Specifies whether a service account should be created | `true` |
| `serviceAccount.annotations` | Annotations to add to the service account | `{}` |
| `serviceAccount.name` | The name of the service account to use | `""` |

### CRD Parameters

| Name | Description | Value |
|------|-------------|-------|
| `crds.install` | Specifies whether CRDs should be installed | `true` |
| `crds.keep` | Keep CRDs on chart uninstall | `true` |
| `crds.annotations` | Annotations to add to CRDs | `{"helm.sh/resource-policy": "keep"}` |

### Additional Parameters

| Name | Description | Value |
|------|-------------|-------|
| `imagePullSecrets` | Image pull secrets for private registries | `[]` |
| `extraEnv` | Additional environment variables | `[]` |
| `extraVolumes` | Additional volumes | `[]` |
| `extraVolumeMounts` | Additional volume mounts | `[]` |
| `extraInitContainers` | Additional init containers | `[]` |
| `extraContainers` | Additional sidecar containers | `[]` |

## Configuration Examples

### Custom Resource Limits

```yaml
controller:
  resources:
    limits:
      cpu: 1000m
      memory: 512Mi
    requests:
      cpu: 100m
      memory: 128Mi
```

### Custom Kubernetes Client Rate Limiter

Configure the QPS and Burst for the Kubernetes client to handle high-throughput scenarios:

```yaml
controller:
  kubeClient:
    qps: 100
    burst: 250
```

> Note: Default values are QPS=100, Burst=200.

### Use Private Registry

```yaml
controller:
  image:
    repository: myregistry.example.com/opensandbox-controller
    tag: v0.1.0

imagePullSecrets:
  - name: myregistrykey
```

### Node Affinity

```yaml
controller:
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
        - matchExpressions:
          - key: node-role.kubernetes.io/control-plane
            operator: Exists
```

## Usage Examples

After installation, you can create resources:

### Create a Resource Pool

```yaml
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: Pool
metadata:
  name: example-pool
spec:
  template:
    spec:
      containers:
      - name: sandbox-container
        image: nginx:latest
        ports:
        - containerPort: 80
  capacitySpec:
    bufferMax: 10
    bufferMin: 2
    poolMax: 20
    poolMin: 5
```

### Create a Batch Sandbox

```yaml
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: BatchSandbox
metadata:
  name: example-batch-sandbox
spec:
  replicas: 3
  poolRef: example-pool
```

## Upgrading

To upgrade the chart:

```bash
helm upgrade opensandbox-controller ./opensandbox-controller \
  --namespace opensandbox-system \
  -f custom-values.yaml
```

## Troubleshooting

### Check controller logs

```bash
kubectl logs -n opensandbox-system -l control-plane=controller-manager -f
```

### Check CRD installation

```bash
kubectl get crd | grep opensandbox
```

### Verify RBAC permissions

```bash
kubectl auth can-i --as=system:serviceaccount:opensandbox-system:opensandbox-controller-manager create pods
```

## Additional Resources

- [OpenSandbox GitHub](https://github.com/alibaba/OpenSandbox)
- [Documentation](https://github.com/alibaba/OpenSandbox/blob/main/kubernetes/README.md)
- [Examples](https://github.com/alibaba/OpenSandbox/tree/main/kubernetes/config/samples)

## License

Apache 2.0 License


================================================
FILE: kubernetes/charts/opensandbox-controller/templates/NOTES.txt
================================================
Thank you for installing {{ .Chart.Name }}!

Your release is named {{ .Release.Name }}.

To learn more about the release, try:

  $ helm status {{ .Release.Name }} -n {{ include "opensandbox.namespace" . }}
  $ helm get all {{ .Release.Name }} -n {{ include "opensandbox.namespace" . }}

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

🎉 OpenSandbox Controller has been successfully installed!

📋 Verify the installation:

  kubectl --namespace {{ include "opensandbox.namespace" . }} get pods -l "app.kubernetes.io/name={{ include "opensandbox.name" . }}"

📚 Check the installed CRDs:

  kubectl get crd batchsandboxes.sandbox.opensandbox.io
  kubectl get crd pools.sandbox.opensandbox.io

🚀 Create your first resources:

# Create a resource pool
cat <<EOF | kubectl apply -f -
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: Pool
metadata:
  name: example-pool
  namespace: {{ include "opensandbox.namespace" . }}
spec:
  template:
    spec:
      containers:
      - name: sandbox-container
        image: nginx:latest
        ports:
        - containerPort: 80
  capacitySpec:
    bufferMax: 10
    bufferMin: 2
    poolMax: 20
    poolMin: 5
EOF

# Create a batch sandbox
cat <<EOF | kubectl apply -f -
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: BatchSandbox
metadata:
  name: example-batch-sandbox
  namespace: {{ include "opensandbox.namespace" . }}
spec:
  replicas: 3
  poolRef: example-pool
EOF

📊 Monitor resources:

  # View pool status
  kubectl get pools -n {{ include "opensandbox.namespace" . }}

  # View batch sandbox status
  kubectl get batchsandboxes -n {{ include "opensandbox.namespace" . }}

  # Get detailed information
  kubectl describe pool example-pool -n {{ include "opensandbox.namespace" . }}
  kubectl describe batchsandbox example-batch-sandbox -n {{ include "opensandbox.namespace" . }}

📖 Documentation:

  GitHub: https://github.com/alibaba/OpenSandbox
  Docs:   https://github.com/alibaba/OpenSandbox/blob/main/kubernetes/README.md

💡 Examples:

  Check out example configurations in the repository:
  https://github.com/alibaba/OpenSandbox/tree/main/kubernetes/config/samples

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

⚠️  Note: This is an operator that manages sandbox resources. The controller
    itself doesn't run sandboxes - it manages Pool and BatchSandbox resources.

{{- if not .Values.rbac.create }}

⚠️  WARNING: RBAC is disabled. Make sure the ServiceAccount has proper permissions.

{{- end }}

{{- if not .Values.crds.install }}

⚠️  WARNING: CRD installation is disabled. Make sure CRDs are installed manually.

{{- end }}


================================================
FILE: kubernetes/charts/opensandbox-controller/templates/_helpers.tpl
================================================
{{/*
Base application name.
Yields .Values.nameOverride when it is set, otherwise the literal "opensandbox"
(note: not .Chart.Name). The result is truncated to 63 characters and a single
trailing "-" is removed.
*/}}
{{- define "opensandbox.name" -}}
{{- default "opensandbox" .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
Resolution order:
  1. .Values.fullnameOverride, when set.
  2. .Release.Name alone, when it already contains the name
     (.Values.nameOverride, falling back to .Chart.Name).
  3. Otherwise the release name and that name joined with "-".
Every result is truncated to 63 characters and a single trailing "-" is removed.
*/}}
{{- define "opensandbox.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
Formats the value as chart name, "-", chart version; any "+" is replaced with "_",
then the result is truncated to 63 characters and a single trailing "-" is removed.
*/}}
{{- define "opensandbox.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels.
Emits the helm.sh/chart label, the selector labels, app.kubernetes.io/version
(only when the chart declares an appVersion) and app.kubernetes.io/managed-by.
*/}}
{{- define "opensandbox.labels" -}}
helm.sh/chart: {{ include "opensandbox.chart" . }}
{{ include "opensandbox.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels.
Emits app.kubernetes.io/name (from the "opensandbox.name" template),
app.kubernetes.io/instance (the release name) and the fixed label
control-plane: controller-manager.
*/}}
{{- define "opensandbox.selectorLabels" -}}
app.kubernetes.io/name: {{ include "opensandbox.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
control-plane: controller-manager
{{- end }}

{{/*
Create the name of the service account to use.
.Values.serviceAccount.name always wins when it is set. When it is empty the
fallback is "opensandbox-controller-manager" if serviceAccount.create is true,
and "default" otherwise.
*/}}
{{- define "opensandbox.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default "opensandbox-controller-manager" .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}

{{/*
Get the namespace to use.
Returns .Values.namespaceOverride when it is set, otherwise the fixed name
"opensandbox-system".
NOTE(review): .Release.Namespace is not consulted here, so installing with a
different --namespace does not change this value - confirm that is intended.
*/}}
{{- define "opensandbox.namespace" -}}
{{- if .Values.namespaceOverride }}
{{- .Values.namespaceOverride }}
{{- else }}
{{- print "opensandbox-system" }}
{{- end }}
{{- end }}

{{/*
Controller image with automatic version prefix handling.
Prepends 'v' to semantic version tags (e.g., 0.0.1 -> v0.0.1) but preserves
special tags like 'latest', 'dev', 'main', etc. as-is.
The tag is .Values.controller.image.tag, falling back to the chart appVersion.
The version pattern is anchored only at the start, so a tag that merely begins
with digits.digits.digits (for example one carrying a suffix) is prefixed too.
Output has the form repository:tag.
*/}}
{{- define "opensandbox.controllerImage" -}}
{{- $tag := .Values.controller.image.tag | default .Chart.AppVersion }}
{{- $finalTag := $tag }}
{{- if and (not (hasPrefix "v" $tag)) (regexMatch "^[0-9]+\\.[0-9]+\\.[0-9]+" $tag) }}
{{- $finalTag = printf "v%s" $tag }}
{{- end }}
{{- printf "%s:%s" .Values.controller.image.repository $finalTag }}
{{- end }}

{{/*
Create the name for the leader election role.
Always the fixed string "opensandbox-leader-election-role"; neither the release
name nor any value influences it.
*/}}
{{- define "opensandbox.leaderElectionRoleName" -}}
{{- print "opensandbox-leader-election-role" }}
{{- end }}

{{/*
Create the name for the manager role.
Always the fixed string "opensandbox-manager-role"; neither the release name nor
any value influences it.
NOTE(review): if this name is used for cluster-scoped objects, two releases of the
chart in one cluster would presumably collide on it - confirm.
*/}}
{{- define "opensandbox.managerRoleName" -}}
{{- print "opensandbox-manager-role" }}
{{- end }}

{{/*
Return the appropriate apiVersion for RBAC APIs.
Uses rbac.authorization.k8s.io/v1 when the cluster capabilities report it,
otherwise falls back to rbac.authorization.k8s.io/v1beta1.
*/}}
{{- define "opensandbox.rbac.apiVersion" -}}
{{- if .Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1" }}
{{- print "rbac.authorization.k8s.io/v1" }}
{{- else }}
{{- print "rbac.authorization.k8s.io/v1beta1" }}
{{- end }}
{{- end }}

{{/*
Return image pull policy.
Yields .Values.controller.image.pullPolicy, or "IfNotPresent" when that value is empty.
*/}}
{{- define "opensandbox.imagePullPolicy" -}}
{{- .Values.controller.image.pullPolicy | default "IfNotPresent" }}
{{- end }}


================================================
FILE: kubernetes/charts/opensandbox-controller/templates/clusterrole.yaml
================================================
{{- if .Values.rbac.create -}}
---
# Leader election role
# Namespaced Role, created in the namespace returned by the "opensandbox.namespace" template.
apiVersion: {{ include "opensandbox.rbac.apiVersion" . }}
kind: Role
metadata:
  name: {{ include "opensandbox.leaderElectionRoleName" . }}
  namespace: {{ include "opensandbox.namespace" . }}
  labels:
    {{- include "opensandbox.labels" . | nindent 4 }}
    app.kubernetes.io/component: rbac
rules:
# Full read/write access to ConfigMaps (presumably leader-election lock objects - confirm).
- apiGroups:
  - ""
  resources:
  - configmaps
  verbs:
  - get
  - list
  - watch
  - create
  - update
  - patch
  - delete
# Full read/write access to coordination.k8s.io Leases.
- apiGroups:
  - coordination.k8s.io
  resources:
  - leases
  verbs:
  - get
  - list
  - watch
  - create
  - update
  - patch
  - delete
# Events may only be created and patched.
- apiGroups:
  - ""
  resources:
  - events
  verbs:
  - create
  - patch

---
# Manager ClusterRole
# Cluster-scoped: these rules apply in every namespace once bound.
apiVersion: {{ include "opensandbox.rbac.apiVersion" . }}
kind: ClusterRole
metadata:
  name: {{ include "opensandbox.managerRoleName" . }}
  labels:
    {{- include "opensandbox.labels" . | nindent 4 }}
    app.kubernetes.io/component: rbac
rules:
# Core API: full read/write access to Pods and Events.
- apiGroups:
  - ""
  resources:
  - events
  - pods
  verbs:
  - create
  - delete
  - get
  - list
  - patch
  - update
  - watch
# Core API: read and modify the Pod status subresource.
- apiGroups:
  - ""
  resources:
  - pods/status
  verbs:
  - get
  - patch
  - update
# Custom resources of this chart: full read/write access.
- apiGroups:
  - sandbox.opensandbox.io
  resources:
  - batchsandboxes
  - pools
  verbs:
  - create
  - delete
  - get
  - list
  - patch
  - update
  - watch
# Custom resources: update the finalizers subresource.
- apiGroups:
  - sandbox.opensandbox.io
  resources:
  - batchsandboxes/finalizers
  - pools/finalizers
  verbs:
  - update
# Custom resources: read and modify the status subresource.
- apiGroups:
  - sandbox.opensandbox.io
  resources:
  - batchsandboxes/status
  - pools/status
  verbs:
  - get
  - patch
  - update

{{- end }}


================================================
FILE: kubernetes/charts/opensandbox-controller/templates/clusterrolebinding.yaml
================================================
{{- if .Values.rbac.create -}}
---
# Leader election role binding
# Grants the leader-election Role to the controller's ServiceAccount; the binding,
# the Role and the ServiceAccount subject all use the "opensandbox.namespace" namespace.
apiVersion: {{ include "opensandbox.rbac.apiVersion" . }}
kind: RoleBinding
metadata:
  name: {{ include "opensandbox.leaderElectionRoleName" . }}
  namespace: {{ include "opensandbox.namespace" . }}
  labels:
    {{- include "opensandbox.labels" . | nindent 4 }}
    app.kubernetes.io/component: rbac
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: {{ include "opensandbox.leaderElectionRoleName" . }}
subjects:
- kind: ServiceAccount
  name: {{ include "opensandbox.serviceAccountName" . }}
  namespace: {{ include "opensandbox.namespace" . }}

---
# Manager role binding
# Cluster-scoped binding of the manager ClusterRole to the controller's ServiceAccount,
# which is looked up in the "opensandbox.namespace" namespace.
apiVersion: {{ include "opensandbox.rbac.apiVersion" . }}
kind: ClusterRoleBinding
metadata:
  name: {{ include "opensandbox.managerRoleName" . }}
  labels:
    {{- include "opensandbox.labels" . | nindent 4 }}
    app.kubernetes.io/component: rbac
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ include "opensandbox.managerRoleName" . }}
subjects:
- kind: ServiceAccount
  name: {{ include "opensandbox.serviceAccountName" . }}
  namespace: {{ include "opensandbox.namespace" . }}

{{- end }}


================================================
FILE: kubernetes/charts/opensandbox-controller/templates/crds/batchsandboxes.yaml
================================================
{{- if .Values.crds.install -}}
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  annotations:
    controller-gen.kubebuilder.io/version: v0.18.0
    {{- if .Values.crds.keep }}
    helm.sh/resource-policy: keep
    {{- end }}
    {{- with .Values.crds.annotations }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
  name: batchsandboxes.sandbox.opensandbox.io
  labels:
    {{- include "opensandbox.labels" . | nindent 4 }}
spec:
  group: sandbox.opensandbox.io
  names:
    kind: BatchSandbox
    listKind: BatchSandboxList
    plural: batchsandboxes
    shortNames:
    - bsbx
    singular: batchsandbox
  scope: Namespaced
  versions:
  - additionalPrinterColumns:
    - description: The desired number of pods.
      jsonPath: .spec.replicas
      name: DESIRED
      type: integer
    - description: The number of currently all pods.
      jsonPath: .status.replicas
      name: TOTAL
      type: integer
    - description: The number of currently all allocated pods.
      jsonPath: .status.allocated
      name: ALLOCATED
      type: integer
    - description: The number of currently all ready pods.
      jsonPath: .status.ready
      name: Ready
      type: integer
    - description: The number of currently all running tasks.
      jsonPath: .status.taskRunning
      name: TASK_RUNNING
      priority: 1
      type: integer
    - description: The number of currently all succeed tasks.
      jsonPath: .status.taskSucceed
      name: TASK_SUCCEED
      priority: 1
      type: integer
    - description: The number of currently all failed tasks.
      jsonPath: .status.taskFailed
      name: TASK_FAILED
      priority: 1
      type: integer
    - description: The number of currently all unknown tasks.
      jsonPath: .status.taskUnknown
      name: TASK_UNKNOWN
      priority: 1
      type: integer
    - description: sandbox expire time
      jsonPath: .spec.expireTime
      name: EXPIRE
      type: string
    - description: CreationTimestamp is a timestamp representing the server time when
        this object was created. It is not guaranteed to be set in happens-before
        order across separate operations. Clients may not set this value. It is represented
        in RFC3339 form and is in UTC.
      jsonPath: .metadata.creationTimestamp
      name: AGE
      type: date
    name: v1alpha1
    schema:
      openAPIV3Schema:
        description: BatchSandbox is the Schema for the batchsandboxes API.
        properties:
          apiVersion:
            description: |-
              APIVersion defines the versioned schema of this representation of an object.
              Servers should convert recognized schemas to the latest internal value, and
              may reject unrecognized values.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
            type: string
          kind:
            description: |-
              Kind is a string value representing the REST resource this object represents.
              Servers may infer this from the endpoint the client submits requests to.
              Cannot be updated.
              In CamelCase.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
            type: string
          metadata:
            type: object
          spec:
            description: BatchSandboxSpec defines the desired state of BatchSandbox.
            properties:
              expireTime:
                description: |-
                  ExpireTime - Absolute time when the batch-sandbox is deleted.
                  If a time in the past is provided, the batch-sandbox will be deleted immediately.
                format: date-time
                type: string
              poolRef:
                description: |-
                  PoolRef references the Pool resource name for pooled sandbox creation.
                  Mutually exclusive with Template - use PoolRef for pool-based allocation or Template for direct sandbox creation.
                type: string
              replicas:
                default: 1
                description: Replicas is the number of desired replicas.
                format: int32
                minimum: 0
                type: integer
              shardPatches:
                description: ShardPatches indicates patching to the Template for BatchSandbox.
                x-kubernetes-preserve-unknown-fields: true
              shardTaskPatches:
                description: ShardTaskPatches indicates patching to the TaskTemplate
                  for individual Task.
                x-kubernetes-preserve-unknown-fields: true
              taskResourcePolicyWhenCompleted:
                default: Retain
                description: |-
                  TaskResourcePolicyWhenCompleted specifies how resources should be handled once a task reaches a completed state (SUCCEEDED or FAILED).
                  - Retain: Keep the resources until the BatchSandbox is deleted.
                  - Release: Free the resources immediately when the task completes.
                type: string
              taskTemplate:
                description: |-
                  Task is a custom task spec that is automatically dispatched after the sandbox is successfully created.
                  The Sandbox is responsible for managing the lifecycle of the task.
                x-kubernetes-preserve-unknown-fields: true
              template:
                description: Template describes the pods that will be created.
                x-kubernetes-preserve-unknown-fields: true
            required:
            - replicas
            type: object
          status:
            description: BatchSandboxStatus defines the observed state of BatchSandbox.
            properties:
              allocated:
                description: "\tAllocated is the number of actual scheduled Pod"
                format: int32
                type: integer
              observedGeneration:
                description: |-
                  ObservedGeneration is the most recent generation observed for this BatchSandbox. It corresponds to the
                  BatchSandbox's generation, which is updated on mutation by the API Server.
                format: int64
                type: integer
              ready:
                description: "\tReady is the number of actual Ready Pod"
                format: int32
                type: integer
              replicas:
                description: Replicas is the number of actual Pods
                format: int32
                type: integer
              taskFailed:
                description: TaskFailed is the number of Failed task
                format: int32
                type: integer
              taskPending:
                description: TaskPending is the number of Pending task which is unassigned
                format: int32
                type: integer
              taskRunning:
                description: TaskRunning is the number of Running task
                format: int32
                type: integer
              taskSucceed:
                description: TaskSucceed is the number of Succeed task
                format: int32
                type: integer
              taskUnknown:
                description: TaskUnknown is the number of Unknown task
                format: int32
                type: integer
            required:
            - allocated
            - ready
            - replicas
            - taskFailed
            - taskPending
            - taskRunning
            - taskSucceed
            - taskUnknown
            type: object
        type: object
    served: true
    storage: true
    subresources:
      status: {}
{{- end }}


================================================
FILE: kubernetes/charts/opensandbox-controller/templates/crds/pools.yaml
================================================
{{- if .Values.crds.install -}}
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  annotations:
    controller-gen.kubebuilder.io/version: v0.18.0
    {{- if .Values.crds.keep }}
    helm.sh/resource-policy: keep
    {{- end }}
    {{- with .Values.crds.annotations }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
  name: pools.sandbox.opensandbox.io
  labels:
    {{- include "opensandbox.labels" . | nindent 4 }}
spec:
  group: sandbox.opensandbox.io
  names:
    kind: Pool
    listKind: PoolList
    plural: pools
    singular: pool
  scope: Namespaced
  versions:
  - additionalPrinterColumns:
    - description: The number of all nodes in pool.
      jsonPath: .status.total
      name: TOTAL
      type: integer
    - description: The number of allocated nodes in pool.
      jsonPath: .status.allocated
      name: ALLOCATED
      type: integer
    - description: The number of available nodes in pool.
      jsonPath: .status.available
      name: AVAILABLE
      type: integer
    name: v1alpha1
    schema:
      openAPIV3Schema:
        description: Pool is the Schema for the pools API.
        properties:
          apiVersion:
            description: |-
              APIVersion defines the versioned schema of this representation of an object.
              Servers should convert recognized schemas to the latest internal value, and
              may reject unrecognized values.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
            type: string
          kind:
            description: |-
              Kind is a string value representing the REST resource this object represents.
              Servers may infer this from the endpoint the client submits requests to.
              Cannot be updated.
              In CamelCase.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
            type: string
          metadata:
            type: object
          spec:
            description: PoolSpec defines the desired state of Pool.
            properties:
              capacitySpec:
                description: CapacitySpec controls the size of the resource pool.
                properties:
                  bufferMax:
                    description: BufferMax is the maximum number of nodes kept in
                      the warm buffer.
                    format: int32
                    minimum: 0
                    type: integer
                  bufferMin:
                    description: BufferMin is the minimum number of nodes that must
                      remain in the buffer.
                    format: int32
                    minimum: 0
                    type: integer
                  poolMax:
                    description: PoolMax is the maximum total number of nodes allowed
                      in the entire pool.
                    format: int32
                    minimum: 0
                    type: integer
                  poolMin:
                    description: PoolMin is the minimum total size of the pool.
                    format: int32
                    minimum: 0
                    type: integer
                required:
                - bufferMax
                - bufferMin
                - poolMax
                - poolMin
                type: object
              template:
                description: Pod Template used to create pre-warmed nodes in the pool.
                x-kubernetes-preserve-unknown-fields: true
            required:
            - capacitySpec
            type: object
          status:
            description: PoolStatus defines the observed state of Pool.
            properties:
              allocated:
                description: Allocated is the number of nodes currently allocated
                  to sandboxes.
                format: int32
                type: integer
              available:
                description: Available is the number of nodes currently available
                  in the pool.
                format: int32
                type: integer
              observedGeneration:
                description: |-
                  ObservedGeneration is the most recent generation observed for this Pool. It corresponds to the
                  Pool's generation, which is updated on mutation by the API Server.
                format: int64
                type: integer
              revision:
                description: Revision is the latest version of pool
                type: string
              total:
                description: Total is the total number of nodes in the pool.
                format: int32
                type: integer
            required:
            - allocated
            - available
            - revision
            - total
            type: object
        type: object
    served: true
    storage: true
    subresources:
      status: {}
{{- end }}


================================================
FILE: kubernetes/charts/opensandbox-controller/templates/deployment.yaml
================================================
# Deployment for the OpenSandbox controller manager.
# Optional sections (pull secrets, security contexts, probes, env, volume
# mounts, volumes, scheduling constraints) are emitted only when the matching
# value is set, so the rendered manifest never contains empty/null keys.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: opensandbox-controller-manager
  namespace: {{ include "opensandbox.namespace" . }}
  labels:
    {{- include "opensandbox.labels" . | nindent 4 }}
    app.kubernetes.io/component: controller-manager
spec:
  replicas: {{ .Values.controller.replicaCount }}
  selector:
    matchLabels:
      {{- include "opensandbox.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      annotations:
        kubectl.kubernetes.io/default-container: manager
        {{- with .Values.controller.podAnnotations }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      labels:
        {{- include "opensandbox.selectorLabels" . | nindent 8 }}
        {{- with .Values.controller.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "opensandbox.serviceAccountName" . }}
      {{- with .Values.controller.podSecurityContext }}
      securityContext:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.controller.priorityClassName }}
      priorityClassName: {{ . }}
      {{- end }}
      {{- with .Values.extraInitContainers }}
      initContainers:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
      - name: manager
        image: {{ include "opensandbox.controllerImage" . }}
        imagePullPolicy: {{ include "opensandbox.imagePullPolicy" . }}
        command:
        - /workspace/server
        args:
        {{- if .Values.controller.leaderElection.enabled }}
        - --leader-elect
        {{- end }}
        - --health-probe-bind-address=:8081
        - --zap-log-level={{ .Values.controller.logLevel }}
        {{- /*
        Rate-limiter flags are passed only for positive values.
        Values are normalised with float64/int before comparing: numbers from
        values.yaml arrive as float64 while --set yields int64, and Go templates
        refuse to compare mixed numeric kinds. `with` also guards against a
        missing kubeClient map without relying on `and` short-circuiting.
        */}}
        {{- with .Values.controller.kubeClient }}
        {{- if gt (float64 .qps) 0.0 }}
        - --kube-client-qps={{ .qps }}
        {{- end }}
        {{- if gt (int .burst) 0 }}
        - --kube-client-burst={{ int .burst }}
        {{- end }}
        {{- end }}
        ports:
        - name: health
          containerPort: 8081
          protocol: TCP
        {{- with .Values.controller.containerSecurityContext }}
        securityContext:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- if .Values.controller.livenessProbe.enabled }}
        livenessProbe:
          httpGet:
            path: {{ .Values.controller.livenessProbe.httpGet.path }}
            port: {{ .Values.controller.livenessProbe.httpGet.port }}
          initialDelaySeconds: {{ .Values.controller.livenessProbe.initialDelaySeconds }}
          periodSeconds: {{ .Values.controller.livenessProbe.periodSeconds }}
          timeoutSeconds: {{ .Values.controller.livenessProbe.timeoutSeconds }}
          successThreshold: {{ .Values.controller.livenessProbe.successThreshold }}
          failureThreshold: {{ .Values.controller.livenessProbe.failureThreshold }}
        {{- end }}
        {{- if .Values.controller.readinessProbe.enabled }}
        readinessProbe:
          httpGet:
            path: {{ .Values.controller.readinessProbe.httpGet.path }}
            port: {{ .Values.controller.readinessProbe.httpGet.port }}
          initialDelaySeconds: {{ .Values.controller.readinessProbe.initialDelaySeconds }}
          periodSeconds: {{ .Values.controller.readinessProbe.periodSeconds }}
          timeoutSeconds: {{ .Values.controller.readinessProbe.timeoutSeconds }}
          successThreshold: {{ .Values.controller.readinessProbe.successThreshold }}
          failureThreshold: {{ .Values.controller.readinessProbe.failureThreshold }}
        {{- end }}
        resources:
          {{- toYaml .Values.controller.resources | nindent 10 }}
        {{- with .Values.extraEnv }}
        env:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- /* Emit volumeMounts only when mounts are configured (avoids a null key). */}}
        {{- with .Values.extraVolumeMounts }}
        volumeMounts:
          {{- toYaml . | nindent 10 }}
        {{- end }}
      {{- /* Sidecars are appended to the containers list at the same level as "manager". */}}
      {{- with .Values.extraContainers }}
      {{- toYaml . | nindent 6 }}
      {{- end }}
      {{- /* Emit volumes only when volumes are configured (avoids a null key). */}}
      {{- with .Values.extraVolumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.controller.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.controller.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.controller.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      terminationGracePeriodSeconds: 10


================================================
FILE: kubernetes/charts/opensandbox-controller/templates/serviceaccount.yaml
================================================
{{- if .Values.serviceAccount.create -}}
# ServiceAccount for the controller; rendered only when serviceAccount.create is true.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "opensandbox.serviceAccountName" . }}
  namespace: {{ include "opensandbox.namespace" . }}
  labels:
    {{- include "opensandbox.labels" . | nindent 4 }}
    app.kubernetes.io/component: serviceaccount
  {{- if .Values.serviceAccount.annotations }}
  annotations:
    {{- toYaml .Values.serviceAccount.annotations | nindent 4 }}
  {{- end }}
{{- /* Registry credentials attached to the account, when any are configured. */}}
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
  {{- toYaml . | nindent 2 }}
{{- end }}
{{- end }}


================================================
FILE: kubernetes/charts/opensandbox-controller/values.yaml
================================================
# Default values for opensandbox-controller.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# -- Override the name of the chart
nameOverride: ""
# -- Override the full name of the chart
fullnameOverride: ""

# -- Override the namespace where resources will be created
# If not set, defaults to "opensandbox-system"
namespaceOverride: ""

# Controller configuration
controller:
  # -- Controller image configuration
  image:
    # -- Controller image repository
    repository: sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/controller
    # -- Image pull policy
    pullPolicy: IfNotPresent
    # -- Overrides the image tag whose default is the chart appVersion
    tag: ""
  
  # -- Number of controller replicas
  replicaCount: 1
  
  # -- Resource requests and limits for the controller
  resources:
    limits:
      cpu: 500m
      memory: 128Mi
    requests:
      cpu: 10m
      memory: 64Mi
  
  # -- Log level for zap logger (debug, info, error)
  logLevel: info

  # -- Kubernetes client rate limiter configuration
  kubeClient:
    # -- QPS for Kubernetes client rate limiter. The --kube-client-qps flag is only passed when this is greater than 0.
    qps: 100
    # -- Burst for Kubernetes client rate limiter. The --kube-client-burst flag is only passed when this is greater than 0.
    burst: 200

  # -- Enable leader election for controller manager
  leaderElection:
    enabled: true
  
  # -- Liveness probe configuration
  livenessProbe:
    enabled: true
    httpGet:
      path: /healthz
      port: 8081
    initialDelaySeconds: 15
    periodSeconds: 20
    timeoutSeconds: 1
    successThreshold: 1
    failureThreshold: 3
  
  # -- Readiness probe configuration
  readinessProbe:
    enabled: true
    httpGet:
      path: /readyz
      port: 8081
    initialDelaySeconds: 5
    periodSeconds: 10
    timeoutSeconds: 1
    successThreshold: 1
    failureThreshold: 3
  
  # -- Node labels for controller pod assignment
  nodeSelector: {}
  
  # -- Tolerations for controller pod assignment
  tolerations: []
  
  # -- Affinity for controller pod assignment
  affinity: {}
  
  # -- Pod security context
  podSecurityContext:
    runAsNonRoot: true
    seccompProfile:
      type: RuntimeDefault
  
  # -- Container security context
  containerSecurityContext:
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - "ALL"
    readOnlyRootFilesystem: false
  
  # -- Additional labels for controller pods
  podLabels: {}
  
  # -- Additional annotations for controller pods
  podAnnotations: {}
  
  # -- Priority class name for controller pods
  priorityClassName: ""

# -- Image pull secrets for private registries
imagePullSecrets: []
# - name: myregistrykey

# ServiceAccount configuration
serviceAccount:
  # -- Specifies whether a service account should be created
  create: true
  # -- Annotations to add to the service account
  annotations: {}
  # -- The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""

# RBAC configuration
rbac:
  # -- Specifies whether RBAC resources should be created
  create: true

# CRD configuration
crds:
  # -- Specifies whether CRDs should be installed
  install: true
  # -- Keep CRDs on chart uninstall (adds helm.sh/resource-policy: keep annotation)
  keep: true
  # -- Additional annotations to add to CRDs (will be merged with resource-policy if keep is true)
  annotations: {}

# Network Policy configuration
networkPolicy:
  # -- Enable network policy
  enabled: false
  # -- Ingress rules for network policy
  ingress: []
  # -- Egress rules for network policy
  egress: []

# -- Additional environment variables for the controller
extraEnv: []
# - name: CUSTOM_VAR
#   value: "custom-value"

# -- Additional volumes for the controller
extraVolumes: []
# - name: custom-volume
#   emptyDir: {}

# -- Additional volume mounts for the controller
extraVolumeMounts: []
# - name: custom-volume
#   mountPath: /custom-path

# -- Additional init containers
extraInitContainers: []

# -- Additional sidecar containers
extraContainers: []

# Example values for different environments
# You can create separate values files for different environments:
# - values-dev.yaml
# - values-staging.yaml
# - values-prod.yaml


================================================
FILE: kubernetes/charts/opensandbox-server/.helmignore
================================================
# Patterns to ignore when building packages.
.DS_Store
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
*.swp
*.bak
*.tmp
*.orig
*~
.project
.idea/
*.tmproj
.vscode/
OWNERS
Makefile


================================================
FILE: kubernetes/charts/opensandbox-server/Chart.yaml
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: v2
name: opensandbox-server
description: OpenSandbox Lifecycle API server for sandbox creation and management
type: application
version: 0.1.0
appVersion: "0.1.0"

keywords:
  - sandbox
  - kubernetes
  - api
  - lifecycle
  - batchsandbox
  - ingress
  - gateway

home: https://github.com/alibaba/OpenSandbox
sources:
  - https://github.com/alibaba/OpenSandbox/tree/main/server
  - https://github.com/alibaba/OpenSandbox/tree/main/components/ingress

maintainers:
  - name: OpenSandbox Team
    email: opensandbox@example.com

# Kubernetes version constraints
kubeVersion: ">=1.21.1-0"

annotations:
  artifacthub.io/category: integration-delivery
  artifacthub.io/license: Apache-2.0
  artifacthub.io/prerelease: "false"


================================================
FILE: kubernetes/charts/opensandbox-server/README.md
================================================
# opensandbox-server Helm Chart

OpenSandbox Lifecycle API server: provides sandbox create/delete and other lifecycle APIs, typically used with BatchSandbox/Pool on Kubernetes.

## Prerequisites

- Kubernetes 1.21.1+
- Helm 3.0+
- OpenSandbox CRDs installed (deploy opensandbox-controller first)

## Install

```bash
# Server only (default namespace opensandbox-system)
helm install opensandbox-server ./kubernetes/charts/opensandbox-server \
  --namespace opensandbox-system \
  --create-namespace

# With custom image and config
helm install opensandbox-server ./kubernetes/charts/opensandbox-server \
  --set server.image.repository=your-registry/opensandbox/server \
  --set server.image.tag=v0.1.0 \
  --namespace opensandbox-system \
  --create-namespace
```

### Deploy server and ingress-gateway together

To run both the Lifecycle API server and the ingress gateway (components/ingress) in one release, set `server.gateway.enabled=true`. The chart will deploy the server and the gateway (Deployment, Service, RBAC), and write server config `[ingress] mode = "gateway"` so the server returns the correct gateway address to clients.

```bash
helm install opensandbox-server ./kubernetes/charts/opensandbox-server \
  --namespace opensandbox-system \
  --create-namespace \
  --set server.gateway.enabled=true \
  --set server.gateway.host=gateway.example.com
```

Optional: override gateway image, replicas, or resources (see `server.gateway.*` in Configuration).

## Configuration

| Parameter | Description | Default |
|-----------|-------------|---------|
| `server.image.repository` | Server image repository | `sandbox-registry.../opensandbox/server` |
| `server.image.tag` | Server image tag | Chart `appVersion` |
| `server.replicaCount` | Server replicas | `2` |
| `server.resources` | CPU/memory requests and limits | See values.yaml |
| `namespaceOverride` | Deployment namespace | `opensandbox-system` |
| `configToml` | config.toml content ([ingress] block generated from server.gateway) | See values.yaml |
| `server.gateway.enabled` | When true: set server config to gateway and deploy components/ingress gateway | `false` |
| `server.gateway.host` | config `gateway.address` (address returned to clients) | `opensandbox.example.com` |
| `server.gateway.gatewayRouteMode` | server config and gateway route mode (header/uri) | `header` |
| `server.gateway.*` | Gateway image, replicas, port, dataplaneNamespace, providerType, resources | See values.yaml |

**Gateway**: When `server.gateway.enabled=true`, the chart writes `[ingress] mode = "gateway"` in config.toml and deploys **components/ingress** Deployment/Service/RBAC; gateway `--mode` matches config. External access must be configured separately.

Set `[kubernetes].namespace` in config for the sandbox workload namespace. Override `api_key` via Secret or values in production.

## Upgrade and uninstall

```bash
helm upgrade opensandbox-server ./kubernetes/charts/opensandbox-server -n opensandbox-system
helm uninstall opensandbox-server -n opensandbox-system
```

## References

- [OpenSandbox](https://github.com/alibaba/OpenSandbox)
- [Helm deployment docs](../../docs/HELM-DEPLOYMENT.md)


================================================
FILE: kubernetes/charts/opensandbox-server/templates/NOTES.txt
================================================
Thank you for installing {{ .Chart.Name }}!

Your release is named {{ .Release.Name }}.

To learn more about the release, try:

  $ helm status {{ .Release.Name }} -n {{ include "opensandbox-server.namespace" . }}
  $ helm get all {{ .Release.Name }} -n {{ include "opensandbox-server.namespace" . }}

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

OpenSandbox Lifecycle API server has been installed.

Verify the installation:

  kubectl --namespace {{ include "opensandbox-server.namespace" . }} get pods -l "app.kubernetes.io/name={{ include "opensandbox-server.name" . }}"
  kubectl --namespace {{ include "opensandbox-server.namespace" . }} get svc -l "app.kubernetes.io/name={{ include "opensandbox-server.name" . }}"

The server exposes the Lifecycle API (create/delete sandboxes, etc.).
{{- if .Values.server.gateway.enabled }}

Server config [ingress]: mode=gateway, gateway.address={{ .Values.server.gateway.host }}, gateway.route.mode={{ .Values.server.gateway.gatewayRouteMode }}. Ingress gateway (components/ingress) is deployed in this release.
{{- else }}

Port-forward to access locally:

  kubectl port-forward -n {{ include "opensandbox-server.namespace" . }} svc/{{ include "opensandbox-server.fullname" . }} 8080:80

Then use the API at http://localhost:8080 (set api_key in config if required).
{{- end }}

Documentation: https://github.com/alibaba/OpenSandbox


================================================
FILE: kubernetes/charts/opensandbox-server/templates/_helpers.tpl
================================================
{{/*
Short chart name: .Values.nameOverride when set, otherwise "opensandbox-server".
The result is cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "opensandbox-server.name" -}}
{{- $name := .Values.nameOverride | default "opensandbox-server" -}}
{{- $name | trunc 63 | trimSuffix "-" -}}
{{- end }}

{{/*
Fully qualified app name, resolved in this order:
  1. .Values.fullnameOverride, when set;
  2. the release name alone, when it already contains the base name;
  3. "<release name>-<base name>" otherwise.
The base name is .Values.nameOverride, falling back to the chart name.
Whatever is chosen is cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "opensandbox-server.fullname" -}}
{{- $base := .Values.nameOverride | default .Chart.Name -}}
{{- $full := printf "%s-%s" .Release.Name $base -}}
{{- if .Values.fullnameOverride -}}
{{- $full = .Values.fullnameOverride -}}
{{- else if contains $base .Release.Name -}}
{{- $full = .Release.Name -}}
{{- end -}}
{{- $full | trunc 63 | trimSuffix "-" -}}
{{- end }}

{{/*
"<chart name>-<chart version>" for the helm.sh/chart label.
"+" is replaced with "_", then the value is cut to 63 characters and a
trailing "-" is removed.
*/}}
{{- define "opensandbox-server.chart" -}}
{{- $nameVersion := printf "%s-%s" .Chart.Name .Chart.Version -}}
{{- $nameVersion | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end }}

{{/*
Common labels: chart label, selector labels, app version (only when the chart
declares one), managing service, component and part-of.
*/}}
{{- define "opensandbox-server.labels" -}}
helm.sh/chart: {{ include "opensandbox-server.chart" . }}
{{ include "opensandbox-server.selectorLabels" . }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ quote . }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
app.kubernetes.io/component: opensandbox-server
app.kubernetes.io/part-of: opensandbox
{{- end }}

{{/*
Selector labels: the name/instance label pair, built from the
"opensandbox-server.name" helper and the release name. Also embedded in
"opensandbox-server.labels".
*/}}
{{- define "opensandbox-server.selectorLabels" -}}
app.kubernetes.io/name: {{ include "opensandbox-server.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Target namespace: .Values.namespaceOverride when set, otherwise
"opensandbox-system".
*/}}
{{- define "opensandbox-server.namespace" -}}
{{- default "opensandbox-system" .Values.namespaceOverride -}}
{{- end }}

{{/*
ServiceAccount name: identical to the "opensandbox-server.fullname" helper.
NOTE(review): the original note says the chart always creates this
ServiceAccount; the template that does so is not in this file - confirm.
*/}}
{{- define "opensandbox-server.serviceAccountName" -}}
{{- include "opensandbox-server.fullname" . }}
{{- end }}

{{/*
Server image reference "<repository>:<tag>".
The tag is .Values.server.image.tag, falling back to the chart appVersion.
A tag that starts with a bare semver ("1.2.3...") gets a "v" prefix; a tag
that already starts with "v" never matches the digit-anchored pattern, so it
is left as is, as is any other tag.
*/}}
{{- define "opensandbox-server.serverImage" -}}
{{- $tag := default .Chart.AppVersion .Values.server.image.tag -}}
{{- if regexMatch "^[0-9]+\\.[0-9]+\\.[0-9]+" $tag -}}
{{- $tag = printf "v%s" $tag -}}
{{- end -}}
{{- printf "%s:%s" .Values.server.image.repository $tag -}}
{{- end }}

{{/*
Image pull policy: .Values.server.image.pullPolicy, or "IfNotPresent" when unset.
*/}}
{{- define "opensandbox-server.imagePullPolicy" -}}
{{- default "IfNotPresent" .Values.server.image.pullPolicy -}}
{{- end }}

{{/*
RBAC apiVersion: "rbac.authorization.k8s.io/v1" when the cluster advertises it,
otherwise "rbac.authorization.k8s.io/v1beta1".
*/}}
{{- define "opensandbox-server.rbac.apiVersion" -}}
{{- .Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1" | ternary "rbac.authorization.k8s.io/v1" "rbac.authorization.k8s.io/v1beta1" -}}
{{- end }}

{{/*
Role name for the server: the chart fullname with a "-role" suffix.
*/}}
{{- define "opensandbox-server.roleName" -}}
{{- printf "%s-role" (include "opensandbox-server.fullname" .) -}}
{{- end }}

{{/*
Render the [ingress] TOML block from .Values.server.gateway.

- server.gateway.enabled=true: mode = "gateway", followed by a blank line and
  the gateway.address (server.gateway.host) and gateway.route.mode
  (server.gateway.gatewayRouteMode) keys.
- otherwise: only mode = "direct".

All values are emitted through `quote`. The whitespace-trimming markers are
deliberate: the rendered block has no leading or trailing blank lines.
*/}}
{{- define "opensandbox-server.ingressConfigToml" -}}
[ingress]
mode = {{ .Values.server.gateway.enabled | ternary "gateway" "direct" | quote }}
{{- if .Values.server.gateway.enabled }}

gateway.address = {{ .Values.server.gateway.host | quote }}
gateway.route.mode = {{ .Values.server.gateway.gatewayRouteMode | quote }}
{{- end }}

{{- end }}

{{/*
Fixed resource name for the ingress gateway. It is a constant: it does not
depend on the release name, nameOverride or fullnameOverride.
*/}}
{{- define "opensandbox-server.ingressGatewayFullname" -}}
opensandbox-ingress-gateway
{{- end }}

{{/*
Selector labels for the ingress gateway: the fixed gateway name plus the
release name as the instance.
*/}}
{{- define "opensandbox-server.ingressGatewaySelectorLabels" -}}
app.kubernetes.io/name: {{ include "opensandbox-server.ingressGatewayFullname" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Ingress gateway image reference "<repository>:<tag>".
The tag is .Values.server.gateway.image.tag, or "v1.0.2" when unset.
*/}}
{{- define "opensandbox-server.ingressGatewayImage" -}}
{{- $gatewayTag := default "v1.0.2" .Values.server.gateway.image.tag -}}
{{- printf "%s:%s" .Values.server.gateway.image.repository $gatewayTag -}}
{{- end }}


================================================
FILE: kubernetes/charts/opensandbox-server/templates/ingress-gateway.yaml
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
# Gateway (components/ingress): proxies sandbox traffic, aligned with server config [ingress].
# Includes ServiceAccount, Role, RoleBinding, Deployment, Service.
{{- if .Values.server.gateway.enabled }}
---
# Identity the gateway pods run under (referenced by the gateway Deployment's serviceAccountName).
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: {{ include "opensandbox-server.namespace" . }}
  name: {{ include "opensandbox-server.ingressGatewayFullname" . }}
  labels:
    app.kubernetes.io/part-of: opensandbox
    app.kubernetes.io/name: {{ include "opensandbox-server.ingressGatewayFullname" . }}
---
# Read-only permissions for the gateway, scoped to the dataplane namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  namespace: {{ .Values.server.gateway.dataplaneNamespace }}
  name: {{ include "opensandbox-server.ingressGatewayFullname" . }}-reader
  labels:
    app.kubernetes.io/part-of: opensandbox
    app.kubernetes.io/name: {{ include "opensandbox-server.ingressGatewayFullname" . }}
rules:
  # Core resources: pods, their status, and services.
  - verbs:
      - get
      - list
      - watch
    apiGroups:
      - ""
    resources:
      - pods
      - pods/status
      - services
  # BatchSandbox custom resources and their status.
  - verbs:
      - get
      - list
      - watch
    apiGroups:
      - sandbox.opensandbox.io
    resources:
      - batchsandboxes
      - batchsandboxes/status
---
# Grants the "-reader" Role in the dataplane namespace to the gateway ServiceAccount,
# which lives in the chart namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  namespace: {{ .Values.server.gateway.dataplaneNamespace }}
  name: {{ include "opensandbox-server.ingressGatewayFullname" . }}-reader
  labels:
    app.kubernetes.io/part-of: opensandbox
    app.kubernetes.io/name: {{ include "opensandbox-server.ingressGatewayFullname" . }}
subjects:
  - kind: ServiceAccount
    namespace: {{ include "opensandbox-server.namespace" . }}
    name: {{ include "opensandbox-server.ingressGatewayFullname" . }}
roleRef:
  kind: Role
  apiGroup: rbac.authorization.k8s.io
  name: {{ include "opensandbox-server.ingressGatewayFullname" . }}-reader
---
# Gateway Deployment. Every tunable below comes from server.gateway in values.yaml.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "opensandbox-server.ingressGatewayFullname" . }}
  namespace: {{ include "opensandbox-server.namespace" . }}
  labels:
    helm.sh/chart: {{ include "opensandbox-server.chart" . }}
    {{- include "opensandbox-server.ingressGatewaySelectorLabels" . | nindent 4 }}
    app.kubernetes.io/component: ingress-gateway
    app.kubernetes.io/part-of: opensandbox
spec:
  replicas: {{ .Values.server.gateway.replicaCount }}
  selector:
    # Must stay in sync with the pod template labels and the gateway Service selector;
    # all three render the same selector-labels helper.
    matchLabels:
      {{- include "opensandbox-server.ingressGatewaySelectorLabels" . | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "opensandbox-server.ingressGatewaySelectorLabels" . | nindent 8 }}
        app.kubernetes.io/component: ingress-gateway
        app.kubernetes.io/part-of: opensandbox
    spec:
      serviceAccountName: {{ include "opensandbox-server.ingressGatewayFullname" . }}
      containers:
        - name: main
          image: {{ include "opensandbox-server.ingressGatewayImage" . }}
          # Pull policy is fixed here; it is not read from values.
          imagePullPolicy: IfNotPresent
          args:
            # dataplaneNamespace is also the namespace of the gateway's reader Role/RoleBinding.
            - "--namespace={{ .Values.server.gateway.dataplaneNamespace }}"
            # Same value as containerPort below and the Service targetPort.
            - "--port={{ .Values.server.gateway.port }}"
            - "--provider-type={{ .Values.server.gateway.providerType }}"
            # Same value the server config receives as gateway.route.mode in [ingress].
            - "--mode={{ .Values.server.gateway.gatewayRouteMode }}"
            - "--log-level={{ .Values.server.gateway.logLevel }}"
          ports:
            - name: http
              containerPort: {{ .Values.server.gateway.port }}
              protocol: TCP
          # Both probes issue HTTP GET /status.ok against the named "http" port.
          livenessProbe:
            httpGet:
              path: /status.ok
              port: http
            initialDelaySeconds: 10
            periodSeconds: 15
            timeoutSeconds: 5
          readinessProbe:
            httpGet:
              path: /status.ok
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 3
          resources:
            {{- toYaml .Values.server.gateway.resources | nindent 12 }}
---
# ClusterIP Service for the gateway: port 80 forwards to the gateway's configured container port.
apiVersion: v1
kind: Service
metadata:
  namespace: {{ include "opensandbox-server.namespace" . }}
  name: {{ include "opensandbox-server.ingressGatewayFullname" . }}
  labels:
    app.kubernetes.io/part-of: opensandbox
    app.kubernetes.io/component: ingress-gateway
    helm.sh/chart: {{ include "opensandbox-server.chart" . }}
    {{- include "opensandbox-server.ingressGatewaySelectorLabels" . | nindent 4 }}
spec:
  type: ClusterIP
  selector:
    {{- include "opensandbox-server.ingressGatewaySelectorLabels" . | nindent 4 }}
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: {{ .Values.server.gateway.port }}
{{- end }}


================================================
FILE: kubernetes/charts/opensandbox-server/templates/server.yaml
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
# Server resources: ServiceAccount, ClusterRole, ClusterRoleBinding, ConfigMap, Deployment, Service.
---
# Identity the server pods run under; it is the subject of the ClusterRoleBinding in this file.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: {{ include "opensandbox-server.namespace" . }}
  name: {{ include "opensandbox-server.serviceAccountName" . }}
  labels:
    {{- include "opensandbox-server.labels" . | nindent 4 }}
---
# Cluster-wide permissions for the server. ClusterRole is cluster-scoped, so it carries
# no namespace and its name must be unique across the whole cluster.
apiVersion: {{ include "opensandbox-server.rbac.apiVersion" . }}
kind: ClusterRole
metadata:
  name: {{ include "opensandbox-server.roleName" . }}
  labels:
    {{- include "opensandbox-server.labels" . | nindent 4 }}
rules:
  # Full read/write on core workload objects.
  - apiGroups: [""]
    resources: ["pods", "pods/status", "events", "services", "configmaps"]
    verbs: ["create", "delete", "get", "list", "patch", "update", "watch"]
  # Secrets are deliberately narrower: no list/watch/patch/update.
  - apiGroups: [""]
    resources: ["secrets"]
    verbs: ["create", "delete", "get"]
  # Read-only lookup of RuntimeClasses.
  - apiGroups: ["node.k8s.io"]
    resources: ["runtimeclasses"]
    verbs: ["get", "list"]
  # Full read/write on BatchSandbox custom resources, including status and finalizers.
  - apiGroups: ["sandbox.opensandbox.io"]
    resources: ["batchsandboxes", "batchsandboxes/status", "batchsandboxes/finalizers"]
    verbs: ["create", "delete", "get", "list", "patch", "update", "watch"]
  # Full read/write on Pool custom resources, including status and finalizers.
  - apiGroups: ["sandbox.opensandbox.io"]
    resources: ["pools", "pools/status", "pools/finalizers"]
    verbs: ["create", "delete", "get", "list", "patch", "update", "watch"]
---
# Binds the server ClusterRole to the server ServiceAccount in the chart namespace.
apiVersion: {{ include "opensandbox-server.rbac.apiVersion" . }}
kind: ClusterRoleBinding
metadata:
  name: {{ include "opensandbox-server.fullname" . }}-rolebinding
  labels:
    {{- include "opensandbox-server.labels" . | nindent 4 }}
subjects:
  - kind: ServiceAccount
    namespace: {{ include "opensandbox-server.namespace" . }}
    name: {{ include "opensandbox-server.serviceAccountName" . }}
roleRef:
  kind: ClusterRole
  apiGroup: rbac.authorization.k8s.io
  name: {{ include "opensandbox-server.roleName" . }}
---
# Server configuration. config.toml is the user-supplied configToml value followed by
# the chart-generated [ingress] table (derived from server.gateway).
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "opensandbox-server.fullname" . }}-config
  namespace: {{ include "opensandbox-server.namespace" . }}
  labels:
    {{- include "opensandbox-server.labels" . | nindent 4 }}
data:
  # The two template lines below start at column 0 on purpose: "indent 4" supplies the
  # block-scalar indentation for every rendered line, so do not indent them in the template.
  # Because [ingress] is always appended, configToml must not define an [ingress] table
  # itself (TOML does not allow a table to be defined twice).
  config.toml: |
{{ .Values.configToml | indent 4 }}
{{ include "opensandbox-server.ingressConfigToml" . | indent 4 }}
---
# Server Deployment. Reads its configuration from the "-config" ConfigMap, mounted as a
# single file at /etc/opensandbox/config.toml.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "opensandbox-server.fullname" . }}
  namespace: {{ include "opensandbox-server.namespace" . }}
  labels:
    {{- include "opensandbox-server.labels" . | nindent 4 }}
spec:
  replicas: {{ .Values.server.replicaCount }}
  selector:
    matchLabels:
      {{- include "opensandbox-server.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "opensandbox-server.selectorLabels" . | nindent 8 }}
        app.kubernetes.io/part-of: opensandbox
      annotations:
        # Hash of the exact inputs that make up config.toml (configToml plus the generated
        # [ingress] table). The config file is mounted via subPath, which Kubernetes does not
        # refresh when the ConfigMap changes, so without this annotation a config change on
        # "helm upgrade" would never reach running pods. Changing the hash changes the pod
        # template and triggers a rolling restart.
        # The inputs are hashed directly because the ConfigMap lives in this same template
        # file and therefore cannot be included by path.
        checksum/config: {{ printf "%s\n%s" .Values.configToml (include "opensandbox-server.ingressConfigToml" .) | sha256sum | quote }}
    spec:
      serviceAccountName: {{ include "opensandbox-server.serviceAccountName" . }}
      containers:
        - name: main
          image: {{ include "opensandbox-server.serverImage" . }}
          imagePullPolicy: {{ include "opensandbox-server.imagePullPolicy" . }}
          args:
            - "--config"
            - "/etc/opensandbox/config.toml"
          ports:
            # Fixed at 80; the default configToml sets [server] port = 80 to match.
            - name: http
              containerPort: 80
              protocol: TCP
          env:
            # Same path as the --config argument above.
            - name: SANDBOX_CONFIG_PATH
              value: "/etc/opensandbox/config.toml"
          volumeMounts:
            # subPath mounts only the config.toml key as a file instead of shadowing the
            # whole /etc/opensandbox directory.
            - name: config
              mountPath: /etc/opensandbox/config.toml
              subPath: config.toml
              readOnly: true
          # Both probes issue HTTP GET /health against the named "http" port.
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 15
            timeoutSeconds: 5
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 3
          resources:
            {{- toYaml .Values.server.resources | nindent 12 }}
      volumes:
        - name: config
          configMap:
            name: {{ include "opensandbox-server.fullname" . }}-config
      {{- with .Values.server.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.server.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
---
# ClusterIP Service for the server: port 80 forwards to the container's named "http" port.
apiVersion: v1
kind: Service
metadata:
  namespace: {{ include "opensandbox-server.namespace" . }}
  name: {{ include "opensandbox-server.fullname" . }}
  labels:
    {{- include "opensandbox-server.labels" . | nindent 4 }}
spec:
  type: ClusterIP
  selector:
    {{- include "opensandbox-server.selectorLabels" . | nindent 4 }}
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: http


================================================
FILE: kubernetes/charts/opensandbox-server/values.yaml
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
# Default values for opensandbox-server.

# -- Override the name of the chart
nameOverride: ""
# -- Resource names and app.kubernetes.io/name are fixed to this value, independent of release name.
# NOTE(review): the server ClusterRole and ClusterRoleBinding names are derived from the
# fullname and are cluster-scoped, so a second release that keeps this default would
# presumably collide with the first — set a distinct value per release; confirm against the
# fullname helper.
fullnameOverride: "opensandbox-server"

# -- Override the namespace (default: opensandbox-system)
namespaceOverride: ""

# Server configuration
server:
  # -- Server image configuration
  image:
    repository: sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/server
    tag: "v0.1.7"

  # -- Number of server replicas
  replicaCount: 2

  # -- Resource requests and limits
  resources:
    limits:
      cpu: "2"
      memory: 8Gi
    requests:
      cpu: "1"
      memory: 4Gi

  # -- Pod tolerations for the server Deployment (omitted from the manifest when empty)
  tolerations: []
  # -- Pod affinity for the server Deployment (omitted from the manifest when empty)
  affinity: {}

  # Gateway (components/ingress): when enabled, writes config [ingress] and deploys the gateway
  gateway:
    # -- true: deploy the gateway resources and render [ingress] mode = "gateway";
    # false: render [ingress] mode = "direct" and deploy nothing.
    enabled: false
    # -- Written to the server config as gateway.address (only when enabled)
    host: opensandbox.example.com
    # -- Written to the server config as gateway.route.mode and passed to the gateway as --mode
    gatewayRouteMode: "header"
    # -- Gateway image configuration
    image:
      repository: sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/ingress
      tag: "v1.0.4"
    # -- Number of gateway replicas
    replicaCount: 2
    # -- Gateway listen port: passed as --port, used as containerPort, and as the targetPort
    # of the gateway Service (which itself listens on port 80)
    port: 28888
    # -- Passed to the gateway as --namespace; the gateway's read-only Role and RoleBinding
    # are created in this namespace.
    # NOTE(review): presumably must match [kubernetes] namespace in configToml — confirm.
    dataplaneNamespace: "opensandbox"
    # -- Passed to the gateway as --provider-type
    providerType: "batchsandbox"
    # -- Passed to the gateway as --log-level
    logLevel: "info"
    # -- Gateway resource requests and limits
    resources:
      limits:
        cpu: "2"
        memory: 8Gi
      requests:
        cpu: "1"
        memory: 4Gi

# -- Server config (TOML). Mounted at /etc/opensandbox/config.toml.
# The chart appends an [ingress] table generated from server.gateway after this text, so do
# not define [ingress] here (TOML does not allow a table to be defined twice).
# [server] port should stay 80: the server Deployment exposes containerPort 80.
# NOTE(review): api_key is empty by default — confirm what an empty key means for
# authentication before exposing the server.
configToml: |
  [server]
  host = "0.0.0.0"
  port = 80
  log_level = "INFO"
  api_key = ""

  [runtime]
  type = "kubernetes"
  execd_image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.7"

  [kubernetes]
  kubeconfig_path = ""
  namespace = "opensandbox"
  informer_enabled = true
  informer_resync_seconds = 300
  informer_watch_timeout_seconds = 60
  workload_provider = "batchsandbox"
  batchsandbox_template_file = "/etc/opensandbox/example.batchsandbox-template.yaml"
  
  [egress]
  image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/egress:v1.0.3"
  mode = "dns+nft"


================================================
FILE: kubernetes/cmd/controller/main.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"crypto/tls"
	"flag"
	"os"
	"path/filepath"

	// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
	// to ensure that exec-entrypoint and run can make use of them.
	_ "k8s.io/client-go/plugin/pkg/client/auth"

	"k8s.io/apimachinery/pkg/runtime"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/certwatcher"
	"sigs.k8s.io/controller-runtime/pkg/healthz"
	"sigs.k8s.io/controller-runtime/pkg/log/zap"
	"sigs.k8s.io/controller-runtime/pkg/metrics/filters"
	metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
	"sigs.k8s.io/controller-runtime/pkg/webhook"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/controller"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils/fieldindex"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils/logging"
	// +kubebuilder:scaffold:imports
)

// scheme is the runtime.Scheme handed to the manager; init registers the API types on it.
var scheme = runtime.NewScheme()

// setupLog is the logger used while main wires up the manager.
var setupLog = ctrl.Log.WithName("setup")

func init() {
	utilruntime.Must(clientgoscheme.AddToScheme(scheme))

	utilruntime.Must(sandboxv1alpha1.AddToScheme(scheme))
	// +kubebuilder:scaffold:scheme
}

// main parses the command-line flags, configures logging and TLS, builds the
// controller-runtime manager with the BatchSandbox and Pool reconcilers, and runs it
// until a termination signal arrives. Any setup failure is logged and exits with status 1.
//
// nolint:gocyclo
func main() {
	var metricsAddr string
	var metricsCertPath, metricsCertName, metricsCertKey string
	var webhookCertPath, webhookCertName, webhookCertKey string
	var enableLeaderElection bool
	var probeAddr string
	var secureMetrics bool
	var enableHTTP2 bool
	var tlsOpts []func(*tls.Config)

	// Log file options
	var enableFileLog bool
	var logFilePath string
	var logMaxSize int
	var logMaxBackups int
	var logMaxAge int
	var logCompress bool

	// Kubernetes client rate limiter options
	var kubeClientQPS float64
	var kubeClientBurst int

	flag.StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metrics endpoint binds to. "+
		"Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.")
	flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
	flag.BoolVar(&enableLeaderElection, "leader-elect", false,
		"Enable leader election for controller manager. "+
			"Enabling this will ensure there is only one active controller manager.")
	flag.BoolVar(&secureMetrics, "metrics-secure", true,
		"If set, the metrics endpoint is served securely via HTTPS. Use --metrics-secure=false to use HTTP instead.")
	flag.StringVar(&webhookCertPath, "webhook-cert-path", "", "The directory that contains the webhook certificate.")
	flag.StringVar(&webhookCertName, "webhook-cert-name", "tls.crt", "The name of the webhook certificate file.")
	flag.StringVar(&webhookCertKey, "webhook-cert-key", "tls.key", "The name of the webhook key file.")
	flag.StringVar(&metricsCertPath, "metrics-cert-path", "",
		"The directory that contains the metrics server certificate.")
	flag.StringVar(&metricsCertName, "metrics-cert-name", "tls.crt", "The name of the metrics server certificate file.")
	flag.StringVar(&metricsCertKey, "metrics-cert-key", "tls.key", "The name of the metrics server key file.")
	flag.BoolVar(&enableHTTP2, "enable-http2", false,
		"If set, HTTP/2 will be enabled for the metrics and webhook servers")

	// Log file flags
	flag.BoolVar(&enableFileLog, "enable-file-log", false, "Enable log output to file")
	flag.StringVar(&logFilePath, "log-file-path", "/var/log/sandbox-controller/controller.log", "Path to the log file")
	flag.IntVar(&logMaxSize, "log-max-size", 100, "Maximum size in megabytes of the log file before it gets rotated")
	flag.IntVar(&logMaxBackups, "log-max-backups", 10, "Maximum number of old log files to retain")
	flag.IntVar(&logMaxAge, "log-max-age", 30, "Maximum number of days to retain old log files")
	flag.BoolVar(&logCompress, "log-compress", true, "Compress determines if the rotated log files should be compressed using gzip")
	flag.Float64Var(&kubeClientQPS, "kube-client-qps", 100, "QPS for Kubernetes client rate limiter.")
	flag.IntVar(&kubeClientBurst, "kube-client-burst", 200, "Burst for Kubernetes client rate limiter.")

	// The zap flags must be bound before flag.Parse so they are recognized on the command line.
	opts := zap.Options{}
	opts.BindFlags(flag.CommandLine)

	flag.Parse()

	// Setup logger with file rotation support
	logOpts := logging.Options{
		Development:      opts.Development,
		EnableFileOutput: enableFileLog,
		LogFilePath:      logFilePath,
		MaxSize:          logMaxSize,
		MaxBackups:       logMaxBackups,
		MaxAge:           logMaxAge,
		Compress:         logCompress,
		ZapOptions:       opts,
	}

	logger := logging.NewLoggerWithZapOptions(logOpts)
	ctrl.SetLogger(logger)

	// if the enable-http2 flag is false (the default), http/2 should be disabled
	// due to its vulnerabilities. More specifically, disabling http/2 will
	// prevent from being vulnerable to the HTTP/2 Stream Cancellation and
	// Rapid Reset CVEs. For more information see:
	// - https://github.com/advisories/GHSA-qppj-fm5r-hxr3
	// - https://github.com/advisories/GHSA-4374-p667-p6c8
	disableHTTP2 := func(c *tls.Config) {
		setupLog.Info("disabling http/2")
		c.NextProtos = []string{"http/1.1"}
	}

	if !enableHTTP2 {
		tlsOpts = append(tlsOpts, disableHTTP2)
	}

	// Create watchers for metrics and webhooks certificates
	var metricsCertWatcher, webhookCertWatcher *certwatcher.CertWatcher

	// Initial webhook TLS options
	webhookTLSOpts := tlsOpts

	if len(webhookCertPath) > 0 {
		setupLog.Info("Initializing webhook certificate watcher using provided certificates",
			"webhook-cert-path", webhookCertPath, "webhook-cert-name", webhookCertName, "webhook-cert-key", webhookCertKey)

		var err error
		webhookCertWatcher, err = certwatcher.New(
			filepath.Join(webhookCertPath, webhookCertName),
			filepath.Join(webhookCertPath, webhookCertKey),
		)
		if err != nil {
			setupLog.Error(err, "Failed to initialize webhook certificate watcher")
			os.Exit(1)
		}

		webhookTLSOpts = append(webhookTLSOpts, func(config *tls.Config) {
			config.GetCertificate = webhookCertWatcher.GetCertificate
		})
	}

	webhookServer := webhook.NewServer(webhook.Options{
		TLSOpts: webhookTLSOpts,
	})

	// Metrics endpoint is enabled in 'config/default/kustomization.yaml'. The Metrics options configure the server.
	// More info:
	// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.21.0/pkg/metrics/server
	// - https://book.kubebuilder.io/reference/metrics.html
	metricsServerOptions := metricsserver.Options{
		BindAddress:   metricsAddr,
		SecureServing: secureMetrics,
		TLSOpts:       tlsOpts,
	}

	if secureMetrics {
		// FilterProvider is used to protect the metrics endpoint with authn/authz.
		// These configurations ensure that only authorized users and service accounts
		// can access the metrics endpoint. The RBAC are configured in 'config/rbac/kustomization.yaml'. More info:
		// https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.21.0/pkg/metrics/filters#WithAuthenticationAndAuthorization
		metricsServerOptions.FilterProvider = filters.WithAuthenticationAndAuthorization
	}

	// If the certificate is not specified, controller-runtime will automatically
	// generate self-signed certificates for the metrics server. While convenient for development and testing,
	// this setup is not recommended for production.
	//
	// TODO(user): If you enable certManager, uncomment the following lines:
	// - [METRICS-WITH-CERTS] at config/default/kustomization.yaml to generate and use certificates
	// managed by cert-manager for the metrics server.
	// - [PROMETHEUS-WITH-CERTS] at config/prometheus/kustomization.yaml for TLS certification.
	if len(metricsCertPath) > 0 {
		setupLog.Info("Initializing metrics certificate watcher using provided certificates",
			"metrics-cert-path", metricsCertPath, "metrics-cert-name", metricsCertName, "metrics-cert-key", metricsCertKey)

		var err error
		metricsCertWatcher, err = certwatcher.New(
			filepath.Join(metricsCertPath, metricsCertName),
			filepath.Join(metricsCertPath, metricsCertKey),
		)
		if err != nil {
			// The error is already the first argument; matches the webhook watcher message above.
			setupLog.Error(err, "Failed to initialize metrics certificate watcher")
			os.Exit(1)
		}

		metricsServerOptions.TLSOpts = append(metricsServerOptions.TLSOpts, func(config *tls.Config) {
			config.GetCertificate = metricsCertWatcher.GetCertificate
		})
	}

	config := ctrl.GetConfigOrDie()
	// Set client rate limiter if specified; non-positive values leave the client defaults untouched.
	if kubeClientQPS > 0 {
		config.QPS = float32(kubeClientQPS)
	}
	if kubeClientBurst > 0 {
		config.Burst = kubeClientBurst
	}

	mgr, err := ctrl.NewManager(config, ctrl.Options{
		Scheme:                 scheme,
		Metrics:                metricsServerOptions,
		WebhookServer:          webhookServer,
		HealthProbeBindAddress: probeAddr,
		LeaderElection:         enableLeaderElection,
		LeaderElectionID:       "2fa1c467.opensandbox.io",
		// LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily
		// when the Manager ends. This requires the binary to immediately end when the
		// Manager is stopped, otherwise, this setting is unsafe. Setting this significantly
		// speeds up voluntary leader transitions as the new leader don't have to wait
		// LeaseDuration time first.
		//
		// In the default scaffold provided, the program ends immediately after
		// the manager stops, so would be fine to enable this option. However,
		// if you are doing or is intended to do any operation such as perform cleanups
		// after the manager stops then its usage might be unsafe.
		// LeaderElectionReleaseOnCancel: true,
	})
	if err != nil {
		setupLog.Error(err, "unable to start manager")
		os.Exit(1)
	}
	// Field indexes are registered on the manager's cache before the reconcilers are set up.
	setupLog.Info("register field index")
	if err := fieldindex.RegisterFieldIndexes(mgr.GetCache()); err != nil {
		setupLog.Error(err, "failed to register field index")
		os.Exit(1)
	}
	if err := (&controller.BatchSandboxReconciler{
		Client:   mgr.GetClient(),
		Scheme:   mgr.GetScheme(),
		Recorder: mgr.GetEventRecorderFor("batchsandbox-controller"),
	}).SetupWithManager(mgr); err != nil {
		setupLog.Error(err, "unable to create controller", "controller", "BatchSandbox")
		os.Exit(1)
	}
	if err := (&controller.PoolReconciler{
		Client:    mgr.GetClient(),
		Scheme:    mgr.GetScheme(),
		Recorder:  mgr.GetEventRecorderFor("pool-controller"),
		Allocator: controller.NewDefaultAllocator(mgr.GetClient()),
	}).SetupWithManager(mgr); err != nil {
		setupLog.Error(err, "unable to create controller", "controller", "Pool")
		os.Exit(1)
	}
	// +kubebuilder:scaffold:builder

	// The certificate watchers are added to the manager so they start and stop with it.
	if metricsCertWatcher != nil {
		setupLog.Info("Adding metrics certificate watcher to manager")
		if err := mgr.Add(metricsCertWatcher); err != nil {
			setupLog.Error(err, "unable to add metrics certificate watcher to manager")
			os.Exit(1)
		}
	}

	if webhookCertWatcher != nil {
		setupLog.Info("Adding webhook certificate watcher to manager")
		if err := mgr.Add(webhookCertWatcher); err != nil {
			setupLog.Error(err, "unable to add webhook certificate watcher to manager")
			os.Exit(1)
		}
	}

	if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
		setupLog.Error(err, "unable to set up health check")
		os.Exit(1)
	}
	if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
		setupLog.Error(err, "unable to set up ready check")
		os.Exit(1)
	}

	setupLog.Info("starting manager")
	if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
		setupLog.Error(err, "problem running manager")
		os.Exit(1)
	}
}


================================================
FILE: kubernetes/cmd/task-executor/main.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"context"
	"fmt"
	"net/http"
	"os"
	"os/signal"
	"syscall"
	"time"

	"k8s.io/klog/v2"

	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/config"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/manager"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/runtime"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/server"
	store "github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/storage"
)

// main loads the task-executor configuration, builds the task store, executor and task
// manager, serves the HTTP API, and on SIGINT/SIGTERM shuts the HTTP server down (bounded
// by a 30s timeout) before stopping the task manager. Any startup failure exits with status 1.
func main() {
	// Load configuration
	cfg := config.NewConfig()
	cfg.LoadFromEnv()
	cfg.LoadFromFlags()
	if err := cfg.InitKlog(); err != nil {
		// klog could not be initialized, so report on stderr directly and keep the cause.
		fmt.Fprintf(os.Stderr, "failed to init klog: %v\n", err)
		os.Exit(1)
	}
	// Flush buffered log entries when main returns normally (os.Exit paths skip deferred calls).
	defer klog.Flush()
	klog.InfoS("task-executor starting", "dataDir", cfg.DataDir, "listenAddr", cfg.ListenAddr, "sidecarMode", cfg.EnableSidecarMode)

	// Initialize TaskStore
	taskStore, err := store.NewFileStore(cfg.DataDir)
	if err != nil {
		klog.ErrorS(err, "failed to create task store")
		os.Exit(1)
	}
	klog.InfoS("task store initialized", "dataDir", cfg.DataDir)

	// Initialize Executor
	exec, err := runtime.NewExecutor(cfg)
	if err != nil {
		klog.ErrorS(err, "failed to create executor")
		os.Exit(1)
	}

	// Initialize TaskManager
	taskManager, err := manager.NewTaskManager(cfg, taskStore, exec)
	if err != nil {
		klog.ErrorS(err, "failed to create task manager")
		os.Exit(1)
	}

	// Start TaskManager
	taskManager.Start(context.Background())
	klog.InfoS("task manager started")

	// Initialize HTTP Handler and Router
	handler := server.NewHandler(taskManager, cfg)
	router := server.NewRouter(handler)

	// Create HTTP Server
	svr := &http.Server{
		Addr:         cfg.ListenAddr,
		Handler:      router,
		ReadTimeout:  cfg.ReadTimeout,
		WriteTimeout: cfg.WriteTimeout,
	}

	// Start HTTP server in goroutine
	go func() {
		klog.InfoS("HTTP server listening", "address", cfg.ListenAddr)
		// http.ErrServerClosed is the expected result of svr.Shutdown below, not a failure.
		if err := svr.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			klog.ErrorS(err, "HTTP server error")
			os.Exit(1)
		}
	}()

	// Wait for interrupt signal. The channel is buffered so a signal delivered before the
	// receive below is not dropped.
	quit := make(chan os.Signal, 1)
	signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
	<-quit

	klog.InfoS("shutting down task-executor gracefully...")

	// Shutdown context with timeout
	shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer shutdownCancel()

	// 1. Stop HTTP server first
	if err := svr.Shutdown(shutdownCtx); err != nil {
		klog.ErrorS(err, "HTTP server shutdown error")
	} else {
		klog.InfoS("HTTP server stopped")
	}

	// 2. Stop TaskManager
	taskManager.Stop()
	klog.InfoS("task manager stopped")

	klog.InfoS("task-executor stopped successfully")
}


================================================
FILE: kubernetes/config/crd/bases/sandbox.opensandbox.io_batchsandboxes.yaml
================================================
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  annotations:
    controller-gen.kubebuilder.io/version: v0.18.0
  name: batchsandboxes.sandbox.opensandbox.io
spec:
  group: sandbox.opensandbox.io
  names:
    kind: BatchSandbox
    listKind: BatchSandboxList
    plural: batchsandboxes
    shortNames:
    - bsbx
    singular: batchsandbox
  scope: Namespaced
  versions:
  - additionalPrinterColumns:
    - description: The desired number of pods.
      jsonPath: .spec.replicas
      name: DESIRED
      type: integer
    - description: The number of currently all pods.
      jsonPath: .status.replicas
      name: TOTAL
      type: integer
    - description: The number of currently all allocated pods.
      jsonPath: .status.allocated
      name: ALLOCATED
      type: integer
    - description: The number of currently all ready pods.
      jsonPath: .status.ready
      name: Ready
      type: integer
    - description: The number of currently all running tasks.
      jsonPath: .status.taskRunning
      name: TASK_RUNNING
      priority: 1
      type: integer
    - description: The number of currently all succeed tasks.
      jsonPath: .status.taskSucceed
      name: TASK_SUCCEED
      priority: 1
      type: integer
    - description: The number of currently all failed tasks.
      jsonPath: .status.taskFailed
      name: TASK_FAILED
      priority: 1
      type: integer
    - description: The number of currently all unknown tasks.
      jsonPath: .status.taskUnknown
      name: TASK_UNKNOWN
      priority: 1
      type: integer
    - description: sandbox expire time
      jsonPath: .spec.expireTime
      name: EXPIRE
      type: string
    - description: CreationTimestamp is a timestamp representing the server time when
        this object was created. It is not guaranteed to be set in happens-before
        order across separate operations. Clients may not set this value. It is represented
        in RFC3339 form and is in UTC.
      jsonPath: .metadata.creationTimestamp
      name: AGE
      type: date
    name: v1alpha1
    schema:
      openAPIV3Schema:
        description: BatchSandbox is the Schema for the batchsandboxes API.
        properties:
          apiVersion:
            description: |-
              APIVersion defines the versioned schema of this representation of an object.
              Servers should convert recognized schemas to the latest internal value, and
              may reject unrecognized values.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
            type: string
          kind:
            description: |-
              Kind is a string value representing the REST resource this object represents.
              Servers may infer this from the endpoint the client submits requests to.
              Cannot be updated.
              In CamelCase.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
            type: string
          metadata:
            type: object
          spec:
            description: BatchSandboxSpec defines the desired state of BatchSandbox.
            properties:
              expireTime:
                description: |-
                  ExpireTime - Absolute time when the batch-sandbox is deleted.
                  If a time in the past is provided, the batch-sandbox will be deleted immediately.
                format: date-time
                type: string
              poolRef:
                description: |-
                  PoolRef references the Pool resource name for pooled sandbox creation.
                  Mutually exclusive with Template - use PoolRef for pool-based allocation or Template for direct sandbox creation.
                type: string
              replicas:
                default: 1
                description: Replicas is the number of desired replicas.
                format: int32
                minimum: 0
                type: integer
              shardPatches:
                description: ShardPatches indicates patching to the Template for BatchSandbox.
                x-kubernetes-preserve-unknown-fields: true
              shardTaskPatches:
                description: ShardTaskPatches indicates patching to the TaskTemplate
                  for individual Task.
                x-kubernetes-preserve-unknown-fields: true
              taskResourcePolicyWhenCompleted:
                default: Retain
                description: |-
                  TaskResourcePolicyWhenCompleted specifies how resources should be handled once a task reaches a completed state (SUCCEEDED or FAILED).
                  - Retain: Keep the resources until the BatchSandbox is deleted.
                  - Release: Free the resources immediately when the task completes.
                type: string
              taskTemplate:
                description: |-
                  Task is a custom task spec that is automatically dispatched after the sandbox is successfully created.
                  The Sandbox is responsible for managing the lifecycle of the task.
                x-kubernetes-preserve-unknown-fields: true
              template:
                description: Template describes the pods that will be created.
                x-kubernetes-preserve-unknown-fields: true
            required:
            - replicas
            type: object
          status:
            description: BatchSandboxStatus defines the observed state of BatchSandbox.
            properties:
              # Descriptions below are surfaced verbatim by `kubectl explain`; keep them free of
              # leading whitespace. NOTE(review): this CRD is produced by controller-gen, so the
              # matching Go field comments should be fixed too or the tab will return on regeneration.
              allocated:
                description: Allocated is the number of actual scheduled Pod
                format: int32
                type: integer
              observedGeneration:
                description: |-
                  ObservedGeneration is the most recent generation observed for this BatchSandbox. It corresponds to the
                  BatchSandbox's generation, which is updated on mutation by the API Server.
                format: int64
                type: integer
              ready:
                description: Ready is the number of actual Ready Pod
                format: int32
                type: integer
              replicas:
                description: Replicas is the number of actual Pods
                format: int32
                type: integer
              taskFailed:
                description: TaskFailed is the number of Failed task
                format: int32
                type: integer
              taskPending:
                description: TaskPending is the number of Pending task which is unassigned
                format: int32
                type: integer
              taskRunning:
                description: TaskRunning is the number of Running task
                format: int32
                type: integer
              taskSucceed:
                description: TaskSucceed is the number of Succeed task
                format: int32
                type: integer
              taskUnknown:
                description: TaskUnknown is the number of Unknown task
                format: int32
                type: integer
            required:
            - allocated
            - ready
            - replicas
            - taskFailed
            - taskPending
            - taskRunning
            - taskSucceed
            - taskUnknown
            type: object
        type: object
    served: true
    storage: true
    subresources:
      status: {}


================================================
FILE: kubernetes/config/crd/bases/sandbox.opensandbox.io_pools.yaml
================================================
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  annotations:
    controller-gen.kubebuilder.io/version: v0.18.0
  name: pools.sandbox.opensandbox.io
spec:
  group: sandbox.opensandbox.io
  names:
    kind: Pool
    listKind: PoolList
    plural: pools
    singular: pool
  scope: Namespaced
  versions:
  - additionalPrinterColumns:
    - description: The number of all nodes in pool.
      jsonPath: .status.total
      name: TOTAL
      type: integer
    - description: The number of allocated nodes in pool.
      jsonPath: .status.allocated
      name: ALLOCATED
      type: integer
    - description: The number of available nodes in pool.
      jsonPath: .status.available
      name: AVAILABLE
      type: integer
    name: v1alpha1
    schema:
      openAPIV3Schema:
        description: Pool is the Schema for the pools API.
        properties:
          apiVersion:
            description: |-
              APIVersion defines the versioned schema of this representation of an object.
              Servers should convert recognized schemas to the latest internal value, and
              may reject unrecognized values.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
            type: string
          kind:
            description: |-
              Kind is a string value representing the REST resource this object represents.
              Servers may infer this from the endpoint the client submits requests to.
              Cannot be updated.
              In CamelCase.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
            type: string
          metadata:
            type: object
          spec:
            description: PoolSpec defines the desired state of Pool.
            properties:
              capacitySpec:
                description: CapacitySpec controls the size of the resource pool.
                properties:
                  bufferMax:
                    description: BufferMax is the maximum number of nodes kept in
                      the warm buffer.
                    format: int32
                    minimum: 0
                    type: integer
                  bufferMin:
                    description: BufferMin is the minimum number of nodes that must
                      remain in the buffer.
                    format: int32
                    minimum: 0
                    type: integer
                  poolMax:
                    description: PoolMax is the maximum total number of nodes allowed
                      in the entire pool.
                    format: int32
                    minimum: 0
                    type: integer
                  poolMin:
                    description: PoolMin is the minimum total size of the pool.
                    format: int32
                    minimum: 0
                    type: integer
                required:
                - bufferMax
                - bufferMin
                - poolMax
                - poolMin
                type: object
              template:
                description: Pod Template used to create pre-warmed nodes in the pool.
                x-kubernetes-preserve-unknown-fields: true
            required:
            - capacitySpec
            type: object
          status:
            description: PoolStatus defines the observed state of Pool.
            properties:
              allocated:
                description: Allocated is the number of nodes currently allocated
                  to sandboxes.
                format: int32
                type: integer
              available:
                description: Available is the number of nodes currently available
                  in the pool.
                format: int32
                type: integer
              # NOTE(review): this CRD is produced by controller-gen; mirror this wording in the
              # Go field comment for PoolStatus so it survives regeneration.
              observedGeneration:
                description: |-
                  ObservedGeneration is the most recent generation observed for this Pool. It corresponds to the
                  Pool's generation, which is updated on mutation by the API Server.
                format: int64
                type: integer
              revision:
                description: Revision is the latest version of pool
                type: string
              total:
                description: Total is the total number of nodes in the pool.
                format: int32
                type: integer
            required:
            - allocated
            - available
            - revision
            - total
            type: object
        type: object
    served: true
    storage: true
    subresources:
      status: {}


================================================
FILE: kubernetes/config/crd/kustomization.yaml
================================================
# This kustomization.yaml is not intended to be run by itself,
# since it depends on service name and namespace that are out of this kustomize package.
# It should be run by config/default
resources:
- bases/sandbox.opensandbox.io_batchsandboxes.yaml
- bases/sandbox.opensandbox.io_pools.yaml
# +kubebuilder:scaffold:crdkustomizeresource

patches:
# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix.
# patches here are for enabling the conversion webhook for each CRD
# +kubebuilder:scaffold:crdkustomizewebhookpatch

# [WEBHOOK] To enable webhook, uncomment the following section
# the following config is for teaching kustomize how to do kustomization for CRDs.
#configurations:
#- kustomizeconfig.yaml


================================================
FILE: kubernetes/config/crd/kustomizeconfig.yaml
================================================
# This file is for teaching kustomize how to substitute name and namespace reference in CRD
nameReference:
- kind: Service
  version: v1
  fieldSpecs:
  - kind: CustomResourceDefinition
    version: v1
    group: apiextensions.k8s.io
    path: spec/conversion/webhook/clientConfig/service/name

namespace:
- kind: CustomResourceDefinition
  version: v1
  group: apiextensions.k8s.io
  path: spec/conversion/webhook/clientConfig/service/namespace
  create: false

varReference:
- path: metadata/annotations


================================================
FILE: kubernetes/config/default/cert_metrics_manager_patch.yaml
================================================
# This patch adds the args, volume mounts, and volumes to allow the manager to use the metrics-server certs.

# Add the volumeMount for the metrics-server certs
- op: add
  path: /spec/template/spec/containers/0/volumeMounts/-
  value:
    mountPath: /tmp/k8s-metrics-server/metrics-certs
    name: metrics-certs
    readOnly: true

# Add the --metrics-cert-path argument for the metrics server
- op: add
  path: /spec/template/spec/containers/0/args/-
  value: --metrics-cert-path=/tmp/k8s-metrics-server/metrics-certs

# Add the metrics-server certs volume configuration
- op: add
  path: /spec/template/spec/volumes/-
  value:
    name: metrics-certs
    secret:
      secretName: metrics-server-cert
      optional: false
      items:
        - key: ca.crt
          path: ca.crt
        - key: tls.crt
          path: tls.crt
        - key: tls.key
          path: tls.key


================================================
FILE: kubernetes/config/default/kustomization.yaml
================================================
# Adds namespace to all resources.
namespace: opensandbox-system

# Value of this field is prepended to the
# names of all resources, e.g. a deployment named
# "wordpress" becomes "alices-wordpress".
# Note that it should also match with the prefix (text before '-') of the namespace
# field above.
namePrefix: opensandbox-

# Labels to add to all resources and selectors.
#labels:
#- includeSelectors: true
#  pairs:
#    someName: someValue

resources:
- ../crd
- ../rbac
- ../manager
# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in
# crd/kustomization.yaml
#- ../webhook
# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required.
#- ../certmanager
# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'.
#- ../prometheus
# [METRICS] Expose the controller manager metrics service.
- metrics_service.yaml
# [NETWORK POLICY] Protect the /metrics endpoint and Webhook Server with NetworkPolicy.
# Only Pod(s) running in a namespace labeled with 'metrics: enabled' will be able to gather the metrics.
# Only CR(s) which require webhooks and are applied in namespaces labeled with 'webhooks: enabled' will
# be able to communicate with the Webhook Server.
#- ../network-policy

# Uncomment the patches line if you enable Metrics
patches:
# [METRICS] The following patch will enable the metrics endpoint using HTTPS and the port :8443.
# More info: https://book.kubebuilder.io/reference/metrics
- path: manager_metrics_patch.yaml
  target:
    kind: Deployment

# Uncomment the patches line if you enable Metrics and CertManager
# [METRICS-WITH-CERTS] To enable metrics protected with certManager, uncomment the following line.
# This patch will protect the metrics with certManager self-signed certs.
#- path: cert_metrics_manager_patch.yaml
#  target:
#    kind: Deployment

# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in
# crd/kustomization.yaml
#- path: manager_webhook_patch.yaml
#  target:
#    kind: Deployment

# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix.
# Uncomment the following replacements to add the cert-manager CA injection annotations
#replacements:
# - source: # Uncomment the following block to enable certificates for metrics
#     kind: Service
#     version: v1
#     name: controller-manager-metrics-service
#     fieldPath: metadata.name
#   targets:
#     - select:
#         kind: Certificate
#         group: cert-manager.io
#         version: v1
#         name: metrics-certs
#       fieldPaths:
#         - spec.dnsNames.0
#         - spec.dnsNames.1
#       options:
#         delimiter: '.'
#         index: 0
#         create: true
#     - select: # Uncomment the following to set the Service name for TLS config in Prometheus ServiceMonitor
#         kind: ServiceMonitor
#         group: monitoring.coreos.com
#         version: v1
#         name: controller-manager-metrics-monitor
#       fieldPaths:
#         - spec.endpoints.0.tlsConfig.serverName
#       options:
#         delimiter: '.'
#         index: 0
#         create: true
#
# - source:
#     kind: Service
#     version: v1
#     name: controller-manager-metrics-service
#     fieldPath: metadata.namespace
#   targets:
#     - select:
#         kind: Certificate
#         group: cert-manager.io
#         version: v1
#         name: metrics-certs
#       fieldPaths:
#         - spec.dnsNames.0
#         - spec.dnsNames.1
#       options:
#         delimiter: '.'
#         index: 1
#         create: true
#     - select: # Uncomment the following to set the Service namespace for TLS in Prometheus ServiceMonitor
#         kind: ServiceMonitor
#         group: monitoring.coreos.com
#         version: v1
#         name: controller-manager-metrics-monitor
#       fieldPaths:
#         - spec.endpoints.0.tlsConfig.serverName
#       options:
#         delimiter: '.'
#         index: 1
#         create: true
#
# - source: # Uncomment the following block if you have any webhook
#     kind: Service
#     version: v1
#     name: webhook-service
#     fieldPath: .metadata.name # Name of the service
#   targets:
#     - select:
#         kind: Certificate
#         group: cert-manager.io
#         version: v1
#         name: serving-cert
#       fieldPaths:
#         - .spec.dnsNames.0
#         - .spec.dnsNames.1
#       options:
#         delimiter: '.'
#         index: 0
#         create: true
# - source:
#     kind: Service
#     version: v1
#     name: webhook-service
#     fieldPath: .metadata.namespace # Namespace of the service
#   targets:
#     - select:
#         kind: Certificate
#         group: cert-manager.io
#         version: v1
#         name: serving-cert
#       fieldPaths:
#         - .spec.dnsNames.0
#         - .spec.dnsNames.1
#       options:
#         delimiter: '.'
#         index: 1
#         create: true
#
# - source: # Uncomment the following block if you have a ValidatingWebhook (--programmatic-validation)
#     kind: Certificate
#     group: cert-manager.io
#     version: v1
#     name: serving-cert # This name should match the one in certificate.yaml
#     fieldPath: .metadata.namespace # Namespace of the certificate CR
#   targets:
#     - select:
#         kind: ValidatingWebhookConfiguration
#       fieldPaths:
#         - .metadata.annotations.[cert-manager.io/inject-ca-from]
#       options:
#         delimiter: '/'
#         index: 0
#         create: true
# - source:
#     kind: Certificate
#     group: cert-manager.io
#     version: v1
#     name: serving-cert
#     fieldPath: .metadata.name
#   targets:
#     - select:
#         kind: ValidatingWebhookConfiguration
#       fieldPaths:
#         - .metadata.annotations.[cert-manager.io/inject-ca-from]
#       options:
#         delimiter: '/'
#         index: 1
#         create: true
#
# - source: # Uncomment the following block if you have a DefaultingWebhook (--defaulting )
#     kind: Certificate
#     group: cert-manager.io
#     version: v1
#     name: serving-cert
#     fieldPath: .metadata.namespace # Namespace of the certificate CR
#   targets:
#     - select:
#         kind: MutatingWebhookConfiguration
#       fieldPaths:
#         - .metadata.annotations.[cert-manager.io/inject-ca-from]
#       options:
#         delimiter: '/'
#         index: 0
#         create: true
# - source:
#     kind: Certificate
#     group: cert-manager.io
#     version: v1
#     name: serving-cert
#     fieldPath: .metadata.name
#   targets:
#     - select:
#         kind: MutatingWebhookConfiguration
#       fieldPaths:
#         - .metadata.annotations.[cert-manager.io/inject-ca-from]
#       options:
#         delimiter: '/'
#         index: 1
#         create: true
#
# - source: # Uncomment the following block if you have a ConversionWebhook (--conversion)
#     kind: Certificate
#     group: cert-manager.io
#     version: v1
#     name: serving-cert
#     fieldPath: .metadata.namespace # Namespace of the certificate CR
#   targets: # Do not remove or uncomment the following scaffold marker; required to generate code for target CRD.
# +kubebuilder:scaffold:crdkustomizecainjectionns
# - source:
#     kind: Certificate
#     group: cert-manager.io
#     version: v1
#     name: serving-cert
#     fieldPath: .metadata.name
#   targets: # Do not remove or uncomment the following scaffold marker; required to generate code for target CRD.
# +kubebuilder:scaffold:crdkustomizecainjectionname


================================================
FILE: kubernetes/config/default/manager_metrics_patch.yaml
================================================
# This patch adds the args to allow exposing the metrics endpoint using HTTPS.
# It is a JSON Patch (RFC 6902) op list; kustomization.yaml in this directory
# applies it to resources of kind Deployment.
# An "add" at array index 0 inserts the flag before the existing first arg
# instead of replacing it, so the args already declared on the container are kept.
- op: add
  path: /spec/template/spec/containers/0/args/0
  value: --metrics-bind-address=:8443


================================================
FILE: kubernetes/config/default/metrics_service.yaml
================================================
# Service exposing the controller manager's metrics port.
# It is listed under `resources` in kustomization.yaml in this directory, which also
# sets the namespace for all resources and prepends `namePrefix` to resource names,
# so `namespace: system` below is only a placeholder.
apiVersion: v1
kind: Service
metadata:
  labels:
    control-plane: controller-manager
    app.kubernetes.io/name: opensandbox
    app.kubernetes.io/managed-by: kustomize
  name: controller-manager-metrics-service
  namespace: system
spec:
  ports:
  # The port name `https` is what config/prometheus/monitor.yaml references in its endpoint.
  # 8443 matches the --metrics-bind-address value added by manager_metrics_patch.yaml.
  - name: https
    port: 8443
    protocol: TCP
    targetPort: 8443
  # Same labels as the pod template in config/manager/manager.yaml.
  selector:
    control-plane: controller-manager
    app.kubernetes.io/name: opensandbox


================================================
FILE: kubernetes/config/manager/kustomization.yaml
================================================
# Kustomization for the controller manager Deployment (and its Namespace) in manager.yaml.
resources:
- manager.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
# Image overrides; each `name` is matched against the image name used by containers
# in the resources above.
images:
# manager.yaml's container declares `image: controller:dev`, which this entry matches.
- name: controller
  newName: controller
  newTag: dev
# NOTE(review): manager.yaml declares no image named `manager` (only the container is
# named that), so this entry appears to have no effect here — confirm whether it was
# meant to override `controller` instead.
- name: manager
  newName: sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/controller
  newTag: v0.0.1


================================================
FILE: kubernetes/config/manager/manager.yaml
================================================
apiVersion: v1
kind: Namespace
metadata:
  labels:
    control-plane: controller-manager
    app.kubernetes.io/name: opensandbox
    app.kubernetes.io/managed-by: kustomize
  name: system
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: controller-manager
  namespace: system
  labels:
    control-plane: controller-manager
    app.kubernetes.io/name: opensandbox
    app.kubernetes.io/managed-by: kustomize
spec:
  selector:
    matchLabels:
      control-plane: controller-manager
      app.kubernetes.io/name: opensandbox
  replicas: 1
  template:
    metadata:
      annotations:
        kubectl.kubernetes.io/default-container: manager
      labels:
        control-plane: controller-manager
        app.kubernetes.io/name: opensandbox
    spec:
      # TODO(user): Uncomment the following code to configure the nodeAffinity expression
      # according to the platforms which are supported by your solution.
      # It is considered best practice to support multiple architectures. You can
      # build your manager image using the makefile target docker-buildx.
      # affinity:
      #   nodeAffinity:
      #     requiredDuringSchedulingIgnoredDuringExecution:
      #       nodeSelectorTerms:
      #         - matchExpressions:
      #           - key: kubernetes.io/arch
      #             operator: In
      #             values:
      #               - amd64
      #               - arm64
      #               - ppc64le
      #               - s390x
      #           - key: kubernetes.io/os
      #             operator: In
      #             values:
      #               - linux
      securityContext:
        # Projects are configured by default to adhere to the "restricted" Pod Security Standards.
        # This ensures that deployments meet the highest security requirements for Kubernetes.
        # For more details, see: https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted
        runAsNonRoot: true
        seccompProfile:
          type: RuntimeDefault
      # Single container running the controller manager binary.
      containers:
      - command:
        - /workspace/server
        # config/default/manager_metrics_patch.yaml inserts --metrics-bind-address=:8443
        # at index 0 of this list when built through config/default.
        args:
          - --leader-elect
          - --health-probe-bind-address=:8081
        # Matched by the `controller` entry under `images` in kustomization.yaml in this directory.
        image: controller:dev
        name: manager
        ports: []
        # Container-level hardening: no privilege escalation and all capabilities dropped.
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop:
            - "ALL"
        # Both probes use port 8081, the same port given to --health-probe-bind-address above.
        livenessProbe:
          httpGet:
            path: /healthz
            port: 8081
          initialDelaySeconds: 15
          periodSeconds: 20
        readinessProbe:
          httpGet:
            path: /readyz
            port: 8081
          initialDelaySeconds: 5
          periodSeconds: 10
        # TODO(user): Configure the resources accordingly based on the project requirements.
        # More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
        resources:
          limits:
            cpu: 500m
            memory: 128Mi
          requests:
            cpu: 10m
            memory: 64Mi
        # Presumably kept as an explicit empty list so that the JSON patch in
        # config/default/cert_metrics_manager_patch.yaml can append to
        # /spec/template/spec/containers/0/volumeMounts/- — verify before removing.
        volumeMounts: []
      volumes: []
      serviceAccountName: controller-manager
      terminationGracePeriodSeconds: 10


================================================
FILE: kubernetes/config/manifests/kustomization.yaml
================================================
# These resources constitute the fully configured set of manifests
# used to generate the 'manifests/' directory in a bundle.
resources:
- bases/sandbox-k8s.clusterserviceversion.yaml
- ../default
- ../samples
- ../scorecard

# [WEBHOOK] To enable webhooks, uncomment all the sections with [WEBHOOK] prefix.
# Do NOT uncomment sections with prefix [CERTMANAGER], as OLM does not support cert-manager.
# These patches remove the unnecessary "cert" volume and its manager container volumeMount.
#patches:
#- target:
#    group: apps
#    version: v1
#    kind: Deployment
#    name: controller-manager
#    namespace: system
#  patch: |-
#    # Remove the manager container's "cert" volumeMount, since OLM will create and mount a set of certs.
#    # Update the indices in this path if adding or removing containers/volumeMounts in the manager's Deployment.
#    - op: remove
#      path: /spec/template/spec/containers/0/volumeMounts/0
#    # Remove the "cert" volume, since OLM will create and mount a set of certs.
#    # Update the indices in this path if adding or removing volumes in the manager's Deployment.
#    - op: remove
#      path: /spec/template/spec/volumes/0


================================================
FILE: kubernetes/config/network-policy/allow-metrics-traffic.yaml
================================================
# This NetworkPolicy allows ingress traffic
# from Pods running in namespaces labeled with 'metrics: enabled'. Only Pods in those
# namespaces are able to gather data from the metrics endpoint.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  labels:
    app.kubernetes.io/name: opensandbox
    app.kubernetes.io/managed-by: kustomize
  name: allow-metrics-traffic
  namespace: system
spec:
  podSelector:
    matchLabels:
      control-plane: controller-manager
      app.kubernetes.io/name: opensandbox
  policyTypes:
    - Ingress
  ingress:
    # This allows ingress traffic from any namespace with the label metrics: enabled
    - from:
      - namespaceSelector:
          matchLabels:
            metrics: enabled  # Only from namespaces with this label
      ports:
        - port: 8443
          protocol: TCP


================================================
FILE: kubernetes/config/network-policy/kustomization.yaml
================================================
resources:
- allow-metrics-traffic.yaml


================================================
FILE: kubernetes/config/prometheus/kustomization.yaml
================================================
resources:
- monitor.yaml

# [PROMETHEUS-WITH-CERTS] The following patch configures the ServiceMonitor in ../prometheus
# to securely reference certificates created and managed by cert-manager.
# Additionally, ensure that you uncomment the [METRICS-WITH-CERTS] patch under config/default/kustomization.yaml
# to mount the "metrics-server-cert" secret in the Manager Deployment.
#patches:
#  - path: monitor_tls_patch.yaml
#    target:
#      kind: ServiceMonitor


================================================
FILE: kubernetes/config/prometheus/monitor.yaml
================================================
# Prometheus Monitor Service (Metrics)
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    control-plane: controller-manager
    app.kubernetes.io/name: opensandbox
    app.kubernetes.io/managed-by: kustomize
  name: controller-manager-metrics-monitor
  namespace: system
spec:
  endpoints:
    - path: /metrics
      port: https # Ensure this is the name of the port that exposes HTTPS metrics
      scheme: https
      bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
      tlsConfig:
        # TODO(user): The option insecureSkipVerify: true is not recommended for production since it disables
        # certificate verification, exposing the system to potential man-in-the-middle attacks.
        # For production environments, it is recommended to use cert-manager for automatic TLS certificate management.
        # To apply this configuration, enable cert-manager and use the patch located at config/prometheus/monitor_tls_patch.yaml,
        # which securely references the certificate from the 'metrics-server-cert' secret.
        insecureSkipVerify: true
  selector:
    matchLabels:
      control-plane: controller-manager
      app.kubernetes.io/name: opensandbox


================================================
FILE: kubernetes/config/prometheus/monitor_tls_patch.yaml
================================================
# Patch for Prometheus ServiceMonitor to enable secure TLS configuration
# using certificates managed by cert-manager
- op: replace
  path: /spec/endpoints/0/tlsConfig
  value:
    # SERVICE_NAME and SERVICE_NAMESPACE will be substituted by kustomize
    serverName: SERVICE_NAME.SERVICE_NAMESPACE.svc
    insecureSkipVerify: false
    ca:
      secret:
        name: metrics-server-cert
        key: ca.crt
    cert:
      secret:
        name: metrics-server-cert
        key: tls.crt
    keySecret:
      name: metrics-server-cert
      key: tls.key


================================================
FILE: kubernetes/config/rbac/batchsandbox_admin_role.yaml
================================================
# This rule is not used by the project sandbox-k8s itself.
# It is provided to allow the cluster admin to help manage permissions for users.
#
# Grants full permissions ('*') over sandbox.opensandbox.io.
# This role is intended for users authorized to modify roles and bindings within the cluster,
# enabling them to delegate specific permissions to other users or groups as needed.

apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app.kubernetes.io/name: opensandbox
    app.kubernetes.io/managed-by: kustomize
  name: batchsandbox-admin-role
rules:
- apiGroups:
  - sandbox.opensandbox.io
  resources:
  - batchsandboxes
  verbs:
  - '*'
- apiGroups:
  - sandbox.opensandbox.io
  resources:
  - batchsandboxes/status
  verbs:
  - get


================================================
FILE: kubernetes/config/rbac/batchsandbox_editor_role.yaml
================================================
# This rule is not used by the project sandbox-k8s itself.
# It is provided to allow the cluster admin to help manage permissions for users.
#
# Grants permissions to create, update, and delete resources within the sandbox.opensandbox.io.
# This role is intended for users who need to manage these resources
# but should not control RBAC or manage permissions for others.

apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app.kubernetes.io/name: opensandbox
    app.kubernetes.io/managed-by: kustomize
  name: batchsandbox-editor-role
rules:
- apiGroups:
  - sandbox.opensandbox.io
  resources:
  - batchsandboxes
  verbs:
  - create
  - delete
  - get
  - list
  - patch
  - update
  - watch
- apiGroups:
  - sandbox.opensandbox.io
  resources:
  - batchsandboxes/status
  verbs:
  - get


================================================
FILE: kubernetes/config/rbac/batchsandbox_viewer_role.yaml
================================================
# This rule is not used by the project sandbox-k8s itself.
# It is provided to allow the cluster admin to help manage permissions for users.
#
# Grants read-only access to sandbox.opensandbox.io resources.
# This role is intended for users who need visibility into these resources
# without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing.

apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app.kubernetes.io/name: opensandbox
    app.kubernetes.io/managed-by: kustomize
  name: batchsandbox-viewer-role
rules:
- apiGroups:
  - sandbox.opensandbox.io
  resources:
  - batchsandboxes
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - sandbox.opensandbox.io
  resources:
  - batchsandboxes/status
  verbs:
  - get


================================================
FILE: kubernetes/config/rbac/kustomization.yaml
================================================
resources:
# All RBAC will be applied under this service account in
# the deployment namespace. You may comment out this resource
# if your manager will use a service account that exists at
# runtime. Be sure to update RoleBinding and ClusterRoleBinding
# subjects if changing service account names.
- service_account.yaml
- role.yaml
- role_binding.yaml
- leader_election_role.yaml
- leader_election_role_binding.yaml
# The following RBAC configurations are used to protect
# the metrics endpoint with authn/authz. These configurations
# ensure that only authorized users and service accounts
# can access the metrics endpoint. Comment the following
# permissions if you want to disable this protection.
# More info: https://book.kubebuilder.io/reference/metrics.html
- metrics_auth_role.yaml
- metrics_auth_role_binding.yaml
- metrics_reader_role.yaml
# For each CRD, "Admin", "Editor" and "Viewer" roles are scaffolded by
# default, aiding admins in cluster management. Those roles are
# not used by sandbox-k8s itself. You can comment out the following lines
# if you do not want those helpers to be installed with your project.
- pool_admin_role.yaml
- pool_editor_role.yaml
- pool_viewer_role.yaml
- batchsandbox_admin_role.yaml
- batchsandbox_editor_role.yaml
- batchsandbox_viewer_role.yaml


================================================
FILE: kubernetes/config/rbac/leader_election_role.yaml
================================================
# permissions to do leader election.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  labels:
    app.kubernetes.io/name: opensandbox
    app.kubernetes.io/managed-by: kustomize
  name: leader-election-role
rules:
- apiGroups:
  - ""
  resources:
  - configmaps
  verbs:
  - get
  - list
  - watch
  - create
  - update
  - patch
  - delete
- apiGroups:
  - coordination.k8s.io
  resources:
  - leases
  verbs:
  - get
  - list
  - watch
  - create
  - update
  - patch
  - delete
- apiGroups:
  - ""
  resources:
  - events
  verbs:
  - create
  - patch


================================================
FILE: kubernetes/config/rbac/leader_election_role_binding.yaml
================================================
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  labels:
    app.kubernetes.io/name: opensandbox
    app.kubernetes.io/managed-by: kustomize
  name: leader-election-rolebinding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: leader-election-role
subjects:
- kind: ServiceAccount
  name: controller-manager
  namespace: system


================================================
FILE: kubernetes/config/rbac/metrics_auth_role.yaml
================================================
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: metrics-auth-role
rules:
- apiGroups:
  - authentication.k8s.io
  resources:
  - tokenreviews
  verbs:
  - create
- apiGroups:
  - authorization.k8s.io
  resources:
  - subjectaccessreviews
  verbs:
  - create


================================================
FILE: kubernetes/config/rbac/metrics_auth_role_binding.yaml
================================================
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: metrics-auth-rolebinding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: metrics-auth-role
subjects:
- kind: ServiceAccount
  name: controller-manager
  namespace: system


================================================
FILE: kubernetes/config/rbac/metrics_reader_role.yaml
================================================
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: metrics-reader
rules:
- nonResourceURLs:
  - "/metrics"
  verbs:
  - get


================================================
FILE: kubernetes/config/rbac/pool_admin_role.yaml
================================================
# This rule is not used by the project sandbox-k8s itself.
# It is provided to allow the cluster admin to help manage permissions for users.
#
# Grants full permissions ('*') over sandbox.opensandbox.io.
# This role is intended for users authorized to modify roles and bindings within the cluster,
# enabling them to delegate specific permissions to other users or groups as needed.

apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app.kubernetes.io/name: opensandbox
    app.kubernetes.io/managed-by: kustomize
  name: pool-admin-role
rules:
- apiGroups:
  - sandbox.opensandbox.io
  resources:
  - pools
  verbs:
  - '*'
- apiGroups:
  - sandbox.opensandbox.io
  resources:
  - pools/status
  verbs:
  - get


================================================
FILE: kubernetes/config/rbac/pool_editor_role.yaml
================================================
# This rule is not used by the project sandbox-k8s itself.
# It is provided to allow the cluster admin to help manage permissions for users.
#
# Grants permissions to create, update, and delete resources within the sandbox.opensandbox.io.
# This role is intended for users who need to manage these resources
# but should not control RBAC or manage permissions for others.

apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app.kubernetes.io/name: opensandbox
    app.kubernetes.io/managed-by: kustomize
  name: pool-editor-role
rules:
- apiGroups:
  - sandbox.opensandbox.io
  resources:
  - pools
  verbs:
  - create
  - delete
  - get
  - list
  - patch
  - update
  - watch
- apiGroups:
  - sandbox.opensandbox.io
  resources:
  - pools/status
  verbs:
  - get


================================================
FILE: kubernetes/config/rbac/pool_viewer_role.yaml
================================================
# This rule is not used by the project sandbox-k8s itself.
# It is provided to allow the cluster admin to help manage permissions for users.
#
# Grants read-only access to sandbox.opensandbox.io resources.
# This role is intended for users who need visibility into these resources
# without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing.

apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app.kubernetes.io/name: opensandbox
    app.kubernetes.io/managed-by: kustomize
  name: pool-viewer-role
rules:
- apiGroups:
  - sandbox.opensandbox.io
  resources:
  - pools
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - sandbox.opensandbox.io
  resources:
  - pools/status
  verbs:
  - get


================================================
FILE: kubernetes/config/rbac/role.yaml
================================================
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: manager-role
rules:
- apiGroups:
  - ""
  resources:
  - events
  - pods
  verbs:
  - create
  - delete
  - get
  - list
  - patch
  - update
  - watch
- apiGroups:
  - ""
  resources:
  - pods/status
  verbs:
  - get
  - patch
  - update
- apiGroups:
  - sandbox.opensandbox.io
  resources:
  - batchsandboxes
  - pools
  verbs:
  - create
  - delete
  - get
  - list
  - patch
  - update
  - watch
- apiGroups:
  - sandbox.opensandbox.io
  resources:
  - batchsandboxes/finalizers
  - pools/finalizers
  verbs:
  - update
- apiGroups:
  - sandbox.opensandbox.io
  resources:
  - batchsandboxes/status
  - pools/status
  verbs:
  - get
  - patch
  - update


================================================
FILE: kubernetes/config/rbac/role_binding.yaml
================================================
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    app.kubernetes.io/name: opensandbox
    app.kubernetes.io/managed-by: kustomize
  name: manager-rolebinding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: manager-role
subjects:
- kind: ServiceAccount
  name: controller-manager
  namespace: system


================================================
FILE: kubernetes/config/rbac/service_account.yaml
================================================
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app.kubernetes.io/name: opensandbox
    app.kubernetes.io/managed-by: kustomize
  name: controller-manager
  namespace: system


================================================
FILE: kubernetes/config/samples/kustomization.yaml
================================================
## Append samples of your project ##
# Every entry must name a file that exists in this directory; a missing
# resource makes the whole `kustomize build` fail.
# sandbox_v1alpha1_batchsandbox-with-task.yaml is not listed here because it
# declares the same kind/name/namespace (BatchSandbox batchsandbox-sample in
# opensandbox) as sandbox_v1alpha1_batchsandbox.yaml; apply it on its own.
resources:
- sandbox_v1alpha1_batchsandbox.yaml
- sandbox_v1alpha1_pool.yaml
# +kubebuilder:scaffold:manifestskustomizesamples


================================================
FILE: kubernetes/config/samples/sandbox_v1alpha1_batchsandbox-with-task.yaml
================================================
# Sample BatchSandbox that runs two replicas from an inline pod template and
# attaches a task to each replica. `taskTemplate` is the base task definition;
# each entry of `shardTaskPatches` overrides parts of it (see the inline notes
# on the patches below).
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: BatchSandbox
metadata:
  labels:
    app.kubernetes.io/name: opensandbox
    app.kubernetes.io/managed-by: kustomize
  name: batchsandbox-sample
  namespace: opensandbox
spec:
  replicas: 2
  template:
    metadata:
      labels:
        app: example
    spec:
      containers:
      - name: main
        image: registry.k8s.io/e2e-test-images/httpd:2.4.38-4
        # Keep the container alive without doing any work.
        command:
        - tail
        - -f
        - /dev/null
  # Same expiry as the other samples in this directory.
  # NOTE(review): presumably a timestamp in the past makes the sandbox expire
  # right away - keep this value in the future when copying the sample.
  expireTime: "2026-12-03T12:55:41Z"
  taskTemplate:
    spec:
      process:
        command:
        - sleep
        args:
        # GNU sleep accepts "infinity" (or "inf"); "infinite" is rejected as an
        # invalid time interval.
        - infinity
        env:
        - name: foo
          value: bar
  shardTaskPatches:
  - spec:
      process:
        command: # patch command and args, the final command is `python -m http.server 8080` with process envs(foo=bar)
        - python
        args:
        - -m
        - http.server
        - "8080"
  - spec:
      process:
        args: # patch args, the final command is `sleep 3600` with process envs(foo=bar;hello=world)
        # Quoted so YAML yields a string, matching "8080" in the patch above.
        - "3600"
        env:
        - name: hello
          value: world


================================================
FILE: kubernetes/config/samples/sandbox_v1alpha1_batchsandbox.yaml
================================================
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: BatchSandbox
metadata:
  labels:
    app.kubernetes.io/name: opensandbox
    app.kubernetes.io/managed-by: kustomize
  name: batchsandbox-sample
  namespace: opensandbox
spec:
  replicas: 1
  poolRef: pool-sample
  expireTime: "2026-12-03T12:55:41Z"

================================================
FILE: kubernetes/config/samples/sandbox_v1alpha1_pool.yaml
================================================
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: Pool
metadata:
  labels:
    app.kubernetes.io/name: opensandbox
    app.kubernetes.io/managed-by: kustomize
  name: pool-sample
  namespace: opensandbox
spec:
  template:
    metadata:
      labels:
        app: example
    spec:
      volumes:
        - name: sandbox-storage
          emptyDir: { }
        - name: opensandbox-bin
          emptyDir: { }
        - name: sandbox-logs
          emptyDir: { }
      initContainers:
        - name: task-executor-installer
          image: sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/task-executor:v0.1.0
          command: [ "/bin/sh", "-c" ]
          args:
            - |
              cp /workspace/server /opt/opensandbox/bin/task-executor && 
              chmod +x /opt/opensandbox/bin/task-executor
          volumeMounts:
            - name: opensandbox-bin
              mountPath: /opt/opensandbox/bin
        - name: execd-installer
          image: sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.7
          command: [ "/bin/sh", "-c" ]
          args:
            - |
              cp ./execd /opt/opensandbox/bin/execd && 
              cp ./bootstrap.sh /opt/opensandbox/bin/bootstrap.sh &&
              chmod +x /opt/opensandbox/bin/execd &&
              chmod +x /opt/opensandbox/bin/bootstrap.sh
          volumeMounts:
            - name: opensandbox-bin
              mountPath: /opt/opensandbox/bin
      containers:
        - name: sandbox
          image: sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2
          command:
          - "/bin/sh"
          - "-c"
          - |
            /opt/opensandbox/bin/task-executor -listen-addr=0.0.0.0:5758 >/tmp/task-executor.log 2>&1
          env:
          - name: SANDBOX_MAIN_CONTAINER
            value: main
          - name: EXECD_ENVS
            value: /opt/opensandbox/.env
          - name: EXECD
            value: /opt/opensandbox/bin/execd
          volumeMounts:
            - name: sandbox-storage
              mountPath: /var/lib/sandbox
            - name: opensandbox-bin
              mountPath: /opt/opensandbox/bin
            - name: sandbox-logs
              mountPath: /workspace/logs
      tolerations:
        - operator: "Exists"
  capacitySpec:
    bufferMax: 3
    bufferMin: 1
    poolMax: 5
    poolMin: 0


================================================
FILE: kubernetes/config/samples/sandbox_v1alpha1_pooled_batchsandbox.yaml
================================================
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: BatchSandbox
metadata:
  labels:
    app.kubernetes.io/name: opensandbox
    app.kubernetes.io/managed-by: kustomize
  name: batchsandbox-pool-sample
  namespace: opensandbox
spec:
  poolRef: pool-sample
  replicas: 2
  expireTime: "2026-12-03T12:55:41Z"


================================================
FILE: kubernetes/config/scorecard/bases/config.yaml
================================================
apiVersion: scorecard.operatorframework.io/v1alpha3
kind: Configuration
metadata:
  name: config
stages:
- parallel: true
  tests: []


================================================
FILE: kubernetes/config/scorecard/kustomization.yaml
================================================
resources:
- bases/config.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
patches:
- path: patches/basic.config.yaml
  target:
    group: scorecard.operatorframework.io
    kind: Configuration
    name: config
    version: v1alpha3
- path: patches/olm.config.yaml
  target:
    group: scorecard.operatorframework.io
    kind: Configuration
    name: config
    version: v1alpha3
# +kubebuilder:scaffold:patches


================================================
FILE: kubernetes/config/scorecard/patches/basic.config.yaml
================================================
- op: add
  path: /stages/0/tests/-
  value:
    entrypoint:
    - scorecard-test
    - basic-check-spec
    image: quay.io/operator-framework/scorecard-test:v1.42.0
    labels:
      suite: basic
      test: basic-check-spec-test


================================================
FILE: kubernetes/config/scorecard/patches/olm.config.yaml
================================================
- op: add
  path: /stages/0/tests/-
  value:
    entrypoint:
    - scorecard-test
    - olm-bundle-validation
    image: quay.io/operator-framework/scorecard-test:v1.42.0
    labels:
      suite: olm
      test: olm-bundle-validation-test
- op: add
  path: /stages/0/tests/-
  value:
    entrypoint:
    - scorecard-test
    - olm-crds-have-validation
    image: quay.io/operator-framework/scorecard-test:v1.42.0
    labels:
      suite: olm
      test: olm-crds-have-validation-test
- op: add
  path: /stages/0/tests/-
  value:
    entrypoint:
    - scorecard-test
    - olm-crds-have-resources
    image: quay.io/operator-framework/scorecard-test:v1.42.0
    labels:
      suite: olm
      test: olm-crds-have-resources-test
- op: add
  path: /stages/0/tests/-
  value:
    entrypoint:
    - scorecard-test
    - olm-spec-descriptors
    image: quay.io/operator-framework/scorecard-test:v1.42.0
    labels:
      suite: olm
      test: olm-spec-descriptors-test
- op: add
  path: /stages/0/tests/-
  value:
    entrypoint:
    - scorecard-test
    - olm-status-descriptors
    image: quay.io/operator-framework/scorecard-test:v1.42.0
    labels:
      suite: olm
      test: olm-status-descriptors-test


================================================
FILE: kubernetes/docs/BUILD-IMAGES.md
================================================
# 镜像构建指南

本文档介绍如何构建 OpenSandbox Kubernetes Controller 和 Task Executor 镜像。

## 方式一: 使用构建脚本（推荐）

### 本地构建

```bash
cd kubernetes

# 构建 controller 镜像
COMPONENT=controller TAG=v0.1.0 PUSH=false ./build.sh

# 构建 task-executor 镜像
COMPONENT=task-executor TAG=v0.1.0 PUSH=false ./build.sh
```

### 构建并推送到镜像仓库

```bash
# 确保已登录阿里云 ACR
docker login sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com

# 构建并推送 controller 镜像
COMPONENT=controller TAG=v0.1.0 ./build.sh

# 构建并推送 task-executor 镜像
COMPONENT=task-executor TAG=v0.1.0 ./build.sh
```

### 环境变量说明

- `COMPONENT`: 要构建的组件，可选值: `controller`, `task-executor`
- `TAG`: 镜像标签，默认为 `latest`
- `PUSH`: 是否推送到远程仓库，默认为 `true`

## 方式二: 使用 GitHub Actions

### 手动触发工作流

1. 打开 [Actions 页面](https://github.com/alibaba/OpenSandbox/actions)
2. 选择 "Publish Components Image" 工作流
3. 点击 "Run workflow"
4. 选择组件和镜像标签:
   - Component: 在下拉菜单中选择组件名称
     - Controller: `controller`
     - Task Executor: `task-executor`
   - Image tag: 输入镜像标签，例如 `v0.1.0`
5. 点击 "Run workflow" 开始构建

### 通过 Git Tag 触发（推荐）

创建带有特定前缀的 tag 即可自动触发构建:

```bash
# 构建 controller v0.1.0
git tag k8s/controller/v0.1.0
git push origin k8s/controller/v0.1.0

# 构建 task-executor v0.1.0
git tag k8s/task-executor/v0.1.0
git push origin k8s/task-executor/v0.1.0
```

**Tag 命名规则**: `k8s/<component>/<version>`
- `<component>`: 组件名称 `controller` 或 `task-executor`
- `<version>`: 镜像版本号，例如 `v0.1.0`

## 方式三: 使用 Makefile

```bash
cd kubernetes

# 构建 controller 镜像（仅本地）
make docker-build CONTROLLER_IMG=myregistry/opensandbox-controller:v0.1.0

# 构建 task-executor 镜像（仅本地）
make docker-build-task-executor TASK_EXECUTOR_IMG=myregistry/opensandbox-task-executor:v0.1.0

# 推送镜像
make docker-push CONTROLLER_IMG=myregistry/opensandbox-controller:v0.1.0
make docker-push-task-executor TASK_EXECUTOR_IMG=myregistry/opensandbox-task-executor:v0.1.0
```

## 镜像仓库

构建的镜像会推送到以下仓库:

### 阿里云容器镜像服务 (ACR)
- Controller: `sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/controller:<tag>`
- Task Executor: `sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/task-executor:<tag>`

## 多架构支持

构建脚本默认支持以下架构:
- `linux/amd64`
- `linux/arm64`

如需构建其他架构，请修改 `build.sh` 中的 `PLATFORMS` 变量。

## 本地测试

如果只想在本地测试镜像而不推送:

```bash
# 构建本地镜像
COMPONENT=controller TAG=test PUSH=false ./build.sh

# 加载到 kind 集群测试
kind load docker-image opensandbox-controller:test

# 或加载到 minikube 测试
minikube image load opensandbox-controller:test
```

## 故障排查

### 权限问题

如果遇到 Docker 权限问题:
```bash
sudo usermod -aG docker $USER
newgrp docker
```

### Buildx 不可用

确保启用 Docker Buildx:
```bash
docker buildx create --use
docker buildx inspect --bootstrap
```

### 磁盘空间不足

清理 Docker 缓存:
```bash
docker system prune -a
docker builder prune -a
```

## 配置私有镜像仓库

如需使用自己的镜像仓库，修改 `build.sh` 中的仓库地址:

```bash
# 编辑 build.sh
ACR_REPO="your-acr-registry.cr.aliyuncs.com/your-namespace"
```

或者直接在构建时使用环境变量:
```bash
ACR_REPO=myregistry.com/myrepo COMPONENT=controller TAG=v0.1.0 ./build.sh
```


================================================
FILE: kubernetes/docs/HELM-DEPLOYMENT.md
================================================
# Helm Chart 部署方式

本文档介绍如何使用 Helm Chart 部署 OpenSandbox Controller。

## 前置要求

- Kubernetes 1.22.4+
- Helm 3.0+
- kubectl 已配置并可访问目标集群

## 快速开始

### 方式一: 直接从 GitHub Release 安装 (推荐)

直接下载并安装发布的 Chart 包:

```bash
# 安装最新版本 (0.1.0)
helm install opensandbox-controller \
  https://github.com/alibaba/OpenSandbox/releases/download/helm/opensandbox-controller/0.1.0/opensandbox-controller-0.1.0.tgz \
  --namespace opensandbox-system \
  --create-namespace
```

如需使用自定义镜像:

```bash
helm install opensandbox-controller \
  https://github.com/alibaba/OpenSandbox/releases/download/helm/opensandbox-controller/0.1.0/opensandbox-controller-0.1.0.tgz \
  --set controller.image.repository=<your-registry>/controller \
  --set controller.image.tag=v0.0.1 \
  --namespace opensandbox-system \
  --create-namespace
```

### 方式二: 本地 Chart 安装

如果您从源码构建,可以使用本地 Chart:

#### 1. 构建镜像

首先构建 controller 和 task-executor 镜像:

```bash
# 构建 controller 镜像
cd kubernetes
COMPONENT=controller TAG=v0.0.1 ./build.sh

# 构建 task-executor 镜像
COMPONENT=task-executor TAG=v0.0.1 ./build.sh
```

#### 2. 安装本地 Helm Chart

```bash
helm install opensandbox-controller ./charts/opensandbox-controller \
  --set controller.image.repository=<your-registry>/controller \
  --set controller.image.tag=v0.0.1 \
  --namespace opensandbox-system \
  --create-namespace
```

或者使用 Makefile:

```bash
make helm-install \
  IMAGE_TAG_BASE=<your-registry>/controller \
  VERSION=v0.0.1
```

### 3. 验证安装

```bash
# 检查 Pod 状态
kubectl get pods -n opensandbox-system

# 检查 CRD
kubectl get crd | grep opensandbox

# 查看安装状态
helm status opensandbox-controller -n opensandbox-system

# 查看已安装的 Chart 版本
helm list -n opensandbox-system
```

## 版本管理

### 查看可用版本

访问 GitHub Releases 查看所有可用版本:
https://github.com/alibaba/OpenSandbox/releases

查找以 `helm/opensandbox-controller/` 开头的 tag,如 `helm/opensandbox-controller/0.1.0`

### 升级到指定版本

```bash
# 直接从 GitHub Release 升级
helm upgrade opensandbox-controller \
  https://github.com/alibaba/OpenSandbox/releases/download/helm/opensandbox-controller/0.2.0/opensandbox-controller-0.2.0.tgz \
  --namespace opensandbox-system
```

## 自定义配置

### 使用自定义 values 文件

创建自定义 values 文件 `custom-values.yaml`:

```yaml
controller:
  image:
    repository: myregistry.example.com/opensandbox-controller
    tag: v0.1.0
  
  resources:
    limits:
      cpu: 1000m
      memory: 512Mi
    requests:
      cpu: 100m
      memory: 128Mi
  
  logLevel: debug

imagePullSecrets:
  - name: myregistrykey
```

使用自定义配置安装:

```bash
helm install opensandbox-controller ./charts/opensandbox-controller \
  -f custom-values.yaml \
  --namespace opensandbox-system \
  --create-namespace
```

### 常用配置示例

#### 1. 调整资源配置

```bash
helm install opensandbox-controller ./charts/opensandbox-controller \
  --set controller.resources.limits.cpu=1000m \
  --set controller.resources.limits.memory=512Mi \
  --namespace opensandbox-system
```

#### 2. 配置节点亲和性

创建 `affinity-values.yaml`:

```yaml
controller:
  resources:
    limits:
      cpu: 1000m
      memory: 512Mi
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
        - matchExpressions:
          - key: node-role.kubernetes.io/control-plane
            operator: Exists
```

```bash
helm install opensandbox-controller ./charts/opensandbox-controller \
  -f affinity-values.yaml \
  --namespace opensandbox-system
```

## 升级

### 升级 Helm Release

从 GitHub Release 升级:

```bash
# 升级到指定版本
helm upgrade opensandbox-controller \
  https://github.com/alibaba/OpenSandbox/releases/download/helm/opensandbox-controller/0.2.0/opensandbox-controller-0.2.0.tgz \
  --namespace opensandbox-system
```

从本地 Chart 升级:

```bash
helm upgrade opensandbox-controller ./charts/opensandbox-controller \
  --set controller.image.tag=v0.0.2 \
  --namespace opensandbox-system
```

或使用 Makefile:

```bash
make helm-upgrade VERSION=v0.0.2
```

### 查看升级历史

```bash
helm history opensandbox-controller -n opensandbox-system
```

### 回滚

```bash
# 回滚到上一个版本
helm rollback opensandbox-controller -n opensandbox-system

# 回滚到指定版本
helm rollback opensandbox-controller 1 -n opensandbox-system
```

## 卸载

### 卸载 Helm Release

```bash
helm uninstall opensandbox-controller -n opensandbox-system
```

或使用 Makefile:

```bash
make helm-uninstall
```

**注意**: 默认情况下,CRD 会被保留。如需删除 CRD:

```bash
kubectl delete crd batchsandboxes.sandbox.opensandbox.io
kubectl delete crd pools.sandbox.opensandbox.io
```

### 清理 Namespace

如果要完全清理:

```bash
kubectl delete namespace opensandbox-system
```

## Makefile 命令

项目提供了一系列 Makefile 命令来简化 Helm 操作:

```bash
# 检查 Helm Chart 语法
make helm-lint

# 生成 Kubernetes 清单(不安装)
make helm-template

# 生成清单并显示调试信息
make helm-template-debug

# 打包 Helm Chart
make helm-package

# 安装 Helm Chart
make helm-install

# 升级 Helm Chart
make helm-upgrade

# 卸载 Helm Chart
make helm-uninstall

# 测试已安装的 Chart
make helm-test

# 执行 dry-run 安装
make helm-dry-run

# 执行所有 Helm 相关任务
make helm-all
```

## 验证部署

### 1. 检查 Controller 状态

```bash
kubectl get deployment -n opensandbox-system
kubectl get pods -n opensandbox-system
kubectl logs -n opensandbox-system -l control-plane=controller-manager -f
```

### 2. 验证 CRD

```bash
kubectl get crd batchsandboxes.sandbox.opensandbox.io -o yaml
kubectl get crd pools.sandbox.opensandbox.io -o yaml
```

### 3. 创建测试资源

```bash
# 示例资源声明在 opensandbox 命名空间中,如该命名空间不存在请先创建
kubectl create namespace opensandbox

# 创建 Pool
kubectl apply -f config/samples/sandbox_v1alpha1_pool.yaml

# 创建 BatchSandbox
kubectl apply -f config/samples/sandbox_v1alpha1_batchsandbox.yaml

# 查看状态 (示例资源位于 opensandbox 命名空间)
kubectl get pools -n opensandbox
kubectl get batchsandboxes -n opensandbox
```

## 故障排查

### Chart 验证失败

```bash
# 检查 Chart 语法
make helm-lint

# 查看详细模板输出
make helm-template-debug
```

### Controller 无法启动

```bash
# 查看 Pod 状态
kubectl describe pod -n opensandbox-system -l control-plane=controller-manager

# 查看日志
kubectl logs -n opensandbox-system -l control-plane=controller-manager

# 检查 RBAC 权限
kubectl auth can-i --as=system:serviceaccount:opensandbox-system:opensandbox-opensandbox-controller-controller-manager create pods
```

### 镜像拉取失败

```bash
# 检查镜像配置
helm get values opensandbox-controller -n opensandbox-system

# 添加镜像拉取密钥
kubectl create secret docker-registry myregistrykey \
  --docker-server=<your-registry> \
  --docker-username=<username> \
  --docker-password=<password> \
  -n opensandbox-system

# 使用密钥重新安装
helm upgrade opensandbox-controller ./charts/opensandbox-controller \
  --set 'imagePullSecrets[0].name=myregistrykey' \
  --namespace opensandbox-system
```

## 高级配置

### 多环境部署

为不同环境创建专用的 values 文件:

#### values-dev.yaml
```yaml
controller:
  logLevel: debug
  resources:
    limits:
      cpu: 200m
      memory: 128Mi
```

#### values-prod.yaml
```yaml
controller:
  logLevel: warn
  replicaCount: 3
  resources:
    limits:
      cpu: 1000m
      memory: 512Mi
  affinity:
    podAntiAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
      - labelSelector:
          matchExpressions:
          - key: control-plane
            operator: In
            values:
            - controller-manager
        topologyKey: kubernetes.io/hostname
```

部署到不同环境:

```bash
# 开发环境
helm install opensandbox-controller ./charts/opensandbox-controller \
  -f values-dev.yaml \
  --namespace opensandbox-dev

# 生产环境
helm install opensandbox-controller ./charts/opensandbox-controller \
  -f values-prod.yaml \
  --namespace opensandbox-prod
```

## 发布 Helm Chart (维护者使用)

### 自动发布

通过 GitHub Actions 自动发布 Helm Chart:

#### 方式一: 通过 Git Tag 触发

```bash
# 发布 opensandbox-controller chart 版本 0.1.0
git tag helm/opensandbox-controller/0.1.0
git push origin helm/opensandbox-controller/0.1.0
```

Tag 命名规则: `helm/{component}/{version}`
- `helm`: 前缀,表示这是 Helm Chart 发布
- `{component}`: 组件名称,如 `opensandbox-controller`
- `{version}`: 版本号,如 `0.1.0`

这将自动触发 workflow:
1. 解析 tag 获取 component 和 version
2. 更新对应 Chart.yaml 中的版本号
3. 打包 Helm Chart
4. 创建 GitHub Release
5. 发布 .tgz 包到 Release

#### 方式二: 手动触发

1. 访问 GitHub Actions 页面
2. 选择 "Publish Helm Chart" workflow
3. 点击 "Run workflow"
4. 选择 component (如: opensandbox-controller)
5. 输入 chart_version (如: 0.1.0) 和 app_version (如: 0.0.1)
6. 点击运行

### 发布后的 URL 格式

发布后,用户可以通过以下 URL 访问 Helm Chart:

```
https://github.com/alibaba/OpenSandbox/releases/download/helm/{COMPONENT}/{VERSION}/{COMPONENT}-{VERSION}.tgz
```

例如:
```
https://github.com/alibaba/OpenSandbox/releases/download/helm/opensandbox-controller/0.1.0/opensandbox-controller-0.1.0.tgz
```

### 添加新的 Helm Chart 组件

如果需要为新组件添加 Helm Chart 发布支持:

1. 在 `charts/` 目录下创建新组件的 chart 目录
2. 更新 `.github/workflows/publish-helm-chart.yml`:
   - 在 `workflow_dispatch.inputs.component.options` 中添加新组件
   - 在 "Set chart path" step 中添加组件路径映射

示例:
```yaml
# 在 workflow_dispatch inputs 中添加
options:
  - opensandbox-controller
  - new-component  # 新增

# 在 Set chart path step 中添加
if [ "$COMPONENT" == "opensandbox-controller" ]; then
  CHART_PATH="kubernetes/charts/opensandbox-controller"
elif [ "$COMPONENT" == "new-component" ]; then
  CHART_PATH="path/to/new-component/chart"
fi
```

### 本地测试发布流程

在发布前,建议本地测试:

```bash
# 打包 Chart
make helm-package

# 验证打包的 Chart
helm lint opensandbox-controller-*.tgz

# 测试安装
helm install test-release opensandbox-controller-*.tgz \
  --namespace test \
  --create-namespace \
  --dry-run
```

## 参考资料

- [Helm Chart README](../charts/opensandbox-controller/README.md) - 完整的参数列表
- [OpenSandbox 文档](../README.md) - 项目主文档
- [配置示例](../config/samples/) - 资源配置示例


================================================
FILE: kubernetes/docs/logging.md
================================================
# 日志配置说明

## 功能特性

OpenSandbox Kubernetes Controller 支持灵活的日志配置，包括：

- ✅ **日志输出到控制台**（默认启用）
- ✅ **日志输出到文件**（可选）
- ✅ **自动日志轮转**（按文件大小）
- ✅ **自动压缩旧日志**（gzip）
- ✅ **自动清理过期日志**（按时间或数量）
- ✅ **支持 zap 所有标准选项**（日志级别、格式等）

## 命令行参数

### 日志文件相关参数

| 参数 | 类型 | 默认值 | 说明 |
|------|------|--------|------|
| `--enable-file-log` | bool | false | 是否启用日志输出到文件 |
| `--log-file-path` | string | `/var/log/sandbox-controller/controller.log` | 日志文件路径 |
| `--log-max-size` | int | 100 | 日志文件最大大小（MB），超过后自动轮转 |
| `--log-max-backups` | int | 10 | 保留的旧日志文件最大数量 |
| `--log-max-age` | int | 30 | 保留旧日志文件的最大天数 |
| `--log-compress` | bool | true | 是否压缩轮转后的日志文件（gzip） |

### zap 标准参数（继承自 controller-runtime）

| 参数 | 说明 |
|------|------|
| `--zap-devel` | 启用开发模式（彩色输出、更详细的堆栈跟踪） |
| `--zap-encoder` | 日志编码格式：json 或 console |
| `--zap-log-level` | 日志级别：debug, info, error 等 |
| `--zap-stacktrace-level` | 打印堆栈跟踪的最低级别 |
| `--zap-time-encoding` | 时间编码格式：iso8601, millis, nano 等 |

## 使用示例

### 1. 仅输出到控制台（默认）

```bash
./controller
```

### 2. 同时输出到控制台和文件

```bash
./controller \
  --enable-file-log=true \
  --log-file-path=/var/log/sandbox-controller/controller.log
```

### 3. 自定义日志轮转配置

```bash
./controller \
  --enable-file-log=true \
  --log-file-path=/var/log/sandbox-controller/controller.log \
  --log-max-size=50 \
  --log-max-backups=5 \
  --log-max-age=7 \
  --log-compress=true
```

这将：
- 每个日志文件最大 50MB
- 最多保留 5 个旧日志文件
- 日志文件最多保留 7 天
- 压缩旧日志文件

### 4. 开发模式 + 文件输出

```bash
./controller \
  --zap-devel=true \
  --enable-file-log=true \
  --log-file-path=/tmp/controller-dev.log
```

### 5. JSON 格式 + 文件输出

```bash
./controller \
  --zap-encoder=json \
  --enable-file-log=true \
  --log-file-path=/var/log/sandbox-controller/controller.log
```

### 6. 调试级别 + 文件输出

```bash
./controller \
  --zap-log-level=debug \
  --enable-file-log=true \
  --log-file-path=/var/log/sandbox-controller/debug.log
```

## Kubernetes 部署配置

在 Kubernetes 中部署时，可以通过 Deployment 的 `args` 配置日志选项：

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: sandbox-controller
spec:
  template:
    spec:
      containers:
      - name: controller
        image: sandbox-controller:latest
        args:
        - --enable-file-log=true
        - --log-file-path=/var/log/controller/controller.log
        - --log-max-size=100
        - --log-max-backups=10
        - --log-max-age=30
        - --log-compress=true
        - --zap-encoder=json
        volumeMounts:
        - name: log-volume
          mountPath: /var/log/controller
      volumes:
      - name: log-volume
        emptyDir: {}
        # 或使用 PersistentVolumeClaim
        # persistentVolumeClaim:
        #   claimName: controller-logs
```

## 日志文件格式

### 开发模式（--zap-devel=true）

```
2026-02-12T10:30:45.123+0800	INFO	setup	starting manager
2026-02-12T10:30:45.456+0800	INFO	controller	Reconciling	{"namespace": "default", "name": "example"}
```

### 生产模式（JSON）

```json
{"level":"info","ts":"2026-02-12T10:30:45.123+0800","logger":"setup","msg":"starting manager"}
{"level":"info","ts":"2026-02-12T10:30:45.456+0800","logger":"controller","msg":"Reconciling","namespace":"default","name":"example"}
```

## 日志轮转机制

日志轮转由 [lumberjack](https://github.com/natefinch/lumberjack) 实现，支持：

1. **按大小轮转**：当日志文件达到 `--log-max-size` 指定的大小时，自动创建新文件
2. **文件命名**：轮转后的文件名格式为 `<文件名>-<时间戳><扩展名>`，例如 `controller-2026-02-12T10-30-45.123.log`
3. **自动压缩**：如果启用 `--log-compress`，旧日志文件会被压缩为 `.gz` 格式
4. **自动清理**：
   - 根据 `--log-max-backups` 保留最新的 N 个文件
   - 根据 `--log-max-age` 删除超过指定天数的文件

## 目录权限

确保日志目录存在且有写入权限：

```bash
# 创建日志目录
mkdir -p /var/log/sandbox-controller

# 设置权限（根据实际运行用户调整）
chown controller:controller /var/log/sandbox-controller
chmod 755 /var/log/sandbox-controller
```

在 Kubernetes 中，可以使用 `initContainer` 或 `securityContext` 确保权限正确：

```yaml
spec:
  initContainers:
  - name: setup-log-dir
    image: busybox
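    # chown 到 controller 容器的运行用户 (runAsUser/runAsGroup = 1000)，否则 755 的 root 目录对该用户不可写
    command: ['sh', '-c', 'mkdir -p /var/log/controller && chown 1000:1000 /var/log/controller && chmod 755 /var/log/controller']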
    volumeMounts:
    - name: log-volume
      mountPath: /var/log/controller
  containers:
  - name: controller
    securityContext:
      runAsUser: 1000
      runAsGroup: 1000
```

## 监控和查看日志

### 查看当前日志

```bash
tail -f /var/log/sandbox-controller/controller.log
```

### 查看压缩的日志

```bash
zcat /var/log/sandbox-controller/controller-2026-02-12T10-30-45.123.log.gz | less
```

### 搜索日志

```bash
# 搜索错误日志
grep -i error /var/log/sandbox-controller/controller.log

# 在所有日志文件中搜索（包括压缩文件）
zgrep -i error /var/log/sandbox-controller/*.log*
```

## 最佳实践

1. **生产环境建议**：
   ```bash
   --enable-file-log=true
   --log-file-path=/var/log/sandbox-controller/controller.log
   --log-max-size=100
   --log-max-backups=10
   --log-max-age=30
   --log-compress=true
   --zap-encoder=json
   ```

2. **开发环境建议**：
   ```bash
   --zap-devel=true
   --enable-file-log=true
   --log-file-path=/tmp/controller-dev.log
   --log-compress=false
   ```

3. **调试问题时**：
   ```bash
   --zap-log-level=debug
   --enable-file-log=true
   --log-max-size=500
   --log-compress=false
   ```

4. **磁盘空间有限时**：
   ```bash
   --enable-file-log=true
   --log-max-size=50
   --log-max-backups=3
   --log-max-age=7
   --log-compress=true
   ```

## 故障排查

### 日志文件未创建

1. 检查目录是否存在：`ls -la /var/log/sandbox-controller/`
2. 检查权限：`ls -ld /var/log/sandbox-controller/`
3. 检查进程是否有写入权限
4. 查看 controller 启动日志中是否有错误

### 日志文件不轮转

1. 确认 `--enable-file-log=true` 已设置
2. 检查文件大小是否达到 `--log-max-size` 限制
3. 确认 lumberjack 库已正确安装：`go list -m gopkg.in/natefinch/lumberjack.v2`

### 磁盘空间占用过大

1. 减小 `--log-max-size` 的值
2. 减少 `--log-max-backups` 的数量
3. 减小 `--log-max-age` 的天数
4. 确保 `--log-compress=true` 已启用


================================================
FILE: kubernetes/examples/controller/README-ZH.md
================================================
# Controller 示例

这个示例演示了如何使用生成的 clientset、informer 和 lister 来操作 BatchSandbox 和 Pool 自定义资源。

## 功能介绍

### 1. Clientset (客户端集)
用于直接与 Kubernetes API Server 交互,执行 CRUD 操作:
- **Create**: 创建新的资源
- **Get**: 获取特定资源
- **List**: 列出所有资源
- **Update**: 更新现有资源
- **Delete**: 删除资源

### 2. Informer (通知器)
用于监听资源变化并维护本地缓存:
- 自动监听 API Server 的资源变化
- 触发事件处理器 (Add/Update/Delete)
- 维护资源的本地缓存,减少 API Server 压力

### 3. Lister (列表器)
用于从 Informer 的本地缓存中读取资源:
- 高性能的本地缓存读取
- 避免频繁访问 API Server
- 支持按命名空间和标签过滤

## 运行示例

### 前提条件
1. 已安装 CRD 定义到 Kubernetes 集群
2. 有访问集群的 kubeconfig 文件

### 安装 CRD
```bash
# 从项目根目录运行
kubectl apply -f config/crd/bases/
```

### 运行示例程序
```bash
# 使用默认 kubeconfig (~/.kube/config)
go run examples/controller/main.go

# 或指定 kubeconfig 路径
go run examples/controller/main.go -kubeconfig=/path/to/kubeconfig
```

## 示例输出

程序将执行以下操作:

1. **创建 Pool 资源**
   ```
   Successfully created Pool: example-pool
   ```

2. **获取 Pool 资源**
   ```
   Successfully retrieved Pool: example-pool, PoolMin: 2, PoolMax: 10
   ```

3. **列出所有 Pool 资源**
   ```
   Found 1 Pool(s):
     - example-pool (PoolMin: 2, PoolMax: 10)
   ```

4. **更新 Pool 资源**
   ```
   Successfully updated Pool: example-pool, new PoolMax: 20
   ```

5. **创建 BatchSandbox 资源**
   ```
   Successfully created BatchSandbox: example-batchsandbox, Replicas: 3
   ```

6. **获取和更新 BatchSandbox**
   ```
   Successfully updated BatchSandbox: example-batchsandbox, new Replicas: 5
   ```

7. **使用 Lister 从缓存读取**
   ```
   Retrieved Pool from cache: example-pool, PoolMax: 20
   Found 1 BatchSandbox(es) from cache
   ```

8. **清理资源**
   ```
   Successfully deleted BatchSandbox: example-batchsandbox
   Successfully deleted Pool: example-pool
   ```

## 代码结构

```
main.go
├── Controller struct          # 控制器结构
├── NewController()           # 创建控制器并注册事件处理器
├── DemonstrateClientsetUsage() # 演示 Clientset CRUD 操作
└── DemonstrateListerUsage()   # 演示 Lister 缓存读取
```

## 关键概念

### Clientset vs Lister

**何时使用 Clientset:**
- 需要创建、更新或删除资源
- 需要获取资源的最新状态
- 执行写操作

**何时使用 Lister:**
- 只需要读取资源
- 可以接受轻微的数据延迟
- 需要高性能的批量读取
- 减少 API Server 负载

### Informer 事件处理

Informer 会在资源变化时触发相应的事件处理器:
```go
AddFunc: func(obj interface{}) {
    // 资源被创建时调用
}
UpdateFunc: func(old, new interface{}) {
    // 资源被更新时调用
}
DeleteFunc: func(obj interface{}) {
    // 资源被删除时调用
}
```

## 生产环境建议

1. **使用 Lister 而不是频繁调用 Clientset.Get()**
   - Lister 从本地缓存读取,性能更好
   - 减少对 API Server 的压力

2. **正确处理 Informer 重新同步**
   - 设置合理的 resync 周期 (如 30 秒)
   - 在事件处理器中使用幂等操作

3. **使用 Workqueue 处理事件**
   - 避免在事件处理器中执行耗时操作
   - 使用 workqueue 实现重试机制

4. **处理资源版本冲突**
   - Update 操作时使用 optimistic locking
   - 捕获 Conflict 错误并重试

## 扩展阅读

- [Kubernetes Client-go 文档](https://github.com/kubernetes/client-go)
- [编写 Kubernetes 控制器](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/)
- [Sample Controller](https://github.com/kubernetes/sample-controller)


================================================
FILE: kubernetes/examples/controller/README.md
================================================
# Controller Example

This example demonstrates how to use the generated clientset, informer, and lister to operate BatchSandbox and Pool custom resources.

## Features

### 1. Clientset (Client Set)
Used to interact directly with the Kubernetes API Server for CRUD operations:
- **Create**: Create new resources
- **Get**: Retrieve specific resources
- **List**: List all resources
- **Update**: Update existing resources
- **Delete**: Delete resources

### 2. Informer
Used to watch resource changes and maintain a local cache:
- Automatically watches resource changes from the API Server
- Triggers event handlers (Add/Update/Delete)
- Maintains a local cache of resources to reduce API Server load

### 3. Lister
Used to read resources from the Informer's local cache:
- High-performance local cache reads
- Avoids frequent API Server access
- Supports filtering by namespace and labels

## Running the Example

### Prerequisites
1. CRDs are installed in the Kubernetes cluster
2. A kubeconfig file with access to the cluster is available

### Install CRDs
```bash
# Run from project root directory
kubectl apply -f config/crd/bases/
```

### Run the Example Program
```bash
# Use default kubeconfig (~/.kube/config)
go run examples/controller/main.go

# Or specify kubeconfig path
go run examples/controller/main.go -kubeconfig=/path/to/kubeconfig
```

## Example Output

The program will perform the following operations:

1. **Create Pool resource**
   ```
   Successfully created Pool: example-pool
   ```

2. **Get Pool resource**
   ```
   Successfully retrieved Pool: example-pool, PoolMin: 2, PoolMax: 10
   ```

3. **List all Pool resources**
   ```
   Found 1 Pool(s):
     - example-pool (PoolMin: 2, PoolMax: 10)
   ```

4. **Update Pool resource**
   ```
   Successfully updated Pool: example-pool, new PoolMax: 20
   ```

5. **Create BatchSandbox resource**
   ```
   Successfully created BatchSandbox: example-batchsandbox, Replicas: 3
   ```

6. **Get and update BatchSandbox**
   ```
   Successfully updated BatchSandbox: example-batchsandbox, new Replicas: 5
   ```

7. **Use Lister to read from cache**
   ```
   Retrieved Pool from cache: example-pool, PoolMax: 20
   Found 1 BatchSandbox(es) from cache
   ```

8. **Cleanup resources**
   ```
   Successfully deleted BatchSandbox: example-batchsandbox
   Successfully deleted Pool: example-pool
   ```

## Code Structure

```
main.go
├── Controller struct          # Controller structure
├── NewController()           # Create controller and register event handlers
├── DemonstrateClientsetUsage() # Demonstrate Clientset CRUD operations
└── DemonstrateListerUsage()   # Demonstrate Lister cache reads
```

## Key Concepts

### Clientset vs Lister

**When to use Clientset:**
- Need to create, update, or delete resources
- Need to get the latest state of resources
- Performing write operations

**When to use Lister:**
- Only need to read resources
- Can tolerate slight data staleness
- Need high-performance batch reads
- Want to reduce API Server load

### Informer Event Handling

Informer triggers corresponding event handlers when resources change:
```go
AddFunc: func(obj interface{}) {
    // Called when resource is created
}
UpdateFunc: func(old, new interface{}) {
    // Called when resource is updated
}
DeleteFunc: func(obj interface{}) {
    // Called when resource is deleted
}
```

## Production Recommendations

1. **Use Lister instead of frequent Clientset.Get() calls**
   - Lister reads from local cache with better performance
   - Reduces pressure on the API Server

2. **Properly handle Informer resync**
   - Set a reasonable resync period (e.g., 30 seconds)
   - Use idempotent operations in event handlers

3. **Use Workqueue to process events**
   - Avoid time-consuming operations in event handlers
   - Use workqueue to implement retry mechanisms

4. **Handle resource version conflicts**
   - Use optimistic locking during Update operations
   - Catch Conflict errors and retry

## Further Reading

- [Kubernetes Client-go Documentation](https://github.com/kubernetes/client-go)
- [Writing Kubernetes Controllers](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/)
- [Sample Controller](https://github.com/kubernetes/sample-controller)


================================================
FILE: kubernetes/examples/controller/main.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"context"
	"flag"
	"fmt"
	"time"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/clientcmd"
	"k8s.io/client-go/util/workqueue"
	"k8s.io/klog/v2"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	clientset "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/clientset/versioned"
	informers "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/informers/externalversions"
	listers "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/listers/sandbox/v1alpha1"
)

// Controller bundles everything the example needs to work with the generated
// BatchSandbox and Pool clients: a clientset, cache-backed listers, informer
// sync checks, and a work queue.
type Controller struct {
	// clientset issues requests directly against the API server.
	clientset clientset.Interface

	// workqueue holds items that the worker goroutines pull and process.
	workqueue workqueue.RateLimitingInterface

	// Listers answer reads from the informers' local caches instead of the API Server.
	poolLister         listers.PoolLister
	batchSandboxLister listers.BatchSandboxLister

	// HasSynced checks for the two informers; used to wait for the initial cache sync.
	poolSynced, batchSandboxSynced cache.InformerSynced
}

// NewController builds a Controller on top of the given shared informer
// factory and registers logging event handlers for BatchSandbox and Pool.
//
// The informers are obtained from informerFactory but are not started here;
// the caller is responsible for calling informerFactory.Start.
//
// Delete handlers may receive a cache.DeletedFinalStateUnknown tombstone
// instead of the resource itself (when the watch missed the delete event), so
// they unwrap the tombstone and use checked type assertions rather than
// panicking on an unexpected type.
func NewController(
	clientset clientset.Interface,
	informerFactory informers.SharedInformerFactory,
) *Controller {
	// Get BatchSandbox and Pool informers
	batchSandboxInformer := informerFactory.Sandbox().V1alpha1().BatchSandboxes()
	poolInformer := informerFactory.Sandbox().V1alpha1().Pools()

	controller := &Controller{
		clientset:          clientset,
		batchSandboxLister: batchSandboxInformer.Lister(),
		poolLister:         poolInformer.Lister(),
		batchSandboxSynced: batchSandboxInformer.Informer().HasSynced,
		poolSynced:         poolInformer.Informer().HasSynced,
		workqueue:          workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "Example"),
	}

	// unwrapTombstone returns the last known object carried by a
	// DeletedFinalStateUnknown tombstone, or obj itself when it is not a tombstone.
	unwrapTombstone := func(obj interface{}) interface{} {
		if tombstone, ok := obj.(cache.DeletedFinalStateUnknown); ok {
			return tombstone.Obj
		}
		return obj
	}

	// Register event handlers
	batchSandboxInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			bs := obj.(*sandboxv1alpha1.BatchSandbox)
			klog.Infof("BatchSandbox added: %s/%s", bs.Namespace, bs.Name)
		},
		UpdateFunc: func(old, new interface{}) {
			bs := new.(*sandboxv1alpha1.BatchSandbox)
			klog.Infof("BatchSandbox updated: %s/%s", bs.Namespace, bs.Name)
		},
		DeleteFunc: func(obj interface{}) {
			bs, ok := unwrapTombstone(obj).(*sandboxv1alpha1.BatchSandbox)
			if !ok {
				klog.Warningf("Ignoring BatchSandbox delete event with unexpected object type %T", obj)
				return
			}
			klog.Infof("BatchSandbox deleted: %s/%s", bs.Namespace, bs.Name)
		},
	})

	poolInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			pool := obj.(*sandboxv1alpha1.Pool)
			klog.Infof("Pool added: %s/%s", pool.Namespace, pool.Name)
		},
		UpdateFunc: func(old, new interface{}) {
			pool := new.(*sandboxv1alpha1.Pool)
			klog.Infof("Pool updated: %s/%s", pool.Namespace, pool.Name)
		},
		DeleteFunc: func(obj interface{}) {
			pool, ok := unwrapTombstone(obj).(*sandboxv1alpha1.Pool)
			if !ok {
				klog.Warningf("Ignoring Pool delete event with unexpected object type %T", obj)
				return
			}
			klog.Infof("Pool deleted: %s/%s", pool.Namespace, pool.Name)
		},
	})

	return controller
}

// Run waits for both informer caches to sync, launches the requested number
// of worker goroutines, and then blocks until ctx is cancelled. The work
// queue is shut down when Run returns. It returns an error only if the caches
// fail to sync.
func (c *Controller) Run(ctx context.Context, workers int) error {
	defer c.workqueue.ShutDown()

	klog.Info("Waiting for cache sync...")
	synced := cache.WaitForCacheSync(ctx.Done(), c.batchSandboxSynced, c.poolSynced)
	if !synced {
		return fmt.Errorf("failed to sync cache")
	}
	klog.Info("Cache synced, starting controller")

	// Each worker is restarted one second after it returns, until ctx is done.
	for remaining := workers; remaining > 0; remaining-- {
		go wait.UntilWithContext(ctx, c.runWorker, time.Second)
	}

	// Block until the caller cancels the context.
	<-ctx.Done()
	klog.Info("Stopping controller")
	return nil
}

// runWorker keeps processing work items until processNextWorkItem reports
// that the queue has been shut down.
func (c *Controller) runWorker(ctx context.Context) {
	for {
		if !c.processNextWorkItem(ctx) {
			return
		}
	}
}

// processNextWorkItem takes one item off the work queue and marks it done.
// It returns false once the queue has been shut down, and true otherwise.
func (c *Controller) processNextWorkItem(ctx context.Context) bool {
	item, quit := c.workqueue.Get()
	if !quit {
		// Mark the item as done when this function returns.
		defer c.workqueue.Done(item)
		// Process actual business logic here
		return true
	}
	return false
}

// DemonstrateClientsetUsage demonstrates how to use clientset for CRUD operations.
//
// It runs against the "default" namespace and, in order: creates, gets, lists
// and updates a Pool named "example-pool"; creates, gets, updates and lists a
// BatchSandbox named "example-batchsandbox"; waits up to three seconds so
// informers can observe the changes; then deletes both resources. Every
// failure is logged and the walkthrough continues with the next step.
//
// An Update is attempted only when the preceding Get succeeded: the client may
// return a non-nil zero-value object together with an error, so a nil check on
// the returned pointer alone does not detect a failed Get.
func DemonstrateClientsetUsage(ctx context.Context, client clientset.Interface) {
	namespace := "default"

	// formatReplicas renders the optional Replicas pointer without
	// dereferencing nil, so objects that leave the field unset cannot panic
	// the walkthrough.
	formatReplicas := func(r *int32) string {
		if r == nil {
			return "<unset>"
		}
		return fmt.Sprintf("%d", *r)
	}

	klog.Info("========================================")
	klog.Info("Demonstrating Clientset Usage")
	klog.Info("========================================")

	// 1. Create Pool
	klog.Info("\n1. Creating Pool resource")
	pool := &sandboxv1alpha1.Pool{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "example-pool",
			Namespace: namespace,
		},
		Spec: sandboxv1alpha1.PoolSpec{
			Template: &corev1.PodTemplateSpec{
				Spec: corev1.PodSpec{
					Containers: []corev1.Container{
						{
							Name:  "nginx",
							Image: "nginx:latest",
						},
					},
				},
			},
			CapacitySpec: sandboxv1alpha1.CapacitySpec{
				PoolMin:   2,
				PoolMax:   10,
				BufferMin: 1,
				BufferMax: 5,
			},
		},
	}

	createdPool, err := client.SandboxV1alpha1().Pools(namespace).Create(ctx, pool, metav1.CreateOptions{})
	if err != nil {
		if errors.IsAlreadyExists(err) {
			klog.Infof("Pool already exists: %s", pool.Name)
		} else {
			klog.Errorf("Failed to create Pool: %v", err)
		}
	} else {
		klog.Infof("Successfully created Pool: %s", createdPool.Name)
	}

	// 2. Get Pool
	klog.Info("\n2. Getting Pool resource")
	getPool, err := client.SandboxV1alpha1().Pools(namespace).Get(ctx, "example-pool", metav1.GetOptions{})
	if err != nil {
		klog.Errorf("Failed to get Pool: %v", err)
		// Discard whatever object came back with the error so step 4 skips the update.
		getPool = nil
	} else {
		klog.Infof("Successfully retrieved Pool: %s, PoolMin: %d, PoolMax: %d",
			getPool.Name, getPool.Spec.CapacitySpec.PoolMin, getPool.Spec.CapacitySpec.PoolMax)
	}

	// 3. List all Pools
	klog.Info("\n3. Listing all Pool resources")
	poolList, err := client.SandboxV1alpha1().Pools(namespace).List(ctx, metav1.ListOptions{})
	if err != nil {
		klog.Errorf("Failed to list Pools: %v", err)
	} else {
		klog.Infof("Found %d Pool(s):", len(poolList.Items))
		for _, p := range poolList.Items {
			klog.Infof("  - %s (PoolMin: %d, PoolMax: %d)",
				p.Name, p.Spec.CapacitySpec.PoolMin, p.Spec.CapacitySpec.PoolMax)
		}
	}

	// 4. Update Pool (only when the Get in step 2 succeeded)
	klog.Info("\n4. Updating Pool resource")
	if getPool != nil {
		getPool.Spec.CapacitySpec.PoolMax = 20
		updatedPool, err := client.SandboxV1alpha1().Pools(namespace).Update(ctx, getPool, metav1.UpdateOptions{})
		if err != nil {
			klog.Errorf("Failed to update Pool: %v", err)
		} else {
			klog.Infof("Successfully updated Pool: %s, new PoolMax: %d", updatedPool.Name, updatedPool.Spec.CapacitySpec.PoolMax)
		}
	}

	// 5. Create BatchSandbox
	klog.Info("\n5. Creating BatchSandbox resource")
	replicas := int32(3)
	batchSandbox := &sandboxv1alpha1.BatchSandbox{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "example-batchsandbox",
			Namespace: namespace,
		},
		Spec: sandboxv1alpha1.BatchSandboxSpec{
			Replicas: &replicas,
			PoolRef:  "example-pool",
		},
	}

	createdBS, err := client.SandboxV1alpha1().BatchSandboxes(namespace).Create(ctx, batchSandbox, metav1.CreateOptions{})
	if err != nil {
		if errors.IsAlreadyExists(err) {
			klog.Infof("BatchSandbox already exists: %s", batchSandbox.Name)
		} else {
			klog.Errorf("Failed to create BatchSandbox: %v", err)
		}
	} else {
		klog.Infof("Successfully created BatchSandbox: %s, Replicas: %s", createdBS.Name, formatReplicas(createdBS.Spec.Replicas))
	}

	// 6. Get BatchSandbox
	klog.Info("\n6. Getting BatchSandbox resource")
	getBS, err := client.SandboxV1alpha1().BatchSandboxes(namespace).Get(ctx, "example-batchsandbox", metav1.GetOptions{})
	if err != nil {
		klog.Errorf("Failed to get BatchSandbox: %v", err)
		// Discard whatever object came back with the error so step 7 skips the update.
		getBS = nil
	} else {
		klog.Infof("Successfully retrieved BatchSandbox: %s, Replicas: %s, PoolRef: %s",
			getBS.Name, formatReplicas(getBS.Spec.Replicas), getBS.Spec.PoolRef)
	}

	// 7. Update BatchSandbox (only when the Get in step 6 succeeded)
	klog.Info("\n7. Updating BatchSandbox resource")
	if getBS != nil {
		newReplicas := int32(5)
		getBS.Spec.Replicas = &newReplicas
		updatedBS, err := client.SandboxV1alpha1().BatchSandboxes(namespace).Update(ctx, getBS, metav1.UpdateOptions{})
		if err != nil {
			klog.Errorf("Failed to update BatchSandbox: %v", err)
		} else {
			klog.Infof("Successfully updated BatchSandbox: %s, new Replicas: %s", updatedBS.Name, formatReplicas(updatedBS.Spec.Replicas))
		}
	}

	// 8. List all BatchSandboxes
	klog.Info("\n8. Listing all BatchSandbox resources")
	bsList, err := client.SandboxV1alpha1().BatchSandboxes(namespace).List(ctx, metav1.ListOptions{})
	if err != nil {
		klog.Errorf("Failed to list BatchSandboxes: %v", err)
	} else {
		klog.Infof("Found %d BatchSandbox(es):", len(bsList.Items))
		for _, bs := range bsList.Items {
			klog.Infof("  - %s (Replicas: %s, PoolRef: %s)",
				bs.Name, formatReplicas(bs.Spec.Replicas), bs.Spec.PoolRef)
		}
	}

	// Wait for informer to process events; stop waiting early if ctx is cancelled.
	klog.Info("\nWaiting 3 seconds for informer to process events...")
	select {
	case <-time.After(3 * time.Second):
	case <-ctx.Done():
	}

	// 9. Delete BatchSandbox
	klog.Info("\n9. Deleting BatchSandbox resource")
	err = client.SandboxV1alpha1().BatchSandboxes(namespace).Delete(ctx, "example-batchsandbox", metav1.DeleteOptions{})
	if err != nil {
		klog.Errorf("Failed to delete BatchSandbox: %v", err)
	} else {
		klog.Infof("Successfully deleted BatchSandbox: example-batchsandbox")
	}

	// 10. Delete Pool
	klog.Info("\n10. Deleting Pool resource")
	err = client.SandboxV1alpha1().Pools(namespace).Delete(ctx, "example-pool", metav1.DeleteOptions{})
	if err != nil {
		klog.Errorf("Failed to delete Pool: %v", err)
	} else {
		klog.Infof("Successfully deleted Pool: example-pool")
	}
}

// DemonstrateListerUsage demonstrates how to use lister to read objects from cache.
//
// It looks up the "example-pool" Pool and the "example-batchsandbox"
// BatchSandbox in the "default" namespace and lists every Pool and
// BatchSandbox in that namespace, logging each result. A NotFound error on a
// Get is logged as informational; other errors are logged as errors.
//
// The Replicas field is a pointer, so it is formatted through a nil-safe
// helper: the list covers every BatchSandbox in the namespace, not only the
// one this example creates.
func DemonstrateListerUsage(
	batchSandboxLister listers.BatchSandboxLister,
	poolLister listers.PoolLister,
) {
	// formatReplicas renders the optional Replicas pointer without dereferencing nil.
	formatReplicas := func(r *int32) string {
		if r == nil {
			return "<unset>"
		}
		return fmt.Sprintf("%d", *r)
	}

	klog.Info("\n========================================")
	klog.Info("Demonstrating Lister Usage (reading from local cache)")
	klog.Info("========================================")

	namespace := "default"

	// 1. Use lister to get a specific Pool
	klog.Info("\n1. Using Lister to get Pool")
	pool, err := poolLister.Pools(namespace).Get("example-pool")
	if err != nil {
		if errors.IsNotFound(err) {
			klog.Info("Pool not found (may have been deleted)")
		} else {
			klog.Errorf("Lister failed to get Pool: %v", err)
		}
	} else {
		klog.Infof("Retrieved Pool from cache: %s, PoolMax: %d", pool.Name, pool.Spec.CapacitySpec.PoolMax)
	}

	// 2. Use lister to list all Pools
	klog.Info("\n2. Using Lister to list all Pools")
	pools, err := poolLister.Pools(namespace).List(labels.Everything())
	if err != nil {
		klog.Errorf("Lister failed to list Pools: %v", err)
	} else {
		klog.Infof("Found %d Pool(s) from cache:", len(pools))
		for _, p := range pools {
			klog.Infof("  - %s", p.Name)
		}
	}

	// 3. Use lister to get a specific BatchSandbox
	klog.Info("\n3. Using Lister to get BatchSandbox")
	bs, err := batchSandboxLister.BatchSandboxes(namespace).Get("example-batchsandbox")
	if err != nil {
		if errors.IsNotFound(err) {
			klog.Info("BatchSandbox not found (may have been deleted)")
		} else {
			klog.Errorf("Lister failed to get BatchSandbox: %v", err)
		}
	} else {
		klog.Infof("Retrieved BatchSandbox from cache: %s, Replicas: %s", bs.Name, formatReplicas(bs.Spec.Replicas))
	}

	// 4. Use lister to list all BatchSandboxes
	klog.Info("\n4. Using Lister to list all BatchSandboxes")
	batchSandboxes, err := batchSandboxLister.BatchSandboxes(namespace).List(labels.Everything())
	if err != nil {
		klog.Errorf("Lister failed to list BatchSandboxes: %v", err)
	} else {
		klog.Infof("Found %d BatchSandbox(es) from cache:", len(batchSandboxes))
		for _, bs := range batchSandboxes {
			klog.Infof("  - %s (Replicas: %s)", bs.Name, formatReplicas(bs.Spec.Replicas))
		}
	}
}

// main wires the example together: it loads the client configuration, builds
// the clientset and shared informer factory, starts the informers, waits for
// their caches to sync, and then runs the clientset and lister demonstrations.
//
// Configuration is resolved with the standard client-go loading rules: an
// explicit -kubeconfig path if given, otherwise $KUBECONFIG or
// ~/.kube/config, falling back to in-cluster configuration when no kubeconfig
// is found.
func main() {
	var kubeconfig string
	flag.StringVar(&kubeconfig, "kubeconfig", "", "Path to a kubeconfig file (defaults to $KUBECONFIG or ~/.kube/config)")
	flag.Parse()

	// Build configuration. BuildConfigFromFlags("", "") would skip the default
	// kubeconfig locations and only try in-cluster config, so use the default
	// loading rules and pin the explicit path when one was provided.
	loadingRules := clientcmd.NewDefaultClientConfigLoadingRules()
	loadingRules.ExplicitPath = kubeconfig
	config, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(
		loadingRules,
		&clientcmd.ConfigOverrides{},
	).ClientConfig()
	if err != nil {
		klog.Fatalf("Failed to build config: %v", err)
	}

	// Create clientset
	client, err := clientset.NewForConfig(config)
	if err != nil {
		klog.Fatalf("Failed to create clientset: %v", err)
	}

	// Create informer factory
	informerFactory := informers.NewSharedInformerFactory(client, time.Second*30)

	// Create controller
	controller := NewController(client, informerFactory)

	// Start informers; cancelling ctx on return stops them.
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	informerFactory.Start(ctx.Done())

	// Wait for cache sync
	klog.Info("Waiting for informer cache sync...")
	if ok := cache.WaitForCacheSync(ctx.Done(), controller.batchSandboxSynced, controller.poolSynced); !ok {
		klog.Fatal("Failed to sync cache")
	}
	klog.Info("Informer cache synced successfully")

	// Demonstrate clientset usage
	DemonstrateClientsetUsage(ctx, client)

	// Demonstrate lister usage
	DemonstrateListerUsage(controller.batchSandboxLister, controller.poolLister)

	klog.Info("\n========================================")
	klog.Info("Demonstration completed!")
	klog.Info("========================================")
}


================================================
FILE: kubernetes/examples/task-executor/README.md
================================================
# Task Executor Usage Guide

## Introduction

The `task-executor` is a lightweight component designed to run and manage short-lived tasks (processes or containers) within a Kubernetes Pod context. It acts as a local agent, receiving task specifications from a Kubernetes Controller (e.g., `BatchSandboxController`) and executing them on the node where it runs. It exposes a simple HTTP API for task creation, status inquiry, and management.

## Running the Task Executor

The `task-executor` can be started using the `cmd/task-executor/main.go` entry point. It supports various command-line flags and environment variables for configuration.

**Basic Startup:**

```bash
/path/to/cmd/task-executor/main --data-dir=/var/lib/sandbox/tasks --listen-addr=0.0.0.0:5758
```

**Key Configuration Parameters:**

| Flag / Environment Variable | Description                                                                                                                                                                                                                                                                                              | Default Value                 |
| :-------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---------------------------- |
| `--data-dir` (DATA_DIR)     | Directory for persisting task state and logs.                                                                                                                                                                                                                                                            | `/var/lib/sandbox/tasks`      |
| `--listen-addr` (LISTEN_ADDR)| Address and port for the HTTP API server.                                                                                                                                                                                                                                                                | `0.0.0.0:5758`                |
| `--enable-sidecar-mode` (ENABLE_SIDECAR_MODE) | If `true`, enables sidecar mode execution, where tasks are run within the PID namespace of a specified main container. Requires `nsenter` and appropriate privileges.                                                                                                                                                            | `false`                       |
| `--main-container-name` (MAIN_CONTAINER_NAME)| When `enable-sidecar-mode` is `true`, specifies the name of the main container whose PID namespace should be used.                                                                                                                                                                       | `main`                        |
| `--enable-container-mode` (ENABLE_CONTAINER_MODE) | If `true`, enables container mode execution using the CRI runtime. (Note: Current implementation may be a placeholder).                                                                                                                                                                | `false`                       |
| `--cri-socket` (CRI_SOCKET) | Path to the CRI socket (e.g., `containerd.sock`) when `enable-container-mode` is `true`.                                                                                                                                                                                                                | `/var/run/containerd/containerd.sock` |
| `--reconcile-interval`      | The interval at which the internal task manager reconciles task states.                                                                                                                                                                                                                                  | `500ms`                       |

## HTTP API Endpoints

The `task-executor` exposes a RESTful HTTP API. All API calls expect JSON request bodies (where applicable) and return JSON responses.

### 1. `POST /tasks` - Create a new task

Creates and starts a single task.

*   **Method:** `POST`
*   **Path:** `/tasks`
*   **Request Body (application/json):** An object representing the desired task.

    ```json
    {
      "name": "my-first-task",
      "spec": {
        "process": {
          "command": ["sh", "-c"],
          "args": ["echo 'Hello from my task!' && sleep 5 && echo 'Task finished.'"]
        }
      }
    }
    ```

*   **Response Body (application/json):** The created task object with its initial status.

    ```json
    {
      "name": "my-first-task",
      "spec": {
        "process": {
          "command": ["sh", "-c"],
          "args": ["echo 'Hello from my task!' && sleep 5 && echo 'Task finished.'"]
        }
      },
      "status": {
        "state": {
          "waiting": {
            "reason": "Initialized"
          }
        }
      }
    }
    ```

**Example (using `curl`):**

```bash
curl -X POST -H "Content-Type: application/json" -d '{
  "name": "my-first-task",
  "spec": {
    "process": {
      "command": ["sh", "-c"],
      "args": ["echo \"Hello from my task!\" && sleep 5 && echo \"Task finished.\""]
    }
  }
}' http://localhost:5758/tasks
```

### 2. `GET /tasks/{taskName}` - Get task status

Retrieves the current status of a specific task by its name.

*   **Method:** `GET`
*   **Path:** `/tasks/{taskName}`
*   **Response Body (application/json):** The task object, including its current status.

    ```json
    {
      "name": "my-first-task",
      "spec": {
        "process": {
          "command": ["sh", "-c"],
          "args": ["echo 'Hello from my task!' && sleep 5 && echo 'Task finished.'"]
        }
      },
      "status": {
        "state": {
          "running": {
            "startedAt": "2025-12-17T10:00:00Z"
          }
        }
      }
    }
    ```

**Example (using `curl`):**

```bash
curl http://localhost:5758/tasks/my-first-task
```

### 3. `DELETE /tasks/{taskName}` - Delete a task

Marks a task for deletion. The `task-executor` will attempt to gracefully stop the task and then remove its state.

*   **Method:** `DELETE`
*   **Path:** `/tasks/{taskName}`
*   **Response:** `204 No Content` on successful marking for deletion.

**Example (using `curl`):**

```bash
curl -X DELETE http://localhost:5758/tasks/my-first-task
```

### 4. `POST /setTasks` - Synchronize tasks

This endpoint is typically used by controllers to synchronize a desired set of tasks. Tasks not present in the desired list will be marked for deletion; new tasks will be created.

*   **Method:** `POST`
*   **Path:** `/setTasks`
*   **Request Body (application/json):** An array of task objects representing the desired state.

    ```json
    [
      {
        "name": "task-alpha",
        "spec": {
          "process": {
            "command": ["sleep", "10"]
          }
        }
      },
      {
        "name": "task-beta",
        "spec": {
          "process": {
            "command": ["ls", "-l", "/tmp"]
          }
        }
      }
    ]
    ```

*   **Response Body (application/json):** The current list of tasks managed by the executor after synchronization.

    ```json
    [
      {
        "name": "task-alpha",
        "spec": {
          "process": {
            "command": ["sleep", "10"]
          }
        },
        "status": {
          "state": {
            "waiting": {
              "reason": "Initialized"
            }
          }
        }
      },
      {
        "name": "task-beta",
        "spec": {
          "process": {
            "command": ["ls", "-l", "/tmp"]
          }
        },
        "status": {
          "state": {
            "waiting": {
              "reason": "Initialized"
            }
          }
        }
      }
    ]
    ```

**Example (using `curl`):**

```bash
curl -X POST -H "Content-Type: application/json" -d \
'[
  {
    "name": "task-alpha",
    "spec": { "process": { "command": ["sleep", "10"] } }
  },
  {
    "name": "task-beta",
    "spec": { "process": { "command": ["ls", "-l", "/tmp"] } }
  }
]' http://localhost:5758/setTasks
```

### 5. `GET /getTasks` - List all tasks

Retrieves a list of all tasks currently managed by the `task-executor`.

*   **Method:** `GET`
*   **Path:** `/getTasks`
*   **Response Body (application/json):** An array of task objects.

    ```json
    [
      {
        "name": "task-alpha",
        "spec": {
          "process": {
            "command": ["sleep", "10"]
          }
        },
        "status": {
          "state": {
            "running": {
              "startedAt": "2025-12-17T10:05:00Z"
            }
          }
        }
      },
      {
        "name": "task-beta",
        "spec": {
          "process": {
            "command": ["ls", "-l", "/tmp"]
          }
        },
        "status": {
          "state": {
            "terminated": {
              "exitCode": 0,
              "reason": "Succeeded",
              "startedAt": "2025-12-17T10:06:00Z",
              "finishedAt": "2025-12-17T10:06:01Z"
            }
          }
        }
      }
    ]
    ```

**Example (using `curl`):**

```bash
curl http://localhost:5758/getTasks
```

### 6. `GET /health` - Health check

Returns the health status of the `task-executor`.

*   **Method:** `GET`
*   **Path:** `/health`
*   **Response Body (application/json):**

    ```json
    {
      "status": "healthy"
    }
    ```

**Example (using `curl`):**

```bash
curl http://localhost:5758/health
```

## Task Specification (`TaskSpec`) Structure

The `spec` field within a task object (`api/v1alpha1.TaskSpec`) defines how the task should be executed. It currently supports `process` and `container` execution modes.

### Process Task Example

This mode executes a command directly as a process.

```json
{
  "name": "my-process-task",
  "spec": {
    "process": {
      "command": ["python3", "my_script.py"],
      "args": ["--config", "/etc/app/config.yaml"],
      "env": [
        { "name": "DEBUG_MODE", "value": "true" }
      ],
      "workingDir": "/app"
    }
  }
}
```

### Container Task Example (Placeholder/Future Feature)

This mode is intended for executing tasks within containers managed by the CRI runtime. Note that as per `internal/task-executor/runtime/container.go`, this mode might still be a placeholder.

```json
{
  "name": "my-container-task",
  "spec": {
    "container": {
      "image": "ubuntu:latest",
      "command": ["/bin/bash", "-c"],
      "args": ["apt update && apt install -y curl"],
      "env": [
        { "name": "http_proxy", "value": "http://myproxy.com:5758" }
      ],
      "volumeMounts": [
        {
          "name": "data-volume",
          "mountPath": "/data"
        }
      ]
    }
  }
}
```

## Task Status (`TaskStatus`) Structure

The `status` field within a task object (`internal/task-executor/types/Status` mapped to `api/v1alpha1.TaskStatus` for external API) provides details about the task's current execution state.

```json
{
  "name": "my-task",
  "spec": { ... },
  "status": {
    "state": {
      "waiting": {
        "reason": "Initialized"
      }
    },
    // or
    "state": {
      "running": {
        "startedAt": "2025-12-17T10:00:00Z"
      }
    },
    // or
    "state": {
      "terminated": {
        "exitCode": 0,
        "reason": "Succeeded",
        "message": "Task completed successfully",
        "startedAt": "2025-12-17T10:00:00Z",
        "finishedAt": "2025-12-17T10:00:05Z"
      }
    }
  }
}
```

**State Types:**

*   `waiting`: Task is pending execution.
*   `running`: Task is currently executing.
*   `terminated`: Task has finished (succeeded or failed).

## Example Scenario: Running a Sidecar Task

If `task-executor` is configured with `--enable-sidecar-mode=true` and `--main-container-name=my-main-app`, it can execute tasks within the PID namespace of `my-main-app`.

```bash
# Assume task-executor is running in sidecar mode on a pod with 'my-main-app'
# This task will execute 'ls /proc/self/ns' from within the main container's namespace
curl -X POST -H "Content-Type: application/json" -d '{
  "name": "sidecar-namespace-check",
  "spec": {
    "process": {
      "command": ["ls", "/proc/self/ns"]
    }
  }
}' http://localhost:5758/tasks
```


================================================
FILE: kubernetes/examples/task-executor/README_zh-CN.md
================================================
# Task Executor 使用指南

## 简介

`task-executor` 是一个轻量级组件，旨在 Kubernetes Pod 环境中运行和管理短期任务（进程或容器）。它充当本地代理，从 Kubernetes 控制器（例如 `BatchSandboxController`）接收任务规范，并在其运行的节点上执行这些任务。它暴露了一个简单的 HTTP API 用于任务创建、状态查询和管理。

## 运行 Task Executor

可以使用 `cmd/task-executor/main.go` 入口点启动 `task-executor`。它支持各种命令行标志和环境变量进行配置。

**基本启动：**

```bash
/path/to/cmd/task-executor/main --data-dir=/var/lib/sandbox/tasks --listen-addr=0.0.0.0:5758
```

**关键配置参数：**

| 标志 / 环境变量 | 描述 | 默认值 |
| :--- | :--- | :--- |
| `--data-dir` (DATA_DIR) | 用于持久化任务状态和日志的目录。 | `/var/lib/sandbox/tasks` |
| `--listen-addr` (LISTEN_ADDR) | HTTP API 服务器的地址和端口。 | `0.0.0.0:5758` |
| `--enable-sidecar-mode` (ENABLE_SIDECAR_MODE) | 如果为 `true`，则启用 sidecar 模式执行，任务将在指定主容器的 PID 命名空间内运行。需要 `nsenter` 和适当的权限。 | `false` |
| `--main-container-name` (MAIN_CONTAINER_NAME) | 当 `enable-sidecar-mode` 为 `true` 时，指定应使用其 PID 命名空间的主容器的名称。 | `main` |
| `--enable-container-mode` (ENABLE_CONTAINER_MODE) | 如果为 `true`，则启用使用 CRI 运行时的容器模式执行。（注意：当前实现可能只是占位符）。 | `false` |
| `--cri-socket` (CRI_SOCKET) | 当 `enable-container-mode` 为 `true` 时，CRI 套接字的路径（例如 `containerd.sock`）。 | `/var/run/containerd/containerd.sock` |
| `--reconcile-interval` | 内部任务管理器协调任务状态的间隔。 | `500ms` |

## HTTP API 端点

`task-executor` 暴露了一个 RESTful HTTP API。所有 API 调用都期望 JSON 请求体（如适用）并返回 JSON 响应。

### 1. `POST /tasks` - 创建新任务

创建并启动单个任务。

*   **方法：** `POST`
*   **路径：** `/tasks`
*   **请求体 (application/json)：** 代表所需任务的对象。

    ```json
    {
      "name": "my-first-task",
      "spec": {
        "process": {
          "command": ["sh", "-c"],
          "args": ["echo 'Hello from my task!' && sleep 5 && echo 'Task finished.'"]
        }
      }
    }
    ```

*   **响应体 (application/json)：** 创建的任务对象及其初始状态。

    ```json
    {
      "name": "my-first-task",
      "spec": {
        "process": {
          "command": ["sh", "-c"],
          "args": ["echo 'Hello from my task!' && sleep 5 && echo 'Task finished.'"]
        }
      },
      "status": {
        "state": {
          "waiting": {
            "reason": "Initialized"
          }
        }
      }
    }
    ```

**示例 (使用 `curl`)：**

```bash
curl -X POST -H "Content-Type: application/json" -d \
'{
  "name": "my-first-task",
  "spec": {
    "process": {
      "command": ["sh", "-c"],
      "args": ["echo \"Hello from my task!\" && sleep 5 && echo \"Task finished.\""]
    }
  }
}' http://localhost:5758/tasks
```

### 2. `GET /tasks/{taskName}` - 获取任务状态

通过名称检索特定任务的当前状态。

*   **方法：** `GET`
*   **路径：** `/tasks/{taskName}`
*   **响应体 (application/json)：** 任务对象，包括其当前状态。

    ```json
    {
      "name": "my-first-task",
      "spec": {
        "process": {
          "command": ["sh", "-c"],
          "args": ["echo 'Hello from my task!' && sleep 5 && echo 'Task finished.'"]
        }
      },
      "status": {
        "state": {
          "running": {
            "startedAt": "2025-12-17T10:00:00Z"
          }
        }
      }
    }
    ```

**示例 (使用 `curl`)：**

```bash
curl http://localhost:5758/tasks/my-first-task
```

### 3. `DELETE /tasks/{taskName}` - 删除任务

标记要删除的任务。`task-executor` 将尝试优雅地停止任务，然后删除其状态。

*   **方法：** `DELETE`
*   **路径：** `/tasks/{taskName}`
*   **响应：** 成功标记删除时返回 `204 No Content`。

**示例 (使用 `curl`)：**

```bash
curl -X DELETE http://localhost:5758/tasks/my-first-task
```

### 4. `POST /setTasks` - 同步任务

此端点通常由控制器用于同步所需的任务集。不在所需列表中的任务将被标记为删除；新任务将被创建。

*   **方法：** `POST`
*   **路径：** `/setTasks`
*   **请求体 (application/json)：** 代表所需状态的任务对象数组。

    ```json
    [
      {
        "name": "task-alpha",
        "spec": {
          "process": {
            "command": ["sleep", "10"]
          }
        }
      },
      {
        "name": "task-beta",
        "spec": {
          "process": {
            "command": ["ls", "-l", "/tmp"]
          }
        }
      }
    ]
    ```

*   **响应体 (application/json)：** 同步后执行器管理的当前任务列表。

    ```json
    [
      {
        "name": "task-alpha",
        "spec": {
          "process": {
            "command": ["sleep", "10"]
          }
        },
        "status": {
          "state": {
            "waiting": {
              "reason": "Initialized"
            }
          }
        }
      },
      {
        "name": "task-beta",
        "spec": {
          "process": {
            "command": ["ls", "-l", "/tmp"]
          }
        },
        "status": {
          "state": {
            "waiting": {
              "reason": "Initialized"
            }
          }
        }
      }
    ]
    ```

**示例 (使用 `curl`)：**

```bash
curl -X POST -H "Content-Type: application/json" -d \
'[
  {
    "name": "task-alpha",
    "spec": { "process": { "command": ["sleep", "10"] } }
  },
  {
    "name": "task-beta",
    "spec": { "process": { "command": ["ls", "-l", "/tmp"] } }
  }
]' http://localhost:5758/setTasks
```

### 5. `GET /getTasks` - 列出所有任务

检索 `task-executor` 当前管理的所有任务的列表。

*   **方法：** `GET`
*   **路径：** `/getTasks`
*   **响应体 (application/json)：** 任务对象数组。

    ```json
    [
      {
        "name": "task-alpha",
        "spec": {
          "process": {
            "command": ["sleep", "10"]
          }
        },
        "status": {
          "state": {
            "running": {
              "startedAt": "2025-12-17T10:05:00Z"
            }
          }
        }
      },
      {
        "name": "task-beta",
        "spec": {
          "process": {
            "command": ["ls", "-l", "/tmp"]
          }
        },
        "status": {
          "state": {
            "terminated": {
              "exitCode": 0,
              "reason": "Succeeded",
              "startedAt": "2025-12-17T10:06:00Z",
              "finishedAt": "2025-12-17T10:06:01Z"
            }
          }
        }
      }
    ]
    ```

**示例 (使用 `curl`)：**

```bash
curl http://localhost:5758/getTasks
```

### 6. `GET /health` - 健康检查

返回 `task-executor` 的健康状态。

*   **方法：** `GET`
*   **路径：** `/health`
*   **响应体 (application/json)：**

    ```json
    {
      "status": "healthy"
    }
    ```

**示例 (使用 `curl`)：**

```bash
curl http://localhost:5758/health
```

## 任务规范 (`TaskSpec`) 结构

任务对象中的 `spec` 字段 (`api/v1alpha1.TaskSpec`) 定义了应如何执行任务。它目前支持 `process` 和 `container` 执行模式。

### 进程任务示例

此模式直接作为进程执行命令。

```json
{
  "name": "my-process-task",
  "spec": {
    "process": {
      "command": ["python3", "my_script.py"],
      "args": ["--config", "/etc/app/config.yaml"],
      "env": [
        { "name": "DEBUG_MODE", "value": "true" }
      ],
      "workingDir": "/app"
    }
  }
}
```

### 容器任务示例（占位符/未来特性）

此模式旨在执行由 CRI 运行时管理的容器中的任务。请注意，根据 `internal/task-executor/runtime/container.go`，此模式可能仍是一个占位符。

```json
{
  "name": "my-container-task",
  "spec": {
    "container": {
      "image": "ubuntu:latest",
      "command": ["/bin/bash", "-c"],
      "args": ["apt update && apt install -y curl"],
      "env": [
        { "name": "http_proxy", "value": "http://myproxy.com:5758" }
      ],
      "volumeMounts": [
        {
          "name": "data-volume",
          "mountPath": "/data"
        }
      ]
    }
  }
}
```

## 任务状态 (`TaskStatus`) 结构

任务对象中的 `status` 字段 (`internal/task-executor/types/Status` 映射到 `api/v1alpha1.TaskStatus` 用于外部 API) 提供了有关任务当前执行状态的详细信息。

```json
{
  "name": "my-task",
  "spec": { ... },
  "status": {
    "state": {
      "waiting": {
        "reason": "Initialized"
      }
    },
    // 或者
    "state": {
      "running": {
        "startedAt": "2025-12-17T10:00:00Z"
      }
    },
    // 或者
    "state": {
      "terminated": {
        "exitCode": 0,
        "reason": "Succeeded",
        "message": "Task completed successfully",
        "startedAt": "2025-12-17T10:00:00Z",
        "finishedAt": "2025-12-17T10:00:05Z"
      }
    }
  }
}
```

**状态类型：**

*   `waiting`：任务正在等待执行。
*   `running`：任务当前正在执行。
*   `terminated`：任务已完成（成功或失败）。

## 示例场景：运行 Sidecar 任务

如果 `task-executor` 配置了 `--enable-sidecar-mode=true` 和 `--main-container-name=my-main-app`，它可以在 `my-main-app` 的 PID 命名空间内执行任务。

```bash
# 假设 task-executor 在 sidecar 模式下运行在一个包含 'my-main-app' 的 pod 上
# 此任务将从主容器的命名空间内执行 'ls /proc/self/ns'
curl -X POST -H "Content-Type: application/json" -d \
'{
  "name": "sidecar-namespace-check",
  "spec": {
    "process": {
      "command": ["ls", "/proc/self/ns"]
    }
  }
}' http://localhost:5758/tasks
```


================================================
FILE: kubernetes/examples/task-executor/main.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"context"
	"fmt"
	"log"
	"time"

	taskexecutor "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/task-executor"
)

// main is a small end-to-end walkthrough of the task-executor client:
// it submits one process task with client.Set, polls client.Get until the
// task reports a Terminated status (or the poll budget runs out), and finally
// calls client.Set with nil to clear the task.
func main() {
	const (
		// maxPolls bounds how many times the task status is queried.
		maxPolls = 10
		// pollInterval is the pause between two consecutive status queries,
		// including after a failed query.
		pollInterval = 500 * time.Millisecond
	)

	baseURL := "http://localhost:5758"
	client := taskexecutor.NewClient(baseURL)
	ctx := context.Background()

	fmt.Printf("Connecting to Task Executor at %s...\n", baseURL)

	taskName := "example-task"
	newTask := &taskexecutor.Task{
		Name: taskName,
		Process: &taskexecutor.Process{
			Command: []string{"sh", "-c"},
			Args:    []string{"echo 'Hello from SDK example!' && sleep 2 && echo 'Task done.'"},
		},
	}

	fmt.Printf("Submitting task '%s'...\n", taskName)
	createdTask, err := client.Set(ctx, newTask)
	if err != nil {
		// Nothing else in this example can work without a submitted task.
		log.Fatalf("Failed to set task: %v", err)
	}
	fmt.Printf("Task submitted successfully. Initial state: %v\n", getTaskState(createdTask))

	fmt.Println("Polling task status...")
	attempt := 0
	for ; attempt < maxPolls; attempt++ {
		currentTask, err := client.Get(ctx)
		if err != nil {
			log.Printf("Error getting task: %v", err)
			// Wait before retrying; without this pause a failing endpoint
			// would consume every remaining attempt immediately.
			time.Sleep(pollInterval)
			continue
		}

		if currentTask == nil {
			fmt.Println("No task found.")
			break
		}

		state := getTaskState(currentTask)
		fmt.Printf("Current state: %s\n", state)

		// Check if task is finished
		if currentTask.ProcessStatus.Terminated != nil {
			fmt.Printf("Task finished with exit code: %d\n", currentTask.ProcessStatus.Terminated.ExitCode)
			break
		}

		time.Sleep(pollInterval)
	}
	// attempt only reaches maxPolls when no iteration hit a break, i.e. the
	// task was never observed as terminated (or missing).
	if attempt == maxPolls {
		fmt.Printf("Task did not reach a terminated state after %d polls.\n", maxPolls)
	}

	// Clean up (pass nil to clear tasks)
	fmt.Println("Cleaning up...")
	_, err = client.Set(ctx, nil)
	if err != nil {
		log.Printf("Failed to clear tasks: %v", err)
	} else {
		fmt.Println("Tasks cleared.")
	}
}

// getTaskState maps a task to a short human-readable state label.
//
// A nil task yields "Unknown". Otherwise the process status is inspected in
// the order Running, Terminated, Waiting; the first non-nil entry wins, and
// the Waiting label also carries its reason. If none is set, "Pending" is
// returned.
func getTaskState(task *taskexecutor.Task) string {
	switch {
	case task == nil:
		return "Unknown"
	case task.ProcessStatus.Running != nil:
		return "Running"
	case task.ProcessStatus.Terminated != nil:
		return "Terminated"
	case task.ProcessStatus.Waiting != nil:
		reason := task.ProcessStatus.Waiting.Reason
		return fmt.Sprintf("Waiting (%s)", reason)
	default:
		return "Pending"
	}
}


================================================
FILE: kubernetes/go.mod
================================================
module github.com/alibaba/OpenSandbox/sandbox-k8s

go 1.24.0

require (
	github.com/golang/mock v1.6.0
	github.com/onsi/ginkgo/v2 v2.22.0
	github.com/onsi/gomega v1.36.1
	github.com/stretchr/testify v1.11.1
	k8s.io/api v0.33.0
	k8s.io/apimachinery v0.33.0
	k8s.io/client-go v0.33.0
	k8s.io/klog/v2 v2.130.1
	k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738
	sigs.k8s.io/controller-runtime v0.21.0
)

require (
	cel.dev/expr v0.19.1 // indirect
	github.com/antlr4-go/antlr/v4 v4.13.0 // indirect
	github.com/beorn7/perks v1.0.1 // indirect
	github.com/blang/semver/v4 v4.0.0 // indirect
	github.com/cenkalti/backoff/v4 v4.3.0 // indirect
	github.com/cespare/xxhash/v2 v2.3.0 // indirect
	github.com/davecgh/go-spew v1.1.1 // indirect
	github.com/emicklei/go-restful/v3 v3.11.0 // indirect
	github.com/evanphx/json-patch v4.12.0+incompatible // indirect
	github.com/evanphx/json-patch/v5 v5.9.11 // indirect
	github.com/felixge/httpsnoop v1.0.4 // indirect
	github.com/fsnotify/fsnotify v1.7.0 // indirect
	github.com/fxamacker/cbor/v2 v2.7.0 // indirect
	github.com/go-logr/logr v1.4.3
	github.com/go-logr/stdr v1.2.2 // indirect
	github.com/go-logr/zapr v1.3.0 // indirect
	github.com/go-openapi/jsonpointer v0.21.0 // indirect
	github.com/go-openapi/jsonreference v0.20.2 // indirect
	github.com/go-openapi/swag v0.23.0 // indirect
	github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
	github.com/gogo/protobuf v1.3.2 // indirect
	github.com/google/btree v1.1.3 // indirect
	github.com/google/cel-go v0.23.2 // indirect
	github.com/google/gnostic-models v0.6.9 // indirect
	github.com/google/go-cmp v0.7.0 // indirect
	github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db // indirect
	github.com/google/uuid v1.6.0 // indirect
	github.com/grpc-ecosystem/grpc-gateway/v2 v2.24.0 // indirect
	github.com/inconshreveable/mousetrap v1.1.0 // indirect
	github.com/josharian/intern v1.0.0 // indirect
	github.com/json-iterator/go v1.1.12 // indirect
	github.com/mailru/easyjson v0.7.7 // indirect
	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
	github.com/modern-go/reflect2 v1.0.2 // indirect
	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
	github.com/pkg/errors v0.9.1 // indirect
	github.com/pmezard/go-difflib v1.0.0 // indirect
	github.com/prometheus/client_golang v1.22.0 // indirect
	github.com/prometheus/client_model v0.6.1 // indirect
	github.com/prometheus/common v0.62.0 // indirect
	github.com/prometheus/procfs v0.15.1 // indirect
	github.com/spf13/cobra v1.8.1 // indirect
	github.com/spf13/pflag v1.0.5 // indirect
	github.com/stoewer/go-strcase v1.3.0 // indirect
	github.com/x448/float16 v0.8.4 // indirect
	go.opentelemetry.io/auto/sdk v1.2.1 // indirect
	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 // indirect
	go.opentelemetry.io/otel v1.40.0 // indirect
	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0 // indirect
	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0 // indirect
	go.opentelemetry.io/otel/metric v1.40.0 // indirect
	go.opentelemetry.io/otel/sdk v1.40.0 // indirect
	go.opentelemetry.io/otel/trace v1.40.0 // indirect
	go.opentelemetry.io/proto/otlp v1.4.0 // indirect
	go.uber.org/multierr v1.11.0 // indirect
	go.uber.org/zap v1.27.0
	golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect
	golang.org/x/net v0.38.0 // indirect
	golang.org/x/oauth2 v0.27.0 // indirect
	golang.org/x/sync v0.12.0 // indirect
	golang.org/x/sys v0.40.0 // indirect
	golang.org/x/term v0.30.0 // indirect
	golang.org/x/text v0.23.0 // indirect
	golang.org/x/time v0.9.0 // indirect
	golang.org/x/tools v0.26.0 // indirect
	gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
	google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576 // indirect
	google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576 // indirect
	google.golang.org/grpc v1.68.1 // indirect
	google.golang.org/protobuf v1.36.5 // indirect
	gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
	gopkg.in/inf.v0 v0.9.1 // indirect
	gopkg.in/natefinch/lumberjack.v2 v2.2.1
	gopkg.in/yaml.v3 v3.0.1 // indirect
	k8s.io/apiextensions-apiserver v0.33.0 // indirect
	k8s.io/apiserver v0.33.0 // indirect
	k8s.io/component-base v0.33.0 // indirect
	k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect
	sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect
	sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
	sigs.k8s.io/randfill v1.0.0 // indirect
	sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect
	sigs.k8s.io/yaml v1.4.0 // indirect
)


================================================
FILE: kubernetes/go.sum
================================================
cel.dev/expr v0.19.1 h1:NciYrtDRIR0lNCnH1LFJegdjspNx9fI59O7TWcua/W4=
cel.dev/expr v0.19.1/go.mod h1:MrpN08Q+lEBs+bGYdLxxHkZoUSsCp0nSKTs0nTymJgw=
github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI=
github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
github.com/evanphx/json-patch v4.12.0+incompatible h1:4onqiflcdA9EOZ4RxV643DvftH5pOlLGNtQ5lPWQu84=
github.com/evanphx/json-patch v4.12.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU=
github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ=
github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg=
github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs=
github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ=
github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY=
github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE=
github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k=
github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE=
github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/mock v1.6.0 h1:ErTB+efbowRARo13NNdxyJji2egdxLGQhRaY+DUumQc=
github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg=
github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
github.com/google/cel-go v0.23.2 h1:UdEe3CvQh3Nv+E/j9r1Y//WO0K0cSyD7/y0bzyLIMI4=
github.com/google/cel-go v0.23.2/go.mod h1:52Pb6QsDbC5kvgxvZhiL9QX1oZEkcUF/ZqaPx1J5Wwo=
github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw=
github.com/google/gnostic-models v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo=
github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.24.0 h1:TmHmbvxPmaegwhDubVz0lICL0J5Ka2vwTzhoePEXsGE=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.24.0/go.mod h1:qztMSjm835F2bXf+5HKAPIS5qsmQDqZna/PgVt4rWtI=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg=
github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo=
github.com/onsi/gomega v1.36.1 h1:bJDPBO7ibjxcbHMgSCoo4Yj18UWbKDlLwX1x9sybDcw=
github.com/onsi/gomega v1.36.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q=
github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io=
github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs=
github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 h1:yd02MEjBdJkG3uabWP9apV+OuWRIXGDuJEUJbOHmCFU=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0/go.mod h1:umTcuxiv1n/s/S6/c2AT/g2CQ7u5C59sHDNmfSwgz7Q=
go.opentelemetry.io/otel v1.40.0 h1:oA5YeOcpRTXq6NN7frwmwFR0Cn3RhTVZvXsP4duvCms=
go.opentelemetry.io/otel v1.40.0/go.mod h1:IMb+uXZUKkMXdPddhwAHm6UfOwJyh4ct1ybIlV14J0g=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0 h1:Vh5HayB/0HHfOQA7Ctx69E/Y/DcQSMPpKANYVMQ7fBA=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0/go.mod h1:cpgtDBaqD/6ok/UG0jT15/uKjAY8mRA53diogHBg3UI=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0 h1:5pojmb1U1AogINhN3SurB+zm/nIcusopeBNp42f45QM=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0/go.mod h1:57gTHJSE5S1tqg+EKsLPlTWhpHMsWlVmer+LA926XiA=
go.opentelemetry.io/otel/metric v1.40.0 h1:rcZe317KPftE2rstWIBitCdVp89A2HqjkxR3c11+p9g=
go.opentelemetry.io/otel/metric v1.40.0/go.mod h1:ib/crwQH7N3r5kfiBZQbwrTge743UDc7DTFVZrrXnqc=
go.opentelemetry.io/otel/sdk v1.40.0 h1:KHW/jUzgo6wsPh9At46+h4upjtccTmuZCFAc9OJ71f8=
go.opentelemetry.io/otel/sdk v1.40.0/go.mod h1:Ph7EFdYvxq72Y8Li9q8KebuYUr2KoeyHx0DRMKrYBUE=
go.opentelemetry.io/otel/sdk/metric v1.40.0 h1:mtmdVqgQkeRxHgRv4qhyJduP3fYJRMX4AtAlbuWdCYw=
go.opentelemetry.io/otel/sdk/metric v1.40.0/go.mod h1:4Z2bGMf0KSK3uRjlczMOeMhKU2rhUqdWNoKcYrtcBPg=
go.opentelemetry.io/otel/trace v1.40.0 h1:WA4etStDttCSYuhwvEa8OP8I5EWu24lkOzp+ZYblVjw=
go.opentelemetry.io/otel/trace v1.40.0/go.mod h1:zeAhriXecNGP/s2SEG3+Y8X9ujcJOTqQ5RgdEJcawiA=
go.opentelemetry.io/proto/otlp v1.4.0 h1:TA9WRvW6zMwP+Ssb6fLoUIuirti1gGbP28GcKG1jgeg=
go.opentelemetry.io/proto/otlp v1.4.0/go.mod h1:PPBWZIP98o2ElSqI35IHfu7hIhSwvc5N38Jw8pXuGFY=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8=
go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8=
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M=
golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ=
golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y=
golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY=
golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ=
golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw=
gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY=
google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576 h1:CkkIfIt50+lT6NHAVoRYEyAvQGFM7xEwXUUywFvEb3Q=
google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576/go.mod h1:1R3kvZ1dtP3+4p4d3G8uJ8rFk/fWlScl38vanWACI08=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576 h1:8ZmaLZE4XWrtU3MyClkYqqtl6Oegr3235h7jxsDyqCY=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576/go.mod h1:5uTbfoYQed2U9p3KIj2/Zzm02PYhndfdmML0qC3q3FU=
google.golang.org/grpc v1.68.1 h1:oI5oTa11+ng8r8XMMN7jAOmWfPZWbYpCFaMUTACxkM0=
google.golang.org/grpc v1.68.1/go.mod h1:+q1XYFJjShcqn0QZHvCyeR4CXPA+llXIeUIfIe00waw=
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4=
gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc=
gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
k8s.io/api v0.33.0 h1:yTgZVn1XEe6opVpP1FylmNrIFWuDqe2H0V8CT5gxfIU=
k8s.io/api v0.33.0/go.mod h1:CTO61ECK/KU7haa3qq8sarQ0biLq2ju405IZAd9zsiM=
k8s.io/apiextensions-apiserver v0.33.0 h1:d2qpYL7Mngbsc1taA4IjJPRJ9ilnsXIrndH+r9IimOs=
k8s.io/apiextensions-apiserver v0.33.0/go.mod h1:VeJ8u9dEEN+tbETo+lFkwaaZPg6uFKLGj5vyNEwwSzc=
k8s.io/apimachinery v0.33.0 h1:1a6kHrJxb2hs4t8EE5wuR/WxKDwGN1FKH3JvDtA0CIQ=
k8s.io/apimachinery v0.33.0/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM=
k8s.io/apiserver v0.33.0 h1:QqcM6c+qEEjkOODHppFXRiw/cE2zP85704YrQ9YaBbc=
k8s.io/apiserver v0.33.0/go.mod h1:EixYOit0YTxt8zrO2kBU7ixAtxFce9gKGq367nFmqI8=
k8s.io/client-go v0.33.0 h1:UASR0sAYVUzs2kYuKn/ZakZlcs2bEHaizrrHUZg0G98=
k8s.io/client-go v0.33.0/go.mod h1:kGkd+l/gNGg8GYWAPr0xF1rRKvVWvzh9vmZAMXtaKOg=
k8s.io/component-base v0.33.0 h1:Ot4PyJI+0JAD9covDhwLp9UNkUja209OzsJ4FzScBNk=
k8s.io/component-base v0.33.0/go.mod h1:aXYZLbw3kihdkOPMDhWbjGCO6sg+luw554KP51t8qCU=
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff h1:/usPimJzUKKu+m+TE36gUyGcf03XZEP0ZIKgKj35LS4=
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8=
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 h1:M3sRQVHv7vB20Xc2ybTt7ODCeFj6JSWYFzOFnYeS6Ro=
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 h1:jpcvIRr3GLoUoEKRkHKSmGjxb6lWwrBlJsXc+eUYQHM=
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw=
sigs.k8s.io/controller-runtime v0.21.0 h1:CYfjpEuicjUecRk+KAeyYh+ouUBn4llGyDYytIGcJS8=
sigs.k8s.io/controller-runtime v0.21.0/go.mod h1:OSg14+F65eWqIu4DceX7k/+QRAbTTvxeQSNSOQpukWM=
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8=
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo=
sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
sigs.k8s.io/structured-merge-diff/v4 v4.6.0 h1:IUA9nvMmnKWcj5jl84xn+T5MnlZKThmUW1TdblaLVAc=
sigs.k8s.io/structured-merge-diff/v4 v4.6.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps=
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=


================================================
FILE: kubernetes/hack/boilerplate.go.txt
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

================================================
FILE: kubernetes/hack/debug-task.sh
================================================
#!/bin/bash

# Copyright 2025 Alibaba Group Holding Ltd.
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -e

echo "Stopping any running debug containers..."
# Best effort: the container normally does not exist yet, so silence stderr as
# well as stdout instead of printing a "No such container" error on every run.
docker stop task-executor-debug > /dev/null 2>&1 || true
echo "Building debug docker image (dev environment)..."
docker build -t task-executor-debug -f Dockerfile.debug .

# The banner must match the ports published by `docker run` below
# (5758 for the application, 2345 for the debugger).
echo "Starting debug container with Auto-Sync and Hot-Reload..."
echo "---------------------------------------------------------"
echo "  App URL:      http://localhost:5758"
echo "  Debugger:     localhost:2345"
echo "  Source Code:  Mounted from $(pwd)"
echo "---------------------------------------------------------"
echo "Usage:"
echo "  1. Connect GoLand to localhost:2345"
echo "  2. Edit code locally -> Container auto-recompiles (watch the logs)"
echo "  3. Re-connect Debugger in GoLand"
echo "---------------------------------------------------------"

# Create docker volumes for cache if they don't exist
docker volume create sandbox-k8s-gomod > /dev/null
docker volume create sandbox-k8s-gocache > /dev/null

# Run the container
# --rm: remove container after exit
# -p 5758:5758 / -p 2345:2345: publish the application and debugger ports
# -v $(pwd):/workspace: Mount local code
# -v sandbox-k8s-*: Mount Go module and build caches for speed
# reflex command:
#   -r '\.go$': Watch all .go files recursively
#   -s: Service mode (kill old process before starting new one)
#   --: Delimiter
#   dlv debug: Compile and run ./cmd/task-executor
#     --headless: No terminal UI
#     --listen=:2345: Debugger port
#     --api-version=2: API v2
#     --accept-multiclient: Allow multiple connections
#     --output /tmp/debug_bin: Put binary in tmp to avoid clutter/loops
#     (--continue is not passed here; add it if the program should start
#      running without waiting for a debugger client to resume it)
#     --: Everything after this delimiter is passed to the debugged binary

docker run --rm -it \
  --privileged \
  -p 5758:5758 \
  -p 2345:2345 \
  --security-opt seccomp=unconfined \
  --cap-add=SYS_PTRACE \
  -v "$(pwd):/workspace" \
  -v sandbox-k8s-gomod:/go/pkg/mod \
  -v sandbox-k8s-gocache:/go/.cache/go-build \
  --name task-executor-debug \
  -e SANDBOX_MAIN_CONTAINER=task-executor \
  task-executor-debug \
  reflex -r '\.go$' -s -- \
    dlv debug ./cmd/task-executor \
    --headless \
    --listen=:2345 \
    --api-version=2 \
    --accept-multiclient \
    --output /tmp/debug_bin \
    -- \
    -enable-sidecar-mode=true -main-container-name=task-executor

================================================
FILE: kubernetes/hack/pool-perf.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import time
import uuid
import sys
import argparse
from kubernetes import client, config
from kubernetes.client.rest import ApiException

# CRD configurations
GROUP = "sandbox.opensandbox.io"
VERSION = "v1alpha1"
POOL_PLURAL = "pools"
BSB_PLURAL = "batchsandboxes"
NAMESPACE = "default"

class PoolPerformanceTester:
    """Measure how fast BatchSandboxes are allocated from a pre-created Pool.

    The tester (re)creates a Pool, waits until enough pods are available,
    creates a number of BatchSandboxes concurrently, polls each of them until
    its ``status.allocated`` reaches the requested replica count, and prints a
    latency/throughput report.

    The Kubernetes client used here is synchronous. Every API call made from a
    coroutine is therefore dispatched to the event loop's default thread pool;
    calling it inline would block the loop and silently serialize the
    supposedly concurrent create/poll phases.
    """

    def __init__(self, pool_name, pool_size, replicas_per_bsb, total_bsb_count, timeout, poll_interval=0.00001):
        """Load Kubernetes credentials and store the test parameters.

        Args:
            pool_name: Name of the Pool custom object to create.
            pool_size: Value used for ``poolMin`` and as the "ready" threshold.
            replicas_per_bsb: ``spec.replicas`` of every BatchSandbox.
            total_bsb_count: Number of BatchSandboxes to create.
            timeout: Seconds to wait for a single BatchSandbox allocation.
            poll_interval: Seconds to sleep between two status polls.
        """
        try:
            config.load_kube_config()
        except Exception:
            # Fall back to in-cluster config if kube config is not available
            config.load_incluster_config()
        self.custom_api = client.CustomObjectsApi()
        self.pool_name = pool_name
        self.pool_size = pool_size
        self.replicas_per_bsb = replicas_per_bsb
        self.total_bsb_count = total_bsb_count
        self.timeout = timeout
        self.poll_interval = poll_interval
        # Every generated BatchSandbox name, including ones whose creation
        # failed, so that cleanup() can try to delete all of them.
        self.bsb_names = []
        # name -> {"create_time": float, "allocated_time": float | None};
        # only successfully created BatchSandboxes have an entry.
        self.results = {}

    async def _call_api(self, func, *args):
        """Run the blocking client call ``func(*args)`` in the default executor."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, func, *args)

    def create_pool_manifest(self, size):
        """Return the Pool manifest with ``poolMin=size`` and ``poolMax=size+20``."""
        return {
            "apiVersion": f"{GROUP}/{VERSION}",
            "kind": "Pool",
            "metadata": {"name": self.pool_name},
            "spec": {
                "template": {
                    "spec": {
                        "containers": [{"name": "nginx", "image": "nginx:alpine"}]
                    }
                },
                "capacitySpec": {
                    "bufferMin": 5,
                    "bufferMax": 10,
                    "poolMin": size,
                    "poolMax": size + 20
                }
            }
        }

    def create_bsb_manifest(self, name):
        """Return the BatchSandbox manifest ``name`` that references this pool."""
        return {
            "apiVersion": f"{GROUP}/{VERSION}",
            "kind": "BatchSandbox",
            "metadata": {"name": name},
            "spec": {
                "replicas": self.replicas_per_bsb,
                "poolRef": self.pool_name
            }
        }

    async def setup_pool(self):
        """Create and wait for the resource pool to be ready"""
        print(f"🚀 Setting up Pool: {self.pool_name} with size {self.pool_size}...")
        try:
            await self._call_api(
                self.custom_api.delete_namespaced_custom_object,
                GROUP, VERSION, NAMESPACE, POOL_PLURAL, self.pool_name,
            )
            # Give the API server some time to finish deleting the old Pool.
            await asyncio.sleep(5)
        except ApiException as e:
            # 404 simply means there was no previous Pool to delete.
            if e.status != 404:
                print(f"⚠️  Failed to delete existing Pool: {e}")
        except Exception as e:
            print(f"⚠️  Error during Pool deletion: {e}")

        body = self.create_pool_manifest(self.pool_size)
        await self._call_api(
            self.custom_api.create_namespaced_custom_object,
            GROUP, VERSION, NAMESPACE, POOL_PLURAL, body,
        )

        # Wait for Available count to reach target.
        # NOTE: there is deliberately no deadline here; interrupt the script if
        # the pool never becomes ready.
        while True:
            try:
                pool = await self._call_api(
                    self.custom_api.get_namespaced_custom_object,
                    GROUP, VERSION, NAMESPACE, POOL_PLURAL, self.pool_name,
                )
                available = pool.get("status", {}).get("available", 0)
                if available >= self.pool_size:
                    print(f"✅ Pool is Ready. Available: {available}")
                    break
                print(f"Waiting for Pool Ready... Available: {available}")
            except Exception as e:
                print(f"Waiting for Pool to be created... {e}")
            await asyncio.sleep(2)

    async def create_bsb(self, index):
        """Create one BatchSandbox and record how long the create call took.

        ``index`` is accepted for the caller's convenience and is not used.
        A failed creation is reported and leaves no entry in ``self.results``.
        """
        name = f"perf-test-{uuid.uuid4().hex[:8]}"
        # Registered before the call so cleanup() also covers objects that may
        # have been created even though the request raised.
        self.bsb_names.append(name)
        body = self.create_bsb_manifest(name)

        start_time = time.time()
        try:
            await self._call_api(
                self.custom_api.create_namespaced_custom_object,
                GROUP, VERSION, NAMESPACE, BSB_PLURAL, body,
            )
            self.results[name] = {"create_time": time.time() - start_time, "allocated_time": None}
        except ApiException as e:
            print(f"❌ Failed to create {name}: {e}")

    async def wait_for_allocation(self, name):
        """Poll BatchSandbox ``name`` until it is fully allocated or times out.

        On success ``self.results[name]["allocated_time"]`` is set to the time
        elapsed since polling started. Names without an entry in
        ``self.results`` (creation failed) are skipped immediately instead of
        being polled until the timeout expires.
        """
        if name not in self.results:
            return

        start_polling = time.time()
        last_error = None
        while True:
            try:
                bsb = await self._call_api(
                    self.custom_api.get_namespaced_custom_object,
                    GROUP, VERSION, NAMESPACE, BSB_PLURAL, name,
                )
                status = bsb.get("status", {})
                allocated = status.get("allocated", 0)

                if allocated >= self.replicas_per_bsb:
                    print("{0}, endpoint {1}".format(name, bsb.get("metadata", {}).get("annotations", {}).get("sandbox.opensandbox.io/endpoints", "")))
                    self.results[name]["allocated_time"] = time.time() - start_polling
                    break
            except Exception as e:
                # Polling is best effort; remember the error so that it can be
                # reported if the allocation never completes.
                last_error = e

            await asyncio.sleep(self.poll_interval)
            if time.time() - start_polling > self.timeout:
                print(f"⏰ Timeout waiting for {name}")
                if last_error is not None:
                    print(f"   Last polling error for {name}: {last_error}")
                break

    async def run(self):
        """Run the whole scenario: pool setup, creation, polling, report."""
        await self.setup_pool()

        print(f"🔥 Starting concurrent allocation test: {self.total_bsb_count} BatchSandboxes...")
        start_all = time.time()

        # Concurrent creation
        await asyncio.gather(*(self.create_bsb(i) for i in range(self.total_bsb_count)))

        # Concurrent wait for allocation
        await asyncio.gather(*(self.wait_for_allocation(name) for name in self.bsb_names))

        total_duration = time.time() - start_all
        self.print_report(total_duration)

    def print_report(self, total_duration):
        """Print latency statistics for every BatchSandbox that was allocated.

        Args:
            total_duration: Wall-clock seconds of the create + wait phases.
        """
        print("\n" + "="*40)
        print("📊 PERFORMANCE REPORT")
        print("="*40)
        durations = [r["allocated_time"] for r in self.results.values() if r.get("allocated_time") is not None]

        if durations:
            avg_lat = sum(durations) / len(durations)
            max_lat = max(durations)
            p95 = sorted(durations)[int(len(durations) * 0.95)]
            # Avoid ZeroDivisionError when the clock did not advance.
            throughput = len(durations) / total_duration if total_duration > 0 else 0.0

            print(f"Total BSB:      {self.total_bsb_count}")
            print(f"Total Duration: {total_duration:.2f}s")
            print(f"Throughput:     {throughput:.2f} sandbox/s")
            print(f"Avg Latency:    {avg_lat:.2f}s")
            print(f"Max Latency:    {max_lat:.2f}s")
            print(f"P95 Latency:    {p95:.2f}s")
            print(f"Success Rate:   {len(durations)/self.total_bsb_count*100:.1f}%")
        else:
            print("No successful allocations recorded.")
        print("="*40)

    def _delete_best_effort(self, plural, name):
        """Delete one custom object; never raise, but report unexpected errors."""
        try:
            self.custom_api.delete_namespaced_custom_object(GROUP, VERSION, NAMESPACE, plural, name)
        except ApiException as e:
            # 404: the object was never created or is already gone.
            if e.status != 404:
                print(f"⚠️  Failed to delete {plural}/{name}: {e}")
        except Exception as e:
            print(f"⚠️  Failed to delete {plural}/{name}: {e}")

    def cleanup(self):
        """Delete every BatchSandbox created by this run, then the Pool."""
        print("🧹 Cleaning up...")
        for name in self.bsb_names:
            self._delete_best_effort(BSB_PLURAL, name)
        self._delete_best_effort(POOL_PLURAL, self.pool_name)

if __name__ == "__main__":
    # One row per command-line option: (flag, type, default, help text).
    option_table = (
        ("--pool-name", str, "perf-pool", "Pool name (default: perf-pool)"),
        ("--pool-size", int, 50, "Pool size (default: 50)"),
        ("--replicas", int, 1, "Replicas per BatchSandbox (default: 1)"),
        ("--bsb-count", int, 50, "Number of BatchSandboxes to create concurrently (default: 50)"),
        ("--namespace", str, "default", "Kubernetes namespace (default: default)"),
        ("--timeout", int, 120, "Timeout in seconds for each BatchSandbox allocation (default: 120)"),
        ("--poll-interval", float, 0.00001, "Poll interval in seconds for checking BatchSandbox status (default: 0.00001)"),
    )
    cli = argparse.ArgumentParser(description="Pool Performance Tester")
    for flag, value_type, default_value, help_text in option_table:
        cli.add_argument(flag, type=value_type, default=default_value, help=help_text)
    opts = cli.parse_args()

    # This code runs at module scope, so the assignment rebinds the
    # module-level NAMESPACE that the tester's API calls read.
    NAMESPACE = opts.namespace

    # Echo the effective configuration, followed by one blank line.
    summary = (
        "🔧 Test Configuration:",
        f"   Pool Name:    {opts.pool_name}",
        f"   Pool Size:    {opts.pool_size}",
        f"   Replicas:     {opts.replicas}",
        f"   BSB Count:    {opts.bsb_count}",
        f"   Namespace:    {opts.namespace}",
        f"   Timeout:      {opts.timeout}s",
        f"   Poll Interval: {opts.poll_interval}s",
        "",
    )
    print("\n".join(summary))

    tester = PoolPerformanceTester(
        pool_name=opts.pool_name,
        pool_size=opts.pool_size,
        replicas_per_bsb=opts.replicas,
        total_bsb_count=opts.bsb_count,
        timeout=opts.timeout,
        poll_interval=opts.poll_interval,
    )
    # Resources are removed on every exit path, including Ctrl-C.
    try:
        asyncio.run(tester.run())
    except KeyboardInterrupt:
        print("\nInterrupted by user")
    finally:
        tester.cleanup()

================================================
FILE: kubernetes/hack/update-codegen.sh
================================================
#!/usr/bin/env bash
# Copyright 2025 Alibaba Group Holding Ltd.
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -o errexit
set -o nounset
set -o pipefail

# Single source of truth for the code-generator release used below.
# Override through the environment to generate with another version.
CODEGEN_VERSION=${CODEGEN_VERSION:-v0.33.0}

SCRIPT_ROOT=$(dirname "${BASH_SOURCE[0]}")/..
# Locate the module in the Go module cache. GOMODCACHE is always a single
# directory, whereas GOPATH may be a list of paths and does not reflect a
# customised module cache location.
CODEGEN_PKG=${CODEGEN_PKG:-$(cd "${SCRIPT_ROOT}"; go env GOMODCACHE)/k8s.io/code-generator@${CODEGEN_VERSION}}

if [ ! -d "${CODEGEN_PKG}" ]; then
    echo "code-generator not found at ${CODEGEN_PKG}"
    echo "Installing k8s.io/code-generator@${CODEGEN_VERSION}..."
    # Installing the generators also downloads the module into the module cache.
    go install "k8s.io/code-generator/cmd/client-gen@${CODEGEN_VERSION}"
    go install "k8s.io/code-generator/cmd/lister-gen@${CODEGEN_VERSION}"
    go install "k8s.io/code-generator/cmd/informer-gen@${CODEGEN_VERSION}"
fi

# Fail with a clear message instead of an opaque `source` error.
if [ ! -f "${CODEGEN_PKG}/kube_codegen.sh" ]; then
    echo "kube_codegen.sh not found under ${CODEGEN_PKG}; set CODEGEN_PKG to a k8s.io/code-generator directory" >&2
    exit 1
fi

source "${CODEGEN_PKG}/kube_codegen.sh"

# Generate the typed client (with watch support) for the API types under apis/.
kube::codegen::gen_client \
    --with-watch \
    --output-dir "${SCRIPT_ROOT}/pkg/client" \
    --output-pkg "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client" \
    --boilerplate "${SCRIPT_ROOT}/hack/boilerplate.go.txt" \
    "${SCRIPT_ROOT}/apis"

echo "Code generation completed successfully!"


================================================
FILE: kubernetes/internal/controller/allocator.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"context"
	"encoding/json"
	gerrors "errors"
	"fmt"
	"slices"
	"strconv"

	corev1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
	logf "sigs.k8s.io/controller-runtime/pkg/log"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils/expectations"
)

// poolResExpectations is the package-wide resource-version expectation for Pools.
// annoAllocationStore.GetAllocation reports every Pool it reads via Observe, and
// annoAllocationStore.SetAllocation checks IsSatisfied before patching and calls
// Expect with the Pool returned by a successful patch.
var poolResExpectations = expectations.NewResourceVersionExpectation()

// AllocationStore loads and persists the PoolAllocation that belongs to a Pool.
type AllocationStore interface {
	// GetAllocation returns the allocation currently recorded for pool.
	GetAllocation(ctx context.Context, pool *sandboxv1alpha1.Pool) (*PoolAllocation, error)
	// SetAllocation records allocation as the allocation of pool.
	SetAllocation(ctx context.Context, pool *sandboxv1alpha1.Pool, allocation *PoolAllocation) error
}

// annoAllocationStore is the AllocationStore implementation that keeps the
// allocation in annotations of the Pool object itself.
type annoAllocationStore struct {
	// client is used to patch the Pool when an allocation is saved.
	client client.Client
}

// NewAnnoAllocationStore returns an AllocationStore backed by Pool annotations
// that issues its patches through the given client.
func NewAnnoAllocationStore(client client.Client) AllocationStore {
	store := new(annoAllocationStore)
	store.client = client
	return store
}

// GetAllocation decodes the allocation stored in the AnnoPoolAllocStatusKey
// annotation of pool.
//
// The pool is reported to poolResExpectations before anything else. When the
// Pool has no annotations or the key is absent, an empty allocation is returned.
// An annotation that is present but cannot be decoded yields an error.
//
// The returned PodAllocation map is never nil: a JSON null stored for the map
// would otherwise leave it nil and make any later write to it panic.
func (store *annoAllocationStore) GetAllocation(ctx context.Context, pool *sandboxv1alpha1.Pool) (*PoolAllocation, error) {
	alloc := &PoolAllocation{
		PodAllocation: make(map[string]string),
	}
	poolResExpectations.Observe(pool)
	anno := pool.GetAnnotations()
	if anno == nil {
		return alloc, nil
	}
	js, ok := anno[AnnoPoolAllocStatusKey]
	if !ok {
		return alloc, nil
	}
	if err := json.Unmarshal([]byte(js), alloc); err != nil {
		// %w keeps the underlying JSON error available to errors.Is/As.
		return nil, fmt.Errorf("decode pool allocation annotation: %w", err)
	}
	if alloc.PodAllocation == nil {
		// json.Unmarshal resets a map field to nil when the document holds null for it.
		alloc.PodAllocation = make(map[string]string)
	}
	return alloc, nil
}

// SetAllocation serialises alloc into the AnnoPoolAllocStatusKey annotation of
// pool, bumps the counter kept in AnnoPoolAllocGenerationKey by one, and sends
// the change as a merge patch computed against the unmodified Pool.
//
// The call is refused while poolResExpectations is not satisfied for pool. After
// a successful patch the patched Pool is registered with poolResExpectations.
func (store *annoAllocationStore) SetAllocation(ctx context.Context, pool *sandboxv1alpha1.Pool, alloc *PoolAllocation) error {
	satisfied, unsatisfiedDuration := poolResExpectations.IsSatisfied(pool)
	if !satisfied {
		return fmt.Errorf("pool allocation is not ready, unsatisfiedDuration:%v", unsatisfiedDuration)
	}

	payload, err := json.Marshal(alloc)
	if err != nil {
		return err
	}

	// Snapshot taken before any mutation; it is the base of the merge patch.
	base := pool.DeepCopy()

	annotations := pool.GetAnnotations()
	if annotations == nil {
		annotations = map[string]string{}
	}

	// A missing generation annotation counts as generation 0.
	var currentGen int64
	if previous, found := annotations[AnnoPoolAllocGenerationKey]; found {
		currentGen, err = strconv.ParseInt(previous, 10, 64)
		if err != nil {
			return err
		}
	}

	annotations[AnnoPoolAllocStatusKey] = string(payload)
	annotations[AnnoPoolAllocGenerationKey] = strconv.FormatInt(currentGen+1, 10)
	pool.SetAnnotations(annotations)

	if err = store.client.Patch(ctx, pool, client.MergeFrom(base)); err != nil {
		return err
	}
	poolResExpectations.Expect(pool)
	return nil
}

// AllocationSyncer reads and writes the allocation information attached to a
// BatchSandbox.
type AllocationSyncer interface {
	// SetAllocation records allocation as the allocation of sandbox.
	SetAllocation(ctx context.Context, sandbox *sandboxv1alpha1.BatchSandbox, allocation *SandboxAllocation) error
	// GetAllocation returns the allocation currently recorded for sandbox.
	GetAllocation(ctx context.Context, sandbox *sandboxv1alpha1.BatchSandbox) (*SandboxAllocation, error)
	// GetRelease returns the release information currently recorded for sandbox.
	GetRelease(ctx context.Context, sandbox *sandboxv1alpha1.BatchSandbox) (*AllocationRelease, error)
}
// annoAllocationSyncer is the AllocationSyncer implementation that keeps the
// data in annotations of the BatchSandbox object.
type annoAllocationSyncer struct {
	// client is used to patch the BatchSandbox when an allocation is saved.
	client client.Client
}

// NewAnnoAllocationSyncer returns an AllocationSyncer backed by BatchSandbox
// annotations that issues its patches through the given client.
func NewAnnoAllocationSyncer(client client.Client) AllocationSyncer {
	syncer := new(annoAllocationSyncer)
	syncer.client = client
	return syncer
}

// SetAllocation serialises allocation into the AnnoAllocStatusKey annotation of
// sandbox and sends the change as a merge patch computed against a copy of the
// BatchSandbox taken before the annotation was modified.
func (syncer *annoAllocationSyncer) SetAllocation(ctx context.Context, sandbox *sandboxv1alpha1.BatchSandbox, allocation *SandboxAllocation) error {
	// Snapshot taken before any mutation; it is the base of the merge patch.
	base, isBatchSandbox := sandbox.DeepCopyObject().(*sandboxv1alpha1.BatchSandbox)
	if !isBatchSandbox {
		return fmt.Errorf("invalid object")
	}

	encoded, err := json.Marshal(allocation)
	if err != nil {
		return err
	}

	annotations := sandbox.GetAnnotations()
	if annotations == nil {
		annotations = map[string]string{}
	}
	annotations[AnnoAllocStatusKey] = string(encoded)
	sandbox.SetAnnotations(annotations)

	return syncer.client.Patch(ctx, sandbox, client.MergeFrom(base))
}

// GetAllocation decodes the AnnoAllocStatusKey annotation of sandbox. A missing
// or empty annotation yields an allocation with an empty, non-nil Pods slice;
// an annotation that cannot be decoded yields an error.
func (syncer *annoAllocationSyncer) GetAllocation(ctx context.Context, sandbox *sandboxv1alpha1.BatchSandbox) (*SandboxAllocation, error) {
	result := &SandboxAllocation{Pods: []string{}}

	// Indexing a nil annotation map is safe and yields "".
	raw := sandbox.GetAnnotations()[AnnoAllocStatusKey]
	if raw == "" {
		return result, nil
	}
	if err := json.Unmarshal([]byte(raw), result); err != nil {
		return nil, err
	}
	return result, nil
}

// GetRelease decodes the AnnoAllocReleaseKey annotation of sandbox. A missing or
// empty annotation yields a release with an empty, non-nil Pods slice; an
// annotation that cannot be decoded yields an error.
func (syncer *annoAllocationSyncer) GetRelease(ctx context.Context, sandbox *sandboxv1alpha1.BatchSandbox) (*AllocationRelease, error) {
	released := &AllocationRelease{Pods: []string{}}

	annotations := sandbox.GetAnnotations()
	encoded, present := annotations[AnnoAllocReleaseKey]
	if !present || encoded == "" {
		return released, nil
	}

	decodeErr := json.Unmarshal([]byte(encoded), released)
	if decodeErr != nil {
		return nil, decodeErr
	}
	return released, nil
}

// AllocSpec is the input of Allocator.Schedule: a Pool, its pods, and the
// BatchSandboxes that request pods from it.
type AllocSpec struct {
	// Sandboxes are the BatchSandboxes that need pods allocated.
	Sandboxes []*sandboxv1alpha1.BatchSandbox
	// Pool is the pool the pods are taken from.
	Pool *sandboxv1alpha1.Pool
	// Pods are all pods of the pool.
	Pods []*corev1.Pod
}

// AllocStatus is the allocation state produced by Allocator.Schedule.
type AllocStatus struct {
	// PodAllocation maps a pod name to the name of the sandbox it is allocated to.
	PodAllocation map[string]string
	// PodSupplement is the pod request count.
	// NOTE(review): presumably the number of additional pods the pool should
	// supply; confirm against the code that sets it.
	PodSupplement int32
}

// Allocator is responsible for managing pod allocation from Pool to BatchSandboxes.
// It performs allocation calculations and persists the allocation state.
type Allocator interface {
	// Schedule computes the allocation of pods to BatchSandboxes based on the current pool state.
	// It returns:
	//   - AllocStatus: the computed allocation state (pod-to-sandbox mapping and required supplement count)
	//   - poolDirty: indicates whether the Pool's allocation state has changed and needs persistence
	//   - error: any error during the scheduling process
	// This method only performs calculation and does not modify the Pool CR directly.
	//
	// The default implementation returns an error only when the current pool
	// allocation cannot be loaded; failures while allocating, deallocating or
	// syncing sandboxes are logged and do not fail the call.
	Schedule(ctx context.Context, spec *AllocSpec) (*AllocStatus, bool, error)

	// PersistPoolAllocation persists the allocation status.
	// This method should be called after Schedule() when poolDirty is true.
	PersistPoolAllocation(ctx context.Context, pool *sandboxv1alpha1.Pool, status *AllocStatus) error
}

// defaultAllocator is the Allocator implementation returned by NewDefaultAllocator.
type defaultAllocator struct {
	// store reads and writes the pool-side allocation (pod -> sandbox mapping).
	store  AllocationStore
	// syncer reads and writes the sandbox-side allocation and release records.
	syncer AllocationSyncer
}

// NewDefaultAllocator builds the default Allocator. Both the pool-side store
// and the sandbox-side syncer are the annotation-based implementations and
// share the given client.
func NewDefaultAllocator(client client.Client) Allocator {
	allocator := new(defaultAllocator)
	allocator.store = NewAnnoAllocationStore(client)
	allocator.syncer = NewAnnoAllocationSyncer(client)
	return allocator
}

// Schedule runs one allocation pass for the pool described by spec.
//
// Steps:
//  1. load the persisted pod -> sandbox mapping of the pool;
//  2. collect the pods that are Running and not yet allocated;
//  3. allocate available pods to sandboxes, then process releases and
//     garbage-collect allocations of sandboxes that no longer exist;
//  4. write the new allocation back to every sandbox whose allocation changed.
//
// Only a failure to load the pool allocation is returned as an error.
// Per-sandbox failures from steps 3 and 4 are logged and swallowed so that
// one broken sandbox cannot block scheduling for the whole pool.
func (allocator *defaultAllocator) Schedule(ctx context.Context, spec *AllocSpec) (*AllocStatus, bool, error) {
	logger := logf.FromContext(ctx)

	status, err := allocator.initAllocation(ctx, spec)
	if err != nil {
		return nil, false, err
	}

	// Invert the pod -> sandbox mapping into sandbox -> pods.
	sandboxToPods := make(map[string][]string)
	for podName, sandboxName := range status.PodAllocation {
		sandboxToPods[sandboxName] = append(sandboxToPods[sandboxName], podName)
	}

	// A pod can be handed out only if it is Running and not allocated yet.
	availablePods := make([]string, 0)
	for _, pod := range spec.Pods {
		_, taken := status.PodAllocation[pod.Name]
		if taken || pod.Status.Phase != corev1.PodRunning {
			continue
		}
		availablePods = append(availablePods, pod.Name)
	}

	sandboxAlloc, dirtySandboxes, poolAllocate, allocErr := allocator.allocate(ctx, status, sandboxToPods, availablePods, spec.Sandboxes, spec.Pods)
	if allocErr != nil {
		logger.Error(allocErr, "allocate failed")
	}

	poolDeallocate, deallocErr := allocator.deallocate(ctx, status, sandboxToPods, spec.Sandboxes)
	if deallocErr != nil {
		logger.Error(deallocErr, "deallocate failed")
	}

	if syncErr := allocator.syncAllocResult(ctx, dirtySandboxes, sandboxAlloc, spec.Sandboxes); syncErr != nil {
		logger.Error(syncErr, "sync alloc result failed")
	}

	// Do not return the errors of individual sandboxes, which would block pool scheduling.
	return status, poolDeallocate || poolAllocate, nil
}

// initAllocation creates the AllocStatus for a scheduling pass, seeded with
// the pod -> sandbox mapping currently persisted for spec.Pool.
// PodSupplement starts at zero.
func (allocator *defaultAllocator) initAllocation(ctx context.Context, spec *AllocSpec) (*AllocStatus, error) {
	podAllocation, err := allocator.getPodAllocation(ctx, spec.Pool)
	if err != nil {
		return nil, err
	}
	return &AllocStatus{PodAllocation: podAllocation}, nil
}

// allocate hands out availablePods to the given sandboxes, in slice order.
//
// It returns:
//   - the full allocation (old and new pods) per sandbox name, for every
//     sandbox that was processed without error;
//   - the names of sandboxes whose allocation changed and must be synced;
//   - whether the pool mapping in status gained at least one entry;
//   - the joined errors of all sandboxes that failed.
//
// A failing sandbox does not stop the loop; the remaining sandboxes are still
// processed. The pods parameter is currently unused.
func (allocator *defaultAllocator) allocate(ctx context.Context, status *AllocStatus, sandboxToPods map[string][]string, availablePods []string, sandboxes []*sandboxv1alpha1.BatchSandbox, pods []*corev1.Pod) (map[string][]string, []string, bool, error) {
	var (
		failures       []error
		poolChanged    bool
		allocBySandbox = make(map[string][]string)
		changedNames   = make([]string, 0)
	)
	for _, sbx := range sandboxes {
		podsOfSandbox, rest, sandboxChanged, allocated, err := allocator.doAllocate(ctx, status, sandboxToPods, availablePods, sbx, *sbx.Spec.Replicas)
		// doAllocate always returns the pods that are still free, even on error.
		availablePods = rest
		if err != nil {
			failures = append(failures, err)
			continue
		}
		allocBySandbox[sbx.Name] = podsOfSandbox
		if sandboxChanged {
			changedNames = append(changedNames, sbx.Name)
		}
		poolChanged = poolChanged || allocated
	}
	return allocBySandbox, changedNames, poolChanged, gerrors.Join(failures...)
}

// doAllocate computes the allocation of a single sandbox.
//
// The sandbox's current allocation is the union of what the sandbox itself
// records (read through the syncer) and what the pool records for it
// (sandboxToPods). If the sandbox still needs pods to reach cnt, they are taken
// from the front of availablePods and recorded in status.PodAllocation and
// sandboxToPods. Pods that are needed but unavailable are added to
// status.PodSupplement.
//
// It returns:
//   - the sandbox's full allocation (previous pods followed by new pods);
//   - the pods that are still available after this sandbox was served;
//   - whether the sandbox-side record is out of date and must be synced;
//   - whether new entries were added to the pool mapping;
//   - the error from reading the sandbox-side allocation, if any.
//
// Sandboxes that are being deleted get no pods and are never dirty.
func (allocator *defaultAllocator) doAllocate(ctx context.Context, status *AllocStatus, sandboxToPods map[string][]string, availablePods []string, sbx *sandboxv1alpha1.BatchSandbox, cnt int32) ([]string, []string, bool, bool, error) {
	sandboxDirty := false
	poolAllocate := false
	sandboxAlloc := make([]string, 0)
	remainAvailablePods := availablePods
	if sbx.DeletionTimestamp != nil {
		return sandboxAlloc, remainAvailablePods, false, false, nil
	}
	sbxAlloc, err := allocator.syncer.GetAllocation(ctx, sbx)
	if err != nil {
		return nil, remainAvailablePods, false, false, err
	}
	remoteAlloc := sbxAlloc.Pods
	allocatedPod := make([]string, 0, len(remoteAlloc))
	allocatedPod = append(allocatedPod, remoteAlloc...)
	name := sbx.Name
	// Pods the pool has recorded for this sandbox but the sandbox has not:
	// the sandbox-side record is stale and has to be rewritten.
	for _, localPod := range sandboxToPods[name] {
		if !slices.Contains(remoteAlloc, localPod) {
			sandboxDirty = true
			allocatedPod = append(allocatedPod, localPod)
		}
	}
	sandboxAlloc = append(sandboxAlloc, allocatedPod...) // old allocation

	// The sandbox may already hold more pods than requested (for example after
	// replicas was lowered). Clamp to zero: a negative count would make the
	// slice expressions below panic.
	needAllocateCnt := cnt - int32(len(allocatedPod))
	if needAllocateCnt < 0 {
		needAllocateCnt = 0
	}
	canAllocateCnt := needAllocateCnt
	if int32(len(availablePods)) < canAllocateCnt {
		canAllocateCnt = int32(len(availablePods))
	}
	pods := availablePods[:canAllocateCnt]
	remainAvailablePods = availablePods[canAllocateCnt:]

	// Extend the pool-side view instead of replacing it, so the pods that were
	// already allocated to this sandbox are not dropped from sandboxToPods.
	// The key is always set, even when nothing new was allocated.
	sandboxToPods[name] = append(sandboxToPods[name], pods...)

	if len(pods) > 0 && status.PodAllocation == nil {
		// Writing to a nil map panics; tolerate a status built without a map.
		status.PodAllocation = make(map[string]string, len(pods))
	}
	for _, pod := range pods {
		sandboxDirty = true
		status.PodAllocation[pod] = name
		poolAllocate = true
		sandboxAlloc = append(sandboxAlloc, pod) // new allocation
	}
	if canAllocateCnt < needAllocateCnt {
		status.PodSupplement += needAllocateCnt - canAllocateCnt
	}
	return sandboxAlloc, remainAvailablePods, sandboxDirty, poolAllocate, nil
}

// deallocate removes pods from the pool mapping in two steps:
//
//  1. for every given sandbox, the pods the sandbox asked to release;
//  2. for every sandbox name that still has pods in sandboxToPods but is not
//     among the given sandboxes (it was deleted), all of its pods.
//
// It returns whether any deallocation happened and the joined errors of the
// sandboxes that failed in step 1. A failing sandbox does not stop the loop.
func (allocator *defaultAllocator) deallocate(ctx context.Context, status *AllocStatus, sandboxToPods map[string][]string, sandboxes []*sandboxv1alpha1.BatchSandbox) (bool, error) {
	var failures []error
	released := false
	known := make(map[string]*sandboxv1alpha1.BatchSandbox)
	for _, sbx := range sandboxes {
		known[sbx.Name] = sbx
		done, err := allocator.doDeallocate(ctx, status, sandboxToPods, sbx)
		if err != nil {
			failures = append(failures, err)
			continue
		}
		released = released || done
	}

	// Garbage-collect allocations of sandboxes that no longer exist. The names
	// are collected first and the map is modified in a second pass.
	orphaned := make([]string, 0)
	for name := range sandboxToPods {
		if _, found := known[name]; !found {
			orphaned = append(orphaned, name)
		}
	}
	for _, name := range orphaned {
		for _, pod := range sandboxToPods[name] {
			delete(status.PodAllocation, pod)
			released = true
		}
		delete(sandboxToPods, name)
	}
	return released, gerrors.Join(failures...)
}

// doDeallocate processes the release request of a single sandbox.
//
// Pods listed in the sandbox's release record are removed from
// status.PodAllocation, but only when the pool actually records them as
// allocated to this sandbox. A release record can therefore never free a pod
// that belongs to another sandbox, and a release that was already processed
// does not mark the pool dirty again.
//
// It returns true only if at least one entry was removed from
// status.PodAllocation. Sandboxes without an entry in sandboxToPods are
// skipped without reading their release record.
func (allocator *defaultAllocator) doDeallocate(ctx context.Context, status *AllocStatus, sandboxToPods map[string][]string, sbx *sandboxv1alpha1.BatchSandbox) (bool, error) {
	name := sbx.Name
	allocatedPods, ok := sandboxToPods[name]
	if !ok { // pods are already released to the pool
		return false, nil
	}
	toRelease, err := allocator.syncer.GetRelease(ctx, sbx)
	if err != nil {
		return false, err
	}

	deallocate := false
	for _, pod := range toRelease.Pods {
		// Reading a nil map is safe; a missing entry reports found == false.
		if owner, found := status.PodAllocation[pod]; !found || owner != name {
			continue
		}
		delete(status.PodAllocation, pod)
		deallocate = true
	}

	// Keep sandboxToPods in step with the pool mapping.
	remaining := make([]string, 0, len(allocatedPods))
	for _, pod := range allocatedPods {
		if slices.Contains(toRelease.Pods, pod) {
			continue
		}
		remaining = append(remaining, pod)
	}
	sandboxToPods[name] = remaining
	return deallocate, nil
}

// getPodAllocation loads the pod -> sandbox mapping persisted for the pool.
//
// The returned map is never nil when err is nil: callers write new
// allocations into it, and writing to a nil map panics. An empty map is
// returned both when the store has no allocation and when the stored
// allocation carries no map (for example a record that decoded to an empty
// object).
func (allocator *defaultAllocator) getPodAllocation(ctx context.Context, pool *sandboxv1alpha1.Pool) (map[string]string, error) {
	alloc, err := allocator.store.GetAllocation(ctx, pool)
	if err != nil {
		return nil, err
	}
	if alloc == nil || alloc.PodAllocation == nil {
		return map[string]string{}, nil
	}
	return alloc.PodAllocation, nil
}

// PersistPoolAllocation writes the pod -> sandbox mapping held in status to
// the pool through the allocation store.
func (allocator *defaultAllocator) PersistPoolAllocation(ctx context.Context, pool *sandboxv1alpha1.Pool, status *AllocStatus) error {
	return allocator.store.SetAllocation(ctx, pool, &PoolAllocation{
		PodAllocation: status.PodAllocation,
	})
}

// syncAllocResult writes the computed allocation back to every sandbox named
// in dirtySandboxes. All sandboxes are attempted; the failures are joined into
// the returned error.
func (allocator *defaultAllocator) syncAllocResult(ctx context.Context, dirtySandboxes []string, sandboxAlloc map[string][]string, sandboxes []*sandboxv1alpha1.BatchSandbox) error {
	if len(dirtySandboxes) == 0 {
		return nil
	}

	// Index the sandboxes by name so dirty names can be resolved to objects.
	byName := make(map[string]*sandboxv1alpha1.BatchSandbox)
	for _, sbx := range sandboxes {
		byName[sbx.Name] = sbx
	}

	var failures []error
	for _, name := range dirtySandboxes {
		if err := allocator.doSyncAllocResult(ctx, sandboxAlloc[name], byName[name]); err != nil {
			failures = append(failures, err)
		}
	}
	return gerrors.Join(failures...)
}

// doSyncAllocResult stores allocatedPods as the allocation of a single sandbox
// through the syncer.
func (allocator *defaultAllocator) doSyncAllocResult(ctx context.Context, allocatedPods []string, sbx *sandboxv1alpha1.BatchSandbox) error {
	return allocator.syncer.SetAllocation(ctx, sbx, &SandboxAllocation{Pods: allocatedPods})
}


================================================
FILE: kubernetes/internal/controller/allocator_mock.go
================================================
// Code generated by MockGen. DO NOT EDIT.
// Source: allocator.go

// Package controller is a generated GoMock package.
package controller

import (
	context "context"
	reflect "reflect"

	gomock "github.com/golang/mock/gomock"

	v1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
)

// MockAllocationStore is a mock of AllocationStore interface.
// Every method call is forwarded to the gomock controller, which answers it
// from the expectations recorded through EXPECT().
type MockAllocationStore struct {
	ctrl     *gomock.Controller
	recorder *MockAllocationStoreMockRecorder
}

// MockAllocationStoreMockRecorder is the mock recorder for MockAllocationStore.
// Its methods mirror the mock's methods and register expected calls.
type MockAllocationStoreMockRecorder struct {
	mock *MockAllocationStore
}

// NewMockAllocationStore creates a new mock instance.
// The mock and its recorder are bound to the given controller.
func NewMockAllocationStore(ctrl *gomock.Controller) *MockAllocationStore {
	mock := &MockAllocationStore{ctrl: ctrl}
	mock.recorder = &MockAllocationStoreMockRecorder{mock}
	return mock
}

// EXPECT returns an object that allows the caller to indicate expected use.
func (m *MockAllocationStore) EXPECT() *MockAllocationStoreMockRecorder {
	return m.recorder
}

// GetAllocation mocks base method.
// The results come from the controller; a result of an unexpected type is
// dropped by the checked type assertion and replaced by the zero value.
func (m *MockAllocationStore) GetAllocation(ctx context.Context, pool *v1alpha1.Pool) (*PoolAllocation, error) {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "GetAllocation", ctx, pool)
	ret0, _ := ret[0].(*PoolAllocation)
	ret1, _ := ret[1].(error)
	return ret0, ret1
}

// GetAllocation indicates an expected call of GetAllocation.
// Arguments may be concrete values or gomock matchers.
func (mr *MockAllocationStoreMockRecorder) GetAllocation(ctx, pool interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetAllocation", reflect.TypeOf((*MockAllocationStore)(nil).GetAllocation), ctx, pool)
}

// SetAllocation mocks base method.
// The result comes from the controller; a result of an unexpected type is
// dropped by the checked type assertion and replaced by a nil error.
func (m *MockAllocationStore) SetAllocation(ctx context.Context, pool *v1alpha1.Pool, allocation *PoolAllocation) error {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "SetAllocation", ctx, pool, allocation)
	ret0, _ := ret[0].(error)
	return ret0
}

// SetAllocation indicates an expected call of SetAllocation.
// Arguments may be concrete values or gomock matchers.
func (mr *MockAllocationStoreMockRecorder) SetAllocation(ctx, pool, allocation interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetAllocation", reflect.TypeOf((*MockAllocationStore)(nil).SetAllocation), ctx, pool, allocation)
}

// MockAllocationSyncer is a mock of AllocationSyncer interface.
// Every method call is forwarded to the gomock controller, which answers it
// from the expectations recorded through EXPECT().
type MockAllocationSyncer struct {
	ctrl     *gomock.Controller
	recorder *MockAllocationSyncerMockRecorder
}

// MockAllocationSyncerMockRecorder is the mock recorder for MockAllocationSyncer.
// Its methods mirror the mock's methods and register expected calls.
type MockAllocationSyncerMockRecorder struct {
	mock *MockAllocationSyncer
}

// NewMockAllocationSyncer creates a new mock instance.
// The mock and its recorder are bound to the given controller.
func NewMockAllocationSyncer(ctrl *gomock.Controller) *MockAllocationSyncer {
	mock := &MockAllocationSyncer{ctrl: ctrl}
	mock.recorder = &MockAllocationSyncerMockRecorder{mock}
	return mock
}

// EXPECT returns an object that allows the caller to indicate expected use.
func (m *MockAllocationSyncer) EXPECT() *MockAllocationSyncerMockRecorder {
	return m.recorder
}

// GetAllocation mocks base method.
// The results come from the controller; a result of an unexpected type is
// dropped by the checked type assertion and replaced by the zero value.
func (m *MockAllocationSyncer) GetAllocation(ctx context.Context, sandbox *v1alpha1.BatchSandbox) (*SandboxAllocation, error) {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "GetAllocation", ctx, sandbox)
	ret0, _ := ret[0].(*SandboxAllocation)
	ret1, _ := ret[1].(error)
	return ret0, ret1
}

// GetAllocation indicates an expected call of GetAllocation.
// Arguments may be concrete values or gomock matchers.
func (mr *MockAllocationSyncerMockRecorder) GetAllocation(ctx, sandbox interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetAllocation", reflect.TypeOf((*MockAllocationSyncer)(nil).GetAllocation), ctx, sandbox)
}

// GetRelease mocks base method.
// The results come from the controller; a result of an unexpected type is
// dropped by the checked type assertion and replaced by the zero value.
func (m *MockAllocationSyncer) GetRelease(ctx context.Context, sandbox *v1alpha1.BatchSandbox) (*AllocationRelease, error) {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "GetRelease", ctx, sandbox)
	ret0, _ := ret[0].(*AllocationRelease)
	ret1, _ := ret[1].(error)
	return ret0, ret1
}

// GetRelease indicates an expected call of GetRelease.
// Arguments may be concrete values or gomock matchers.
func (mr *MockAllocationSyncerMockRecorder) GetRelease(ctx, sandbox interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetRelease", reflect.TypeOf((*MockAllocationSyncer)(nil).GetRelease), ctx, sandbox)
}

// SetAllocation mocks base method.
// The result comes from the controller; a result of an unexpected type is
// dropped by the checked type assertion and replaced by a nil error.
func (m *MockAllocationSyncer) SetAllocation(ctx context.Context, sandbox *v1alpha1.BatchSandbox, allocation *SandboxAllocation) error {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "SetAllocation", ctx, sandbox, allocation)
	ret0, _ := ret[0].(error)
	return ret0
}

// SetAllocation indicates an expected call of SetAllocation.
// Arguments may be concrete values or gomock matchers.
func (mr *MockAllocationSyncerMockRecorder) SetAllocation(ctx, sandbox, allocation interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetAllocation", reflect.TypeOf((*MockAllocationSyncer)(nil).SetAllocation), ctx, sandbox, allocation)
}

// MockAllocator is a mock of Allocator interface.
//
// The method set matches the current Allocator interface: Schedule returns
// (*AllocStatus, bool, error) and PersistPoolAllocation is mocked as well, so
// *MockAllocator can be used wherever an Allocator is expected. The bodies are
// in the shape mockgen produces; regenerate from allocator.go whenever the
// interface changes.
type MockAllocator struct {
	ctrl     *gomock.Controller
	recorder *MockAllocatorMockRecorder
}

// MockAllocatorMockRecorder is the mock recorder for MockAllocator.
type MockAllocatorMockRecorder struct {
	mock *MockAllocator
}

// NewMockAllocator creates a new mock instance.
func NewMockAllocator(ctrl *gomock.Controller) *MockAllocator {
	mock := &MockAllocator{ctrl: ctrl}
	mock.recorder = &MockAllocatorMockRecorder{mock}
	return mock
}

// EXPECT returns an object that allows the caller to indicate expected use.
func (m *MockAllocator) EXPECT() *MockAllocatorMockRecorder {
	return m.recorder
}

// PersistPoolAllocation mocks base method.
func (m *MockAllocator) PersistPoolAllocation(ctx context.Context, pool *v1alpha1.Pool, status *AllocStatus) error {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "PersistPoolAllocation", ctx, pool, status)
	ret0, _ := ret[0].(error)
	return ret0
}

// PersistPoolAllocation indicates an expected call of PersistPoolAllocation.
func (mr *MockAllocatorMockRecorder) PersistPoolAllocation(ctx, pool, status interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "PersistPoolAllocation", reflect.TypeOf((*MockAllocator)(nil).PersistPoolAllocation), ctx, pool, status)
}

// Schedule mocks base method.
// The three results are the allocation status, the poolDirty flag and the error.
func (m *MockAllocator) Schedule(ctx context.Context, spec *AllocSpec) (*AllocStatus, bool, error) {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "Schedule", ctx, spec)
	ret0, _ := ret[0].(*AllocStatus)
	ret1, _ := ret[1].(bool)
	ret2, _ := ret[2].(error)
	return ret0, ret1, ret2
}

// Schedule indicates an expected call of Schedule.
func (mr *MockAllocatorMockRecorder) Schedule(ctx, spec interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Schedule", reflect.TypeOf((*MockAllocator)(nil).Schedule), ctx, spec)
}


================================================
FILE: kubernetes/internal/controller/allocator_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"context"
	"reflect"
	"testing"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	"github.com/golang/mock/gomock"
	"github.com/stretchr/testify/assert"
)

// TestAllocatorSchedule drives defaultAllocator.Schedule with a mocked store
// and syncer and checks the resulting AllocStatus for several pool states.
//
// In each case the syncer mock returns the same sandboxAlloc and release for
// every sandbox of the case.
func TestAllocatorSchedule(t *testing.T) {
	mockCtrl := gomock.NewController(t)
	store := NewMockAllocationStore(mockCtrl)
	syncer := NewMockAllocationSyncer(mockCtrl)
	allocator := &defaultAllocator{store: store, syncer: syncer}

	one, two := int32(1), int32(2)

	// Fixture builders; every case uses the same pool and plain named objects.
	newPod := func(name string, phase corev1.PodPhase) *corev1.Pod {
		return &corev1.Pod{
			ObjectMeta: metav1.ObjectMeta{Name: name},
			Status:     corev1.PodStatus{Phase: phase},
		}
	}
	newPool := func() *sandboxv1alpha1.Pool {
		return &sandboxv1alpha1.Pool{
			ObjectMeta: metav1.ObjectMeta{Name: "pool1"},
		}
	}
	newSandbox := func(name string, replicas *int32) *sandboxv1alpha1.BatchSandbox {
		return &sandboxv1alpha1.BatchSandbox{
			ObjectMeta: metav1.ObjectMeta{Name: name},
			Spec: sandboxv1alpha1.BatchSandboxSpec{
				PoolRef:  "pool1",
				Replicas: replicas,
			},
		}
	}

	type scheduleCase struct {
		name         string
		spec         *AllocSpec
		poolAlloc    *PoolAllocation
		sandboxAlloc *SandboxAllocation
		release      *AllocationRelease
		wantStatus   *AllocStatus
	}
	cases := []scheduleCase{
		{
			// Two free running pods, two sandboxes wanting one pod each.
			name: "normal",
			spec: &AllocSpec{
				Pods: []*corev1.Pod{
					newPod("pod1", corev1.PodRunning),
					newPod("pod2", corev1.PodRunning),
				},
				Pool: newPool(),
				Sandboxes: []*sandboxv1alpha1.BatchSandbox{
					newSandbox("sbx1", &one),
					newSandbox("sbx2", &one),
				},
			},
			poolAlloc:    &PoolAllocation{PodAllocation: map[string]string{}},
			sandboxAlloc: &SandboxAllocation{Pods: []string{}},
			release:      &AllocationRelease{Pods: []string{}},
			wantStatus: &AllocStatus{
				PodAllocation: map[string]string{
					"pod1": "sbx1",
					"pod2": "sbx2",
				},
				PodSupplement: 0,
			},
		},
		{
			// The pending pod must not be handed out; one pod is missing.
			name: "pod not running",
			spec: &AllocSpec{
				Pods: []*corev1.Pod{
					newPod("pod1", corev1.PodRunning),
					newPod("pod2", corev1.PodPending),
				},
				Pool: newPool(),
				Sandboxes: []*sandboxv1alpha1.BatchSandbox{
					newSandbox("sbx1", &one),
					newSandbox("sbx2", &one),
				},
			},
			poolAlloc:    &PoolAllocation{PodAllocation: map[string]string{}},
			sandboxAlloc: &SandboxAllocation{Pods: []string{}},
			release:      &AllocationRelease{Pods: []string{}},
			wantStatus: &AllocStatus{
				PodAllocation: map[string]string{
					"pod1": "sbx1",
				},
				PodSupplement: 1,
			},
		},
		{
			// sbx1 already owns pod1 and needs exactly one more pod.
			name: "already partial allocated",
			spec: &AllocSpec{
				Pods: []*corev1.Pod{
					newPod("pod1", corev1.PodRunning),
					newPod("pod2", corev1.PodRunning),
					newPod("pod3", corev1.PodRunning),
				},
				Pool: newPool(),
				Sandboxes: []*sandboxv1alpha1.BatchSandbox{
					newSandbox("sbx1", &two),
				},
			},
			poolAlloc: &PoolAllocation{
				PodAllocation: map[string]string{
					"pod1": "sbx1",
				},
			},
			sandboxAlloc: &SandboxAllocation{Pods: []string{"pod1"}},
			release:      &AllocationRelease{Pods: []string{}},
			wantStatus: &AllocStatus{
				PodAllocation: map[string]string{
					"pod1": "sbx1",
					"pod2": "sbx1",
				},
				PodSupplement: 0,
			},
		},
		{
			// The sandbox is fully served according to its own record and
			// additionally carries a release record.
			name: "no need allocated with release",
			spec: &AllocSpec{
				Pods: []*corev1.Pod{
					newPod("pod1", corev1.PodRunning),
				},
				Pool: newPool(),
				Sandboxes: []*sandboxv1alpha1.BatchSandbox{
					newSandbox("sbx1", &one),
				},
			},
			poolAlloc:    &PoolAllocation{PodAllocation: map[string]string{}},
			sandboxAlloc: &SandboxAllocation{Pods: []string{"pod1"}},
			release:      &AllocationRelease{Pods: []string{"pod1", "sbx1"}},
			wantStatus: &AllocStatus{
				PodAllocation: map[string]string{},
				PodSupplement: 0,
			},
		},
	}
	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			sandboxCount := len(c.spec.Sandboxes)
			store.EXPECT().GetAllocation(gomock.Any(), gomock.Any()).Return(c.poolAlloc, nil).Times(1)
			store.EXPECT().SetAllocation(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes()
			syncer.EXPECT().GetAllocation(gomock.Any(), gomock.Any()).Return(c.sandboxAlloc, nil).Times(sandboxCount)
			syncer.EXPECT().SetAllocation(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes()
			syncer.EXPECT().GetRelease(gomock.Any(), gomock.Any()).Return(c.release, nil).Times(sandboxCount)

			status, _, err := allocator.Schedule(context.Background(), c.spec)
			assert.NoError(t, err)
			assert.True(t, reflect.DeepEqual(c.wantStatus, status))
		})
	}
}


================================================
FILE: kubernetes/internal/controller/apis.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"encoding/json"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils"
	pkgutils "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/utils"
)

const (
	AnnoAllocStatusKey           = "sandbox.opensandbox.io/alloc-status"
	AnnoAllocReleaseKey          = "sandbox.opensandbox.io/alloc-release"
	LabelBatchSandboxPodIndexKey = "batch-sandbox.sandbox.opensandbox.io/pod-index"

	AnnoPoolAllocStatusKey     = "pool.opensandbox.io/alloc-status"
	AnnoPoolAllocGenerationKey = "pool.opensandbox.io/alloc-generation"

	FinalizerTaskCleanup = "batch-sandbox.sandbox.opensandbox.io/task-cleanup"
)

// AnnotationSandboxEndpoints Use the exported constant from pkg/utils
var AnnotationSandboxEndpoints = pkgutils.AnnotationEndpoints

// SandboxAllocation is the sandbox-side allocation record. It is stored as
// JSON in the AnnoAllocStatusKey annotation of the sandbox object.
type SandboxAllocation struct {
	// Pods are the names of the pods allocated to the sandbox. The slice order
	// is used as the pod index in pooled mode.
	Pods []string `json:"pods"`
}

// AllocationRelease is the sandbox-side release record. It is stored as JSON
// in the AnnoAllocReleaseKey annotation of the sandbox object.
type AllocationRelease struct {
	// Pods are the names of the pods the sandbox hands back.
	Pods []string `json:"pods"`
}

// PoolAllocation is the pool-side allocation record.
// NOTE(review): presumably serialized into the AnnoPoolAllocStatusKey
// annotation of the Pool by the allocation store — confirm against the store.
type PoolAllocation struct {
	// PodAllocation maps a pod name to the name of the sandbox it is allocated to.
	PodAllocation map[string]string `json:"podAllocation"`
}

// parseSandboxAllocation decodes the allocation record stored in the object's
// AnnoAllocStatusKey annotation. A missing or empty annotation yields the zero
// SandboxAllocation; malformed JSON yields the unmarshal error together with
// whatever was decoded so far.
func parseSandboxAllocation(obj metav1.Object) (SandboxAllocation, error) {
	var alloc SandboxAllocation
	// Indexing a nil annotation map is safe and returns "".
	payload := obj.GetAnnotations()[AnnoAllocStatusKey]
	if payload == "" {
		return alloc, nil
	}
	err := json.Unmarshal([]byte(payload), &alloc)
	return alloc, err
}

// setSandboxAllocation stores alloc as JSON in the object's AnnoAllocStatusKey
// annotation, creating the annotation map if the object has none.
//
// The updated map is written back through SetAnnotations instead of mutating
// the result of GetAnnotations in place: metav1.Object does not promise that
// GetAnnotations returns the live map, and implementations that return a copy
// would otherwise silently drop the update.
func setSandboxAllocation(obj metav1.Object, alloc SandboxAllocation) {
	annotations := obj.GetAnnotations()
	if annotations == nil {
		annotations = map[string]string{}
	}
	annotations[AnnoAllocStatusKey] = utils.DumpJSON(alloc)
	obj.SetAnnotations(annotations)
}

// parseSandboxReleased decodes the release record stored in the object's
// AnnoAllocReleaseKey annotation. A missing or empty annotation yields the
// zero AllocationRelease; malformed JSON yields the unmarshal error together
// with whatever was decoded so far.
func parseSandboxReleased(obj metav1.Object) (AllocationRelease, error) {
	var released AllocationRelease
	// Indexing a nil annotation map is safe and returns "".
	payload := obj.GetAnnotations()[AnnoAllocReleaseKey]
	if payload == "" {
		return released, nil
	}
	err := json.Unmarshal([]byte(payload), &released)
	return released, err
}


================================================
FILE: kubernetes/internal/controller/batchsandbox_controller.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"context"
	"encoding/json"
	gerrors "errors"
	"fmt"
	"reflect"
	"slices"
	"strconv"
	"strings"
	"sync"
	"time"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/strategicpatch"
	"k8s.io/client-go/tools/record"
	"k8s.io/client-go/util/retry"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
	logf "sigs.k8s.io/controller-runtime/pkg/log"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/controller/strategy"
	taskscheduler "github.com/alibaba/OpenSandbox/sandbox-k8s/internal/scheduler"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils"
	controllerutils "github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils/controller"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils/expectations"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils/fieldindex"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils/requeueduration"
)

var (
	BatchSandboxScaleExpectations = expectations.NewScaleExpectations()
	DurationStore                 = requeueduration.DurationStore{}
)

// BatchSandboxReconciler reconciles a BatchSandbox object
type BatchSandboxReconciler struct {
	// Client is the embedded Kubernetes client used for all reads and writes.
	client.Client
	Scheme         *runtime.Scheme
	Recorder       record.EventRecorder
	// NOTE(review): presumably caches one task scheduler per BatchSandbox; it is
	// accessed through getTaskScheduler/deleteTaskScheduler — confirm key and
	// value types there.
	taskSchedulers sync.Map
}

// +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=sandbox.opensandbox.io,resources=batchsandboxes,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=sandbox.opensandbox.io,resources=batchsandboxes/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=sandbox.opensandbox.io,resources=batchsandboxes/finalizers,verbs=update

// Reconcile moves a BatchSandbox towards its desired state. One pass does the
// following, in order:
//
//  1. Expiry: deletes the object once Spec.ExpireTime has passed, otherwise
//     schedules a requeue for the expiry time.
//  2. Finalizer: adds FinalizerTaskCleanup to live objects that need task
//     scheduling (and returns, relying on the resulting update to trigger the
//     next pass). Objects being deleted that need no task scheduling are done.
//  3. Pods: lists the sandbox's pods, sorts them by pod index and name, and in
//     non-pooled mode scales them to the desired count.
//  4. Status: recomputes replicas/allocated/ready, publishes the pod IPs in
//     the AnnotationSandboxEndpoints annotation and updates the status when it
//     changed.
//  5. Tasks: when task scheduling is needed, schedules tasks, requeues every
//     3 seconds, and on deletion stops tasks and removes the finalizer once
//     cleanup has finished.
//
// Errors from the annotation patch and the status update are collected and
// returned at the end so they do not prevent task scheduling.
//
// For more details, check Reconcile and its Result here:
// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.21.0/pkg/reconcile
func (r *BatchSandboxReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	log := logf.FromContext(ctx)
	var aggErrors []error
	// Make sure no requeue duration of this pass leaks into the next one,
	// whichever return path is taken.
	defer func() {
		_ = DurationStore.Pop(req.String())
	}()
	batchSbx := &sandboxv1alpha1.BatchSandbox{}
	if err := r.Get(ctx, client.ObjectKey{
		Namespace: req.Namespace,
		Name:      req.Name,
	}, batchSbx); err != nil {
		if errors.IsNotFound(err) {
			return ctrl.Result{}, nil
		}
		return ctrl.Result{}, err
	}
	// handle expire
	if expireAt := batchSbx.Spec.ExpireTime; expireAt != nil {
		now := time.Now()
		if expireAt.Time.Before(now) {
			if batchSbx.DeletionTimestamp == nil {
				log.Info("batch sandbox expired, delete", "expireAt", expireAt)
				if err := r.Delete(ctx, batchSbx); err != nil {
					if errors.IsNotFound(err) {
						return ctrl.Result{}, nil
					}
					return ctrl.Result{}, err
				}
			}
		} else {
			// Not expired yet: come back when it is.
			DurationStore.Push(types.NamespacedName{Namespace: batchSbx.Namespace, Name: batchSbx.Name}.String(), expireAt.Time.Sub(now))
		}
	}

	// task schedule
	taskStrategy := strategy.NewTaskSchedulingStrategy(batchSbx)

	// pool strategy
	poolStrategy := strategy.NewPoolStrategy(batchSbx)

	// handle finalizers
	if batchSbx.DeletionTimestamp == nil {
		if taskStrategy.NeedTaskScheduling() {
			if !controllerutil.ContainsFinalizer(batchSbx, FinalizerTaskCleanup) {
				err := utils.UpdateFinalizer(r.Client, batchSbx, utils.AddFinalizerOpType, FinalizerTaskCleanup)
				if err != nil {
					log.Error(err, "failed to add finalizer", "finalizer", FinalizerTaskCleanup)
				} else {
					log.Info("added finalizer", "finalizer", FinalizerTaskCleanup)
				}
				return ctrl.Result{}, err
			}
		}
	} else {
		// Being deleted and no tasks to clean up: nothing left to do.
		if !taskStrategy.NeedTaskScheduling() {
			return ctrl.Result{}, nil
		}
	}

	pods, err := r.listPods(ctx, poolStrategy, batchSbx)
	if err != nil {
		return ctrl.Result{}, fmt.Errorf("failed to list pods %w", err)
	}
	podIndex, err := calPodIndex(poolStrategy, batchSbx, pods)
	if err != nil {
		return ctrl.Result{}, fmt.Errorf("failed to cal pod index %w", err)
	}
	slices.SortStableFunc(pods, utils.MultiPodSorter([]func(a, b *corev1.Pod) int{
		utils.WithPodIndexSorter(podIndex),
		utils.PodNameSorter,
	}).Sort)
	// Normal Mode need scale Pods
	if !poolStrategy.IsPooledMode() {
		err := r.scaleBatchSandbox(ctx, batchSbx, batchSbx.Spec.Template, pods)
		if err != nil {
			return ctrl.Result{}, fmt.Errorf("failed to scale batch sandbox %w", err)
		}
	}

	// TODO merge task status update
	newStatus := batchSbx.Status.DeepCopy()
	newStatus.ObservedGeneration = batchSbx.Generation
	newStatus.Replicas = 0
	newStatus.Allocated = 0
	newStatus.Ready = 0
	// ipList is index-aligned with the sorted pods; pods that are not assigned
	// yet keep an empty string at their position.
	ipList := make([]string, len(pods))
	for i, pod := range pods {
		newStatus.Replicas++
		if utils.IsAssigned(pod) {
			newStatus.Allocated++
			ipList[i] = pod.Status.PodIP
		}
		if pod.Status.Phase == corev1.PodRunning && utils.IsPodReady(pod) {
			newStatus.Ready++
		}
	}
	raw, _ := json.Marshal(ipList)
	if batchSbx.Annotations[AnnotationSandboxEndpoints] != string(raw) {
		patchData, _ := json.Marshal(map[string]any{
			"metadata": map[string]any{
				"annotations": map[string]string{
					AnnotationSandboxEndpoints: string(raw),
				},
			},
		})
		obj := &sandboxv1alpha1.BatchSandbox{ObjectMeta: metav1.ObjectMeta{Namespace: batchSbx.Namespace, Name: batchSbx.Name}}
		if err := r.Patch(ctx, obj, client.RawPatch(types.MergePatchType, patchData)); err != nil {
			log.Error(err, "failed to patch annotation", "annotation", AnnotationSandboxEndpoints, "body", string(patchData))
			aggErrors = append(aggErrors, err)
		}
	}
	// newStatus is a pointer (result of DeepCopy) while batchSbx.Status is a
	// value. reflect.DeepEqual reports values of different types as unequal, so
	// the pointer must be dereferenced; otherwise the status would be rewritten
	// on every single reconcile.
	if !reflect.DeepEqual(*newStatus, batchSbx.Status) {
		log.Info("To update BatchSandbox status", "replicas", newStatus.Replicas, "allocated", newStatus.Allocated, "ready", newStatus.Ready)
		if err := r.updateStatus(batchSbx, newStatus); err != nil {
			aggErrors = append(aggErrors, err)
		}
	}

	if taskStrategy.NeedTaskScheduling() {
		// Because tasks are in-memory and there is no event mechanism, periodic reconciliation is required.
		DurationStore.Push(types.NamespacedName{Namespace: batchSbx.Namespace, Name: batchSbx.Name}.String(), 3*time.Second)
		sch, err := r.getTaskScheduler(ctx, batchSbx, pods)
		if err != nil {
			return ctrl.Result{}, err
		}
		if batchSbx.DeletionTimestamp != nil {
			stoppingTasks := sch.StopTask()
			if len(stoppingTasks) > 0 {
				log.Info("stopping tasks", "count", len(stoppingTasks))
			}
		}
		now := time.Now()
		if err = r.scheduleTasks(ctx, sch, batchSbx); err != nil {
			return ctrl.Result{}, fmt.Errorf("failed to schedule tasks, err %w", err)
		} else {
			log.Info("schedule tasks completed", "costMs", time.Since(now).Milliseconds())
		}
		// check task cleanup is finished
		if batchSbx.DeletionTimestamp != nil {
			unfinishedTasks := r.getTasksCleanupUnfinished(batchSbx, sch)
			if len(unfinishedTasks) > 0 {
				log.Info("tasks cleanup is unfinished", "unfinishedCount", len(unfinishedTasks))
			} else {
				var err error
				if controllerutil.ContainsFinalizer(batchSbx, FinalizerTaskCleanup) {
					err = utils.UpdateFinalizer(r.Client, batchSbx, utils.RemoveFinalizerOpType, FinalizerTaskCleanup)
					if err != nil {
						if errors.IsNotFound(err) {
							// The object is already gone; treat as success.
							err = nil
						} else {
							log.Error(err, "failed to remove finalizer", "finalizer", FinalizerTaskCleanup)
						}
					}
				}
				if err == nil {
					r.deleteTaskScheduler(ctx, batchSbx)
					log.Info("task cleanup is finished, removed finalizer", "finalizer", FinalizerTaskCleanup)
				}
				return ctrl.Result{}, err
			}
		}
	}

	return reconcile.Result{RequeueAfter: DurationStore.Pop(req.String())}, gerrors.Join(aggErrors...)
}

// calPodIndex builds a pod-name -> index lookup for a BatchSandbox.
//
// In pooled mode the index of a pod is its position inside the allocation
// returned by parseSandboxAllocation; the pods argument is not consulted.
// Otherwise every entry of pods is resolved through parseIndex. Any parse
// failure aborts the computation and is returned together with a nil map.
func calPodIndex(poolStrategy strategy.PoolStrategy, batchSbx *sandboxv1alpha1.BatchSandbox, pods []*corev1.Pod) (map[string]int, error) {
	indexByName := make(map[string]int)

	if !poolStrategy.IsPooledMode() {
		for _, pod := range pods {
			n, err := parseIndex(pod)
			if err != nil {
				return nil, fmt.Errorf("batchsandbox: failed to parse %s/%s index %w", pod.Namespace, pod.Name, err)
			}
			indexByName[pod.Name] = n
		}
		return indexByName, nil
	}

	// Pooled mode: the allocation order defines the index.
	allocation, err := parseSandboxAllocation(batchSbx)
	if err != nil {
		return nil, err
	}
	for position, name := range allocation.Pods {
		indexByName[name] = position
	}
	return indexByName, nil
}

// listPods returns the Pods that currently belong to the BatchSandbox.
//
// Non-pooled mode: Pods in the BatchSandbox namespace are listed with a field
// selector on fieldindex.IndexNameForOwnerRefUID set to the BatchSandbox UID.
//
// Pooled mode: the candidate names are the allocated pods minus the released
// pods (both parsed from the BatchSandbox); each remaining name is fetched
// individually and names that no longer exist are skipped. The result order in
// this mode follows set iteration and is therefore not stable.
func (r *BatchSandboxReconciler) listPods(ctx context.Context, poolStrategy strategy.PoolStrategy, batchSbx *sandboxv1alpha1.BatchSandbox) ([]*corev1.Pod, error) {
	var result []*corev1.Pod

	if !poolStrategy.IsPooledMode() {
		owned := &corev1.PodList{}
		selector := fields.SelectorFromSet(fields.Set{fieldindex.IndexNameForOwnerRefUID: string(batchSbx.UID)})
		listOpts := &client.ListOptions{
			Namespace:     batchSbx.Namespace,
			FieldSelector: selector,
		}
		if err := r.Client.List(ctx, owned, listOpts); err != nil {
			return nil, err
		}
		for i := range owned.Items {
			// Take the address of the slice element, not of a loop copy.
			result = append(result, &owned.Items[i])
		}
		return result, nil
	}

	allocation, err := parseSandboxAllocation(batchSbx)
	if err != nil {
		return nil, err
	}
	allocated := sets.New[string](allocation.Pods...)

	release, err := parseSandboxReleased(batchSbx)
	if err != nil {
		return nil, err
	}
	alreadyReleased := sets.New[string](release.Pods...)

	for podName := range allocated {
		if alreadyReleased.Has(podName) {
			continue
		}
		fetched := &corev1.Pod{}
		// TODO maybe performance is problem
		getErr := r.Client.Get(ctx, types.NamespacedName{Namespace: batchSbx.Namespace, Name: podName}, fetched)
		switch {
		case getErr == nil:
			result = append(result, fetched)
		case errors.IsNotFound(getErr):
			// The pod is gone; it simply does not appear in the result.
		default:
			return nil, getErr
		}
	}
	return result, nil
}

// getTaskScheduler returns the in-memory task scheduler registered for the
// BatchSandbox, creating and registering one on first use.
//
// An existing scheduler gets its pod list refreshed via UpdatePods. A new one
// is built from the task specs generated by the task scheduling strategy and
// the BatchSandbox's TaskResourcePolicyWhenCompleted (Retain when unset).
func (r *BatchSandboxReconciler) getTaskScheduler(ctx context.Context, batchSbx *sandboxv1alpha1.BatchSandbox, pods []*corev1.Pod) (taskscheduler.TaskScheduler, error) {
	log := logf.FromContext(ctx)
	key := types.NamespacedName{Namespace: batchSbx.Namespace, Name: batchSbx.Name}.String()

	// The reconciler guarantees that it will not concurrently reconcile the same BatchSandbox.
	if cached, found := r.taskSchedulers.Load(key); found {
		existing, isScheduler := cached.(taskscheduler.TaskScheduler)
		if !isScheduler {
			return nil, gerrors.New("invalid scheduler type stored")
		}
		// Update the pods list for this scheduler
		existing.UpdatePods(pods)
		return existing, nil
	}

	policy := sandboxv1alpha1.TaskResourcePolicyRetain
	if override := batchSbx.Spec.TaskResourcePolicyWhenCompleted; override != nil {
		policy = *override
	}
	taskSpecs, err := strategy.NewTaskSchedulingStrategy(batchSbx).GenerateTaskSpecs()
	if err != nil {
		return nil, err
	}
	created, err := taskscheduler.NewTaskScheduler(key, taskSpecs, pods, policy, log)
	if err != nil {
		return nil, fmt.Errorf("new task scheduler err %w", err)
	}
	log.Info("successfully created task scheduler")
	var tSch taskscheduler.TaskScheduler = created
	r.taskSchedulers.Store(key, created)
	return tSch, nil
}

// deleteTaskScheduler drops the scheduler registered under the BatchSandbox's
// "namespace/name" key from the reconciler's in-memory scheduler map.
func (r *BatchSandboxReconciler) deleteTaskScheduler(ctx context.Context, batchSbx *sandboxv1alpha1.BatchSandbox) {
	logf.FromContext(ctx).Info("delete task scheduler")
	schedulerKey := types.NamespacedName{
		Namespace: batchSbx.Namespace,
		Name:      batchSbx.Name,
	}
	r.taskSchedulers.Delete(schedulerKey.String())
}

// scheduleTasks runs one scheduling round and mirrors the outcome onto the
// BatchSandbox.
//
// Steps:
//  1. Call tSch.Schedule(); its error is returned unchanged.
//  2. Walk tSch.ListTask(): tasks without a pod name count as pending, the
//     others are counted by state, and pods of tasks whose resources are
//     released are collected.
//  3. Record the collected pods through releasePods.
//  4. Write the task counters and ObservedGeneration to the status, but only
//     when they differ from the status carried by batchSbx.
func (r *BatchSandboxReconciler) scheduleTasks(ctx context.Context, tSch taskscheduler.TaskScheduler, batchSbx *sandboxv1alpha1.BatchSandbox) error {
	log := logf.FromContext(ctx)
	if err := tSch.Schedule(); err != nil {
		return err
	}
	tasks := tSch.ListTask()
	toReleasedPods := []string{}
	var (
		running, failed, succeed, unknown int32
		pending                           int32
	)
	for _, task := range tasks {
		if task.GetPodName() == "" {
			// Not bound to any pod yet.
			pending++
			continue
		}
		state := task.GetState()
		if task.IsResourceReleased() {
			toReleasedPods = append(toReleasedPods, task.GetPodName())
		}
		switch state {
		case taskscheduler.RunningTaskState:
			running++
		case taskscheduler.SucceedTaskState:
			succeed++
		case taskscheduler.FailedTaskState:
			failed++
		case taskscheduler.UnknownTaskState:
			unknown++
		}
	}
	if len(toReleasedPods) > 0 {
		log.Info("try to release Pods", "count", len(toReleasedPods))
		if err := r.releasePods(ctx, batchSbx, toReleasedPods); err != nil {
			return err
		}
		log.Info("successfully released Pods", "count", len(toReleasedPods))
	}
	oldStatus := batchSbx.Status
	newStatus := oldStatus.DeepCopy()
	newStatus.ObservedGeneration = batchSbx.Generation
	newStatus.TaskRunning = running
	newStatus.TaskFailed = failed
	newStatus.TaskSucceed = succeed
	newStatus.TaskUnknown = unknown
	newStatus.TaskPending = pending
	// newStatus is a pointer while oldStatus is a value: reflect.DeepEqual
	// reports false for operands of different types, so the pointer has to be
	// dereferenced, otherwise the status would be rewritten on every round.
	if !reflect.DeepEqual(*newStatus, oldStatus) {
		log.Info("To update BatchSandbox status", "replicas", newStatus.Replicas, "task_running", newStatus.TaskRunning, "task_succeed", newStatus.TaskSucceed, "task_failed", newStatus.TaskFailed, "task_unknown", newStatus.TaskUnknown, "task_pending", newStatus.TaskPending)
		if err := r.updateStatus(batchSbx, newStatus); err != nil {
			return err
		}
	}
	return nil
}

// getTasksCleanupUnfinished returns the tasks known to the scheduler whose
// resources are not released yet. The result is nil when every task reports
// IsResourceReleased. batchSbx is not used.
func (r *BatchSandboxReconciler) getTasksCleanupUnfinished(batchSbx *sandboxv1alpha1.BatchSandbox, tSch taskscheduler.TaskScheduler) []taskscheduler.Task {
	var outstanding []taskscheduler.Task
	all := tSch.ListTask()
	for i := range all {
		if all[i].IsResourceReleased() {
			continue
		}
		outstanding = append(outstanding, all[i])
	}
	return outstanding
}

// releasePods records toReleasePods as released on the BatchSandbox.
//
// The names are merged with the release list already parsed from batchSbx,
// serialized as an AllocationRelease and written to the AnnoAllocReleaseKey
// annotation with a JSON merge patch, so only that annotation is sent.
func (r *BatchSandboxReconciler) releasePods(ctx context.Context, batchSbx *sandboxv1alpha1.BatchSandbox, toReleasePods []string) error {
	previous, err := parseSandboxReleased(batchSbx)
	if err != nil {
		return err
	}

	// Union of what was released before and what is released now.
	merged := sets.New[string](previous.Pods...)
	merged.Insert(toReleasePods...)

	raw, err := json.Marshal(AllocationRelease{Pods: sets.List(merged)})
	if err != nil {
		return fmt.Errorf("Failed to marshal released pod names: %v", err)
	}

	patch := struct {
		MetaData metav1.ObjectMeta `json:"metadata"`
	}{
		MetaData: metav1.ObjectMeta{
			Annotations: map[string]string{AnnoAllocReleaseKey: string(raw)},
		},
	}
	body := utils.DumpJSON(patch)

	// Only namespace and name are filled in; they identify the patch target.
	target := &sandboxv1alpha1.BatchSandbox{}
	target.Namespace = batchSbx.Namespace
	target.Name = batchSbx.Name
	return r.Client.Patch(ctx, target, client.RawPatch(types.MergePatchType, []byte(body)))
}

// Normal Mode
//
// scaleBatchSandbox creates the Pods that are missing for a BatchSandbox whose
// Pods are built from a template.
//
// Every Pod in pods is reported to BatchSandboxScaleExpectations as an observed
// creation and indexed through parseIndex. While the expectations are not
// satisfied, a requeue delay is pushed to DurationStore and nil is returned
// without creating anything. Otherwise, for each index in [0, Spec.Replicas)
// that has no Pod, a Pod is built from podTemplateSpec, patched with
// Spec.ShardPatches[idx] when such an entry exists, named "<name>-<idx>",
// labelled with its index and created. The first failure aborts the loop and is
// returned.
func (r *BatchSandboxReconciler) scaleBatchSandbox(ctx context.Context, batchSandbox *sandboxv1alpha1.BatchSandbox, podTemplateSpec *corev1.PodTemplateSpec, pods []*corev1.Pod) error {
	log := logf.FromContext(ctx)
	indexedPodMap := map[int]*corev1.Pod{}
	for _, pod := range pods {
		BatchSandboxScaleExpectations.ObserveScale(controllerutils.GetControllerKey(batchSandbox), expectations.Create, pod.Name)
		idx, err := parseIndex(pod)
		if err != nil {
			return fmt.Errorf("failed to parse idx Pod %s, err %w", pod.Name, err)
		}
		indexedPodMap[idx] = pod
	}
	if satisfied, unsatisfiedDuration, dirtyPods := BatchSandboxScaleExpectations.SatisfiedExpectations(controllerutils.GetControllerKey(batchSandbox)); !satisfied {
		log.Info("scale expectation is not satisfied", "unsatisfiedDuration", unsatisfiedDuration, "dirtyPods", dirtyPods)
		DurationStore.Push(types.NamespacedName{Namespace: batchSandbox.Namespace, Name: batchSandbox.Name}.String(), expectations.ExpectationTimeout-unsatisfiedDuration)
		return nil
	}
	// TODO consider supply Pods if Pods is deleted unexpectedly
	var needCreateIndex []int
	// TODO var needDeleteIndex []int
	for i := 0; i < int(*batchSandbox.Spec.Replicas); i++ {
		if _, ok := indexedPodMap[i]; !ok {
			needCreateIndex = append(needCreateIndex, i)
		}
	}
	// scale
	if len(needCreateIndex) > 0 {
		log.Info("try to create Pods", "count", len(needCreateIndex), "indexes", needCreateIndex)
	}
	for _, idx := range needCreateIndex {
		pod, err := utils.GetPodFromTemplate(podTemplateSpec, batchSandbox, metav1.NewControllerRef(batchSandbox, sandboxv1alpha1.SchemeBuilder.GroupVersion.WithKind("BatchSandbox")))
		if err != nil {
			return err
		}
		// Apply shard patch if available for this index
		if len(batchSandbox.Spec.ShardPatches) > 0 && idx < len(batchSandbox.Spec.ShardPatches) {
			podBytes, err := json.Marshal(pod)
			if err != nil {
				return fmt.Errorf("failed to marshal pod: %w", err)
			}
			patch := batchSandbox.Spec.ShardPatches[idx]
			modifiedPodBytes, err := strategicpatch.StrategicMergePatch(podBytes, patch.Raw, &corev1.Pod{})
			if err != nil {
				return fmt.Errorf("failed to apply shard patch for index %d: %w", idx, err)
			}
			if err := json.Unmarshal(modifiedPodBytes, pod); err != nil {
				return fmt.Errorf("failed to unmarshal patched pod for index %d: %w", idx, err)
			}
		}
		// Identity first: the owner-reference validation below compares the
		// namespaces of owner and controlled object.
		pod.Namespace = batchSandbox.Namespace
		pod.Name = fmt.Sprintf("%s-%d", batchSandbox.Name, idx)
		// The BatchSandbox is the owner and the Pod is the controlled object.
		if err := ctrl.SetControllerReference(batchSandbox, pod, r.Scheme); err != nil {
			return err
		}
		if pod.Labels == nil {
			// Writing to a nil map panics.
			pod.Labels = map[string]string{}
		}
		pod.Labels[LabelBatchSandboxPodIndexKey] = strconv.Itoa(idx)
		BatchSandboxScaleExpectations.ExpectScale(controllerutils.GetControllerKey(batchSandbox), expectations.Create, pod.Name)
		if err := r.Create(ctx, pod); err != nil {
			// The creation will never be observed, so settle the expectation here.
			BatchSandboxScaleExpectations.ObserveScale(controllerutils.GetControllerKey(batchSandbox), expectations.Create, pod.Name)
			r.Recorder.Eventf(batchSandbox, corev1.EventTypeWarning, "FailedCreate", "failed to create pod: %v, pod: %v", err, utils.DumpJSON(pod))
			return err
		}
		r.Recorder.Eventf(batchSandbox, corev1.EventTypeNormal, "SuccessfulCreate", "succeed to create pod %s", pod.Name)
	}
	return nil
}

// parseIndex resolves the replica index of a Pod.
//
// A non-empty LabelBatchSandboxPodIndexKey label wins. Otherwise the text after
// the last "-" of the Pod name is parsed. A name without "-" yields -1 and an
// error; a non-numeric value yields the strconv.Atoi error.
func parseIndex(pod *corev1.Pod) (int, error) {
	label, present := pod.Labels[LabelBatchSandboxPodIndexKey]
	if present && label != "" {
		return strconv.Atoi(label)
	}

	dash := strings.LastIndex(pod.Name, "-")
	if dash < 0 {
		return -1, gerrors.New("batchsandbox: Invalid pod Name")
	}
	suffix := pod.Name[dash+1:]
	return strconv.Atoi(suffix)
}

// updateStatus writes newStatus to the status subresource of the BatchSandbox.
//
// Each attempt re-reads the object before overwriting its whole Status with
// newStatus, and the attempt is repeated through retry.RetryOnConflict with
// retry.DefaultBackoff. The batchSandbox argument is only used for its
// namespace and name and is not modified.
func (r *BatchSandboxReconciler) updateStatus(batchSandbox *sandboxv1alpha1.BatchSandbox, newStatus *sandboxv1alpha1.BatchSandboxStatus) error {
	attempt := func() error {
		key := types.NamespacedName{Namespace: batchSandbox.Namespace, Name: batchSandbox.Name}
		latest := &sandboxv1alpha1.BatchSandbox{}
		err := r.Get(context.TODO(), key, latest)
		if err != nil {
			return err
		}
		latest.Status = *newStatus
		return r.Status().Update(context.TODO(), latest)
	}
	return retry.RetryOnConflict(retry.DefaultBackoff, attempt)
}

// SetupWithManager registers the reconciler with the manager as the
// "batchsandbox" controller: it reconciles BatchSandbox objects, also watches
// the Pods they own, and allows up to 32 concurrent reconciles.
func (r *BatchSandboxReconciler) SetupWithManager(mgr ctrl.Manager) error {
	opts := controller.Options{MaxConcurrentReconciles: 32}
	bldr := ctrl.NewControllerManagedBy(mgr).
		For(&sandboxv1alpha1.BatchSandbox{}).
		Named("batchsandbox").
		Owns(&corev1.Pod{})
	return bldr.WithOptions(opts).Complete(r)
}


================================================
FILE: kubernetes/internal/controller/batchsandbox_controller_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"context"
	"encoding/json"
	gerrors "errors"
	"fmt"
	"net"
	"reflect"
	"strconv"
	"sync"
	"testing"
	"time"

	"github.com/golang/mock/gomock"
	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
	corev1 "k8s.io/api/core/v1"
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/runtime"
	k8sruntime "k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/rand"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/client-go/tools/record"
	"k8s.io/client-go/util/retry"
	"k8s.io/utils/ptr"
	"k8s.io/utils/set"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/client/fake"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/controller/strategy"
	taskscheduler "github.com/alibaba/OpenSandbox/sandbox-k8s/internal/scheduler"
	mock_scheduler "github.com/alibaba/OpenSandbox/sandbox-k8s/internal/scheduler/mock"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils/fieldindex"
)

// init builds testscheme with the core/v1 and sandbox v1alpha1 types
// registered; a registration failure panics through utilruntime.Must.
func init() {
	scheme := k8sruntime.NewScheme()
	utilruntime.Must(corev1.AddToScheme(scheme))
	utilruntime.Must(sandboxv1alpha1.AddToScheme(scheme))
	testscheme = scheme
}

// testscheme is the runtime scheme used by the fake clients in these tests.
var testscheme *k8sruntime.Scheme

var _ = Describe("BatchSandbox Controller", func() {
	var (
		timeout  = 30 * time.Second
		interval = 5 * time.Second
	)
	// None Pooling Mode
	Context("When create new batch sandbox, create pod base on pod template", func() {
		const resourceBaseName = "test-batch-sandbox"

		ctx := context.Background()

		// The name is re-randomized in BeforeEach so each spec gets its own resource.
		typeNamespacedName := types.NamespacedName{
			Name:      resourceBaseName,
			Namespace: "default",
		}

		BeforeEach(func() {
			typeNamespacedName.Name = fmt.Sprintf("%s-%s", resourceBaseName, rand.String(5))
			By(fmt.Sprintf("creating the custom resource %s for the Kind BatchSandbox", typeNamespacedName))
			resource := &sandboxv1alpha1.BatchSandbox{
				ObjectMeta: metav1.ObjectMeta{
					Name:      typeNamespacedName.Name,
					Namespace: typeNamespacedName.Namespace,
				},
				Spec: sandboxv1alpha1.BatchSandboxSpec{
					Replicas: ptr.To(int32(3)),
					Template: &v1.PodTemplateSpec{
						Spec: v1.PodSpec{
							Containers: []v1.Container{
								{
									Name:  "main",
									Image: "example.com",
								},
							},
						},
					},
				},
			}
			Expect(k8sClient.Create(ctx, resource)).Should(Succeed())
			bs := &sandboxv1alpha1.BatchSandbox{}
			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, typeNamespacedName, bs)).To(Succeed())
			}, timeout, interval).Should(Succeed())
			By(fmt.Sprintf("wait the custom resource %s created", typeNamespacedName))
		})

		AfterEach(func() {
			resource := &sandboxv1alpha1.BatchSandbox{}
			err := k8sClient.Get(ctx, typeNamespacedName, resource)
			if errors.IsNotFound(err) {
				// Already removed by the spec itself.
				return
			}
			Expect(err).NotTo(HaveOccurred())
			By(fmt.Sprintf("Cleanup the specific resource instance BatchSandbox %s", typeNamespacedName))
			Expect(k8sClient.Delete(ctx, resource)).To(Succeed())
		})
		It("should successfully create pod, update batch sandbox status, endpoints info", func() {
			wantIPSet := make(set.Set[string])
			podIPMap := make(map[string]string)
			Eventually(func(g Gomega) {
				bs := &sandboxv1alpha1.BatchSandbox{}
				// A failed read must fail this attempt; a bare return would let
				// Eventually succeed without checking anything.
				g.Expect(k8sClient.Get(ctx, typeNamespacedName, bs)).To(Succeed())
				allPods := &corev1.PodList{}
				g.Expect(k8sClient.List(ctx, allPods, &client.ListOptions{Namespace: bs.Namespace})).Should(Succeed())
				pods := []*corev1.Pod{}
				for i := range allPods.Items {
					po := &allPods.Items[i]
					if metav1.IsControlledBy(po, bs) {
						pods = append(pods, po)
						if po.Status.PodIP != "" {
							continue
						}
						// Only some pods get an IP so that both assigned and
						// unassigned endpoints are exercised.
						if i%2 == 0 {
							mockIP := randomIPv4().String()
							wantIPSet.Insert(mockIP)
							podIPMap[po.Name] = mockIP
							po.Status.PodIP = mockIP
							po.Status.Phase = corev1.PodRunning
							po.Status.Conditions = []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionTrue}}
							// g.Expect keeps a transient failure retryable inside Eventually.
							g.Expect(k8sClient.Status().Update(context.Background(), po)).To(Succeed())
						}
					}
				}
				g.Expect(len(pods)).To(Equal(int(*bs.Spec.Replicas)))
				g.Expect(bs.Status.ObservedGeneration).To(Equal(bs.Generation))
				g.Expect(bs.Status.Replicas).To(Equal(*bs.Spec.Replicas))

				gotIPs := []string{}
				if raw := bs.Annotations[AnnotationSandboxEndpoints]; raw != "" {
					g.Expect(json.Unmarshal([]byte(raw), &gotIPs)).To(Succeed())
				}

				podIndex, err := calPodIndex(strategy.NewPoolStrategy(bs), bs, pods)
				g.Expect(err).NotTo(HaveOccurred())
				expectedIPs := make([]string, len(pods))
				for _, pod := range pods {
					idx, ok := podIndex[pod.Name]
					g.Expect(ok).To(BeTrue(), fmt.Sprintf("pod %s should have index", pod.Name))
					if pod.Status.PodIP != "" {
						expectedIPs[idx] = pod.Status.PodIP
					} else {
						expectedIPs[idx] = ""
					}
				}
				g.Expect(gotIPs).To(Equal(expectedIPs), "endpoints should be ordered by pod index, unassigned pods should have empty string")
			}, timeout, interval).Should(Succeed())
		})
		It("should successfully correctly create new Pod and update batch sandbox status when user scale out", func() {
			bs := &sandboxv1alpha1.BatchSandbox{}
			Expect(k8sClient.Get(ctx, typeNamespacedName, bs)).Should(Succeed())
			*bs.Spec.Replicas = *bs.Spec.Replicas + 1 // scale out
			Expect(k8sClient.Update(ctx, bs)).Should(Succeed())
			Eventually(func(g Gomega) {
				batchsandbox := &sandboxv1alpha1.BatchSandbox{}
				g.Expect(k8sClient.Get(ctx, typeNamespacedName, batchsandbox)).To(Succeed())
				g.Expect(batchsandbox.Status.ObservedGeneration).To(Equal(batchsandbox.Generation))
				g.Expect(batchsandbox.Status.Replicas).To(Equal(*batchsandbox.Spec.Replicas))
			}, timeout, interval).Should(Succeed())
			Eventually(func(g Gomega) {
				pods := &v1.PodList{}
				g.Expect(k8sClient.List(ctx, pods, &client.ListOptions{
					Namespace:     bs.Namespace,
					FieldSelector: fields.SelectorFromSet(fields.Set{fieldindex.IndexNameForOwnerRefUID: string(bs.UID)}),
				})).Should(Succeed())
				g.Expect(int32(len(pods.Items))).To(Equal(*bs.Spec.Replicas))
			}, timeout, interval).Should(Succeed())
		})
		It("should successfully correctly supply Pod when pod is deleted unexpectedly", func() {
			Eventually(func(g Gomega) {
				bs := &sandboxv1alpha1.BatchSandbox{}
				g.Expect(k8sClient.Get(ctx, typeNamespacedName, bs)).To(Succeed())
				g.Expect(bs.Status.ObservedGeneration).To(Equal(bs.Generation))
				g.Expect(bs.Status.Replicas).To(Equal(*bs.Spec.Replicas))
			}, timeout, interval).Should(Succeed())
			bs := &sandboxv1alpha1.BatchSandbox{}
			Expect(k8sClient.Get(ctx, typeNamespacedName, bs)).Should(Succeed())
			pods := &v1.PodList{}
			Expect(k8sClient.List(ctx, pods, &client.ListOptions{
				Namespace:     bs.Namespace,
				FieldSelector: fields.SelectorFromSet(fields.Set{fieldindex.IndexNameForOwnerRefUID: string(bs.UID)}),
			})).Should(Succeed())
			Expect(int32(len(pods.Items))).To(Equal(*bs.Spec.Replicas))
			// delete first pod
			oldPod := pods.Items[0]
			Expect(k8sClient.Delete(ctx, &oldPod)).Should(Succeed())
			// wait supply pod
			Eventually(func(g Gomega) {
				newPod := &corev1.Pod{}
				// Keep polling until a pod with the same name exists again;
				// returning on a failed Get would make the wait a no-op.
				g.Expect(k8sClient.Get(ctx, types.NamespacedName{
					Namespace: bs.Namespace,
					Name:      oldPod.Name,
				}, newPod)).To(Succeed())
				g.Expect(newPod.CreationTimestamp).NotTo(Equal(oldPod.CreationTimestamp))
			}, timeout, interval).Should(Succeed())
		})
		It("should delete batch sandbox and related Pods for expired batch sandbox", func() {
			Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error {
				bs := &sandboxv1alpha1.BatchSandbox{}
				if err := k8sClient.Get(ctx, typeNamespacedName, bs); err != nil {
					return err
				}
				bs.Spec.ExpireTime = &metav1.Time{Time: time.Now().Add(3 * time.Second)}
				return k8sClient.Update(ctx, bs)
			})).Should(Succeed())

			Eventually(
				func(g Gomega) {
					bs := &sandboxv1alpha1.BatchSandbox{}
					g.Expect(errors.IsNotFound(k8sClient.Get(ctx, typeNamespacedName, bs))).To(BeTrue())
					// NOTE(review): once Get reports NotFound, bs carries no
					// namespace or UID, so the ownership filter below cannot
					// match any pod and the final assertion always holds.
					// Confirm whether the test environment removes owned Pods
					// before tightening this check.
					allPods := &corev1.PodList{}
					g.Expect(k8sClient.List(ctx, allPods, &client.ListOptions{Namespace: bs.Namespace})).Should(Succeed())
					pods := []*corev1.Pod{}
					for i := range allPods.Items {
						po := &allPods.Items[i]
						if metav1.IsControlledBy(po, bs) {
							pods = append(pods, po)
						}
					}
					g.Expect(len(pods)).To(BeZero())
				},
				timeout, interval).Should(Succeed())
		})
	})

	// None Pooling Mode - Heterogeneous Pods
	Context("When create new batch sandbox with ShardPatches, create heterogeneous pods", func() {
		const resourceBaseName = "test-batch-sandbox-shard"

		ctx := context.Background()

		// The name is re-randomized in BeforeEach so each spec gets its own resource.
		typeNamespacedName := types.NamespacedName{
			Name:      resourceBaseName,
			Namespace: "default",
		}

		BeforeEach(func() {
			typeNamespacedName.Name = fmt.Sprintf("%s-%s", resourceBaseName, rand.String(5))
			By(fmt.Sprintf("creating the custom resource %s for the Kind BatchSandbox with ShardPatches", typeNamespacedName))
			resource := &sandboxv1alpha1.BatchSandbox{
				ObjectMeta: metav1.ObjectMeta{
					Name:      typeNamespacedName.Name,
					Namespace: typeNamespacedName.Namespace,
				},
				Spec: sandboxv1alpha1.BatchSandboxSpec{
					Replicas: ptr.To(int32(3)),
					Template: &v1.PodTemplateSpec{
						Spec: v1.PodSpec{
							Containers: []v1.Container{
								{
									Name:    "main",
									Image:   "example.com",
									Command: []string{"default-command"},
								},
							},
						},
					},
					// One patch per replica index, each overriding the command.
					ShardPatches: []runtime.RawExtension{
						{
							Raw: []byte(`{"spec":{"containers":[{"name":"main","command":["custom-command-0"]}]}}`),
						},
						{
							Raw: []byte(`{"spec":{"containers":[{"name":"main","command":["custom-command-1"]}]}}`),
						},
						{
							Raw: []byte(`{"spec":{"containers":[{"name":"main","command":["custom-command-2"]}]}}`),
						},
					},
				},
			}
			Expect(k8sClient.Create(ctx, resource)).Should(Succeed())
			bs := &sandboxv1alpha1.BatchSandbox{}
			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, typeNamespacedName, bs)).To(Succeed())
			}, timeout, interval).Should(Succeed())
			By(fmt.Sprintf("wait the custom resource %s created", typeNamespacedName))
		})

		AfterEach(func() {
			resource := &sandboxv1alpha1.BatchSandbox{}
			err := k8sClient.Get(ctx, typeNamespacedName, resource)
			if errors.IsNotFound(err) {
				// Nothing left to clean up.
				return
			}
			Expect(err).NotTo(HaveOccurred())
			By(fmt.Sprintf("Cleanup the specific resource instance BatchSandbox %s", typeNamespacedName))
			Expect(k8sClient.Delete(ctx, resource)).To(Succeed())
		})

		It("should successfully create heterogeneous pods with different commands", func() {
			Eventually(func(g Gomega) {
				bs := &sandboxv1alpha1.BatchSandbox{}
				// A failed read must fail this attempt; a bare return would let
				// Eventually succeed without checking anything.
				g.Expect(k8sClient.Get(ctx, typeNamespacedName, bs)).To(Succeed())
				allPods := &corev1.PodList{}
				g.Expect(k8sClient.List(ctx, allPods, &client.ListOptions{Namespace: bs.Namespace})).Should(Succeed())
				pods := []*corev1.Pod{}
				for i := range allPods.Items {
					po := &allPods.Items[i]
					if metav1.IsControlledBy(po, bs) {
						pods = append(pods, po)
					}
				}
				g.Expect(len(pods)).To(Equal(int(*bs.Spec.Replicas)))

				// Verify each pod has the correct patched command
				for _, pod := range pods {
					indexLabel := pod.Labels[LabelBatchSandboxPodIndexKey]
					g.Expect(indexLabel).NotTo(BeEmpty())
					idx, err := strconv.Atoi(indexLabel)
					g.Expect(err).NotTo(HaveOccurred())
					g.Expect(idx).To(BeNumerically(">=", 0))
					g.Expect(idx).To(BeNumerically("<", int(*bs.Spec.Replicas)))

					// Verify the command was patched
					g.Expect(len(pod.Spec.Containers)).To(BeNumerically(">", 0))
					mainContainer := pod.Spec.Containers[0]
					expectedCommand := fmt.Sprintf("custom-command-%d", idx)
					g.Expect(mainContainer.Command).To(Equal([]string{expectedCommand}))
				}

				g.Expect(bs.Status.ObservedGeneration).To(Equal(bs.Generation))
				g.Expect(bs.Status.Replicas).To(Equal(*bs.Spec.Replicas))
			}, timeout, interval).Should(Succeed())
		})
	})

	// Pooling Mode
	Context("When create new batch sandbox, get pod from pool", func() {
		const resourceBaseName = "test-batch-sandbox-pooling-mode"
		var replicas int32 = 3
		ctx := context.Background()

		// The name is re-randomized in BeforeEach so each spec gets its own resource.
		typeNamespacedName := types.NamespacedName{
			Name:      resourceBaseName,
			Namespace: "default",
		}
		BeforeEach(func() {
			typeNamespacedName.Name = fmt.Sprintf("%s-%s", resourceBaseName, rand.String(5))
			By(fmt.Sprintf("creating the custom resource %s for the Kind BatchSandbox", typeNamespacedName))
			resource := &sandboxv1alpha1.BatchSandbox{
				ObjectMeta: metav1.ObjectMeta{
					Name:      typeNamespacedName.Name,
					Namespace: typeNamespacedName.Namespace,
				},
				Spec: sandboxv1alpha1.BatchSandboxSpec{
					Replicas: ptr.To(replicas),
					PoolRef:  "test-pool",
				},
			}
			Expect(k8sClient.Create(ctx, resource)).Should(Succeed())
			bs := &sandboxv1alpha1.BatchSandbox{}
			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, typeNamespacedName, bs)).To(Succeed())
			}, timeout, interval).Should(Succeed())
			By(fmt.Sprintf("wait the custom resource %s created", typeNamespacedName))
		})

		AfterEach(func() {
			resource := &sandboxv1alpha1.BatchSandbox{}
			err := k8sClient.Get(ctx, typeNamespacedName, resource)
			if errors.IsNotFound(err) {
				// Already gone: a Delete on the empty object would fail the cleanup.
				return
			}
			Expect(err).NotTo(HaveOccurred())
			By(fmt.Sprintf("Cleanup the specific resource instance BatchSandbox %s", typeNamespacedName))
			Expect(k8sClient.Delete(ctx, resource)).To(Succeed())
		})

		It("should successfully update batch sandbox status, sbx endpoints info when get pod from pool alloc", func() {
			// mock pool allocation
			mockPods := []string{}
			for i := range replicas {
				po := &corev1.Pod{
					ObjectMeta: metav1.ObjectMeta{
						Namespace: typeNamespacedName.Namespace,
						Name:      fmt.Sprintf("test-pod-%d", i),
					},
					Spec: v1.PodSpec{
						Containers: []v1.Container{
							{Name: "main", Image: "test", Command: []string{"hello"}},
						},
					},
				}
				mockPods = append(mockPods, po.Name)
				Expect(k8sClient.Create(context.Background(), po)).To(Succeed())
				// Only every other pod is marked running with an IP.
				if i%2 == 0 {
					po.Spec.NodeName = "node-1.2.3.4"
					po.Status.PodIP = fmt.Sprintf("1.2.3.%d", i+1)
					po.Status.Phase = corev1.PodRunning
					po.Status.Conditions = []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionTrue}}
				}
				Expect(k8sClient.Status().Update(context.Background(), po)).To(Succeed())
			}
			Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error {
				bs := &sandboxv1alpha1.BatchSandbox{}
				if err := k8sClient.Get(ctx, typeNamespacedName, bs); err != nil {
					return err
				}
				setSandboxAllocation(bs, SandboxAllocation{Pods: mockPods})
				return k8sClient.Update(ctx, bs)
			})).Should(Succeed())
			By(fmt.Sprintf("Mock pool allocate Pod %v for BatchSandbox %s", mockPods, typeNamespacedName))

			Eventually(func(g Gomega) {
				bs := &sandboxv1alpha1.BatchSandbox{}
				// A failed read must fail this attempt; a bare return would let
				// Eventually succeed without checking anything.
				g.Expect(k8sClient.Get(ctx, typeNamespacedName, bs)).To(Succeed())
				g.Expect(bs.Status.ObservedGeneration).To(Equal(bs.Generation))
				g.Expect(bs.Status.Replicas).To(Equal(*bs.Spec.Replicas))

				gotIPs := []string{}
				if raw := bs.Annotations[AnnotationSandboxEndpoints]; raw != "" {
					g.Expect(json.Unmarshal([]byte(raw), &gotIPs)).To(Succeed())
				}

				alloc, err := parseSandboxAllocation(bs)
				g.Expect(err).NotTo(HaveOccurred())
				expectedIPs := make([]string, len(alloc.Pods))
				for idx, podName := range alloc.Pods {
					pod := &corev1.Pod{}
					err := k8sClient.Get(ctx, types.NamespacedName{Namespace: bs.Namespace, Name: podName}, pod)
					g.Expect(err).NotTo(HaveOccurred())
					if pod.Spec.NodeName != "" || pod.Status.PodIP != "" {
						expectedIPs[idx] = pod.Status.PodIP
					} else {
						expectedIPs[idx] = ""
					}
				}
				g.Expect(gotIPs).To(Equal(expectedIPs), "endpoints should be ordered by pool allocation order, unassigned pods should have empty string")
			}, timeout, interval).Should(Succeed())
		})
	})
})

// randomIPv4 returns a 4-byte IP address whose octets are each drawn uniformly
// from [0, 256).
//
// The shared generator of the rand package is used as is. Re-seeding it from
// the wall clock on every call added nothing and could repeat a seed, and so an
// address, when two calls fell on the same clock reading.
func randomIPv4() net.IP {
	ip := make(net.IP, 4)
	for i := range ip {
		ip[i] = byte(rand.Intn(256))
	}
	return ip
}

var _ = Describe("BatchSandbox Task Scheduler", func() {
	var (
		timeout  = 30 * time.Second
		interval = 5 * time.Second
	)
	// None Pooling mode
	Context("When create new batch sandbox, create pod base on pod template", func() {
		const resourceBaseName = "test-task-batch-sandbox"

		ctx := context.Background()

		// The name is re-randomized in BeforeEach so each spec gets its own resource.
		typeNamespacedName := types.NamespacedName{
			Name:      resourceBaseName,
			Namespace: "default",
		}

		BeforeEach(func() {
			typeNamespacedName.Name = fmt.Sprintf("%s-%s", resourceBaseName, rand.String(5))
			By(fmt.Sprintf("creating the custom resource %s for the Kind BatchSandbox", typeNamespacedName))
			resource := &sandboxv1alpha1.BatchSandbox{
				ObjectMeta: metav1.ObjectMeta{
					Name:      typeNamespacedName.Name,
					Namespace: typeNamespacedName.Namespace,
				},
				Spec: sandboxv1alpha1.BatchSandboxSpec{
					Replicas: ptr.To(int32(1)),
					Template: &v1.PodTemplateSpec{
						Spec: v1.PodSpec{
							Containers: []v1.Container{
								{
									Name:  "main",
									Image: "example.com",
								},
							},
						},
					},
					TaskTemplate: &sandboxv1alpha1.TaskTemplateSpec{
						Spec: sandboxv1alpha1.TaskSpec{
							Process: &sandboxv1alpha1.ProcessTask{
								Command: []string{"echo", "hello"},
							},
						},
					},
				},
			}
			Expect(k8sClient.Create(ctx, resource)).Should(Succeed())
			bs := &sandboxv1alpha1.BatchSandbox{}
			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, typeNamespacedName, bs)).To(Succeed())
			}, timeout, interval).Should(Succeed())
			By(fmt.Sprintf("wait the custom resource %s created", typeNamespacedName))
		})

		AfterEach(func() {
			resource := &sandboxv1alpha1.BatchSandbox{}
			err := k8sClient.Get(ctx, typeNamespacedName, resource)
			if errors.IsNotFound(err) {
				// resource is already deleted
				return
			}
			Expect(err).NotTo(HaveOccurred())
			By(fmt.Sprintf("Cleanup the specific resource instance BatchSandbox %s", typeNamespacedName))
			Expect(k8sClient.Delete(ctx, resource)).To(Succeed())
		})

		It("should successfully add task cleanup finalizer", func() {
			Eventually(func(g Gomega) {
				bs := &sandboxv1alpha1.BatchSandbox{}
				// A failed read must fail this attempt; a bare return would let
				// Eventually succeed without checking anything.
				g.Expect(k8sClient.Get(ctx, typeNamespacedName, bs)).To(Succeed())
				g.Expect(controllerutil.ContainsFinalizer(bs, FinalizerTaskCleanup)).To(BeTrue())
			}, timeout, interval).Should(Succeed())
		})

		It("should successfully update task status(task_pending=1), because all pods is unassigned", func() {
			Eventually(func(g Gomega) {
				bs := &sandboxv1alpha1.BatchSandbox{}
				g.Expect(k8sClient.Get(ctx, typeNamespacedName, bs)).To(Succeed())
				g.Expect(bs.Status.ObservedGeneration).To(Equal(bs.Generation))
				g.Expect(bs.Status.Replicas).To(Equal(*bs.Spec.Replicas))

				g.Expect(bs.Status.TaskPending).To(Equal(*bs.Spec.Replicas))
				g.Expect(bs.Status.TaskRunning).To(Equal(int32(0)))
				g.Expect(bs.Status.TaskSucceed).To(Equal(int32(0)))
				g.Expect(bs.Status.TaskFailed).To(Equal(int32(0)))
				g.Expect(bs.Status.TaskUnknown).To(Equal(int32(0)))
			}, timeout, interval).Should(Succeed())
		})

		It("should successfully delete BatchSandbox when all tasks(including pending task) cleanup is finished", func() {
			bs := &sandboxv1alpha1.BatchSandbox{}
			Expect(k8sClient.Get(ctx, typeNamespacedName, bs)).To(Succeed())
			// Wait for the finalizer first, so the delete below goes through
			// the task cleanup path.
			Eventually(func(g Gomega) {
				bs := &sandboxv1alpha1.BatchSandbox{}
				// g.Expect keeps a transient failure retryable inside Eventually.
				g.Expect(k8sClient.Get(ctx, typeNamespacedName, bs)).To(Succeed())
				g.Expect(controllerutil.ContainsFinalizer(bs, FinalizerTaskCleanup)).To(BeTrue())
			}, timeout, interval).Should(Succeed())

			By(fmt.Sprintf("try to Delete BatchSandbox %s", typeNamespacedName))
			Expect(k8sClient.Delete(ctx, bs)).To(Succeed())

			Eventually(func(g Gomega) {
				bs := &sandboxv1alpha1.BatchSandbox{}
				err := k8sClient.Get(ctx, typeNamespacedName, bs)
				g.Expect(errors.IsNotFound(err)).To(BeTrue())
			}, timeout, interval).Should(Succeed())
		})
	})
})

// TestBatchSandboxReconciler_scheduleTasks exercises scheduleTasks against a
// mocked TaskScheduler. It covers a failing Schedule call and a run with one
// succeeded task whose resources were released; for the latter the
// BatchSandbox is read back from the fake client and inspected.
func TestBatchSandboxReconciler_scheduleTasks(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()
	var (
		// Seed object stored in the fake client for cases that persist state.
		fakeBatchSandbox = &sandboxv1alpha1.BatchSandbox{
			TypeMeta: metav1.TypeMeta{
				APIVersion: sandboxv1alpha1.GroupVersion.String(),
				Kind:       "BatchSandbox",
			},
			ObjectMeta: metav1.ObjectMeta{
				Name: "test-batch-sandbox",
			},
			Spec:   sandboxv1alpha1.BatchSandboxSpec{},
			Status: sandboxv1alpha1.BatchSandboxStatus{},
		}
	)
	type fields struct {
		Client         client.Client
		Scheme         *runtime.Scheme
		Recorder       record.EventRecorder
		taskSchedulers sync.Map
	}
	type args struct {
		ctx      context.Context
		tSch     taskscheduler.TaskScheduler
		batchSbx *sandboxv1alpha1.BatchSandbox
	}
	tests := []struct {
		name    string
		fields  fields
		args    args
		wantErr bool
		// batchSandboxChecker, when set, validates the object re-read from
		// fields.Client after scheduleTasks returns.
		batchSandboxChecker func(bsbx *sandboxv1alpha1.BatchSandbox) error
	}{
		{
			name: "schedule err",
			args: args{
				tSch: func() taskscheduler.TaskScheduler {
					mockSche := mock_scheduler.NewMockTaskScheduler(ctrl)
					mockSche.EXPECT().Schedule().Return(gerrors.New("err")).Times(1)
					return mockSche
				}(),
			},
			wantErr: true,
		},
		{
			name: "tasks, succeed=1; releasedPod=1",
			fields: fields{
				Client: fake.NewClientBuilder().WithScheme(testscheme).WithObjects(fakeBatchSandbox).WithStatusSubresource(fakeBatchSandbox).Build(),
			},
			args: args{
				tSch: func() taskscheduler.TaskScheduler {
					mockSche := mock_scheduler.NewMockTaskScheduler(ctrl)
					mockSche.EXPECT().Schedule().Return(nil).Times(1)
					mockTask := mock_scheduler.NewMockTask(ctrl)
					mockTask.EXPECT().GetState().Return(taskscheduler.SucceedTaskState).Times(1)
					mockTask.EXPECT().IsResourceReleased().Return(true).Times(1)
					mockTask.EXPECT().GetPodName().Return("pod-0").AnyTimes()
					mockSche.EXPECT().ListTask().Return([]taskscheduler.Task{mockTask}).Times(1)
					return mockSche
				}(),
				batchSbx: fakeBatchSandbox.DeepCopy(),
			},
			batchSandboxChecker: func(bsbx *sandboxv1alpha1.BatchSandbox) error {
				release, err := parseSandboxReleased(bsbx)
				if err != nil {
					return err
				}
				if len(release.Pods) != 1 || release.Pods[0] != "pod-0" {
					return fmt.Errorf("expect pod-0, actual %v", release.Pods)
				}
				// Check task counters in status; report the counter that was
				// actually compared so a failure shows the offending value.
				if bsbx.Status.TaskSucceed != 1 {
					return fmt.Errorf("expect status.succeed=1, actual %d", bsbx.Status.TaskSucceed)
				}
				if bsbx.Status.TaskRunning != 0 || bsbx.Status.TaskFailed != 0 || bsbx.Status.TaskUnknown != 0 {
					return fmt.Errorf("expect status.running=0,failed=0,unknown=0, actual %v", bsbx.Status)
				}
				return nil
			},
		},
	}
	for i := range tests {
		tt := &tests[i]
		t.Run(tt.name, func(t *testing.T) {
			r := &BatchSandboxReconciler{
				Client:   tt.fields.Client,
				Scheme:   tt.fields.Scheme,
				Recorder: tt.fields.Recorder,
			}
			if err := r.scheduleTasks(tt.args.ctx, tt.args.tSch, tt.args.batchSbx); (err != nil) != tt.wantErr {
				t.Errorf("BatchSandboxReconciler.scheduleTasks() error = %v, wantErr %v", err, tt.wantErr)
			}
			if tt.batchSandboxChecker != nil {
				bsbx := &sandboxv1alpha1.BatchSandbox{}
				if err := tt.fields.Client.Get(ctx, types.NamespacedName{Namespace: tt.args.batchSbx.Namespace, Name: tt.args.batchSbx.Name}, bsbx); err != nil {
					// Without the stored object the checker would only
					// inspect a zero value and add misleading failures.
					t.Fatalf("BatchSandboxReconciler Get() error = %v, wantErr %v", err, nil)
				}
				if err := tt.batchSandboxChecker(bsbx); err != nil {
					t.Errorf("BatchSandboxReconciler batchSandboxChecker() error = %v, wantErr %v", err, nil)
				}
			}
		})
	}
}

// Test_parseIndex checks the index parseIndex returns for a pod: the index
// label is preferred when present, the name suffix is used otherwise, and a
// name without a numeric suffix yields -1 together with an error.
func Test_parseIndex(t *testing.T) {
	cases := []struct {
		name      string
		pod       *corev1.Pod
		wantIndex int
		wantErr   bool
	}{
		{
			name: "from label",
			pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{
				Name:   "sbx-0",
				Labels: map[string]string{LabelBatchSandboxPodIndexKey: "1"},
			}},
			wantIndex: 1,
		},
		{
			name:      "from name",
			pod:       &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "sbx-0"}},
			wantIndex: 0,
		},
		{
			name:      "invalid name",
			pod:       &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "sbx"}},
			wantIndex: -1,
			wantErr:   true,
		},
	}
	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			idx, err := parseIndex(tc.pod)
			if failed := err != nil; failed != tc.wantErr {
				t.Errorf("parseIndex() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if idx != tc.wantIndex {
				t.Errorf("parseIndex() = %v, want %v", idx, tc.wantIndex)
			}
		})
	}
}

// Test_calPodIndex covers calPodIndex for BatchSandboxes that reference a pool
// (indices taken from the allocation annotation) and for those that do not
// (indices parsed from each pod's label or name).
func Test_calPodIndex(t *testing.T) {
	// newBatch builds the BatchSandbox fixture shared by every case.
	newBatch := func(spec sandboxv1alpha1.BatchSandboxSpec, annotations map[string]string) *sandboxv1alpha1.BatchSandbox {
		return &sandboxv1alpha1.BatchSandbox{
			ObjectMeta: metav1.ObjectMeta{
				Name:        "test-batch",
				Namespace:   "default",
				Annotations: annotations,
			},
			Spec: spec,
		}
	}
	// pooled returns a fixture that references "test-pool"; a nil map leaves
	// the annotations unset.
	pooled := func(annotations map[string]string) *sandboxv1alpha1.BatchSandbox {
		return newBatch(sandboxv1alpha1.BatchSandboxSpec{PoolRef: "test-pool"}, annotations)
	}
	// standalone returns a fixture without a pool reference.
	standalone := func(replicas int32) *sandboxv1alpha1.BatchSandbox {
		return newBatch(sandboxv1alpha1.BatchSandboxSpec{Replicas: ptr.To(replicas)}, nil)
	}
	// named builds a pod carrying only a name.
	named := func(name string) *corev1.Pod {
		return &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: name}}
	}
	// indexed builds a pod carrying a name and the index label.
	indexed := func(name, index string) *corev1.Pod {
		return &corev1.Pod{ObjectMeta: metav1.ObjectMeta{
			Name:   name,
			Labels: map[string]string{LabelBatchSandboxPodIndexKey: index},
		}}
	}

	cases := []struct {
		name     string
		batchSbx *sandboxv1alpha1.BatchSandbox
		pods     []*corev1.Pod
		want     map[string]int
		wantErr  bool
	}{
		{
			name: "pool mode - valid allocation",
			batchSbx: pooled(map[string]string{
				AnnoAllocStatusKey: `{"pods":["pod-0","pod-1","pod-2"]}`,
			}),
			pods: []*corev1.Pod{named("pod-0"), named("pod-1"), named("pod-2")},
			want: map[string]int{"pod-0": 0, "pod-1": 1, "pod-2": 2},
		},
		{
			name:     "pool mode - allocation annotation missing",
			batchSbx: pooled(nil),
			pods:     []*corev1.Pod{named("pod-0")},
			want:     map[string]int{},
		},
		{
			name: "pool mode - invalid allocation json",
			batchSbx: pooled(map[string]string{
				AnnoAllocStatusKey: `invalid-json`,
			}),
			pods:    []*corev1.Pod{},
			want:    nil,
			wantErr: true,
		},
		{
			name: "pool mode - pods not in allocation list",
			batchSbx: pooled(map[string]string{
				AnnoAllocStatusKey: `{"pods":["pod-0","pod-1"]}`,
			}),
			pods: []*corev1.Pod{named("pod-0"), named("pod-1"), named("pod-2")},
			want: map[string]int{"pod-0": 0, "pod-1": 1},
		},
		{
			name:     "non-pool mode - parse from pod labels",
			batchSbx: standalone(3),
			pods: []*corev1.Pod{
				indexed("test-batch-0", "0"),
				indexed("test-batch-1", "1"),
				indexed("test-batch-2", "2"),
			},
			want: map[string]int{"test-batch-0": 0, "test-batch-1": 1, "test-batch-2": 2},
		},
		{
			name:     "non-pool mode - parse from pod names",
			batchSbx: standalone(3),
			pods: []*corev1.Pod{
				named("test-batch-0"),
				named("test-batch-1"),
				named("test-batch-2"),
			},
			want: map[string]int{"test-batch-0": 0, "test-batch-1": 1, "test-batch-2": 2},
		},
		{
			name:     "non-pool mode - invalid pod name",
			batchSbx: standalone(1),
			pods:     []*corev1.Pod{named("invalid-name-no-index")},
			want:     nil,
			wantErr:  true,
		},
		{
			name:     "non-pool mode - empty pods list",
			batchSbx: standalone(0),
			pods:     []*corev1.Pod{},
			want:     map[string]int{},
		},
		{
			name:     "non-pool mode - mixed label and name parsing",
			batchSbx: standalone(3),
			pods: []*corev1.Pod{
				// The label value wins over the "-0" name suffix here.
				indexed("test-batch-0", "5"),
				named("test-batch-1"),
			},
			want: map[string]int{"test-batch-0": 5, "test-batch-1": 1},
		},
	}
	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			poolStrategy := strategy.NewPoolStrategy(tc.batchSbx)
			indexByPod, err := calPodIndex(poolStrategy, tc.batchSbx, tc.pods)
			if failed := err != nil; failed != tc.wantErr {
				t.Errorf("calPodIndex() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if !reflect.DeepEqual(indexByPod, tc.want) {
				t.Errorf("calPodIndex() = %v, want %v", indexByPod, tc.want)
			}
		})
	}
}


================================================
FILE: kubernetes/internal/controller/pool_controller.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	gerrors "errors"
	"fmt"
	"sort"
	"time"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/equality"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/json"
	"k8s.io/client-go/tools/record"
	"k8s.io/client-go/util/retry"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/builder"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/event"
	"sigs.k8s.io/controller-runtime/pkg/handler"
	logf "sigs.k8s.io/controller-runtime/pkg/log"
	"sigs.k8s.io/controller-runtime/pkg/predicate"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils"
	controllerutils "github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils/controller"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils/expectations"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils/fieldindex"
)

const (
	// defaultRetryTime is the requeue delay reconcilePool uses when sandboxes
	// still need pods while unallocated pods exist in the pool.
	defaultRetryTime = 5 * time.Second
)

// Labels stamped on every pod created by createPoolPod.
const (
	// LabelPoolName records the name of the Pool that created the pod.
	LabelPoolName = "sandbox.opensandbox.io/pool-name"
	// LabelPoolRevision records the pool revision the pod was created from;
	// updatePool compares it with the latest revision to find outdated idle pods.
	LabelPoolRevision = "sandbox.opensandbox.io/pool-revision"
)

var (
	// PoolScaleExpectations tracks pod creations issued by the pool controller:
	// createPoolPod registers them, Reconcile observes them when listing pods,
	// and scalePool refuses to scale until they are satisfied.
	PoolScaleExpectations = expectations.NewScaleExpectations()
)

// PoolReconciler reconciles a Pool object: it schedules pool pods to the
// BatchSandboxes that reference the pool, creates or deletes pods to match
// the pool's capacity settings, and records the outcome in the Pool status.
type PoolReconciler struct {
	// Client is the embedded Kubernetes client used for all reads and writes.
	client.Client
	// Scheme is used when setting the controller reference on created pods.
	Scheme *runtime.Scheme
	// Recorder emits events on the Pool for pod creation outcomes.
	Recorder record.EventRecorder
	// Allocator computes the pod allocation and persists it for the Pool.
	Allocator Allocator
}

// +kubebuilder:rbac:groups=sandbox.opensandbox.io,resources=pools,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=sandbox.opensandbox.io,resources=pools/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=sandbox.opensandbox.io,resources=pools/finalizers,verbs=update
// +kubebuilder:rbac:groups=sandbox.opensandbox.io,resources=batchsandboxes,verbs=get;list;watch;patch
// +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=core,resources=pods/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;update;patch;delete

// Reconcile collects the inputs for one reconciliation pass of a Pool and
// hands them to reconcilePool.
//
// It loads the Pool named by req, drops the pool's scale expectations when the
// Pool is gone or being deleted, lists the pods owned by the Pool (skipping
// those already terminating) and the BatchSandboxes referencing it (skipping
// those that carry their own template).
func (r *PoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	logger := logf.FromContext(ctx)

	pool := &sandboxv1alpha1.Pool{}
	switch err := r.Get(ctx, req.NamespacedName, pool); {
	case err == nil:
		// Pool found; carry on below.
	case errors.IsNotFound(err):
		// The Pool may have been deleted; forget its expectations.
		controllerKey := req.NamespacedName.String()
		PoolScaleExpectations.DeleteExpectations(controllerKey)
		logger.Info("Pool resource not found, cleaned up scale expectations", "pool", controllerKey)
		return ctrl.Result{}, nil
	default:
		// Returning the error requeues the request.
		logger.Error(err, "Failed to get Pool")
		return ctrl.Result{}, err
	}

	if !pool.DeletionTimestamp.IsZero() {
		controllerKey := controllerutils.GetControllerKey(pool)
		PoolScaleExpectations.DeleteExpectations(controllerKey)
		logger.Info("Pool resource is being deleted, cleaned up scale expectations", "pool", controllerKey)
		return ctrl.Result{}, nil
	}

	// Pods owned by this pool, looked up through the owner-UID field index.
	podList := &corev1.PodList{}
	podListOpts := &client.ListOptions{
		Namespace:     pool.Namespace,
		FieldSelector: fields.SelectorFromSet(fields.Set{fieldindex.IndexNameForOwnerRefUID: string(pool.UID)}),
	}
	if err := r.List(ctx, podList, podListOpts); err != nil {
		logger.Error(err, "Failed to list pods")
		return reconcile.Result{}, err
	}
	pods := make([]*corev1.Pod, 0, len(podList.Items))
	for idx := range podList.Items {
		listed := podList.Items[idx]
		// Every listed pod counts as an observed creation, terminating or not.
		PoolScaleExpectations.ObserveScale(controllerutils.GetControllerKey(pool), expectations.Create, listed.Name)
		if !listed.DeletionTimestamp.IsZero() {
			continue
		}
		pods = append(pods, &listed)
	}

	// BatchSandboxes referencing this pool, looked up through the pool-ref field index.
	batchSandboxList := &sandboxv1alpha1.BatchSandboxList{}
	sandboxListOpts := &client.ListOptions{
		Namespace:     pool.Namespace,
		FieldSelector: fields.SelectorFromSet(fields.Set{fieldindex.IndexNameForPoolRef: pool.Name}),
	}
	if err := r.List(ctx, batchSandboxList, sandboxListOpts); err != nil {
		logger.Error(err, "Failed to list batch sandboxes")
		return reconcile.Result{}, err
	}
	batchSandboxes := make([]*sandboxv1alpha1.BatchSandbox, 0, len(batchSandboxList.Items))
	for idx := range batchSandboxList.Items {
		listed := batchSandboxList.Items[idx]
		if listed.Spec.Template == nil {
			batchSandboxes = append(batchSandboxes, &listed)
		}
	}

	// Main reconciliation logic.
	return r.reconcilePool(ctx, pool, batchSandboxes, pods)
}

// reconcilePool runs the main reconciliation steps inside
// retry.RetryOnConflict: re-read the Pool, schedule pods to sandboxes,
// persist the allocation when it changed, scale the pool, and update the
// status. A delayed requeue is requested when sandboxes still need pods while
// unallocated pods exist.
func (r *PoolReconciler) reconcilePool(ctx context.Context, pool *sandboxv1alpha1.Pool, batchSandboxes []*sandboxv1alpha1.BatchSandbox, pods []*corev1.Pod) (ctrl.Result, error) {
	logger := logf.FromContext(ctx)
	var result ctrl.Result

	attempt := func() error {
		// 1. Work on the latest Pool CR on every attempt.
		latestPool := &sandboxv1alpha1.Pool{}
		if err := r.Get(ctx, client.ObjectKeyFromObject(pool), latestPool); err != nil {
			return err
		}

		// 2. Schedule and allocate.
		podAllocation, idlePods, supplySandbox, poolDirty, err := r.scheduleSandbox(ctx, latestPool, batchSandboxes, pods)
		if err != nil {
			return err
		}

		// Some idle pods may be pending; retry scheduling later.
		requeueLater := supplySandbox > 0 && len(idlePods) > 0

		// Idle pods already cover (part of) the demand, so only the
		// shortfall beyond them needs to be created.
		if idleCnt := int32(len(idlePods)); supplySandbox > idleCnt {
			supplySandbox -= idleCnt
		} else {
			supplySandbox = 0
		}

		// 3. Persist allocation if needed (updates annotations).
		if poolDirty {
			allocStatus := &AllocStatus{PodAllocation: podAllocation}
			if err := r.Allocator.PersistPoolAllocation(ctx, latestPool, allocStatus); err != nil {
				logger.Error(err, "Failed to persist pool allocation")
				return err
			}
		}

		// 4. Update revision and scale (pod creation/deletion, no Pool CR update).
		latestRevision, err := r.calculateRevision(latestPool)
		if err != nil {
			return err
		}
		latestIdlePods, deleteOld, supplyNew := r.updatePool(latestRevision, pods, idlePods)
		if err := r.scalePool(ctx, &scaleArgs{
			latestRevision: latestRevision,
			pool:           latestPool,
			pods:           pods,
			allocatedCnt:   int32(len(podAllocation)),
			idlePods:       latestIdlePods,
			redundantPods:  deleteOld,
			supplyCnt:      supplySandbox + supplyNew,
		}); err != nil {
			return err
		}

		// 5. Update status on latestPool.
		if err := r.updatePoolStatus(ctx, latestRevision, latestPool, pods, podAllocation); err != nil {
			return err
		}

		if requeueLater {
			result = ctrl.Result{RequeueAfter: defaultRetryTime}
		}
		return nil
	}

	err := retry.RetryOnConflict(retry.DefaultBackoff, attempt)
	return result, err
}

// calculateRevision returns the revision of the pool's pod template: the
// first 8 bytes of the SHA-256 digest of the JSON-encoded template, hex
// encoded. It returns an error when the template cannot be marshalled.
func (r *PoolReconciler) calculateRevision(pool *sandboxv1alpha1.Pool) (string, error) {
	const revisionBytes = 8

	encoded, err := json.Marshal(pool.Spec.Template)
	if err != nil {
		return "", err
	}
	digest := sha256.Sum256(encoded)
	short := digest[:revisionBytes]
	return hex.EncodeToString(short), nil
}

// SetupWithManager sets up the controller with the Manager.
//
// The controller reconciles Pools on generation changes, owns the pods it
// creates, and watches BatchSandboxes that reference a pool. BatchSandbox
// updates are only forwarded when the release annotation or the replica
// count changed.
//
// TODO: pod deletion expectations
func (r *PoolReconciler) SetupWithManager(mgr ctrl.Manager) error {
	// referencesPool reports whether obj is a BatchSandbox bound to a pool.
	referencesPool := func(obj client.Object) bool {
		bsb, ok := obj.(*sandboxv1alpha1.BatchSandbox)
		return ok && bsb.Spec.PoolRef != ""
	}

	// replicasChanged compares the replica counts by value. Spec.Replicas is
	// a pointer, and the old and new objects of an update event never share
	// that pointer, so comparing the pointers themselves would report a
	// change on every update.
	replicasChanged := func(oldReplicas, newReplicas *int32) bool {
		if oldReplicas == nil || newReplicas == nil {
			return oldReplicas != newReplicas
		}
		return *oldReplicas != *newReplicas
	}

	filterBatchSandbox := predicate.Funcs{
		CreateFunc: func(e event.CreateEvent) bool {
			return referencesPool(e.Object)
		},
		UpdateFunc: func(e event.UpdateEvent) bool {
			oldObj, okOld := e.ObjectOld.(*sandboxv1alpha1.BatchSandbox)
			newObj, okNew := e.ObjectNew.(*sandboxv1alpha1.BatchSandbox)
			if !okOld || !okNew {
				return false
			}
			if newObj.Spec.PoolRef == "" {
				return false
			}
			if oldObj.Annotations[AnnoAllocReleaseKey] != newObj.Annotations[AnnoAllocReleaseKey] {
				return true
			}
			return replicasChanged(oldObj.Spec.Replicas, newObj.Spec.Replicas)
		},
		DeleteFunc: func(e event.DeleteEvent) bool {
			return referencesPool(e.Object)
		},
		GenericFunc: func(e event.GenericEvent) bool {
			return referencesPool(e.Object)
		},
	}

	// findPoolForBatchSandbox maps a BatchSandbox event to a reconcile
	// request for the pool it references, in the sandbox's namespace.
	findPoolForBatchSandbox := func(ctx context.Context, obj client.Object) []reconcile.Request {
		log := logf.FromContext(ctx)
		batchSandbox, ok := obj.(*sandboxv1alpha1.BatchSandbox)
		if !ok {
			log.Error(nil, "Invalid object type, expected BatchSandbox")
			return nil
		}
		return []reconcile.Request{
			{
				NamespacedName: types.NamespacedName{
					Namespace: batchSandbox.Namespace,
					Name:      batchSandbox.Spec.PoolRef,
				},
			},
		}
	}

	return ctrl.NewControllerManagedBy(mgr).
		For(&sandboxv1alpha1.Pool{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})).
		Owns(&corev1.Pod{}).
		Watches(
			&sandboxv1alpha1.BatchSandbox{},
			handler.EnqueueRequestsFromMapFunc(findPoolForBatchSandbox),
			builder.WithPredicates(filterBatchSandbox),
		).
		Named("pool").
		Complete(r)
}

// scheduleSandbox asks the Allocator to schedule the pool's pods to the given
// BatchSandboxes. It returns the resulting pod allocation map, the names of
// pods absent from that allocation (idle pods), the supplement count reported
// by the allocator, and whether the allocation needs to be persisted.
func (r *PoolReconciler) scheduleSandbox(ctx context.Context, pool *sandboxv1alpha1.Pool, batchSandboxes []*sandboxv1alpha1.BatchSandbox, pods []*corev1.Pod) (map[string]string, []string, int32, bool, error) {
	status, poolDirty, err := r.Allocator.Schedule(ctx, &AllocSpec{
		Sandboxes: batchSandboxes,
		Pool:      pool,
		Pods:      pods,
	})
	if err != nil {
		return nil, nil, 0, false, err
	}

	// A pod is idle when the allocator did not assign it to anything.
	idlePods := make([]string, 0)
	for _, candidate := range pods {
		if _, allocated := status.PodAllocation[candidate.Name]; allocated {
			continue
		}
		idlePods = append(idlePods, candidate.Name)
	}
	return status.PodAllocation, idlePods, status.PodSupplement, poolDirty, nil
}

// updatePool splits the idle pods by revision. Idle pods labelled with
// latestRevision are returned first; idle pods on any other revision are
// returned second as pods to delete, and the third result counts how many
// replacements must be created for them. Idle names that are not found in
// pods are ignored.
func (r *PoolReconciler) updatePool(latestRevision string, pods []*corev1.Pod, idlePods []string) ([]string, []string, int32) {
	byName := make(map[string]*corev1.Pod)
	for _, p := range pods {
		byName[p.Name] = p
	}

	upToDate := make([]string, 0)
	outdated := make([]string, 0)
	replacements := int32(0)
	for _, idleName := range idlePods {
		idlePod, known := byName[idleName]
		if !known {
			continue
		}
		if idlePod.Labels[LabelPoolRevision] != latestRevision {
			// Rolling update: delete the old idle pod and create a new one.
			outdated = append(outdated, idleName)
			replacements++
			continue
		}
		upToDate = append(upToDate, idleName)
	}
	return upToDate, outdated, replacements
}

// scaleArgs bundles the inputs of scalePool.
type scaleArgs struct {
	// latestRevision is the revision label applied to newly created pods.
	latestRevision string
	// pool is the Pool being scaled; its CapacitySpec bounds the result.
	pool *sandboxv1alpha1.Pool
	// pods are the pool's current pods; their count is the current total.
	pods []*corev1.Pod
	// allocatedCnt is the number of entries in the pod allocation.
	allocatedCnt int32
	// supplyCnt is the number of additional pods to create.
	supplyCnt int32 // to create
	// idlePods names the unallocated pods on the latest revision; scale-in
	// picks its victims from them.
	idlePods []string
	// redundantPods names the outdated idle pods that are deleted whenever
	// the delete branch of scalePool runs.
	redundantPods []string
}

// scalePool creates or deletes pods so that the pool reaches its desired
// size. It returns an error without scaling while earlier scale expectations
// are still unsatisfied.
//
// The desired buffer is the current buffer (total minus allocated) when it
// lies within [BufferMin, BufferMax], otherwise the midpoint of that range.
// The desired total is allocated + supply + desired buffer, clamped to
// [PoolMin, PoolMax]. Errors from individual create/delete calls are logged,
// collected, and returned joined.
func (r *PoolReconciler) scalePool(ctx context.Context, args *scaleArgs) error {
	logger := logf.FromContext(ctx)
	pool := args.pool

	satisfied, unsatisfiedDuration, dirtyPods := PoolScaleExpectations.SatisfiedExpectations(controllerutils.GetControllerKey(pool))
	if !satisfied {
		logger.Info("Pool scale is not ready, requeue", "unsatisfiedDuration", unsatisfiedDuration, "dirtyPods", dirtyPods)
		return fmt.Errorf("pool scale is not ready, %v", pool.Name)
	}

	capacity := pool.Spec.CapacitySpec
	currentTotal := int32(len(args.pods))
	currentBuffer := currentTotal - args.allocatedCnt

	// Desired buffer: keep it when in range, otherwise aim for the midpoint.
	targetBuffer := currentBuffer
	if currentBuffer < capacity.BufferMin || currentBuffer > capacity.BufferMax {
		targetBuffer = (capacity.BufferMin + capacity.BufferMax) / 2
	}

	// Desired total, clamped to the pool bounds.
	targetTotal := args.allocatedCnt + args.supplyCnt + targetBuffer
	switch {
	case targetTotal < capacity.PoolMin:
		targetTotal = capacity.PoolMin
	case targetTotal > capacity.PoolMax:
		targetTotal = capacity.PoolMax
	}

	var errs []error
	switch {
	case targetTotal > currentTotal:
		// Scale out.
		for remaining := targetTotal - currentTotal; remaining > 0; remaining-- {
			if err := r.createPoolPod(ctx, pool, args.latestRevision); err != nil {
				logger.Error(err, "Failed to create pool pod")
				errs = append(errs, err)
			}
		}
	case targetTotal < currentTotal || len(args.redundantPods) > 0:
		// Scale in and/or remove outdated idle pods.
		scaleIn := int32(0)
		if targetTotal < currentTotal {
			scaleIn = currentTotal - targetTotal
		}
		for _, victim := range r.pickPodsToDelete(args.pods, args.idlePods, args.redundantPods, scaleIn) {
			if err := r.Delete(ctx, victim); err != nil {
				logger.Error(err, "Failed to delete pool pod")
				errs = append(errs, err)
			}
		}
	}
	return gerrors.Join(errs...)
}

// updatePoolStatus recomputes the Pool status from the current pods and
// allocation and writes it through the status subresource, skipping the
// write when nothing changed.
//
// Available counts pods that are not in podAllocation and are in the Running
// phase; Total is the number of pods; Allocated is the size of podAllocation.
func (r *PoolReconciler) updatePoolStatus(ctx context.Context, latestRevision string, pool *sandboxv1alpha1.Pool, pods []*corev1.Pod, podAllocation map[string]string) error {
	oldStatus := pool.Status.DeepCopy()
	availableCnt := int32(0)
	for _, pod := range pods {
		if _, ok := podAllocation[pod.Name]; ok {
			continue
		}
		if pod.Status.Phase != corev1.PodRunning {
			continue
		}
		availableCnt++
	}
	pool.Status.ObservedGeneration = pool.Generation
	pool.Status.Total = int32(len(pods))
	pool.Status.Allocated = int32(len(podAllocation))
	pool.Status.Available = availableCnt
	pool.Status.Revision = latestRevision
	// oldStatus is a pointer (the DeepCopy result), so it must be compared
	// with the address of the current status: DeepEqual reports false for
	// operands of different types, which would force an update every time.
	if equality.Semantic.DeepEqual(oldStatus, &pool.Status) {
		return nil
	}
	return r.Status().Update(ctx, pool)
}

// pickPodsToDelete selects the pods to remove. All pods named in
// redundantPodNames come first, followed by up to scaleIn idle pods taken in
// order of creation time, oldest first. An idle pod that already has a
// deletion timestamp uses up one scale-in slot but is not returned. Names
// that do not match any pod in pods are ignored.
func (r *PoolReconciler) pickPodsToDelete(pods []*corev1.Pod, idlePodNames []string, redundantPodNames []string, scaleIn int32) []*corev1.Pod {
	byName := make(map[string]*corev1.Pod)
	for _, p := range pods {
		byName[p.Name] = p
	}

	// Resolve idle names to pods and order them oldest first.
	var idle []*corev1.Pod
	for _, idleName := range idlePodNames {
		if p, found := byName[idleName]; found {
			idle = append(idle, p)
		}
	}
	sort.Slice(idle, func(a, b int) bool {
		return idle[a].CreationTimestamp.Before(&idle[b].CreationTimestamp)
	})

	var victims []*corev1.Pod
	// Pods made redundant by a pool update.
	for _, redundantName := range redundantPodNames {
		if p, found := byName[redundantName]; found {
			victims = append(victims, p)
		}
	}
	// Pods removed by pool scale-in.
	for _, p := range idle {
		if scaleIn <= 0 {
			break
		}
		scaleIn--
		if p.DeletionTimestamp != nil {
			continue
		}
		victims = append(victims, p)
	}
	return victims
}

// createPoolPod creates one pod for the pool from the pool's pod template.
//
// The pod is placed in the pool's namespace with a generated name prefixed by
// the pool name, labelled with the pool name and latestRevision, and owned by
// the pool. After a successful create the pod is registered with
// PoolScaleExpectations and a Normal event is recorded; a failed create
// records a Warning event and returns the error.
func (r *PoolReconciler) createPoolPod(ctx context.Context, pool *sandboxv1alpha1.Pool, latestRevision string) error {
	pod, err := utils.GetPodFromTemplate(pool.Spec.Template, pool, metav1.NewControllerRef(pool, sandboxv1alpha1.SchemeBuilder.GroupVersion.WithKind("Pool")))
	if err != nil {
		return err
	}
	pod.Namespace = pool.Namespace
	// Clear any fixed name so the API server generates one from GenerateName.
	pod.Name = ""
	pod.GenerateName = pool.Name + "-"
	// Writing to a nil map panics; guard in case the template carried no labels.
	if pod.Labels == nil {
		pod.Labels = make(map[string]string, 2)
	}
	pod.Labels[LabelPoolName] = pool.Name
	pod.Labels[LabelPoolRevision] = latestRevision
	if err := ctrl.SetControllerReference(pool, pod, r.Scheme); err != nil {
		return err
	}
	if err := r.Create(ctx, pod); err != nil {
		r.Recorder.Eventf(pool, corev1.EventTypeWarning, "FailedCreate", "Failed to create pool pod: %v", err)
		return err
	}
	// The generated name is only known after Create, so the expectation is
	// registered here rather than before the call.
	PoolScaleExpectations.ExpectScale(controllerutils.GetControllerKey(pool), expectations.Create, pod.Name)
	r.Recorder.Eventf(pool, corev1.EventTypeNormal, "SuccessfulCreate", "Created pool pod: %v", pod.Name)
	return nil
}


================================================
FILE: kubernetes/internal/controller/pool_controller_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"context"
	"encoding/json"
	"time"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/rand"
	"k8s.io/client-go/util/retry"
	"k8s.io/utils/ptr"
	kclient "sigs.k8s.io/controller-runtime/pkg/client"

	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils/fieldindex"
	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
)

// Pool scale specs: each spec starts from a freshly created Pool
// (PoolMin=0, PoolMax=2, BufferMin=BufferMax=1), changes its CapacitySpec,
// and waits for Status.Total to reach the expected pod count.
var _ = Describe("Pool scale", func() {
	var (
		timeout  = 10 * time.Second
		interval = 1 * time.Second
	)
	Context("When reconciling a resource", func() {
		const resourceName = "pool-scale-test"

		ctx := context.Background()

		typeNamespacedName := types.NamespacedName{
			Name:      resourceName,
			Namespace: "default",
		}
		BeforeEach(func() {
			By("creating the custom resource for the Kind Pool")
			// A random suffix keeps the specs independent of each other.
			typeNamespacedName.Name = resourceName + "-" + rand.String(8)
			resource := &sandboxv1alpha1.Pool{
				ObjectMeta: metav1.ObjectMeta{
					Name:      typeNamespacedName.Name,
					Namespace: typeNamespacedName.Namespace,
				},
				Spec: sandboxv1alpha1.PoolSpec{
					Template: &v1.PodTemplateSpec{
						Spec: v1.PodSpec{
							Containers: []v1.Container{
								{
									Name:  "main",
									Image: "example.com",
								},
							},
						},
					},
					CapacitySpec: sandboxv1alpha1.CapacitySpec{
						PoolMin:   0,
						PoolMax:   2,
						BufferMin: 1,
						BufferMax: 1,
					},
				},
			}
			Expect(k8sClient.Create(ctx, resource)).To(Succeed())
			Eventually(func(g Gomega) {
				pool := &sandboxv1alpha1.Pool{}
				err := k8sClient.Get(ctx, typeNamespacedName, pool)
				g.Expect(err).NotTo(HaveOccurred())
				cnt := min(pool.Spec.CapacitySpec.PoolMax, pool.Spec.CapacitySpec.BufferMin)
				g.Expect(pool.Status.ObservedGeneration).To(Equal(pool.Generation))
				g.Expect(pool.Status.Total).To(Equal(cnt))
			}, timeout, interval).Should(Succeed())
		})

		AfterEach(func() {
			resource := &sandboxv1alpha1.Pool{}
			err := k8sClient.Get(ctx, typeNamespacedName, resource)
			if err != nil {
				if !errors.IsNotFound(err) {
					Expect(err).NotTo(HaveOccurred())
				} else {
					By("The specific resource instance Pool already deleted")
					return
				}
			}
			By("Cleanup the specific resource instance Pool")
			Expect(k8sClient.Delete(ctx, resource)).To(Succeed())
		})
		It("should successfully update pool status", func() {
			pool := &sandboxv1alpha1.Pool{}
			Eventually(func(g Gomega) {
				// A failed Get must fail the attempt: returning without a
				// failed assertion would make Eventually succeed untested.
				g.Expect(k8sClient.Get(ctx, typeNamespacedName, pool)).To(Succeed())
				cnt := min(pool.Spec.CapacitySpec.PoolMax, pool.Spec.CapacitySpec.BufferMin)
				g.Expect(pool.Status.ObservedGeneration).To(Equal(pool.Generation))
				g.Expect(pool.Status.Total).To(Equal(cnt))
			}, timeout, interval).Should(Succeed())
		})
		It("should successfully scale out pool buffer size", func() {
			pool := &sandboxv1alpha1.Pool{}
			Expect(k8sClient.Get(ctx, typeNamespacedName, pool)).To(Succeed())
			pool.Spec.CapacitySpec.BufferMin = 2
			pool.Spec.CapacitySpec.BufferMax = 2
			Expect(k8sClient.Update(ctx, pool)).To(Succeed())
			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, typeNamespacedName, pool)).To(Succeed())
				cnt := int32(2)
				g.Expect(pool.Status.ObservedGeneration).To(Equal(pool.Generation))
				g.Expect(pool.Status.Total).To(Equal(cnt))
			}, timeout, interval).Should(Succeed())
		})
		It("should successfully scale out buffer limit by pool max", func() {
			pool := &sandboxv1alpha1.Pool{}
			Expect(k8sClient.Get(ctx, typeNamespacedName, pool)).To(Succeed())
			// The requested buffer (3) exceeds PoolMax (2), so the total is capped at 2.
			pool.Spec.CapacitySpec.PoolMax = 2
			pool.Spec.CapacitySpec.BufferMin = 3
			pool.Spec.CapacitySpec.BufferMax = 3
			Expect(k8sClient.Update(ctx, pool)).To(Succeed())
			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, typeNamespacedName, pool)).To(Succeed())
				cnt := int32(2)
				g.Expect(pool.Status.ObservedGeneration).To(Equal(pool.Generation))
				g.Expect(pool.Status.Total).To(Equal(cnt))
			}, timeout, interval).Should(Succeed())
		})
		It("should successfully scale in pool buffer size", func() {
			pool := &sandboxv1alpha1.Pool{}
			Expect(k8sClient.Get(ctx, typeNamespacedName, pool)).To(Succeed())
			pool.Spec.CapacitySpec.BufferMin = 0
			pool.Spec.CapacitySpec.BufferMax = 0
			Expect(k8sClient.Update(ctx, pool)).To(Succeed())
			Eventually(func(g Gomega) {
				pool := &sandboxv1alpha1.Pool{}
				g.Expect(k8sClient.Get(ctx, typeNamespacedName, pool)).To(Succeed())
				cnt := int32(0)
				g.Expect(pool.Status.ObservedGeneration).To(Equal(pool.Generation))
				g.Expect(pool.Status.Total).To(Equal(cnt))
			}, timeout, interval).Should(Succeed())
		})
		It("should successfully scale in buffer limit by pool min", func() {
			pool := &sandboxv1alpha1.Pool{}
			Expect(k8sClient.Get(ctx, typeNamespacedName, pool)).To(Succeed())
			// The buffer drops to 0, but PoolMin (1) keeps one pod alive.
			pool.Spec.CapacitySpec.PoolMax = 1
			pool.Spec.CapacitySpec.PoolMin = 1
			pool.Spec.CapacitySpec.BufferMin = 0
			pool.Spec.CapacitySpec.BufferMax = 0
			Expect(k8sClient.Update(ctx, pool)).To(Succeed())
			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, typeNamespacedName, pool)).To(Succeed())
				cnt := int32(1)
				g.Expect(pool.Status.ObservedGeneration).To(Equal(pool.Generation))
				g.Expect(pool.Status.Total).To(Equal(cnt))
			}, timeout, interval).Should(Succeed())
		})
	})
})

// Pool update specs: changing the pod template of a Pool must roll it to a new
// revision, both for an idle pool and for a pool that already has a pod
// allocated to a BatchSandbox.
var _ = Describe("Pool update", func() {
	var (
		timeout  = 10 * time.Second
		interval = 1 * time.Second
	)
	Context("When reconciling a resource", func() {
		const resourceName = "pool-update-test"

		ctx := context.Background()

		typeNamespacedName := types.NamespacedName{
			Name:      resourceName,
			Namespace: "default",
		}

		BeforeEach(func() {
			By("creating the custom resource for the Kind Pool")
			// Each spec gets its own randomly suffixed Pool so specs do not interfere.
			typeNamespacedName.Name = resourceName + "-" + rand.String(8)
			resource := &sandboxv1alpha1.Pool{
				ObjectMeta: metav1.ObjectMeta{
					Name:      typeNamespacedName.Name,
					Namespace: typeNamespacedName.Namespace,
				},
				Spec: sandboxv1alpha1.PoolSpec{
					Template: &v1.PodTemplateSpec{
						Spec: v1.PodSpec{
							Containers: []v1.Container{
								{
									Name:  "main",
									Image: "example.com",
								},
							},
						},
					},
					CapacitySpec: sandboxv1alpha1.CapacitySpec{
						PoolMin:   0,
						PoolMax:   2,
						BufferMin: 1,
						BufferMax: 1,
					},
				},
			}
			Expect(k8sClient.Create(ctx, resource)).To(Succeed())
			// Wait until the controller has observed the new Pool and created the buffer pods.
			Eventually(func(g Gomega) {
				pool := &sandboxv1alpha1.Pool{}
				err := k8sClient.Get(ctx, typeNamespacedName, pool)
				g.Expect(err).NotTo(HaveOccurred())
				cnt := min(pool.Spec.CapacitySpec.PoolMax, pool.Spec.CapacitySpec.BufferMin)
				g.Expect(pool.Status.ObservedGeneration).To(Equal(pool.Generation))
				g.Expect(pool.Status.Total).To(Equal(cnt))
			}, timeout, interval).Should(Succeed())
			pool := &sandboxv1alpha1.Pool{}
			err := k8sClient.Get(ctx, typeNamespacedName, pool)
			Expect(err).NotTo(HaveOccurred())
			pods := &v1.PodList{}
			Expect(k8sClient.List(ctx, pods, &kclient.ListOptions{
				Namespace:     typeNamespacedName.Namespace,
				FieldSelector: fields.SelectorFromSet(fields.Set{fieldindex.IndexNameForOwnerRefUID: string(pool.UID)}),
			})).To(Succeed())
			// Mock pod running
			for i := range pods.Items {
				pod := &pods.Items[i]
				pod.Status.Phase = v1.PodRunning
				Expect(k8sClient.Status().Update(ctx, pod)).To(Succeed())
			}
		})

		AfterEach(func() {
			resource := &sandboxv1alpha1.Pool{}
			err := k8sClient.Get(ctx, typeNamespacedName, resource)
			if err != nil {
				if !errors.IsNotFound(err) {
					Expect(err).NotTo(HaveOccurred())
				} else {
					By("The specific resource instance Pool already deleted")
					return
				}
			}
			By("Cleanup the specific resource instance Pool")
			Expect(k8sClient.Delete(ctx, resource)).To(Succeed())
		})
		It("should successfully update pool revision", func() {
			var oldRevision string
			Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error {
				pool := &sandboxv1alpha1.Pool{}
				if err := k8sClient.Get(ctx, typeNamespacedName, pool); err != nil {
					return err
				}
				// Remember the revision seen on the first attempt only.
				if oldRevision == "" {
					oldRevision = pool.Status.Revision
				}
				pool.Spec.Template.Labels = map[string]string{
					"test.pool.update": "v1",
				}
				return k8sClient.Update(ctx, pool)
			})).Should(Succeed())
			Eventually(func(g Gomega) {
				pool := &sandboxv1alpha1.Pool{}
				// Use g so a transient failure is retried instead of aborting the spec.
				g.Expect(k8sClient.Get(ctx, typeNamespacedName, pool)).To(Succeed())
				cnt := int32(1)
				g.Expect(pool.Status.Revision).NotTo(Equal(oldRevision))
				g.Expect(pool.Status.Total).To(Equal(cnt))
			}, timeout, interval).Should(Succeed())
		})
		It("should successfully update pool with allocated pod", func() {
			pool := &sandboxv1alpha1.Pool{}
			sbxNamespaceName := types.NamespacedName{
				Name:      "sandbox-test-" + rand.String(8),
				Namespace: typeNamespacedName.Namespace,
			}
			sandbox := &sandboxv1alpha1.BatchSandbox{
				ObjectMeta: metav1.ObjectMeta{
					Name:      sbxNamespaceName.Name,
					Namespace: sbxNamespaceName.Namespace,
				},
				Spec: sandboxv1alpha1.BatchSandboxSpec{
					PoolRef: typeNamespacedName.Name,
				},
			}
			Expect(k8sClient.Create(ctx, sandbox)).To(Succeed())
			// wait allocation
			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, sbxNamespaceName, sandbox)).To(Succeed())
				alloc, err := getSandboxAllocation(sandbox)
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(alloc.Pods).NotTo(BeEmpty())
			}, timeout, interval).Should(Succeed())
			Expect(k8sClient.Get(ctx, sbxNamespaceName, sandbox)).To(Succeed())
			sbxAlloc, err := getSandboxAllocation(sandbox)
			Expect(err).NotTo(HaveOccurred())
			Expect(len(sbxAlloc.Pods)).To(Equal(1))
			// check pool allocation
			err = k8sClient.Get(ctx, typeNamespacedName, pool)
			Expect(err).NotTo(HaveOccurred())
			allocation, err := getPoolAllocation(pool)
			Expect(err).NotTo(HaveOccurred())
			Expect(len(allocation.PodAllocation)).To(Equal(1))
			Expect(allocation.PodAllocation[sbxAlloc.Pods[0]]).To(Equal(sandbox.Name))
			// update pool; retry on conflict because the controller may write
			// to the Pool object concurrently (same pattern as the spec above).
			var oldRevision string
			Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error {
				if err := k8sClient.Get(ctx, typeNamespacedName, pool); err != nil {
					return err
				}
				if oldRevision == "" {
					oldRevision = pool.Status.Revision
				}
				pool.Spec.Template.Labels = map[string]string{
					"test.pool.update": "v1",
				}
				return k8sClient.Update(ctx, pool)
			})).To(Succeed())
			Eventually(func(g Gomega) {
				// All assertions go through g so that a transient failure only
				// fails this polling attempt.
				g.Expect(k8sClient.Get(ctx, typeNamespacedName, pool)).To(Succeed())
				cnt := int32(2)
				g.Expect(pool.Status.Revision).NotTo(Equal(oldRevision))
				g.Expect(pool.Status.Total).To(Equal(cnt))
				pods := &v1.PodList{}
				g.Expect(k8sClient.List(ctx, pods, &kclient.ListOptions{
					Namespace:     typeNamespacedName.Namespace,
					FieldSelector: fields.SelectorFromSet(fields.Set{fieldindex.IndexNameForOwnerRefUID: string(pool.UID)}),
				})).To(Succeed())
				for _, pod := range pods.Items {
					if pod.Name == sbxAlloc.Pods[0] {
						// The allocated pod must survive the update untouched.
						g.Expect(pod.DeletionTimestamp).To(BeNil())
						g.Expect(pod.Labels[LabelPoolRevision]).To(Equal(oldRevision))
						continue
					}
					// Pods already being deleted may still carry the old revision.
					if pod.DeletionTimestamp != nil {
						continue
					}
					g.Expect(pod.Labels[LabelPoolRevision]).NotTo(Equal(oldRevision))
				}
			}, timeout, interval).Should(Succeed())
			Expect(k8sClient.Delete(ctx, sandbox)).To(Succeed())
		})
	})
})

// Pool allocate specs: a BatchSandbox that references a Pool must get a pool
// pod allocated, and the allocation must disappear from the Pool once the
// BatchSandbox publishes a release annotation for that pod.
var _ = Describe("Pool allocate", func() {
	var (
		timeout  = 10 * time.Second
		interval = 1 * time.Second
	)
	Context("When reconciling a resource", func() {
		const resourceName = "pool-allocate-test"

		ctx := context.Background()

		typeNamespacedName := types.NamespacedName{
			Name:      resourceName,
			Namespace: "default",
		}

		BeforeEach(func() {
			By("creating the custom resource for the Kind Pool")
			// Each spec gets its own randomly suffixed Pool so specs do not interfere.
			typeNamespacedName.Name = resourceName + "-" + rand.String(8)
			resource := &sandboxv1alpha1.Pool{
				ObjectMeta: metav1.ObjectMeta{
					Name:      typeNamespacedName.Name,
					Namespace: typeNamespacedName.Namespace,
				},
				Spec: sandboxv1alpha1.PoolSpec{
					Template: &v1.PodTemplateSpec{
						Spec: v1.PodSpec{
							Containers: []v1.Container{
								{
									Name:  "main",
									Image: "example.com",
								},
							},
						},
					},
					CapacitySpec: sandboxv1alpha1.CapacitySpec{
						PoolMin:   0,
						PoolMax:   2,
						BufferMin: 1,
						BufferMax: 1,
					},
				},
			}
			Expect(k8sClient.Create(ctx, resource)).To(Succeed())
			// Wait until the controller has observed the new Pool and created the buffer pods.
			Eventually(func(g Gomega) {
				pool := &sandboxv1alpha1.Pool{}
				err := k8sClient.Get(ctx, typeNamespacedName, pool)
				g.Expect(err).NotTo(HaveOccurred())
				cnt := min(pool.Spec.CapacitySpec.PoolMax, pool.Spec.CapacitySpec.BufferMin)
				g.Expect(pool.Status.ObservedGeneration).To(Equal(pool.Generation))
				g.Expect(pool.Status.Total).To(Equal(cnt))
			}, timeout, interval).Should(Succeed())
			pool := &sandboxv1alpha1.Pool{}
			err := k8sClient.Get(ctx, typeNamespacedName, pool)
			Expect(err).NotTo(HaveOccurred())
			pods := &v1.PodList{}
			Expect(k8sClient.List(ctx, pods, &kclient.ListOptions{
				Namespace:     typeNamespacedName.Namespace,
				FieldSelector: fields.SelectorFromSet(fields.Set{fieldindex.IndexNameForOwnerRefUID: string(pool.UID)}),
			})).To(Succeed())
			// Mock pod running
			for i := range pods.Items {
				pod := &pods.Items[i]
				pod.Status.Phase = v1.PodRunning
				Expect(k8sClient.Status().Update(ctx, pod)).To(Succeed())
			}
		})

		AfterEach(func() {
			resource := &sandboxv1alpha1.Pool{}
			err := k8sClient.Get(ctx, typeNamespacedName, resource)
			if err != nil {
				if !errors.IsNotFound(err) {
					Expect(err).NotTo(HaveOccurred())
				} else {
					By("The specific resource instance Pool already deleted")
					return
				}
			}
			By("Cleanup the specific resource instance Pool")
			Expect(k8sClient.Delete(ctx, resource)).To(Succeed())
		})
		It("should successfully allocate pool pod to batch sandbox and release", func() {
			pool := &sandboxv1alpha1.Pool{}
			bsbxNamespaceName := types.NamespacedName{
				Name:      "batch-sandbox-test-" + rand.String(8),
				Namespace: typeNamespacedName.Namespace,
			}
			batchSandbox := &sandboxv1alpha1.BatchSandbox{
				ObjectMeta: metav1.ObjectMeta{
					Name:      bsbxNamespaceName.Name,
					Namespace: bsbxNamespaceName.Namespace,
				},
				Spec: sandboxv1alpha1.BatchSandboxSpec{
					Replicas: ptr.To(int32(1)),
					PoolRef:  typeNamespacedName.Name,
				},
			}
			Expect(k8sClient.Create(ctx, batchSandbox)).To(Succeed())
			// wait allocation
			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, bsbxNamespaceName, batchSandbox)).To(Succeed())
				alloc, err := getSandboxAllocation(batchSandbox)
				// Use g so a transient failure is retried instead of aborting the spec.
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(alloc.Pods).NotTo(BeEmpty())
			}, timeout, interval).Should(Succeed())
			Expect(k8sClient.Get(ctx, bsbxNamespaceName, batchSandbox)).To(Succeed())
			sbxAlloc, err := getSandboxAllocation(batchSandbox)
			Expect(err).NotTo(HaveOccurred())
			Expect(len(sbxAlloc.Pods)).To(Equal(1))
			// check pool allocation
			err = k8sClient.Get(ctx, typeNamespacedName, pool)
			Expect(err).NotTo(HaveOccurred())
			allocation, err := getPoolAllocation(pool)
			Expect(err).NotTo(HaveOccurred())
			Expect(len(allocation.PodAllocation)).To(Equal(1))
			Expect(allocation.PodAllocation[sbxAlloc.Pods[0]]).To(Equal(batchSandbox.Name))
			// release
			release := AllocationRelease{
				Pods: sbxAlloc.Pods,
			}
			js, err := json.Marshal(release)
			Expect(err).NotTo(HaveOccurred())
			// Re-read the object on every attempt: the controller may have
			// written to the BatchSandbox since it was last fetched, which would
			// otherwise surface as a resource-version conflict.
			Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error {
				if err := k8sClient.Get(ctx, bsbxNamespaceName, batchSandbox); err != nil {
					return err
				}
				if batchSandbox.Annotations == nil {
					batchSandbox.Annotations = map[string]string{}
				}
				batchSandbox.Annotations[AnnoAllocReleaseKey] = string(js)
				return k8sClient.Update(ctx, batchSandbox)
			})).To(Succeed())
			// wait release
			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, typeNamespacedName, pool)).To(Succeed())
				current, err := getPoolAllocation(pool)
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(len(current.PodAllocation)).To(Equal(0))
			}, timeout, interval).Should(Succeed())
			Expect(k8sClient.Delete(ctx, batchSandbox)).To(Succeed())
		})
	})
})

// getSandboxAllocation decodes the allocation status stored in obj's
// AnnoAllocStatusKey annotation. An object that lacks the annotation yields an
// empty SandboxAllocation; undecodable JSON yields a nil result and the error.
func getSandboxAllocation(obj kclient.Object) (*SandboxAllocation, error) {
	result := &SandboxAllocation{}
	// Indexing a nil map is safe in Go, so a missing annotation map and a
	// missing key are both covered by this single lookup.
	raw, found := obj.GetAnnotations()[AnnoAllocStatusKey]
	if !found {
		return result, nil
	}
	if err := json.Unmarshal([]byte(raw), result); err != nil {
		return nil, err
	}
	return result, nil
}

// getPoolAllocation decodes the allocation status stored in the Pool's
// AnnoPoolAllocStatusKey annotation. A Pool that lacks the annotation yields an
// empty PoolAllocation; undecodable JSON yields a nil result and the error.
func getPoolAllocation(pool *sandboxv1alpha1.Pool) (*PoolAllocation, error) {
	result := &PoolAllocation{}
	// Indexing a nil map is safe in Go, so a missing annotation map and a
	// missing key are both covered by this single lookup.
	raw, found := pool.GetAnnotations()[AnnoPoolAllocStatusKey]
	if !found {
		return result, nil
	}
	if err := json.Unmarshal([]byte(raw), result); err != nil {
		return nil, err
	}
	return result, nil
}

// Pool deletion and recreation specs: after a Pool has been deleted, a new Pool
// with the very same name must be accepted and reconciled like a fresh one.
var _ = Describe("Pool deletion and recreation", func() {
	const (
		timeout  = 10 * time.Second
		interval = 1 * time.Second
	)
	Context("When deleting and recreating a Pool with same name", func() {
		const resourceName = "pool-recreate-test"

		ctx := context.Background()

		typeNamespacedName := types.NamespacedName{
			Name:      resourceName,
			Namespace: "default",
		}

		// buildPool returns a fresh Pool object named after the current
		// typeNamespacedName, using the capacity settings shared by every spec here.
		buildPool := func() *sandboxv1alpha1.Pool {
			mainContainer := v1.Container{
				Name:  "main",
				Image: "example.com",
			}
			return &sandboxv1alpha1.Pool{
				ObjectMeta: metav1.ObjectMeta{
					Name:      typeNamespacedName.Name,
					Namespace: typeNamespacedName.Namespace,
				},
				Spec: sandboxv1alpha1.PoolSpec{
					Template: &v1.PodTemplateSpec{
						Spec: v1.PodSpec{
							Containers: []v1.Container{mainContainer},
						},
					},
					CapacitySpec: sandboxv1alpha1.CapacitySpec{
						PoolMin:   0,
						PoolMax:   2,
						BufferMin: 1,
						BufferMax: 1,
					},
				},
			}
		}

		// waitReconciled polls until the Pool's status reflects its current
		// generation and the expected pod total. totalDescription is forwarded
		// as the optional description of the total assertion.
		waitReconciled := func(totalDescription ...interface{}) {
			Eventually(func(g Gomega) {
				current := &sandboxv1alpha1.Pool{}
				g.Expect(k8sClient.Get(ctx, typeNamespacedName, current)).NotTo(HaveOccurred())
				capacity := current.Spec.CapacitySpec
				wantTotal := min(capacity.PoolMax, capacity.BufferMin)
				g.Expect(current.Status.ObservedGeneration).To(Equal(current.Generation))
				g.Expect(current.Status.Total).To(Equal(wantTotal), totalDescription...)
			}, timeout, interval).Should(Succeed())
		}

		BeforeEach(func() {
			By("creating the custom resource for the Kind Pool")
			typeNamespacedName.Name = resourceName + "-" + rand.String(8)
			Expect(k8sClient.Create(ctx, buildPool())).To(Succeed())
			waitReconciled()
		})

		AfterEach(func() {
			leftover := &sandboxv1alpha1.Pool{}
			switch err := k8sClient.Get(ctx, typeNamespacedName, leftover); {
			case err == nil:
				By("Cleanup the specific resource instance Pool")
				Expect(k8sClient.Delete(ctx, leftover)).To(Succeed())
			case !errors.IsNotFound(err):
				// Anything other than "already gone" is a real failure.
				Expect(err).NotTo(HaveOccurred())
			}
		})

		It("should allow recreating a Pool with the same name after deletion", func() {
			By("deleting the existing Pool")
			existing := &sandboxv1alpha1.Pool{}
			Expect(k8sClient.Get(ctx, typeNamespacedName, existing)).To(Succeed())
			Expect(k8sClient.Delete(ctx, existing)).To(Succeed())

			By("waiting for the Pool to be fully deleted")
			Eventually(func(g Gomega) {
				probe := &sandboxv1alpha1.Pool{}
				getErr := k8sClient.Get(ctx, typeNamespacedName, probe)
				g.Expect(errors.IsNotFound(getErr)).To(BeTrue(), "Pool should be deleted")
			}, timeout, interval).Should(Succeed())

			By("recreating a Pool with the same name")
			Expect(k8sClient.Create(ctx, buildPool())).To(Succeed())

			By("verifying the new Pool is successfully reconciled and creates expected pods")
			waitReconciled("new Pool should have correct total pod count")
		})
	})
})


================================================
FILE: kubernetes/internal/controller/strategy/pool_strategy.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package strategy

// PoolStrategy abstracts the pool-related decisions made for a BatchSandbox.
// Instances are obtained through NewPoolStrategy.
type PoolStrategy interface {
	// IsPooledMode reports whether the BatchSandbox is served from a pool.
	// The default implementation in this package returns true when the
	// BatchSandbox's Spec.PoolRef is non-empty.
	IsPooledMode() bool
}


================================================
FILE: kubernetes/internal/controller/strategy/pool_strategy_default.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package strategy

import (
	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
)

// DefaultPoolStrategy is the stock PoolStrategy. It embeds the BatchSandbox it
// was created for and answers questions by inspecting that object's spec.
type DefaultPoolStrategy struct {
	*sandboxv1alpha1.BatchSandbox
}

// NewDefaultPoolStrategy wraps batchSandbox in a DefaultPoolStrategy. The
// pointer is stored as-is; no copy is made.
func NewDefaultPoolStrategy(batchSandbox *sandboxv1alpha1.BatchSandbox) *DefaultPoolStrategy {
	strategy := new(DefaultPoolStrategy)
	strategy.BatchSandbox = batchSandbox
	return strategy
}

// IsPooledMode reports whether the wrapped BatchSandbox names a pool, i.e.
// whether Spec.PoolRef is a non-empty string.
func (s *DefaultPoolStrategy) IsPooledMode() bool {
	poolRef := s.BatchSandbox.Spec.PoolRef
	return len(poolRef) > 0
}


================================================
FILE: kubernetes/internal/controller/strategy/pool_strategy_factory.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package strategy

import (
	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
)

// NewPoolStrategy selects the PoolStrategy for batchSbx. At present every
// BatchSandbox gets a DefaultPoolStrategy.
func NewPoolStrategy(batchSbx *sandboxv1alpha1.BatchSandbox) PoolStrategy {
	var selected PoolStrategy = NewDefaultPoolStrategy(batchSbx)
	return selected
}


================================================
FILE: kubernetes/internal/controller/strategy/pool_strategy_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package strategy

import (
	"fmt"
	"testing"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
)

// TestDefaultPoolStrategy_IsPooledMode checks that pooled mode is reported for
// a BatchSandbox with a PoolRef and not for one that only carries a template.
func TestDefaultPoolStrategy_IsPooledMode(t *testing.T) {
	type testCase struct {
		name     string
		batchSbx *sandboxv1alpha1.BatchSandbox
		want     bool
	}

	// A BatchSandbox that brings its own pod template and no PoolRef.
	templated := &sandboxv1alpha1.BatchSandbox{}
	templated.Spec.Template = &corev1.PodTemplateSpec{
		Spec: corev1.PodSpec{
			Containers: []corev1.Container{
				{
					Name:  "test",
					Image: "nginx",
				},
			},
		},
	}

	// A BatchSandbox that only references a pool.
	pooled := &sandboxv1alpha1.BatchSandbox{}
	pooled.Spec.Template = nil
	pooled.Spec.PoolRef = "test-pool"

	cases := []testCase{
		{name: "with template - not pooled", batchSbx: templated, want: false},
		{name: "without template - pooled", batchSbx: pooled, want: true},
	}
	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			got := NewDefaultPoolStrategy(tc.batchSbx).IsPooledMode()
			if got == tc.want {
				return
			}
			t.Errorf("DefaultPoolStrategy.IsPooledMode() = %v, want %v", got, tc.want)
		})
	}
}

// TestNewPoolStrategy checks which concrete strategy type the factory returns.
func TestNewPoolStrategy(t *testing.T) {
	type testCase struct {
		name         string
		batchSbx     *sandboxv1alpha1.BatchSandbox
		wantStrategy string
	}

	// A BatchSandbox with an empty label set and no template.
	unlabelled := &sandboxv1alpha1.BatchSandbox{
		ObjectMeta: metav1.ObjectMeta{
			Labels: map[string]string{},
		},
	}
	unlabelled.Spec.Template = nil

	cases := []testCase{
		{
			name:         "without resource-speedup label - returns DefaultPoolStrategy",
			batchSbx:     unlabelled,
			wantStrategy: "*strategy.DefaultPoolStrategy",
		},
	}
	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			gotType := getTypeName(NewPoolStrategy(tc.batchSbx))
			if gotType == tc.wantStrategy {
				return
			}
			t.Errorf("NewPoolStrategy() = %v, want %v", gotType, tc.wantStrategy)
		})
	}
}

// getTypeName returns the dynamic type of i as formatted by the %T verb,
// e.g. "*strategy.DefaultPoolStrategy"; a nil interface yields "<nil>".
func getTypeName(i interface{}) string {
	const typeVerb = "%T"
	name := fmt.Sprintf(typeVerb, i)
	return name
}


================================================
FILE: kubernetes/internal/controller/strategy/task_scheduling_strategy.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package strategy

import (
	api "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/task-executor"
)

// TaskSchedulingStrategy defines the strategy interface for task scheduling.
// Different implementations can provide custom logic for determining whether
// task scheduling is needed and how to generate task specifications.
// Instances are obtained through NewTaskSchedulingStrategy.
type TaskSchedulingStrategy interface {
	// NeedTaskScheduling determines whether the BatchSandbox requires task scheduling.
	// The default implementation returns true when Spec.TaskTemplate is set.
	NeedTaskScheduling() bool

	// GenerateTaskSpecs generates the complete list of task specifications for the BatchSandbox.
	// The default implementation produces one task per replica and returns an
	// error when a task specification cannot be built.
	GenerateTaskSpecs() ([]*api.Task, error)
}


================================================
FILE: kubernetes/internal/controller/strategy/task_scheduling_strategy_default.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package strategy

import (
	"encoding/json"
	"fmt"

	"k8s.io/apimachinery/pkg/util/strategicpatch"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	api "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/task-executor"
)

// DefaultTaskSchedulingStrategy is the stock TaskSchedulingStrategy. It embeds
// the BatchSandbox it was created for and derives everything from its spec.
type DefaultTaskSchedulingStrategy struct {
	*sandboxv1alpha1.BatchSandbox
}

// NewDefaultTaskSchedulingStrategy wraps batchSbx in a
// DefaultTaskSchedulingStrategy. The pointer is stored as-is; no copy is made.
func NewDefaultTaskSchedulingStrategy(batchSbx *sandboxv1alpha1.BatchSandbox) *DefaultTaskSchedulingStrategy {
	strategy := new(DefaultTaskSchedulingStrategy)
	strategy.BatchSandbox = batchSbx
	return strategy
}

// NeedTaskScheduling reports whether the wrapped BatchSandbox declares a
// TaskTemplate; without one there is nothing to schedule.
func (s *DefaultTaskSchedulingStrategy) NeedTaskScheduling() bool {
	if s.BatchSandbox.Spec.TaskTemplate == nil {
		return false
	}
	return true
}

// GenerateTaskSpecs generates task specifications for all replicas.
//
// The result holds one entry per replica, indexed by replica number. When
// building a task fails, the partially filled slice is returned together with
// the error. A missing or negative Spec.Replicas is reported as an error
// instead of panicking on the nil dereference or the slice allocation.
func (s *DefaultTaskSchedulingStrategy) GenerateTaskSpecs() ([]*api.Task, error) {
	if s.Spec.Replicas == nil {
		return nil, fmt.Errorf("batchsandbox: spec.replicas is not set")
	}
	replicas := int(*s.Spec.Replicas)
	if replicas < 0 {
		return nil, fmt.Errorf("batchsandbox: invalid spec.replicas %d", replicas)
	}
	ret := make([]*api.Task, replicas)
	for idx := range replicas {
		task, err := s.getTaskSpec(idx)
		if err != nil {
			return ret, err
		}
		ret[idx] = task
	}
	return ret, nil
}

// getTaskSpec generates a single task specification for the given index.
//
// The task is named "<BatchSandbox name>-<idx>". When ShardTaskPatches has an
// entry for idx, that entry is applied to the base TaskTemplate as a strategic
// merge patch and the process is taken from the patched template; otherwise
// the base TaskTemplate is used unchanged. The returned task has no Process
// when there is no TaskTemplate or the effective template declares none.
//
// An error is returned when the template cannot be marshalled, the patch
// cannot be merged, or the merged document cannot be decoded.
func (s *DefaultTaskSchedulingStrategy) getTaskSpec(idx int) (*api.Task, error) {
	task := &api.Task{
		Name: fmt.Sprintf("%s-%d", s.Name, idx),
	}
	base := s.Spec.TaskTemplate
	if base == nil {
		// Without a base template there is nothing to patch or copy from.
		return task, nil
	}
	if idx < 0 || idx >= len(s.Spec.ShardTaskPatches) {
		// No shard-specific patch for this index: use the template as is.
		task.Process = processFromTemplate(base)
		return task, nil
	}

	// json.Marshal does not modify its argument, so no defensive copy is needed.
	baseBytes, err := json.Marshal(base)
	if err != nil {
		return nil, fmt.Errorf("batchsandbox: failed to marshal TaskTemplateSpec, idx %d, err %w", idx, err)
	}
	patch := s.Spec.ShardTaskPatches[idx]
	modified, err := strategicpatch.StrategicMergePatch(baseBytes, patch.Raw, &sandboxv1alpha1.TaskTemplateSpec{})
	if err != nil {
		return nil, fmt.Errorf("batchsandbox: failed to merge patch raw %s, idx %d, err %w", patch.Raw, idx, err)
	}
	patched := &sandboxv1alpha1.TaskTemplateSpec{}
	if err = json.Unmarshal(modified, patched); err != nil {
		return nil, fmt.Errorf("batchsandbox: failed to unmarshal %s to TaskTemplateSpec, idx %d, err %w", modified, idx, err)
	}
	// Every field, including the timeout, comes from the patched template so
	// that a shard patch overriding the timeout is honoured rather than dropped.
	task.Process = processFromTemplate(patched)
	return task, nil
}

// processFromTemplate converts the process section of tpl into an api.Process,
// returning nil when tpl is nil or declares no process.
func processFromTemplate(tpl *sandboxv1alpha1.TaskTemplateSpec) *api.Process {
	if tpl == nil || tpl.Spec.Process == nil {
		return nil
	}
	return &api.Process{
		Command:        tpl.Spec.Process.Command,
		Args:           tpl.Spec.Process.Args,
		Env:            tpl.Spec.Process.Env,
		WorkingDir:     tpl.Spec.Process.WorkingDir,
		TimeoutSeconds: tpl.Spec.TimeoutSeconds,
	}
}


================================================
FILE: kubernetes/internal/controller/strategy/task_scheduling_strategy_default_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package strategy

import (
	"reflect"
	"testing"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	api "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/task-executor"
)

// TestDefaultTaskSchedulingStrategy_NeedTaskScheduling checks that scheduling
// is requested exactly when the BatchSandbox carries a TaskTemplate.
func TestDefaultTaskSchedulingStrategy_NeedTaskScheduling(t *testing.T) {
	type testCase struct {
		name     string
		batchSbx *sandboxv1alpha1.BatchSandbox
		want     bool
	}

	withTemplate := &sandboxv1alpha1.BatchSandbox{}
	withTemplate.Spec.TaskTemplate = &sandboxv1alpha1.TaskTemplateSpec{}

	withoutTemplate := &sandboxv1alpha1.BatchSandbox{}
	withoutTemplate.Spec.TaskTemplate = nil

	cases := []testCase{
		{name: "with task template", batchSbx: withTemplate, want: true},
		{name: "without task template", batchSbx: withoutTemplate, want: false},
	}
	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			got := NewDefaultTaskSchedulingStrategy(tc.batchSbx).NeedTaskScheduling()
			if got == tc.want {
				return
			}
			t.Errorf("DefaultTaskSchedulingStrategy.NeedTaskScheduling() = %v, want %v", got, tc.want)
		})
	}
}

// TestDefaultTaskSchedulingStrategy_getTaskSpec covers the base template path,
// a valid shard patch, an undecodable shard patch, and an index that has no
// corresponding shard patch.
func TestDefaultTaskSchedulingStrategy_getTaskSpec(t *testing.T) {
	// newBatchSbx returns a fresh "test-bs" BatchSandbox whose task template
	// runs "echo hello". Each raw patch becomes one ShardTaskPatches entry;
	// with no patches the field is left unset.
	newBatchSbx := func(rawPatches ...string) *sandboxv1alpha1.BatchSandbox {
		bs := &sandboxv1alpha1.BatchSandbox{
			ObjectMeta: metav1.ObjectMeta{
				Name:      "test-bs",
				Namespace: "default",
			},
			Spec: sandboxv1alpha1.BatchSandboxSpec{
				TaskTemplate: &sandboxv1alpha1.TaskTemplateSpec{
					Spec: sandboxv1alpha1.TaskSpec{
						Process: &sandboxv1alpha1.ProcessTask{
							Command: []string{"echo", "hello"},
						},
					},
				},
			},
		}
		for _, raw := range rawPatches {
			bs.Spec.ShardTaskPatches = append(bs.Spec.ShardTaskPatches, runtime.RawExtension{
				Raw: []byte(raw),
			})
		}
		return bs
	}

	// wantTask builds the expected task with only a name and a command.
	wantTask := func(name string, command ...string) *api.Task {
		return &api.Task{
			Name: name,
			Process: &api.Process{
				Command: command,
			},
		}
	}

	const worldPatch = `{"spec":{"process":{"command":["echo","world"]}}}`

	cases := []struct {
		name     string
		batchSbx *sandboxv1alpha1.BatchSandbox
		idx      int
		want     *api.Task
		wantErr  bool
	}{
		{
			name:     "basic task spec without patches",
			batchSbx: newBatchSbx(),
			idx:      0,
			want:     wantTask("test-bs-0", "echo", "hello"),
		},
		{
			name:     "task spec with shard patch",
			batchSbx: newBatchSbx(worldPatch),
			idx:      0,
			want:     wantTask("test-bs-0", "echo", "world"),
		},
		{
			name:     "task spec with invalid patch",
			batchSbx: newBatchSbx(`{"invalid json`),
			idx:      0,
			want:     nil,
			wantErr:  true,
		},
		{
			name:     "task spec with index out of range patch",
			batchSbx: newBatchSbx(worldPatch),
			idx:      1,
			want:     wantTask("test-bs-1", "echo", "hello"),
		},
	}
	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			got, err := NewDefaultTaskSchedulingStrategy(tc.batchSbx).getTaskSpec(tc.idx)
			if gotErr := err != nil; gotErr != tc.wantErr {
				t.Errorf("DefaultTaskSchedulingStrategy.getTaskSpec() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if tc.wantErr {
				// Nothing further to compare for error cases.
				return
			}
			if got.Name != tc.want.Name {
				t.Errorf("DefaultTaskSchedulingStrategy.getTaskSpec() name = %v, want %v", got.Name, tc.want.Name)
			}
			if !reflect.DeepEqual(got.Process, tc.want.Process) {
				t.Errorf("DefaultTaskSchedulingStrategy.getTaskSpec() spec = %v, want %v", got.Process, tc.want.Process)
			}
		})
	}
}


================================================
FILE: kubernetes/internal/controller/strategy/task_scheduling_strategy_factory.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package strategy

import (
	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
)

// NewTaskSchedulingStrategy returns the TaskSchedulingStrategy to use for the
// given BatchSandbox. It is the single selection point for strategy
// implementations: at present it always returns the default strategy, and a
// custom implementation can be substituted here.
func NewTaskSchedulingStrategy(batchSbx *sandboxv1alpha1.BatchSandbox) TaskSchedulingStrategy {
	var strategy TaskSchedulingStrategy = NewDefaultTaskSchedulingStrategy(batchSbx)
	return strategy
}


================================================
FILE: kubernetes/internal/controller/suite_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"context"
	"os"
	"path/filepath"
	"sync"
	"testing"

	"k8s.io/client-go/kubernetes/scheme"
	"k8s.io/client-go/rest"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/envtest"
	logf "sigs.k8s.io/controller-runtime/pkg/log"
	"sigs.k8s.io/controller-runtime/pkg/log/zap"
	"sigs.k8s.io/controller-runtime/pkg/manager"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils/fieldindex"
	// +kubebuilder:scaffold:imports
)

// These tests use Ginkgo (BDD-style Go testing framework). Refer to
// http://onsi.github.io/ginkgo/ to learn more about Ginkgo.

// Shared state of the controller test suite. Everything here is assigned in
// BeforeSuite and torn down in AfterSuite.
var (
	// ctx is the cancellable root context handed to the manager.
	ctx        context.Context
	// cancel cancels ctx; AfterSuite calls it to stop the manager.
	cancel     context.CancelFunc
	// testEnv is the envtest environment started for the suite.
	testEnv    *envtest.Environment
	// cfg is the REST config returned by testEnv.Start.
	cfg        *rest.Config
	// k8sClient is the client obtained from k8sManager.
	k8sClient  client.Client
	// k8sManager hosts the reconcilers under test.
	k8sManager ctrl.Manager
	// mgrStopped is done once the manager goroutine has returned.
	mgrStopped *sync.WaitGroup
)

// TestControllers is the `go test` entry point for this package's Ginkgo specs:
// it routes Gomega assertion failures to Ginkgo's Fail handler and then runs
// the suite.
func TestControllers(t *testing.T) {
	const suiteDescription = "Controller Suite"

	RegisterFailHandler(Fail)
	RunSpecs(t, suiteDescription)
}

// BeforeSuite prepares the shared test fixture:
//  1. registers the sandbox API types in the client-go scheme,
//  2. starts an envtest control plane with the project CRDs installed,
//  3. builds a manager, registers field indexes and the reconcilers under test,
//  4. starts the manager in the background and waits for its cache to sync.
var _ = BeforeSuite(func() {
	logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)))

	ctx, cancel = context.WithCancel(context.TODO())

	var err error
	err = sandboxv1alpha1.AddToScheme(scheme.Scheme)
	Expect(err).NotTo(HaveOccurred())

	// +kubebuilder:scaffold:scheme

	By("bootstrapping test environment")
	testEnv = &envtest.Environment{
		CRDDirectoryPaths:     []string{filepath.Join("..", "..", "config", "crd", "bases")},
		ErrorIfCRDPathMissing: true,
	}

	// Retrieve the first found binary directory to allow running tests from IDEs.
	// The lookup reads the filesystem (and logs on failure), so do it only once.
	if binaryDir := getFirstFoundEnvTestBinaryDir(); binaryDir != "" {
		testEnv.BinaryAssetsDirectory = binaryDir
	}

	// cfg is defined in this file globally.
	cfg, err = testEnv.Start()
	Expect(err).NotTo(HaveOccurred())
	Expect(cfg).NotTo(BeNil())

	k8sManager, err = ctrl.NewManager(cfg, ctrl.Options{
		Scheme: scheme.Scheme,
	})
	Expect(err).ToNot(HaveOccurred())
	By("register field index")
	Expect(fieldindex.RegisterFieldIndexes(k8sManager.GetCache())).Should(Succeed(), "failed to register fieldindex")

	By("setup reconciler")
	Expect((&BatchSandboxReconciler{
		Client:   k8sManager.GetClient(),
		Scheme:   k8sManager.GetScheme(),
		Recorder: k8sManager.GetEventRecorderFor("test-batch-sandbox-controller"),
	}).SetupWithManager(k8sManager)).Should(Succeed())
	Expect((&PoolReconciler{
		Client:    k8sManager.GetClient(),
		Scheme:    k8sManager.GetScheme(),
		Recorder:  k8sManager.GetEventRecorderFor("test-pool-controller"),
		Allocator: NewDefaultAllocator(k8sManager.GetClient()),
	}).SetupWithManager(k8sManager)).Should(Succeed())
	// TODO more reconciler goes HERE

	By("try to start manager")
	mgrStopped = startTestManager(ctx, k8sManager)

	// Announce the step before blocking on it, and fail fast when the cache
	// never syncs instead of letting every spec run against an empty cache.
	By("waiting for manager cache synced")
	Expect(k8sManager.GetCache().WaitForCacheSync(ctx)).To(BeTrue(), "manager cache failed to sync")

	k8sClient = k8sManager.GetClient()
	Expect(k8sClient).NotTo(BeNil())
})

// startTestManager runs mgr.Start(ctx) in a background goroutine and returns a
// WaitGroup that is done once Start has returned, so callers can wait for a
// clean manager shutdown after cancelling ctx.
func startTestManager(ctx context.Context, mgr manager.Manager) *sync.WaitGroup {
	wg := &sync.WaitGroup{}
	wg.Add(1)
	go func() {
		defer wg.Done()
		// Assertions made outside the spec goroutine must be guarded by
		// GinkgoRecover; otherwise a failed Expect panics the whole test binary
		// instead of being reported as a test failure.
		defer GinkgoRecover()
		Expect(mgr.Start(ctx)).Should(Succeed(), "failed to start manager")
	}()
	return wg
}

// AfterSuite cancels the suite context, waits for the manager goroutine (if one
// was started) to exit, and then stops the envtest environment.
var _ = AfterSuite(func() {
	By("tearing down the test environment")
	cancel()

	if stopped := mgrStopped; stopped != nil {
		By("waiting manager exit")
		stopped.Wait()
	}

	Expect(testEnv.Stop()).NotTo(HaveOccurred())
})

// getFirstFoundEnvTestBinaryDir returns the path of the first sub-directory of
// ../../bin/k8s, or "" when that directory cannot be read or has no
// sub-directory.
//
// ENVTEST-based tests need the control-plane binaries that controller-runtime
// normally locates through the 'KUBEBUILDER_ASSETS' environment variable. When
// tests are started directly (e.g., from an IDE) instead of through the Makefile
// targets, 'BinaryAssetsDirectory' has to be set explicitly; this helper finds a
// suitable value. Run 'make setup-envtest' beforehand so the binaries exist.
func getFirstFoundEnvTestBinaryDir() string {
	root := filepath.Join("..", "..", "bin", "k8s")

	children, readErr := os.ReadDir(root)
	if readErr != nil {
		logf.Log.Error(readErr, "Failed to read directory", "path", root)
		return ""
	}

	for i := range children {
		if !children[i].IsDir() {
			continue
		}
		return filepath.Join(root, children[i].Name())
	}
	return ""
}


================================================
FILE: kubernetes/internal/scheduler/default_scheduler.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scheduler

import (
	"context"
	"fmt"
	"net"
	"sync"
	"time"

	"github.com/go-logr/logr"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/utils/ptr"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils"
	api "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/task-executor"
)

// Compile-time assertion that *taskNode satisfies the Task interface.
var _ Task = &taskNode{}

var (
	timeNow = func() time.Time {
		return time.Now()
	}
)

// taskSpec is the desired definition of a task, copied from the api.Task the
// node was created from and sent back to the executor when the task is set.
type taskSpec struct {
	// Process is the process definition of the task, if any.
	Process         *api.Process
	// PodTemplateSpec is the pod template of the task, if any.
	PodTemplateSpec *corev1.PodTemplateSpec
}

// taskNode is the scheduler's in-memory record of one task: its spec, the Pod
// it is assigned to, the last status collected from that Pod, and the two state
// machines (task state and scheduling state) tracked for it.
type taskNode struct {
	metav1.ObjectMeta
	Spec taskSpec

	// Status is the task most recently reported by the Pod's endpoint; nil is
	// interpreted as "task deleted" (see isTaskDeleted).
	Status  *api.Task
	// IP and PodName identify the assigned Pod; an empty IP means unassigned.
	IP      string
	PodName string

	// Task state derived from the collected status, and when it last changed.
	tState              TaskState
	tStateLastTransTime *time.Time

	// Internal scheduling state (releasing/released), and when it last changed.
	sStateLastTransTime *time.Time
	sState              string
}

// GetPodName returns the name of the Pod assigned to this task node; it is
// empty while no Pod has been assigned.
func (t *taskNode) GetPodName() string {
	return t.PodName
}

// GetState returns the task state last derived from the collected task status.
func (t *taskNode) GetState() TaskState {
	return t.tState
}

// IsResourceReleased reports whether the node's scheduling state is "released".
func (t *taskNode) IsResourceReleased() bool {
	return t.sState == stateReleased
}

// isTaskCompleted reports whether the task has reached a terminal task state,
// i.e. it either succeeded or failed.
func (t *taskNode) isTaskCompleted() bool {
	switch t.tState {
	case SucceedTaskState, FailedTaskState:
		return true
	default:
		return false
	}
}

// isTaskDeleted reports whether no task status is currently recorded for the
// node (Status is nil), which the scheduler treats as the task being gone.
func (t *taskNode) isTaskDeleted() bool {
	return t.Status == nil
}

// transSchState moves the node's scheduling state to `to`. It is a no-op when
// the node is already in that state; otherwise it records the transition time
// and logs the change together with the time spent since the previous
// transition (0 for the first one).
func (t *taskNode) transSchState(to string, log logr.Logger) {
	from := t.sState
	if from == to {
		return
	}

	now := timeNow()
	var sinceLast time.Duration
	if last := t.sStateLastTransTime; last != nil {
		sinceLast = now.Sub(*last)
	}

	t.sState = to
	t.sStateLastTransTime = ptr.To(now)
	log.Info("task node trans sch state", "name", t.Name, "namespace", t.Namespace, "from", from, "to", to, "latencyMs", sinceLast.Milliseconds())
}

// transTaskState moves the node's task state to `to`. It is a no-op when the
// state is unchanged; otherwise it records the transition time and logs the
// change together with the time spent since the previous transition (0 for the
// first one).
func (t *taskNode) transTaskState(to TaskState, log logr.Logger) {
	from := t.tState
	if from == to {
		return
	}

	now := timeNow()
	var sinceLast time.Duration
	if last := t.tStateLastTransTime; last != nil {
		sinceLast = now.Sub(*last)
	}

	t.tState = to
	t.tStateLastTransTime = ptr.To(now)
	log.Info("task node trans task state", "name", t.Name, "namespace", t.Namespace, "from", from, "to", to, "latencyMs", sinceLast.Milliseconds())
}

// Scheduling ("sch") state machine of a task node:
//
//	$start --> pending
//
//	pending   -- task is assigned to a Pod                          --> assigned
//	pending   -- BatchSandbox's deletion timestamp != 0             --> released
//
//	assigned  -- BatchSandbox's deletion timestamp != 0             --> releasing
//	assigned  -- task state is SUCCEED && policy allows release     --> releasing
//	assigned  -- task state is FAILED && policy allows release      --> releasing
//	assigned  -- otherwise: set Task on the endpoint                --> assigned
//
//	releasing -- endpoint returns nil task, or endpoint lost too
//	             many times (e.g., force-deleted), or endpoint is
//	             nil (unassigned)                                   --> released
//
//	released --> $end
//
// "pending" and "assigned" have no constant: an empty endpoint means pending,
// a non-empty endpoint means assigned.

// stateReleasing: the task is being released from its Pod.
const stateReleasing = "releasing"

// stateReleased: the task no longer holds any resource; terminal state.
const stateReleased = "released"

// stateUnknown: placeholder for an undetermined scheduling state.
const stateUnknown = "unknown"

// taskClient is the subset of the task-executor client the scheduler depends
// on. It exists as an interface so the client can be mocked.
type taskClient interface {
	// Set sends the desired task to the endpoint; the scheduler passes nil to
	// ask the endpoint to release its task.
	Set(ctx context.Context, task *api.Task) (*api.Task, error)
	// Get fetches the task currently known to the endpoint.
	Get(ctx context.Context) (*api.Task, error)
}

const (
	defaultTimeout        time.Duration = 3 * time.Second
	defaultTaskPort                     = "5758"
	defaultSchConcurrency int           = 10
)

// newTaskClient builds a task-executor client that talks to the endpoint
// derived from the given Pod IP.
func newTaskClient(ip string) taskClient {
	endpoint := fmtEndpoint(ip)
	return api.NewClient(endpoint)
}

func fmtEndpoint(podIP string) string {
	return fmt.Sprintf("http://%s:%s", podIP, defaultTaskPort)
}

// defaultTaskScheduler assigns task nodes to Pods and drives each node through
// its scheduling state machine by talking to the Pods' task endpoints.
type defaultTaskScheduler struct {
	// freePods is rebuilt by refreshFreePods on every Schedule call.
	freePods []*corev1.Pod
	// allPods is the latest Pod list supplied via the constructor or UpdatePods.
	allPods  []*corev1.Pod

	// taskNodes holds one node per task; taskNodeByNameIndex indexes the same
	// nodes by name.
	taskNodes           []*taskNode
	taskNodeByNameIndex map[string]*taskNode

	// maxConcurrency caps the number of nodes scheduled in parallel.
	maxConcurrency int
	// NOTE(review): once is not used by the code visible in this file section.
	once           sync.Once

	// taskStatusCollector fetches task status from the assigned Pod IPs.
	taskStatusCollector       taskStatusCollector
	// taskClientCreator builds the client used to set/release tasks on a Pod.
	taskClientCreator         taskClientCreator
	// resPolicyWhenTaskComplete decides whether a completed task is released.
	resPolicyWhenTaskComplete sandboxv1alpha1.TaskResourcePolicy
	// name identifies the scheduler in log messages.
	name                      string
	logger                    logr.Logger
}

// newTaskScheduler creates a scheduler named `name` for the given tasks and
// Pods. It builds one task node per task, indexes the nodes by name, and runs
// recovery before returning. An error is returned when node initialisation or
// recovery fails.
func newTaskScheduler(name string, tasks []*api.Task, pods []*corev1.Pod, resPolicyWhenTaskComplete sandboxv1alpha1.TaskResourcePolicy, logger logr.Logger) (*defaultTaskScheduler, error) {
	sch := &defaultTaskScheduler{
		name:                      name,
		logger:                    logger,
		allPods:                   pods,
		maxConcurrency:            defaultSchConcurrency,
		resPolicyWhenTaskComplete: resPolicyWhenTaskComplete,
		taskClientCreator:         newTaskClient,
		taskStatusCollector:       newTaskStatusCollector(newTaskClient, logger),
	}

	nodes, initErr := initTaskNodes(tasks)
	if initErr != nil {
		return nil, fmt.Errorf("scheduler: failed to init task node err %w", initErr)
	}
	sch.taskNodes, sch.taskNodeByNameIndex = nodes, indexByName(nodes)
	logger.Info("successfully init task nodes", "scheduler", name, "size", len(nodes))

	// TODO: Optimization – skip recovery for a brand-new scheduler.
	// Recovery is unnecessary in this case and incurs significant overhead.
	recoverErr := sch.recover()
	if recoverErr != nil {
		return nil, fmt.Errorf("scheduler: failed to recover, err %w", recoverErr)
	}
	logger.Info("successfully recover", "scheduler", name)
	return sch, nil
}

// indexByName builds a lookup table from task-node name to node. When several
// nodes share a name, the last one in the slice wins.
func indexByName(taskNodes []*taskNode) map[string]*taskNode {
	byName := make(map[string]*taskNode, len(taskNodes))
	for _, node := range taskNodes {
		byName[node.Name] = node
	}
	return byName
}

// Schedule runs one scheduling round. The order matters: the free-Pod list is
// rebuilt first, then task status is collected for already assigned nodes, and
// finally unassigned nodes get Pods and every node is advanced through its
// state machine.
func (sch *defaultTaskScheduler) Schedule() error {
	sch.refreshFreePods()
	sch.collectTaskStatus(sch.taskNodes)
	return sch.scheduleTaskNodes()
}

// UpdatePods replaces the scheduler's view of all Pods; the new list takes
// effect when refreshFreePods runs during the next Schedule call.
func (sch *defaultTaskScheduler) UpdatePods(pods []*corev1.Pod) {
	sch.allPods = pods
}

// ListTask returns all task nodes managed by the scheduler as Task values, in
// their internal order. The returned slice is freshly allocated.
func (sch *defaultTaskScheduler) ListTask() []Task {
	tasks := make([]Task, 0, len(sch.taskNodes))
	for _, node := range sch.taskNodes {
		tasks = append(tasks, node)
	}
	return tasks
}

// StopTask marks every task node that is not already being deleted with a
// deletion timestamp and returns exactly the nodes marked by this call.
//
// Nodes that already carry a deletion timestamp are left untouched and are not
// part of the result, so the returned slice never contains nil entries and its
// length is the number of tasks newly asked to stop (0 when called again).
func (sch *defaultTaskScheduler) StopTask() []Task {
	stopped := make([]Task, 0, len(sch.taskNodes))
	for _, node := range sch.taskNodes {
		if node.DeletionTimestamp != nil {
			continue
		}
		node.DeletionTimestamp = &metav1.Time{Time: timeNow()}
		stopped = append(stopped, node)
	}
	return stopped
}

// initTaskNodes creates one unassigned task node per task, preserving order and
// copying the task's name, process and pod template into the node. The error
// result is currently always nil.
func initTaskNodes(tasks []*api.Task) ([]*taskNode, error) {
	nodes := make([]*taskNode, 0, len(tasks))
	for _, task := range tasks {
		nodes = append(nodes, &taskNode{
			ObjectMeta: metav1.ObjectMeta{Name: task.Name},
			Spec: taskSpec{
				Process:         task.Process,
				PodTemplateSpec: task.PodTemplateSpec,
			},
		})
	}
	return nodes, nil
}

// collectTaskStatus refreshes the Status of the given task nodes from their
// Pods' endpoints.
//
// Only assigned nodes (non-empty IP) are queried; when there is none, the nodes
// are left untouched. Otherwise every node's Status is overwritten with the
// collected result for its IP (nil when nothing was returned for that IP), and
// the task state is updated for nodes that got a non-nil task.
func (sch *defaultTaskScheduler) collectTaskStatus(taskNodes []*taskNode) {
	assignedIPs := make([]string, 0, len(taskNodes))
	for _, node := range taskNodes {
		if node.IP != "" {
			assignedIPs = append(assignedIPs, node.IP)
		}
	}
	if len(assignedIPs) == 0 {
		return
	}

	collected := sch.taskStatusCollector.Collect(context.Background(), assignedIPs)
	for _, node := range taskNodes {
		status, found := collected[node.IP]
		node.Status = status
		if !found || status == nil {
			continue
		}
		node.transTaskState(parseTaskState(status), sch.logger)
	}
}

// parseTaskState derives the TaskState from a reported task. The process status
// takes precedence over the pod status; without either the state is unknown.
func parseTaskState(task *api.Task) TaskState {
	switch {
	case task.ProcessStatus != nil:
		return parseProcessTaskState(task.ProcessStatus)
	case task.PodStatus != nil:
		return parsePodTaskState(task.PodStatus)
	default:
		return UnknownTaskState
	}
}

// parseProcessTaskState maps a process status to a TaskState: running wins over
// terminated; a terminated process succeeded when its exit code is 0 and failed
// otherwise; anything else is unknown.
func parseProcessTaskState(status *api.ProcessStatus) TaskState {
	switch {
	case status.Running != nil:
		return RunningTaskState
	case status.Terminated == nil:
		return UnknownTaskState
	case status.Terminated.ExitCode == 0:
		return SucceedTaskState
	default:
		return FailedTaskState
	}
}

// parsePodTaskState maps a pod status to a TaskState. A pod in the Running
// phase only counts as running once its Ready condition is true; until then,
// and for every phase other than Succeeded/Failed, the state is unknown.
func parsePodTaskState(status *corev1.PodStatus) TaskState {
	phase := status.Phase
	if phase == corev1.PodSucceeded {
		return SucceedTaskState
	}
	if phase == corev1.PodFailed {
		return FailedTaskState
	}
	if phase == corev1.PodRunning && utils.IsPodReadyConditionTrue(*status) {
		return RunningTaskState
	}
	return UnknownTaskState
}

// scheduleTaskNodes hands free Pods to unassigned task nodes and then advances
// every node through scheduleSingleTaskNode. Nodes are processed concurrently,
// with at most maxConcurrency in flight; the call returns once all nodes are
// done. The returned error is currently always nil.
func (sch *defaultTaskScheduler) scheduleTaskNodes() error {
	sch.freePods = assignTaskNodes(sch.taskNodes, sch.freePods, sch.logger)

	var (
		inFlight sync.WaitGroup
		slots    = make(chan struct{}, sch.maxConcurrency)
	)
	for _, tNode := range sch.taskNodes {
		// Acquire a slot before spawning so the number of goroutines is bounded.
		slots <- struct{}{}
		inFlight.Add(1)
		go func(node *taskNode) {
			defer inFlight.Done()
			defer func() { <-slots }()
			scheduleSingleTaskNode(node, sch.taskClientCreator, sch.resPolicyWhenTaskComplete, sch.logger)
		}(tNode)
	}
	inFlight.Wait()
	return nil
}

// refreshFreePods recomputes freePods from allPods.
// A Pod is free when it has an IP address and no task node that is assigned
// (non-empty IP and PodName) refers to it by name, so each Pod serves at most
// one task node. The order of allPods is preserved.
func (sch *defaultTaskScheduler) refreshFreePods() {
	// Names of Pods already taken by a task node.
	taken := make(map[string]struct{}, len(sch.allPods)/2)
	for _, tNode := range sch.taskNodes {
		if tNode.IP == "" || tNode.PodName == "" {
			continue
		}
		taken[tNode.PodName] = struct{}{}
	}

	free := make([]*corev1.Pod, 0, len(sch.allPods)/2)
	for _, pod := range sch.allPods {
		// Pods without an IP cannot be reached yet, so they are not offered.
		if pod.Status.PodIP == "" {
			continue
		}
		if _, busy := taken[pod.Name]; busy {
			continue
		}
		free = append(free, pod)
	}
	sch.freePods = free
}

// assignTaskNodes gives each unassigned task node (empty IP) the next free Pod,
// in order, until either runs out. It records the Pod's IP and name on the node
// and returns the Pods that are still free.
func assignTaskNodes(taskNodes []*taskNode, freePods []*corev1.Pod, log logr.Logger) []*corev1.Pod {
	next := 0 // index of the first Pod not handed out yet
	for _, tNode := range taskNodes {
		if next >= len(freePods) {
			break
		}
		if tNode.IP != "" {
			continue
		}
		pod := freePods[next]
		next++
		log.Info("assign Pod to task node", "podName", pod.Name, "podNamespace", pod.Namespace, "podIP", pod.Status.PodIP, "taskName", tNode.Name)
		tNode.PodName = pod.Name
		tNode.IP = pod.Status.PodIP
	}
	return freePods[next:]
}

// needRelease reports whether the task node's resources should be released:
// either the node is being deleted, or the task has completed and the policy is
// TaskResourcePolicyRelease.
func needRelease(tNode *taskNode, policy sandboxv1alpha1.TaskResourcePolicy) bool {
	return tNode.DeletionTimestamp != nil ||
		(policy == sandboxv1alpha1.TaskResourcePolicyRelease && tNode.isTaskCompleted())
}

// scheduleSingleTaskNode advances one task node by a single step.
//
// Step 1 depends on whether the node is assigned:
//   - pending (no IP): move to "released" if the node is being deleted;
//   - assigned and release needed: move to "releasing";
//   - assigned, not completed: (re)send the desired task to the endpoint,
//     logging but otherwise ignoring a failure.
//
// Step 2 handles a node that is in "releasing" (possibly since step 1): once the
// task is gone it becomes "released", otherwise the endpoint is asked to drop
// the task by setting a nil task.
func scheduleSingleTaskNode(tNode *taskNode, taskClientCreator func(endpoint string) taskClient, resPolicyWhenTaskComplete sandboxv1alpha1.TaskResourcePolicy, log logr.Logger) {
	switch {
	case tNode.IP == "":
		// pending
		if tNode.DeletionTimestamp != nil {
			tNode.transSchState(stateReleased, log)
		}
	case needRelease(tNode, resPolicyWhenTaskComplete):
		// assigned, to be released
		tNode.transSchState(stateReleasing, log)
	case !tNode.isTaskCompleted():
		// assigned and still active; completed tasks are not re-sent to avoid
		// unnecessary network overhead
		desired := &api.Task{
			Name:            tNode.Name,
			Process:         tNode.Spec.Process,
			PodTemplateSpec: tNode.Spec.PodTemplateSpec,
		}
		if _, err := setTask(taskClientCreator(tNode.IP), desired, log); err != nil {
			log.Error(err, "Failed to set task", "taskName", tNode.Name, "endpoint", tNode.IP)
		}
	}

	if tNode.sState != stateReleasing {
		return
	}
	if tNode.isTaskDeleted() {
		tNode.transSchState(stateReleased, log)
		return
	}
	if _, err := setTask(taskClientCreator(tNode.IP), nil, log); err != nil {
		log.Error(err, "Failed to notify executor about releasing task", "taskName", tNode.Name, "endpoint", tNode.IP)
		return
	}
	log.Info("Successfully to notify client to release task", "taskName", tNode.Name, "endpoint", tNode.IP)
}

// setTask sends task to the endpoint behind client, bounded by defaultTimeout,
// and returns the client's result unchanged. The task is dumped as JSON to the
// log only when verbosity level 3 is enabled.
func setTask(client taskClient, task *api.Task, log logr.Logger) (*api.Task, error) {
	ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout)
	defer cancel()

	if debug := log.V(3); debug.Enabled() {
		debug.Info("client set task", "task", utils.DumpJSON(task))
	}
	return client.Set(ctx, task)
}


================================================
FILE: kubernetes/internal/scheduler/default_scheduler_mock.go
================================================
package scheduler

// Code generated by MockGen. DO NOT EDIT.
// Source: internal/task/scheduler/default_scheduler.go

// Package mock_scheduler is a generated GoMock package.

import (
	context "context"
	reflect "reflect"

	gomock "github.com/golang/mock/gomock"

	api "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/task-executor"
)

// MocktaskClient is a mock of taskClient interface.
// It holds the gomock controller that dispatches calls and the recorder handed
// out by EXPECT.
type MocktaskClient struct {
	ctrl     *gomock.Controller
	recorder *MocktaskClientMockRecorder
}

// MocktaskClientMockRecorder is the mock recorder for MocktaskClient.
// Its methods register expected calls on the mock it points back to.
type MocktaskClientMockRecorder struct {
	mock *MocktaskClient
}

// NewMocktaskClient creates a new mock instance.
// The mock is bound to ctrl and gets a recorder that refers back to it.
func NewMocktaskClient(ctrl *gomock.Controller) *MocktaskClient {
	mock := &MocktaskClient{ctrl: ctrl}
	mock.recorder = &MocktaskClientMockRecorder{mock}
	return mock
}

// EXPECT returns an object that allows the caller to indicate expected use.
// The returned value is the recorder created together with the mock.
func (m *MocktaskClient) EXPECT() *MocktaskClientMockRecorder {
	return m.recorder
}

// Get mocks base method.
// The call is forwarded to the controller; the two recorded return values are
// type-asserted, and a value of another type (or nil) yields the zero value.
func (m *MocktaskClient) Get(ctx context.Context) (*api.Task, error) {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "Get", ctx)
	ret0, _ := ret[0].(*api.Task)
	ret1, _ := ret[1].(error)
	return ret0, ret1
}

// Get indicates an expected call of Get.
// ctx may be a concrete value or a gomock matcher.
func (mr *MocktaskClientMockRecorder) Get(ctx interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Get", reflect.TypeOf((*MocktaskClient)(nil).Get), ctx)
}

// Set mocks base method.
// The call is forwarded to the controller; the two recorded return values are
// type-asserted, and a value of another type (or nil) yields the zero value.
func (m *MocktaskClient) Set(ctx context.Context, task *api.Task) (*api.Task, error) {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "Set", ctx, task)
	ret0, _ := ret[0].(*api.Task)
	ret1, _ := ret[1].(error)
	return ret0, ret1
}

// Set indicates an expected call of Set.
// ctx and task may be concrete values or gomock matchers.
func (mr *MocktaskClientMockRecorder) Set(ctx, task interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Set", reflect.TypeOf((*MocktaskClient)(nil).Set), ctx, task)
}


================================================
FILE: kubernetes/internal/scheduler/default_scheduler_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scheduler

import (
	"reflect"
	"testing"
	"time"

	"github.com/go-logr/logr"
	"github.com/golang/mock/gomock"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	api "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/task-executor"
)

// mockLogger is a logr.LogSink for tests that discards everything: it reports
// every level as disabled and all of its methods are no-ops.
type mockLogger struct{}

// Init implements logr.LogSink; the runtime info is ignored.
func (m mockLogger) Init(info logr.RuntimeInfo) {
}

// Info implements logr.LogSink; the message is dropped.
func (m mockLogger) Info(level int, msg string, keysAndValues ...interface{}) {
}

// Error implements logr.LogSink; the error is dropped.
func (m mockLogger) Error(err error, msg string, keysAndValues ...interface{}) {
}

// Enabled implements logr.LogSink; no level is ever enabled.
func (m mockLogger) Enabled(level int) bool {
	return false
}

// WithValues implements logr.LogSink; the sink is returned unchanged.
func (m mockLogger) WithValues(keysAndValues ...interface{}) logr.LogSink {
	return m
}

// WithName implements logr.LogSink; the sink is returned unchanged.
func (m mockLogger) WithName(name string) logr.LogSink {
	return m
}

// testLogger is the silent logger shared by the tests in this package.
var testLogger = logr.New(mockLogger{})

// Test_scheduleSingleTaskNode checks the state transitions and endpoint calls
// made by scheduleSingleTaskNode for a single node.
//
// The package clock (timeNow) is pinned to a fixed instant so the recorded
// transition timestamps can be compared with reflect.DeepEqual. Every case runs
// with an empty resource policy, so release is only triggered by a deletion
// timestamp or by a node that already is in the releasing state. Cases that
// expect no endpoint call leave taskClientCreator nil.
func Test_scheduleSingleTaskNode(t *testing.T) {
	ctl := gomock.NewController(t)
	defer ctl.Finish()
	// Freeze the clock and restore it when the test ends.
	mockTimeNow := time.Now()
	o := timeNow
	timeNow = func() time.Time {
		return mockTimeNow
	}
	defer func() {
		timeNow = o
	}()
	type args struct {
		tNode             *taskNode
		taskClientCreator func(endpoint string) taskClient
	}
	tests := []struct {
		name           string
		args           args
		expectTaskNode *taskNode
	}{
		{
			// Unassigned node being deleted goes straight to released.
			name: "pending task node, deleting ",
			args: args{
				tNode: &taskNode{
					ObjectMeta: v1.ObjectMeta{
						Name:              "test-batch-sandbox-0",
						DeletionTimestamp: &metav1.Time{Time: mockTimeNow},
					},
				},
			},
			expectTaskNode: &taskNode{
				ObjectMeta: v1.ObjectMeta{
					Name:              "test-batch-sandbox-0",
					DeletionTimestamp: &metav1.Time{Time: mockTimeNow},
				},
				sState:              stateReleased,
				sStateLastTransTime: &mockTimeNow,
			},
		},
		{
			// Assigned, running node being deleted enters releasing and the
			// endpoint is asked once to drop the task (Set with nil).
			name: "assigned task node, task state=Running, deleting; setTask(nil)",
			args: args{
				tNode: &taskNode{
					ObjectMeta: v1.ObjectMeta{
						Name:              "test-batch-sandbox-0",
						DeletionTimestamp: &metav1.Time{Time: mockTimeNow},
					},
					IP: "1.2.3.4",
					Status: &api.Task{
						ProcessStatus: &api.ProcessStatus{
							Running: &api.Running{
								StartedAt: metav1.NewTime(mockTimeNow),
							},
						},
					},
					tState: RunningTaskState,
				},
				taskClientCreator: func(endpoint string) taskClient {
					mock := NewMocktaskClient(ctl)
					mock.EXPECT().Set(gomock.Any(), nil).Return(nil, nil).Times(1)
					return mock
				},
			},
			expectTaskNode: &taskNode{
				ObjectMeta: v1.ObjectMeta{
					Name:              "test-batch-sandbox-0",
					DeletionTimestamp: &metav1.Time{Time: mockTimeNow},
				},
				IP: "1.2.3.4",
				Status: &api.Task{
					ProcessStatus: &api.ProcessStatus{
						Running: &api.Running{
							StartedAt: metav1.NewTime(mockTimeNow),
						},
					},
				},
				tState:              RunningTaskState,
				sState:              stateReleasing,
				sStateLastTransTime: &mockTimeNow,
			},
		},
		{
			// Assigned, running node that is not being deleted: the desired task
			// is sent once and the node itself is left unchanged.
			name: "assigned task node, task state=Running; setTask(task)",
			args: args{
				tNode: &taskNode{
					ObjectMeta: v1.ObjectMeta{
						Name: "test-batch-sandbox-0",
					},
					IP: "1.2.3.4",
					Spec: taskSpec{
						Process: &api.Process{
							Command: []string{"hello"},
						},
					},
					Status: &api.Task{
						ProcessStatus: &api.ProcessStatus{
							Running: &api.Running{
								StartedAt: metav1.NewTime(mockTimeNow),
							},
						},
					},
					tState: RunningTaskState,
				},
				taskClientCreator: func(endpoint string) taskClient {
					mock := NewMocktaskClient(ctl)
					mock.EXPECT().Set(gomock.Any(), &api.Task{
						Name: "test-batch-sandbox-0",
						Process: &api.Process{
							Command: []string{"hello"},
						},
					}).Return(nil, nil).Times(1)
					return mock
				},
			},
			expectTaskNode: &taskNode{
				ObjectMeta: v1.ObjectMeta{
					Name: "test-batch-sandbox-0",
				},
				IP: "1.2.3.4",
				Spec: taskSpec{
					Process: &api.Process{
						Command: []string{"hello"},
					},
				},
				Status: &api.Task{
					ProcessStatus: &api.ProcessStatus{
						Running: &api.Running{
							StartedAt: metav1.NewTime(mockTimeNow),
						},
					},
				},
				tState: RunningTaskState,
			},
		},
		{
			// Node already releasing whose endpoint reports no task (nil Status)
			// becomes released without any endpoint call.
			name: "assigned task node, task state=Succeed, endpoint return nil task; sState trans from releasing -> released ",
			args: args{
				tNode: &taskNode{
					ObjectMeta: v1.ObjectMeta{
						Name: "test-batch-sandbox-0",
					},
					IP: "1.2.3.4",
					Spec: taskSpec{
						Process: &api.Process{
							Command: []string{"hello"},
						},
					},
					Status: nil,
					tState: SucceedTaskState,
					sState: stateReleasing,
				},
			},
			expectTaskNode: &taskNode{
				ObjectMeta: v1.ObjectMeta{
					Name: "test-batch-sandbox-0",
				},
				IP: "1.2.3.4",
				Spec: taskSpec{
					Process: &api.Process{
						Command: []string{"hello"},
					},
				},
				Status:              nil,
				tState:              SucceedTaskState,
				sState:              stateReleased,
				sStateLastTransTime: &mockTimeNow,
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// The node is mutated in place; compare it with the expected end state.
			scheduleSingleTaskNode(tt.args.tNode, tt.args.taskClientCreator, "", testLogger)
			if !reflect.DeepEqual(tt.expectTaskNode, tt.args.tNode) {
				t.Errorf("scheduleSingleTaskNode, want %+v, got %+v", tt.expectTaskNode, tt.args.tNode)
			}
		})
	}
}

// Test_assignTaskNodes verifies both results of assignTaskNodes: the slice of
// Pods that remain free (the return value) and the in-place assignment of Pod
// IP/name on the task nodes.
func Test_assignTaskNodes(t *testing.T) {
	type args struct {
		taskNodes []*taskNode
		freePods  []*corev1.Pod
	}
	tests := []struct {
		name            string
		args            args
		want            []*corev1.Pod
		expectTaskNodes []*taskNode
	}{
		{
			// No Pod available: nil in, nil out, node stays unassigned.
			name: "empty free pods, no assignment",
			args: args{
				taskNodes: []*taskNode{
					{
						ObjectMeta: v1.ObjectMeta{Name: "test-0"},
					},
				},
			},
			expectTaskNodes: []*taskNode{
				{
					ObjectMeta: v1.ObjectMeta{Name: "test-0"},
				},
			},
		},
		{
			// The single free Pod is consumed, leaving an empty (non-nil) slice.
			name: "free pods, assign",
			args: args{
				taskNodes: []*taskNode{
					{
						ObjectMeta: v1.ObjectMeta{Name: "test-0"},
					},
				},
				freePods: []*corev1.Pod{
					{
						ObjectMeta: v1.ObjectMeta{Name: "pod-hello-world"},
						Status:     corev1.PodStatus{PodIP: "1.2.3.4"},
					},
				},
			},
			want: []*corev1.Pod{},
			expectTaskNodes: []*taskNode{
				{
					ObjectMeta: v1.ObjectMeta{Name: "test-0"},
					IP:         "1.2.3.4",
					PodName:    "pod-hello-world",
				},
			},
		},
		{
			// Already assigned nodes keep their Pod and consume nothing.
			name: "free pods, no unassigned task nodes, no assignment",
			args: args{
				taskNodes: []*taskNode{
					{
						ObjectMeta: v1.ObjectMeta{Name: "test-0"},
						IP:         "4.3.2.1",
						PodName:    "pod-foo-bar",
					},
				},
				freePods: []*corev1.Pod{
					{
						ObjectMeta: v1.ObjectMeta{Name: "pod-hello-world"},
						Status:     corev1.PodStatus{PodIP: "1.2.3.4"},
					},
				},
			},
			want: []*corev1.Pod{
				{
					ObjectMeta: v1.ObjectMeta{Name: "pod-hello-world"},
					Status:     corev1.PodStatus{PodIP: "1.2.3.4"},
				},
			},
			expectTaskNodes: []*taskNode{
				{
					ObjectMeta: v1.ObjectMeta{Name: "test-0"},
					IP:         "4.3.2.1",
					PodName:    "pod-foo-bar",
				},
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := assignTaskNodes(tt.args.taskNodes, tt.args.freePods, testLogger); !reflect.DeepEqual(got, tt.want) {
				t.Errorf("assignTaskNodes() = %v, want %v", got, tt.want)
			}
			// Report actual nodes as "got" and expected nodes as "want"; the
			// arguments used to be swapped, which made failures misleading.
			if !reflect.DeepEqual(tt.expectTaskNodes, tt.args.taskNodes) {
				t.Errorf("assignTaskNodes() taskNodes = %v, want %v", tt.args.taskNodes, tt.expectTaskNodes)
			}
		})
	}
}

// Test_refreshFreePods checks which Pods refreshFreePods considers free: Pods
// referenced by an assigned task node and Pods without an IP must be excluded,
// and the remaining Pods must keep their original order.
func Test_refreshFreePods(t *testing.T) {
	// Fixture helpers.
	newPod := func(name, ip string) *corev1.Pod {
		return &corev1.Pod{
			ObjectMeta: metav1.ObjectMeta{Name: name},
			Status:     corev1.PodStatus{PodIP: ip},
		}
	}
	pendingTask := func(name string) *taskNode {
		return &taskNode{ObjectMeta: metav1.ObjectMeta{Name: name}}
	}
	assignedTask := func(name, ip, podName string) *taskNode {
		return &taskNode{
			ObjectMeta: metav1.ObjectMeta{Name: name},
			IP:         ip,
			PodName:    podName,
		}
	}

	cases := []struct {
		name          string
		allPods       []*corev1.Pod
		taskNodes     []*taskNode
		expectedFree  int
		expectedNames []string
	}{
		{
			name:          "no assigned pods",
			allPods:       []*corev1.Pod{newPod("pod-1", "1.1.1.1"), newPod("pod-2", "1.1.1.2")},
			taskNodes:     []*taskNode{pendingTask("task-1"), pendingTask("task-2")},
			expectedFree:  2,
			expectedNames: []string{"pod-1", "pod-2"},
		},
		{
			name: "some assigned pods",
			allPods: []*corev1.Pod{
				newPod("pod-1", "1.1.1.1"),
				newPod("pod-2", "1.1.1.2"),
				newPod("pod-3", "1.1.1.3"),
			},
			taskNodes: []*taskNode{
				assignedTask("task-1", "1.1.1.1", "pod-1"),
				pendingTask("task-2"),
			},
			expectedFree:  2,
			expectedNames: []string{"pod-2", "pod-3"},
		},
		{
			name:    "all pods assigned",
			allPods: []*corev1.Pod{newPod("pod-1", "1.1.1.1"), newPod("pod-2", "1.1.1.2")},
			taskNodes: []*taskNode{
				assignedTask("task-1", "1.1.1.1", "pod-1"),
				assignedTask("task-2", "1.1.1.2", "pod-2"),
			},
			expectedFree:  0,
			expectedNames: []string{},
		},
		{
			name:          "pods without IP addresses",
			allPods:       []*corev1.Pod{newPod("pod-1", "1.1.1.1"), newPod("pod-2", "")},
			taskNodes:     []*taskNode{pendingTask("task-1"), pendingTask("task-2")},
			expectedFree:  1,
			expectedNames: []string{"pod-1"},
		},
		{
			name:          "empty pods list",
			allPods:       []*corev1.Pod{},
			taskNodes:     []*taskNode{pendingTask("task-1")},
			expectedFree:  0,
			expectedNames: []string{},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			sch := &defaultTaskScheduler{allPods: tc.allPods, taskNodes: tc.taskNodes}
			sch.refreshFreePods()

			if got := len(sch.freePods); got != tc.expectedFree {
				t.Errorf("refreshFreePods() freePods length = %v, want %v", got, tc.expectedFree)
			}

			gotNames := make([]string, 0, len(sch.freePods))
			for _, pod := range sch.freePods {
				gotNames = append(gotNames, pod.Name)
			}
			if !reflect.DeepEqual(gotNames, tc.expectedNames) {
				t.Errorf("refreshFreePods() freePods names = %v, want %v", gotNames, tc.expectedNames)
			}
		})
	}
}

func Test_collectTaskStatus(t *testing.T) {
	ctl := gomock.NewController(t)
	defer ctl.Finish()

	mockTimeNow := time.Now()
	o := timeNow
	timeNow = func() time.Time {
		return mockTimeNow
	}
	defer func() {
		timeNow = o
	}()

	tests := []struct {
		name               string
		taskNodes          []*taskNode
		expectedCollectIPs []string
		mockReturnTasks    map[string]*api.Task
		expectedTaskNodes  []*taskNode
	}{
		{
			name: "no assigned task nodes",
			taskNodes: []*taskNode{
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-1"},
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-2"},
				},
			},
			expectedCollectIPs: []string{},
			mockReturnTasks:    map[string]*api.Task{},
			expectedTaskNodes: []*taskNode{
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-1"},
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-2"},
				},
			},
		},
		{
			name: "assigned task nodes with task status",
			taskNodes: []*taskNode{
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-1"},
					IP:         "1.1.1.1",
					PodName:    "pod-1",
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-2"},
					IP:         "1.1.1.2",
					PodName:    "pod-2",
				},
			},
			expectedCollectIPs: []string{"1.1.1.1", "1.1.1.2"},
			mockReturnTasks: map[string]*api.Task{
				"1.1.1.1": {
					Name: "task-1",
					ProcessStatus: &api.ProcessStatus{
						Running: &api.Running{
							StartedAt: metav1.NewTime(mockTimeNow),
						},
					},
				},
				"1.1.1.2": {
					Name: "task-2",
					ProcessStatus: &api.ProcessStatus{
						Terminated: &api.Terminated{
							ExitCode:   0,
							FinishedAt: metav1.NewTime(mockTimeNow),
						},
					},
				},
			},
			expectedTaskNodes: []*taskNode{
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-1"},
					IP:         "1.1.1.1",
					PodName:    "pod-1",
					Status: &api.Task{
						Name: "task-1",
						ProcessStatus: &api.ProcessStatus{
							Running: &api.Running{
								StartedAt: metav1.NewTime(mockTimeNow),
							},
						},
					},
					tState:              RunningTaskState,
					tStateLastTransTime: &mockTimeNow,
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-2"},
					IP:         "1.1.1.2",
					PodName:    "pod-2",
					Status: &api.Task{
						Name: "task-2",
						ProcessStatus: &api.ProcessStatus{
							Terminated: &api.Terminated{
								ExitCode:   0,
								FinishedAt: metav1.NewTime(mockTimeNow),
							},
						},
					},
					tState:              SucceedTaskState,
					tStateLastTransTime: &mockTimeNow,
				},
			},
		},
		{
			name: "assigned task nodes with nil task status",
			taskNodes: []*taskNode{
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-1"},
					IP:         "1.1.1.1",
					PodName:    "pod-1",
				},
			},
			expectedCollectIPs: []string{"1.1.1.1"},
			mockReturnTasks: map[string]*api.Task{
				"1.1.1.1": nil,
			},
			expectedTaskNodes: []*taskNode{
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-1"},
					IP:         "1.1.1.1",
					PodName:    "pod-1",
				},
			},
		},
		{
			name: "mixed assigned and unassigned task nodes",
			taskNodes: []*taskNode{
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-1"},
					IP:         "1.1.1.1",
					PodName:    "pod-1",
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-2"},
				},
			},
			expectedCollectIPs: []string{"1.1.1.1"},
			mockReturnTasks: map[string]*api.Task{
				"1.1.1.1": {
					Name: "task-1",
					ProcessStatus: &api.ProcessStatus{
						Running: &api.Running{
							StartedAt: metav1.NewTime(mockTimeNow),
						},
					},
				},
			},
			expectedTaskNodes: []*taskNode{
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-1"},
					IP:         "1.1.1.1",
					PodName:    "pod-1",
					Status: &api.Task{
						Name: "task-1",
						ProcessStatus: &api.ProcessStatus{
							Running: &api.Running{
								StartedAt: metav1.NewTime(mockTimeNow),
							},
						},
					},
					tState:              RunningTaskState,
					tStateLastTransTime: &mockTimeNow,
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-2"},
				},
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Create mock task status collector
			mockCollector := NewMocktaskStatusCollector(ctl)
			if len(tt.expectedCollectIPs) > 0 {
				mockCollector.EXPECT().Collect(gomock.Any(), tt.expectedCollectIPs).Return(tt.mockReturnTasks).Times(1)
			}

			// Create scheduler with mock collector
			sch := &defaultTaskScheduler{
				taskNodes:           tt.taskNodes,
				taskStatusCollector: mockCollector,
				logger:              testLogger,
			}

			// Call collectTaskStatus
			sch.collectTaskStatus(tt.taskNodes)

			// Verify results
			for i, expectedNode := range tt.expectedTaskNodes {
				actualNode := tt.taskNodes[i]

				if actualNode.Name != expectedNode.Name {
					t.Errorf("taskNode[%d].Name = %v, want %v", i, actualNode.Name, expectedNode.Name)
				}

				if actualNode.IP != expectedNode.IP {
					t.Errorf("taskNode[%d].IP = %v, want %v", i, actualNode.IP, expectedNode.IP)
				}

				if actualNode.PodName != expectedNode.PodName {
					t.Errorf("taskNode[%d].PodName = %v, want %v", i, actualNode.PodName, expectedNode.PodName)
				}

				if expectedNode.Status == nil {
					if actualNode.Status != nil {
						t.Errorf("taskNode[%d].Status = %v, want nil", i, actualNode.Status)
					}
				} else {
					if actualNode.Status == nil {
						t.Errorf("taskNode[%d].Status = nil, want %v", i, expectedNode.Status)
					} else if actualNode.Status.Name != expectedNode.Status.Name {
						t.Errorf("taskNode[%d].Status.Name = %v, want %v", i, actualNode.Status.Name, expectedNode.Status.Name)
					}
				}

				if actualNode.tState != expectedNode.tState {
					t.Errorf("taskNode[%d].tState = %v, want %v", i, actualNode.tState, expectedNode.tState)
				}

				// Compare time pointers
				if expectedNode.tStateLastTransTime == nil {
					if actualNode.tStateLastTransTime != nil {
						t.Errorf("taskNode[%d].tStateLastTransTime = %v, want nil", i, actualNode.tStateLastTransTime)
					}
				} else {
					if actualNode.tStateLastTransTime == nil {
						t.Errorf("taskNode[%d].tStateLastTransTime = nil, want %v", i, expectedNode.tStateLastTransTime)
					} else if !actualNode.tStateLastTransTime.Equal(*expectedNode.tStateLastTransTime) {
						t.Errorf("taskNode[%d].tStateLastTransTime = %v, want %v", i, actualNode.tStateLastTransTime, expectedNode.tStateLastTransTime)
					}
				}
			}
		})
	}
}

// Test_indexByName checks that indexByName returns a map keyed by each task
// node's Name, covering empty input, one node, several nodes, and two nodes
// that share a name (which must collapse to a single entry).
func Test_indexByName(t *testing.T) {
	// named builds a task node that carries nothing but its name.
	named := func(nodeName string) *taskNode {
		return &taskNode{ObjectMeta: metav1.ObjectMeta{Name: nodeName}}
	}

	cases := []struct {
		name      string
		taskNodes []*taskNode
		expected  map[string]*taskNode
	}{
		{
			name:      "empty task nodes",
			taskNodes: []*taskNode{},
			expected:  map[string]*taskNode{},
		},
		{
			name:      "single task node",
			taskNodes: []*taskNode{named("task-1")},
			expected: map[string]*taskNode{
				"task-1": named("task-1"),
			},
		},
		{
			name:      "multiple task nodes",
			taskNodes: []*taskNode{named("task-1"), named("task-2"), named("task-3")},
			expected: map[string]*taskNode{
				"task-1": named("task-1"),
				"task-2": named("task-2"),
				"task-3": named("task-3"),
			},
		},
		{
			name:      "duplicate task node names",
			taskNodes: []*taskNode{named("task-1"), named("task-1")},
			expected: map[string]*taskNode{
				"task-1": named("task-1"),
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			indexed := indexByName(tc.taskNodes)

			if gotLen, wantLen := len(indexed), len(tc.expected); gotLen != wantLen {
				t.Errorf("indexByName() map length = %v, want %v", gotLen, wantLen)
			}

			for key, wantNode := range tc.expected {
				gotNode, found := indexed[key]
				if !found {
					t.Errorf("indexByName() missing key %v", key)
					continue
				}
				if gotNode.Name == wantNode.Name {
					continue
				}
				t.Errorf("indexByName()[%v].Name = %v, want %v", key, gotNode.Name, wantNode.Name)
			}
		})
	}
}

func Test_scheduleTaskNodes(t *testing.T) {
	ctl := gomock.NewController(t)
	defer ctl.Finish()

	// Mock time for consistent testing
	mockTimeNow := time.Now()
	o := timeNow
	timeNow = func() time.Time {
		return mockTimeNow
	}
	defer func() {
		timeNow = o
	}()

	tests := []struct {
		name                      string
		taskNodes                 []*taskNode
		freePods                  []*corev1.Pod
		batchSbx                  *sandboxv1alpha1.BatchSandbox
		expectedTaskNodes         []*taskNode
		expectedRemainingFreePods int
		expectedSetCalls          map[string]*api.Task // IP -> Expected Task
	}{
		{
			name: "assign free pods to unassigned task nodes",
			taskNodes: []*taskNode{
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-1"},
					Spec: taskSpec{
						Process: &api.Process{
							Command: []string{"echo", "hello"},
						},
					},
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-2"},
					Spec: taskSpec{
						Process: &api.Process{
							Command: []string{"echo", "world"},
						},
					},
				},
			},
			freePods: []*corev1.Pod{
				{
					ObjectMeta: metav1.ObjectMeta{Name: "pod-1"},
					Status:     corev1.PodStatus{PodIP: "1.1.1.1"},
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "pod-2"},
					Status:     corev1.PodStatus{PodIP: "1.1.1.2"},
				},
			},
			batchSbx: &sandboxv1alpha1.BatchSandbox{
				ObjectMeta: v1.ObjectMeta{Name: "test-batch"},
			},
			expectedTaskNodes: []*taskNode{
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-1"},
					Spec: taskSpec{
						Process: &api.Process{
							Command: []string{"echo", "hello"},
						},
					},
					IP:      "1.1.1.1",
					PodName: "pod-1",
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-2"},
					Spec: taskSpec{
						Process: &api.Process{
							Command: []string{"echo", "world"},
						},
					},
					IP:      "1.1.1.2",
					PodName: "pod-2",
				},
			},
			expectedRemainingFreePods: 0,
			expectedSetCalls: map[string]*api.Task{
				"1.1.1.1": {
					Name: "task-1",
					Process: &api.Process{
						Command: []string{"echo", "hello"},
					},
				},
				"1.1.1.2": {
					Name: "task-2",
					Process: &api.Process{
						Command: []string{"echo", "world"},
					},
				},
			},
		},
		{
			name: "no free pods available",
			taskNodes: []*taskNode{
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-1"},
					Spec: taskSpec{
						Process: &api.Process{
							Command: []string{"echo", "hello"},
						},
					},
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-2"},
					Spec: taskSpec{
						Process: &api.Process{
							Command: []string{"echo", "world"},
						},
					},
				},
			},
			freePods: []*corev1.Pod{},
			batchSbx: &sandboxv1alpha1.BatchSandbox{
				ObjectMeta: v1.ObjectMeta{Name: "test-batch"},
			},
			expectedTaskNodes: []*taskNode{
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-1"},
					Spec: taskSpec{
						Process: &api.Process{
							Command: []string{"echo", "hello"},
						},
					},
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-2"},
					Spec: taskSpec{
						Process: &api.Process{
							Command: []string{"echo", "world"},
						},
					},
				},
			},
			expectedRemainingFreePods: 0,
			expectedSetCalls:          map[string]*api.Task{},
		},
		{
			name: "some task nodes already assigned",
			taskNodes: []*taskNode{
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-1"},
					Spec: taskSpec{
						Process: &api.Process{
							Command: []string{"echo", "hello"},
						},
					},
					IP:      "1.1.1.1",
					PodName: "pod-1",
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-2"},
					Spec: taskSpec{
						Process: &api.Process{
							Command: []string{"echo", "world"},
						},
					},
				},
			},
			freePods: []*corev1.Pod{
				{
					ObjectMeta: metav1.ObjectMeta{Name: "pod-2"},
					Status:     corev1.PodStatus{PodIP: "1.1.1.2"},
				},
			},
			batchSbx: &sandboxv1alpha1.BatchSandbox{
				ObjectMeta: v1.ObjectMeta{Name: "test-batch"},
			},
			expectedTaskNodes: []*taskNode{
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-1"},
					Spec: taskSpec{
						Process: &api.Process{
							Command: []string{"echo", "hello"},
						},
					},
					IP:      "1.1.1.1",
					PodName: "pod-1",
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-2"},
					Spec: taskSpec{
						Process: &api.Process{
							Command: []string{"echo", "world"},
						},
					},
					IP:      "1.1.1.2",
					PodName: "pod-2",
				},
			},
			expectedRemainingFreePods: 0,
			expectedSetCalls: map[string]*api.Task{
				"1.1.1.1": {
					Name: "task-1",
					Process: &api.Process{
						Command: []string{"echo", "hello"},
					},
				},
				"1.1.1.2": {
					Name: "task-2",
					Process: &api.Process{
						Command: []string{"echo", "world"},
					},
				},
			},
		},
		{
			name: "more free pods than unassigned tasks",
			taskNodes: []*taskNode{
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-1"},
					Spec: taskSpec{
						Process: &api.Process{
							Command: []string{"echo", "hello"},
						},
					},
					IP:      "1.1.1.1",
					PodName: "pod-1",
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-2"},
					Spec: taskSpec{
						Process: &api.Process{
							Command: []string{"echo", "world"},
						},
					},
				},
			},
			freePods: []*corev1.Pod{
				{
					ObjectMeta: metav1.ObjectMeta{Name: "pod-2"},
					Status:     corev1.PodStatus{PodIP: "1.1.1.2"},
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "pod-3"},
					Status:     corev1.PodStatus{PodIP: "1.1.1.3"},
				},
			},
			batchSbx: &sandboxv1alpha1.BatchSandbox{
				ObjectMeta: v1.ObjectMeta{Name: "test-batch"},
			},
			expectedTaskNodes: []*taskNode{
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-1"},
					Spec: taskSpec{
						Process: &api.Process{
							Command: []string{"echo", "hello"},
						},
					},
					IP:      "1.1.1.1",
					PodName: "pod-1",
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "task-2"},
					Spec: taskSpec{
						Process: &api.Process{
							Command: []string{"echo", "world"},
						},
					},
					IP:      "1.1.1.2",
					PodName: "pod-2",
				},
			},
			expectedRemainingFreePods: 1,
			expectedSetCalls: map[string]*api.Task{
				"1.1.1.1": {
					Name: "task-1",
					Process: &api.Process{
						Command: []string{"echo", "hello"},
					},
				},
				"1.1.1.2": {
					Name: "task-2",
					Process: &api.Process{
						Command: []string{"echo", "world"},
					},
				},
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Create mock task clients for each pod IP and task node
			mockClients := make(map[string]*MocktaskClient)

			// Create task client creator function that returns mock clients
			taskClientCreator := func(ip string) taskClient {
				if mockClient, ok := mockClients[ip]; ok {
					return mockClient
				}
				mockClient := NewMocktaskClient(ctl)
				mockClients[ip] = mockClient
				return mockClient
			}

			// Set expectations for Set calls
			for ip, expectedTask := range tt.expectedSetCalls {
				mockClient := mockClients[ip]
				if mockClient == nil {
					mockClient = NewMocktaskClient(ctl)
					mockClients[ip] = mockClient
				}
				mockClient.EXPECT().Set(gomock.Any(), expectedTask).Return(expectedTask, nil).Times(1)
			}

			// Create scheduler
			sch := &defaultTaskScheduler{
				taskNodes:         tt.taskNodes,
				freePods:          tt.freePods,
				maxConcurrency:    defaultSchConcurrency,
				taskClientCreator: taskClientCreator,
				logger:            testLogger,
			}

			// Call scheduleTaskNodes
			err := sch.scheduleTaskNodes()

			// Verify no error
			if err != nil {
				t.Errorf("scheduleTaskNodes() error = %v, want nil", err)
			}

			// Verify results
			for i, expectedNode := range tt.expectedTaskNodes {
				actualNode := tt.taskNodes[i]

				if actualNode.Name != expectedNode.Name {
					t.Errorf("taskNode[%d].Name = %v, want %v", i, actualNode.Name, expectedNode.Name)
				}

				if actualNode.IP != expectedNode.IP {
					t.Errorf("taskNode[%d].IP = %v, want %v", i, actualNode.IP, expectedNode.IP)
				}

				if actualNode.PodName != expectedNode.PodName {
					t.Errorf("taskNode[%d].PodName = %v, want %v", i, actualNode.PodName, expectedNode.PodName)
				}
			}

			// Verify remaining free pods
			if len(sch.freePods) != tt.expectedRemainingFreePods {
				t.Errorf("scheduleTaskNodes() remaining freePods length = %v, want %v", len(sch.freePods), tt.expectedRemainingFreePods)
			}
		})
	}
}

func Test_parseTaskState(t *testing.T) {
	mockTimeNow := time.Now()

	tests := []struct {
		name     string
		task     *api.Task
		expected TaskState
	}{
		{
			name: "running task",
			task: &api.Task{
				ProcessStatus: &api.ProcessStatus{
					Running: &api.Running{
						StartedAt: metav1.NewTime(mockTimeNow),
					},
				},
			},
			expected: RunningTaskState,
		},
		{
			name: "succeed task",
			task: &api.Task{
				ProcessStatus: &api.ProcessStatus{
					Terminated: &api.Terminated{
						ExitCode:   0,
						FinishedAt: metav1.NewTime(mockTimeNow),
					},
				},
			},
			expected: SucceedTaskState,
		},
		{
			name: "failed task",
			task: &api.Task{
				ProcessStatus: &api.ProcessStatus{
					Terminated: &api.Terminated{
						ExitCode:   1,
						FinishedAt: metav1.NewTime(mockTimeNow),
					},
				},
			},
			expected: FailedTaskState,
		},
		{
			name: "unknown task state",
			task: &api.Task{
				ProcessStatus: &api.ProcessStatus{},
			},
			expected: UnknownTaskState,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := parseTaskState(tt.task)
			if result != tt.expected {
				t.Errorf("parseTaskState() = %v, want %v", result, tt.expected)
			}
		})
	}
}

// Test_initTaskNodes checks that initTaskNodes turns each api.Task into a
// taskNode carrying the task's name and process spec, preserving input order,
// and that an empty input produces an empty (non-nil) slice.
func Test_initTaskNodes(t *testing.T) {
	cases := []struct {
		name    string
		tasks   []*api.Task
		want    []*taskNode
		wantErr bool
	}{
		{
			name: "init success",
			tasks: []*api.Task{
				{
					Name:    "test-task-0",
					Process: &api.Process{Command: []string{"tail", "-f", "/dev/null"}},
				},
			},
			want: []*taskNode{
				{
					ObjectMeta: v1.ObjectMeta{Name: "test-task-0"},
					Spec: taskSpec{
						Process: &api.Process{Command: []string{"tail", "-f", "/dev/null"}},
					},
				},
			},
		},
		{
			name: "init multiple tasks",
			tasks: []*api.Task{
				{
					Name:    "test-task-0",
					Process: &api.Process{Command: []string{"echo", "hello"}},
				},
				{
					Name:    "test-task-1",
					Process: &api.Process{Command: []string{"echo", "world"}},
				},
			},
			want: []*taskNode{
				{
					ObjectMeta: v1.ObjectMeta{Name: "test-task-0"},
					Spec: taskSpec{
						Process: &api.Process{Command: []string{"echo", "hello"}},
					},
				},
				{
					ObjectMeta: v1.ObjectMeta{Name: "test-task-1"},
					Spec: taskSpec{
						Process: &api.Process{Command: []string{"echo", "world"}},
					},
				},
			},
		},
		{
			name:  "init empty tasks",
			tasks: []*api.Task{},
			want:  []*taskNode{},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			nodes, err := initTaskNodes(tc.tasks)
			gotErr := err != nil
			if gotErr != tc.wantErr {
				t.Errorf("initTaskNodes() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if reflect.DeepEqual(nodes, tc.want) {
				return
			}
			t.Errorf("initTaskNodes() = %v, want %v", nodes, tc.want)
		})
	}
}


================================================
FILE: kubernetes/internal/scheduler/interface.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scheduler

import (
	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	apis "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/task-executor"

	"github.com/go-logr/logr"
	corev1 "k8s.io/api/core/v1"
)

// TaskScheduler is the exported entry point to the task scheduler in this
// package. Instances are obtained through NewTaskScheduler.
//
// NOTE(review): the per-method notes below are inferred from names and
// signatures only; the implementation is not visible here — confirm against
// the concrete scheduler before relying on them.
type TaskScheduler interface {
	// Schedule presumably runs one scheduling pass; it reports failure via error.
	Schedule() error
	// UpdatePods hands the scheduler a list of pods; presumably the current pod set.
	UpdatePods(pod []*corev1.Pod)
	// ListTask returns the scheduler's tasks as Task values.
	ListTask() []Task
	// StopTask returns a list of Task values; presumably the tasks being stopped.
	StopTask() []Task
}

// NewTaskScheduler is the exported constructor for TaskScheduler. It forwards
// every argument unchanged to the package-private newTaskScheduler and returns
// its result.
//
// NOTE(review): if newTaskScheduler returns a concrete pointer type, a nil
// pointer returned alongside an error becomes a non-nil TaskScheduler
// interface value here — callers should test err, not the returned interface.
func NewTaskScheduler(name string, tasks []*apis.Task, pods []*corev1.Pod, resPolicyWhenTaskCompleted sandboxv1alpha1.TaskResourcePolicy, logger logr.Logger) (TaskScheduler, error) {
	return newTaskScheduler(name, tasks, pods, resPolicyWhenTaskCompleted, logger)
}


================================================
FILE: kubernetes/internal/scheduler/mock/interface.go
================================================
// Code generated by MockGen. DO NOT EDIT.
// Source: internal/task/scheduler/interface.go

// Package mock_scheduler is a generated GoMock package.
package mock_scheduler

import (
	reflect "reflect"

	gomock "github.com/golang/mock/gomock"
	v1 "k8s.io/api/core/v1"

	scheduler "github.com/alibaba/OpenSandbox/sandbox-k8s/internal/scheduler"
)

// MockTaskScheduler is a mock of TaskScheduler interface.
//
// Every mocked method forwards to the gomock controller held in ctrl;
// expectations are registered through the recorder returned by EXPECT().
type MockTaskScheduler struct {
	ctrl     *gomock.Controller
	recorder *MockTaskSchedulerMockRecorder
}

// MockTaskSchedulerMockRecorder is the mock recorder for MockTaskScheduler.
type MockTaskSchedulerMockRecorder struct {
	mock *MockTaskScheduler
}

// NewMockTaskScheduler creates a new mock instance.
// The mock and its recorder reference each other so that EXPECT() can hand
// out the recorder bound to this mock.
func NewMockTaskScheduler(ctrl *gomock.Controller) *MockTaskScheduler {
	mock := &MockTaskScheduler{ctrl: ctrl}
	mock.recorder = &MockTaskSchedulerMockRecorder{mock}
	return mock
}

// EXPECT returns an object that allows the caller to indicate expected use.
func (m *MockTaskScheduler) EXPECT() *MockTaskSchedulerMockRecorder {
	return m.recorder
}

// ListTask mocks base method.
func (m *MockTaskScheduler) ListTask() []scheduler.Task {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "ListTask")
	// The assertion result is discarded: a value of another type (or nil)
	// leaves ret0 as a nil slice.
	ret0, _ := ret[0].([]scheduler.Task)
	return ret0
}

// ListTask indicates an expected call of ListTask.
func (mr *MockTaskSchedulerMockRecorder) ListTask() *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListTask", reflect.TypeOf((*MockTaskScheduler)(nil).ListTask))
}

// Schedule mocks base method.
func (m *MockTaskScheduler) Schedule() error {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "Schedule")
	// The assertion result is discarded: a nil return value yields a nil error.
	ret0, _ := ret[0].(error)
	return ret0
}

// Schedule indicates an expected call of Schedule.
func (mr *MockTaskSchedulerMockRecorder) Schedule() *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Schedule", reflect.TypeOf((*MockTaskScheduler)(nil).Schedule))
}

// StopTask mocks base method.
func (m *MockTaskScheduler) StopTask() []scheduler.Task {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "StopTask")
	// The assertion result is discarded: a value of another type (or nil)
	// leaves ret0 as a nil slice.
	ret0, _ := ret[0].([]scheduler.Task)
	return ret0
}

// StopTask indicates an expected call of StopTask.
func (mr *MockTaskSchedulerMockRecorder) StopTask() *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "StopTask", reflect.TypeOf((*MockTaskScheduler)(nil).StopTask))
}

// UpdatePods mocks base method.
// The method has no return value, so the controller's result is ignored.
func (m *MockTaskScheduler) UpdatePods(pod []*v1.Pod) {
	m.ctrl.T.Helper()
	m.ctrl.Call(m, "UpdatePods", pod)
}

// UpdatePods indicates an expected call of UpdatePods.
// pod is typed interface{} so that either a concrete value or a gomock
// matcher can be supplied.
func (mr *MockTaskSchedulerMockRecorder) UpdatePods(pod interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpdatePods", reflect.TypeOf((*MockTaskScheduler)(nil).UpdatePods), pod)
}


================================================
FILE: kubernetes/internal/scheduler/mock/types.go
================================================
// Code generated by MockGen. DO NOT EDIT.
// Source: internal/task/scheduler/types.go

// Package mock_scheduler is a generated GoMock package.
package mock_scheduler

import (
	reflect "reflect"

	gomock "github.com/golang/mock/gomock"

	scheduler "github.com/alibaba/OpenSandbox/sandbox-k8s/internal/scheduler"
)

// MockTask is a mock of Task interface.
//
// Every mocked method forwards to the gomock controller held in ctrl;
// expectations are registered through the recorder returned by EXPECT().
type MockTask struct {
	ctrl     *gomock.Controller
	recorder *MockTaskMockRecorder
}

// MockTaskMockRecorder is the mock recorder for MockTask.
type MockTaskMockRecorder struct {
	mock *MockTask
}

// NewMockTask creates a new mock instance.
// The mock and its recorder reference each other so that EXPECT() can hand
// out the recorder bound to this mock.
func NewMockTask(ctrl *gomock.Controller) *MockTask {
	mock := &MockTask{ctrl: ctrl}
	mock.recorder = &MockTaskMockRecorder{mock}
	return mock
}

// EXPECT returns an object that allows the caller to indicate expected use.
func (m *MockTask) EXPECT() *MockTaskMockRecorder {
	return m.recorder
}

// GetName mocks base method.
func (m *MockTask) GetName() string {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "GetName")
	// The assertion result is discarded: a non-string value yields "".
	ret0, _ := ret[0].(string)
	return ret0
}

// GetName indicates an expected call of GetName.
func (mr *MockTaskMockRecorder) GetName() *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetName", reflect.TypeOf((*MockTask)(nil).GetName))
}

// GetPodName mocks base method.
func (m *MockTask) GetPodName() string {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "GetPodName")
	// The assertion result is discarded: a non-string value yields "".
	ret0, _ := ret[0].(string)
	return ret0
}

// GetPodName indicates an expected call of GetPodName.
func (mr *MockTaskMockRecorder) GetPodName() *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetPodName", reflect.TypeOf((*MockTask)(nil).GetPodName))
}

// GetState mocks base method.
func (m *MockTask) GetState() scheduler.TaskState {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "GetState")
	// The assertion result is discarded: a value of another type yields the
	// zero TaskState.
	ret0, _ := ret[0].(scheduler.TaskState)
	return ret0
}

// GetState indicates an expected call of GetState.
func (mr *MockTaskMockRecorder) GetState() *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetState", reflect.TypeOf((*MockTask)(nil).GetState))
}

// IsResourceReleased mocks base method.
func (m *MockTask) IsResourceReleased() bool {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "IsResourceReleased")
	// The assertion result is discarded: a non-bool value yields false.
	ret0, _ := ret[0].(bool)
	return ret0
}

// IsResourceReleased indicates an expected call of IsResourceReleased.
func (mr *MockTaskMockRecorder) IsResourceReleased() *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "IsResourceReleased", reflect.TypeOf((*MockTask)(nil).IsResourceReleased))
}


================================================
FILE: kubernetes/internal/scheduler/recovery.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scheduler

import (
	"context"

	"github.com/go-logr/logr"
	v1 "k8s.io/api/core/v1"

	api "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/task-executor"
)

// recover reconstructs the task scheduler state from existing pods and their endpoints.
// This function is used to restore the scheduler state after a restart.
//
// The recovery body is guarded by sch.once and therefore runs at most once per
// scheduler. The error produced by that single run is returned to the caller
// that triggered it; every later call skips the body and returns nil, so a
// failed recovery is not retried.
func (sch *defaultTaskScheduler) recover() error {
	var err error
	sch.once.Do(func() {
		// Propagate the recovery error instead of silently dropping it, and
		// only announce success when recovery actually succeeded.
		if err = sch.recoverTaskNodesStatus(); err != nil {
			return
		}
		sch.logger.Info("task scheduler recovered", "scheduler", sch.name, "task_nodes", len(sch.taskNodes), "all_pods", len(sch.allPods))
	})
	return err
}

// recoverTaskNodesStatus queries the task status collector for every pod in
// sch.allPods that has an IP, and for each returned task whose name matches a
// known task node (sch.taskNodeByNameIndex) restores that node's status, IP
// and pod name via recoverOneTaskNode.
//
// Nil pods and pods without an IP are skipped. Tasks reported by a pod that do
// not match any known task node are ignored. The function currently always
// returns nil.
func (sch *defaultTaskScheduler) recoverTaskNodesStatus() error {
	// ips and pods are parallel slices: pods[i] is the pod that owns ips[i].
	ips := make([]string, 0, len(sch.allPods)/2)
	pods := make([]*v1.Pod, 0, len(sch.allPods)/2)
	for _, pod := range sch.allPods {
		// The nil guard must come before the first dereference to be effective.
		if pod == nil || pod.Status.PodIP == "" {
			continue
		}
		ips = append(ips, pod.Status.PodIP)
		pods = append(pods, pod)
	}
	if len(ips) == 0 {
		return nil
	}
	// TODO: When the agent starts stopping a task, if a recovery occurs at this moment,
	// the recovery may complete after the agent has already finished stopping the task and returned an empty task list.
	// This could cause the scheduler to be unable to determine whether the task was never executed or has already completed.
	// It might lead to duplicate execution, but it ensures at-least-once delivery semantics.
	tasks := sch.taskStatusCollector.Collect(context.Background(), ips)
	for i, ip := range ips {
		task := tasks[ip]
		if task == nil {
			// Nothing was reported for this pod.
			continue
		}
		tNode := sch.taskNodeByNameIndex[task.Name]
		if tNode == nil {
			// TODO: do we need to stop tasks that do not belong to us? e.g. users ScaleIn []*sandboxv1alpha1.Task
			continue
		}
		pod := pods[i]
		recoverOneTaskNode(tNode, task, pod.Status.PodIP, pod.Name, sch.logger)
	}
	return nil
}

// recoverOneTaskNode restores a single task node from the task reported by its
// pod: it stores currentTask as the node's Status, transitions the node's task
// state to the one parsed from currentTask, and records the pod's IP and name.
// When currentTask carries a DeletionTimestamp the node is additionally moved
// to the stateReleasing scheduling state.
func recoverOneTaskNode(tNode *taskNode, currentTask *api.Task, ip string, podName string, log logr.Logger) {
	tNode.Status = currentTask

	recoveredState := parseTaskState(currentTask)
	tNode.transTaskState(recoveredState, log)

	tNode.IP, tNode.PodName = ip, podName

	// A task without a deletion timestamp is not being released; nothing more to do.
	if currentTask.DeletionTimestamp == nil {
		return
	}
	tNode.transSchState(stateReleasing, log)
}


================================================
FILE: kubernetes/internal/scheduler/recovery_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scheduler

import (
	"reflect"
	"sync"
	"testing"
	"time"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"github.com/golang/mock/gomock"

	api "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/task-executor"
)

// Test_recoverOneTaskNode verifies that recoverOneTaskNode copies the reported
// task, IP and pod name onto the node and drives the node's state fields to
// the expected values for a running task and for a running task that is
// already marked for deletion.
func Test_recoverOneTaskNode(t *testing.T) {
	// Freeze the package clock so the recorded transition times are predictable.
	frozen := time.Now()
	savedTimeNow := timeNow
	timeNow = func() time.Time {
		return frozen
	}
	defer func() {
		timeNow = savedTimeNow
	}()

	testNow := metav1.Time{Time: frozen}
	// newRunningTask builds a running "sleep" task; a non-nil deletion marks it as deleting.
	newRunningTask := func(deletion *metav1.Time) *api.Task {
		return &api.Task{
			Name:              "test",
			DeletionTimestamp: deletion,
			Process: &api.Process{
				Command: []string{"sleep"},
			},
			ProcessStatus: &api.ProcessStatus{
				Running: &api.Running{
					StartedAt: testNow,
				},
			},
		}
	}
	runningTask := newRunningTask(nil)
	releasingTask := newRunningTask(&testNow)

	cases := []struct {
		name        string
		node        *taskNode
		currentTask *api.Task
		ip          string
		podName     string
		want        *taskNode
	}{
		{
			name:        "running task",
			node:        &taskNode{},
			currentTask: runningTask,
			ip:          "1.2.3.4",
			podName:     "foo-bar",
			want: &taskNode{
				Status:              runningTask,
				IP:                  "1.2.3.4",
				PodName:             "foo-bar",
				tState:              RunningTaskState,
				tStateLastTransTime: &frozen,
			},
		},
		{
			name:        "releasing task",
			node:        &taskNode{},
			currentTask: releasingTask,
			ip:          "1.2.3.4",
			podName:     "foo-bar",
			want: &taskNode{
				Status:              releasingTask,
				IP:                  "1.2.3.4",
				PodName:             "foo-bar",
				sState:              stateReleasing,
				sStateLastTransTime: &frozen,
				tState:              RunningTaskState,
				tStateLastTransTime: &frozen,
			},
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			recoverOneTaskNode(tc.node, tc.currentTask, tc.ip, tc.podName, testLogger)
			if tc.want != nil && !reflect.DeepEqual(tc.want, tc.node) {
				t.Errorf("recoverOneTaskNode, want %+v, got %+v", tc.want, tc.node)
			}
		})
	}
}

// Test_defaultTaskScheduler_recoverTaskNodesStatus exercises
// defaultTaskScheduler.recoverTaskNodesStatus with a mocked taskStatusCollector
// and compares the scheduler's task nodes after recovery with the expected ones.
func Test_defaultTaskScheduler_recoverTaskNodesStatus(t *testing.T) {
	// Freeze the package clock so expected transition times can be compared with DeepEqual.
	mockTimeNow := time.Now()
	o := timeNow
	timeNow = func() time.Time {
		return mockTimeNow
	}
	defer func() {
		timeNow = o
	}()
	ctl := gomock.NewController(t)
	defer ctl.Finish()
	testNow := metav1.Now()
	// testTaskNode is shared by pointer with the "recover successfully" case and
	// is compared against recoveredTestTaskNode after the call.
	testTaskNode := &taskNode{
		ObjectMeta: v1.ObjectMeta{
			Name: "bsbx-0",
		},
		Spec: taskSpec{
			Process: &api.Process{
				Command: []string{"hello"},
			},
		},
	}
	// testTask is the running task the mocked collector reports for IP 1.2.3.4.
	testTask := &api.Task{
		Name:    testTaskNode.Name,
		Process: testTaskNode.Spec.Process,
		ProcessStatus: &api.ProcessStatus{
			Running: &api.Running{
				StartedAt: testNow,
			},
		},
	}
	// recoveredTestTaskNode is the expected content of testTaskNode after recovery.
	recoveredTestTaskNode := &taskNode{
		ObjectMeta: v1.ObjectMeta{
			Name: "bsbx-0",
		},
		Spec: taskSpec{
			Process: &api.Process{
				Command: []string{"hello"},
			},
		},
		Status:              testTask,
		PodName:             "test-0",
		IP:                  "1.2.3.4",
		tState:              RunningTaskState,
		tStateLastTransTime: &mockTimeNow,
	}

	// fields mirrors the scheduler fields a test case may populate.
	// NOTE(review): once is never read below — confirm whether it can be dropped.
	type fields struct {
		freePods            []*corev1.Pod
		allPods             []*corev1.Pod
		taskNodes           []*taskNode
		taskNodeByNameIndex map[string]*taskNode
		maxConcurrency      int
		once                sync.Once
		taskStatusCollector taskStatusCollector
	}
	tests := []struct {
		name            string
		fields          fields
		wantErr         bool
		expectTaskNodes []*taskNode
	}{
		{
			// The pod has no IP and no collector is configured; the node must stay untouched.
			name: "recover nothing, pod pending",
			fields: fields{
				allPods: []*corev1.Pod{{
					ObjectMeta: v1.ObjectMeta{Name: "test-0"},
				}},
				taskNodes: []*taskNode{
					{
						ObjectMeta: v1.ObjectMeta{
							Name: "bsbx-0",
						},
					},
				},
			},
			expectTaskNodes: []*taskNode{
				{
					ObjectMeta: v1.ObjectMeta{
						Name: "bsbx-0",
					},
				},
			},
		},
		{
			// The collector is queried once and reports a nil task for the pod IP.
			name: "recover nothing, client return nil task via endpoint",
			fields: fields{
				allPods: []*corev1.Pod{{
					ObjectMeta: v1.ObjectMeta{
						Name: "test-0",
					},
					Status: corev1.PodStatus{
						PodIP: "1.2.3.4",
					},
				}},
				taskNodes: []*taskNode{
					{
						ObjectMeta: v1.ObjectMeta{
							Name: "bsbx-0",
						},
					},
				},
				// The expectation is registered on ctl while the table is being built.
				taskStatusCollector: func() taskStatusCollector {
					mock := NewMocktaskStatusCollector(ctl)
					mock.EXPECT().Collect(gomock.Any(), []string{"1.2.3.4"}).Return(map[string]*api.Task{"1.2.3.4": nil}).Times(1)
					return mock
				}(),
			},
			expectTaskNodes: []*taskNode{
				{
					ObjectMeta: v1.ObjectMeta{
						Name: "bsbx-0",
					},
				},
			},
		},
		{
			// The collector reports a running task whose name matches an indexed node.
			name: "recover successfully, client return running task via endpoint",
			fields: fields{
				allPods: []*corev1.Pod{{
					ObjectMeta: v1.ObjectMeta{
						Name: "test-0",
					},
					Status: corev1.PodStatus{
						PodIP: "1.2.3.4",
					},
				}},
				taskNodes: []*taskNode{testTaskNode},
				taskNodeByNameIndex: map[string]*taskNode{
					"bsbx-0": testTaskNode,
				},
				taskStatusCollector: func() taskStatusCollector {
					mock := NewMocktaskStatusCollector(ctl)
					mock.EXPECT().Collect(gomock.Any(), []string{"1.2.3.4"}).Return(map[string]*api.Task{"1.2.3.4": testTask}).Times(1)
					return mock
				}(),
			},
			expectTaskNodes: []*taskNode{
				recoveredTestTaskNode,
			},
		},
	}
	for i := range tests {
		// Take the address instead of copying the case, which embeds a sync.Once.
		tt := &tests[i]
		t.Run(tt.name, func(t *testing.T) {
			sch := &defaultTaskScheduler{
				freePods:            tt.fields.freePods,
				allPods:             tt.fields.allPods,
				taskNodes:           tt.fields.taskNodes,
				taskNodeByNameIndex: tt.fields.taskNodeByNameIndex,
				maxConcurrency:      tt.fields.maxConcurrency,
				taskStatusCollector: tt.fields.taskStatusCollector,
				logger:              testLogger,
			}
			if err := sch.recoverTaskNodesStatus(); (err != nil) != tt.wantErr {
				t.Errorf("defaultTaskScheduler.recoverTaskNodesStatus() error = %v, wantErr %v", err, tt.wantErr)
			}
			if tt.expectTaskNodes != nil {
				if !reflect.DeepEqual(tt.expectTaskNodes, sch.taskNodes) {
					t.Errorf("recoverTaskNodesStatus, want %+v, got %+v", tt.expectTaskNodes, sch.taskNodes)
				}
			}
		})
	}
}


================================================
FILE: kubernetes/internal/scheduler/status_collector.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scheduler

import (
	"context"
	"sync"

	"github.com/go-logr/logr"

	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils"
	api "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/task-executor"
)

// taskClientCreator builds the taskClient used to talk to the endpoint at the given IP.
type taskClientCreator func(ip string) taskClient

// newTaskStatusCollector returns the default taskStatusCollector, which uses
// creator to obtain a client per IP and logger for its log output.
func newTaskStatusCollector(creator taskClientCreator, logger logr.Logger) taskStatusCollector {
	collector := new(defaultTaskStatusCollector)
	collector.creator = creator
	collector.logger = logger
	return collector
}

// taskStatusCollector gathers the current task for a list of endpoint IPs.
// TODO error
type taskStatusCollector interface {
	// Collect returns the collected tasks keyed by IP. The signature carries no
	// error, so failures cannot be reported to the caller through the return value.
	Collect(ctx context.Context, ipList []string) map[string]*api.Task /*ip<->task*/
}

// defaultTaskStatusCollector is the taskStatusCollector returned by
// newTaskStatusCollector; it queries every IP through a client built by creator.
// TODO maybe cache
type defaultTaskStatusCollector struct {
	// creator builds the client used for one IP.
	creator taskClientCreator
	// logger receives per-IP failures and the summary of each collection.
	logger logr.Logger
}

// Collect queries the task of every IP in ipList concurrently, one goroutine
// per IP, and returns the tasks keyed by IP once all queries have finished.
//
// Each query runs under its own context derived from ctx with defaultTimeout.
// An IP whose query fails is logged and left out of the result, as is an IP
// for which the client returns a nil task. The returned map is never nil.
//
// The previous implementation also pushed into a channel sized len(ipList)
// before starting each goroutine. With that capacity the send could never
// block, so it limited nothing and has been removed; concurrency is, as
// before, bounded only by the number of IPs.
func (s *defaultTaskStatusCollector) Collect(ctx context.Context, ipList []string) map[string]*api.Task {
	var (
		wg sync.WaitGroup
		mu sync.Mutex // guards ret, which is written from the worker goroutines
	)
	ret := make(map[string]*api.Task, len(ipList))
	for _, ip := range ipList {
		wg.Add(1)
		go func(ip string) {
			defer wg.Done()
			// Use a distinct name so the caller's ctx is not shadowed.
			reqCtx, cancel := context.WithTimeout(ctx, defaultTimeout)
			defer cancel()
			client := s.creator(ip)
			task, err := client.Get(reqCtx)
			if err != nil {
				s.logger.Error(err, "failed to GetTask", "ip", ip)
				return
			}
			if task == nil {
				return
			}
			mu.Lock()
			ret[ip] = task
			mu.Unlock()
		}(ip)
	}
	wg.Wait()
	s.logger.Info("Collect task status", "result", utils.DumpJSON(ret))
	return ret
}


================================================
FILE: kubernetes/internal/scheduler/status_collector_mock.go
================================================
// Code generated by MockGen. DO NOT EDIT.
// Source: internal/task/scheduler/status_collector.go

// Package scheduler is a generated GoMock package.
package scheduler

import (
	context "context"
	reflect "reflect"

	gomock "github.com/golang/mock/gomock"

	api "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/task-executor"
)

// MocktaskStatusCollector is a mock of taskStatusCollector interface.
// It forwards calls to the gomock controller and exposes a recorder for
// declaring expectations.
type MocktaskStatusCollector struct {
	ctrl     *gomock.Controller
	recorder *MocktaskStatusCollectorMockRecorder
}

// MocktaskStatusCollectorMockRecorder is the mock recorder for MocktaskStatusCollector.
// It holds a reference back to the mock whose expected calls it records.
type MocktaskStatusCollectorMockRecorder struct {
	mock *MocktaskStatusCollector
}

// NewMocktaskStatusCollector creates a new mock instance bound to ctrl and
// wires up its recorder.
func NewMocktaskStatusCollector(ctrl *gomock.Controller) *MocktaskStatusCollector {
	mock := &MocktaskStatusCollector{ctrl: ctrl}
	mock.recorder = &MocktaskStatusCollectorMockRecorder{mock}
	return mock
}

// EXPECT returns an object that allows the caller to indicate expected use.
// The returned value is the recorder created together with the mock.
func (m *MocktaskStatusCollector) EXPECT() *MocktaskStatusCollectorMockRecorder {
	return m.recorder
}

// Collect mocks base method.
// It reports the call to the controller and returns the first configured
// return value; if that value is not a map[string]*api.Task the result is a
// nil map because the type assertion failure is ignored.
func (m *MocktaskStatusCollector) Collect(ctx context.Context, ipList []string) map[string]*api.Task {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "Collect", ctx, ipList)
	ret0, _ := ret[0].(map[string]*api.Task)
	return ret0
}

// Collect indicates an expected call of Collect.
// ctx and ipList may be concrete values or gomock matchers.
func (mr *MocktaskStatusCollectorMockRecorder) Collect(ctx, ipList interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Collect", reflect.TypeOf((*MocktaskStatusCollector)(nil).Collect), ctx, ipList)
}


================================================
FILE: kubernetes/internal/scheduler/types.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scheduler

// Task is the read-only view of a task exposed by the scheduler package.
type Task interface {
	// GetName returns the task name.
	GetName() string
	// GetState returns the task state as one of the TaskState values.
	GetState() TaskState
	// GetPodName returns the pod name recorded for the task.
	// NOTE(review): presumably the pod the task is assigned to — confirm against the implementation.
	GetPodName() string
	// IsResourceReleased reports whether the task's resource is released.
	// TODO func name is strange
	IsResourceReleased() bool
}

// TaskState is the string-typed state returned by Task.GetState.
type TaskState string

// Known TaskState values.
const (
	// RunningTaskState is the "RUNNING" state.
	RunningTaskState TaskState = "RUNNING"
	// FailedTaskState is the "FAILED" state.
	FailedTaskState TaskState = "FAILED"
	// SucceedTaskState is the "SUCCEED" state.
	SucceedTaskState TaskState = "SUCCEED"
	// UnknownTaskState is the "UNKNOWN" state.
	UnknownTaskState TaskState = "UNKNOWN"
)


================================================
FILE: kubernetes/internal/task-executor/config/config.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package config

import (
	"flag"
	"os"
	"path"
	"time"

	"gopkg.in/natefinch/lumberjack.v2"
	"k8s.io/klog/v2"
)

// Config holds the task-executor settings. Defaults come from NewConfig and
// can be overridden by LoadFromEnv and LoadFromFlags.
type Config struct {
	// DataDir is the data storage directory (env DATA_DIR, flag -data-dir).
	DataDir string
	// ListenAddr is the service listen address (env LISTEN_ADDR, flag -listen-addr).
	ListenAddr string
	// CRISocket is the CRI socket path for container runner mode (env CRI_SOCKET, flag -cri-socket).
	CRISocket string
	// ReadTimeout defaults to 30s; it has no env or flag override in this file.
	// NOTE(review): presumably the HTTP server read timeout — confirm at the point of use.
	ReadTimeout time.Duration
	// WriteTimeout defaults to 30s; it has no env or flag override in this file.
	// NOTE(review): presumably the HTTP server write timeout — confirm at the point of use.
	WriteTimeout time.Duration
	// ReconcileInterval defaults to 500ms; it has no env or flag override in this file.
	ReconcileInterval time.Duration
	// EnableSidecarMode enables sidecar runner mode (env ENABLE_SIDECAR_MODE=true, flag -enable-sidecar-mode).
	EnableSidecarMode bool
	// MainContainerName is the main container name (env MAIN_CONTAINER_NAME, flag -main-container-name).
	MainContainerName string
	// LogMaxSize is the maximum log file size in MB (flag -log-max-size).
	LogMaxSize int
	// LogMaxBackups is the maximum number of log backup files (flag -log-max-backups).
	LogMaxBackups int
	// LogMaxAge is the maximum number of days to keep log files (flag -log-max-age).
	LogMaxAge int
	// LogDir is the log file directory (flag -log-dir); InitKlog writes task-executor.log there.
	LogDir string
}

// NewConfig returns a Config populated with the built-in defaults. Callers
// typically apply LoadFromEnv and/or LoadFromFlags on top of it.
func NewConfig() *Config {
	cfg := new(Config)

	// Storage and endpoints.
	cfg.DataDir = "/var/lib/sandbox/tasks"
	cfg.ListenAddr = "0.0.0.0:5758"
	cfg.CRISocket = "/var/run/containerd/containerd.sock"

	// Timing.
	cfg.ReadTimeout = 30 * time.Second
	cfg.WriteTimeout = 30 * time.Second
	cfg.ReconcileInterval = 500 * time.Millisecond

	// Runner mode.
	cfg.EnableSidecarMode = false
	cfg.MainContainerName = "main"

	// Log rotation.
	cfg.LogMaxSize = 100
	cfg.LogMaxBackups = 10
	cfg.LogMaxAge = 7
	cfg.LogDir = "logs"

	return cfg
}

// LoadFromEnv overrides configuration fields from environment variables.
// String settings are replaced only when the variable is non-empty.
// EnableSidecarMode is switched on only when ENABLE_SIDECAR_MODE is exactly
// "true"; no value of the variable switches it off.
func (c *Config) LoadFromEnv() {
	stringOverrides := []struct {
		env    string
		target *string
	}{
		{"DATA_DIR", &c.DataDir},
		{"LISTEN_ADDR", &c.ListenAddr},
		{"CRI_SOCKET", &c.CRISocket},
		{"MAIN_CONTAINER_NAME", &c.MainContainerName},
	}
	for _, override := range stringOverrides {
		if value := os.Getenv(override.env); value != "" {
			*override.target = value
		}
	}

	if os.Getenv("ENABLE_SIDECAR_MODE") == "true" {
		c.EnableSidecarMode = true
	}
}

// LoadFromFlags registers the configuration flags on the default command-line
// flag set, using the current field values as defaults, and then parses the
// process arguments with flag.Parse.
func (c *Config) LoadFromFlags() {
	stringFlags := []struct {
		target *string
		name   string
		usage  string
	}{
		{&c.DataDir, "data-dir", "data storage directory"},
		{&c.ListenAddr, "listen-addr", "service listen address"},
		{&c.CRISocket, "cri-socket", "CRI socket path for container runner mode"},
		{&c.MainContainerName, "main-container-name", "main container name"},
		{&c.LogDir, "log-dir", "log file directory"},
	}
	for _, f := range stringFlags {
		flag.StringVar(f.target, f.name, *f.target, f.usage)
	}

	flag.BoolVar(&c.EnableSidecarMode, "enable-sidecar-mode", c.EnableSidecarMode, "enable sidecar runner mode")

	// Log rotation settings.
	intFlags := []struct {
		target *int
		name   string
		usage  string
	}{
		{&c.LogMaxSize, "log-max-size", "maximum log file size in MB"},
		{&c.LogMaxBackups, "log-max-backups", "maximum number of log backup files"},
		{&c.LogMaxAge, "log-max-age", "maximum number of days to keep log files"},
	}
	for _, f := range intFlags {
		flag.IntVar(f.target, f.name, *f.target, f.usage)
	}

	flag.Parse()
}

// InitKlog configures klog to write only to a rotating log file named
// task-executor.log inside c.LogDir. Rotation is handled by lumberjack using
// LogMaxSize, LogMaxBackups and LogMaxAge, with compression of rotated files.
//
// The klog flags are applied through a private FlagSet so they do not leak
// into the process command line. Every override is attempted and the output
// is always installed; the first error returned while applying an override,
// if any, is returned so the caller can see that klog was not fully
// configured. Previously such errors were silently dropped.
func (c *Config) InitKlog() error {
	logFile := path.Join(c.LogDir, "task-executor.log")
	fs := flag.NewFlagSet("klog", flag.ContinueOnError)
	klog.InitFlags(fs)

	overrides := []struct {
		name  string
		value string
	}{
		{"logtostderr", "false"},
		{"alsologtostderr", "false"},
		{"stderrthreshold", "FATAL"},
		{"one_output", "true"},
	}
	var firstErr error
	for _, o := range overrides {
		if err := fs.Set(o.name, o.value); err != nil && firstErr == nil {
			firstErr = err
		}
	}

	klog.SetOutput(&lumberjack.Logger{
		Filename:   logFile,
		MaxSize:    c.LogMaxSize,
		MaxBackups: c.LogMaxBackups,
		MaxAge:     c.LogMaxAge,
		Compress:   true,
	})
	return firstErr
}


================================================
FILE: kubernetes/internal/task-executor/manager/interface.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package manager

import (
	"context"

	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/types"
)

// TaskManager defines the contract for managing tasks in memory.
type TaskManager interface {
	// Create registers and starts a new task and returns it.
	Create(ctx context.Context, task *types.Task) (*types.Task, error)
	// Sync synchronizes the current task list with the desired state.
	// It deletes tasks not in the desired list and creates new ones.
	// Returns the current task list after synchronization.
	Sync(ctx context.Context, desired []*types.Task) ([]*types.Task, error)

	// Get returns the task identified by id.
	Get(ctx context.Context, id string) (*types.Task, error)

	// List returns all tasks currently known to the manager.
	List(ctx context.Context) ([]*types.Task, error)

	// Delete requests deletion of the task identified by id.
	Delete(ctx context.Context, id string) error

	// Start starts the manager.
	Start(ctx context.Context)

	// Stop stops the manager.
	Stop()
}


================================================
FILE: kubernetes/internal/task-executor/manager/task_manager.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package manager

import (
	"context"
	"errors"
	"fmt"
	"reflect"
	"sync"
	"time"

	"k8s.io/klog/v2"

	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/config"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/runtime"
	store "github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/storage"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/types"
)

const (
	// maxConcurrentTasks is the number of active tasks (see isTaskActive) at
	// which creating another task is rejected.
	maxConcurrentTasks = 1
)

// taskManager is the TaskManager implementation returned by NewTaskManager.
// It keeps tasks in memory, persists them through store and runs them through
// executor.
type taskManager struct {
	// mu is held by the methods in this file whenever tasks or stopping is accessed.
	mu    sync.RWMutex
	tasks map[string]*types.Task // name -> task

	// store persists tasks; executor starts, inspects and stops them.
	store    store.TaskStore
	executor runtime.Executor
	// config supplies ReconcileInterval for the reconcile loop.
	config *config.Config

	// stopping marks task names for which a stop goroutine is currently in flight.
	stopping map[string]bool

	// stopCh is closed by Stop; doneCh is closed by reconcileLoop when it exits.
	stopCh chan struct{}
	doneCh chan struct{}
}

// NewTaskManager builds a TaskManager backed by the given store and executor.
// It returns an error when cfg, taskStore or exec is nil; the arguments are
// validated in that order.
func NewTaskManager(cfg *config.Config, taskStore store.TaskStore, exec runtime.Executor) (TaskManager, error) {
	switch {
	case cfg == nil:
		return nil, fmt.Errorf("config cannot be nil")
	case taskStore == nil:
		return nil, fmt.Errorf("task store cannot be nil")
	case exec == nil:
		return nil, fmt.Errorf("executor cannot be nil")
	}

	mgr := &taskManager{
		config:   cfg,
		store:    taskStore,
		executor: exec,
	}
	mgr.tasks = make(map[string]*types.Task)
	mgr.stopping = make(map[string]bool)
	mgr.stopCh = make(chan struct{})
	mgr.doneCh = make(chan struct{})
	return mgr, nil
}

// isTaskActive reports whether the task counts towards the concurrency limit:
// it must be non-nil, not marked for deletion, and in the pending or running
// state.
func (m *taskManager) isTaskActive(task *types.Task) bool {
	if task == nil || task.DeletionTimestamp != nil {
		return false
	}
	switch task.Status.State {
	case types.TaskStatePending, types.TaskStateRunning:
		return true
	default:
		return false
	}
}

// countActiveTasks returns how many tasks in m.tasks are active according to
// isTaskActive. It does not take the lock itself.
func (m *taskManager) countActiveTasks() int {
	active := 0
	for name := range m.tasks {
		if !m.isTaskActive(m.tasks[name]) {
			continue
		}
		active++
	}
	return active
}

// Create persists, starts and registers a new task, returning the same task
// pointer on success.
//
// It fails when the task is nil or unnamed, when a task with the same name is
// already registered, when the active-task limit is reached, when the task
// cannot be persisted, or when the executor cannot start it (in which case the
// persisted record is removed again). A failure to inspect the task or to
// persist its initial status after a successful start is only logged. A task
// whose state is still empty at that point is reported as pending.
func (m *taskManager) Create(ctx context.Context, task *types.Task) (*types.Task, error) {
	switch {
	case task == nil:
		return nil, fmt.Errorf("task cannot be nil")
	case task.Name == "":
		return nil, fmt.Errorf("task name cannot be empty")
	}

	m.mu.Lock()
	defer m.mu.Unlock()

	if _, duplicate := m.tasks[task.Name]; duplicate {
		return nil, fmt.Errorf("task %s already exists", task.Name)
	}
	if active := m.countActiveTasks(); active >= maxConcurrentTasks {
		return nil, fmt.Errorf("maximum concurrent tasks (%d) reached, cannot create new task", maxConcurrentTasks)
	}

	if persistErr := m.store.Create(ctx, task); persistErr != nil {
		return nil, fmt.Errorf("failed to persist task: %w", persistErr)
	}

	if startErr := m.executor.Start(ctx, task); startErr != nil {
		// Undo the persisted record so a failed start leaves nothing behind.
		rollbackErr := m.store.Delete(ctx, task.Name)
		if rollbackErr != nil {
			klog.ErrorS(rollbackErr, "failed to rollback task creation", "name", task.Name)
		}
		return nil, fmt.Errorf("failed to start task: %w", startErr)
	}

	status, inspectErr := m.executor.Inspect(ctx, task)
	if inspectErr != nil {
		klog.ErrorS(inspectErr, "failed to inspect task after start", "name", task.Name)
	} else {
		task.Status = *status
		// Store the freshly inspected status (including the PID) right away.
		if updateErr := m.store.Update(ctx, task); updateErr != nil {
			klog.ErrorS(updateErr, "failed to persist initial task status", "name", task.Name)
		}
	}

	if task.Status.State == "" {
		task.Status.State = types.TaskStatePending
	}
	m.tasks[task.Name] = task

	klog.InfoS("task created successfully", "name", task.Name)
	return task, nil
}

// Sync reconciles the registered tasks with the desired list: registered tasks
// whose name is not desired are marked for deletion, and desired tasks that
// are not registered yet are created. Nil and unnamed entries in desired are
// ignored. A nil desired slice is rejected; an empty one is valid.
//
// Individual failures do not abort the pass. The returned slice always
// reflects the tasks registered after the pass, and the returned error joins
// every failure that occurred (nil when there were none).
func (m *taskManager) Sync(ctx context.Context, desired []*types.Task) ([]*types.Task, error) {
	if desired == nil {
		return nil, fmt.Errorf("desired task list cannot be nil")
	}

	m.mu.Lock()
	defer m.mu.Unlock()

	wanted := make(map[string]*types.Task)
	for _, candidate := range desired {
		if candidate == nil || candidate.Name == "" {
			continue
		}
		wanted[candidate.Name] = candidate
	}

	var failures []error

	// Pass 1: retire everything that is no longer wanted.
	for name, existing := range m.tasks {
		if _, keep := wanted[name]; keep {
			continue
		}
		if err := m.softDeleteLocked(ctx, existing); err != nil {
			klog.ErrorS(err, "failed to delete task during sync", "name", name)
			failures = append(failures, fmt.Errorf("failed to delete task %s: %w", name, err))
		}
	}

	// Pass 2: bring up everything that is wanted but not registered.
	for name, candidate := range wanted {
		if _, registered := m.tasks[name]; registered {
			continue
		}
		if err := m.createTaskLocked(ctx, candidate); err != nil {
			klog.ErrorS(err, "failed to create task during sync", "name", name)
			failures = append(failures, fmt.Errorf("failed to create task %s: %w", name, err))
		}
	}

	// errors.Join yields nil when no failure was recorded.
	return m.listTasksLocked(), errors.Join(failures...)
}

// Get looks up a registered task by name under the read lock. It returns an
// error for an empty name or when no task with that name is registered. The
// returned pointer is the manager's own task object, not a copy.
func (m *taskManager) Get(ctx context.Context, name string) (*types.Task, error) {
	if name == "" {
		return nil, fmt.Errorf("task name cannot be empty")
	}

	m.mu.RLock()
	defer m.mu.RUnlock()

	if found, ok := m.tasks[name]; ok {
		return found, nil
	}
	return nil, fmt.Errorf("task %s not found", name)
}

// List returns every non-nil registered task, collected under the read lock.
// The error result is always nil.
func (m *taskManager) List(ctx context.Context) ([]*types.Task, error) {
	m.mu.RLock()
	defer m.mu.RUnlock()

	return m.listTasksLocked(), nil
}

// Delete marks the named task for deletion; the task is not removed from the
// manager by this call. An empty name is an error, while an unknown name is
// treated as success.
func (m *taskManager) Delete(ctx context.Context, name string) error {
	if name == "" {
		return fmt.Errorf("task name cannot be empty")
	}

	m.mu.Lock()
	defer m.mu.Unlock()

	if target, ok := m.tasks[name]; ok {
		return m.softDeleteLocked(ctx, target)
	}
	return nil
}

// softDeleteLocked stamps the task with the current time as its deletion
// timestamp and persists the change. A task that already carries a deletion
// timestamp is left untouched. The in-memory timestamp is set before the store
// is updated, so it stays set even when the update fails and an error is
// returned. It does not take the lock itself.
func (m *taskManager) softDeleteLocked(ctx context.Context, task *types.Task) error {
	if task.DeletionTimestamp != nil {
		return nil
	}

	deletedAt := time.Now()
	task.DeletionTimestamp = &deletedAt

	err := m.store.Update(ctx, task)
	if err != nil {
		return fmt.Errorf("failed to mark task for deletion: %w", err)
	}

	klog.InfoS("task marked for deletion", "name", task.Name)
	return nil
}

// Start initializes the manager, loads tasks from store, and starts the reconcile loop.
// A recovery failure is logged and does not prevent the reconcile loop from
// starting. The loop runs in its own goroutine, so Start returns immediately.
func (m *taskManager) Start(ctx context.Context) {
	klog.InfoS("starting task manager")

	if err := m.recoverTasks(ctx); err != nil {
		klog.ErrorS(err, "failed to recover tasks from store")
	}

	go m.reconcileLoop(ctx)

	klog.InfoS("task manager started")
}

// Stop closes stopCh to tell the reconcile loop to exit and then blocks until
// doneCh is closed, which reconcileLoop does when it returns.
// NOTE(review): doneCh is only closed by reconcileLoop, so calling Stop without
// a prior Start looks like it would block forever, and a second Stop would
// close stopCh twice — confirm callers never do either.
func (m *taskManager) Stop() {
	klog.InfoS("stopping task manager")
	close(m.stopCh)
	<-m.doneCh
	klog.InfoS("task manager stopped")
}

// createTaskLocked persists, starts and registers a task. It does not take the
// lock itself.
//
// It fails when the task is nil or unnamed, when a task with the same name is
// already registered, when the active-task limit is reached, when the task
// cannot be persisted, or when the executor cannot start it. After a failed
// start the persisted record is removed again and a failure of that rollback
// is logged. A failure to inspect the task or to persist its initial status
// after a successful start is only logged.
//
// A task whose state is still empty after the start (for example because the
// inspection failed) is recorded as pending, matching Create. Without this the
// task would not be counted by countActiveTasks and the concurrency limit
// could be exceeded by a later creation.
func (m *taskManager) createTaskLocked(ctx context.Context, task *types.Task) error {
	if task == nil || task.Name == "" {
		return fmt.Errorf("invalid task")
	}

	if _, exists := m.tasks[task.Name]; exists {
		return fmt.Errorf("task %s already exists", task.Name)
	}

	if m.countActiveTasks() >= maxConcurrentTasks {
		return fmt.Errorf("maximum concurrent tasks (%d) reached, cannot create new task", maxConcurrentTasks)
	}

	if err := m.store.Create(ctx, task); err != nil {
		return fmt.Errorf("failed to persist task: %w", err)
	}

	if err := m.executor.Start(ctx, task); err != nil {
		// Roll back the persisted record; surface a rollback failure in the log
		// instead of dropping it.
		if delErr := m.store.Delete(ctx, task.Name); delErr != nil {
			klog.ErrorS(delErr, "failed to rollback task creation", "name", task.Name)
		}
		return fmt.Errorf("failed to start task: %w", err)
	}

	if status, err := m.executor.Inspect(ctx, task); err == nil {
		task.Status = *status
		// Persist the PID and initial status
		if err := m.store.Update(ctx, task); err != nil {
			klog.ErrorS(err, "failed to persist initial task status", "name", task.Name)
		}
	} else {
		klog.ErrorS(err, "failed to inspect task after start", "name", task.Name)
	}

	// Keep the task visible to the concurrency accounting even when its state
	// could not be determined.
	if task.Status.State == "" {
		task.Status.State = types.TaskStatePending
	}

	m.tasks[task.Name] = task
	return nil
}

// listTasksLocked returns a new slice holding every non-nil registered task,
// in map iteration order. It does not take the lock itself.
func (m *taskManager) listTasksLocked() []*types.Task {
	snapshot := make([]*types.Task, 0, len(m.tasks))
	for _, registered := range m.tasks {
		if registered == nil {
			continue
		}
		snapshot = append(snapshot, registered)
	}
	return snapshot
}

// recoverTasks reloads the persisted tasks into memory. Each stored task is
// inspected through the executor and registered with the inspected status. A
// nil entry is ignored, and a task that cannot be inspected is logged and left
// out of memory. It returns an error only when the store cannot be listed.
func (m *taskManager) recoverTasks(ctx context.Context) error {
	klog.InfoS("recovering tasks from store")

	stored, listErr := m.store.List(ctx)
	if listErr != nil {
		return fmt.Errorf("failed to list tasks from store: %w", listErr)
	}

	m.mu.Lock()
	defer m.mu.Unlock()

	for _, persisted := range stored {
		if persisted == nil {
			continue
		}

		status, inspectErr := m.executor.Inspect(ctx, persisted)
		if inspectErr != nil {
			klog.ErrorS(inspectErr, "failed to inspect task during recovery", "name", persisted.Name)
			continue
		}

		persisted.Status = *status
		m.tasks[persisted.Name] = persisted

		beingDeleted := persisted.DeletionTimestamp != nil
		klog.InfoS("recovered task", "name", persisted.Name, "state", persisted.Status.State, "deleting", beingDeleted)
	}

	klog.InfoS("task recovery completed", "count", len(m.tasks))
	return nil
}

// reconcileLoop runs reconcileTasks on every tick of config.ReconcileInterval
// until stopCh is closed or ctx is done. On exit it closes doneCh and then
// stops the ticker.
func (m *taskManager) reconcileLoop(ctx context.Context) {
	interval := m.config.ReconcileInterval
	tick := time.NewTicker(interval)
	// Deferred calls run in reverse order: doneCh is closed first, then the
	// ticker is stopped.
	defer tick.Stop()
	defer close(m.doneCh)

	for {
		select {
		case <-ctx.Done():
			klog.InfoS("reconcile loop context canceled")
			return
		case <-m.stopCh:
			klog.InfoS("reconcile loop stopped")
			return
		case <-tick.C:
			m.reconcileTasks(ctx)
		}
	}
}

// reconcileTasks performs one reconcile pass over all registered tasks while
// holding the write lock. For each task it:
//
//   - inspects the task through the executor (a failed inspection is logged
//     and the task is skipped for this pass);
//   - starts an asynchronous stop when the task is marked for deletion and not
//     yet terminal, or when it has timed out, unless a stop is already in
//     flight for it;
//   - queues the task for final removal when it is marked for deletion and
//     terminal;
//   - stores the inspected status in memory and in the store when it changed,
//     unless a stop is in flight.
//
// Queued tasks are removed from the store and from memory at the end of the
// pass; a task whose store deletion fails stays registered and is retried on
// a later pass.
func (m *taskManager) reconcileTasks(ctx context.Context) {
	m.mu.Lock()
	defer m.mu.Unlock()

	var tasksToDelete []string

	for name, task := range m.tasks {
		if task == nil {
			continue
		}
		status, err := m.executor.Inspect(ctx, task)
		if err != nil {
			klog.ErrorS(err, "failed to inspect task", "name", name)
			continue
		}
		state := status.State

		shouldStop := false
		stopReason := ""

		if task.DeletionTimestamp != nil && !m.stopping[name] {
			if !isTerminalState(state) {
				shouldStop = true
				stopReason = "deletion requested"
			}
		} else if state == types.TaskStateTimeout && !m.stopping[name] {
			shouldStop = true
			stopReason = "timeout exceeded"
		}

		if shouldStop {
			klog.InfoS("stopping task", "name", name, "reason", stopReason, "current_state", state)
			m.stopping[name] = true

			// Stop outside the lock: the goroutine only re-acquires m.mu to
			// clear the in-flight marker once the executor call has returned.
			go func(t *types.Task, taskName string) {
				defer func() {
					m.mu.Lock()
					delete(m.stopping, taskName)
					m.mu.Unlock()
				}()

				klog.V(1).InfoS("task stop initiated", "name", taskName, "reason", stopReason)
				if err := m.executor.Stop(ctx, t); err != nil {
					klog.ErrorS(err, "failed to stop task", "name", taskName)
					// Do not report the task as stopped when the stop failed.
					return
				}
				klog.InfoS("task stopped", "name", taskName)
			}(task, name)
		}

		if task.DeletionTimestamp != nil && isTerminalState(state) {
			klog.InfoS("task terminated, finalizing deletion", "name", name)
			tasksToDelete = append(tasksToDelete, name)
		}

		// Leave the stored status alone while a stop is in flight.
		if !m.stopping[name] {
			if !reflect.DeepEqual(task.Status, *status) {
				oldState := task.Status.State
				task.Status = *status
				// Log state changes only
				if oldState != status.State {
					klog.InfoS("task state changed", "name", name, "oldState", oldState, "newState", status.State)
				}
				if err := m.store.Update(ctx, task); err != nil {
					klog.ErrorS(err, "failed to update task status in store", "name", name)
				}
			}
		}
	}

	for _, name := range tasksToDelete {
		if _, exists := m.tasks[name]; !exists {
			continue
		}

		if err := m.store.Delete(ctx, name); err != nil {
			klog.ErrorS(err, "failed to delete task from store", "name", name)
			continue
		}

		delete(m.tasks, name)
		delete(m.stopping, name)
		klog.InfoS("task deleted successfully", "name", name)
	}
}

// isTerminalState reports whether state is one the task will not leave:
// succeeded, failed or not found.
func isTerminalState(state types.TaskState) bool {
	switch state {
	case types.TaskStateSucceeded, types.TaskStateFailed, types.TaskStateNotFound:
		return true
	default:
		return false
	}
}


================================================
FILE: kubernetes/internal/task-executor/manager/task_manager_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package manager

import (
	"context"
	"os/exec"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"

	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/config"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/runtime"
	store "github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/storage"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/types"
	api "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/task-executor"
)

// setupTestManager builds a TaskManager for tests on top of a file store in a
// per-test temporary directory and a process executor, using a 100ms
// reconcile interval. Any construction failure aborts the test.
func setupTestManager(t *testing.T) (TaskManager, *config.Config) {
	testCfg := new(config.Config)
	testCfg.DataDir = t.TempDir()
	testCfg.EnableSidecarMode = false
	testCfg.ReconcileInterval = 100 * time.Millisecond

	fileStore, storeErr := store.NewFileStore(testCfg.DataDir)
	if storeErr != nil {
		t.Fatalf("failed to create store: %v", storeErr)
	}

	procExecutor, execErr := runtime.NewProcessExecutor(testCfg)
	if execErr != nil {
		t.Fatalf("failed to create executor: %v", execErr)
	}

	manager, mgrErr := NewTaskManager(testCfg, fileStore, procExecutor)
	if mgrErr != nil {
		t.Fatalf("failed to create manager: %v", mgrErr)
	}

	return manager, testCfg
}

// cleanupTask requests deletion of the named task and then polls Get every
// 100ms, for up to 5 seconds, until the manager no longer returns the task.
// A failed delete request and an expired deadline are both logged rather than
// failing the test, so cleanup stays best-effort.
func cleanupTask(t *testing.T, mgr TaskManager, name string) {
	// Attribute log lines to the calling test instead of this helper.
	t.Helper()

	ctx := context.Background()
	if err := mgr.Delete(ctx, name); err != nil {
		t.Logf("Task %s delete request failed during cleanup: %v", name, err)
	}
	deadline := time.Now().Add(5 * time.Second)
	for time.Now().Before(deadline) {
		// Get returns an error once the task is gone.
		if _, err := mgr.Get(ctx, name); err != nil {
			return
		}
		time.Sleep(100 * time.Millisecond)
	}
	t.Logf("Task %s not deleted within timeout during cleanup", name)
}

// TestNewTaskManager checks that NewTaskManager rejects a nil config, store or
// executor and returns a non-nil manager when all three are provided.
func TestNewTaskManager(t *testing.T) {
	cfg := &config.Config{
		DataDir: t.TempDir(),
	}
	// Fail fast on broken fixtures; otherwise a nil store or executor would
	// show up as a misleading failure of the "valid parameters" case.
	taskStore, err := store.NewFileStore(cfg.DataDir)
	if err != nil {
		t.Fatalf("failed to create store: %v", err)
	}
	procExec, err := runtime.NewProcessExecutor(cfg)
	if err != nil {
		t.Fatalf("failed to create executor: %v", err)
	}

	tests := []struct {
		name     string
		cfg      *config.Config
		store    store.TaskStore
		executor runtime.Executor
		wantErr  bool
	}{
		{
			name:     "nil config",
			cfg:      nil,
			store:    taskStore,
			executor: procExec,
			wantErr:  true,
		},
		{
			name:     "nil store",
			cfg:      cfg,
			store:    nil,
			executor: procExec,
			wantErr:  true,
		},
		{
			name:     "nil executor",
			cfg:      cfg,
			store:    taskStore,
			executor: nil,
			wantErr:  true,
		},
		{
			name:     "valid parameters",
			cfg:      cfg,
			store:    taskStore,
			executor: procExec,
			wantErr:  false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			mgr, err := NewTaskManager(tt.cfg, tt.store, tt.executor)
			if (err != nil) != tt.wantErr {
				t.Errorf("NewTaskManager() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if !tt.wantErr && mgr == nil {
				t.Error("NewTaskManager() returned nil manager")
			}
		})
	}
}

// TestTaskManager_Create drives Create with a nil task, a task without a name
// and a valid process task, checking the error outcome and the returned name.
func TestTaskManager_Create(t *testing.T) {
	mgr, _ := setupTestManager(t)
	ctx := context.Background()

	type createCase struct {
		name    string
		task    *types.Task
		wantErr bool
	}

	cases := []createCase{
		{name: "nil task", task: nil, wantErr: true},
		{
			name: "empty task name",
			task: &types.Task{
				Name:    "",
				Process: &api.Process{Command: []string{"echo", "test"}},
			},
			wantErr: true,
		},
		{
			name: "valid task",
			task: &types.Task{
				Name:    "test-task",
				Process: &api.Process{Command: []string{"sh", "-c", "echo hello && exit 0"}},
			},
			wantErr: false,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			created, err := mgr.Create(ctx, tc.task)
			failed := err != nil
			if failed != tc.wantErr {
				t.Errorf("Create() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if tc.wantErr {
				// Nothing was created, so there is nothing to verify or clean up.
				return
			}

			switch {
			case created == nil:
				t.Error("Create() returned nil task")
			case created.Name != tc.task.Name:
				t.Errorf("Create() task name = %v, want %v", created.Name, tc.task.Name)
			}

			// Give the short-lived command time to finish before removing it.
			time.Sleep(200 * time.Millisecond)
			if tc.task != nil {
				mgr.Delete(ctx, tc.task.Name)
			}
		})
	}
}

// TestTaskManager_CreateDuplicate verifies that creating a task whose name is
// already in use is rejected.
func TestTaskManager_CreateDuplicate(t *testing.T) {
	mgr, _ := setupTestManager(t)
	mgr.Start(context.Background())
	defer mgr.Stop()

	ctx := context.Background()
	dup := &types.Task{
		Name:    "duplicate-task",
		Process: &api.Process{Command: []string{"echo", "test"}},
	}

	// The initial creation is expected to go through.
	if _, err := mgr.Create(ctx, dup); err != nil {
		t.Fatalf("First Create() failed: %v", err)
	}
	defer cleanupTask(t, mgr, dup.Name)

	// Re-submitting the same task must produce an error.
	if _, err := mgr.Create(ctx, dup); err == nil {
		t.Error("Create() should fail for duplicate task")
	}
}

// TestTaskManager_CreateMaxConcurrentTasks starts one long-running task and
// expects a second Create to be refused while the first is still active.
func TestTaskManager_CreateMaxConcurrentTasks(t *testing.T) {
	mgr, _ := setupTestManager(t)
	mgr.Start(context.Background())
	defer mgr.Stop()

	ctx := context.Background()

	// Occupy the manager with a task that keeps running for a while.
	first := &types.Task{
		Name:    "task-1",
		Process: &api.Process{Command: []string{"sleep", "10"}},
	}
	if _, err := mgr.Create(ctx, first); err != nil {
		t.Fatalf("First Create() failed: %v", err)
	}
	defer cleanupTask(t, mgr, first.Name)

	// A further task should now be rejected because of the concurrency limit.
	second := &types.Task{
		Name:    "task-2",
		Process: &api.Process{Command: []string{"echo", "test"}},
	}
	if _, err := mgr.Create(ctx, second); err == nil {
		t.Error("Create() should fail when max concurrent tasks reached")
		// The task was unexpectedly created, so remove it again.
		cleanupTask(t, mgr, second.Name)
	}
}

// TestTaskManager_Get creates a task and checks that Get returns it under the
// same name.
func TestTaskManager_Get(t *testing.T) {
	mgr, _ := setupTestManager(t)
	mgr.Start(context.Background())
	defer mgr.Stop()

	ctx := context.Background()
	want := &types.Task{
		Name:    "get-task",
		Process: &api.Process{Command: []string{"echo", "get"}},
	}

	if _, err := mgr.Create(ctx, want); err != nil {
		t.Fatalf("Create() failed: %v", err)
	}
	defer cleanupTask(t, mgr, want.Name)

	fetched, err := mgr.Get(ctx, want.Name)
	if err != nil {
		t.Fatalf("Get() failed: %v", err)
	}
	if fetched.Name != want.Name {
		t.Errorf("Get() name = %v, want %v", fetched.Name, want.Name)
	}
}

// TestTaskManager_GetNotFound expects Get to return an error for a task name
// that was never created.
func TestTaskManager_GetNotFound(t *testing.T) {
	mgr, _ := setupTestManager(t)

	if _, err := mgr.Get(context.Background(), "non-existent"); err == nil {
		t.Error("Get() should fail for non-existent task")
	}
}

// TestTaskManager_GetEmptyName expects Get to return an error when called
// with an empty task name.
func TestTaskManager_GetEmptyName(t *testing.T) {
	mgr, _ := setupTestManager(t)

	if _, err := mgr.Get(context.Background(), ""); err == nil {
		t.Error("Get() should fail for empty name")
	}
}

// TestTaskManager_List checks that List is empty for a fresh manager and
// returns exactly the one task that was created afterwards.
func TestTaskManager_List(t *testing.T) {
	mgr, _ := setupTestManager(t)
	ctx := context.Background()

	// Initially empty
	tasks, err := mgr.List(ctx)
	if err != nil {
		t.Fatalf("List() failed: %v", err)
	}
	if len(tasks) != 0 {
		t.Errorf("List() initial count = %d, want 0", len(tasks))
	}

	// Create a task
	task := &types.Task{
		Name: "list-task",
		Process: &api.Process{
			Command: []string{"echo", "list"},
		},
	}

	_, err = mgr.Create(ctx, task)
	if err != nil {
		t.Fatalf("Create() failed: %v", err)
	}
	defer mgr.Delete(ctx, task.Name)

	// List should return 1 task
	tasks, err = mgr.List(ctx)
	if err != nil {
		t.Fatalf("List() failed: %v", err)
	}
	// Fatal rather than Error: tasks[0] below would panic on an empty slice.
	if len(tasks) != 1 {
		t.Fatalf("List() count = %d, want 1", len(tasks))
	}
	if tasks[0].Name != task.Name {
		t.Errorf("List() task name = %v, want %v", tasks[0].Name, task.Name)
	}
}

// TestTaskManager_Delete verifies the two-phase delete: Delete first only sets
// DeletionTimestamp, and the task disappears from Get within five seconds
// while the manager is running.
func TestTaskManager_Delete(t *testing.T) {
	mgr, _ := setupTestManager(t)
	// The manager must be running so the reconcile loop can finalize the delete.
	mgr.Start(context.Background())
	defer mgr.Stop()

	ctx := context.Background()
	victim := &types.Task{
		Name:    "delete-task",
		Process: &api.Process{Command: []string{"echo", "delete"}},
	}

	if _, err := mgr.Create(ctx, victim); err != nil {
		t.Fatalf("Create() failed: %v", err)
	}

	// Soft delete: the call itself should succeed.
	if err := mgr.Delete(ctx, victim.Name); err != nil {
		t.Errorf("Delete() failed: %v", err)
	}

	// Immediately afterwards the task is still retrievable but flagged.
	marked, err := mgr.Get(ctx, victim.Name)
	if err != nil {
		t.Fatalf("Get() should succeed after Delete() (soft delete): %v", err)
	}
	if marked.DeletionTimestamp == nil {
		t.Error("DeletionTimestamp should be set after Delete()")
	}

	// Poll until Get fails, which means the task has been finalized.
	const waitFor = 5 * time.Second
	for deadline := time.Now().Add(waitFor); time.Now().Before(deadline); time.Sleep(100 * time.Millisecond) {
		if _, getErr := mgr.Get(ctx, victim.Name); getErr != nil {
			return
		}
	}
	t.Error("Task was not finalized (deleted) within timeout")
}

// TestTaskManager_DeleteNonExistent expects Delete to succeed (no error) when
// the named task does not exist.
func TestTaskManager_DeleteNonExistent(t *testing.T) {
	mgr, _ := setupTestManager(t)

	if err := mgr.Delete(context.Background(), "non-existent"); err != nil {
		t.Errorf("Delete() should not fail for non-existent task: %v", err)
	}
}

// TestTaskManager_Sync creates one task, then calls Sync with a desired list
// containing only a different task. It expects the old task to be marked for
// deletion (and eventually removed) and the new task to appear in the result.
func TestTaskManager_Sync(t *testing.T) {
	mgr, _ := setupTestManager(t)
	// The reconcile loop has to run for the old task to be finalized.
	mgr.Start(context.Background())
	defer mgr.Stop()

	ctx := context.Background()

	stale := &types.Task{
		Name:    "sync-task-1",
		Process: &api.Process{Command: []string{"echo", "1"}},
	}
	if _, err := mgr.Create(ctx, stale); err != nil {
		t.Fatalf("Create() failed: %v", err)
	}

	// Desired state: the stale task is absent and a fresh one is present.
	fresh := &types.Task{
		Name:    "sync-task-2",
		Process: &api.Process{Command: []string{"echo", "2"}},
	}
	current, err := mgr.Sync(ctx, []*types.Task{fresh})
	if err != nil {
		t.Fatalf("Sync() failed: %v", err)
	}
	defer mgr.Delete(ctx, fresh.Name)

	// Single pass over the result to locate both tasks.
	staleSeen, freshSeen := false, false
	for _, item := range current {
		switch item.Name {
		case stale.Name:
			staleSeen = true
			if item.DeletionTimestamp == nil {
				t.Error("task1 should be marked for deletion after Sync()")
			}
		case fresh.Name:
			freshSeen = true
		}
	}
	if !staleSeen {
		// Not an error: the task may already have been finalized.
		t.Log("task1 not found in Sync result (maybe already deleted?)")
	}
	if !freshSeen {
		t.Error("task2 should be present after Sync()")
	}

	// Poll until the stale task can no longer be fetched.
	const waitFor = 5 * time.Second
	for deadline := time.Now().Add(waitFor); time.Now().Before(deadline); time.Sleep(100 * time.Millisecond) {
		if _, getErr := mgr.Get(ctx, stale.Name); getErr != nil {
			return
		}
	}
	t.Error("task1 should be deleted after Sync()")
}

// TestTaskManager_SyncNil expects Sync to return an error when the desired
// task list is nil.
func TestTaskManager_SyncNil(t *testing.T) {
	mgr, _ := setupTestManager(t)

	if _, err := mgr.Sync(context.Background(), nil); err == nil {
		t.Error("Sync() should fail for nil desired list")
	}
}

// TestTaskManager_AsyncStopOnDelete checks that deleting a long-running task
// does not block on stopping the process: Delete (plus the follow-up Get) must
// finish within 500ms, DeletionTimestamp must be set right away, and the task
// must disappear from Get within 15 seconds.
func TestTaskManager_AsyncStopOnDelete(t *testing.T) {
	mgr, _ := setupTestManager(t)
	mgr.Start(context.Background())
	defer mgr.Stop()

	ctx := context.Background()

	timeoutSec := int64(30)
	task := &types.Task{
		Name: "long-running-task",
		Process: &api.Process{
			Command:        []string{"sleep", "30"},
			TimeoutSeconds: &timeoutSec,
		},
	}

	// Create task
	created, err := mgr.Create(ctx, task)
	if err != nil {
		t.Fatalf("Create() failed: %v", err)
	}
	// Safety net: if the test exits early, make sure the task is still removed.
	defer cleanupTask(t, mgr, task.Name)

	// Verify task is running
	assert.Equal(t, types.TaskStateRunning, created.Status.State)

	// Record the time before delete
	beforeDelete := time.Now()

	// Delete task (should trigger async stop)
	err = mgr.Delete(ctx, task.Name)
	if err != nil {
		t.Fatalf("Delete() failed: %v", err)
	}

	// Verify DeletionTimestamp is set immediately (soft delete)
	got, err := mgr.Get(ctx, task.Name)
	if err != nil {
		t.Fatalf("Get() after Delete failed: %v", err)
	}
	if got.DeletionTimestamp == nil {
		t.Error("DeletionTimestamp should be set immediately after Delete()")
	}

	// Verify Delete returned quickly (not blocked by Stop)
	// Note: the measured span also includes the Get call above.
	deleteDuration := time.Since(beforeDelete)
	if deleteDuration > 500*time.Millisecond {
		t.Errorf("Delete() took too long (%v), should be fast (async stop)", deleteDuration)
	}

	// Wait for task to be finalized
	deadline := time.Now().Add(15 * time.Second)
	for time.Now().Before(deadline) {
		_, err := mgr.Get(ctx, task.Name)
		if err != nil {
			// Task is gone, success
			return
		}
		time.Sleep(100 * time.Millisecond)
	}
	t.Error("Task was not finalized within timeout after async stop")
}

// TestTaskManager_TimeoutHandling starts a 30-second sleep with a 2-second
// timeout and expects the task to be reported as Timeout or Failed after three
// seconds. It then waits up to 15 seconds for the task to reach Failed or to
// be removed.
func TestTaskManager_TimeoutHandling(t *testing.T) {
	if _, err := exec.LookPath("sh"); err != nil {
		t.Skip("sh not found, skipping timeout test")
	}

	mgr, _ := setupTestManager(t)
	mgr.Start(context.Background())
	defer mgr.Stop()

	ctx := context.Background()

	// Create task with short timeout
	timeoutSec := int64(2)
	task := &types.Task{
		Name: "timeout-task",
		Process: &api.Process{
			Command:        []string{"sleep", "30"},
			TimeoutSeconds: &timeoutSec,
		},
	}

	_, err := mgr.Create(ctx, task)
	if err != nil {
		t.Fatalf("Create() failed: %v", err)
	}
	defer cleanupTask(t, mgr, task.Name)

	// Wait for timeout to be detected and async stop triggered
	time.Sleep(3 * time.Second)

	// Check task status - should be Timeout or Failed (after stop)
	got, err := mgr.Get(ctx, task.Name)
	if err != nil {
		t.Fatalf("Get() failed: %v", err)
	}

	// State should be Timeout (during stop) or Failed (after stop completes)
	if got.Status.State != types.TaskStateTimeout && got.Status.State != types.TaskStateFailed {
		t.Errorf("Expected Timeout or Failed state, got: %s", got.Status.State)
	}

	// If in Timeout state, verify reason
	if got.Status.State == types.TaskStateTimeout {
		// assert.NotEmpty is non-fatal, so only index the slice when it passed;
		// otherwise an empty SubStatuses would panic instead of failing cleanly.
		if assert.NotEmpty(t, got.Status.SubStatuses) {
			assert.Equal(t, "TaskTimeout", got.Status.SubStatuses[0].Reason)
		}
	}

	// Wait for final state
	// NOTE(review): reaching the deadline is not reported as a failure here.
	deadline := time.Now().Add(15 * time.Second)
	for time.Now().Before(deadline) {
		got, err := mgr.Get(ctx, task.Name)
		if err != nil {
			// Task was deleted, that's also acceptable
			return
		}
		if got.Status.State == types.TaskStateFailed {
			// Stop completed
			return
		}
		time.Sleep(200 * time.Millisecond)
	}
}

// TestTaskManager_CountActiveTasks checks countActiveTasks at three points:
// on a fresh manager (0), after a quick task has had 500ms to finish (0), and
// right after a sleeping task was created (1).
func TestTaskManager_CountActiveTasks(t *testing.T) {
	mgr, _ := setupTestManager(t)
	mgr.Start(context.Background())
	defer mgr.Stop()
	ctx := context.Background()

	// countActiveTasks is unexported, so reach it through the concrete type.
	active := func() int {
		return mgr.(*taskManager).countActiveTasks()
	}

	if n := active(); n != 0 {
		t.Errorf("Initial active count = %d, want 0", n)
	}

	// A command that finishes almost immediately.
	quick := &types.Task{
		Name:    "quick-task-1",
		Process: &api.Process{Command: []string{"echo", "done"}},
	}
	if _, err := mgr.Create(ctx, quick); err != nil {
		t.Fatalf("Create() failed: %v", err)
	}
	defer mgr.Delete(ctx, quick.Name)

	// Allow the quick task to run to completion.
	time.Sleep(500 * time.Millisecond)

	if n := active(); n != 0 {
		t.Errorf("Active count after task1 completion = %d, want 0", n)
	}

	// A command that is still running when the count is taken.
	sleeper := &types.Task{
		Name:    "active-task-2",
		Process: &api.Process{Command: []string{"sleep", "5"}},
	}
	if _, err := mgr.Create(ctx, sleeper); err != nil {
		t.Fatalf("Create() failed: %v", err)
	}
	defer mgr.Delete(ctx, sleeper.Name)

	if n := active(); n != 1 {
		t.Errorf("Active count after create = %d, want 1", n)
	}
}

// TestIsTerminalState pins which task states isTerminalState reports as
// terminal: Succeeded, Failed and NotFound are; Pending, Running, Unknown and
// Timeout are not.
func TestIsTerminalState(t *testing.T) {
	type stateCase struct {
		name     string
		state    types.TaskState
		expected bool
	}

	cases := []stateCase{
		{name: "Succeeded is terminal", state: types.TaskStateSucceeded, expected: true},
		{name: "Failed is terminal", state: types.TaskStateFailed, expected: true},
		{name: "NotFound is terminal", state: types.TaskStateNotFound, expected: true},
		{name: "Pending is not terminal", state: types.TaskStatePending, expected: false},
		{name: "Running is not terminal", state: types.TaskStateRunning, expected: false},
		{name: "Unknown is not terminal", state: types.TaskStateUnknown, expected: false},
		{name: "Timeout is not terminal", state: types.TaskStateTimeout, expected: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := isTerminalState(tc.state); got != tc.expected {
				t.Errorf("isTerminalState(%v) = %v, want %v", tc.state, got, tc.expected)
			}
		})
	}
}


================================================
FILE: kubernetes/internal/task-executor/runtime/composite.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

import (
	"context"
	"fmt"

	"k8s.io/klog/v2"

	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/config"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/types"
)

// NewExecutor builds the composite executor: it creates a process executor
// and a container executor from cfg and wraps both. It returns an error when
// cfg is nil or when either underlying executor cannot be created.
func NewExecutor(cfg *config.Config) (Executor, error) {
	if cfg == nil {
		return nil, fmt.Errorf("config cannot be nil")
	}

	process, procErr := NewProcessExecutor(cfg)
	if procErr != nil {
		return nil, fmt.Errorf("failed to create process executor: %w", procErr)
	}
	klog.InfoS("process executor initialized", "enableSidecar", cfg.EnableSidecarMode, "mainContainer", cfg.MainContainerName)

	container, ctrErr := newContainerExecutor(cfg)
	if ctrErr != nil {
		return nil, fmt.Errorf("failed to create container executor: %w", ctrErr)
	}

	composite := &compositeExecutor{
		processExec:   process,
		containerExec: container,
	}
	return composite, nil
}

// compositeExecutor dispatches tasks to the appropriate underlying executor
type compositeExecutor struct {
	// processExec is chosen for tasks whose Process spec is set.
	processExec   Executor
	// containerExec is chosen for tasks without a Process spec.
	containerExec Executor
}

// getDelegate picks the executor responsible for task: the process executor
// when task.Process is set, otherwise the container executor. It returns an
// error for a nil task or when the selected executor is nil.
func (e *compositeExecutor) getDelegate(task *types.Task) (Executor, error) {
	if task == nil {
		return nil, fmt.Errorf("task cannot be nil")
	}

	var chosen Executor
	if task.Process != nil {
		chosen = e.processExec
	} else {
		chosen = e.containerExec
	}

	if chosen != nil {
		return chosen, nil
	}
	return nil, fmt.Errorf("no executor available for task: %s", task.Name)
}

// Start forwards the call to the executor selected by getDelegate and returns
// the selection error unchanged if no executor can be chosen.
func (e *compositeExecutor) Start(ctx context.Context, task *types.Task) error {
	target, lookupErr := e.getDelegate(task)
	if lookupErr != nil {
		return lookupErr
	}
	return target.Start(ctx, task)
}

// Inspect forwards the call to the executor selected by getDelegate and
// returns a nil status with the selection error if no executor can be chosen.
func (e *compositeExecutor) Inspect(ctx context.Context, task *types.Task) (*types.Status, error) {
	target, lookupErr := e.getDelegate(task)
	if lookupErr != nil {
		return nil, lookupErr
	}
	return target.Inspect(ctx, task)
}

// Stop forwards the call to the executor selected by getDelegate and returns
// the selection error unchanged if no executor can be chosen.
func (e *compositeExecutor) Stop(ctx context.Context, task *types.Task) error {
	target, lookupErr := e.getDelegate(task)
	if lookupErr != nil {
		return lookupErr
	}
	return target.Stop(ctx, task)
}


================================================
FILE: kubernetes/internal/task-executor/runtime/container.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

import (
	"context"
	"errors"
	"fmt"

	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/config"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/types"
)

// containerExecutor is the placeholder executor for container mode; all of
// its Executor methods currently return a "not implemented" error.
type containerExecutor struct {
	// config is stored at construction time; none of the current methods read it.
	config *config.Config
}

// newContainerExecutor returns a container-mode executor bound to cfg, or an
// error when cfg is nil. Container mode is not supported yet, so the returned
// executor is only a placeholder.
func newContainerExecutor(cfg *config.Config) (Executor, error) {
	if cfg != nil {
		return &containerExecutor{config: cfg}, nil
	}
	return nil, fmt.Errorf("config cannot be nil")
}

// Start always fails: container mode has no implementation yet.
func (e *containerExecutor) Start(ctx context.Context, task *types.Task) error {
	notImplemented := errors.New("container mode is not implemented yet - use process mode instead")
	return notImplemented
}

// Inspect always fails with a nil status: container mode has no implementation yet.
func (e *containerExecutor) Inspect(ctx context.Context, task *types.Task) (*types.Status, error) {
	notImplemented := errors.New("container mode is not implemented yet - use process mode instead")
	return nil, notImplemented
}

// Stop always fails: container mode has no implementation yet.
func (e *containerExecutor) Stop(ctx context.Context, task *types.Task) error {
	notImplemented := errors.New("container mode is not implemented yet - use process mode instead")
	return notImplemented
}


================================================
FILE: kubernetes/internal/task-executor/runtime/interface.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

import (
	"context"

	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/types"
)

// Executor defines the contract for running tasks across different modes.
// Every method receives the task it operates on; a nil error means the
// operation was accepted by the implementation.
type Executor interface {
	// Start launches the given task.
	Start(ctx context.Context, task *types.Task) error
	// Inspect retrieves the current runtime state.
	Inspect(ctx context.Context, task *types.Task) (*types.Status, error)

	// Stop terminates the given task.
	Stop(ctx context.Context, task *types.Task) error
}


================================================
FILE: kubernetes/internal/task-executor/runtime/process.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"strconv"
	"strings"
	"syscall"
	"time"

	"k8s.io/klog/v2"

	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/config"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/types"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/utils"
)

// Names of the per-task files kept inside each task's directory
// (<rootDir>/<task name>).
const (
	// ExitFile receives the command's exit code, written by the shim script.
	ExitFile   = "exit"
	// PidFile receives the PID of the process started by the executor.
	PidFile    = "pid"
	// StdoutFile is the append-only log that the command's stdout is redirected to.
	StdoutFile = "stdout.log"
	// StderrFile is the append-only log that the command's stderr is redirected to.
	StderrFile = "stderr.log"
)

// processExecutor handles both Host and Sidecar modes as they share the same
// shim-based process execution model.
type processExecutor struct {
	// config supplies EnableSidecarMode and MainContainerName, which decide
	// whether commands are run directly or through nsenter.
	config  *config.Config
	// rootDir is the base directory that holds one sub-directory per task;
	// it is initialized from config.DataDir.
	rootDir string
}

// NewProcessExecutor returns an Executor that runs tasks as shim-wrapped
// processes rooted at config.DataDir. It returns an error when config is nil,
// matching the behavior of the other executor constructors in this package.
func NewProcessExecutor(config *config.Config) (Executor, error) {
	if config == nil {
		// Previously a nil config caused a nil-pointer panic on config.DataDir.
		return nil, fmt.Errorf("config cannot be nil")
	}
	return &processExecutor{rootDir: config.DataDir, config: config}, nil
}

// Start launches the task's command wrapped in the shim script.
//
// The command line is task.Process.Command followed by task.Process.Args,
// shell-quoted and embedded in the script built by buildShimScript. In sidecar
// mode the script is run through nsenter inside the namespaces of the process
// whose environment contains SANDBOX_MAIN_CONTAINER=<MainContainerName>, using
// that process's environment; otherwise it is run with /bin/sh and the
// executor's own environment. The process is placed in its own process group.
//
// It returns an error when task is nil, the task name is rejected by
// SafeJoin, the process spec or command is missing, the sidecar target cannot
// be resolved, or the command cannot be started.
func (e *processExecutor) Start(ctx context.Context, task *types.Task) error {
	if task == nil {
		return fmt.Errorf("task cannot be nil")
	}
	taskDir, err := utils.SafeJoin(e.rootDir, task.Name)
	if err != nil {
		return fmt.Errorf("invalid task name: %w", err)
	}
	pidPath := filepath.Join(taskDir, PidFile)
	exitPath := filepath.Join(taskDir, ExitFile)

	if task.Process == nil {
		return fmt.Errorf("process spec is required for process executor but task.Process is nil (task name: %s)", task.Name)
	}

	// Build the command line in a fresh slice. Appending Args directly onto
	// Command could write into Command's backing array when it has spare
	// capacity, silently touching memory owned by the caller.
	cmdList := make([]string, 0, len(task.Process.Command)+len(task.Process.Args))
	cmdList = append(cmdList, task.Process.Command...)
	cmdList = append(cmdList, task.Process.Args...)

	if len(cmdList) == 0 {
		return fmt.Errorf("no command specified in process spec (task name: %s)", task.Name)
	}

	safeCmdStr := shellEscape(cmdList)
	shimScript := e.buildShimScript(exitPath, safeCmdStr)

	var cmd *exec.Cmd

	if e.config.EnableSidecarMode {
		targetPID, err := e.findPidByEnvVar("SANDBOX_MAIN_CONTAINER", e.config.MainContainerName)
		if err != nil {
			return fmt.Errorf("failed to resolve target PID: %w", err)
		}

		targetEnv, err := getProcEnviron(targetPID)
		if err != nil {
			return fmt.Errorf("failed to read target process environment: %w", err)
		}

		nsenterArgs := []string{
			"-t", strconv.Itoa(targetPID),
			"--mount", "--uts", "--ipc", "--net", "--pid",
			"--",
			"/bin/sh", "-c", shimScript,
		}
		cmd = exec.Command("nsenter", nsenterArgs...)
		cmd.Env = targetEnv
		klog.InfoS("Starting sidecar task", "id", task.Name, "targetPID", targetPID)

	} else {
		cmd = exec.Command("/bin/sh", "-c", shimScript)
		cmd.Env = os.Environ()
		klog.InfoS("Starting host task", "name", task.Name, "cmd", safeCmdStr, "exitPath", exitPath)
	}

	// Run in a new process group (pgid == pid) so Stop can signal the group.
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Setpgid: true,
		Pgid:    0,
	}

	return e.executeCommand(task, cmd, pidPath)
}

// executeCommand wires up logging for cmd, starts it and records its PID.
//
// stdout and stderr of the command are appended to StdoutFile and StderrFile
// in the task directory. Environment entries with a non-empty name and the
// working directory from task.Process are applied to cmd before it starts.
// After a successful start the PID is written to pidPath and a goroutine
// waits for the process so that its exit is logged and it is reaped.
//
// It returns an error when task or cmd is nil, the task name is rejected by
// SafeJoin, a log file cannot be opened, the command fails to start, or the
// PID file cannot be written (in which case the started process is killed).
func (e *processExecutor) executeCommand(task *types.Task, cmd *exec.Cmd, pidPath string) error {
	if task == nil || cmd == nil {
		return fmt.Errorf("task and cmd cannot be nil")
	}

	taskDir, err := utils.SafeJoin(e.rootDir, task.Name)
	if err != nil {
		return fmt.Errorf("invalid task name: %w", err)
	}

	stdoutPath := filepath.Join(taskDir, StdoutFile)
	stderrPath := filepath.Join(taskDir, StderrFile)

	// The child receives its own copies of these descriptors at start, so the
	// parent's handles are closed on every return path.
	stdoutFile, err := os.OpenFile(stdoutPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
	if err != nil {
		return fmt.Errorf("failed to open stdout: %w", err)
	}
	defer stdoutFile.Close()

	stderrFile, err := os.OpenFile(stderrPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
	if err != nil {
		return fmt.Errorf("failed to open stderr: %w", err)
	}
	defer stderrFile.Close()

	cmd.Stdout = stdoutFile
	cmd.Stderr = stderrFile

	if task.Process != nil {
		for _, env := range task.Process.Env {
			if env.Name != "" {
				cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", env.Name, env.Value))
			}
		}

		if task.Process.WorkingDir != "" {
			cmd.Dir = task.Process.WorkingDir
			klog.InfoS("Set working directory", "name", task.Name, "workingDir", task.Process.WorkingDir)
		}
	}

	if err := cmd.Start(); err != nil {
		klog.ErrorS(err, "failed to start command", "name", task.Name)
		return fmt.Errorf("failed to start cmd: %w", err)
	}

	// Write PID to file immediately (Host-side PID)
	// This fixes the issue where sidecar tasks would write the container-internal PID
	pid := cmd.Process.Pid
	if err := os.WriteFile(pidPath, []byte(strconv.Itoa(pid)), 0644); err != nil {
		klog.ErrorS(err, "failed to write pid file", "name", task.Name)
		_ = cmd.Process.Kill()
		// Reap the killed process so it does not linger as a zombie.
		_ = cmd.Wait()
		return fmt.Errorf("failed to write pid file: %w", err)
	}

	klog.InfoS("Task command started successfully", "name", task.Name, "pid", pid)

	// Wait in the background: this reaps the process and logs how it ended.
	go func() {
		if err := cmd.Wait(); err != nil {
			klog.ErrorS(err, "task process exited with error", "name", task.Name)
		} else {
			klog.InfoS("task process exited successfully", "name", task.Name)
		}
	}()
	return nil
}

// buildShimScript returns the shell script text that wraps a task command.
//
// cmdStr is inserted into the script verbatim, so it must already be a safely
// quoted command line (Start passes the output of shellEscape). exitPath is
// quoted here with shellEscapePath and is the file the script writes the
// command's exit code to. The generated script is also logged at info level.
func (e *processExecutor) buildShimScript(exitPath, cmdStr string) string {
	// The shim script acts as a mini-init process.
	// 1. It runs the user command in the background.
	// 2. It traps SIGTERM and forwards it to the child process.
	// 3. It waits for the child to exit and captures the exit code.
	// This ensures graceful shutdown propagation in sidecar/host modes.
	//
	// NOTE(review): per POSIX, a trapped signal makes `wait` return right away
	// with a status > 128, so after SIGTERM the script may record that status
	// and exit before the child has actually finished — confirm whether a
	// second `wait` is wanted.
	script := fmt.Sprintf(`
cleanup() {
    if [ -n "$CHILD_PID" ]; then
        kill -TERM "$CHILD_PID" 2>/dev/null
    fi
}
trap cleanup TERM

%s &
CHILD_PID=$!
wait "$CHILD_PID"
EXIT_CODE=$?

printf "%%d" $EXIT_CODE > %s
exit $EXIT_CODE
`, cmdStr, shellEscapePath(exitPath))
	klog.InfoS("Generated shim script", "exitPath", exitPath, "script", script)
	return script
}

// Inspect derives the task's state from the files in its task directory.
//
//   - If the exit file holds a parseable exit code, the task is Succeeded
//     (code 0) or Failed (any other code). FinishedAt is the exit file's
//     modification time and StartedAt the pid file's, when available.
//   - Otherwise, if the pid file exists, the task is Running while that PID is
//     alive (or Timeout once TimeoutSeconds has elapsed since the pid file was
//     written), and Failed with exit code 137 if the process is gone.
//   - Otherwise the task is Pending.
//
// It returns an error when task is nil or the task name is rejected by
// SafeJoin.
func (e *processExecutor) Inspect(ctx context.Context, task *types.Task) (*types.Status, error) {
	if task == nil {
		return nil, fmt.Errorf("task cannot be nil")
	}
	taskDir, err := utils.SafeJoin(e.rootDir, task.Name)
	if err != nil {
		return nil, fmt.Errorf("invalid task name: %w", err)
	}
	exitPath := filepath.Join(taskDir, ExitFile)
	pidPath := filepath.Join(taskDir, PidFile)

	status := &types.Status{
		State: types.TaskStateUnknown,
	}
	subStatus := types.SubStatus{}

	if exitData, readErr := os.ReadFile(exitPath); readErr == nil {
		// The shim creates the exit file before the code is written into it, so
		// an empty or partial file can be observed. Only trust a parseable
		// value; otherwise fall through to the pid-based checks instead of
		// reporting exit code 0 (Succeeded) by accident.
		if exitCode, parseErr := strconv.Atoi(strings.TrimSpace(string(exitData))); parseErr == nil {
			subStatus.ExitCode = exitCode

			// The file may vanish between ReadFile and Stat; do not
			// dereference a nil FileInfo in that case.
			finishedAt := time.Now()
			if fileInfo, statErr := os.Stat(exitPath); statErr == nil {
				finishedAt = fileInfo.ModTime()
			}
			subStatus.FinishedAt = &finishedAt

			if exitCode == 0 {
				status.State = types.TaskStateSucceeded
				subStatus.Reason = "Succeeded"
			} else {
				status.State = types.TaskStateFailed
				subStatus.Reason = "Failed"
			}

			if pidFileInfo, statErr := os.Stat(pidPath); statErr == nil {
				startedAt := pidFileInfo.ModTime()
				subStatus.StartedAt = &startedAt
			}

			status.SubStatuses = []types.SubStatus{subStatus}
			return status, nil
		}
	}

	if pidData, readErr := os.ReadFile(pidPath); readErr == nil {
		// If the pid file disappears between ReadFile and Stat, treat it like
		// a missing pid file and report Pending below.
		if fileInfo, statErr := os.Stat(pidPath); statErr == nil {
			pid, _ := strconv.Atoi(strings.TrimSpace(string(pidData)))
			startedAt := fileInfo.ModTime()
			subStatus.StartedAt = &startedAt

			if isProcessRunning(pid) {
				status.State = types.TaskStateRunning
				if task.Process != nil && task.Process.TimeoutSeconds != nil {
					timeout := time.Duration(*task.Process.TimeoutSeconds) * time.Second
					elapsed := time.Since(startedAt)
					if elapsed > timeout {
						status.State = types.TaskStateTimeout
						subStatus.Reason = "TaskTimeout"
						subStatus.Message = fmt.Sprintf("Task exceeded timeout of %d seconds", *task.Process.TimeoutSeconds)
					}
				}
			} else {
				// No exit code was recorded and the process is gone.
				status.State = types.TaskStateFailed
				subStatus.ExitCode = 137
				subStatus.Reason = "ProcessCrashed"
				subStatus.Message = "Process exited without writing exit code"
				subStatus.FinishedAt = &startedAt
			}
			status.SubStatuses = []types.SubStatus{subStatus}
			return status, nil
		}
	}

	status.State = types.TaskStatePending
	subStatus.Reason = "Pending"
	status.SubStatuses = []types.SubStatus{subStatus}

	return status, nil
}

// Stop terminates the task's process.
//
// The PID is read from the task's pid file; a missing or unparseable pid file
// is treated as "nothing to stop". SIGTERM is sent to the shim process (in
// sidecar mode: the first child of the recorded nsenter PID), falling back to
// the whole process group when that fails. If the recorded PID is still alive
// after 10 seconds, SIGKILL is sent to the target and to the process group.
//
// It returns an error only when task is nil or the task name is rejected by
// SafeJoin.
func (e *processExecutor) Stop(ctx context.Context, task *types.Task) error {
	if task == nil {
		return fmt.Errorf("task cannot be nil")
	}
	taskDir, err := utils.SafeJoin(e.rootDir, task.Name)
	if err != nil {
		return fmt.Errorf("invalid task name: %w", err)
	}
	pidPath := filepath.Join(taskDir, PidFile)
	pidData, err := os.ReadFile(pidPath)
	if err != nil {
		return nil
	}
	var pid int
	pid, err = strconv.Atoi(strings.TrimSpace(string(pidData)))
	if err != nil || pid == 0 {
		return nil
	}
	klog.InfoS("Read PID from pid file", "name", task.Name, "pid", pid)

	// A negative PID addresses the whole process group in kill(2).
	pgid := -pid

	targetPID := 0
	if e.config.EnableSidecarMode {
		children, err := getChildrenPIDs(pid)
		if err == nil && len(children) > 0 {
			targetPID = children[0]
			klog.InfoS("Sidecar mode: targeted Shim process via /proc/children", "nsenterPID", pid, "shimPID", targetPID)
		} else {
			// klog.Warning formats its arguments like fmt.Print, so key/value
			// pairs must go through a format string instead.
			klog.Warningf("Sidecar mode: failed to find child process via /proc/children, falling back to PGID (pid=%d, err=%v)", pid, err)
		}
	} else {
		targetPID = pid
	}

	killedShim := false
	if targetPID > 0 {
		if err := syscall.Kill(targetPID, syscall.SIGTERM); err == nil {
			killedShim = true
		} else if err != syscall.ESRCH {
			klog.ErrorS(err, "Failed to send SIGTERM to target process", "targetPID", targetPID)
		}
	}

	if !killedShim {
		_ = syscall.Kill(pgid, syscall.SIGTERM)
	}

	// Give the process up to 10 seconds to exit on its own.
	timeout := 10 * time.Second
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if !isProcessRunning(pid) {
			return nil
		}
		time.Sleep(500 * time.Millisecond)
	}

	klog.InfoS("Process did not exit after timeout, sending SIGKILL", "pgid", pgid)
	if targetPID > 0 {
		_ = syscall.Kill(targetPID, syscall.SIGKILL)
	}
	_ = syscall.Kill(pgid, syscall.SIGKILL)

	return nil
}

// getChildrenPIDs returns the PIDs listed in /proc/<pid>/task/<pid>/children.
// Tokens that are not valid integers are skipped; a read failure is returned
// as the error with a nil slice.
func getChildrenPIDs(pid int) ([]int, error) {
	raw, err := os.ReadFile(fmt.Sprintf("/proc/%d/task/%d/children", pid, pid))
	if err != nil {
		return nil, err
	}

	var children []int
	for _, token := range strings.Fields(string(raw)) {
		childPID, convErr := strconv.Atoi(token)
		if convErr != nil {
			continue
		}
		children = append(children, childPID)
	}
	return children, nil
}

// isProcessRunning reports whether signal 0 can be delivered to pid, i.e.
// whether os.FindProcess succeeds and the probe signal returns no error.
func isProcessRunning(pid int) bool {
	proc, findErr := os.FindProcess(pid)
	if findErr != nil {
		return false
	}
	// Signal 0 performs the existence/permission check without sending anything.
	probeErr := proc.Signal(syscall.Signal(0))
	return probeErr == nil
}

// shellEscape turns an argument list into a single shell-safe string: every
// argument is quoted with shellEscapePath and the results are joined with one
// space.
func shellEscape(args []string) string {
	var sb strings.Builder
	for idx, arg := range args {
		if idx > 0 {
			sb.WriteByte(' ')
		}
		sb.WriteString(shellEscapePath(arg))
	}
	return sb.String()
}

// shellEscapePath quotes one string so a POSIX shell reads it as a single
// literal word. The value is wrapped in single quotes and every embedded
// single quote is replaced by '\'' (close quote, escaped quote, reopen quote).
// e.g., foo'bar -> 'foo'\''bar'
func shellEscapePath(s string) string {
	const quote = "'"
	escaped := strings.ReplaceAll(s, quote, `'\''`)
	return quote + escaped + quote
}

// findPidByEnvVar scans the numeric entries of /proc and returns the PID of
// the first process (other than the current one) whose environment contains
// exactly envName=expectedValue. Processes whose environ file cannot be read
// are skipped. An error is returned when /proc cannot be listed or no process
// matches.
func (e *processExecutor) findPidByEnvVar(envName, expectedValue string) (int, error) {
	procDir, err := os.Open("/proc")
	if err != nil {
		return 0, fmt.Errorf("failed to open /proc: %w", err)
	}
	defer procDir.Close()

	names, err := procDir.Readdirnames(-1)
	if err != nil {
		return 0, fmt.Errorf("failed to read /proc entries: %w", err)
	}

	ownPID := os.Getpid()
	wanted := fmt.Sprintf("%s=%s", envName, expectedValue)

	for _, name := range names {
		// Only numeric directory names are processes; never match ourselves.
		candidate, convErr := strconv.Atoi(name)
		if convErr != nil || candidate == ownPID {
			continue
		}

		raw, readErr := os.ReadFile(filepath.Join("/proc", name, "environ"))
		if readErr != nil {
			continue
		}

		// Entries in the environ file are separated by NUL bytes.
		for _, kv := range strings.Split(string(raw), "\x00") {
			if kv != wanted {
				continue
			}
			klog.InfoS("Found main container by environment variable", "pid", candidate, "env", wanted)
			return candidate, nil
		}
	}

	return 0, fmt.Errorf("no process found with environment variable %s=%s", envName, expectedValue)
}

// getProcEnviron returns the NAME=value entries found in /proc/<pid>/environ.
// Empty entries are dropped; a read failure is returned as the error with a
// nil slice.
func getProcEnviron(pid int) ([]string, error) {
	raw, err := os.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "environ"))
	if err != nil {
		return nil, err
	}

	// The kernel separates the entries with NUL bytes.
	var envs []string
	for _, kv := range strings.Split(string(raw), "\x00") {
		if kv == "" {
			continue
		}
		envs = append(envs, kv)
	}
	return envs, nil
}


================================================
FILE: kubernetes/internal/task-executor/runtime/process_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package runtime

import (
	"context"
	"os"
	"os/exec"
	"path/filepath"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	corev1 "k8s.io/api/core/v1"

	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/config"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/types"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/utils"
	api "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/task-executor"
)

// setupTestExecutor creates a process executor whose data directory is a
// per-test temporary directory, with EnableSidecarMode set to false. It
// returns the executor together with that directory and fails the calling
// test immediately if the executor cannot be constructed.
func setupTestExecutor(t *testing.T) (Executor, string) {
	// Attribute failures to the calling test's line rather than to this helper.
	t.Helper()

	dataDir := t.TempDir()
	executor, err := NewProcessExecutor(&config.Config{
		DataDir:           dataDir,
		EnableSidecarMode: false,
	})
	if err != nil {
		t.Fatalf("Failed to create executor: %v", err)
	}
	return executor, dataDir
}

// TestProcessExecutor_Lifecycle starts a long-running task, checks that it is
// reported as Running, stops it, and checks that it is then reported as Failed.
func TestProcessExecutor_Lifecycle(t *testing.T) {
	// Skip if not running on Linux/Unix-like systems where sh is available
	if _, err := exec.LookPath("sh"); err != nil {
		t.Skip("sh not found, skipping process executor test")
	}

	executor, _ := setupTestExecutor(t)
	pExecutor := executor.(*processExecutor)
	ctx := context.Background()

	// 1. Create a task that runs for a while
	task := &types.Task{
		Name: "long-running",
		Process: &api.Process{
			Command: []string{"/bin/sh", "-c", "sleep 10"},
		},
	}

	// Create task directory manually (normally handled by store). Setup
	// failures are fatal: continuing would only produce misleading errors.
	taskDir, err := utils.SafeJoin(pExecutor.rootDir, task.Name)
	if err != nil {
		t.Fatalf("SafeJoin failed: %v", err)
	}
	if err := os.MkdirAll(taskDir, 0755); err != nil {
		t.Fatalf("failed to create task directory: %v", err)
	}

	// 2. Start
	if err := executor.Start(ctx, task); err != nil {
		t.Fatalf("Start failed: %v", err)
	}

	// 3. Inspect (Running)
	status, err := executor.Inspect(ctx, task)
	if err != nil {
		t.Fatalf("Inspect failed: %v", err)
	}
	if status.State != types.TaskStateRunning {
		t.Errorf("Task should be running, got: %s", status.State)
	}

	// 4. Stop
	if err := executor.Stop(ctx, task); err != nil {
		t.Fatalf("Stop failed: %v", err)
	}

	// 5. Inspect (Terminated)
	// Wait a bit for file to be written
	time.Sleep(100 * time.Millisecond)
	status, err = executor.Inspect(ctx, task)
	if err != nil {
		t.Fatalf("Inspect failed: %v", err)
	}
	// sleep command killed by signal results in non-zero exit code, so it's Failed
	if status.State != types.TaskStateFailed {
		t.Errorf("Task should be failed (terminated), got: %s", status.State)
	}
}

// TestProcessExecutor_ShortLived runs a command that exits immediately and
// checks that the task is reported as Succeeded with exit code 0.
func TestProcessExecutor_ShortLived(t *testing.T) {
	if _, err := exec.LookPath("sh"); err != nil {
		t.Skip("sh not found")
	}

	executor, _ := setupTestExecutor(t)
	pExecutor := executor.(*processExecutor)
	ctx := context.Background()

	task := &types.Task{
		Name: "short-lived",
		Process: &api.Process{
			Command: []string{"echo", "done"},
		},
	}
	// Setup failures are fatal: without the task directory nothing else is meaningful.
	taskDir, err := utils.SafeJoin(pExecutor.rootDir, task.Name)
	if err != nil {
		t.Fatalf("SafeJoin failed: %v", err)
	}
	if err := os.MkdirAll(taskDir, 0755); err != nil {
		t.Fatalf("failed to create task directory: %v", err)
	}

	if err := executor.Start(ctx, task); err != nil {
		t.Fatalf("Start failed: %v", err)
	}

	// Wait for process to finish
	time.Sleep(200 * time.Millisecond)

	status, err := executor.Inspect(ctx, task)
	if err != nil {
		t.Fatalf("Inspect failed: %v", err)
	}
	if status.State != types.TaskStateSucceeded {
		t.Errorf("Task should be succeeded, got: %s", status.State)
	}
	// Stop here when there is no sub-status; indexing an empty slice would panic.
	if !assert.NotEmpty(t, status.SubStatuses) {
		return
	}
	if status.SubStatuses[0].ExitCode != 0 {
		t.Errorf("Exit code should be 0, got %d", status.SubStatuses[0].ExitCode)
	}
}

// TestProcessExecutor_Failure runs a command that exits with status 1 and
// checks that the task is reported as Failed with exit code 1.
func TestProcessExecutor_Failure(t *testing.T) {
	if _, err := exec.LookPath("sh"); err != nil {
		t.Skip("sh not found")
	}

	executor, _ := setupTestExecutor(t)
	pExecutor := executor.(*processExecutor)
	ctx := context.Background()

	task := &types.Task{
		Name: "failing-task",
		Process: &api.Process{
			Command: []string{"/bin/sh", "-c", "exit 1"},
		},
	}
	// Setup failures are fatal: without the task directory nothing else is meaningful.
	taskDir, err := utils.SafeJoin(pExecutor.rootDir, task.Name)
	if err != nil {
		t.Fatalf("SafeJoin failed: %v", err)
	}
	if err := os.MkdirAll(taskDir, 0755); err != nil {
		t.Fatalf("failed to create task directory: %v", err)
	}

	if err := executor.Start(ctx, task); err != nil {
		t.Fatalf("Start failed: %v", err)
	}

	// Give the process time to exit.
	time.Sleep(200 * time.Millisecond)

	status, err := executor.Inspect(ctx, task)
	if err != nil {
		t.Fatalf("Inspect failed: %v", err)
	}
	if status.State != types.TaskStateFailed {
		t.Errorf("Task should be failed")
	}
	// Stop here when there is no sub-status; indexing an empty slice would panic.
	if !assert.NotEmpty(t, status.SubStatuses) {
		return
	}
	if status.SubStatuses[0].ExitCode != 1 {
		t.Errorf("Exit code should be 1, got %d", status.SubStatuses[0].ExitCode)
	}
}

// TestProcessExecutor_InvalidArgs checks that Start rejects a nil task and a
// task whose process spec is empty.
func TestProcessExecutor_InvalidArgs(t *testing.T) {
	executor, _ := setupTestExecutor(t)
	ctx := context.Background()

	// Nil task
	if startErr := executor.Start(ctx, nil); startErr == nil {
		t.Error("Start should fail with nil task")
	}

	// Missing process spec
	emptySpec := &types.Task{Name: "invalid", Process: &api.Process{}}
	if startErr := executor.Start(ctx, emptySpec); startErr == nil {
		t.Error("Start should fail with missing process spec")
	}
}

// TestShellEscape checks shellEscape against plain words, a word containing a
// space, and a word containing a single quote.
func TestShellEscape(t *testing.T) {
	type escapeCase struct {
		args []string
		want string
	}
	cases := []escapeCase{
		{args: []string{"echo", "hello"}, want: "'echo' 'hello'"},
		{args: []string{"echo", "hello world"}, want: "'echo' 'hello world'"},
		{args: []string{"foo'bar"}, want: "'foo'\\''bar'"},
	}

	for i := range cases {
		c := cases[i]
		if actual := shellEscape(c.args); actual != c.want {
			t.Errorf("shellEscape(%v) = %q, want %q", c.args, actual, c.want)
		}
	}
}

func TestNewExecutor(t *testing.T) {
	// 1. Container mode + Host Mode
	cfg := &config.Config{}
	e, err := NewExecutor(cfg)
	if err != nil {
		t.Fatalf("NewExecutor(container) failed: %v", err)
	}
	if _, ok := e.(*compositeExecutor); !ok {
		t.Error("NewExecutor should return CompositeExecutor")
	}

	// 2. Process mode only
	cfg = &config.Config{
		DataDir: t.TempDir(),
	}
	e, err = NewExecutor(cfg)
	if err != nil {
		t.Fatalf("NewExecutor(process) failed: %v", err)
	}
	if _, ok := e.(*compositeExecutor); !ok {
		t.Error("NewExecutor should return CompositeExecutor")
	}

	// 3. Nil config
	if _, err := NewExecutor(nil); err == nil {
		t.Error("NewExecutor should fail with nil config")
	}
}

// TestProcessExecutor_EnvInheritance runs `env` as a task and checks that its
// captured stdout contains both a variable set in the test process and a
// variable declared in the task's process spec.
func TestProcessExecutor_EnvInheritance(t *testing.T) {
	if _, err := exec.LookPath("sh"); err != nil {
		t.Skip("sh not found")
	}

	// 1. Setup Host Environment
	// t.Setenv restores the previous value (or absence) when the test ends,
	// instead of unconditionally unsetting the variable.
	expectedHostVar := "HOST_TEST_VAR=host_value"
	t.Setenv("HOST_TEST_VAR", "host_value")

	executor, _ := setupTestExecutor(t)
	pExecutor := executor.(*processExecutor)
	ctx := context.Background()

	// 2. Define Task with Custom Env
	task := &types.Task{
		Name: "env-test",
		Process: &api.Process{
			Command: []string{"env"},
			Env: []corev1.EnvVar{
				{Name: "TASK_TEST_VAR", Value: "task_value"},
			},
		},
	}
	expectedTaskVar := "TASK_TEST_VAR=task_value"

	// Setup failures are fatal: without the task directory nothing else is meaningful.
	taskDir, err := utils.SafeJoin(pExecutor.rootDir, task.Name)
	if err != nil {
		t.Fatalf("SafeJoin failed: %v", err)
	}
	if err := os.MkdirAll(taskDir, 0755); err != nil {
		t.Fatalf("failed to create task directory: %v", err)
	}

	// 3. Start Task
	if err := executor.Start(ctx, task); err != nil {
		t.Fatalf("Start failed: %v", err)
	}

	// 4. Wait for completion
	time.Sleep(200 * time.Millisecond)

	status, err := executor.Inspect(ctx, task)
	if err != nil {
		// Fatal, so that status is never dereferenced after a failed Inspect.
		t.Fatalf("Inspect failed: %v", err)
	}
	assert.Equal(t, types.TaskStateSucceeded, status.State)

	// 5. Verify Output
	stdoutPath := filepath.Join(taskDir, StdoutFile)
	output, err := os.ReadFile(stdoutPath)
	assert.Nil(t, err)
	outputStr := string(output)

	assert.Contains(t, outputStr, expectedHostVar, "Should inherit host environment variables")
	assert.Contains(t, outputStr, expectedTaskVar, "Should include task-specific environment variables")
}

// TestProcessExecutor_TimeoutDetection starts a 30-second sleep with a
// 2-second timeout and checks that, once the timeout has elapsed, Inspect
// reports the Timeout state with the "TaskTimeout" reason and a message that
// names the configured timeout.
func TestProcessExecutor_TimeoutDetection(t *testing.T) {
	if _, err := exec.LookPath("sh"); err != nil {
		t.Skip("sh not found")
	}

	executor, _ := setupTestExecutor(t)
	pExecutor := executor.(*processExecutor)
	ctx := context.Background()

	timeoutSec := int64(2)
	task := &types.Task{
		Name: "timeout-task",
		Process: &api.Process{
			Command:        []string{"sleep", "30"},
			TimeoutSeconds: &timeoutSec,
		},
	}
	// Setup failures are fatal: without the task directory nothing else is meaningful.
	taskDir, err := utils.SafeJoin(pExecutor.rootDir, task.Name)
	if err != nil {
		t.Fatalf("SafeJoin failed: %v", err)
	}
	if err := os.MkdirAll(taskDir, 0755); err != nil {
		t.Fatalf("failed to create task directory: %v", err)
	}

	if err := executor.Start(ctx, task); err != nil {
		t.Fatalf("Start failed: %v", err)
	}
	// Cleanup: deferred so the sleeping process is stopped even when a later
	// step aborts the test with Fatalf.
	defer executor.Stop(ctx, task)

	// Wait for timeout to be detected (2 seconds + margin)
	time.Sleep(2500 * time.Millisecond)

	status, err := executor.Inspect(ctx, task)
	if err != nil {
		t.Fatalf("Inspect failed: %v", err)
	}

	// Should detect timeout
	assert.Equal(t, types.TaskStateTimeout, status.State, "Task should be in Timeout state")
	// Stop here when there is no sub-status; indexing an empty slice would panic.
	if !assert.NotEmpty(t, status.SubStatuses) {
		return
	}
	assert.Equal(t, "TaskTimeout", status.SubStatuses[0].Reason)
	assert.Contains(t, status.SubStatuses[0].Message, "timeout of 2 seconds")
}

// TestProcessExecutor_TimeoutNotExceeded runs a command that finishes well
// inside its 10-second timeout and checks that it is reported as Succeeded
// rather than Timeout.
func TestProcessExecutor_TimeoutNotExceeded(t *testing.T) {
	if _, err := exec.LookPath("sh"); err != nil {
		t.Skip("sh not found")
	}

	executor, _ := setupTestExecutor(t)
	ctx := context.Background()

	timeoutSec := int64(10)
	task := &types.Task{
		Name: "quick-task",
		Process: &api.Process{
			Command:        []string{"echo", "done"},
			TimeoutSeconds: &timeoutSec,
		},
	}
	// Setup failures are fatal: without the task directory nothing else is meaningful.
	taskDir, err := utils.SafeJoin(executor.(*processExecutor).rootDir, task.Name)
	if err != nil {
		t.Fatalf("SafeJoin failed: %v", err)
	}
	if err := os.MkdirAll(taskDir, 0755); err != nil {
		t.Fatalf("failed to create task directory: %v", err)
	}

	if err := executor.Start(ctx, task); err != nil {
		t.Fatalf("Start failed: %v", err)
	}

	// Wait for process to complete
	time.Sleep(200 * time.Millisecond)

	status, err := executor.Inspect(ctx, task)
	if err != nil {
		t.Fatalf("Inspect failed: %v", err)
	}

	// Should be Succeeded, not Timeout
	assert.Equal(t, types.TaskStateSucceeded, status.State, "Task should be Succeeded, not Timeout")
}

// TestProcessExecutor_NoTimeout starts a task that has no TimeoutSeconds and
// checks that an immediate Inspect reports it as Running rather than Timeout.
func TestProcessExecutor_NoTimeout(t *testing.T) {
	if _, err := exec.LookPath("sh"); err != nil {
		t.Skip("sh not found")
	}

	executor, _ := setupTestExecutor(t)
	pExecutor := executor.(*processExecutor)
	ctx := context.Background()

	// Task without timeout setting
	task := &types.Task{
		Name: "no-timeout-task",
		Process: &api.Process{
			Command: []string{"sleep", "1"},
		},
	}
	// Setup failures are fatal: without the task directory nothing else is meaningful.
	taskDir, err := utils.SafeJoin(pExecutor.rootDir, task.Name)
	if err != nil {
		t.Fatalf("SafeJoin failed: %v", err)
	}
	if err := os.MkdirAll(taskDir, 0755); err != nil {
		t.Fatalf("failed to create task directory: %v", err)
	}

	if err := executor.Start(ctx, task); err != nil {
		t.Fatalf("Start failed: %v", err)
	}
	// Cleanup: deferred so the process is stopped even when Inspect below
	// aborts the test with Fatalf.
	defer executor.Stop(ctx, task)

	// Inspect immediately
	status, err := executor.Inspect(ctx, task)
	if err != nil {
		t.Fatalf("Inspect failed: %v", err)
	}

	// Should be Running, not Timeout
	assert.Equal(t, types.TaskStateRunning, status.State, "Task should be Running when no timeout is set")
}


================================================
FILE: kubernetes/internal/task-executor/server/handler.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package server

import (
	"encoding/json"
	"fmt"
	"net/http"
	"time"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/klog/v2"

	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/config"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/manager"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/types"
	api "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/task-executor"
)

// ErrorResponse is the JSON body written by writeError for every failed request.
type ErrorResponse struct {
	// Code is the textual form of the HTTP status (writeError fills it with
	// http.StatusText of the response status code).
	Code    string `json:"code"`
	// Message is the human-readable description of what went wrong.
	Message string `json:"message"`
}

// Handler implements the task HTTP endpoints by delegating to a TaskManager.
type Handler struct {
	// manager performs the actual task operations; the handlers that need it
	// answer 500 when it is nil.
	manager manager.TaskManager
	// config is stored by NewHandler; none of the handlers in this file read it.
	config  *config.Config
}

// NewHandler builds a Handler around the given task manager and config.
// Nil arguments are accepted; each one only triggers a warning log entry.
func NewHandler(mgr manager.TaskManager, cfg *config.Config) *Handler {
	handler := &Handler{manager: mgr, config: cfg}
	if handler.manager == nil {
		klog.Warning("TaskManager is nil, handler may not work properly")
	}
	if handler.config == nil {
		klog.Warning("Config is nil, handler may not work properly")
	}
	return handler
}

// CreateTask decodes a single task from the JSON request body, passes it to
// the task manager, and replies with the created task.
//
// Responses:
//   - 500 when the task manager is nil or its Create call fails
//   - 400 when the body is not valid JSON, the task name is empty, or the
//     task cannot be converted
//   - 201 with the created task encoded as JSON on success
func (h *Handler) CreateTask(w http.ResponseWriter, r *http.Request) {
	if h.manager == nil {
		writeError(w, http.StatusInternalServerError, "task manager not initialized")
		return
	}

	var apiTask api.Task
	if err := json.NewDecoder(r.Body).Decode(&apiTask); err != nil {
		writeError(w, http.StatusBadRequest, fmt.Sprintf("invalid request body: %v", err))
		return
	}

	if apiTask.Name == "" {
		writeError(w, http.StatusBadRequest, "task name is required")
		return
	}

	task := h.convertAPIToInternalTask(&apiTask)
	if task == nil {
		writeError(w, http.StatusBadRequest, "failed to convert task")
		return
	}

	created, err := h.manager.Create(r.Context(), task)
	if err != nil {
		klog.ErrorS(err, "failed to create task", "name", apiTask.Name)
		writeError(w, http.StatusInternalServerError, fmt.Sprintf("failed to create task: %v", err))
		return
	}

	response := convertInternalToAPITask(created)

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusCreated)
	if err := json.NewEncoder(w).Encode(response); err != nil {
		// The status line has already been sent, so the failure can only be logged.
		klog.ErrorS(err, "failed to encode create task response", "name", apiTask.Name)
	}

	klog.InfoS("task created via API", "name", apiTask.Name)
}

// SyncTasks decodes a JSON array of tasks from the request body, hands the
// desired set to the task manager's Sync, and replies with the tasks Sync
// returned.
//
// Entries with an empty name are skipped before Sync is called, and nil tasks
// returned by Sync are left out of the response.
//
// Responses:
//   - 500 when the task manager is nil or its Sync call fails
//   - 400 when the body is not valid JSON
//   - 200 (implicit) with a JSON array of tasks on success
func (h *Handler) SyncTasks(w http.ResponseWriter, r *http.Request) {
	if h.manager == nil {
		writeError(w, http.StatusInternalServerError, "task manager not initialized")
		return
	}

	var apiTasks []api.Task
	if err := json.NewDecoder(r.Body).Decode(&apiTasks); err != nil {
		writeError(w, http.StatusBadRequest, fmt.Sprintf("invalid request body: %v", err))
		return
	}

	desired := make([]*types.Task, 0, len(apiTasks))
	for i := range apiTasks {
		if apiTasks[i].Name == "" {
			continue
		}
		task := h.convertAPIToInternalTask(&apiTasks[i])
		if task != nil {
			desired = append(desired, task)
		}
	}

	current, err := h.manager.Sync(r.Context(), desired)
	if err != nil {
		klog.ErrorS(err, "failed to sync tasks")
		writeError(w, http.StatusInternalServerError, fmt.Sprintf("failed to sync tasks: %v", err))
		return
	}

	response := make([]api.Task, 0, len(current))
	for _, task := range current {
		if task != nil {
			response = append(response, *convertInternalToAPITask(task))
		}
	}

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(response); err != nil {
		// Part of the response may already be on the wire, so the failure can only be logged.
		klog.ErrorS(err, "failed to encode sync tasks response")
	}

	klog.V(1).InfoS("tasks synced via API", "count", len(response))
}

// GetTask looks up the task named by the "id" path value and replies with it
// encoded as JSON.
//
// Responses:
//   - 500 when the task manager is nil
//   - 400 when the path carries no id
//   - 404 when the task manager's Get call returns any error
//   - 200 (implicit) with the task on success
func (h *Handler) GetTask(w http.ResponseWriter, r *http.Request) {
	if h.manager == nil {
		writeError(w, http.StatusInternalServerError, "task manager not initialized")
		return
	}

	// Extract task ID from path
	taskID := r.PathValue("id")
	if taskID == "" {
		writeError(w, http.StatusBadRequest, "task id is required")
		return
	}

	task, err := h.manager.Get(r.Context(), taskID)
	if err != nil {
		klog.ErrorS(err, "failed to get task", "id", taskID)
		writeError(w, http.StatusNotFound, fmt.Sprintf("task not found: %v", err))
		return
	}

	response := convertInternalToAPITask(task)

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(response); err != nil {
		// Part of the response may already be on the wire, so the failure can only be logged.
		klog.ErrorS(err, "failed to encode get task response", "id", taskID)
	}
}

// ListTasks replies with every task returned by the task manager's List,
// encoded as a JSON array. Nil entries are left out; an empty result is
// encoded as an empty array.
//
// Responses:
//   - 500 when the task manager is nil or its List call fails
//   - 200 (implicit) with the JSON array on success
func (h *Handler) ListTasks(w http.ResponseWriter, r *http.Request) {
	if h.manager == nil {
		writeError(w, http.StatusInternalServerError, "task manager not initialized")
		return
	}

	tasks, err := h.manager.List(r.Context())
	if err != nil {
		klog.ErrorS(err, "failed to list tasks")
		writeError(w, http.StatusInternalServerError, fmt.Sprintf("failed to list tasks: %v", err))
		return
	}

	response := make([]api.Task, 0, len(tasks))
	for _, task := range tasks {
		if task != nil {
			response = append(response, *convertInternalToAPITask(task))
		}
	}

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(response); err != nil {
		// Part of the response may already be on the wire, so the failure can only be logged.
		klog.ErrorS(err, "failed to encode list tasks response")
	}
}

// Health replies with the fixed JSON document {"status":"healthy"}. It does
// not consult the task manager or the request.
func (h *Handler) Health(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]string{"status": "healthy"})
}

// DeleteTask removes the task named by the "id" path value.
//
// Responses:
//   - 500 when the task manager is nil or its Delete call fails
//   - 400 when the path carries no id
//   - 204 with no body on success
func (h *Handler) DeleteTask(w http.ResponseWriter, r *http.Request) {
	if h.manager == nil {
		writeError(w, http.StatusInternalServerError, "task manager not initialized")
		return
	}

	// The task identifier comes from the routed path.
	id := r.PathValue("id")
	if id == "" {
		writeError(w, http.StatusBadRequest, "task id is required")
		return
	}

	if delErr := h.manager.Delete(r.Context(), id); delErr != nil {
		klog.ErrorS(delErr, "failed to delete task", "id", id)
		writeError(w, http.StatusInternalServerError, fmt.Sprintf("failed to delete task: %v", delErr))
		return
	}

	w.WriteHeader(http.StatusNoContent)
	klog.InfoS("task deleted via API", "id", id)
}

// writeError sends a JSON ErrorResponse with the given HTTP status code.
// The response's Code field is the standard status text for code and its
// Message field is message. A failure to write the body is logged, since the
// status line has already been sent by then.
func writeError(w http.ResponseWriter, code int, message string) {
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(code)
	body := ErrorResponse{
		Code:    http.StatusText(code),
		Message: message,
	}
	if err := json.NewEncoder(w).Encode(body); err != nil {
		klog.ErrorS(err, "failed to encode error response", "status", code)
	}
}

// convertAPIToInternalTask copies the name, process spec and pod template of
// an API task into a new internal task whose state is Pending. It returns nil
// when apiTask is nil.
func (h *Handler) convertAPIToInternalTask(apiTask *api.Task) *types.Task {
	if apiTask == nil {
		return nil
	}
	return &types.Task{
		Name:            apiTask.Name,
		Process:         apiTask.Process,
		PodTemplateSpec: apiTask.PodTemplateSpec,
		Status: types.Status{
			State: types.TaskStatePending,
		},
	}
}

// convertInternalToAPITask translates an internal task into its API
// representation. It returns nil when task is nil.
//
// The name, process spec and pod template are copied as-is. ProcessStatus is
// filled only for tasks that have a process spec and at least one sub-status;
// PodStatus is filled only for tasks that have a pod template.
func convertInternalToAPITask(task *types.Task) *api.Task {
	if task == nil {
		return nil
	}

	apiTask := &api.Task{
		Name:            task.Name,
		Process:         task.Process,
		PodTemplateSpec: task.PodTemplateSpec,
	}

	if task.Process != nil && len(task.Status.SubStatuses) > 0 {
		apiTask.ProcessStatus = processStatusFromInternal(&task.Status)
	}
	if task.PodTemplateSpec != nil {
		apiTask.PodStatus = podStatusFromInternal(&task.Status)
	}

	return apiTask
}

// processStatusFromInternal derives the API process status from the first
// sub-status of status. The caller must ensure that at least one sub-status
// exists.
//
// Exactly one of Terminated, Running or Waiting is set:
//   - Timeout state: Terminated with exit code 137 and FinishedAt set to the
//     current time, regardless of the sub-status timestamps
//   - FinishedAt present: Terminated with the recorded exit code
//   - StartedAt present: Running
//   - otherwise: Waiting
func processStatusFromInternal(status *types.Status) *api.ProcessStatus {
	sub := status.SubStatuses[0]
	result := &api.ProcessStatus{}

	switch {
	case status.State == types.TaskStateTimeout:
		term := &api.Terminated{
			ExitCode: 137,
			Reason:   sub.Reason,
			Message:  sub.Message,
		}
		if sub.StartedAt != nil {
			term.StartedAt = metav1.NewTime(*sub.StartedAt)
		}
		term.FinishedAt = metav1.Now()
		result.Terminated = term
	case sub.FinishedAt != nil:
		term := &api.Terminated{
			ExitCode: int32(sub.ExitCode),
			Reason:   sub.Reason,
			Message:  sub.Message,
		}
		term.FinishedAt = metav1.NewTime(*sub.FinishedAt)
		if sub.StartedAt != nil {
			term.StartedAt = metav1.NewTime(*sub.StartedAt)
		}
		result.Terminated = term
	case sub.StartedAt != nil:
		result.Running = &api.Running{
			StartedAt: metav1.NewTime(*sub.StartedAt),
		}
	default:
		result.Waiting = &api.Waiting{
			Reason:  sub.Reason,
			Message: sub.Message,
		}
	}
	return result
}

// podStatusFromInternal builds a pod status from the task state and its
// sub-statuses: the phase, one container status per sub-status, and the
// PodReady and ContainersReady conditions.
func podStatusFromInternal(status *types.Status) *corev1.PodStatus {
	podStatus := &corev1.PodStatus{
		Phase: corev1.PodUnknown,
	}

	// NOTE(review): TaskStateTimeout has no case here and therefore reports
	// PodUnknown — confirm this is intended.
	switch status.State {
	case types.TaskStatePending:
		podStatus.Phase = corev1.PodPending
	case types.TaskStateRunning:
		podStatus.Phase = corev1.PodRunning
	case types.TaskStateSucceeded:
		podStatus.Phase = corev1.PodSucceeded
	case types.TaskStateFailed:
		podStatus.Phase = corev1.PodFailed
	}

	for i := range status.SubStatuses {
		podStatus.ContainerStatuses = append(podStatus.ContainerStatuses,
			containerStatusFromInternal(&status.SubStatuses[i]))
	}

	// The pod is ready only when there is at least one container and every
	// container is ready.
	allReady := len(podStatus.ContainerStatuses) > 0
	for _, cs := range podStatus.ContainerStatuses {
		if !cs.Ready {
			allReady = false
			break
		}
	}
	readyStatus := corev1.ConditionFalse
	if allReady {
		readyStatus = corev1.ConditionTrue
	}

	ltt := latestTransitionTime(status.SubStatuses)
	podStatus.Conditions = append(podStatus.Conditions,
		corev1.PodCondition{
			Type:               corev1.PodReady,
			Status:             readyStatus,
			LastTransitionTime: ltt,
		},
		corev1.PodCondition{
			Type:               corev1.ContainersReady,
			Status:             readyStatus,
			LastTransitionTime: ltt,
		},
	)

	return podStatus
}

// containerStatusFromInternal maps one sub-status to a container status:
// Terminated when FinishedAt is set, Running (and Ready) when only StartedAt
// is set, and Waiting otherwise.
func containerStatusFromInternal(sub *types.SubStatus) corev1.ContainerStatus {
	cs := corev1.ContainerStatus{
		Name: sub.Name,
	}
	switch {
	case sub.FinishedAt != nil:
		cs.State.Terminated = &corev1.ContainerStateTerminated{
			ExitCode:   int32(sub.ExitCode),
			Reason:     sub.Reason,
			Message:    sub.Message,
			FinishedAt: metav1.NewTime(*sub.FinishedAt),
		}
		if sub.StartedAt != nil {
			cs.State.Terminated.StartedAt = metav1.NewTime(*sub.StartedAt)
		}
	case sub.StartedAt != nil:
		cs.State.Running = &corev1.ContainerStateRunning{
			StartedAt: metav1.NewTime(*sub.StartedAt),
		}
		cs.Ready = true
	default:
		cs.State.Waiting = &corev1.ContainerStateWaiting{
			Reason:  sub.Reason,
			Message: sub.Message,
		}
	}
	return cs
}

// latestTransitionTime returns the most recent StartedAt or FinishedAt found
// in subs, or the current time when no sub-status carries a timestamp.
func latestTransitionTime(subs []types.SubStatus) metav1.Time {
	var latest time.Time
	for _, sub := range subs {
		if sub.StartedAt != nil && sub.StartedAt.After(latest) {
			latest = *sub.StartedAt
		}
		if sub.FinishedAt != nil && sub.FinishedAt.After(latest) {
			latest = *sub.FinishedAt
		}
	}
	if latest.IsZero() {
		return metav1.Now()
	}
	return metav1.NewTime(latest)
}


================================================
FILE: kubernetes/internal/task-executor/server/handler_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package server

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
	"net/http/httptest"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	corev1 "k8s.io/api/core/v1"

	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/config"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/types"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils"
	api "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/task-executor"
)

// MockTaskManager implements manager.TaskManager for testing
type MockTaskManager struct {
	// tasks is the in-memory task set, keyed by task name / id.
	tasks map[string]*types.Task
	// err, when non-nil, is returned by Create, Sync, Get, List and Delete
	// instead of performing the operation.
	err   error
}

// NewMockTaskManager returns a mock with an empty task map and no injected error.
func NewMockTaskManager() *MockTaskManager {
	mock := new(MockTaskManager)
	mock.tasks = map[string]*types.Task{}
	return mock
}

// Create stores task under its name and returns it, or returns the injected
// error without storing anything.
func (m *MockTaskManager) Create(ctx context.Context, task *types.Task) (*types.Task, error) {
	if injected := m.err; injected != nil {
		return nil, injected
	}
	m.tasks[task.Name] = task
	return task, nil
}

// Sync replaces the stored task set with desired and returns the tasks in the
// order given, or returns the injected error and leaves the set untouched.
func (m *MockTaskManager) Sync(ctx context.Context, desired []*types.Task) ([]*types.Task, error) {
	if injected := m.err; injected != nil {
		return nil, injected
	}

	// Start from a fresh map so tasks absent from desired disappear.
	m.tasks = map[string]*types.Task{}
	var synced []*types.Task
	for i := range desired {
		task := desired[i]
		m.tasks[task.Name] = task
		synced = append(synced, task)
	}
	return synced, nil
}

// Get returns the stored task with the given id. It returns the injected
// error when one is set, and a "not found" error when no such task exists.
func (m *MockTaskManager) Get(ctx context.Context, id string) (*types.Task, error) {
	if injected := m.err; injected != nil {
		return nil, injected
	}
	found, exists := m.tasks[id]
	if !exists {
		return nil, fmt.Errorf("not found")
	}
	return found, nil
}

// List returns every stored task in map-iteration order, or the injected
// error when one is set.
func (m *MockTaskManager) List(ctx context.Context) ([]*types.Task, error) {
	if injected := m.err; injected != nil {
		return nil, injected
	}
	var all []*types.Task
	for name := range m.tasks {
		all = append(all, m.tasks[name])
	}
	return all, nil
}

// Delete removes the task with the given id from the stored set. When an
// error is injected it is returned and nothing is removed.
func (m *MockTaskManager) Delete(ctx context.Context, id string) error {
	if m.err == nil {
		delete(m.tasks, id)
	}
	return m.err
}

// Start does nothing in the mock.
func (m *MockTaskManager) Start(ctx context.Context) {}
// Stop does nothing in the mock.
func (m *MockTaskManager) Stop()                     {}

func TestHandler_Health(t *testing.T) {
	cfg := &config.Config{}
	h := NewHandler(NewMockTaskManager(), cfg)
	req := httptest.NewRequest("GET", "/health", nil)
	w := httptest.NewRecorder()

	h.Health(w, req)

	if w.Code != http.StatusOK {
		t.Errorf("Health returned status %d", w.Code)
	}
}

// TestHandler_CreateTask posts a valid task and checks that the handler
// answers 201 and that the task reaches the manager.
func TestHandler_CreateTask(t *testing.T) {
	mock := NewMockTaskManager()
	handler := NewHandler(mock, &config.Config{})

	payload, _ := json.Marshal(api.Task{
		Name:    "test-task",
		Process: &api.Process{Command: []string{"echo"}},
	})

	recorder := httptest.NewRecorder()
	handler.CreateTask(recorder, httptest.NewRequest(http.MethodPost, "/tasks", bytes.NewReader(payload)))

	if got := recorder.Code; got != http.StatusCreated {
		t.Errorf("CreateTask returned status %d", got)
	}

	if _, stored := mock.tasks["test-task"]; !stored {
		t.Error("Task was not created in manager")
	}
}

// TestHandler_GetTask requests an existing task through the router and checks
// that the response is 200 and carries the task's name.
func TestHandler_GetTask(t *testing.T) {
	mgr := NewMockTaskManager()
	mgr.tasks["test-task"] = &types.Task{Name: "test-task"}
	cfg := &config.Config{}
	h := NewHandler(mgr, cfg)

	router := NewRouter(h)
	req := httptest.NewRequest("GET", "/tasks/test-task", nil)
	w := httptest.NewRecorder()

	router.ServeHTTP(w, req)

	if w.Code != http.StatusOK {
		t.Errorf("GetTask returned status %d", w.Code)
	}

	// A body that does not decode is a failure in its own right; do not let
	// it surface only as a confusing empty-name mismatch.
	var resp api.Task
	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
		t.Fatalf("failed to decode GetTask response: %v", err)
	}
	if resp.Name != "test-task" {
		t.Errorf("GetTask returned name %s", resp.Name)
	}
}

// TestHandler_DeleteTask deletes an existing task through the router and
// checks that the response is 204 and that the task is gone from the manager.
func TestHandler_DeleteTask(t *testing.T) {
	mock := NewMockTaskManager()
	mock.tasks["test-task"] = &types.Task{Name: "test-task"}
	routes := NewRouter(NewHandler(mock, &config.Config{}))

	recorder := httptest.NewRecorder()
	routes.ServeHTTP(recorder, httptest.NewRequest(http.MethodDelete, "/tasks/test-task", nil))

	if got := recorder.Code; got != http.StatusNoContent {
		t.Errorf("DeleteTask returned status %d", got)
	}

	if _, stillThere := mock.tasks["test-task"]; stillThere {
		t.Error("Task was not deleted from manager")
	}
}

// TestHandler_ListTasks seeds the manager with two tasks and checks that the
// ListTasks handler answers 200 with two entries.
func TestHandler_ListTasks(t *testing.T) {
	mgr := NewMockTaskManager()
	mgr.tasks["task-1"] = &types.Task{Name: "task-1"}
	mgr.tasks["task-2"] = &types.Task{Name: "task-2"}
	cfg := &config.Config{}
	h := NewHandler(mgr, cfg)

	req := httptest.NewRequest("GET", "/getTasks", nil)
	w := httptest.NewRecorder()

	h.ListTasks(w, req)

	if w.Code != http.StatusOK {
		t.Errorf("ListTasks returned status %d", w.Code)
	}

	// A body that does not decode is a failure in its own right; do not let
	// it surface only as a wrong task count.
	var resp []api.Task
	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
		t.Fatalf("failed to decode ListTasks response: %v", err)
	}
	if len(resp) != 2 {
		t.Errorf("ListTasks returned %d tasks, want 2", len(resp))
	}
}

// TestHandler_SyncTasks posts a one-element task list and checks that the
// handler answers 200 and that the task reaches the manager.
func TestHandler_SyncTasks(t *testing.T) {
	mock := NewMockTaskManager()
	handler := NewHandler(mock, &config.Config{})

	payload, _ := json.Marshal([]api.Task{
		{Name: "task-1", Process: &api.Process{}},
	})

	recorder := httptest.NewRecorder()
	handler.SyncTasks(recorder, httptest.NewRequest(http.MethodPost, "/setTasks", bytes.NewReader(payload)))

	if got := recorder.Code; got != http.StatusOK {
		t.Errorf("SyncTasks returned status %d", got)
	}

	if _, stored := mock.tasks["task-1"]; !stored {
		t.Error("Task was not synced to manager")
	}
}

// TestHandler_Errors checks that CreateTask answers 500 when the task manager
// returns an error.
func TestHandler_Errors(t *testing.T) {
	mock := NewMockTaskManager()
	mock.err = errors.New("mock error")
	handler := NewHandler(mock, &config.Config{})

	// Create fail
	payload, _ := json.Marshal(api.Task{Name: "fail"})
	recorder := httptest.NewRecorder()
	handler.CreateTask(recorder, httptest.NewRequest(http.MethodPost, "/tasks", bytes.NewReader(payload)))

	if got := recorder.Code; got != http.StatusInternalServerError {
		t.Errorf("CreateTask should fail with 500, got %d", got)
	}
}

func TestConvertInternalToAPITask(t *testing.T) {
	now := time.Now()

	t.Run("Process Task", func(t *testing.T) {
		task := &types.Task{
			Name:    "proc-task",
			Process: &api.Process{Command: []string{"ls"}},
			Status: types.Status{
				State: types.TaskStateSucceeded,
				SubStatuses: []types.SubStatus{
					{
						ExitCode:   0,
						Reason:     "Completed",
						FinishedAt: &now,
					},
				},
			},
		}

		apiTask := convertInternalToAPITask(task)
		assert.NotNil(t, apiTask.ProcessStatus)
		assert.NotNil(t, apiTask.ProcessStatus.Terminated)
		assert.Equal(t, int32(0), apiTask.ProcessStatus.Terminated.ExitCode)
		assert.Nil(t, apiTask.PodStatus)
	})

	t.Run("Pod Task - Partially Ready", func(t *testing.T) {
		task := &types.Task{
			Name:            "pod-task-partial",
			PodTemplateSpec: &corev1.PodTemplateSpec{},
			Status: types.Status{
				State: types.TaskStateRunning,
				SubStatuses: []types.SubStatus{
					{
						Name:      "c1",
						StartedAt: &now,
					},
					{
						Name:   "c2",
						Reason: "Pending",
					},
				},
			},
		}

		apiTask := convertInternalToAPITask(task)
		assert.NotNil(t, apiTask.PodStatus)
		assert.Equal(t, corev1.PodRunning, apiTask.PodStatus.Phase)
		assert.Len(t, apiTask.PodStatus.ContainerStatuses, 2)
		assert.True(t, apiTask.PodStatus.ContainerStatuses[0].Ready)
		assert.False(t, apiTask.PodStatus.ContainerStatuses[1].Ready)
		assert.False(t, utils.IsPodReadyConditionTrue(*apiTask.PodStatus))

		// Conditions check
		var podReady, containersReady *corev1.PodCondition
		for i := range apiTask.PodStatus.Conditions {
			c := &apiTask.PodStatus.Conditions[i]
			if c.Type == corev1.PodReady {
				podReady = c
			} else if c.Type == corev1.ContainersReady {
				containersReady = c
			}
		}
		assert.NotNil(t, podReady)
		assert.Equal(t, corev1.ConditionFalse, podReady.Status)
		assert.NotNil(t, containersReady)
		assert.Equal(t, corev1.ConditionFalse, containersReady.Status)
		assert.Equal(t, now.Unix(), podReady.LastTransitionTime.Unix())
	})

	t.Run("Pod Task - Fully Ready", func(t *testing.T) {
		later := now.Add(time.Minute)
		task := &types.Task{
			Name:            "pod-task-ready",
			PodTemplateSpec: &corev1.PodTemplateSpec{},
			Status: types.Status{
				State: types.TaskStateRunning,
				SubStatuses: []types.SubStatus{
					{
						Name:      "c1",
						StartedAt: &now,
					},
					{
						Name:      "c2",
						StartedAt: &later,
					},
				},
			},
		}

		apiTask := convertInternalToAPITask(task)
		assert.NotNil(t, apiTask.PodStatus)

		// Conditions check
		var podReady, containersReady *corev1.PodCondition
		for i := range apiTask.PodStatus.Conditions {
			c := &apiTask.PodStatus.Conditions[i]
			if c.Type == corev1.PodReady {
				podReady = c
			} else if c.Type == corev1.ContainersReady {
				containersReady = c
			}
		}
		assert.NotNil(t, podReady)
		assert.Equal(t, corev1.ConditionTrue, podReady.Status)
		assert.NotNil(t, containersReady)
		assert.Equal(t, corev1.ConditionTrue, containersReady.Status)
		// Should use the latest timestamp (later)
		assert.Equal(t, later.Unix(), podReady.LastTransitionTime.Unix())
		assert.True(t, utils.IsPodReadyConditionTrue(*apiTask.PodStatus))
	})
}

// TestConvertInternalToAPITask_Timeout checks how convertInternalToAPITask
// reports process tasks that timed out or were killed.
func TestConvertInternalToAPITask_Timeout(t *testing.T) {
	startedAt := time.Now()
	limitSeconds := int64(60)

	t.Run("Process Task Timeout", func(t *testing.T) {
		// Timeout state with no FinishedAt recorded on the sub status.
		timedOut := types.SubStatus{
			Reason:     "TaskTimeout",
			Message:    "Task exceeded timeout of 60 seconds",
			StartedAt:  &startedAt,
			FinishedAt: nil,
		}
		internal := &types.Task{
			Name: "timeout-task",
			Process: &api.Process{
				Command:        []string{"sleep", "100"},
				TimeoutSeconds: &limitSeconds,
			},
			Status: types.Status{
				State:       types.TaskStateTimeout,
				SubStatuses: []types.SubStatus{timedOut},
			},
		}

		got := convertInternalToAPITask(internal)

		// Expect a Terminated process status carrying exit code 137.
		assert.NotNil(t, got.ProcessStatus)
		assert.NotNil(t, got.ProcessStatus.Terminated)
		assert.Nil(t, got.ProcessStatus.Running)
		assert.Nil(t, got.ProcessStatus.Waiting)

		terminated := got.ProcessStatus.Terminated
		assert.Equal(t, int32(137), terminated.ExitCode)
		assert.Equal(t, "TaskTimeout", terminated.Reason)
		assert.Equal(t, "Task exceeded timeout of 60 seconds", terminated.Message)
		assert.Equal(t, startedAt.Unix(), terminated.StartedAt.Unix())
		// A finish time must be filled in even though the input had none.
		assert.False(t, terminated.FinishedAt.IsZero())
		assert.Nil(t, got.PodStatus)
	})

	t.Run("Timeout After Completion", func(t *testing.T) {
		finishedAt := startedAt.Add(2 * time.Minute)
		// Failed state: the process was already stopped with a recorded exit.
		killed := types.SubStatus{
			ExitCode:   137,
			Reason:     "Killed",
			StartedAt:  &startedAt,
			FinishedAt: &finishedAt,
		}
		internal := &types.Task{
			Name:    "completed-task",
			Process: &api.Process{Command: []string{"ls"}},
			Status: types.Status{
				State:       types.TaskStateFailed,
				SubStatuses: []types.SubStatus{killed},
			},
		}

		got := convertInternalToAPITask(internal)

		// The recorded exit code and timestamps must be passed through.
		assert.NotNil(t, got.ProcessStatus.Terminated)
		terminated := got.ProcessStatus.Terminated
		assert.Equal(t, int32(137), terminated.ExitCode)
		assert.Equal(t, startedAt.Unix(), terminated.StartedAt.Unix())
		assert.Equal(t, finishedAt.Unix(), terminated.FinishedAt.Unix())
	})
}


================================================
FILE: kubernetes/internal/task-executor/server/router.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package server

import (
	"net/http"
)

// NewRouter builds the HTTP handler for the task-executor API, binding each
// "METHOD /path" pattern to the matching method on h.
func NewRouter(h *Handler) http.Handler {
	routes := []struct {
		pattern string
		handle  func(http.ResponseWriter, *http.Request)
	}{
		{"POST /setTasks", h.SyncTasks},
		{"GET /getTasks", h.ListTasks},
		{"POST /tasks", h.CreateTask},
		{"GET /tasks/{id}", h.GetTask},
		{"DELETE /tasks/{id}", h.DeleteTask},
		{"GET /health", h.Health},
	}

	router := http.NewServeMux()
	for _, route := range routes {
		router.HandleFunc(route.pattern, route.handle)
	}
	return router
}


================================================
FILE: kubernetes/internal/task-executor/storage/file_store.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package store

import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"sync"

	"k8s.io/klog/v2"

	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/types"
	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/utils"
)

// fileStore is a TaskStore that keeps each task in its own sub-directory of
// dataDir, serialized as a task.json file.
type fileStore struct {
	// dataDir is the root directory under which task directories are created.
	dataDir string
	// locks holds one lock per task name. Entries are created lazily by
	// getTaskLock; no code in this file removes them.
	locks   sync.Map // key: taskName, value: *sync.RWMutex
}

// NewFileStore returns a TaskStore rooted at dataDir.
//
// The directory is created if missing, and a probe file is written and
// removed to confirm the directory is writable. An error is returned when
// dataDir is empty, cannot be created, or is not writable.
func NewFileStore(dataDir string) (TaskStore, error) {
	if len(dataDir) == 0 {
		return nil, fmt.Errorf("dataDir cannot be empty")
	}

	mkErr := os.MkdirAll(dataDir, 0755)
	if mkErr != nil {
		return nil, fmt.Errorf("failed to create data directory %s: %w", dataDir, mkErr)
	}

	// Writability probe; removal is best-effort.
	probe := filepath.Join(dataDir, ".test")
	writeErr := os.WriteFile(probe, []byte("test"), 0644)
	if writeErr != nil {
		return nil, fmt.Errorf("data directory %s is not writable: %w", dataDir, writeErr)
	}
	os.Remove(probe)

	klog.InfoS("initialized file store", "dataDir", dataDir)

	fs := &fileStore{dataDir: dataDir}
	return fs, nil
}

// getTaskLock returns the lock associated with the given task name, creating
// and registering a new one the first time a name is seen.
func (s *fileStore) getTaskLock(name string) *sync.RWMutex {
	lock, _ := s.locks.LoadOrStore(name, new(sync.RWMutex))
	return lock.(*sync.RWMutex)
}

// Create persists a new task under its own directory inside dataDir.
//
// It fails when task is nil, has an empty name, has a name that SafeJoin
// rejects, or when the task directory already exists. If writing the task
// file fails, the newly created directory is removed again.
func (s *fileStore) Create(ctx context.Context, task *types.Task) error {
	switch {
	case task == nil:
		return fmt.Errorf("task cannot be nil")
	case task.Name == "":
		return fmt.Errorf("task name cannot be empty")
	}

	lock := s.getTaskLock(task.Name)
	lock.Lock()
	defer lock.Unlock()

	dir, joinErr := utils.SafeJoin(s.dataDir, task.Name)
	if joinErr != nil {
		return fmt.Errorf("invalid task name: %w", joinErr)
	}

	// A successful stat means something already lives at this path.
	_, statErr := os.Stat(dir)
	if statErr == nil {
		return fmt.Errorf("task %s already exists", task.Name)
	}

	if mkErr := os.MkdirAll(dir, 0755); mkErr != nil {
		return fmt.Errorf("failed to create task directory: %w", mkErr)
	}

	if writeErr := s.writeTaskFile(dir, task); writeErr != nil {
		// Roll back the directory so a later Create can retry.
		os.RemoveAll(dir)
		return writeErr
	}

	klog.InfoS("created task", "name", task.Name, "dir", dir)
	return nil
}

// Update rewrites the stored file of an existing task.
//
// It fails when task is nil, has an empty name, has a name that SafeJoin
// rejects, or when the task directory does not exist.
func (s *fileStore) Update(ctx context.Context, task *types.Task) error {
	switch {
	case task == nil:
		return fmt.Errorf("task cannot be nil")
	case task.Name == "":
		return fmt.Errorf("task name cannot be empty")
	}

	lock := s.getTaskLock(task.Name)
	lock.Lock()
	defer lock.Unlock()

	dir, joinErr := utils.SafeJoin(s.dataDir, task.Name)
	if joinErr != nil {
		return fmt.Errorf("invalid task name: %w", joinErr)
	}

	// Only tasks that were created before may be updated.
	if _, statErr := os.Stat(dir); os.IsNotExist(statErr) {
		return fmt.Errorf("task %s does not exist", task.Name)
	}

	if writeErr := s.writeTaskFile(dir, task); writeErr != nil {
		return writeErr
	}

	klog.V(2).InfoS("updated task", "name", task.Name, "state", task.Status.State)
	return nil
}

// Get loads the task stored under the given name.
//
// It returns an error when name is empty or rejected by SafeJoin, when the
// task directory does not exist, or when the task file cannot be read or
// decoded.
func (s *fileStore) Get(ctx context.Context, name string) (*types.Task, error) {
	if len(name) == 0 {
		return nil, fmt.Errorf("task name cannot be empty")
	}

	lock := s.getTaskLock(name)
	lock.RLock()
	defer lock.RUnlock()

	dir, joinErr := utils.SafeJoin(s.dataDir, name)
	if joinErr != nil {
		return nil, fmt.Errorf("invalid task name: %w", joinErr)
	}

	// A missing directory means the task was never created or was deleted.
	_, statErr := os.Stat(dir)
	if os.IsNotExist(statErr) {
		return nil, fmt.Errorf("task %s not found", name)
	}

	return s.readTaskFile(dir, name)
}

// List returns every task that can be loaded from the sub-directories of
// dataDir.
//
// Non-directory entries are ignored. Directories whose name is rejected by
// SafeJoin or whose task file cannot be read are logged and skipped, so a
// single bad task does not fail the whole listing.
func (s *fileStore) List(ctx context.Context) ([]*types.Task, error) {
	dirEntries, readErr := os.ReadDir(s.dataDir)
	if readErr != nil {
		return nil, fmt.Errorf("failed to read data directory: %w", readErr)
	}

	result := make([]*types.Task, 0, len(dirEntries))
	for i := range dirEntries {
		if !dirEntries[i].IsDir() {
			continue
		}
		name := dirEntries[i].Name()

		dir, joinErr := utils.SafeJoin(s.dataDir, name)
		if joinErr != nil {
			klog.ErrorS(joinErr, "invalid task directory, skipping", "name", name)
			continue
		}

		// Hold the per-task read lock only while the file is being read.
		lock := s.getTaskLock(name)
		lock.RLock()
		loaded, loadErr := s.readTaskFile(dir, name)
		lock.RUnlock()

		if loadErr != nil {
			klog.ErrorS(loadErr, "failed to read task, skipping", "name", name)
			continue
		}
		result = append(result, loaded)
	}

	return result, nil
}

// Delete removes the directory of the named task and everything in it.
//
// Deleting a task that does not exist is not an error. An error is returned
// when name is empty, is rejected by SafeJoin, resolves to the data directory
// itself, or when the directory cannot be removed.
func (s *fileStore) Delete(ctx context.Context, name string) error {
	if name == "" {
		return fmt.Errorf("task name cannot be empty")
	}

	mu := s.getTaskLock(name)
	mu.Lock()
	defer mu.Unlock()

	taskDir, err := utils.SafeJoin(s.dataDir, name)
	if err != nil {
		return fmt.Errorf("invalid task name: %w", err)
	}

	// SafeJoin accepts paths equal to the base directory (for example the
	// name "."). Removing that path would wipe every stored task, so such
	// names are refused here.
	baseDir, err := filepath.Abs(s.dataDir)
	if err != nil {
		return fmt.Errorf("failed to resolve data directory: %w", err)
	}
	if taskDir == baseDir {
		return fmt.Errorf("invalid task name: %q resolves to the data directory", name)
	}

	// Check if task exists
	if _, err := os.Stat(taskDir); os.IsNotExist(err) {
		klog.InfoS("task already deleted", "name", name)
		return nil
	}

	if err := os.RemoveAll(taskDir); err != nil {
		return fmt.Errorf("failed to delete task %s: %w", name, err)
	}

	klog.InfoS("deleted task", "name", name)
	return nil
}

// getTaskFilePath returns the path of the task file inside taskDir.
func (s *fileStore) getTaskFilePath(taskDir string) string {
	const taskFileName = "task.json"
	return filepath.Join(taskDir, taskFileName)
}

// writeTaskFile serializes task as indented JSON and stores it as the task
// file inside taskDir.
//
// The data is written to a sibling ".tmp" file, flushed with Sync, closed,
// and then renamed over the final path so readers never observe a partially
// written task file. On any failure after the temp file has been created the
// temp file is removed. Callers are expected to hold the task's write lock,
// since the temp file name is fixed per task.
func (s *fileStore) writeTaskFile(taskDir string, task *types.Task) error {
	data, err := json.MarshalIndent(task, "", "  ")
	if err != nil {
		return fmt.Errorf("failed to marshal task: %w", err)
	}

	taskFile := s.getTaskFilePath(taskDir)
	tmpFile := taskFile + ".tmp"

	f, err := os.OpenFile(tmpFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		return fmt.Errorf("failed to write temp file: %w", err)
	}

	if _, err := f.Write(data); err != nil {
		f.Close()
		os.Remove(tmpFile)
		return fmt.Errorf("failed to write temp file: %w", err)
	}

	// Sync through the descriptor that performed the write instead of
	// reopening the file read-only.
	if err := f.Sync(); err != nil {
		f.Close()
		os.Remove(tmpFile)
		return fmt.Errorf("failed to sync temp file: %w", err)
	}

	// A failed Close can still signal a write error, so it must not be ignored.
	if err := f.Close(); err != nil {
		os.Remove(tmpFile)
		return fmt.Errorf("failed to close temp file: %w", err)
	}

	if err := os.Rename(tmpFile, taskFile); err != nil {
		os.Remove(tmpFile)
		return fmt.Errorf("failed to rename temp file: %w", err)
	}

	return nil
}

// readTaskFile reads and decodes the task file found in taskDir.
// The taskName argument is accepted but not used by the current body.
func (s *fileStore) readTaskFile(taskDir, taskName string) (*types.Task, error) {
	raw, readErr := os.ReadFile(s.getTaskFilePath(taskDir))
	if readErr != nil {
		return nil, fmt.Errorf("failed to read task file: %w", readErr)
	}

	decoded := new(types.Task)
	if decodeErr := json.Unmarshal(raw, decoded); decodeErr != nil {
		return nil, fmt.Errorf("failed to unmarshal task file: %w", decodeErr)
	}

	return decoded, nil
}


================================================
FILE: kubernetes/internal/task-executor/storage/file_store_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package store

import (
	"context"
	"os"
	"path/filepath"
	"testing"
	"time"

	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/types"
	api "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/task-executor"
)

// TestNewFileStore checks that NewFileStore accepts a writable directory and
// rejects an empty path.
func TestNewFileStore(t *testing.T) {
	// A fresh temporary directory must yield a usable store.
	fs, err := NewFileStore(t.TempDir())
	switch {
	case err != nil:
		t.Fatalf("NewFileStore failed: %v", err)
	case fs == nil:
		t.Fatal("NewFileStore returned nil store")
	}

	// An empty path must be refused.
	if _, emptyErr := NewFileStore(""); emptyErr == nil {
		t.Fatal("NewFileStore should fail with empty dir")
	}
}

// TestFileStore_CRUD walks a single task through Create, Get, Update, List
// and Delete and verifies the on-disk directory appears and disappears.
func TestFileStore_CRUD(t *testing.T) {
	tmpDir := t.TempDir()
	store, err := NewFileStore(tmpDir)
	if err != nil {
		t.Fatalf("Failed to create store: %v", err)
	}

	ctx := context.Background()
	task := &types.Task{
		Name: "test-task",
		Process: &api.Process{
			Command: []string{"echo", "hello"},
		},
	}

	// 1. Create
	if err := store.Create(ctx, task); err != nil {
		t.Fatalf("Create failed: %v", err)
	}

	// Verify the task directory exists
	taskDir := filepath.Join(tmpDir, task.Name)
	if _, err := os.Stat(taskDir); os.IsNotExist(err) {
		t.Error("Task directory was not created")
	}

	// 2. Get
	got, err := store.Get(ctx, task.Name)
	if err != nil {
		t.Fatalf("Get failed: %v", err)
	}
	if got.Name != task.Name {
		t.Errorf("Get returned wrong name: got %s, want %s", got.Name, task.Name)
	}

	// 3. Update
	now := time.Now()
	got.DeletionTimestamp = &now

	if err := store.Update(ctx, got); err != nil {
		t.Fatalf("Update failed: %v", err)
	}

	updated, err := store.Get(ctx, task.Name)
	if err != nil {
		t.Fatalf("Get after update failed: %v", err)
	}
	if updated.DeletionTimestamp == nil {
		t.Error("Update failed to persist DeletionTimestamp")
	}

	// 4. List
	tasks, err := store.List(ctx)
	if err != nil {
		t.Fatalf("List failed: %v", err)
	}
	// Fatal, not Error: tasks[0] below would panic on an empty result.
	if len(tasks) != 1 {
		t.Fatalf("List returned %d tasks, want 1", len(tasks))
	}
	if tasks[0].Name != task.Name {
		t.Errorf("List returned wrong task: %s", tasks[0].Name)
	}

	// 5. Delete
	if err := store.Delete(ctx, task.Name); err != nil {
		t.Fatalf("Delete failed: %v", err)
	}

	// Verify deletion
	if _, err := store.Get(ctx, task.Name); err == nil {
		t.Error("Get should fail after delete")
	}

	tasks, err = store.List(ctx)
	if err != nil {
		t.Fatalf("List failed: %v", err)
	}
	if len(tasks) != 0 {
		t.Errorf("List returned %d tasks after delete, want 0", len(tasks))
	}

	// Verify directory gone
	if _, err := os.Stat(taskDir); !os.IsNotExist(err) {
		t.Error("Task directory still exists after delete")
	}
}

// TestFileStore_EdgeCases covers invalid arguments, duplicate creation and
// operations on tasks that do not exist.
func TestFileStore_EdgeCases(t *testing.T) {
	tmpDir := t.TempDir()
	store, err := NewFileStore(tmpDir)
	if err != nil {
		t.Fatalf("Failed to create store: %v", err)
	}
	ctx := context.Background()

	// Create with nil task
	if err := store.Create(ctx, nil); err == nil {
		t.Error("Create should fail with nil task")
	}

	// Create with empty name
	if err := store.Create(ctx, &types.Task{}); err == nil {
		t.Error("Create should fail with empty name")
	}

	// Create duplicate: the first Create must succeed, otherwise the
	// duplicate check below would not be testing what it claims to.
	task := &types.Task{Name: "dup"}
	if err := store.Create(ctx, task); err != nil {
		t.Fatalf("initial Create failed: %v", err)
	}
	if err := store.Create(ctx, task); err == nil {
		t.Error("Create should fail for duplicate task")
	}

	// Update non-existent
	if err := store.Update(ctx, &types.Task{Name: "missing"}); err == nil {
		t.Error("Update should fail for non-existent task")
	}

	// Get non-existent
	if _, err := store.Get(ctx, "missing"); err == nil {
		t.Error("Get should fail for non-existent task")
	}

	// Delete non-existent
	if err := store.Delete(ctx, "missing"); err != nil {
		t.Errorf("Delete should not fail for non-existent task, got %v", err)
	}
}

// TestFileStore_CorruptedData verifies that a task directory holding invalid
// JSON is skipped by List and reported as an error by Get.
func TestFileStore_CorruptedData(t *testing.T) {
	tmpDir := t.TempDir()
	store, err := NewFileStore(tmpDir)
	if err != nil {
		t.Fatalf("Failed to create store: %v", err)
	}
	ctx := context.Background()

	// Manually create a corrupted task file. Setup failures must abort the
	// test: without the file, the checks below would pass for the wrong reason.
	taskDir := filepath.Join(tmpDir, "corrupted")
	if err := os.MkdirAll(taskDir, 0755); err != nil {
		t.Fatalf("failed to create task directory: %v", err)
	}
	if err := os.WriteFile(filepath.Join(taskDir, "task.json"), []byte("{invalid-json"), 0644); err != nil {
		t.Fatalf("failed to write corrupted task file: %v", err)
	}

	// List should skip corrupted task
	tasks, err := store.List(ctx)
	if err != nil {
		t.Fatalf("List failed: %v", err)
	}
	if len(tasks) != 0 {
		t.Errorf("List should skip corrupted task, got %d", len(tasks))
	}

	// Get should fail for corrupted task
	if _, err := store.Get(ctx, "corrupted"); err == nil {
		t.Error("Get should fail for corrupted task")
	}
}

// TestFileStore_Concurrency runs concurrent Update and Get calls against one
// task and fails if any of them returns an error.
func TestFileStore_Concurrency(t *testing.T) {
	tmpDir := t.TempDir()
	store, err := NewFileStore(tmpDir)
	if err != nil {
		t.Fatalf("Failed to create store: %v", err)
	}
	ctx := context.Background()
	taskName := "concurrent-task"

	if err := store.Create(ctx, &types.Task{Name: taskName}); err != nil {
		t.Fatalf("Create failed: %v", err)
	}

	const workers = 10
	// Buffered so no goroutine blocks on send; each worker reports exactly once.
	results := make(chan error, workers)
	for i := 0; i < workers; i++ {
		go func() {
			updateErr := store.Update(ctx, &types.Task{
				Name: taskName,
				Process: &api.Process{
					Args: []string{time.Now().String()},
				},
			})
			if updateErr != nil {
				results <- updateErr
				return
			}
			_, getErr := store.Get(ctx, taskName)
			results <- getErr
		}()
	}

	// Errors are reported from the test goroutine only.
	for i := 0; i < workers; i++ {
		if err := <-results; err != nil {
			t.Errorf("concurrent Update/Get failed: %v", err)
		}
	}
}


================================================
FILE: kubernetes/internal/task-executor/storage/interface.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package store

import (
	"context"

	"github.com/alibaba/OpenSandbox/sandbox-k8s/internal/task-executor/types"
)

// TaskStore defines the contract for persisting task state.
// The behavior notes below describe the file-backed implementation in this
// package; other implementations are presumably expected to match — confirm.
type TaskStore interface {
	// Create persists a new task. The file-backed store rejects a nil task,
	// an empty name, and a task that already exists.
	Create(ctx context.Context, task *types.Task) error

	// Update overwrites the stored data of an existing task. The file-backed
	// store returns an error when the task does not exist.
	Update(ctx context.Context, task *types.Task) error

	// Get returns the task stored under name, or an error when it is missing
	// or cannot be decoded.
	Get(ctx context.Context, name string) (*types.Task, error)

	// List returns the stored tasks. The file-backed store skips entries it
	// cannot read instead of failing the whole call.
	List(ctx context.Context) ([]*types.Task, error)

	// Delete removes the task stored under name. In the file-backed store,
	// deleting a task that does not exist succeeds.
	Delete(ctx context.Context, name string) error
}


================================================
FILE: kubernetes/internal/task-executor/types/task.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package types

import (
	"time"

	corev1 "k8s.io/api/core/v1"

	api "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/task-executor"
)

// TaskState defines the simplified internal state of a task.
// It is a string type, so the constant values below are what gets written
// when a Status is serialized to JSON.
type TaskState string

// Known task states.
const (
	TaskStatePending   TaskState = "Pending"
	TaskStateRunning   TaskState = "Running"
	TaskStateSucceeded TaskState = "Succeeded"
	TaskStateFailed    TaskState = "Failed"
	TaskStateUnknown   TaskState = "Unknown"
	TaskStateNotFound  TaskState = "NotFound"
	TaskStateTimeout   TaskState = "Timeout"
)

// Status represents the internal status of a task.
// This is decoupled from the Kubernetes API status.
type Status struct {
	// State is the overall state of the task.
	State       TaskState   `json:"state"`
	// SubStatuses carries per-unit details (see SubStatus); omitted from
	// JSON when empty.
	SubStatuses []SubStatus `json:"subStatuses,omitempty"`
}

// SubStatus describes one unit of a task: the process itself for process
// tasks, or a single container for pod-template tasks (see Name).
// Every field is tagged omitempty, so zero values (including an exit code
// of 0) are left out of the JSON encoding.
type SubStatus struct {
	Name       string     `json:"name,omitempty"` // for process it's empty, for PodTemplateSpec is container name
	Reason     string     `json:"reason,omitempty"`
	Message    string     `json:"message,omitempty"`
	ExitCode   int        `json:"exitCode,omitempty"`
	StartedAt  *time.Time `json:"startedAt,omitempty"`
	FinishedAt *time.Time `json:"finishedAt,omitempty"`
}

// Task is the internal representation of a task, including its persisted
// status. This is the value the task store serializes to JSON.
type Task struct {
	// Name identifies the task.
	Name              string     `json:"name"`
	// DeletionTimestamp is optional and omitted from JSON when nil.
	DeletionTimestamp *time.Time `json:"deletionTimestamp,omitempty"`

	// Process and PodTemplateSpec describe what the task runs.
	// NOTE(review): presumably exactly one of the two is set — confirm
	// against callers.
	Process         *api.Process            `json:"process"`
	PodTemplateSpec *corev1.PodTemplateSpec `json:"podTemplateSpec"`

	// Status is now a first-class citizen and persisted.
	Status Status `json:"status"`
}


================================================
FILE: kubernetes/internal/task-executor/utils/pathutil.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package utils

import (
	"fmt"
	"os"
	"path/filepath"
)

// SafeJoin joins userPath onto baseDir and returns the absolute result,
// failing with "path traversal detected" when that result is not baseDir
// itself or a path below it.
//
// The check is purely lexical: both paths are made absolute with
// filepath.Abs and compared as strings; symbolic links are not resolved.
// A userPath that resolves to baseDir itself is accepted.
func SafeJoin(baseDir, userPath string) (string, error) {
	base, err := filepath.Abs(baseDir)
	if err != nil {
		return "", fmt.Errorf("failed to resolve base directory absolute path: %w", err)
	}

	target, err := filepath.Abs(filepath.Join(baseDir, userPath))
	if err != nil {
		return "", fmt.Errorf("failed to resolve joined path absolute path: %w", err)
	}

	if isSubPath(base, target) {
		return target, nil
	}
	return "", fmt.Errorf("path traversal detected")
}

// isSubPath reports whether child equals parent or lies strictly below it.
// The comparison is done on the strings as given: parent is extended with a
// trailing path separator (unless it already ends in one) and child must
// start with that prefix. An empty parent never matches.
func isSubPath(parent, child string) bool {
	if parent == "" {
		return false
	}
	if child == parent {
		return true
	}

	prefix := parent
	if last := parent[len(parent)-1]; !os.IsPathSeparator(last) {
		prefix += string(filepath.Separator)
	}

	// child must be longer than the prefix, i.e. name something inside parent.
	if len(child) <= len(prefix) {
		return false
	}
	return child[:len(prefix)] == prefix
}


================================================
FILE: kubernetes/internal/task-executor/utils/pathutil_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package utils

import (
	"os"
	"path/filepath"
	"testing"
)

// TestSafeJoin runs SafeJoin against valid relative paths, an absolute-looking
// path (which is joined below the base) and several traversal attempts.
func TestSafeJoin(t *testing.T) {
	base, mkErr := os.MkdirTemp("", "safejoin-test")
	if mkErr != nil {
		t.Fatalf("failed to create temp dir: %v", mkErr)
	}
	defer os.RemoveAll(base)

	type testCase struct {
		name     string
		baseDir  string
		userPath string
		wantErr  bool
	}
	cases := []testCase{
		{name: "valid path", baseDir: base, userPath: "foo", wantErr: false},
		{name: "valid nested path", baseDir: base, userPath: "foo/bar", wantErr: false},
		{name: "path traversal attempt", baseDir: base, userPath: "../foo", wantErr: true},
		{name: "path traversal to root (treated as relative)", baseDir: base, userPath: "/etc/passwd", wantErr: false},
		{name: "complex traversal", baseDir: base, userPath: "foo/../../bar", wantErr: true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := SafeJoin(tc.baseDir, tc.userPath)
			if failed := err != nil; failed != tc.wantErr {
				t.Errorf("SafeJoin() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if tc.wantErr {
				return
			}

			// On success the result must be the absolute form of the plain join.
			want, _ := filepath.Abs(filepath.Join(tc.baseDir, tc.userPath))
			if got != want {
				t.Errorf("SafeJoin() = %v, want %v", got, want)
			}
		})
	}
}


================================================
FILE: kubernetes/internal/utils/controller/util.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
)

// GetControllerKey returns the string form of the object's namespaced name,
// built from obj.GetNamespace() and obj.GetName().
func GetControllerKey(obj metav1.Object) string {
	key := types.NamespacedName{
		Namespace: obj.GetNamespace(),
		Name:      obj.GetName(),
	}
	return key.String()
}


================================================
FILE: kubernetes/internal/utils/expectations/init.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package expectations

import (
	"flag"
	"time"
)

// ExpectationTimeout is bound to the "expectation-timeout" command-line flag
// and holds five minutes unless the flag overrides it.
var ExpectationTimeout time.Duration

// init registers the flag on the default flag set at package load time.
func init() {
	const defaultTimeout = 5 * time.Minute
	flag.DurationVar(&ExpectationTimeout, "expectation-timeout", defaultTimeout, "The expectation timeout. Defaults 5min")
}


================================================
FILE: kubernetes/internal/utils/expectations/resource_version_expectation.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package expectations

import (
	"strconv"
	"sync"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
)

// ResourceVersionExpectation tracks, per object UID, a resource version that
// is expected to be seen. The notes below describe the implementation
// returned by NewResourceVersionExpectation.
type ResourceVersionExpectation interface {
	// Expect records obj's resource version as the expected one, unless a
	// higher version is already recorded for the same UID.
	Expect(obj metav1.Object)
	// Observe clears the expectation for obj's UID once obj's resource
	// version is at least the expected one.
	Observe(obj metav1.Object)
	// IsSatisfied reports whether no expectation is pending for obj. When one
	// is still pending it also returns how long it has been unsatisfied,
	// measured from the first unsatisfied call.
	IsSatisfied(obj metav1.Object) (bool, time.Duration)
	// Delete drops any expectation stored for obj's UID.
	Delete(obj metav1.Object)
}

// NewResourceVersionExpectation returns an empty, mutex-guarded
// ResourceVersionExpectation.
func NewResourceVersionExpectation() ResourceVersionExpectation {
	// Initial map capacity; the map grows as needed.
	const initialCapacity = 100

	tracker := &realResourceVersionExpectation{}
	tracker.objectVersions = make(map[types.UID]*objectCacheVersions, initialCapacity)
	return tracker
}

// realResourceVersionExpectation is the ResourceVersionExpectation
// implementation. Its methods take the embedded mutex before touching
// objectVersions.
type realResourceVersionExpectation struct {
	sync.Mutex
	// objectVersions maps an object's UID to its pending expectation.
	objectVersions map[types.UID]*objectCacheVersions
}

// objectCacheVersions is the pending expectation stored for one object.
type objectCacheVersions struct {
	// version is the expected resource version.
	version                   string
	// firstUnsatisfiedTimestamp is set by IsSatisfied the first time it finds
	// the expectation unsatisfied; it is the zero time until then.
	firstUnsatisfiedTimestamp time.Time
}

// Expect records obj's resource version as the version to wait for.
// An entry is created for the UID if none exists; an existing expected
// version is only replaced when isResourceVersionNewer accepts obj's version.
func (r *realResourceVersionExpectation) Expect(obj metav1.Object) {
	r.Lock()
	defer r.Unlock()

	uid := obj.GetUID()
	entry := r.objectVersions[uid]
	if entry == nil {
		entry = &objectCacheVersions{}
		r.objectVersions[uid] = entry
	}

	if rv := obj.GetResourceVersion(); isResourceVersionNewer(entry.version, rv) {
		entry.version = rv
	}
}

// Observe removes the expectation for obj's UID when obj's resource version
// has reached the expected one. It does nothing when no expectation exists.
func (r *realResourceVersionExpectation) Observe(obj metav1.Object) {
	r.Lock()
	defer r.Unlock()

	uid := obj.GetUID()
	entry := r.objectVersions[uid]
	if entry == nil {
		return
	}

	reached := isResourceVersionNewer(entry.version, obj.GetResourceVersion())
	if reached {
		delete(r.objectVersions, uid)
	}
}

// IsSatisfied reports whether the expectation for obj's UID is met.
//
// It returns (true, 0) when nothing is expected or when obj's resource
// version has reached the expected one (the entry is then removed).
// Otherwise it returns false together with the time elapsed since the
// expectation was first found unsatisfied.
func (r *realResourceVersionExpectation) IsSatisfied(obj metav1.Object) (bool, time.Duration) {
	r.Lock()
	defer r.Unlock()

	uid := obj.GetUID()
	entry := r.objectVersions[uid]
	if entry == nil {
		return true, 0
	}

	if isResourceVersionNewer(entry.version, obj.GetResourceVersion()) {
		delete(r.objectVersions, uid)
		return true, 0
	}

	// Start the unsatisfied clock on the first miss only.
	if entry.firstUnsatisfiedTimestamp.IsZero() {
		entry.firstUnsatisfiedTimestamp = time.Now()
	}
	return false, time.Since(entry.firstUnsatisfiedTimestamp)
}

// Delete discards any expectation stored for obj's UID.
func (r *realResourceVersionExpectation) Delete(obj metav1.Object) {
	uid := obj.GetUID()

	r.Lock()
	defer r.Unlock()
	delete(r.objectVersions, uid)
}

// isResourceVersionNewer reports whether new should be treated as at least
// as recent as old, comparing both as base-10 unsigned integers.
//
// An empty or unparsable old always yields true; with a valid old, an
// unparsable new yields false. Equal versions count as newer.
func isResourceVersionNewer(old, new string) bool {
	if old == "" {
		return true
	}

	oldCount, oldErr := strconv.ParseUint(old, 10, 64)
	if oldErr != nil {
		return true
	}

	newCount, newErr := strconv.ParseUint(new, 10, 64)
	if newErr != nil {
		return false
	}

	return oldCount <= newCount
}


================================================
FILE: kubernetes/internal/utils/expectations/resource_version_expectation_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package expectations

import (
	"testing"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// TestResourceVersionExpectation drives Expect, Observe and IsSatisfied with
// pods at different resource versions and checks the reported satisfaction.
func TestResourceVersionExpectation(t *testing.T) {
	// podAt builds a pod that carries only the given resource version.
	podAt := func(rv string) *v1.Pod {
		return &v1.Pod{ObjectMeta: metav1.ObjectMeta{ResourceVersion: rv}}
	}

	table := []struct {
		expect      *v1.Pod
		observe     *v1.Pod
		isSatisfied *v1.Pod
		result      bool
	}{
		{podAt("2"), podAt("1"), podAt("1"), false},
		{podAt("2"), podAt("2"), podAt("2"), true},
		{podAt("2"), podAt("1"), podAt("2"), true},
		{podAt("2"), podAt("2"), podAt("3"), true},
	}

	for idx := range table {
		tc := table[idx]

		// Each case starts from a fresh tracker.
		tracker := NewResourceVersionExpectation()
		tracker.Expect(tc.expect)
		tracker.Observe(tc.observe)

		if got, _ := tracker.IsSatisfied(tc.isSatisfied); got != tc.result {
			t.Fatalf("#%d expected %v, got %v", idx, tc.result, got)
		}
	}
}


================================================
FILE: kubernetes/internal/utils/expectations/scale_expectations.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package expectations

import (
	"sync"
	"time"

	"k8s.io/apimachinery/pkg/util/sets"
)

// ScaleAction is the action of scale, like create and delete.
// It is used as the key under which expected object names are grouped.
type ScaleAction string

const (
	// Create action
	Create ScaleAction = "create"
	// Delete action
	Delete ScaleAction = "delete"
)

// ScaleExpectations is an interface that allows users to set and wait on expectations of pods scale.
// Expectations are grouped by controller key and, within a controller, by
// scale action.
type ScaleExpectations interface {
	// ExpectScale records that the named object is expected to undergo the
	// given action for the controller.
	ExpectScale(controllerKey string, action ScaleAction, name string)
	ObserveScale(controllerKey string, action ScaleAction, name string)
	SatisfiedExpectations(controllerKey string) (bool, time.Duration, map[ScaleAction][]string)
	DeleteExpectations(controllerKey string)
	// GetExpectations returns a copy of the names currently expected for the
	// controller, keyed by action, or nil when nothing is recorded for it.
	GetExpectations(controllerKey string) map[ScaleAction]sets.String
}

// NewScaleExpectations builds the default ScaleExpectations implementation
// with an empty per-controller cache.
func NewScaleExpectations() ScaleExpectations {
	impl := new(realScaleExpectations)
	impl.controllerCache = map[string]*realControllerScaleExpectations{}
	return impl
}

// realScaleExpectations is the ScaleExpectations implementation returned by
// NewScaleExpectations. Every method takes the embedded mutex before touching
// controllerCache.
type realScaleExpectations struct {
	sync.Mutex
	// key: parent key, workload namespace/name
	// value: the pending expectations of that controller; entries are removed
	// once all of their expectations have been observed.
	controllerCache map[string]*realControllerScaleExpectations
}

// realControllerScaleExpectations holds the pending expectations of a single
// controller.
type realControllerScaleExpectations struct {
	// item: name for this object
	// Names are grouped by the action that is expected to happen to them.
	objsCache map[ScaleAction]sets.String
	// firstUnsatisfiedTimestamp is set the first time SatisfiedExpectations
	// finds this controller unsatisfied; it is the base of the reported
	// unsatisfied duration.
	firstUnsatisfiedTimestamp time.Time
}

// GetExpectations returns a snapshot of the pending object names per action
// for controllerKey. The returned sets are copies, so callers may mutate them
// freely. It returns nil when nothing is recorded for the controller.
func (r *realScaleExpectations) GetExpectations(controllerKey string) map[ScaleAction]sets.String {
	r.Lock()
	defer r.Unlock()

	ctrl, found := r.controllerCache[controllerKey]
	if !found || ctrl == nil {
		return nil
	}

	snapshot := make(map[ScaleAction]sets.String, len(ctrl.objsCache))
	for action, names := range ctrl.objsCache {
		// Rebuild each set so the caller never shares storage with the cache.
		snapshot[action] = sets.NewString(names.List()...)
	}
	return snapshot
}

// ExpectScale records that object `name` is expected to undergo `action` for
// the controller identified by controllerKey, creating the per-controller and
// per-action containers on first use.
func (r *realScaleExpectations) ExpectScale(controllerKey string, action ScaleAction, name string) {
	r.Lock()
	defer r.Unlock()

	ctrl := r.controllerCache[controllerKey]
	if ctrl == nil {
		ctrl = &realControllerScaleExpectations{
			objsCache: map[ScaleAction]sets.String{},
		}
		r.controllerCache[controllerKey] = ctrl
	}

	pending := ctrl.objsCache[action]
	if pending == nil {
		ctrl.objsCache[action] = sets.NewString(name)
		return
	}
	pending.Insert(name)
}

// ObserveScale removes `name` from the pending set of `action` for
// controllerKey. If the controller or the action has no recorded expectations
// the call does nothing. Once no action has pending names left, the whole
// controller entry is dropped from the cache.
func (r *realScaleExpectations) ObserveScale(controllerKey string, action ScaleAction, name string) {
	r.Lock()
	defer r.Unlock()

	ctrl := r.controllerCache[controllerKey]
	if ctrl == nil {
		return
	}

	pending := ctrl.objsCache[action]
	if pending == nil {
		return
	}
	pending.Delete(name)

	// Keep the entry while any action still waits for at least one object.
	for _, remaining := range ctrl.objsCache {
		if remaining.Len() > 0 {
			return
		}
	}
	delete(r.controllerCache, controllerKey)
}

// SatisfiedExpectations reports whether all expectations of controllerKey have
// been observed.
//
// When an action still has pending names it returns false, the time elapsed
// since the controller was first found unsatisfied by this method, and a map
// holding the pending names of that one action (the first one met while
// iterating the per-action map). Otherwise the controller entry is removed
// and (true, 0, nil) is returned.
func (r *realScaleExpectations) SatisfiedExpectations(controllerKey string) (bool, time.Duration, map[ScaleAction][]string) {
	r.Lock()
	defer r.Unlock()

	ctrl := r.controllerCache[controllerKey]
	if ctrl == nil {
		return true, 0, nil
	}

	for action, names := range ctrl.objsCache {
		if names.Len() == 0 {
			continue
		}
		// Start the clock on the first unsatisfied check only.
		if ctrl.firstUnsatisfiedTimestamp.IsZero() {
			ctrl.firstUnsatisfiedTimestamp = time.Now()
		}
		elapsed := time.Since(ctrl.firstUnsatisfiedTimestamp)
		return false, elapsed, map[ScaleAction][]string{action: names.List()}
	}

	// Nothing pending any more: forget the controller.
	delete(r.controllerCache, controllerKey)
	return true, 0, nil
}

// DeleteExpectations discards every expectation recorded for controllerKey.
// Deleting an unknown key is a no-op.
func (r *realScaleExpectations) DeleteExpectations(controllerKey string) {
	r.Lock()
	delete(r.controllerCache, controllerKey)
	r.Unlock()
}


================================================
FILE: kubernetes/internal/utils/expectations/scale_expectations_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package expectations

import (
	"testing"
)

// TestScale walks one controller through expect/observe cycles and checks that
// its expectations only become satisfied after every expected create and
// delete has been observed, and that observations on another controller key
// do not count.
func TestScale(t *testing.T) {
	const (
		ownerA    = "default/cs01"
		ownerB    = "default/cs02"
		firstPod  = "pod01"
		secondPod = "pod02"
	)
	exp := NewScaleExpectations()

	// requireSatisfied fails the test when ownerA's state differs from want.
	requireSatisfied := func(want bool) {
		t.Helper()
		satisfied, _, _ := exp.SatisfiedExpectations(ownerA)
		switch {
		case satisfied && !want:
			t.Fatalf("expected not satisfied")
		case !satisfied && want:
			t.Fatalf("expected satisfied")
		}
	}

	exp.ExpectScale(ownerA, Create, firstPod)
	exp.ExpectScale(ownerA, Create, secondPod)
	exp.ExpectScale(ownerA, Delete, firstPod)
	requireSatisfied(false)

	// All creates observed, but the delete is still pending.
	exp.ObserveScale(ownerA, Create, secondPod)
	exp.ObserveScale(ownerA, Create, firstPod)
	requireSatisfied(false)

	// Observing the delete on a different controller must not help ownerA.
	exp.ObserveScale(ownerB, Delete, firstPod)
	requireSatisfied(false)

	exp.ObserveScale(ownerA, Delete, firstPod)
	requireSatisfied(true)
}


================================================
FILE: kubernetes/internal/utils/fieldindex/register.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package fieldindex

import (
	"context"
	"sync"

	v1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/cache"
	"sigs.k8s.io/controller-runtime/pkg/client"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
)

const (
	// IndexNameForOwnerRefUID is the field-index name under which
	// RegisterFieldIndexes indexes Pods by the UIDs of their owner references.
	IndexNameForOwnerRefUID = "ownerRefUID"
	// IndexNameForPoolRef is the field-index name under which
	// RegisterFieldIndexes indexes BatchSandboxes by Spec.PoolRef.
	IndexNameForPoolRef = "poolRef"
)

var (
	// registerOnce ensures the index registration in RegisterFieldIndexes
	// runs at most once per process.
	registerOnce sync.Once
)

// OwnerIndexFunc extracts the index values for IndexNameForOwnerRefUID: the
// UID of every owner reference on obj, in order. It returns nil when obj has
// no owner references.
var OwnerIndexFunc = func(obj client.Object) []string {
	refs := obj.GetOwnerReferences()
	if len(refs) == 0 {
		return nil
	}
	uids := make([]string, len(refs))
	for i := range refs {
		uids[i] = string(refs[i].UID)
	}
	return uids
}

// PoolRefIndexFunc extracts the index value for IndexNameForPoolRef: the
// Spec.PoolRef of a BatchSandbox. Objects of any other type yield nil.
var PoolRefIndexFunc = func(obj client.Object) []string {
	bs, isBatchSandbox := obj.(*sandboxv1alpha1.BatchSandbox)
	if !isBatchSandbox {
		return nil
	}
	return []string{bs.Spec.PoolRef}
}

// RegisterFieldIndexes registers the field indexes used by this package on c:
// Pods by owner-reference UID and BatchSandboxes by pool reference.
//
// Registration is guarded by a package-level sync.Once, so only the first
// call does any work. The returned error is local to each call: a call that
// does not run the registration returns nil, even if the first call failed.
func RegisterFieldIndexes(c cache.Cache) error {
	var registerErr error
	registerOnce.Do(func() {
		// pod ownerReference
		registerErr = c.IndexField(context.TODO(), &v1.Pod{}, IndexNameForOwnerRefUID, OwnerIndexFunc)
		if registerErr != nil {
			return
		}
		// batch sandbox pool reference
		registerErr = c.IndexField(context.TODO(), &sandboxv1alpha1.BatchSandbox{}, IndexNameForPoolRef, PoolRefIndexFunc)
	})
	return registerErr
}


================================================
FILE: kubernetes/internal/utils/finalizer.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package utils

import (
	"context"
	"errors"

	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/client-go/util/retry"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

// FinalizerOpType selects the operation UpdateFinalizer performs.
type FinalizerOpType string

const (
	// AddFinalizerOpType asks UpdateFinalizer to add the finalizer if absent.
	AddFinalizerOpType FinalizerOpType = "Add"
	// RemoveFinalizerOpType asks UpdateFinalizer to remove the finalizer if present.
	RemoveFinalizerOpType FinalizerOpType = "Remove"
)

// UpdateFinalizer adds or removes `finalizer` on the object identified by
// `object`'s key, retrying on update conflicts.
//
// On every attempt the object is re-read through c into a deep copy, so the
// passed-in `object` itself is not modified. If the finalizer is already in
// the desired state no update is sent. When removing, the remaining
// finalizers are written back de-duplicated and sorted.
//
// It returns an error when op is neither AddFinalizerOpType nor
// RemoveFinalizerOpType, or when the Get/Update calls fail.
func UpdateFinalizer(c client.Client, object client.Object, op FinalizerOpType, finalizer string) error {
	if op != AddFinalizerOpType && op != RemoveFinalizerOpType {
		return errors.New("UpdateFinalizer Func 'op' parameter must be 'Add' or 'Remove'")
	}

	objKey := client.ObjectKeyFromObject(object)
	attempt := func() error {
		// Always work on a freshly fetched copy so a retry sees the latest version.
		latest := object.DeepCopyObject().(client.Object)
		if err := c.Get(context.TODO(), objKey, latest); err != nil {
			return err
		}

		current := latest.GetFinalizers()
		if op == AddFinalizerOpType {
			if controllerutil.ContainsFinalizer(latest, finalizer) {
				return nil
			}
			current = append(current, finalizer)
		} else {
			// op was validated above, so this branch is the remove case.
			remaining := sets.NewString(current...)
			if !remaining.Has(finalizer) {
				return nil
			}
			current = remaining.Delete(finalizer).List()
		}

		latest.SetFinalizers(current)
		return c.Update(context.TODO(), latest)
	}
	return retry.RetryOnConflict(retry.DefaultRetry, attempt)
}


================================================
FILE: kubernetes/internal/utils/helper.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package utils

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// GetAnnotation returns the value of annotation `key` on obj. It returns the
// empty string when obj is nil, has no annotations, or lacks the key.
func GetAnnotation(obj metav1.Object, key string) string {
	if obj == nil {
		return ""
	}
	// Indexing a nil map yields the zero value, so no separate nil check is needed.
	return obj.GetAnnotations()[key]
}


================================================
FILE: kubernetes/internal/utils/json.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package utils

import (
	"encoding/json"
	"reflect"
)

// DumpJSON returns the JSON encoding of o as a string. Marshalling errors are
// ignored; in that case the result is the empty string.
func DumpJSON(o interface{}) string {
	encoded, _ := json.Marshal(o)
	return string(encoded)
}

// IsJSONObjectEqual reports whether o1 and o2 are equal, either directly
// (reflect.DeepEqual) or after being encoded to JSON.
//
// Two values are considered JSON-equal when their encodings are byte-identical
// or when the encodings decode to deeply equal generic values. The latter
// covers differently typed values, such as a struct and a map, that describe
// the same document.
//
// If either value cannot be marshalled the values are reported as not equal.
// Documents are decoded into a generic interface{} rather than a map, so
// non-object documents (arrays, numbers, strings) are compared by content
// instead of all collapsing to an empty map.
func IsJSONObjectEqual(o1, o2 interface{}) bool {
	if reflect.DeepEqual(o1, o2) {
		return true
	}

	oj1, err1 := json.Marshal(o1)
	oj2, err2 := json.Marshal(o2)
	if err1 != nil || err2 != nil {
		// An unencodable value has no JSON form to compare against.
		return false
	}
	if string(oj1) == string(oj2) {
		return true
	}

	// Encodings differ textually (e.g. field order); compare decoded content.
	var decoded1, decoded2 interface{}
	if err := json.Unmarshal(oj1, &decoded1); err != nil {
		return false
	}
	if err := json.Unmarshal(oj2, &decoded2); err != nil {
		return false
	}
	return reflect.DeepEqual(decoded1, decoded2)
}


================================================
FILE: kubernetes/internal/utils/logging/logger.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package logging

import (
	"os"

	"github.com/go-logr/logr"
	zap2 "go.uber.org/zap"
	"go.uber.org/zap/zapcore"
	"gopkg.in/natefinch/lumberjack.v2"
	"sigs.k8s.io/controller-runtime/pkg/log/zap"
)

// Options contains configuration for the logger
// built by NewLoggerWithZapOptions. The file-related fields (LogFilePath,
// MaxSize, MaxBackups, MaxAge, Compress) are only used when EnableFileOutput
// is true.
type Options struct {
	// Development configures the logger to use a development config
	// NOTE(review): NewLoggerWithZapOptions in this file does not read this
	// field; only ZapOptions is handed to zap — confirm whether callers are
	// expected to mirror it into ZapOptions.Development.
	Development bool
	// EnableFileOutput enables output to file
	// (in addition to stdout).
	EnableFileOutput bool
	// LogFilePath is the path to the log file
	LogFilePath string
	// MaxSize is the maximum size in megabytes of the log file before it gets rotated
	MaxSize int
	// MaxBackups is the maximum number of old log files to retain
	MaxBackups int
	// MaxAge is the maximum number of days to retain old log files
	MaxAge int
	// Compress determines if the rotated log files should be compressed using gzip
	Compress bool
	// ZapOptions are additional zap options
	// passed to controller-runtime's zap logger via zap.UseFlagOptions.
	ZapOptions zap.Options
}

// DefaultOptions returns the default logger options: production mode, file
// output disabled, and rotation settings that apply once file output is
// enabled.
func DefaultOptions() Options {
	// Development, EnableFileOutput and ZapOptions.Development default to
	// false, which is already the zero value.
	var defaults Options
	defaults.LogFilePath = "/var/log/sandbox-controller/controller.log"
	defaults.MaxSize = 100   // 100MB
	defaults.MaxBackups = 10 // keep 10 old log files
	defaults.MaxAge = 30     // keep log files for 30 days
	defaults.Compress = true // compress rotated files
	return defaults
}

// NewLoggerWithZapOptions builds a logr.Logger from controller-runtime's zap
// options, always adding caller (file and line) information.
//
// With file output disabled the logger uses zap's default destination. With
// file output enabled, log entries are written to both stdout and a rotating
// file configured from opts.
func NewLoggerWithZapOptions(opts Options) logr.Logger {
	// Include file and line number in every log entry. append handles a nil
	// ZapOpts slice, so no explicit initialisation is required.
	opts.ZapOptions.ZapOpts = append(opts.ZapOptions.ZapOpts, zap2.AddCaller())

	loggerOpts := []zap.Opts{zap.UseFlagOptions(&opts.ZapOptions)}

	if opts.EnableFileOutput {
		// Rotating file sink.
		rotatingFile := &lumberjack.Logger{
			Filename:   opts.LogFilePath,
			MaxSize:    opts.MaxSize,
			MaxBackups: opts.MaxBackups,
			MaxAge:     opts.MaxAge,
			Compress:   opts.Compress,
			LocalTime:  true,
		}
		// Fan out to stdout and the file.
		sink := zapcore.NewMultiWriteSyncer(
			zapcore.AddSync(os.Stdout),
			zapcore.AddSync(rotatingFile),
		)
		loggerOpts = append(loggerOpts, zap.WriteTo(sink))
	}

	return zap.New(loggerOpts...)
}


================================================
FILE: kubernetes/internal/utils/pod.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package utils

import (
	"fmt"
	"time"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/meta"
	apimachineryvalidation "k8s.io/apimachinery/pkg/api/validation"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/runtime"
)

// IsPodAvailable reports whether pod is available at time `now`.
// A pod must be ready to be available. A ready pod is available when either
//  1. minReadySeconds == 0, or
//  2. its ready condition has a LastTransitionTime and
//     LastTransitionTime + minReadySeconds is before `now`.
func IsPodAvailable(pod *v1.Pod, minReadySeconds int32, now metav1.Time) bool {
	if !IsPodReady(pod) {
		return false
	}
	if minReadySeconds == 0 {
		return true
	}

	// The ready condition exists here because IsPodReady returned true.
	readyCond := GetPodReadyCondition(pod.Status)
	if readyCond.LastTransitionTime.IsZero() {
		return false
	}
	availableAt := readyCond.LastTransitionTime.Add(time.Duration(minReadySeconds) * time.Second)
	return availableAt.Before(now.Time)
}

// IsPodReady reports whether the pod's status carries a Ready condition whose
// status is true.
func IsPodReady(pod *v1.Pod) bool {
	ready := IsPodReadyConditionTrue(pod.Status)
	return ready
}

// IsPodTerminal reports whether the pod's phase is terminal (Failed or
// Succeeded), i.e. all containers are stopped and cannot ever regress.
func IsPodTerminal(pod *v1.Pod) bool {
	phase := pod.Status.Phase
	return IsPodPhaseTerminal(phase)
}

// IsPodPhaseTerminal reports whether phase is one of the terminal pod phases:
// Failed or Succeeded.
func IsPodPhaseTerminal(phase v1.PodPhase) bool {
	switch phase {
	case v1.PodFailed, v1.PodSucceeded:
		return true
	default:
		return false
	}
}

// IsPodReadyConditionTrue reports whether status contains a PodReady
// condition whose status is True.
func IsPodReadyConditionTrue(status v1.PodStatus) bool {
	if cond := GetPodReadyCondition(status); cond != nil {
		return cond.Status == v1.ConditionTrue
	}
	return false
}

// IsContainersReadyConditionTrue reports whether status contains a
// ContainersReady condition whose status is True.
func IsContainersReadyConditionTrue(status v1.PodStatus) bool {
	if cond := GetContainersReadyCondition(status); cond != nil {
		return cond.Status == v1.ConditionTrue
	}
	return false
}

// GetPodReadyCondition returns the PodReady condition found in status, or nil
// if the condition is not present.
func GetPodReadyCondition(status v1.PodStatus) *v1.PodCondition {
	_, readyCond := GetPodCondition(&status, v1.PodReady)
	if readyCond == nil {
		return nil
	}
	return readyCond
}

// GetContainersReadyCondition returns the ContainersReady condition found in
// status, or nil if the condition is not present.
func GetContainersReadyCondition(status v1.PodStatus) *v1.PodCondition {
	_, containersReady := GetPodCondition(&status, v1.ContainersReady)
	if containersReady == nil {
		return nil
	}
	return containersReady
}

// GetPodCondition looks up the condition of type conditionType in status.
// It returns the index of the condition within status.Conditions and a
// pointer to it, or -1 and nil when status is nil or the condition is absent.
func GetPodCondition(status *v1.PodStatus, conditionType v1.PodConditionType) (int, *v1.PodCondition) {
	if status != nil {
		return GetPodConditionFromList(status.Conditions, conditionType)
	}
	return -1, nil
}

// GetPodConditionFromList looks up the first condition of type conditionType
// in conditions. It returns the index of that condition and a pointer to the
// slice element itself (not a copy), or -1 and nil when the list is nil or
// contains no such condition.
func GetPodConditionFromList(conditions []v1.PodCondition, conditionType v1.PodConditionType) (int, *v1.PodCondition) {
	// A nil slice has length zero, so the loop body is simply never entered.
	for idx := 0; idx < len(conditions); idx++ {
		if conditions[idx].Type != conditionType {
			continue
		}
		return idx, &conditions[idx]
	}
	return -1, nil
}

// GetPodFromTemplate builds a new Pod from template for the given parent.
//
// Labels, annotations and finalizers are copied from the template, the spec
// is a deep copy of the template spec, and GenerateName is derived from the
// parent object's name. When controllerRef is non-nil it becomes the pod's
// only owner reference.
//
// It returns an error when parentObject does not expose object metadata.
func GetPodFromTemplate(
	template *v1.PodTemplateSpec,
	parentObject runtime.Object,
	controllerRef *metav1.OwnerReference,
) (*v1.Pod, error) {
	podLabels := getPodsLabelSet(template)
	podFinalizers := getPodsFinalizers(template)
	podAnnotations := getPodsAnnotationSet(template)

	parentMeta, err := meta.Accessor(parentObject)
	if err != nil {
		return nil, fmt.Errorf("parentObject does not have ObjectMeta, %v", err)
	}

	pod := new(v1.Pod)
	pod.Labels = podLabels
	pod.Annotations = podAnnotations
	pod.GenerateName = getPodsPrefix(parentMeta.GetName())
	pod.Finalizers = podFinalizers
	if controllerRef != nil {
		pod.OwnerReferences = []metav1.OwnerReference{*controllerRef}
	}
	// Deep copy so later edits to the pod never leak into the template.
	pod.Spec = *template.Spec.DeepCopy()
	return pod, nil
}

// getPodsLabelSet returns a fresh, never-nil copy of the template's labels.
func getPodsLabelSet(template *v1.PodTemplateSpec) labels.Set {
	copied := make(labels.Set, len(template.Labels))
	for key := range template.Labels {
		copied[key] = template.Labels[key]
	}
	return copied
}

// getPodsFinalizers returns a fresh, never-nil copy of the template's
// finalizers.
func getPodsFinalizers(template *v1.PodTemplateSpec) []string {
	copied := make([]string, 0, len(template.Finalizers))
	copied = append(copied, template.Finalizers...)
	return copied
}

// getPodsAnnotationSet returns a fresh, never-nil copy of the template's
// annotations.
func getPodsAnnotationSet(template *v1.PodTemplateSpec) labels.Set {
	copied := make(labels.Set, len(template.Annotations))
	for key := range template.Annotations {
		copied[key] = template.Annotations[key]
	}
	return copied
}

func getPodsPrefix(controllerName string) string {
	// use the dash (if the name isn't too long) to make the pod name a bit prettier
	prefix := fmt.Sprintf("%s-", controllerName)
	if len(apimachineryvalidation.NameIsDNSSubdomain(prefix, true)) != 0 {
		prefix = controllerName
	}
	return prefix
}

// IsAssigned reports whether pod is non-nil and has either a node name in its
// spec or a pod IP in its status.
func IsAssigned(pod *v1.Pod) bool {
	if pod == nil {
		return false
	}
	hasNode := pod.Spec.NodeName != ""
	return hasNode || pod.Status.PodIP != ""
}

// PodNameSorter is a three-way comparison of two pods by name: it returns -1
// when a's name sorts before b's, 1 when it sorts after, and 0 when equal.
func PodNameSorter(a, b *v1.Pod) int {
	switch {
	case a.Name < b.Name:
		return -1
	case a.Name > b.Name:
		return 1
	default:
		return 0
	}
}

// WithPodIndexSorter returns a three-way pod comparison driven by podIndex,
// which maps pod names to an index. Pods with a lower index sort first; pods
// missing from podIndex sort after all indexed pods; two missing pods, or two
// pods with the same index, compare as equal.
func WithPodIndexSorter(podIndex map[string]int) func(*v1.Pod, *v1.Pod) int {
	return func(a, b *v1.Pod) int {
		idxA, hasA := podIndex[a.Name]
		idxB, hasB := podIndex[b.Name]

		switch {
		case !hasA && !hasB:
			return 0
		case !hasA:
			// Only b is indexed, so a goes last.
			return 1
		case !hasB:
			// Only a is indexed, so b goes last.
			return -1
		case idxA < idxB:
			return -1
		case idxA > idxB:
			return 1
		default:
			return 0
		}
	}
}

// MultiPodSorter chains several three-way pod comparisons, consulted in order.
type MultiPodSorter []func(a, b *v1.Pod) int

// Sort compares a and b with each comparison in turn and returns the first
// non-zero result. It returns 0 when every comparison reports equality or the
// chain is empty.
func (m MultiPodSorter) Sort(a, b *v1.Pod) int {
	for _, compare := range m {
		if verdict := compare(a, b); verdict != 0 {
			return verdict
		}
	}
	return 0
}


================================================
FILE: kubernetes/internal/utils/pod_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package utils

import (
	"slices"
	"testing"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// TestWithPodIndexSorter checks the comparison returned by WithPodIndexSorter
// for a fixed pair of pods ("pod-a", "pod-b") under different index maps.
func TestWithPodIndexSorter(t *testing.T) {
	newPod := func(name string) *v1.Pod {
		return &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: name}}
	}

	// Every case compares pod-a against pod-b; only the index map varies.
	cases := []struct {
		name     string
		podIndex map[string]int
		want     int
	}{
		{name: "a index < b index", podIndex: map[string]int{"pod-a": 1, "pod-b": 2}, want: -1},
		{name: "a index > b index", podIndex: map[string]int{"pod-a": 5, "pod-b": 3}, want: 1},
		{name: "a index == b index", podIndex: map[string]int{"pod-a": 2, "pod-b": 2}, want: 0},
		{name: "a has no index, b has index - a should be last", podIndex: map[string]int{"pod-b": 1}, want: 1},
		{name: "a has index, b has no index - b should be last", podIndex: map[string]int{"pod-a": 1}, want: -1},
		{name: "both have no index", podIndex: map[string]int{}, want: 0},
		{name: "index 0 vs index 1", podIndex: map[string]int{"pod-a": 0, "pod-b": 1}, want: -1},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			compare := WithPodIndexSorter(tc.podIndex)
			if got := compare(newPod("pod-a"), newPod("pod-b")); got != tc.want {
				t.Errorf("WithPodIndexSorter() = %v, want %v", got, tc.want)
			}
		})
	}
}

func TestMultiPodSorter(t *testing.T) {
	tests := []struct {
		name     string
		sorters  MultiPodSorter
		podA     *v1.Pod
		podB     *v1.Pod
		want     int
		wantDesc string
	}{
		{
			name: "first sorter decides - a < b",
			sorters: MultiPodSorter{
				func(a, b *v1.Pod) int {
					if a.Name < b.Name {
						return -1
					} else if a.Name > b.Name {
						return 1
					}
					return 0
				},
			},
			podA:     &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod-a"}},
			podB:     &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod-b"}},
			want:     -1,
			wantDesc: "pod-a should come before pod-b",
		},
		{
			name: "first sorter equal, second sorter decides",
			sorters: MultiPodSorter{
				func(a, b *v1.Pod) int {
					return 0
				},
				func(a, b *v1.Pod) int {
					if a.Name < b.Name {
						return -1
					} else if a.Name > b.Name {
						return 1
					}
					return 0
				},
			},
			podA:     &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod-a"}},
			podB:     &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod-b"}},
			want:     -1,
			wantDesc: "first sorter returns 0, second sorter decides",
		},
		{
			name: "all sorters return equal",
			sorters: MultiPodSorter{
				func(a, b *v1.Pod) int { return 0 },
				func(a, b *v1.Pod) int { return 0 },
				func(a, b *v1.Pod) int { return 0 },
			},
			podA:     &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod-a"}},
			podB:     &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod-b"}},
			want:     0,
			wantDesc: "all sorters return 0",
		},
		{
			name: "index sorter then name sorter - decided by index",
			sorters: MultiPodSorter{
				WithPodIndexSorter(map[string]int{
					"pod-b": 0,
					"pod-a": 1,
				}),
				PodNameSorter,
			},
			podA:     &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod-a"}},
			podB:     &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod-b"}},
			want:     1,
			wantDesc: "pod-b has lower index (0) than pod-a (1), so pod-a > pod-b",
		},
		{
			name: "index sorter then name sorter - decided by name",
			sorters: MultiPodSorter{
				WithPodIndexSorter(map[string]int{
					"pod-a": 1,
					"pod-b": 1,
				}),
				PodNameSorter,
			},
			podA:     &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod-a"}},
			podB:     &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod-b"}},
			want:     -1,
			wantDesc: "same index, fallback to name comparison",
		},
		{
			name:     "empty sorters list",
			sorters:  MultiPodSorter{},
			podA:     &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod-a"}},
			podB:     &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod-b"}},
			want:     0,
			wantDesc: "no sorters, should return 0",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := tt.sorters.Sort(tt.podA, tt.podB)
			if got != tt.want {
				t.Errorf("MultiPodSorter.Sort() = %v, want %v (%s)", got, tt.want, tt.wantDesc)
			}
		})
	}
}

// TestMultiPodSorter_Integration sorts a pod slice with an index comparison
// followed by a name comparison and checks the resulting order: indexed pods
// first by index, the un-indexed pod last.
func TestMultiPodSorter_Integration(t *testing.T) {
	var pods []*v1.Pod
	for _, name := range []string{"pod-c", "pod-a", "pod-b", "pod-d"} {
		pods = append(pods, &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: name}})
	}

	// pod-d deliberately has no index.
	chain := MultiPodSorter{
		WithPodIndexSorter(map[string]int{
			"pod-a": 2,
			"pod-b": 0,
			"pod-c": 1,
		}),
		PodNameSorter,
	}

	slices.SortStableFunc(pods, chain.Sort)

	wantOrder := []string{"pod-b", "pod-c", "pod-a", "pod-d"}
	for pos := range pods {
		if got := pods[pos].Name; got != wantOrder[pos] {
			t.Errorf("pod at index %d: got %s, want %s", pos, got, wantOrder[pos])
		}
	}
}


================================================
FILE: kubernetes/internal/utils/requeueduration/duration.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package requeueduration

import (
	"fmt"
	"sync"
	"time"
)

// DurationStore keeps one requeue Duration per key (for example per
// workload). Push folds new durations into the entry of a key and Pop
// consumes that entry.
type DurationStore struct {
	store sync.Map
}

// Push folds newDuration into the entry stored under key, creating the entry
// if needed. Only the shortest positive duration is retained (see
// Duration.Update).
func (dm *DurationStore) Push(key string, newDuration time.Duration) {
	value, _ := dm.store.LoadOrStore(key, &Duration{})
	requeueDuration, ok := value.(*Duration)
	if !ok {
		// Unexpected value type under this key: drop it rather than keep it.
		dm.store.Delete(key)
		return
	}
	requeueDuration.Update(newDuration)
}

// Pop removes the entry stored under key and returns its duration. It returns
// 0 when no entry exists for the key.
//
// The entry is fetched and removed in one atomic LoadAndDelete. A separate
// Load followed by Delete could remove a fresh entry that a concurrent Push
// stored in between, silently dropping that duration.
func (dm *DurationStore) Pop(key string) time.Duration {
	value, loaded := dm.store.LoadAndDelete(key)
	if !loaded {
		return 0
	}
	requeueDuration, ok := value.(*Duration)
	if !ok {
		return 0
	}
	return requeueDuration.Get()
}

// Duration helps calculate the shortest non-zore duration to requeue
type Duration struct {
	sync.Mutex
	duration time.Duration
	message  string
}

func (rd *Duration) Update(newDuration time.Duration) {
	rd.Lock()
	defer rd.Unlock()
	if newDuration > 0 {
		if rd.duration <= 0 || newDuration < rd.duration {
			rd.duration = newDuration
		}
	}
}

func (rd *Duration) UpdateWithMsg(newDuration time.Duration, format string, args ...interface{}) {
	rd.Lock()
	defer rd.Unlock()
	if newDuration > 0 {
		if rd.duration <= 0 || newDuration < rd.duration {
			rd.duration = newDuration
			rd.message = fmt.Sprintf(format, args...)
		}
	}
}

func (rd *Duration) Merge(rd2 *Duration) {
	rd2.Lock()
	defer rd2.Unlock()
	rd.UpdateWithMsg(rd2.duration, "%s", rd2.message)
}

func (rd *Duration) Get() time.Duration {
	rd.Lock()
	defer rd.Unlock()
	return rd.duration
}

func (rd *Duration) GetWithMsg() (time.Duration, string) {
	rd.Lock()
	defer rd.Unlock()
	return rd.duration, rd.message
}


================================================
FILE: kubernetes/pkg/client/clientset/versioned/clientset.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.

package versioned

import (
	fmt "fmt"
	http "net/http"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/clientset/versioned/typed/sandbox/v1alpha1"
	discovery "k8s.io/client-go/discovery"
	rest "k8s.io/client-go/rest"
	flowcontrol "k8s.io/client-go/util/flowcontrol"
)

// Interface is the set of accessors exposed by Clientset: the discovery
// client and the typed client for the sandbox v1alpha1 API group.
type Interface interface {
	Discovery() discovery.DiscoveryInterface
	SandboxV1alpha1() sandboxv1alpha1.SandboxV1alpha1Interface
}

// Clientset contains the clients for groups.
// It embeds the discovery client and holds the typed sandbox v1alpha1 client;
// both are populated by NewForConfigAndClient.
type Clientset struct {
	*discovery.DiscoveryClient
	sandboxV1alpha1 *sandboxv1alpha1.SandboxV1alpha1Client
}

// SandboxV1alpha1 retrieves the SandboxV1alpha1Client
// held by the Clientset. Unlike Discovery, it does not guard against a nil
// receiver.
func (c *Clientset) SandboxV1alpha1() sandboxv1alpha1.SandboxV1alpha1Interface {
	return c.sandboxV1alpha1
}

// Discovery retrieves the DiscoveryClient
// embedded in the Clientset. It returns nil when called on a nil *Clientset.
func (c *Clientset) Discovery() discovery.DiscoveryInterface {
	if c == nil {
		return nil
	}
	return c.DiscoveryClient
}

// NewForConfig creates a new Clientset for the given config.
// If config's RateLimiter is not set and QPS and Burst are acceptable,
// NewForConfig will generate a rate-limiter in configShallowCopy.
// NewForConfig is equivalent to NewForConfigAndClient(c, httpClient),
// where httpClient was generated with rest.HTTPClientFor(c).
//
// The passed config is not modified: all adjustments are made on a shallow
// copy. An error is returned when the HTTP client cannot be built or when
// NewForConfigAndClient fails.
func NewForConfig(c *rest.Config) (*Clientset, error) {
	configShallowCopy := *c

	// Fall back to the default Kubernetes user agent when none is configured.
	if configShallowCopy.UserAgent == "" {
		configShallowCopy.UserAgent = rest.DefaultKubernetesUserAgent()
	}

	// share the transport between all clients
	httpClient, err := rest.HTTPClientFor(&configShallowCopy)
	if err != nil {
		return nil, err
	}

	return NewForConfigAndClient(&configShallowCopy, httpClient)
}

// NewForConfigAndClient creates a new Clientset for the given config and http client.
// Note the http client provided takes precedence over the configured transport values.
// When the config has no RateLimiter and QPS is positive, a token-bucket rate
// limiter is installed on a shallow copy of the config; in that case Burst
// must be greater than zero, otherwise an error is returned.
func NewForConfigAndClient(c *rest.Config, httpClient *http.Client) (*Clientset, error) {
	cfg := *c
	if needsLimiter := cfg.RateLimiter == nil && cfg.QPS > 0; needsLimiter {
		if cfg.Burst <= 0 {
			return nil, fmt.Errorf("burst is required to be greater than 0 when RateLimiter is not set and QPS is set to greater than 0")
		}
		cfg.RateLimiter = flowcontrol.NewTokenBucketRateLimiter(cfg.QPS, cfg.Burst)
	}

	// Build the typed group client first, then the discovery client; both use
	// the same config copy and the caller-supplied HTTP client.
	sandboxClient, err := sandboxv1alpha1.NewForConfigAndClient(&cfg, httpClient)
	if err != nil {
		return nil, err
	}
	discoveryClient, err := discovery.NewDiscoveryClientForConfigAndClient(&cfg, httpClient)
	if err != nil {
		return nil, err
	}

	return &Clientset{
		DiscoveryClient: discoveryClient,
		sandboxV1alpha1: sandboxClient,
	}, nil
}

// NewForConfigOrDie creates a new Clientset for the given config and
// panics with the error returned by NewForConfig if construction fails.
func NewForConfigOrDie(c *rest.Config) *Clientset {
	built, buildErr := NewForConfig(c)
	if buildErr == nil {
		return built
	}
	panic(buildErr)
}

// New creates a new Clientset for the given RESTClient.
// The same rest.Interface backs both the typed sandbox client and the
// discovery client.
func New(c rest.Interface) *Clientset {
	// Field order mirrors the construction order: typed client first, then discovery.
	return &Clientset{
		sandboxV1alpha1: sandboxv1alpha1.New(c),
		DiscoveryClient: discovery.NewDiscoveryClient(c),
	}
}


================================================
FILE: kubernetes/pkg/client/clientset/versioned/fake/clientset_generated.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.

package fake

import (
	clientset "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/clientset/versioned"
	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/clientset/versioned/typed/sandbox/v1alpha1"
	fakesandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/clientset/versioned/typed/sandbox/v1alpha1/fake"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/watch"
	"k8s.io/client-go/discovery"
	fakediscovery "k8s.io/client-go/discovery/fake"
	"k8s.io/client-go/testing"
)

// NewSimpleClientset returns a clientset that will respond with the provided objects.
// It's backed by a very simple object tracker that processes creates, updates and deletions as-is,
// without applying any field management, validations and/or defaults. It shouldn't be considered a replacement
// for a real clientset and is mostly useful in simple unit tests.
//
// Every object in objects is added to the tracker up front; a failure to add
// one causes a panic.
//
// DEPRECATED: NewClientset replaces this with support for field management, which significantly improves
// server side apply testing. NewClientset is only available when apply configurations are generated (e.g.
// via --with-applyconfig).
func NewSimpleClientset(objects ...runtime.Object) *Clientset {
	tracker := testing.NewObjectTracker(scheme, codecs.UniversalDecoder())
	for i := range objects {
		if err := tracker.Add(objects[i]); err != nil {
			panic(err)
		}
	}

	// watchReactor serves every watch action from the tracker, forwarding the
	// list options when the action carries them.
	watchReactor := func(action testing.Action) (bool, watch.Interface, error) {
		var listOpts metav1.ListOptions
		if impl, isWatchImpl := action.(testing.WatchActionImpl); isWatchImpl {
			listOpts = impl.ListOptions
		}
		watcher, err := tracker.Watch(action.GetResource(), action.GetNamespace(), listOpts)
		if err != nil {
			return false, nil, err
		}
		return true, watcher, nil
	}

	fakeClientset := &Clientset{tracker: tracker}
	fakeClientset.discovery = &fakediscovery.FakeDiscovery{Fake: &fakeClientset.Fake}
	fakeClientset.AddReactor("*", "*", testing.ObjectReaction(tracker))
	fakeClientset.AddWatchReactor("*", watchReactor)

	return fakeClientset
}

// Clientset implements clientset.Interface. Meant to be embedded into a
// struct to get a default implementation. This makes faking out just the method
// you want to test easier.
//
// The embedded testing.Fake is what NewSimpleClientset registers its reactors
// on and what the group clients receive a pointer to; discovery is returned by
// Discovery() and tracker by Tracker().
type Clientset struct {
	testing.Fake
	discovery *fakediscovery.FakeDiscovery
	tracker   testing.ObjectTracker
}

// Discovery returns the fake discovery client stored on the clientset.
func (c *Clientset) Discovery() discovery.DiscoveryInterface {
	return c.discovery
}

// Tracker returns the object tracker stored on the clientset.
func (c *Clientset) Tracker() testing.ObjectTracker {
	return c.tracker
}

// Compile-time assertions that *Clientset satisfies both clientset.Interface
// and testing.FakeClient.
var (
	_ clientset.Interface = &Clientset{}
	_ testing.FakeClient  = &Clientset{}
)

// SandboxV1alpha1 retrieves the SandboxV1alpha1Client.
// A new FakeSandboxV1alpha1 wrapper is allocated on every call; each wrapper
// points at the clientset's embedded testing.Fake.
func (c *Clientset) SandboxV1alpha1() sandboxv1alpha1.SandboxV1alpha1Interface {
	return &fakesandboxv1alpha1.FakeSandboxV1alpha1{Fake: &c.Fake}
}


================================================
FILE: kubernetes/pkg/client/clientset/versioned/fake/doc.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.

// Package fake has the automatically generated fake clientset.
package fake


================================================
FILE: kubernetes/pkg/client/clientset/versioned/fake/register.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.

package fake

import (
	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	runtime "k8s.io/apimachinery/pkg/runtime"
	schema "k8s.io/apimachinery/pkg/runtime/schema"
	serializer "k8s.io/apimachinery/pkg/runtime/serializer"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
)

// scheme is the package-private scheme used by the fake clientset; init
// registers types into it.
var scheme = runtime.NewScheme()

// codecs is the codec factory built on top of scheme.
var codecs = serializer.NewCodecFactory(scheme)

// localSchemeBuilder lists the AddToScheme functions of every API group this
// clientset covers (currently only sandbox v1alpha1).
var localSchemeBuilder = runtime.SchemeBuilder{
	sandboxv1alpha1.AddToScheme,
}

// AddToScheme adds all types of this clientset into the given scheme. This allows composition
// of clientsets, like in:
//
//	import (
//	  "k8s.io/client-go/kubernetes"
//	  clientsetscheme "k8s.io/client-go/kubernetes/scheme"
//	  aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme"
//	)
//
//	kclientset, _ := kubernetes.NewForConfig(c)
//	_ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme)
//
// After this, RawExtensions in Kubernetes types will serialize kube-aggregator types
// correctly.
//
// NOTE: the example above uses the kube-aggregator clientset as a stand-in; for
// this package the function registers the types listed in localSchemeBuilder.
var AddToScheme = localSchemeBuilder.AddToScheme

func init() {
	v1.AddToGroupVersion(scheme, schema.GroupVersion{Version: "v1"})
	utilruntime.Must(AddToScheme(scheme))
}


================================================
FILE: kubernetes/pkg/client/clientset/versioned/scheme/doc.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.

// Package scheme contains the scheme of the automatically generated clientset.
package scheme


================================================
FILE: kubernetes/pkg/client/clientset/versioned/scheme/register.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.

package scheme

import (
	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	runtime "k8s.io/apimachinery/pkg/runtime"
	schema "k8s.io/apimachinery/pkg/runtime/schema"
	serializer "k8s.io/apimachinery/pkg/runtime/serializer"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
)

// Scheme is the scheme used by the generated clientset; init registers types into it.
var Scheme = runtime.NewScheme()

// Codecs is the codec factory built on top of Scheme.
var Codecs = serializer.NewCodecFactory(Scheme)

// ParameterCodec is the parameter codec built on top of Scheme; the typed
// clients pass it to gentype when constructing resource clients.
var ParameterCodec = runtime.NewParameterCodec(Scheme)

// localSchemeBuilder lists the AddToScheme functions of every API group this
// clientset covers (currently only sandbox v1alpha1).
var localSchemeBuilder = runtime.SchemeBuilder{
	sandboxv1alpha1.AddToScheme,
}

// AddToScheme adds all types of this clientset into the given scheme. This allows composition
// of clientsets, like in:
//
//	import (
//	  "k8s.io/client-go/kubernetes"
//	  clientsetscheme "k8s.io/client-go/kubernetes/scheme"
//	  aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme"
//	)
//
//	kclientset, _ := kubernetes.NewForConfig(c)
//	_ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme)
//
// After this, RawExtensions in Kubernetes types will serialize kube-aggregator types
// correctly.
//
// NOTE: the example above uses the kube-aggregator clientset as a stand-in; for
// this package the function registers the types listed in localSchemeBuilder.
var AddToScheme = localSchemeBuilder.AddToScheme

func init() {
	v1.AddToGroupVersion(Scheme, schema.GroupVersion{Version: "v1"})
	utilruntime.Must(AddToScheme(Scheme))
}


================================================
FILE: kubernetes/pkg/client/clientset/versioned/typed/sandbox/v1alpha1/batchsandbox.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.

package v1alpha1

import (
	context "context"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	scheme "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/clientset/versioned/scheme"
	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	types "k8s.io/apimachinery/pkg/types"
	watch "k8s.io/apimachinery/pkg/watch"
	gentype "k8s.io/client-go/gentype"
)

// BatchSandboxesGetter has a method to return a BatchSandboxInterface.
// A group's client should implement this interface.
type BatchSandboxesGetter interface {
	// BatchSandboxes returns a BatchSandboxInterface for the given namespace.
	BatchSandboxes(namespace string) BatchSandboxInterface
}

// BatchSandboxInterface has methods to work with BatchSandbox resources.
// In this package it is implemented by batchSandboxes, which gets the methods
// from an embedded gentype.ClientWithList. BatchSandboxExpansion is the hook
// for hand-written extension methods and is currently empty.
type BatchSandboxInterface interface {
	Create(ctx context.Context, batchSandbox *sandboxv1alpha1.BatchSandbox, opts v1.CreateOptions) (*sandboxv1alpha1.BatchSandbox, error)
	Update(ctx context.Context, batchSandbox *sandboxv1alpha1.BatchSandbox, opts v1.UpdateOptions) (*sandboxv1alpha1.BatchSandbox, error)
	// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus().
	UpdateStatus(ctx context.Context, batchSandbox *sandboxv1alpha1.BatchSandbox, opts v1.UpdateOptions) (*sandboxv1alpha1.BatchSandbox, error)
	Delete(ctx context.Context, name string, opts v1.DeleteOptions) error
	DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error
	Get(ctx context.Context, name string, opts v1.GetOptions) (*sandboxv1alpha1.BatchSandbox, error)
	List(ctx context.Context, opts v1.ListOptions) (*sandboxv1alpha1.BatchSandboxList, error)
	Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error)
	Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *sandboxv1alpha1.BatchSandbox, err error)
	BatchSandboxExpansion
}

// batchSandboxes implements BatchSandboxInterface by embedding the generic
// gentype client instantiated for BatchSandbox and BatchSandboxList.
type batchSandboxes struct {
	*gentype.ClientWithList[*sandboxv1alpha1.BatchSandbox, *sandboxv1alpha1.BatchSandboxList]
}

// newBatchSandboxes returns a BatchSandboxes client for the given namespace.
// It wraps a gentype client for the "batchsandboxes" resource that uses the
// group client's REST client and the generated scheme's parameter codec.
func newBatchSandboxes(c *SandboxV1alpha1Client, namespace string) *batchSandboxes {
	// Factories gentype uses to allocate empty result objects.
	newObject := func() *sandboxv1alpha1.BatchSandbox { return new(sandboxv1alpha1.BatchSandbox) }
	newList := func() *sandboxv1alpha1.BatchSandboxList { return new(sandboxv1alpha1.BatchSandboxList) }

	inner := gentype.NewClientWithList[*sandboxv1alpha1.BatchSandbox, *sandboxv1alpha1.BatchSandboxList](
		"batchsandboxes",
		c.RESTClient(),
		scheme.ParameterCodec,
		namespace,
		newObject,
		newList,
	)
	return &batchSandboxes{inner}
}


================================================
FILE: kubernetes/pkg/client/clientset/versioned/typed/sandbox/v1alpha1/doc.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.

// Package v1alpha1 has the automatically generated typed clients.
package v1alpha1


================================================
FILE: kubernetes/pkg/client/clientset/versioned/typed/sandbox/v1alpha1/fake/doc.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.

// Package fake has the automatically generated clients.
package fake


================================================
FILE: kubernetes/pkg/client/clientset/versioned/typed/sandbox/v1alpha1/fake/fake_batchsandbox.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.

package fake

import (
	v1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/clientset/versioned/typed/sandbox/v1alpha1"
	gentype "k8s.io/client-go/gentype"
)

// fakeBatchSandboxes implements BatchSandboxInterface by embedding the generic
// gentype fake client instantiated for BatchSandbox and BatchSandboxList.
type fakeBatchSandboxes struct {
	*gentype.FakeClientWithList[*v1alpha1.BatchSandbox, *v1alpha1.BatchSandboxList]
	// Fake is the group-level fake client this resource client was created from.
	Fake *FakeSandboxV1alpha1
}

// newFakeBatchSandboxes builds the fake BatchSandbox client for a namespace.
// The gentype fake client is wired to the group's testing.Fake and told the
// resource ("batchsandboxes") and kind ("BatchSandbox") it serves, along with
// helpers for allocating objects and for converting between the list's value
// slice and a pointer slice.
func newFakeBatchSandboxes(fake *FakeSandboxV1alpha1, namespace string) sandboxv1alpha1.BatchSandboxInterface {
	newObject := func() *v1alpha1.BatchSandbox { return new(v1alpha1.BatchSandbox) }
	newList := func() *v1alpha1.BatchSandboxList { return new(v1alpha1.BatchSandboxList) }
	copyListMeta := func(dst, src *v1alpha1.BatchSandboxList) { dst.ListMeta = src.ListMeta }
	getItems := func(list *v1alpha1.BatchSandboxList) []*v1alpha1.BatchSandbox {
		return gentype.ToPointerSlice(list.Items)
	}
	setItems := func(list *v1alpha1.BatchSandboxList, items []*v1alpha1.BatchSandbox) {
		list.Items = gentype.FromPointerSlice(items)
	}

	inner := gentype.NewFakeClientWithList[*v1alpha1.BatchSandbox, *v1alpha1.BatchSandboxList](
		fake.Fake,
		namespace,
		v1alpha1.SchemeGroupVersion.WithResource("batchsandboxes"),
		v1alpha1.SchemeGroupVersion.WithKind("BatchSandbox"),
		newObject,
		newList,
		copyListMeta,
		getItems,
		setItems,
	)
	return &fakeBatchSandboxes{inner, fake}
}


================================================
FILE: kubernetes/pkg/client/clientset/versioned/typed/sandbox/v1alpha1/fake/fake_pool.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.

package fake

import (
	v1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/clientset/versioned/typed/sandbox/v1alpha1"
	gentype "k8s.io/client-go/gentype"
)

// fakePools implements PoolInterface by embedding the generic gentype fake
// client instantiated for Pool and PoolList.
type fakePools struct {
	*gentype.FakeClientWithList[*v1alpha1.Pool, *v1alpha1.PoolList]
	// Fake is the group-level fake client this resource client was created from.
	Fake *FakeSandboxV1alpha1
}

// newFakePools builds the fake Pool client for a namespace.
// The gentype fake client is wired to the group's testing.Fake and told the
// resource ("pools") and kind ("Pool") it serves, along with helpers for
// allocating objects and for converting between the list's value slice and a
// pointer slice.
func newFakePools(fake *FakeSandboxV1alpha1, namespace string) sandboxv1alpha1.PoolInterface {
	newObject := func() *v1alpha1.Pool { return new(v1alpha1.Pool) }
	newList := func() *v1alpha1.PoolList { return new(v1alpha1.PoolList) }
	copyListMeta := func(dst, src *v1alpha1.PoolList) { dst.ListMeta = src.ListMeta }
	getItems := func(list *v1alpha1.PoolList) []*v1alpha1.Pool {
		return gentype.ToPointerSlice(list.Items)
	}
	setItems := func(list *v1alpha1.PoolList, items []*v1alpha1.Pool) {
		list.Items = gentype.FromPointerSlice(items)
	}

	inner := gentype.NewFakeClientWithList[*v1alpha1.Pool, *v1alpha1.PoolList](
		fake.Fake,
		namespace,
		v1alpha1.SchemeGroupVersion.WithResource("pools"),
		v1alpha1.SchemeGroupVersion.WithKind("Pool"),
		newObject,
		newList,
		copyListMeta,
		getItems,
		setItems,
	)
	return &fakePools{inner, fake}
}


================================================
FILE: kubernetes/pkg/client/clientset/versioned/typed/sandbox/v1alpha1/fake/fake_sandbox_client.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.

package fake

import (
	v1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/clientset/versioned/typed/sandbox/v1alpha1"
	rest "k8s.io/client-go/rest"
	testing "k8s.io/client-go/testing"
)

// FakeSandboxV1alpha1 is the fake group client for sandbox v1alpha1. It embeds
// a pointer to the testing.Fake that its resource clients are built on.
type FakeSandboxV1alpha1 struct {
	*testing.Fake
}

// BatchSandboxes returns a fake BatchSandbox client for the given namespace.
// A new client is constructed on every call.
func (c *FakeSandboxV1alpha1) BatchSandboxes(namespace string) v1alpha1.BatchSandboxInterface {
	return newFakeBatchSandboxes(c, namespace)
}

// Pools returns a fake Pool client for the given namespace.
// A new client is constructed on every call.
func (c *FakeSandboxV1alpha1) Pools(namespace string) v1alpha1.PoolInterface {
	return newFakePools(c, namespace)
}

// RESTClient returns a RESTClient that is used to communicate
// with API server by this client implementation.
// The fake has no real REST client: the returned interface value wraps a nil
// *rest.RESTClient, so it does not compare equal to a bare nil interface.
func (c *FakeSandboxV1alpha1) RESTClient() rest.Interface {
	return (*rest.RESTClient)(nil)
}


================================================
FILE: kubernetes/pkg/client/clientset/versioned/typed/sandbox/v1alpha1/generated_expansion.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.

package v1alpha1

// BatchSandboxExpansion is embedded in BatchSandboxInterface; it declares no
// methods.
type BatchSandboxExpansion interface{}

// PoolExpansion is embedded in PoolInterface; it declares no methods.
type PoolExpansion interface{}


================================================
FILE: kubernetes/pkg/client/clientset/versioned/typed/sandbox/v1alpha1/pool.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.

package v1alpha1

import (
	context "context"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	scheme "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/clientset/versioned/scheme"
	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	types "k8s.io/apimachinery/pkg/types"
	watch "k8s.io/apimachinery/pkg/watch"
	gentype "k8s.io/client-go/gentype"
)

// PoolsGetter has a method to return a PoolInterface.
// A group's client should implement this interface.
type PoolsGetter interface {
	// Pools returns a PoolInterface for the given namespace.
	Pools(namespace string) PoolInterface
}

// PoolInterface has methods to work with Pool resources.
// In this package it is implemented by pools, which gets the methods from an
// embedded gentype.ClientWithList. PoolExpansion is the hook for hand-written
// extension methods and is currently empty.
type PoolInterface interface {
	Create(ctx context.Context, pool *sandboxv1alpha1.Pool, opts v1.CreateOptions) (*sandboxv1alpha1.Pool, error)
	Update(ctx context.Context, pool *sandboxv1alpha1.Pool, opts v1.UpdateOptions) (*sandboxv1alpha1.Pool, error)
	// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus().
	UpdateStatus(ctx context.Context, pool *sandboxv1alpha1.Pool, opts v1.UpdateOptions) (*sandboxv1alpha1.Pool, error)
	Delete(ctx context.Context, name string, opts v1.DeleteOptions) error
	DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error
	Get(ctx context.Context, name string, opts v1.GetOptions) (*sandboxv1alpha1.Pool, error)
	List(ctx context.Context, opts v1.ListOptions) (*sandboxv1alpha1.PoolList, error)
	Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error)
	Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *sandboxv1alpha1.Pool, err error)
	PoolExpansion
}

// pools implements PoolInterface by embedding the generic gentype client
// instantiated for Pool and PoolList.
type pools struct {
	*gentype.ClientWithList[*sandboxv1alpha1.Pool, *sandboxv1alpha1.PoolList]
}

// newPools returns a Pools client for the given namespace.
// It wraps a gentype client for the "pools" resource that uses the group
// client's REST client and the generated scheme's parameter codec.
func newPools(c *SandboxV1alpha1Client, namespace string) *pools {
	// Factories gentype uses to allocate empty result objects.
	newObject := func() *sandboxv1alpha1.Pool { return new(sandboxv1alpha1.Pool) }
	newList := func() *sandboxv1alpha1.PoolList { return new(sandboxv1alpha1.PoolList) }

	inner := gentype.NewClientWithList[*sandboxv1alpha1.Pool, *sandboxv1alpha1.PoolList](
		"pools",
		c.RESTClient(),
		scheme.ParameterCodec,
		namespace,
		newObject,
		newList,
	)
	return &pools{inner}
}


================================================
FILE: kubernetes/pkg/client/clientset/versioned/typed/sandbox/v1alpha1/sandbox_client.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.

package v1alpha1

import (
	http "net/http"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	scheme "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/clientset/versioned/scheme"
	rest "k8s.io/client-go/rest"
)

// SandboxV1alpha1Interface is the group client contract for sandbox v1alpha1:
// access to the underlying REST client plus getters for the BatchSandbox and
// Pool resource clients.
type SandboxV1alpha1Interface interface {
	RESTClient() rest.Interface
	BatchSandboxesGetter
	PoolsGetter
}

// SandboxV1alpha1Client is used to interact with features provided by the sandbox.opensandbox.io group.
type SandboxV1alpha1Client struct {
	// restClient is the REST client returned by RESTClient() and handed to the
	// per-resource clients.
	restClient rest.Interface
}

// BatchSandboxes returns a BatchSandbox client for the given namespace.
// A new client is constructed on every call.
func (c *SandboxV1alpha1Client) BatchSandboxes(namespace string) BatchSandboxInterface {
	return newBatchSandboxes(c, namespace)
}

// Pools returns a Pool client for the given namespace.
// A new client is constructed on every call.
func (c *SandboxV1alpha1Client) Pools(namespace string) PoolInterface {
	return newPools(c, namespace)
}

// NewForConfig creates a new SandboxV1alpha1Client for the given config.
// The config is copied, group defaults are applied to the copy, and an HTTP
// client is generated from it with rest.HTTPClientFor; the call is therefore
// equivalent to NewForConfigAndClient(c, httpClient) with that client.
func NewForConfig(c *rest.Config) (*SandboxV1alpha1Client, error) {
	cfg := *c
	setConfigDefaults(&cfg)

	httpClient, err := rest.HTTPClientFor(&cfg)
	if err == nil {
		return NewForConfigAndClient(&cfg, httpClient)
	}
	return nil, err
}

// NewForConfigAndClient creates a new SandboxV1alpha1Client for the given config and http client.
// Note the http client provided takes precedence over the configured transport values.
// Group defaults are applied to a copy of c, so the caller's config is not modified.
func NewForConfigAndClient(c *rest.Config, h *http.Client) (*SandboxV1alpha1Client, error) {
	cfg := *c
	setConfigDefaults(&cfg)

	restClient, err := rest.RESTClientForConfigAndClient(&cfg, h)
	if err != nil {
		return nil, err
	}
	return &SandboxV1alpha1Client{restClient: restClient}, nil
}

// NewForConfigOrDie creates a new SandboxV1alpha1Client for the given config and
// panics with the error returned by NewForConfig if construction fails.
func NewForConfigOrDie(c *rest.Config) *SandboxV1alpha1Client {
	built, buildErr := NewForConfig(c)
	if buildErr == nil {
		return built
	}
	panic(buildErr)
}

// New creates a new SandboxV1alpha1Client for the given RESTClient.
// The client is stored as-is; no config defaults are applied here.
func New(c rest.Interface) *SandboxV1alpha1Client {
	return &SandboxV1alpha1Client{restClient: c}
}

// setConfigDefaults mutates config in place for this API group: it sets the
// group version to sandboxv1alpha1.SchemeGroupVersion, the API path to
// "/apis", and the negotiated serializer to the generated scheme's codec
// factory without conversion. GroupVersion, APIPath and NegotiatedSerializer
// are always overwritten; UserAgent is only filled in when empty.
func setConfigDefaults(config *rest.Config) {
	groupVersion := sandboxv1alpha1.SchemeGroupVersion
	config.GroupVersion = &groupVersion
	config.APIPath = "/apis"

	codecFactory := rest.CodecFactoryForGeneratedClient(scheme.Scheme, scheme.Codecs)
	config.NegotiatedSerializer = codecFactory.WithoutConversion()

	if config.UserAgent != "" {
		return
	}
	config.UserAgent = rest.DefaultKubernetesUserAgent()
}

// RESTClient returns a RESTClient that is used to communicate
// with API server by this client implementation.
// It may be called on a nil *SandboxV1alpha1Client, in which case it returns nil.
func (c *SandboxV1alpha1Client) RESTClient() rest.Interface {
	if c != nil {
		return c.restClient
	}
	return nil
}


================================================
FILE: kubernetes/pkg/client/informers/externalversions/factory.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by informer-gen. DO NOT EDIT.

package externalversions

import (
	reflect "reflect"
	sync "sync"
	time "time"

	versioned "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/clientset/versioned"
	internalinterfaces "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/informers/externalversions/internalinterfaces"
	sandbox "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/informers/externalversions/sandbox"
	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	runtime "k8s.io/apimachinery/pkg/runtime"
	schema "k8s.io/apimachinery/pkg/runtime/schema"
	cache "k8s.io/client-go/tools/cache"
)

// SharedInformerOption defines the functional option type for SharedInformerFactory.
// An option receives the factory being configured and returns the factory to
// continue with; the options in this file mutate and return the same pointer.
type SharedInformerOption func(*sharedInformerFactory) *sharedInformerFactory

// sharedInformerFactory is the concrete SharedInformerFactory. It keeps one
// informer per object type and tracks which of them have been started.
//
// client is handed to the informer constructor in InformerFor. namespace
// (v1.NamespaceAll unless WithNamespace is used) and tweakListOptions are
// forwarded to the per-group informer accessors. lock is held by Start,
// Shutdown, WaitForCacheSync and InformerFor while they touch informers,
// startedInformers and shuttingDown. defaultResync is used for every type
// that has no entry in customResync. transform is installed on each informer
// created by InformerFor.
type sharedInformerFactory struct {
	client           versioned.Interface
	namespace        string
	tweakListOptions internalinterfaces.TweakListOptionsFunc
	lock             sync.Mutex
	defaultResync    time.Duration
	customResync     map[reflect.Type]time.Duration
	transform        cache.TransformFunc

	// informers holds the created informers, keyed by reflect.TypeOf of the
	// object they were requested for.
	informers map[reflect.Type]cache.SharedIndexInformer
	// startedInformers is used for tracking which informers have been started.
	// This allows Start() to be called multiple times safely.
	startedInformers map[reflect.Type]bool
	// wg tracks how many goroutines were started.
	wg sync.WaitGroup
	// shuttingDown is true when Shutdown has been called. It may still be running
	// because it needs to wait for goroutines.
	shuttingDown bool
}

// WithCustomResyncConfig overrides the resync period for individual informer
// types. Only the dynamic type of each map key matters: it becomes the lookup
// key, and the mapped duration is recorded as that type's custom resync period.
func WithCustomResyncConfig(resyncConfig map[v1.Object]time.Duration) SharedInformerOption {
	apply := func(factory *sharedInformerFactory) *sharedInformerFactory {
		for obj, period := range resyncConfig {
			objType := reflect.TypeOf(obj)
			factory.customResync[objType] = period
		}
		return factory
	}
	return apply
}

// WithTweakListOptions installs tweakListOptions as the list-options filter
// used by all listers obtained from the configured SharedInformerFactory.
func WithTweakListOptions(tweakListOptions internalinterfaces.TweakListOptionsFunc) SharedInformerOption {
	apply := func(factory *sharedInformerFactory) *sharedInformerFactory {
		factory.tweakListOptions = tweakListOptions
		return factory
	}
	return apply
}

// WithNamespace restricts the SharedInformerFactory to a single namespace by
// replacing the factory's namespace setting with the given value.
func WithNamespace(namespace string) SharedInformerOption {
	apply := func(factory *sharedInformerFactory) *sharedInformerFactory {
		factory.namespace = namespace
		return factory
	}
	return apply
}

// WithTransform records transform on the factory; InformerFor installs it on
// every informer the factory creates.
func WithTransform(transform cache.TransformFunc) SharedInformerOption {
	apply := func(factory *sharedInformerFactory) *sharedInformerFactory {
		factory.transform = transform
		return factory
	}
	return apply
}

// NewSharedInformerFactory builds a SharedInformerFactory that covers all
// namespaces. It is NewSharedInformerFactoryWithOptions with no options.
func NewSharedInformerFactory(client versioned.Interface, defaultResync time.Duration) SharedInformerFactory {
	allNamespaces := NewSharedInformerFactoryWithOptions(client, defaultResync)
	return allNamespaces
}

// NewFilteredSharedInformerFactory builds a SharedInformerFactory limited to
// namespace and filtered by tweakListOptions. Listers obtained from the
// returned factory are subject to the same filters.
//
// Deprecated: Please use NewSharedInformerFactoryWithOptions instead
func NewFilteredSharedInformerFactory(client versioned.Interface, defaultResync time.Duration, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) SharedInformerFactory {
	// Same option order as the legacy behavior: namespace first, then the filter.
	opts := []SharedInformerOption{
		WithNamespace(namespace),
		WithTweakListOptions(tweakListOptions),
	}
	return NewSharedInformerFactoryWithOptions(client, defaultResync, opts...)
}

// NewSharedInformerFactoryWithOptions builds a SharedInformerFactory and then
// applies the given options in order. Before any option runs the factory
// targets all namespaces, uses defaultResync for every type, and has empty
// informer, started-informer and custom-resync maps.
func NewSharedInformerFactoryWithOptions(client versioned.Interface, defaultResync time.Duration, options ...SharedInformerOption) SharedInformerFactory {
	factory := new(sharedInformerFactory)
	factory.client = client
	factory.namespace = v1.NamespaceAll
	factory.defaultResync = defaultResync
	factory.informers = map[reflect.Type]cache.SharedIndexInformer{}
	factory.startedInformers = map[reflect.Type]bool{}
	factory.customResync = map[reflect.Type]time.Duration{}

	// Each option returns the factory that the next option (and the caller) sees.
	for i := range options {
		factory = options[i](factory)
	}

	return factory
}

// Start launches one goroutine per informer that has not been started yet and
// marks it as started, so repeated calls only pick up informers created since
// the previous call. Each goroutine runs its informer until stopCh is closed.
// Start does nothing once Shutdown has been called.
func (f *sharedInformerFactory) Start(stopCh <-chan struct{}) {
	f.lock.Lock()
	defer f.lock.Unlock()

	if f.shuttingDown {
		return
	}

	for typ, candidate := range f.informers {
		if f.startedInformers[typ] {
			continue
		}

		// Bind a per-iteration copy so the goroutine does not observe later
		// values of the loop variable.
		current := candidate
		f.wg.Add(1)
		go func() {
			defer f.wg.Done()
			current.Run(stopCh)
		}()
		f.startedInformers[typ] = true
	}
}

// Shutdown flags the factory as shutting down, which turns later Start calls
// into no-ops, and then blocks until every goroutine launched by Start has
// returned.
func (f *sharedInformerFactory) Shutdown() {
	// Hold the lock only for the flag update, not while waiting.
	func() {
		f.lock.Lock()
		defer f.lock.Unlock()
		f.shuttingDown = true
	}()

	// Will return immediately if there is nothing to wait for.
	f.wg.Wait()
}

// WaitForCacheSync waits for the caches of all informers that were started at
// the time of the call and reports, per informer type, the result returned by
// cache.WaitForCacheSync for that informer. Informers that were created but
// never started are not included in the result.
func (f *sharedInformerFactory) WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool {
	// Snapshot the started informers under the lock; the waiting itself
	// happens without holding it.
	f.lock.Lock()
	started := make(map[reflect.Type]cache.SharedIndexInformer, len(f.informers))
	for typ, inf := range f.informers {
		if f.startedInformers[typ] {
			started[typ] = inf
		}
	}
	f.lock.Unlock()

	synced := make(map[reflect.Type]bool, len(started))
	for typ, inf := range started {
		synced[typ] = cache.WaitForCacheSync(stopCh, inf.HasSynced)
	}
	return synced
}

// InformerFor returns the SharedIndexInformer for obj using an internal
// client. Informers are cached by the dynamic type of obj: the first request
// for a type builds the informer with newFunc, installs the factory's
// transform on it and stores it; later requests return the stored instance.
// The resync period is the custom one registered for the type, if any,
// otherwise the factory default.
func (f *sharedInformerFactory) InformerFor(obj runtime.Object, newFunc internalinterfaces.NewInformerFunc) cache.SharedIndexInformer {
	f.lock.Lock()
	defer f.lock.Unlock()

	key := reflect.TypeOf(obj)
	if cached, found := f.informers[key]; found {
		return cached
	}

	resync := f.defaultResync
	if custom, found := f.customResync[key]; found {
		resync = custom
	}

	created := newFunc(f.client, resync)
	created.SetTransform(f.transform)
	f.informers[key] = created

	return created
}

// SharedInformerFactory provides shared informers for resources in all known
// API group versions.
//
// It is typically used like this:
//
//	ctx, cancel := context.WithCancel(context.Background())
//	defer cancel()
//	factory := NewSharedInformerFactory(client, resyncPeriod)
//	defer factory.Shutdown()    // Returns immediately if nothing was started.
//	genericInformer, err := factory.ForResource(resource)
//	typedInformer := factory.Sandbox().V1alpha1().BatchSandboxes()
//	factory.Start(ctx.Done())          // Start processing these informers.
//	synced := factory.WaitForCacheSync(ctx.Done())
//	for v, ok := range synced {
//	    if !ok {
//	        fmt.Fprintf(os.Stderr, "caches failed to sync: %v", v)
//	        return
//	    }
//	}
//
//	// Informers can also be created after Start, but then
//	// Start must be called again:
//	anotherGenericInformer, err := factory.ForResource(resource)
//	factory.Start(ctx.Done())
type SharedInformerFactory interface {
	internalinterfaces.SharedInformerFactory

	// Start initializes all requested informers. They are handled in goroutines
	// which run until the stop channel gets closed.
	// Warning: Start does not block. When run in a go-routine, it will race with a later WaitForCacheSync.
	Start(stopCh <-chan struct{})

	// Shutdown marks a factory as shutting down. At that point no new
	// informers can be started anymore and Start will return without
	// doing anything.
	//
	// In addition, Shutdown blocks until all goroutines have terminated. For that
	// to happen, the close channel(s) that they were started with must be closed,
	// either before Shutdown gets called or while it is waiting.
	//
	// Shutdown may be called multiple times, even concurrently. All such calls will
	// block until all goroutines have terminated.
	Shutdown()

	// WaitForCacheSync blocks until all started informers' caches were synced
	// or the stop channel gets closed.
	WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool

	// ForResource gives generic access to a shared informer of the matching type.
	ForResource(resource schema.GroupVersionResource) (GenericInformer, error)

	// InformerFor returns the SharedIndexInformer for obj using an internal
	// client.
	InformerFor(obj runtime.Object, newFunc internalinterfaces.NewInformerFunc) cache.SharedIndexInformer

	// Sandbox gives access to the informers of the sandbox API group.
	Sandbox() sandbox.Interface
}

// Sandbox returns the informer accessors for the sandbox API group, bound to
// this factory together with its namespace and list-options tweak.
func (f *sharedInformerFactory) Sandbox() sandbox.Interface {
	groupInformers := sandbox.New(f, f.namespace, f.tweakListOptions)
	return groupInformers
}


================================================
FILE: kubernetes/pkg/client/informers/externalversions/generic.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by informer-gen. DO NOT EDIT.

package externalversions

import (
	fmt "fmt"

	v1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	schema "k8s.io/apimachinery/pkg/runtime/schema"
	cache "k8s.io/client-go/tools/cache"
)

// GenericInformer is a type of SharedIndexInformer which will locate and
// delegate to other sharedInformers based on type.
type GenericInformer interface {
	// Informer returns the underlying SharedIndexInformer.
	Informer() cache.SharedIndexInformer
	// Lister returns a GenericLister for the informer's resource.
	Lister() cache.GenericLister
}

// genericInformer is the GenericInformer implementation returned by
// ForResource: it pairs a shared informer with the group/resource it serves.
type genericInformer struct {
	informer cache.SharedIndexInformer
	resource schema.GroupResource
}

// Informer returns the SharedIndexInformer wrapped by this generic informer.
func (g *genericInformer) Informer() cache.SharedIndexInformer {
	return g.informer
}

// Lister returns a GenericLister built on the wrapped informer's indexer for
// the stored group/resource. A new lister is constructed on every call.
func (g *genericInformer) Lister() cache.GenericLister {
	indexer := g.informer.GetIndexer()
	return cache.NewGenericLister(indexer, g.resource)
}

// ForResource gives generic access to a shared informer of the matching type.
// Only the resources of this client's API group are known; any other
// GroupVersionResource results in a "no informer found" error.
// TODO extend this to unknown resources with a client pool
func (f *sharedInformerFactory) ForResource(resource schema.GroupVersionResource) (GenericInformer, error) {
	// Group=sandbox.opensandbox.io, Version=v1alpha1
	gv := v1alpha1.SchemeGroupVersion

	if resource == gv.WithResource("batchsandboxes") {
		return &genericInformer{
			resource: resource.GroupResource(),
			informer: f.Sandbox().V1alpha1().BatchSandboxes().Informer(),
		}, nil
	}
	if resource == gv.WithResource("pools") {
		return &genericInformer{
			resource: resource.GroupResource(),
			informer: f.Sandbox().V1alpha1().Pools().Informer(),
		}, nil
	}

	return nil, fmt.Errorf("no informer found for %v", resource)
}


================================================
FILE: kubernetes/pkg/client/informers/externalversions/internalinterfaces/factory_interfaces.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by informer-gen. DO NOT EDIT.

package internalinterfaces

import (
	time "time"

	versioned "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/clientset/versioned"
	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	runtime "k8s.io/apimachinery/pkg/runtime"
	cache "k8s.io/client-go/tools/cache"
)

// NewInformerFunc takes versioned.Interface and time.Duration to return a SharedIndexInformer.
// The duration is the resync period the new informer should use.
type NewInformerFunc func(versioned.Interface, time.Duration) cache.SharedIndexInformer

// SharedInformerFactory is a small interface that allows adding an informer
// without an import cycle.
type SharedInformerFactory interface {
	// Start runs the factory's informers until stopCh is closed.
	Start(stopCh <-chan struct{})
	// InformerFor returns the shared informer for obj, using newFunc to build
	// it when needed.
	InformerFor(obj runtime.Object, newFunc NewInformerFunc) cache.SharedIndexInformer
}

// TweakListOptionsFunc is a function that transforms a v1.ListOptions.
// It receives a pointer and is expected to modify the options in place.
type TweakListOptionsFunc func(*v1.ListOptions)


================================================
FILE: kubernetes/pkg/client/informers/externalversions/sandbox/interface.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by informer-gen. DO NOT EDIT.

package sandbox

import (
	internalinterfaces "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/informers/externalversions/internalinterfaces"
	v1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/informers/externalversions/sandbox/v1alpha1"
)

// Interface provides access to each of this group's versions.
// Currently the only version exposed is v1alpha1.
type Interface interface {
	// V1alpha1 provides access to shared informers for resources in V1alpha1.
	V1alpha1() v1alpha1.Interface
}

// group implements Interface. It only stores the factory, namespace and
// list-options tweak so that they can be passed on to the versioned accessors.
type group struct {
	factory          internalinterfaces.SharedInformerFactory
	namespace        string
	tweakListOptions internalinterfaces.TweakListOptionsFunc
}

// New returns a new Interface for the sandbox group that remembers the given
// factory, namespace and list-options tweak.
func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) Interface {
	g := new(group)
	g.factory = f
	g.namespace = namespace
	g.tweakListOptions = tweakListOptions
	return g
}

// V1alpha1 returns a new v1alpha1.Interface carrying this group's factory,
// namespace and list-options tweak.
func (g *group) V1alpha1() v1alpha1.Interface {
	versionInformers := v1alpha1.New(g.factory, g.namespace, g.tweakListOptions)
	return versionInformers
}


================================================
FILE: kubernetes/pkg/client/informers/externalversions/sandbox/v1alpha1/batchsandbox.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by informer-gen. DO NOT EDIT.

package v1alpha1

import (
	context "context"
	time "time"

	apissandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	versioned "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/clientset/versioned"
	internalinterfaces "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/informers/externalversions/internalinterfaces"
	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/listers/sandbox/v1alpha1"
	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	runtime "k8s.io/apimachinery/pkg/runtime"
	watch "k8s.io/apimachinery/pkg/watch"
	cache "k8s.io/client-go/tools/cache"
)

// BatchSandboxInformer provides access to a shared informer and lister for
// BatchSandboxes.
type BatchSandboxInformer interface {
	// Informer returns the shared informer for BatchSandboxes.
	Informer() cache.SharedIndexInformer
	// Lister returns a lister backed by that informer's indexer.
	Lister() sandboxv1alpha1.BatchSandboxLister
}

// batchSandboxInformer implements BatchSandboxInformer. The informer itself
// is obtained from factory; tweakListOptions and namespace are used when the
// factory needs a new informer to be constructed.
type batchSandboxInformer struct {
	factory          internalinterfaces.SharedInformerFactory
	tweakListOptions internalinterfaces.TweakListOptionsFunc
	namespace        string
}

// NewBatchSandboxInformer constructs a new, unfiltered informer for the
// BatchSandbox type. It is NewFilteredBatchSandboxInformer without a
// list-options tweak.
// Always prefer using an informer factory to get a shared informer instead of getting an independent
// one. This reduces memory footprint and number of connections to the server.
func NewBatchSandboxInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer {
	unfiltered := NewFilteredBatchSandboxInformer(client, namespace, resyncPeriod, indexers, nil)
	return unfiltered
}

// NewFilteredBatchSandboxInformer constructs a new informer for the
// BatchSandbox type in the given namespace. A non-nil tweakListOptions is
// applied to the options of every list and watch call before it is issued.
// The context-free list/watch functions use context.Background(); the
// context-aware variants use the context they are given.
// Always prefer using an informer factory to get a shared informer instead of getting an independent
// one. This reduces memory footprint and number of connections to the server.
func NewFilteredBatchSandboxInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer {
	// adjust applies the optional caller-supplied tweak to the request options.
	adjust := func(opts *v1.ListOptions) {
		if tweakListOptions == nil {
			return
		}
		tweakListOptions(opts)
	}

	listWatch := &cache.ListWatch{}
	listWatch.ListFunc = func(options v1.ListOptions) (runtime.Object, error) {
		adjust(&options)
		return client.SandboxV1alpha1().BatchSandboxes(namespace).List(context.Background(), options)
	}
	listWatch.WatchFunc = func(options v1.ListOptions) (watch.Interface, error) {
		adjust(&options)
		return client.SandboxV1alpha1().BatchSandboxes(namespace).Watch(context.Background(), options)
	}
	listWatch.ListWithContextFunc = func(ctx context.Context, options v1.ListOptions) (runtime.Object, error) {
		adjust(&options)
		return client.SandboxV1alpha1().BatchSandboxes(namespace).List(ctx, options)
	}
	listWatch.WatchFuncWithContext = func(ctx context.Context, options v1.ListOptions) (watch.Interface, error) {
		adjust(&options)
		return client.SandboxV1alpha1().BatchSandboxes(namespace).Watch(ctx, options)
	}

	return cache.NewSharedIndexInformer(listWatch, &apissandboxv1alpha1.BatchSandbox{}, resyncPeriod, indexers)
}

// defaultInformer is the constructor handed to the factory: it builds a
// filtered BatchSandbox informer for this accessor's namespace and tweak,
// indexed by namespace.
func (f *batchSandboxInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer {
	byNamespace := cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}
	return NewFilteredBatchSandboxInformer(client, f.namespace, resyncPeriod, byNamespace, f.tweakListOptions)
}

// Informer asks the factory for the BatchSandbox informer, supplying
// defaultInformer as the constructor to use if the factory needs to build one.
func (f *batchSandboxInformer) Informer() cache.SharedIndexInformer {
	prototype := &apissandboxv1alpha1.BatchSandbox{}
	return f.factory.InformerFor(prototype, f.defaultInformer)
}

// Lister returns a BatchSandboxLister that reads from the indexer of the
// BatchSandbox informer.
func (f *batchSandboxInformer) Lister() sandboxv1alpha1.BatchSandboxLister {
	indexer := f.Informer().GetIndexer()
	return sandboxv1alpha1.NewBatchSandboxLister(indexer)
}


================================================
FILE: kubernetes/pkg/client/informers/externalversions/sandbox/v1alpha1/interface.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by informer-gen. DO NOT EDIT.

package v1alpha1

import (
	internalinterfaces "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/informers/externalversions/internalinterfaces"
)

// Interface provides access to all the informers in this group version
// (BatchSandboxes and Pools).
type Interface interface {
	// BatchSandboxes returns a BatchSandboxInformer.
	BatchSandboxes() BatchSandboxInformer
	// Pools returns a PoolInformer.
	Pools() PoolInformer
}

// version implements Interface. Its fields are copied into every informer
// accessor it hands out.
type version struct {
	factory          internalinterfaces.SharedInformerFactory
	namespace        string
	tweakListOptions internalinterfaces.TweakListOptionsFunc
}

// New returns a new Interface for this group version that remembers the given
// factory, namespace and list-options tweak.
func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) Interface {
	v := new(version)
	v.factory = f
	v.namespace = namespace
	v.tweakListOptions = tweakListOptions
	return v
}

// BatchSandboxes returns a BatchSandboxInformer configured with this
// version's factory, namespace and list-options tweak.
func (v *version) BatchSandboxes() BatchSandboxInformer {
	accessor := &batchSandboxInformer{
		factory:          v.factory,
		tweakListOptions: v.tweakListOptions,
		namespace:        v.namespace,
	}
	return accessor
}

// Pools returns a PoolInformer configured with this version's factory,
// namespace and list-options tweak.
func (v *version) Pools() PoolInformer {
	accessor := &poolInformer{
		factory:          v.factory,
		tweakListOptions: v.tweakListOptions,
		namespace:        v.namespace,
	}
	return accessor
}


================================================
FILE: kubernetes/pkg/client/informers/externalversions/sandbox/v1alpha1/pool.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by informer-gen. DO NOT EDIT.

package v1alpha1

import (
	context "context"
	time "time"

	apissandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	versioned "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/clientset/versioned"
	internalinterfaces "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/informers/externalversions/internalinterfaces"
	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/client/listers/sandbox/v1alpha1"
	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	runtime "k8s.io/apimachinery/pkg/runtime"
	watch "k8s.io/apimachinery/pkg/watch"
	cache "k8s.io/client-go/tools/cache"
)

// PoolInformer provides access to a shared informer and lister for
// Pools.
type PoolInformer interface {
	// Informer returns the shared informer for Pools.
	Informer() cache.SharedIndexInformer
	// Lister returns a lister backed by that informer's indexer.
	Lister() sandboxv1alpha1.PoolLister
}

// poolInformer implements PoolInformer. The informer itself is obtained from
// factory; tweakListOptions and namespace are used when the factory needs a
// new informer to be constructed.
type poolInformer struct {
	factory          internalinterfaces.SharedInformerFactory
	tweakListOptions internalinterfaces.TweakListOptionsFunc
	namespace        string
}

// NewPoolInformer constructs a new, unfiltered informer for the Pool type.
// It is NewFilteredPoolInformer without a list-options tweak.
// Always prefer using an informer factory to get a shared informer instead of getting an independent
// one. This reduces memory footprint and number of connections to the server.
func NewPoolInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer {
	unfiltered := NewFilteredPoolInformer(client, namespace, resyncPeriod, indexers, nil)
	return unfiltered
}

// NewFilteredPoolInformer constructs a new informer for the Pool type in the
// given namespace. A non-nil tweakListOptions is applied to the options of
// every list and watch call before it is issued. The context-free list/watch
// functions use context.Background(); the context-aware variants use the
// context they are given.
// Always prefer using an informer factory to get a shared informer instead of getting an independent
// one. This reduces memory footprint and number of connections to the server.
func NewFilteredPoolInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer {
	// adjust applies the optional caller-supplied tweak to the request options.
	adjust := func(opts *v1.ListOptions) {
		if tweakListOptions == nil {
			return
		}
		tweakListOptions(opts)
	}

	listWatch := &cache.ListWatch{}
	listWatch.ListFunc = func(options v1.ListOptions) (runtime.Object, error) {
		adjust(&options)
		return client.SandboxV1alpha1().Pools(namespace).List(context.Background(), options)
	}
	listWatch.WatchFunc = func(options v1.ListOptions) (watch.Interface, error) {
		adjust(&options)
		return client.SandboxV1alpha1().Pools(namespace).Watch(context.Background(), options)
	}
	listWatch.ListWithContextFunc = func(ctx context.Context, options v1.ListOptions) (runtime.Object, error) {
		adjust(&options)
		return client.SandboxV1alpha1().Pools(namespace).List(ctx, options)
	}
	listWatch.WatchFuncWithContext = func(ctx context.Context, options v1.ListOptions) (watch.Interface, error) {
		adjust(&options)
		return client.SandboxV1alpha1().Pools(namespace).Watch(ctx, options)
	}

	return cache.NewSharedIndexInformer(listWatch, &apissandboxv1alpha1.Pool{}, resyncPeriod, indexers)
}

// defaultInformer is the constructor handed to the factory: it builds a
// filtered Pool informer for this accessor's namespace and tweak, indexed by
// namespace.
func (f *poolInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer {
	byNamespace := cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}
	return NewFilteredPoolInformer(client, f.namespace, resyncPeriod, byNamespace, f.tweakListOptions)
}

// Informer asks the factory for the Pool informer, supplying defaultInformer
// as the constructor to use if the factory needs to build one.
func (f *poolInformer) Informer() cache.SharedIndexInformer {
	prototype := &apissandboxv1alpha1.Pool{}
	return f.factory.InformerFor(prototype, f.defaultInformer)
}

// Lister returns a PoolLister that reads from the indexer of the Pool
// informer.
func (f *poolInformer) Lister() sandboxv1alpha1.PoolLister {
	indexer := f.Informer().GetIndexer()
	return sandboxv1alpha1.NewPoolLister(indexer)
}


================================================
FILE: kubernetes/pkg/client/listers/sandbox/v1alpha1/batchsandbox.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by lister-gen. DO NOT EDIT.

package v1alpha1

import (
	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	labels "k8s.io/apimachinery/pkg/labels"
	listers "k8s.io/client-go/listers"
	cache "k8s.io/client-go/tools/cache"
)

// BatchSandboxLister helps list BatchSandboxes across all namespaces and
// hands out namespace-scoped listers.
// All objects returned here must be treated as read-only.
type BatchSandboxLister interface {
	// List lists all BatchSandboxes in the indexer.
	// Objects returned here must be treated as read-only.
	List(selector labels.Selector) (ret []*sandboxv1alpha1.BatchSandbox, err error)
	// BatchSandboxes returns an object that can list and get BatchSandboxes.
	BatchSandboxes(namespace string) BatchSandboxNamespaceLister
	// BatchSandboxListerExpansion is the hook for hand-written extra methods.
	BatchSandboxListerExpansion
}

// batchSandboxLister implements the BatchSandboxLister interface by embedding
// a generic resource indexer typed for *BatchSandbox.
type batchSandboxLister struct {
	listers.ResourceIndexer[*sandboxv1alpha1.BatchSandbox]
}

// NewBatchSandboxLister returns a new BatchSandboxLister that reads from the
// given indexer.
func NewBatchSandboxLister(indexer cache.Indexer) BatchSandboxLister {
	resource := sandboxv1alpha1.Resource("batchsandbox")
	typed := listers.New[*sandboxv1alpha1.BatchSandbox](indexer, resource)
	return &batchSandboxLister{ResourceIndexer: typed}
}

// BatchSandboxes returns an object that can list and get BatchSandboxes
// within the given namespace, backed by this lister's indexer.
func (s *batchSandboxLister) BatchSandboxes(namespace string) BatchSandboxNamespaceLister {
	scoped := listers.NewNamespaced[*sandboxv1alpha1.BatchSandbox](s.ResourceIndexer, namespace)
	return batchSandboxNamespaceLister{ResourceIndexer: scoped}
}

// BatchSandboxNamespaceLister helps list and get BatchSandboxes within a
// single namespace.
// All objects returned here must be treated as read-only.
type BatchSandboxNamespaceLister interface {
	// List lists all BatchSandboxes in the indexer for a given namespace.
	// Objects returned here must be treated as read-only.
	List(selector labels.Selector) (ret []*sandboxv1alpha1.BatchSandbox, err error)
	// Get retrieves the BatchSandbox from the indexer for a given namespace and name.
	// Objects returned here must be treated as read-only.
	Get(name string) (*sandboxv1alpha1.BatchSandbox, error)
	// BatchSandboxNamespaceListerExpansion is the hook for hand-written extra methods.
	BatchSandboxNamespaceListerExpansion
}

// batchSandboxNamespaceLister implements the BatchSandboxNamespaceLister
// interface by embedding a resource indexer typed for *BatchSandbox.
type batchSandboxNamespaceLister struct {
	listers.ResourceIndexer[*sandboxv1alpha1.BatchSandbox]
}


================================================
FILE: kubernetes/pkg/client/listers/sandbox/v1alpha1/expansion_generated.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by lister-gen. DO NOT EDIT.

package v1alpha1

// BatchSandboxListerExpansion allows custom methods to be added to
// BatchSandboxLister. It currently declares no methods.
type BatchSandboxListerExpansion interface{}

// BatchSandboxNamespaceListerExpansion allows custom methods to be added to
// BatchSandboxNamespaceLister. It currently declares no methods.
type BatchSandboxNamespaceListerExpansion interface{}

// PoolListerExpansion allows custom methods to be added to
// PoolLister. It currently declares no methods.
type PoolListerExpansion interface{}

// PoolNamespaceListerExpansion allows custom methods to be added to
// PoolNamespaceLister. It currently declares no methods.
type PoolNamespaceListerExpansion interface{}


================================================
FILE: kubernetes/pkg/client/listers/sandbox/v1alpha1/pool.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by lister-gen. DO NOT EDIT.

package v1alpha1

import (
	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	labels "k8s.io/apimachinery/pkg/labels"
	listers "k8s.io/client-go/listers"
	cache "k8s.io/client-go/tools/cache"
)

// PoolLister helps list Pools across all namespaces and hands out
// namespace-scoped listers.
// All objects returned here must be treated as read-only.
type PoolLister interface {
	// List lists all Pools in the indexer.
	// Objects returned here must be treated as read-only.
	List(selector labels.Selector) (ret []*sandboxv1alpha1.Pool, err error)
	// Pools returns an object that can list and get Pools.
	Pools(namespace string) PoolNamespaceLister
	// PoolListerExpansion is the hook for hand-written extra methods.
	PoolListerExpansion
}

// poolLister implements the PoolLister interface by embedding a generic
// resource indexer typed for *Pool.
type poolLister struct {
	listers.ResourceIndexer[*sandboxv1alpha1.Pool]
}

// NewPoolLister returns a new PoolLister that reads from the given indexer.
func NewPoolLister(indexer cache.Indexer) PoolLister {
	resource := sandboxv1alpha1.Resource("pool")
	typed := listers.New[*sandboxv1alpha1.Pool](indexer, resource)
	return &poolLister{ResourceIndexer: typed}
}

// Pools returns an object that can list and get Pools within the given
// namespace, backed by this lister's indexer.
func (s *poolLister) Pools(namespace string) PoolNamespaceLister {
	scoped := listers.NewNamespaced[*sandboxv1alpha1.Pool](s.ResourceIndexer, namespace)
	return poolNamespaceLister{ResourceIndexer: scoped}
}

// PoolNamespaceLister helps list and get Pools within a single namespace.
// All objects returned here must be treated as read-only.
type PoolNamespaceLister interface {
	// List lists all Pools in the indexer for a given namespace.
	// Objects returned here must be treated as read-only.
	List(selector labels.Selector) (ret []*sandboxv1alpha1.Pool, err error)
	// Get retrieves the Pool from the indexer for a given namespace and name.
	// Objects returned here must be treated as read-only.
	Get(name string) (*sandboxv1alpha1.Pool, error)
	// PoolNamespaceListerExpansion is the hook for hand-written extra methods.
	PoolNamespaceListerExpansion
}

// poolNamespaceLister implements the PoolNamespaceLister
// interface by embedding a resource indexer typed for *Pool.
type poolNamespaceLister struct {
	listers.ResourceIndexer[*sandboxv1alpha1.Pool]
}


================================================
FILE: kubernetes/pkg/task-executor/client.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package task_executor

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"time"

	"k8s.io/klog/v2"
)

// Client is an HTTP client for the task-executor API.
//
// baseURL is prepended verbatim to the endpoint paths ("/setTasks",
// "/getTasks"); httpClient is the underlying HTTP client used to send every
// request. Use NewClient to obtain a ready-to-use value.
type Client struct {
	baseURL    string
	httpClient *http.Client
}

// NewClient builds a Client that sends its requests to baseURL.
//
// An empty baseURL is accepted, but a warning is logged because requests built
// from it are unlikely to reach a server. The returned client applies a
// 30-second timeout to each HTTP request.
func NewClient(baseURL string) *Client {
	if len(baseURL) == 0 {
		klog.Warning("baseURL is empty, client may not work properly")
	}

	hc := &http.Client{Timeout: 30 * time.Second}
	return &Client{baseURL: baseURL, httpClient: hc}
}

// Set replaces the task list on the remote server via POST /setTasks.
//
// A non-nil task is sent as a single-element JSON array (create/update).
// A nil task sends an empty JSON array, which is how tasks are cleared
// (delete).
//
// Return value:
//   - nil task: (nil, nil) once the server answers 200 with a decodable body.
//   - non-nil task: the entry of the response whose Name equals task.Name, or
//     the input task itself when the response contains no such entry.
//
// An error is returned when the client is nil, the task cannot be marshalled,
// the request cannot be built or sent, the server answers with a non-200
// status, or the response body is not a JSON array of tasks. The request is
// attempted exactly once; no retry is performed.
func (c *Client) Set(ctx context.Context, task *Task) (*Task, error) {
	if c == nil {
		return nil, fmt.Errorf("client is nil")
	}

	// Upper bound on how much of a failed response is copied into the error,
	// so a misbehaving server cannot force an arbitrarily large allocation.
	const maxErrorBodyBytes = 4 << 10

	// An empty list is the "clear all tasks" payload used for deletion.
	payload := []byte("[]")
	if task != nil {
		data, err := json.Marshal([]Task{*task})
		if err != nil {
			return nil, fmt.Errorf("failed to marshal task: %w", err)
		}
		payload = data
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/setTasks", bytes.NewReader(payload))
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("network error: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: a read failure still yields an error carrying the status.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBodyBytes))
		return nil, fmt.Errorf("server error: status=%d, body=%s", resp.StatusCode, string(body))
	}

	// Parse response - expect array of tasks
	var tasks []Task
	if err := json.NewDecoder(resp.Body).Decode(&tasks); err != nil {
		return nil, fmt.Errorf("failed to decode response: %w", err)
	}

	if task == nil {
		// Delete succeeded
		return nil, nil
	}

	// Prefer the server's view of the task we just set.
	for i := range tasks {
		if tasks[i].Name == task.Name {
			return &tasks[i], nil
		}
	}

	// The server did not echo the task back; hand the caller its own input.
	return task, nil
}

// Get fetches the task list from the remote server via GET /getTasks and
// returns its first entry (single task mode).
//
// It returns (nil, nil) when the server reports no tasks. An error is returned
// when the client is nil, the request cannot be built or sent, the server
// answers with a non-200 status, or the response body is not a JSON array of
// tasks.
func (c *Client) Get(ctx context.Context) (*Task, error) {
	if c == nil {
		return nil, fmt.Errorf("client is nil")
	}

	// Upper bound on how much of a failed response is copied into the error,
	// so a misbehaving server cannot force an arbitrarily large allocation.
	const maxErrorBodyBytes = 4 << 10

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.baseURL+"/getTasks", nil)
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("network error: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: a read failure still yields an error carrying the status.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBodyBytes))
		return nil, fmt.Errorf("server error: status=%d, body=%s", resp.StatusCode, string(body))
	}

	// Parse response - expect array of tasks
	var tasks []Task
	if err := json.NewDecoder(resp.Body).Decode(&tasks); err != nil {
		return nil, fmt.Errorf("failed to decode response: %w", err)
	}

	// No tasks
	if len(tasks) == 0 {
		return nil, nil
	}

	// Return the first task (single task mode)
	return &tasks[0], nil
}


================================================
FILE: kubernetes/pkg/task-executor/types.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package task_executor

import (
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Task represents the internal local task resource (LocalTask).
// It is a flat struct rather than nested Metadata/Spec/Status objects; the
// fields below are grouped in that order (identity, desired state, observed
// state). Every field except Name is omitted from JSON when unset.
type Task struct {
	// Identity: the task name and, when set, its deletion timestamp.
	Name              string       `json:"name"`
	DeletionTimestamp *metav1.Time `json:"deletionTimestamp,omitempty"`

	// Desired state: a process description and/or a pod template.
	// NOTE(review): presumably only one of the two is set per task — confirm.
	Process         *Process                `json:"process,omitempty"`
	PodTemplateSpec *corev1.PodTemplateSpec `json:"podTemplateSpec,omitempty"`

	// Observed state matching the two desired-state fields above.
	ProcessStatus *ProcessStatus    `json:"processStatus,omitempty"`
	PodStatus     *corev1.PodStatus `json:"podStatus,omitempty"`
}

// Process describes a process to run for a Task: its command line,
// environment, working directory and optional timeout.
type Process struct {
	// Command is the command to run. It is always serialized (no omitempty).
	// NOTE(review): presumably the entrypoint that Args is appended to — confirm.
	Command []string `json:"command"`
	// Arguments to the entrypoint.
	Args []string `json:"args,omitempty"`
	// List of environment variables to set in the process.
	Env []corev1.EnvVar `json:"env,omitempty"`
	// WorkingDir process working directory.
	WorkingDir string `json:"workingDir,omitempty"`
	// TimeoutSeconds is the process timeout in seconds; nil leaves it unset.
	TimeoutSeconds *int64 `json:"timeoutSeconds,omitempty"`
}

// ProcessStatus holds a possible state of process.
// Only one of its members may be specified.
// If none of them is specified, the default one is Waiting.
// Each member is a pointer and is omitted from JSON when nil.
type ProcessStatus struct {
	// Details about a waiting process
	// +optional
	Waiting *Waiting `json:"waiting,omitempty"`
	// Details about a running process
	// +optional
	Running *Running `json:"running,omitempty"`
	// Details about a terminated process
	// +optional
	Terminated *Terminated `json:"terminated,omitempty"`
}

// Waiting is a waiting state of a process.
// Both fields are optional and omitted from JSON when empty.
type Waiting struct {
	// (brief) reason the process is not yet running.
	// +optional
	Reason string `json:"reason,omitempty"`
	// Message regarding why the process is not yet running.
	// +optional
	Message string `json:"message,omitempty"`
}

// Running is a running state of a process.
type Running struct {
	// Time at which the process was last (re-)started
	// NOTE(review): marked +optional, yet the JSON tag has no omitempty, so the
	// "startedAt" key is always emitted — confirm this is intended.
	// +optional
	StartedAt metav1.Time `json:"startedAt"`
}

// Terminated is a terminated state of a process.
// ExitCode is always serialized; the remaining fields are tagged omitempty.
type Terminated struct {
	// Exit status from the last termination of the process
	ExitCode int32 `json:"exitCode"`
	// Signal from the last termination of the process
	// +optional
	Signal int32 `json:"signal,omitempty"`
	// (brief) reason from the last termination of the process
	// +optional
	Reason string `json:"reason,omitempty"`
	// Message regarding the last termination of the process
	// +optional
	Message string `json:"message,omitempty"`
	// Time at which previous execution of the process started
	// +optional
	StartedAt metav1.Time `json:"startedAt,omitempty"`
	// Time at which the process last terminated
	// +optional
	FinishedAt metav1.Time `json:"finishedAt,omitempty"`
}


================================================
FILE: kubernetes/pkg/utils/endpoints.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package utils

import (
	"encoding/json"
	"fmt"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
)

const (
	// AnnotationEndpoints is the annotation key for storing BatchSandbox endpoints.
	// GetEndpoints reads its value as a JSON array of strings,
	// e.g. ["10.244.1.5", "10.244.1.6"].
	AnnotationEndpoints = "sandbox.opensandbox.io/endpoints"
)

// GetEndpoints returns the endpoint addresses recorded on a BatchSandbox.
//
// The addresses are read from the AnnotationEndpoints annotation, whose value
// must be a JSON array of strings, e.g. ["10.244.1.5", "10.244.1.6"]. The
// entries are returned as decoded; they are not validated as IP addresses.
//
// An error is returned when bs is nil, when it carries no annotations, when
// the annotation is missing or empty, when its value is not a JSON string
// array, or when the array holds no entries.
func GetEndpoints(bs *sandboxv1alpha1.BatchSandbox) ([]string, error) {
	switch {
	case bs == nil:
		return nil, fmt.Errorf("BatchSandbox is nil")
	case bs.Annotations == nil:
		return nil, fmt.Errorf("BatchSandbox has no annotations")
	}

	// A missing key and an empty value are reported the same way.
	raw := bs.Annotations[AnnotationEndpoints]
	if len(raw) == 0 {
		return nil, fmt.Errorf("missing %s annotation", AnnotationEndpoints)
	}

	var ips []string
	if decodeErr := json.Unmarshal([]byte(raw), &ips); decodeErr != nil {
		return nil, fmt.Errorf("failed to parse endpoints annotation: %w", decodeErr)
	}

	if len(ips) < 1 {
		return nil, fmt.Errorf("endpoints annotation contains no IPs")
	}
	return ips, nil
}


================================================
FILE: kubernetes/pkg/utils/endpoints_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package utils

import (
	"testing"

	sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// TestGetEndpoints is a table-driven test for GetEndpoints. It covers every
// error path (nil object, no annotations, missing key, malformed JSON, empty
// array) as well as successful parsing of one and of several endpoints.
func TestGetEndpoints(t *testing.T) {
	// sandboxWith builds a BatchSandbox fixture carrying the given annotations;
	// passing nil leaves the Annotations map unset.
	sandboxWith := func(annotations map[string]string) *sandboxv1alpha1.BatchSandbox {
		return &sandboxv1alpha1.BatchSandbox{
			ObjectMeta: metav1.ObjectMeta{
				Name:        "test-sandbox",
				Namespace:   "default",
				Annotations: annotations,
			},
		}
	}

	cases := []struct {
		name    string
		bs      *sandboxv1alpha1.BatchSandbox
		wantIPs []string
		wantErr string // substring expected in the error; empty means success
	}{
		{
			name:    "nil BatchSandbox",
			bs:      nil,
			wantErr: "BatchSandbox is nil",
		},
		{
			name:    "no annotations",
			bs:      sandboxWith(nil),
			wantErr: "has no annotations",
		},
		{
			name:    "missing endpoints annotation",
			bs:      sandboxWith(map[string]string{"other-key": "other-value"}),
			wantErr: "missing sandbox.opensandbox.io/endpoints annotation",
		},
		{
			name:    "invalid JSON annotation",
			bs:      sandboxWith(map[string]string{AnnotationEndpoints: "invalid-json"}),
			wantErr: "failed to parse endpoints annotation",
		},
		{
			name:    "empty endpoints array",
			bs:      sandboxWith(map[string]string{AnnotationEndpoints: "[]"}),
			wantErr: "contains no IPs",
		},
		{
			name:    "single endpoint",
			bs:      sandboxWith(map[string]string{AnnotationEndpoints: `["10.244.1.5"]`}),
			wantIPs: []string{"10.244.1.5"},
		},
		{
			name:    "multiple endpoints",
			bs:      sandboxWith(map[string]string{AnnotationEndpoints: `["10.244.1.5", "10.244.1.6", "10.244.1.7"]`}),
			wantIPs: []string{"10.244.1.5", "10.244.1.6", "10.244.1.7"},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := GetEndpoints(tc.bs)

			// Error cases: only the error is inspected, never the result.
			if tc.wantErr != "" {
				switch {
				case err == nil:
					t.Errorf("expected error containing %q, got nil", tc.wantErr)
				case !contains(err.Error(), tc.wantErr):
					t.Errorf("expected error containing %q, got %q", tc.wantErr, err.Error())
				}
				return
			}

			if err != nil {
				t.Errorf("unexpected error: %v", err)
				return
			}

			// Compare length first so the element-wise loop cannot index out of range.
			if len(got) != len(tc.wantIPs) {
				t.Errorf("expected %d IPs, got %d", len(tc.wantIPs), len(got))
				return
			}

			for i := range got {
				if got[i] != tc.wantIPs[i] {
					t.Errorf("expected IP[%d]=%s, got %s", i, tc.wantIPs[i], got[i])
				}
			}
		})
	}
}

// contains reports whether substr occurs anywhere within s.
// An empty substr is contained in every string, including the empty one.
func contains(s, substr string) bool {
	// A needle longer than the haystack can never match.
	if len(substr) > len(s) {
		return false
	}
	// The scan covers the equal-string, prefix, suffix and interior cases alike.
	return findSubstr(s, substr)
}

// findSubstr reports whether substr occurs in s, comparing substr against
// every window of the same width from left to right. It returns false when
// substr is longer than s and true when substr is empty.
func findSubstr(s, substr string) bool {
	width := len(substr)
	// last is the final offset at which a window of that width still fits;
	// it is negative (so the loop never runs) when substr is longer than s.
	for start, last := 0, len(s)-width; start <= last; start++ {
		if s[start:start+width] == substr {
			return true
		}
	}
	return false
}


================================================
FILE: kubernetes/test/e2e/e2e_suite_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package e2e

import (
	"fmt"
	"os"
	"os/exec"
	"testing"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"

	"github.com/alibaba/OpenSandbox/sandbox-k8s/test/utils"
)

// TestE2E runs the end-to-end (e2e) test suite for the project. These tests execute in an isolated,
// temporary environment to validate project changes and are intended to be used in CI jobs.
// The default setup requires Kind and builds/loads the Manager Docker image locally.
func TestE2E(t *testing.T) {
	// Route Gomega assertion failures into Ginkgo before any spec runs.
	RegisterFailHandler(Fail)

	_, _ = fmt.Fprintln(GinkgoWriter, "Starting sandbox-k8s integration test suite")

	RunSpecs(t, "e2e suite")
}

// BeforeSuite builds the manager and task-executor images with make and then
// loads both into the Kind cluster. When the DOCKER_BUILD_ARGS environment
// variable is non-empty it is forwarded to each build. Every step must
// succeed for the suite to proceed.
var _ = BeforeSuite(func() {
	extraBuildArgs := os.Getenv("DOCKER_BUILD_ARGS")

	// makeArgsFor assembles the make arguments for one image build: the
	// target, the image variable assignment and, if set, the extra build args.
	makeArgsFor := func(target, imageAssignment string) []string {
		args := []string{target, imageAssignment}
		if extraBuildArgs != "" {
			args = append(args, fmt.Sprintf("DOCKER_BUILD_ARGS=%s", extraBuildArgs))
		}
		return args
	}

	By("building the manager(Operator) image")
	managerArgs := makeArgsFor("docker-build", fmt.Sprintf("CONTROLLER_IMG=%s", utils.ControllerImage))
	_, err := utils.Run(exec.Command("make", managerArgs...))
	ExpectWithOffset(1, err).NotTo(HaveOccurred(), "Failed to build the manager(Operator) image")

	By("building the task-executor image")
	executorArgs := makeArgsFor("docker-build-task-executor", fmt.Sprintf("TASK_EXECUTOR_IMG=%s", utils.TaskExecutorImage))
	_, err = utils.Run(exec.Command("make", executorArgs...))
	ExpectWithOffset(1, err).NotTo(HaveOccurred(), "Failed to build the task-executor image")

	// If you want to change the e2e test vendor from Kind, ensure the image is
	// built and available before running the tests. Also, remove the following block.
	By("loading the manager(Operator) image on Kind")
	err = utils.LoadImageToKindClusterWithName(utils.ControllerImage)
	ExpectWithOffset(1, err).NotTo(HaveOccurred(), "Failed to load the manager(Operator) image into Kind")

	By("loading the task-executor image on Kind")
	err = utils.LoadImageToKindClusterWithName(utils.TaskExecutorImage)
	ExpectWithOffset(1, err).NotTo(HaveOccurred(), "Failed to load the task-executor image into Kind")
})

// AfterSuite registers an empty suite-level teardown hook; no cleanup is
// performed here.
var _ = AfterSuite(func() {
})


================================================
FILE: kubernetes/test/e2e/e2e_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package e2e

import (
	"bytes"
	"encoding/json"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"text/template"
	"time"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"

	"github.com/alibaba/OpenSandbox/sandbox-k8s/test/utils"
)

// namespace is the Kubernetes namespace the project is deployed in. The
// "Manager" suite creates it in BeforeAll and deletes it again in AfterAll.
const namespace = "opensandbox-system"

var _ = Describe("Manager", Ordered, func() {
	var controllerPodName string

	// Before any spec runs, prepare the environment: create the manager
	// namespace, label it so the restricted pod-security policy is enforced,
	// install the CRDs and deploy the controller-manager. Each step must
	// succeed.
	BeforeAll(func() {
		By("creating manager namespace")
		_, err := utils.Run(exec.Command("kubectl", "create", "ns", namespace))
		Expect(err).NotTo(HaveOccurred(), "Failed to create namespace")

		By("labeling the namespace to enforce the restricted security policy")
		labelNamespace := exec.Command(
			"kubectl", "label", "--overwrite", "ns", namespace,
			"pod-security.kubernetes.io/enforce=restricted",
		)
		_, err = utils.Run(labelNamespace)
		Expect(err).NotTo(HaveOccurred(), "Failed to label namespace with restricted policy")

		By("installing CRDs")
		_, err = utils.Run(exec.Command("make", "install"))
		Expect(err).NotTo(HaveOccurred(), "Failed to install CRDs")

		By("deploying the controller-manager")
		controllerImage := fmt.Sprintf("CONTROLLER_IMG=%s", utils.ControllerImage)
		_, err = utils.Run(exec.Command("make", "deploy", controllerImage))
		Expect(err).NotTo(HaveOccurred(), "Failed to deploy the controller-manager")
	})

	// After all specs have run, tear the environment down in reverse: delete
	// the curl pod used for metrics, undeploy the controller, uninstall the
	// CRDs and delete the namespace. Cleanup is best effort, so the result of
	// every command is deliberately discarded.
	AfterAll(func() {
		By("cleaning up the curl pod for metrics")
		_, _ = utils.Run(exec.Command("kubectl", "delete", "pod", "curl-metrics", "-n", namespace))

		By("undeploying the controller-manager")
		_, _ = utils.Run(exec.Command("make", "undeploy"))

		By("uninstalling CRDs")
		_, _ = utils.Run(exec.Command("make", "uninstall"))

		By("removing manager namespace")
		_, _ = utils.Run(exec.Command("kubectl", "delete", "ns", namespace))
	})

	// After each spec, collect diagnostics if that spec failed: the
	// controller-manager logs, the namespace events, the curl-metrics pod logs
	// and the controller-manager pod description. Everything is written to
	// GinkgoWriter so it is reported together with the failed spec, and a
	// failure to collect one item is logged with its error without preventing
	// the remaining items from being collected.
	AfterEach(func() {
		if !CurrentSpecReport().Failed() {
			return
		}

		By("Fetching controller manager pod logs")
		cmd := exec.Command("kubectl", "logs", controllerPodName, "-n", namespace)
		controllerLogs, err := utils.Run(cmd)
		if err == nil {
			_, _ = fmt.Fprintf(GinkgoWriter, "Controller logs:\n %s", controllerLogs)
		} else {
			_, _ = fmt.Fprintf(GinkgoWriter, "Failed to get Controller logs: %s", err)
		}

		By("Fetching Kubernetes events")
		cmd = exec.Command("kubectl", "get", "events", "-n", namespace, "--sort-by=.lastTimestamp")
		eventsOutput, err := utils.Run(cmd)
		if err == nil {
			_, _ = fmt.Fprintf(GinkgoWriter, "Kubernetes events:\n%s", eventsOutput)
		} else {
			_, _ = fmt.Fprintf(GinkgoWriter, "Failed to get Kubernetes events: %s", err)
		}

		By("Fetching curl-metrics logs")
		cmd = exec.Command("kubectl", "logs", "curl-metrics", "-n", namespace)
		metricsOutput, err := utils.Run(cmd)
		if err == nil {
			_, _ = fmt.Fprintf(GinkgoWriter, "Metrics logs:\n %s", metricsOutput)
		} else {
			_, _ = fmt.Fprintf(GinkgoWriter, "Failed to get curl-metrics logs: %s", err)
		}

		By("Fetching controller manager pod description")
		cmd = exec.Command("kubectl", "describe", "pod", controllerPodName, "-n", namespace)
		podDescription, err := utils.Run(cmd)
		if err == nil {
			// Same sink as the other diagnostics instead of plain stdout.
			_, _ = fmt.Fprintf(GinkgoWriter, "Pod description:\n %s\n", podDescription)
		} else {
			// Keep the underlying error; it usually explains why describe failed.
			_, _ = fmt.Fprintf(GinkgoWriter, "Failed to describe controller pod: %s\n", err)
		}
	})

	// Gomega defaults for Eventually assertions that do not pass their own
	// values: a two-minute timeout and a one-second polling interval.
	SetDefaultEventuallyTimeout(2 * time.Minute)
	SetDefaultEventuallyPollingInterval(time.Second)

	Context("Manager", func() {
		It("should run successfully", func() {
			By("validating that the controller-manager pod is running as expected")
			verifyControllerUp := func(g Gomega) {
				// Get the name of the controller-manager pod
				goTemplate := `{{ range .items }}` +
					`{{ if not .metadata.deletionTimestamp }}` +
					`{{ .metadata.name }}` +
					`{{ "\n" }}{{ end }}{{ end }}`
				cmd := exec.Command("kubectl", "get",
					"pods", "-l", "control-plane=controller-manager",
					"-o", "go-template="+goTemplate,
					"-n", namespace,
				)

				podOutput, err := utils.Run(cmd)
				g.Expect(err).NotTo(HaveOccurred(), "Failed to retrieve controller-manager pod information")
				podNames := utils.GetNonEmptyLines(podOutput)
				g.Expect(podNames).To(HaveLen(1), "expected 1 controller pod running")
				controllerPodName = podNames[0]
				g.Expect(controllerPodName).To(ContainSubstring("controller-manager"))

				// Validate the pod's status
				cmd = exec.Command("kubectl", "get",
					"pods", controllerPodName, "-o", "jsonpath={.status.phase}",
					"-n", namespace,
				)
				output, err := utils.Run(cmd)
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(output).To(Equal("Running"), "Incorrect controller-manager pod status")
			}
			Eventually(verifyControllerUp).Should(Succeed())
		})
	})

	Context("Pool", func() {
		// Hold the Pool specs back until the first listed controller-manager
		// pod reports the Running phase (polled for up to two minutes).
		BeforeAll(func() {
			By("waiting for controller to be ready")
			controllerRunning := func(g Gomega) {
				phase, err := utils.Run(exec.Command(
					"kubectl", "get", "pods", "-l", "control-plane=controller-manager",
					"-n", namespace, "-o", "jsonpath={.items[0].status.phase}",
				))
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(phase).To(Equal("Running"))
			}
			Eventually(controllerRunning, 2*time.Minute).Should(Succeed())
		})

		// Applies a basic Pool manifest, waits until its status exposes the
		// total/allocated/available fields with a total inside
		// [poolMin, poolMax], checks that pods labelled with the pool name
		// exist, and finally deletes the Pool.
		It("should correctly create pods and maintain pool status", func() {
			const (
				poolName      = "test-pool-basic"
				testNamespace = "default"
				poolMin       = 2
				poolMax       = 5
				bufferMin     = 1
				bufferMax     = 3
			)

			By("creating a basic Pool")
			manifest, err := renderTemplate("testdata/pool-basic.yaml", map[string]interface{}{
				"PoolName":     poolName,
				"SandboxImage": utils.SandboxImage,
				"Namespace":    testNamespace,
				"BufferMax":    bufferMax,
				"BufferMin":    bufferMin,
				"PoolMax":      poolMax,
				"PoolMin":      poolMin,
			})
			Expect(err).NotTo(HaveOccurred())

			manifestPath := filepath.Join("/tmp", "test-pool-basic.yaml")
			err = os.WriteFile(manifestPath, []byte(manifest), 0644)
			Expect(err).NotTo(HaveOccurred())
			defer os.Remove(manifestPath)

			_, err = utils.Run(exec.Command("kubectl", "apply", "-f", manifestPath))
			Expect(err).NotTo(HaveOccurred(), "Failed to create Pool")

			By("verifying Pool creates pods and maintains correct status")
			statusIsSane := func(g Gomega) {
				status, err := utils.Run(exec.Command("kubectl", "get", "pool", poolName, "-n", testNamespace,
					"-o", "jsonpath={.status}"))
				g.Expect(err).NotTo(HaveOccurred())

				g.Expect(status).To(ContainSubstring(`"total":`), "Pool status should have total field")
				g.Expect(status).To(ContainSubstring(`"allocated":`), "Pool status should have allocated field")
				g.Expect(status).To(ContainSubstring(`"available":`), "Pool status should have available field")

				rawTotal, err := utils.Run(exec.Command("kubectl", "get", "pool", poolName, "-n", testNamespace,
					"-o", "jsonpath={.status.total}"))
				g.Expect(err).NotTo(HaveOccurred())

				// An empty value counts as zero, which fails the lower bound and triggers a retry.
				var total int
				if len(rawTotal) > 0 {
					fmt.Sscanf(rawTotal, "%d", &total)
				}
				g.Expect(total).To(BeNumerically(">=", poolMin), "Pool total should be >= poolMin")
				g.Expect(total).To(BeNumerically("<=", poolMax), "Pool total should be <= poolMax")
			}
			Eventually(statusIsSane, 2*time.Minute).Should(Succeed())

			By("verifying pods are created")
			podsExist := func(g Gomega) {
				selector := fmt.Sprintf("sandbox.opensandbox.io/pool-name=%s", poolName)
				podNames, err := utils.Run(exec.Command("kubectl", "get", "pods", "-n", testNamespace,
					"-l", selector,
					"-o", "jsonpath={.items[*].metadata.name}"))
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(podNames).NotTo(BeEmpty(), "Pool should create pods")
			}
			Eventually(podsExist, 2*time.Minute).Should(Succeed())

			By("cleaning up the Pool")
			_, err = utils.Run(exec.Command("kubectl", "delete", "pool", poolName, "-n", testNamespace))
			Expect(err).NotTo(HaveOccurred())
		})

		// Creates a Pool, then changes its sizing twice and checks that the
		// reported status.total follows: first poolMin/poolMax are raised to
		// 5/10 (scale up), then lowered to 2/3 (scale down). The Pool is
		// deleted at the end.
		//
		// status.total must be present and must parse as an integer before it
		// is compared. Treating a missing or unparsable value as 0 would let
		// the "<= poolMax" scale-down check pass without proving anything.
		It("should correctly manage capacity when poolMin and poolMax change", func() {
			const poolName = "test-pool-capacity"
			const testNamespace = "default"

			poolFile := filepath.Join("/tmp", "test-pool-capacity.yaml")
			// Registered up front so the manifest is removed even if a later step fails.
			defer os.Remove(poolFile)

			// applyPool renders the basic pool template with the given sizing,
			// writes it to poolFile and applies it with kubectl.
			applyPool := func(bufferMin, bufferMax, poolMin, poolMax int) {
				poolYAML, err := renderTemplate("testdata/pool-basic.yaml", map[string]interface{}{
					"PoolName":     poolName,
					"SandboxImage": utils.SandboxImage,
					"Namespace":    testNamespace,
					"BufferMax":    bufferMax,
					"BufferMin":    bufferMin,
					"PoolMax":      poolMax,
					"PoolMin":      poolMin,
				})
				ExpectWithOffset(1, err).NotTo(HaveOccurred())

				err = os.WriteFile(poolFile, []byte(poolYAML), 0644)
				ExpectWithOffset(1, err).NotTo(HaveOccurred())

				_, err = utils.Run(exec.Command("kubectl", "apply", "-f", poolFile))
				ExpectWithOffset(1, err).NotTo(HaveOccurred())
			}

			// poolTotal reads status.total of the Pool. It fails the current
			// polling attempt when the field is absent or not an integer.
			poolTotal := func(g Gomega) int {
				totalStr, err := utils.Run(exec.Command("kubectl", "get", "pool", poolName, "-n", testNamespace,
					"-o", "jsonpath={.status.total}"))
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(totalStr).NotTo(BeEmpty(), "Pool status.total is not reported yet")

				var total int
				_, err = fmt.Sscanf(totalStr, "%d", &total)
				g.Expect(err).NotTo(HaveOccurred(), "Pool status.total %q is not an integer", totalStr)
				return total
			}

			By("creating a Pool with initial capacity")
			applyPool(1, 3, 2, 5)

			By("waiting for initial Pool to be ready")
			Eventually(func(g Gomega) {
				g.Expect(poolTotal(g)).To(BeNumerically(">=", 2))
			}, 2*time.Minute).Should(Succeed())

			By("increasing poolMin to trigger scale up")
			applyPool(1, 3, 5, 10)

			By("verifying Pool scales up to meet new poolMin")
			Eventually(func(g Gomega) {
				total := poolTotal(g)
				g.Expect(total).To(BeNumerically(">=", 5), "Pool should scale up to meet poolMin=5")
				g.Expect(total).To(BeNumerically("<=", 10), "Pool should not exceed poolMax=10")
			}, 2*time.Minute).Should(Succeed())

			By("decreasing poolMax to below current total")
			applyPool(1, 2, 2, 3)

			By("verifying Pool respects new poolMax constraint")
			Eventually(func(g Gomega) {
				g.Expect(poolTotal(g)).To(BeNumerically("<=", 3), "Pool should scale down to meet poolMax=3")
			}, 2*time.Minute).Should(Succeed())

			By("cleaning up the Pool")
			_, err := utils.Run(exec.Command("kubectl", "delete", "pool", poolName, "-n", testNamespace))
			Expect(err).NotTo(HaveOccurred())
		})

		It("should upgrade pool template correctly", func() {
			const poolName = "test-pool-upgrade"
			const testNamespace = "default"
			const batchSandboxName = "test-bs-for-upgrade"

			By("creating a Pool with initial template")
			poolYAML, err := renderTemplate("testdata/pool-basic.yaml", map[string]interface{}{
				"PoolName":     poolName,
				"SandboxImage": utils.SandboxImage,
				"Namespace":    testNamespace,
				"BufferMax":    3,
				"BufferMin":    2,
				"PoolMax":      5,
				"PoolMin":      2,
			})
			Expect(err).NotTo(HaveOccurred())

			poolFile := filepath.Join("/tmp", "test-pool-upgrade.yaml")
			err = os.WriteFile(poolFile, []byte(poolYAML), 0644)
			Expect(err).NotTo(HaveOccurred())
			defer os.Remove(poolFile)

			cmd := exec.Command("kubectl", "apply", "-f", poolFile)
			_, err = utils.Run(cmd)
			Expect(err).NotTo(HaveOccurred())

			By("waiting for Pool to be ready")
			Eventually(func(g Gomega) {
				cmd := exec.Command("kubectl", "get", "pool", poolName, "-n", testNamespace,
					"-o", "jsonpath={.status.total}")
				totalStr, err := utils.Run(cmd)
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(totalStr).NotTo(BeEmpty())
			}, 2*time.Minute).Should(Succeed())

			By("allocating a pod from the pool via BatchSandbox")
			batchSandboxYAML, err := renderTemplate("testdata/batchsandbox-pooled-no-expire.yaml", map[string]interface{}{
				"BatchSandboxName": batchSandboxName,
				"Namespace":        testNamespace,
				"Replicas":         1,
				"PoolName":         poolName,
			})
			Expect(err).NotTo(HaveOccurred())

			bsFile := filepath.Join("/tmp", "test-bs-upgrade.yaml")
			err = os.WriteFile(bsFile, []byte(batchSandboxYAML), 0644)
			Expect(err).NotTo(HaveOccurred())
			defer os.Remove(bsFile)

			cmd = exec.Command("kubectl", "apply", "-f", bsFile)
			_, err = utils.Run(cmd)
			Expect(err).NotTo(HaveOccurred())

			By("waiting for BatchSandbox to allocate pod")
			var allocatedPodNames []string
			Eventually(func(g Gomega) {
				cmd := exec.Command("kubectl", "get", "batchsandbox", batchSandboxName, "-n", testNamespace,
					"-o", "jsonpath={.status.allocated}")
				output, err := utils.Run(cmd)
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(output).To(Equal("1"))

				cmd = exec.Command("kubectl", "get", "batchsandbox", batchSandboxName, "-n", testNamespace,
					"-o", "jsonpath={.metadata.annotations.sandbox\\.opensandbox\\.io/alloc-status}")
				allocStatusJSON, err := utils.Run(cmd)
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(allocStatusJSON).NotTo(BeEmpty(), "alloc-status annotation should exist")

				var allocStatus struct {
					Pods []string `json:"pods"`
				}
				err = json.Unmarshal([]byte(allocStatusJSON), &allocStatus)
				g.Expect(err).NotTo(HaveOccurred())

				allocatedPodNames = allocStatus.Pods
				g.Expect(len(allocatedPodNames)).To(Equal(1), "Should have 1 allocated pod")
			}, 2*time.Minute).Should(Succeed())

			By("getting all pool pods")
			cmd = exec.Command("kubectl", "get", "pods", "-n", testNamespace,
				"-l", fmt.Sprintf("sandbox.opensandbox.io/pool-name=%s", poolName),
				"-o", "jsonpath={.items[*].metadata.name}")
			allPoolPodsStr, err := utils.Run(cmd)
			Expect(err).NotTo(HaveOccurred())
			allPoolPods := strings.Fields(allPoolPodsStr)

			By("calculating available pods (all pool pods - allocated pods)")
			availablePodsBeforeUpgrade := []string{}
			allocatedPodMap := make(map[string]bool)
			for _, podName := range allocatedPodNames {
				allocatedPodMap[podName] = true
			}
			for _, podName := range allPoolPods {
				if !allocatedPodMap[podName] {
					availablePodsBeforeUpgrade = append(availablePodsBeforeUpgrade, podName)
				}
			}

			By("updating Pool template with new environment variable")
			updatedPoolYAML, err := renderTemplate("testdata/pool-with-env.yaml", map[string]interface{}{
				"PoolName":     poolName,
				"Namespace":    testNamespace,
				"SandboxImage": utils.SandboxImage,
				"BufferMax":    3,
				"BufferMin":    2,
				"PoolMax":      5,
				"PoolMin":      2,
			})
			Expect(err).NotTo(HaveOccurred())

			err = os.WriteFile(poolFile, []byte(updatedPoolYAML), 0644)
			Expect(err).NotTo(HaveOccurred())

			cmd = exec.Command("kubectl", "apply", "-f", poolFile)
			_, err = utils.Run(cmd)
			Expect(err).NotTo(HaveOccurred())

			By("verifying allocated pod is NOT upgraded")
			Consistently(func(g Gomega) {
				for _, allocatedPod := range allocatedPodNames {
					cmd := exec.Command("kubectl", "get", "pod", allocatedPod, "-n", testNamespace,
						"-o", "jsonpath={.metadata.name}")
					output, err := utils.Run(cmd)
					g.Expect(err).NotTo(HaveOccurred())
					g.Expect(output).To(Equal(allocatedPod), "Allocated pod should not be recreated")
				}
			}, 30*time.Second, 3*time.Second).Should(Succeed())

			By("verifying available pods are recreated with new template")
			Eventually(func(g Gomega) {
				cmd := exec.Command("kubectl", "get", "pods", "-n", testNamespace,
					"-l", fmt.Sprintf("sandbox.opensandbox.io/pool-name=%s", poolName),
					"-o", "jsonpath={.items[*].metadata.name}")
				allPodsAfterStr, err := utils.Run(cmd)
				g.Expect(err).NotTo(HaveOccurred())
				allPodsAfter := strings.Fields(allPodsAfterStr)

				// Get currently allocated pods
				cmd = exec.Command("kubectl", "get", "batchsandbox", batchSandboxName, "-n", testNamespace,
					"-o", "jsonpath={.metadata.annotations.sandbox\\.opensandbox\\.io/alloc-status}")
				allocStatusJSON, err := utils.Run(cmd)
				g.Expect(err).NotTo(HaveOccurred())

				var allocStatus struct {
					Pods []string `json:"pods"`
				}
				err = json.Unmarshal([]byte(allocStatusJSON), &allocStatus)
				g.Expect(err).NotTo(HaveOccurred())

				currentAllocatedPods := make(map[string]bool)
				for _, podName := range allocStatus.Pods {
					currentAllocatedPods[podName] = true
				}

				// Calculate available pods after upgrade
				availablePodsAfterUpgrade := []string{}
				for _, podName := range allPodsAfter {
					if !currentAllocatedPods[podName] {
						availablePodsAfterUpgrade = append(availablePodsAfterUpgrade, podName)
					}
				}

				// Check if at least one available pod was recreated
				recreated := false
				for _, oldPod := range availablePodsBeforeUpgrade {
					found := false
					for _, newPod := range availablePodsAfterUpgrade {
						if oldPod == newPod {
							found = true
							break
						}
					}
					if !found {
						recreated = true
						break
					}
				}
				g.Expect(recreated).To(BeTrue(), "At least one available pod should be recreated")
			}, 3*time.Minute).Should(Succeed())

			By("verifying new pods have the upgraded environment variable")
			Eventually(func(g Gomega) {
				cmd := exec.Command("kubectl", "get", "pods", "-n", testNamespace,
					"-l", fmt.Sprintf("sandbox.opensandbox.io/pool-name=%s", poolName),
					"-o", "json")
				output, err := utils.Run(cmd)
				g.Expect(err).NotTo(HaveOccurred())

				var podList struct {
					Items []struct {
						Metadata struct {
							Name string `json:"name"`
						} `json:"metadata"`
						Spec struct {
							Containers []struct {
								Name string `json:"name"`
								Env  []struct {
									Name  string `json:"name"`
									Value string `json:"value"`
								} `json:"env"`
							} `json:"containers"`
						} `json:"spec"`
					} `json:"items"`
				}
				err = json.Unmarshal([]byte(output), &podList)
				g.Expect(err).NotTo(HaveOccurred())

				// Get currently allocated pods
				cmd = exec.Command("kubectl", "get", "batchsandbox", batchSandboxName, "-n", testNamespace,
					"-o", "jsonpath={.metadata.annotations.sandbox\\.opensandbox\\.io/alloc-status}")
				allocStatusJSON, err := utils.Run(cmd)
				g.Expect(err).NotTo(HaveOccurred())

				var allocStatus struct {
					Pods []string `json:"pods"`
				}
				err = json.Unmarshal([]byte(allocStatusJSON), &allocStatus)
				g.Expect(err).NotTo(HaveOccurred())

				allocatedPodMap := make(map[string]bool)
				for _, podName := range allocStatus.Pods {
					allocatedPodMap[podName] = true
				}

				// Find at least one available pod with UPGRADED=true
				foundUpgraded := false
				for _, pod := range podList.Items {
					if !allocatedPodMap[pod.Metadata.Name] {
						// This is an available pod
						for _, container := range pod.Spec.Containers {
							if container.Name == "sandbox-container" {
								for _, env := range container.Env {
									if env.Name == "UPGRADED" && env.Value == "true" {
										foundUpgraded = true
										break
									}
								}
							}
						}
					}
				}
				g.Expect(foundUpgraded).To(BeTrue(), "At least one available pod should have UPGRADED=true env var")
			}, 2*time.Minute).Should(Succeed())

			By("cleaning up BatchSandbox and Pool")
			cmd = exec.Command("kubectl", "delete", "batchsandbox", batchSandboxName, "-n", testNamespace)
			_, _ = utils.Run(cmd)

			cmd = exec.Command("kubectl", "delete", "pool", poolName, "-n", testNamespace)
			_, err = utils.Run(cmd)
			Expect(err).NotTo(HaveOccurred())
		})
	})

	Context("BatchSandbox", func() {
		// Before the BatchSandbox specs run, poll (for up to two minutes) until
		// the first pod labelled control-plane=controller-manager reports the
		// Running phase.
		BeforeAll(func() {
			By("waiting for controller to be ready")
			controllerIsRunning := func(g Gomega) {
				phase, runErr := utils.Run(exec.Command(
					"kubectl", "get", "pods",
					"-l", "control-plane=controller-manager",
					"-n", namespace,
					"-o", "jsonpath={.items[0].status.phase}",
				))
				g.Expect(runErr).NotTo(HaveOccurred())
				g.Expect(phase).To(Equal("Running"))
			}
			Eventually(controllerIsRunning, 2*time.Minute).Should(Succeed())
		})

		// Covers a BatchSandbox that is rendered from the non-pooled template:
		// pods owned by the BatchSandbox appear, the status counters and the
		// endpoints annotation are populated, and the owned pods are removed
		// (or marked for deletion) once the BatchSandbox is deleted.
		It("should work correctly in non-pooled mode", func() {
			const batchSandboxName = "test-bs-non-pooled"
			const testNamespace = "default"
			const replicas = 2

			// Minimal projection of `kubectl get pods -o json` needed below.
			type podOwner struct {
				Kind string `json:"kind"`
				Name string `json:"name"`
			}
			type podEntry struct {
				Metadata struct {
					Name              string     `json:"name"`
					DeletionTimestamp *string    `json:"deletionTimestamp"`
					OwnerReferences   []podOwner `json:"ownerReferences"`
				} `json:"metadata"`
			}

			// fetchPods lists and decodes every pod in the test namespace.
			fetchPods := func(g Gomega) []podEntry {
				raw, listErr := utils.Run(exec.Command("kubectl", "get", "pods", "-n", testNamespace, "-o", "json"))
				g.Expect(listErr).NotTo(HaveOccurred())

				var listing struct {
					Items []podEntry `json:"items"`
				}
				g.Expect(json.Unmarshal([]byte(raw), &listing)).NotTo(HaveOccurred())
				return listing.Items
			}

			// ownedByTarget reports whether the pod names the BatchSandbox
			// under test in one of its owner references.
			ownedByTarget := func(p podEntry) bool {
				for _, ref := range p.Metadata.OwnerReferences {
					if ref.Kind == "BatchSandbox" && ref.Name == batchSandboxName {
						return true
					}
				}
				return false
			}

			By("creating a non-pooled BatchSandbox")
			manifest, err := renderTemplate("testdata/batchsandbox-non-pooled.yaml", map[string]interface{}{
				"BatchSandboxName": batchSandboxName,
				"SandboxImage":     utils.SandboxImage,
				"Namespace":        testNamespace,
				"Replicas":         replicas,
			})
			Expect(err).NotTo(HaveOccurred())

			manifestPath := filepath.Join("/tmp", "test-bs-non-pooled.yaml")
			Expect(os.WriteFile(manifestPath, []byte(manifest), 0644)).NotTo(HaveOccurred())
			defer os.Remove(manifestPath)

			_, err = utils.Run(exec.Command("kubectl", "apply", "-f", manifestPath))
			Expect(err).NotTo(HaveOccurred())

			By("verifying pods are created directly from template")
			Eventually(func(g Gomega) {
				// Collect the names of pods owned by this BatchSandbox.
				owned := []string{}
				for _, p := range fetchPods(g) {
					if ownedByTarget(p) {
						owned = append(owned, p.Metadata.Name)
					}
				}
				g.Expect(len(owned)).To(Equal(replicas), "Should create %d pods", replicas)
			}, 2*time.Minute).Should(Succeed())

			By("verifying BatchSandbox status is correctly updated")
			Eventually(func(g Gomega) {
				status, getErr := utils.Run(exec.Command(
					"kubectl", "get", "batchsandbox", batchSandboxName, "-n", testNamespace,
					"-o", "jsonpath={.status}"))
				g.Expect(getErr).NotTo(HaveOccurred())
				// Each counter must equal the requested replica count.
				for _, counter := range []string{"replicas", "allocated", "ready"} {
					g.Expect(status).To(ContainSubstring(fmt.Sprintf(`"%s":%d`, counter, replicas)))
				}
			}, 2*time.Minute).Should(Succeed())

			By("verifying endpoint annotation is set")
			Eventually(func(g Gomega) {
				annotation, getErr := utils.Run(exec.Command(
					"kubectl", "get", "batchsandbox", batchSandboxName, "-n", testNamespace,
					"-o", "jsonpath={.metadata.annotations.sandbox\\.opensandbox\\.io/endpoints}"))
				g.Expect(getErr).NotTo(HaveOccurred())
				g.Expect(annotation).NotTo(BeEmpty())
				// The annotation is a comma-separated list, one entry per replica.
				g.Expect(len(strings.Split(annotation, ","))).To(Equal(replicas))
			}, 30*time.Second).Should(Succeed())

			By("cleaning up BatchSandbox")
			_, err = utils.Run(exec.Command("kubectl", "delete", "batchsandbox", batchSandboxName, "-n", testNamespace))
			Expect(err).NotTo(HaveOccurred())

			By("verifying pods are deleted")
			Eventually(func(g Gomega) {
				// Any pod still owned by the BatchSandbox must at least be terminating.
				for _, p := range fetchPods(g) {
					if !ownedByTarget(p) {
						continue
					}
					g.Expect(p.Metadata.DeletionTimestamp).NotTo(BeNil(),
						"Pod %s owned by BatchSandbox should have deletionTimestamp set", p.Metadata.Name)
				}
			}, 2*time.Minute).Should(Succeed())
		})

		// Covers a BatchSandbox that draws its pods from a Pool: the pods
		// listed in the alloc-status annotation carry the pool label, status
		// and endpoints are populated, and deleting the BatchSandbox lowers
		// the Pool's allocated count again.
		It("should work correctly in pooled mode", func() {
			const poolName = "test-pool-for-bs"
			const batchSandboxName = "test-bs-pooled"
			const testNamespace = "default"
			const replicas = 2

			By("creating a Pool")
			poolYAML, err := renderTemplate("testdata/pool-basic.yaml", map[string]interface{}{
				"PoolName":     poolName,
				"SandboxImage": utils.SandboxImage,
				"Namespace":    testNamespace,
				"BufferMax":    3,
				"BufferMin":    2,
				"PoolMax":      5,
				"PoolMin":      2,
			})
			Expect(err).NotTo(HaveOccurred())

			poolFile := filepath.Join("/tmp", "test-pool-for-bs.yaml")
			err = os.WriteFile(poolFile, []byte(poolYAML), 0644)
			Expect(err).NotTo(HaveOccurred())
			defer os.Remove(poolFile)

			cmd := exec.Command("kubectl", "apply", "-f", poolFile)
			_, err = utils.Run(cmd)
			Expect(err).NotTo(HaveOccurred())

			By("waiting for Pool to be ready")
			Eventually(func(g Gomega) {
				cmd := exec.Command("kubectl", "get", "pool", poolName, "-n", testNamespace,
					"-o", "jsonpath={.status.total}")
				totalStr, err := utils.Run(cmd)
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(totalStr).NotTo(BeEmpty())
			}, 2*time.Minute).Should(Succeed())

			By("creating a pooled BatchSandbox")
			bsYAML, err := renderTemplate("testdata/batchsandbox-pooled-no-expire.yaml", map[string]interface{}{
				"BatchSandboxName": batchSandboxName,
				"SandboxImage":     utils.SandboxImage,
				"Namespace":        testNamespace,
				"Replicas":         replicas,
				"PoolName":         poolName,
			})
			Expect(err).NotTo(HaveOccurred())

			bsFile := filepath.Join("/tmp", "test-bs-pooled.yaml")
			err = os.WriteFile(bsFile, []byte(bsYAML), 0644)
			Expect(err).NotTo(HaveOccurred())
			defer os.Remove(bsFile)

			cmd = exec.Command("kubectl", "apply", "-f", bsFile)
			_, err = utils.Run(cmd)
			Expect(err).NotTo(HaveOccurred())

			By("verifying BatchSandbox allocates pods from pool")
			Eventually(func(g Gomega) {
				// Verify alloc-status annotation contains pool pod names.
				// Closure-local commands: the poll must not overwrite the
				// spec-level cmd variable it would otherwise capture.
				cmd := exec.Command("kubectl", "get", "batchsandbox", batchSandboxName, "-n", testNamespace,
					"-o", "jsonpath={.metadata.annotations.sandbox\\.opensandbox\\.io/alloc-status}")
				allocStatusJSON, err := utils.Run(cmd)
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(allocStatusJSON).NotTo(BeEmpty(), "alloc-status annotation should exist")

				var allocStatus struct {
					Pods []string `json:"pods"`
				}
				err = json.Unmarshal([]byte(allocStatusJSON), &allocStatus)
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(len(allocStatus.Pods)).To(Equal(replicas), "Should have %d pods in alloc-status", replicas)

				// Verify the pods in alloc-status are from the pool
				for _, podName := range allocStatus.Pods {
					podCmd := exec.Command("kubectl", "get", "pod", podName, "-n", testNamespace,
						"-o", "jsonpath={.metadata.labels.sandbox\\.opensandbox\\.io/pool-name}")
					poolLabel, err := utils.Run(podCmd)
					g.Expect(err).NotTo(HaveOccurred())
					g.Expect(poolLabel).To(Equal(poolName), "Pod %s should be from pool %s", podName, poolName)
				}
			}, 2*time.Minute).Should(Succeed())

			By("verifying BatchSandbox status is correctly updated")
			Eventually(func(g Gomega) {
				cmd := exec.Command("kubectl", "get", "batchsandbox", batchSandboxName, "-n", testNamespace,
					"-o", "jsonpath={.status}")
				statusOutput, err := utils.Run(cmd)
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(statusOutput).To(ContainSubstring(fmt.Sprintf(`"replicas":%d`, replicas)))
				g.Expect(statusOutput).To(ContainSubstring(fmt.Sprintf(`"ready":%d`, replicas)))
			}, 30*time.Second).Should(Succeed())

			By("verifying endpoint annotation is set")
			Eventually(func(g Gomega) {
				cmd := exec.Command("kubectl", "get", "batchsandbox", batchSandboxName, "-n", testNamespace,
					"-o", "jsonpath={.metadata.annotations.sandbox\\.opensandbox\\.io/endpoints}")
				output, err := utils.Run(cmd)
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(output).NotTo(BeEmpty())
				endpoints := strings.Split(output, ",")
				g.Expect(len(endpoints)).To(Equal(replicas))
			}, 30*time.Second).Should(Succeed())

			By("recording Pool allocated count")
			// Poll until the Pool reports a positive allocated count. A single
			// read can observe a Pool status that has not caught up with the
			// allocation yet, which would leave a zero baseline and make the
			// "count should decrease" check below impossible to satisfy.
			allocatedBefore := 0
			Eventually(func(g Gomega) {
				cmd := exec.Command("kubectl", "get", "pool", poolName, "-n", testNamespace,
					"-o", "jsonpath={.status.allocated}")
				output, err := utils.Run(cmd)
				g.Expect(err).NotTo(HaveOccurred())

				count := 0
				_, scanErr := fmt.Sscanf(output, "%d", &count)
				g.Expect(scanErr).NotTo(HaveOccurred(), "Pool status.allocated should be a number, got %q", output)
				g.Expect(count).To(BeNumerically(">", 0), "Pool should report allocated pods")
				allocatedBefore = count
			}, 30*time.Second).Should(Succeed())

			By("cleaning up BatchSandbox")
			cmd = exec.Command("kubectl", "delete", "batchsandbox", batchSandboxName, "-n", testNamespace)
			_, err = utils.Run(cmd)
			Expect(err).NotTo(HaveOccurred())

			By("verifying pods are returned to pool")
			Eventually(func(g Gomega) {
				cmd := exec.Command("kubectl", "get", "pool", poolName, "-n", testNamespace,
					"-o", "jsonpath={.status.allocated}")
				allocatedAfter, err := utils.Run(cmd)
				g.Expect(err).NotTo(HaveOccurred())

				// An empty field is treated as zero allocated pods.
				after := 0
				if allocatedAfter != "" {
					fmt.Sscanf(allocatedAfter, "%d", &after)
				}
				g.Expect(after).To(BeNumerically("<", allocatedBefore), "Allocated count should decrease")
			}, 30*time.Second).Should(Succeed())

			By("cleaning up Pool")
			cmd = exec.Command("kubectl", "delete", "pool", poolName, "-n", testNamespace)
			_, err = utils.Run(cmd)
			Expect(err).NotTo(HaveOccurred())
		})

		// Creates a non-pooled BatchSandbox whose expireTime lies 45 seconds
		// in the future, then checks that the BatchSandbox disappears and that
		// the pods it owned are gone or marked for deletion.
		It("should expire and delete non-pooled BatchSandbox correctly", func() {
			const batchSandboxName = "test-bs-expire-non-pooled"
			const testNamespace = "default"
			const replicas = 1

			// Minimal projection of `kubectl get pods -o json` needed below.
			type ownerRef struct {
				Kind string `json:"kind"`
				Name string `json:"name"`
			}
			type podListing struct {
				Items []struct {
					Metadata struct {
						Name              string     `json:"name"`
						DeletionTimestamp *string    `json:"deletionTimestamp"`
						OwnerReferences   []ownerRef `json:"ownerReferences"`
					} `json:"metadata"`
				} `json:"items"`
			}

			// ownsPod reports whether one of the owner references names the
			// BatchSandbox under test.
			ownsPod := func(refs []ownerRef) bool {
				for _, ref := range refs {
					if ref.Kind == "BatchSandbox" && ref.Name == batchSandboxName {
						return true
					}
				}
				return false
			}

			By("creating a non-pooled BatchSandbox with expireTime")
			expireTime := time.Now().Add(45 * time.Second).UTC().Format(time.RFC3339)

			manifest, err := renderTemplate("testdata/batchsandbox-non-pooled-expire.yaml", map[string]interface{}{
				"BatchSandboxName": batchSandboxName,
				"Namespace":        testNamespace,
				"Replicas":         replicas,
				"ExpireTime":       expireTime,
				"SandboxImage":     utils.SandboxImage,
			})
			Expect(err).NotTo(HaveOccurred())

			manifestPath := filepath.Join("/tmp", "test-bs-expire-non-pooled.yaml")
			Expect(os.WriteFile(manifestPath, []byte(manifest), 0644)).NotTo(HaveOccurred())
			defer os.Remove(manifestPath)

			_, err = utils.Run(exec.Command("kubectl", "apply", "-f", manifestPath))
			Expect(err).NotTo(HaveOccurred())

			By("verifying BatchSandbox is created")
			Eventually(func(g Gomega) {
				allocated, getErr := utils.Run(exec.Command(
					"kubectl", "get", "batchsandbox", batchSandboxName, "-n", testNamespace,
					"-o", "jsonpath={.status.allocated}"))
				g.Expect(getErr).NotTo(HaveOccurred())
				g.Expect(allocated).To(Equal(fmt.Sprintf("%d", replicas)))
			}, 2*time.Minute).Should(Succeed())

			By("recording pod names")
			rawPods, err := utils.Run(exec.Command("kubectl", "get", "pods", "-n", testNamespace, "-o", "json"))
			Expect(err).NotTo(HaveOccurred())

			var snapshot podListing
			Expect(json.Unmarshal([]byte(rawPods), &snapshot)).NotTo(HaveOccurred())

			ownedPodNames := []string{}
			for _, item := range snapshot.Items {
				if ownsPod(item.Metadata.OwnerReferences) {
					ownedPodNames = append(ownedPodNames, item.Metadata.Name)
				}
			}
			Expect(len(ownedPodNames)).To(BeNumerically(">", 0), "Should have pods owned by BatchSandbox")

			By("waiting for BatchSandbox to expire and be deleted")
			Eventually(func(g Gomega) {
				// The lookup itself must fail with a "not found" error.
				_, getErr := utils.Run(exec.Command("kubectl", "get", "batchsandbox", batchSandboxName, "-n", testNamespace))
				g.Expect(getErr).To(HaveOccurred())
				g.Expect(getErr.Error()).To(ContainSubstring("not found"))
			}, 2*time.Minute).Should(Succeed())

			By("verifying pods are deleted")
			Eventually(func(g Gomega) {
				raw, listErr := utils.Run(exec.Command("kubectl", "get", "pods", "-n", testNamespace, "-o", "json"))
				g.Expect(listErr).NotTo(HaveOccurred())

				var current podListing
				g.Expect(json.Unmarshal([]byte(raw), &current)).NotTo(HaveOccurred())

				// Verify no pods are owned by the deleted BatchSandbox or they have deletionTimestamp
				for _, item := range current.Items {
					if !ownsPod(item.Metadata.OwnerReferences) {
						continue
					}
					g.Expect(item.Metadata.DeletionTimestamp).NotTo(BeNil(),
						"Pod %s owned by BatchSandbox should have deletionTimestamp set", item.Metadata.Name)
				}
			}, 30*time.Second).Should(Succeed())
		})

		// Creates a Pool and a pooled BatchSandbox whose expireTime lies 45
		// seconds in the future, then checks that after expiry the
		// BatchSandbox is gone, its pods still exist, and the Pool's allocated
		// count has dropped back below the value seen while it was allocated.
		It("should expire and return pooled BatchSandbox pods to pool", func() {
			const poolName = "test-pool-for-expire"
			const batchSandboxName = "test-bs-expire-pooled"
			const testNamespace = "default"
			const replicas = 1

			// readPoolAllocated returns the raw .status.allocated field of the Pool.
			readPoolAllocated := func() (string, error) {
				return utils.Run(exec.Command("kubectl", "get", "pool", poolName, "-n", testNamespace,
					"-o", "jsonpath={.status.allocated}"))
			}

			// toCount parses a counter string; an empty or unparsable value counts as 0.
			toCount := func(raw string) int {
				n := 0
				if raw != "" {
					fmt.Sscanf(raw, "%d", &n)
				}
				return n
			}

			By("creating a Pool")
			poolManifest, err := renderTemplate("testdata/pool-basic.yaml", map[string]interface{}{
				"PoolName":     poolName,
				"SandboxImage": utils.SandboxImage,
				"Namespace":    testNamespace,
				"BufferMax":    3,
				"BufferMin":    2,
				"PoolMax":      5,
				"PoolMin":      2,
			})
			Expect(err).NotTo(HaveOccurred())

			poolPath := filepath.Join("/tmp", "test-pool-for-expire.yaml")
			Expect(os.WriteFile(poolPath, []byte(poolManifest), 0644)).NotTo(HaveOccurred())
			defer os.Remove(poolPath)

			_, err = utils.Run(exec.Command("kubectl", "apply", "-f", poolPath))
			Expect(err).NotTo(HaveOccurred())

			By("waiting for Pool to be ready")
			Eventually(func(g Gomega) {
				total, getErr := utils.Run(exec.Command("kubectl", "get", "pool", poolName, "-n", testNamespace,
					"-o", "jsonpath={.status.total}"))
				g.Expect(getErr).NotTo(HaveOccurred())
				g.Expect(total).NotTo(BeEmpty())
			}, 2*time.Minute).Should(Succeed())

			By("recording Pool allocated count before BatchSandbox creation")
			allocatedBeforeBS, err := readPoolAllocated()
			Expect(err).NotTo(HaveOccurred())

			By("creating a pooled BatchSandbox with expireTime")
			expireTime := time.Now().Add(45 * time.Second).UTC().Format(time.RFC3339)
			bsManifest, err := renderTemplate("testdata/batchsandbox-pooled.yaml", map[string]interface{}{
				"BatchSandboxName": batchSandboxName,
				"SandboxImage":     utils.SandboxImage,
				"Namespace":        testNamespace,
				"Replicas":         replicas,
				"PoolName":         poolName,
				"ExpireTime":       expireTime,
			})
			Expect(err).NotTo(HaveOccurred())

			bsPath := filepath.Join("/tmp", "test-bs-expire-pooled.yaml")
			Expect(os.WriteFile(bsPath, []byte(bsManifest), 0644)).NotTo(HaveOccurred())
			defer os.Remove(bsPath)

			_, err = utils.Run(exec.Command("kubectl", "apply", "-f", bsPath))
			Expect(err).NotTo(HaveOccurred())

			By("recording pod names from alloc-status")
			var podNamesList []string
			Eventually(func(g Gomega) {
				annotation, getErr := utils.Run(exec.Command(
					"kubectl", "get", "batchsandbox", batchSandboxName, "-n", testNamespace,
					"-o", "jsonpath={.metadata.annotations.sandbox\\.opensandbox\\.io/alloc-status}"))
				g.Expect(getErr).NotTo(HaveOccurred())
				g.Expect(annotation).NotTo(BeEmpty())

				var decoded struct {
					Pods []string `json:"pods"`
				}
				g.Expect(json.Unmarshal([]byte(annotation), &decoded)).NotTo(HaveOccurred())
				podNamesList = decoded.Pods
				g.Expect(len(podNamesList)).To(BeNumerically(">", 0), "Should have allocated pods")
			}, 2*time.Minute).Should(Succeed())

			// Last value read while the BatchSandbox held its pods; used as
			// the baseline for the post-expiry comparison further down.
			allocatedAfterBS := ""
			By("verifying Pool allocated count increased after BatchSandbox allocation")
			Eventually(func(g Gomega) {
				current, getErr := readPoolAllocated()
				g.Expect(getErr).NotTo(HaveOccurred())
				allocatedAfterBS = current

				g.Expect(toCount(current)).To(BeNumerically(">", toCount(allocatedBeforeBS)),
					"Pool allocated count should increase after BatchSandbox allocation")
			}, 30*time.Second).Should(Succeed())

			By("waiting for BatchSandbox to expire and be deleted")
			Eventually(func(g Gomega) {
				// The lookup itself must fail with a "not found" error.
				_, getErr := utils.Run(exec.Command("kubectl", "get", "batchsandbox", batchSandboxName, "-n", testNamespace))
				g.Expect(getErr).To(HaveOccurred())
				g.Expect(getErr.Error()).To(ContainSubstring("not found"))
			}, 2*time.Minute).Should(Succeed())

			By("verifying pods still exist and are returned to pool")
			Eventually(func(g Gomega) {
				for _, podName := range podNamesList {
					found, getErr := utils.Run(exec.Command("kubectl", "get", "pod", podName, "-n", testNamespace,
						"-o", "jsonpath={.metadata.name}"))
					g.Expect(getErr).NotTo(HaveOccurred())
					g.Expect(found).To(Equal(podName), "Pod should still exist")
				}
			}, 30*time.Second).Should(Succeed())

			By("verifying Pool allocated count decreased after BatchSandbox expiration")
			Eventually(func(g Gomega) {
				allocatedAfterExpiration, getErr := readPoolAllocated()
				g.Expect(getErr).NotTo(HaveOccurred())

				g.Expect(toCount(allocatedAfterExpiration)).To(BeNumerically("<", toCount(allocatedAfterBS)),
					"Allocated count should decrease")
			}, 30*time.Second).Should(Succeed())

			By("cleaning up Pool")
			_, err = utils.Run(exec.Command("kubectl", "delete", "pool", poolName, "-n", testNamespace))
			Expect(err).NotTo(HaveOccurred())
		})
	})

	Context("Task", func() {
		// Before the Task specs run, poll (for up to two minutes) until the
		// first pod labelled control-plane=controller-manager reports the
		// Running phase.
		BeforeAll(func() {
			By("waiting for controller to be ready")
			Eventually(func(g Gomega) {
				args := []string{
					"get", "pods",
					"-l", "control-plane=controller-manager",
					"-n", namespace,
					"-o", "jsonpath={.items[0].status.phase}",
				}
				phase, runErr := utils.Run(exec.Command("kubectl", args...))
				g.Expect(runErr).NotTo(HaveOccurred())
				g.Expect(phase).To(Equal("Running"))
			}, 2*time.Minute).Should(Succeed())
		})

		// End-to-end check of task scheduling on pooled sandboxes: a Pool is
		// created from the task-executor template, a BatchSandbox with
		// process-based tasks is allocated from it, all tasks must report
		// success, and after the BatchSandbox is deleted the Pool's allocated
		// count must not have grown.
		It("should successfully manage Pool with task scheduling", func() {
			const poolName = "test-pool"
			const batchSandboxName = "test-batchsandbox-with-task"
			const testNamespace = "default"
			const replicas = 2

			By("creating a Pool with task-executor sidecar")
			poolTemplateFile := filepath.Join("testdata", "pool-with-task-executor.yaml")
			poolYAML, err := renderTemplate(poolTemplateFile, map[string]interface{}{
				"PoolName":          poolName,
				"Namespace":         testNamespace,
				"TaskExecutorImage": utils.TaskExecutorImage,
			})
			Expect(err).NotTo(HaveOccurred())

			poolFile := filepath.Join("/tmp", "test-pool.yaml")
			err = os.WriteFile(poolFile, []byte(poolYAML), 0644)
			Expect(err).NotTo(HaveOccurred())
			// Deferred so the rendered manifest is removed even when a later
			// assertion fails and aborts the spec.
			defer os.Remove(poolFile)

			cmd := exec.Command("kubectl", "apply", "-f", poolFile)
			_, err = utils.Run(cmd)
			Expect(err).NotTo(HaveOccurred(), "Failed to create Pool")

			By("waiting for Pool to be ready")
			verifyPoolReady := func(g Gomega) {
				cmd := exec.Command("kubectl", "get", "pool", poolName, "-n", testNamespace,
					"-o", "jsonpath={.status.total}")
				output, err := utils.Run(cmd)
				By(fmt.Sprintf("waiting for Pool to be ready, output %s", output))
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(output).NotTo(BeEmpty(), "Pool status.total should not be empty")
			}
			Eventually(verifyPoolReady, 2*time.Minute).Should(Succeed())

			By("creating a BatchSandbox with process-based tasks using the Pool")
			batchSandboxTemplateFile := filepath.Join("testdata", "batchsandbox-with-process-task.yaml")
			batchSandboxYAML, err := renderTemplate(batchSandboxTemplateFile, map[string]interface{}{
				"BatchSandboxName":  batchSandboxName,
				"Namespace":         testNamespace,
				"Replicas":          replicas,
				"PoolName":          poolName,
				"TaskExecutorImage": utils.TaskExecutorImage,
			})
			Expect(err).NotTo(HaveOccurred())

			batchSandboxFile := filepath.Join("/tmp", "test-batchsandbox.yaml")
			err = os.WriteFile(batchSandboxFile, []byte(batchSandboxYAML), 0644)
			Expect(err).NotTo(HaveOccurred())
			// Same reasoning as for poolFile: clean up on every exit path.
			defer os.Remove(batchSandboxFile)

			cmd = exec.Command("kubectl", "apply", "-f", batchSandboxFile)
			_, err = utils.Run(cmd)
			Expect(err).NotTo(HaveOccurred(), "Failed to create BatchSandbox")

			By("verifying BatchSandbox successfully allocated endpoints")
			verifyBatchSandboxAllocated := func(g Gomega) {
				cmd := exec.Command("kubectl", "get", "batchsandbox", batchSandboxName, "-n", testNamespace,
					"-o", "jsonpath={.status.allocated}")
				output, err := utils.Run(cmd)
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(output).To(Equal(fmt.Sprintf("%d", replicas)), "BatchSandbox should allocate %d replicas", replicas)
			}
			Eventually(verifyBatchSandboxAllocated, 2*time.Minute).Should(Succeed())

			By("verifying BatchSandbox endpoints are available")
			verifyEndpoints := func(g Gomega) {
				cmd := exec.Command("kubectl", "get", "batchsandbox", batchSandboxName, "-n", testNamespace,
					"-o", "jsonpath={.metadata.annotations.sandbox\\.opensandbox\\.io/endpoints}")
				output, err := utils.Run(cmd)
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(output).NotTo(BeEmpty(), "BatchSandbox should have sandbox.opensandbox.io/endpoints annotation")
				// The annotation is a comma-separated list, one entry per replica.
				endpoints := strings.Split(output, ",")
				g.Expect(len(endpoints)).To(Equal(replicas), "Should have %d endpoints", replicas)
			}
			Eventually(verifyEndpoints, 30*time.Second).Should(Succeed())

			By("verifying BatchSandbox status is as expected")
			verifyBatchSandboxStatus := func(g Gomega) {
				cmd := exec.Command("kubectl", "get", "batchsandbox", batchSandboxName, "-n", testNamespace,
					"-o", "jsonpath={.status}")
				statusOutput, err := utils.Run(cmd)
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(statusOutput).To(ContainSubstring(fmt.Sprintf(`"replicas":%d`, replicas)))
				g.Expect(statusOutput).To(ContainSubstring(fmt.Sprintf(`"allocated":%d`, replicas)))
				g.Expect(statusOutput).To(ContainSubstring(fmt.Sprintf(`"ready":%d`, replicas)))
			}
			Eventually(verifyBatchSandboxStatus, 30*time.Second).Should(Succeed())

			By("verifying all tasks are successfully scheduled and succeeded")
			verifyTasksSucceeded := func(g Gomega) {
				cmd := exec.Command("kubectl", "get", "batchsandbox", batchSandboxName, "-n", testNamespace,
					"-o", "jsonpath={.status.taskSucceed}")
				output, err := utils.Run(cmd)
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(output).To(Equal(fmt.Sprintf("%d", replicas)), "All tasks should succeed")

				cmd = exec.Command("kubectl", "get", "batchsandbox", batchSandboxName, "-n", testNamespace,
					"-o", "jsonpath={.status.taskFailed}")
				output, err = utils.Run(cmd)
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(output).To(Equal("0"), "No tasks should fail")
			}
			Eventually(verifyTasksSucceeded, 2*time.Minute).Should(Succeed())

			By("recording Pool status before deletion")
			cmd = exec.Command("kubectl", "get", "pool", poolName, "-n", testNamespace,
				"-o", "jsonpath={.status.allocated}")
			poolAllocatedBefore, err := utils.Run(cmd)
			Expect(err).NotTo(HaveOccurred())

			By("deleting the BatchSandbox")
			cmd = exec.Command("kubectl", "delete", "batchsandbox", batchSandboxName, "-n", testNamespace)
			_, err = utils.Run(cmd)
			Expect(err).NotTo(HaveOccurred(), "Failed to delete BatchSandbox")

			By("verifying all tasks are unloaded and BatchSandbox is deleted")
			verifyBatchSandboxDeleted := func(g Gomega) {
				cmd := exec.Command("kubectl", "get", "batchsandbox", batchSandboxName, "-n", testNamespace)
				_, err := utils.Run(cmd)
				g.Expect(err).To(HaveOccurred(), "BatchSandbox should be deleted")
				g.Expect(err.Error()).To(ContainSubstring("not found"))
			}
			Eventually(verifyBatchSandboxDeleted, 2*time.Minute).Should(Succeed())

			By("verifying pods are returned to the Pool")
			verifyPodsReturnedToPool := func(g Gomega) {
				cmd := exec.Command("kubectl", "get", "pool", poolName, "-n", testNamespace,
					"-o", "jsonpath={.status.allocated}")
				poolAllocatedAfter, err := utils.Run(cmd)
				g.Expect(err).NotTo(HaveOccurred())

				// An empty field is treated as zero allocated pods.
				beforeCount := 0
				if poolAllocatedBefore != "" {
					fmt.Sscanf(poolAllocatedBefore, "%d", &beforeCount)
				}
				afterCount := 0
				if poolAllocatedAfter != "" {
					fmt.Sscanf(poolAllocatedAfter, "%d", &afterCount)
				}
				g.Expect(afterCount).To(BeNumerically("<=", beforeCount),
					"Pool allocated count should decrease or stay same after BatchSandbox deletion")
			}
			Eventually(verifyPodsReturnedToPool, 30*time.Second).Should(Succeed())

			By("cleaning up the Pool")
			cmd = exec.Command("kubectl", "delete", "pool", poolName, "-n", testNamespace)
			_, err = utils.Run(cmd)
			Expect(err).NotTo(HaveOccurred(), "Failed to delete Pool")
			// The temporary manifest files are removed by the deferred
			// os.Remove calls registered right after they were written.
		})
	})

})

// renderTemplate renders a YAML template file with the given data.
//
// templateFile is resolved relative to <project dir>/test/e2e, where the
// project dir is obtained from utils.GetProjectDir. The file content is parsed
// as a Go template and executed against data; the rendered text is returned.
//
// Rendering is strict: a placeholder whose key is absent from data makes
// execution fail instead of being written out as the literal "<no value>",
// which would otherwise produce a syntactically valid but wrong manifest
// (for example a string-typed field silently set to "<no value>").
//
// An error is returned when the project dir cannot be determined, the file
// cannot be read, the template does not parse, or execution fails.
func renderTemplate(templateFile string, data map[string]interface{}) (string, error) {
	dir, err := utils.GetProjectDir()
	if err != nil {
		return "", err
	}

	fullPath := filepath.Join(dir, "test", "e2e", templateFile)
	tmplContent, err := os.ReadFile(fullPath)
	if err != nil {
		return "", fmt.Errorf("failed to read template file %s: %w", fullPath, err)
	}

	// missingkey=error turns a forgotten map key into an execution error.
	tmpl, err := template.New("yaml").Option("missingkey=error").Parse(string(tmplContent))
	if err != nil {
		return "", fmt.Errorf("failed to parse template: %w", err)
	}

	var buf bytes.Buffer
	err = tmpl.Execute(&buf, data)
	if err != nil {
		return "", fmt.Errorf("failed to execute template: %w", err)
	}

	return buf.String(), nil
}


================================================
FILE: kubernetes/test/e2e/testdata/batchsandbox-non-pooled-expire.yaml
================================================
# BatchSandbox template with an inline pod template (no poolRef) and an expireTime.
# Placeholders filled at render time: BatchSandboxName, Namespace, Replicas,
# ExpireTime, SandboxImage.
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: BatchSandbox
metadata:
  name: {{.BatchSandboxName}}
  namespace: {{.Namespace}}
spec:
  replicas: {{.Replicas}}
  # Quoted so the rendered value is always a YAML string.
  expireTime: "{{.ExpireTime}}"
  template:
    spec:
      containers:
      - name: sandbox-container
        image: {{.SandboxImage}}
        command: ["sleep", "3600"]


================================================
FILE: kubernetes/test/e2e/testdata/batchsandbox-non-pooled.yaml
================================================
# BatchSandbox template with an inline pod template (no poolRef) and no expireTime.
# Placeholders filled at render time: BatchSandboxName, Namespace, Replicas,
# SandboxImage.
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: BatchSandbox
metadata:
  name: {{.BatchSandboxName}}
  namespace: {{.Namespace}}
spec:
  replicas: {{.Replicas}}
  template:
    spec:
      containers:
      - name: sandbox-container
        image: {{.SandboxImage}}
        command: ["sleep", "3600"]

================================================
FILE: kubernetes/test/e2e/testdata/batchsandbox-pooled-no-expire.yaml
================================================
# BatchSandbox template that references a Pool by name and sets no expireTime.
# Placeholders filled at render time: BatchSandboxName, Namespace, Replicas,
# PoolName.
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: BatchSandbox
metadata:
  name: {{.BatchSandboxName}}
  namespace: {{.Namespace}}
spec:
  replicas: {{.Replicas}}
  poolRef: {{.PoolName}}


================================================
FILE: kubernetes/test/e2e/testdata/batchsandbox-pooled.yaml
================================================
# BatchSandbox template that references a Pool by name and sets an expireTime.
# Placeholders filled at render time: BatchSandboxName, Namespace, Replicas,
# PoolName, ExpireTime.
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: BatchSandbox
metadata:
  name: {{.BatchSandboxName}}
  namespace: {{.Namespace}}
spec:
  replicas: {{.Replicas}}
  poolRef: {{.PoolName}}
  # Quoted so the rendered value is always a YAML string.
  expireTime: "{{.ExpireTime}}"

================================================
FILE: kubernetes/test/e2e/testdata/batchsandbox-with-process-task.yaml
================================================
# BatchSandbox template that references a Pool and carries a process task
# template running `echo "Hello from task"`.
# Placeholders filled at render time: BatchSandboxName, Namespace, Replicas,
# PoolName.
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: BatchSandbox
metadata:
  name: {{.BatchSandboxName}}
  namespace: {{.Namespace}}
spec:
  replicas: {{.Replicas}}
  poolRef: {{.PoolName}}
  taskTemplate:
    spec:
      process:
        command: ["echo"]
        args: ["Hello from task"]


================================================
FILE: kubernetes/test/e2e/testdata/pool-basic.yaml
================================================
# Pool template with a single sleeping sandbox container and fully
# parameterized capacity settings.
# Placeholders filled at render time: PoolName, Namespace, SandboxImage,
# BufferMax, BufferMin, PoolMax, PoolMin.
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: Pool
metadata:
  name: {{.PoolName}}
  namespace: {{.Namespace}}
spec:
  template:
    spec:
      containers:
      - name: sandbox-container
        image: {{.SandboxImage}}
        command: ["sleep", "3600"]
  capacitySpec:
    bufferMax: {{.BufferMax}}
    bufferMin: {{.BufferMin}}
    poolMax: {{.PoolMax}}
    poolMin: {{.PoolMin}}

================================================
FILE: kubernetes/test/e2e/testdata/pool-with-env.yaml
================================================
# Pool template with a single sleeping sandbox container that additionally sets
# the environment variable UPGRADED=true; capacity settings are parameterized.
# Placeholders filled at render time: PoolName, Namespace, SandboxImage,
# BufferMax, BufferMin, PoolMax, PoolMin.
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: Pool
metadata:
  name: {{.PoolName}}
  namespace: {{.Namespace}}
spec:
  template:
    spec:
      containers:
      - name: sandbox-container
        image: {{.SandboxImage}}
        command: ["sleep", "3600"]
        env:
        - name: UPGRADED
          value: "true"
  capacitySpec:
    bufferMax: {{.BufferMax}}
    bufferMin: {{.BufferMin}}
    poolMax: {{.PoolMax}}
    poolMin: {{.PoolMin}}


================================================
FILE: kubernetes/test/e2e/testdata/pool-with-task-executor.yaml
================================================
# Pool template whose only container runs the task-executor image with its
# default entrypoint (no command override). Capacity values are fixed here
# rather than parameterized.
# Placeholders filled at render time: PoolName, Namespace, TaskExecutorImage.
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: Pool
metadata:
  name: {{.PoolName}}
  namespace: {{.Namespace}}
spec:
  template:
    spec:
      containers:
      - name: task-executor
        image: {{.TaskExecutorImage}}
  capacitySpec:
    bufferMax: 0
    bufferMin: 0
    poolMax: 10
    poolMin: 2


================================================
FILE: kubernetes/test/e2e/testdata/runtimeclass/gvisor.yaml
================================================
# RuntimeClass named "gvisor" that maps to the container runtime handler "runsc".
# No scheduling constraints are set.
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  name: gvisor
handler: runsc


================================================
FILE: kubernetes/test/e2e_runtime/gvisor/gvisor_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package gvisor

import (
	"encoding/json"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"time"

	"github.com/alibaba/OpenSandbox/sandbox-k8s/test/utils"
	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

// runKubectl executes a kubectl command from the project root directory.
//
// It returns the combined stdout/stderr of the command. On failure the
// returned error wraps the underlying exec error and also carries the captured
// output, so callers that discard the output still surface kubectl's own
// diagnostic message when an assertion on the error fails.
func runKubectl(args ...string) (string, error) {
	cmd := exec.Command("kubectl", args...)
	cmd.Dir = "../../.." // Navigate from test/e2e_runtime/gvisor to project root
	output, err := cmd.CombinedOutput()
	if err != nil {
		return string(output), fmt.Errorf("kubectl %v failed: %w; output: %q", args, err, string(output))
	}
	return string(output), nil
}

var _ = Describe("gVisor RuntimeClass", Ordered, func() {
	const testNamespace = "default"

	BeforeAll(func() {
		By("installing gVisor RuntimeClass")
		_, err := runKubectl("apply", "-f", "test/e2e_runtime/gvisor/testdata/runtimeclass.yaml")
		Expect(err).NotTo(HaveOccurred(), "Failed to create gVisor RuntimeClass")
	})

	AfterAll(func() {
		By("cleaning up RuntimeClass")
		// Best-effort teardown: both the output and the error are discarded.
		deleteArgs := []string{"delete", "runtimeclass", RuntimeClassName, "--ignore-not-found=true"}
		_, _ = runKubectl(deleteArgs...)
	})

	Context("RuntimeClass API", func() {
		It("should create RuntimeClass resources", func() {
			// Only the handler field of the RuntimeClass object is decoded.
			type runtimeClassView struct {
				Handler string `json:"handler"`
			}

			// Polled until kubectl returns the object and its handler is runsc.
			checkHandler := func(g Gomega) {
				raw, getErr := runKubectl("get", "runtimeclass", RuntimeClassName, "-o", "json")
				g.Expect(getErr).NotTo(HaveOccurred())

				var rc runtimeClassView
				decodeErr := json.Unmarshal([]byte(raw), &rc)
				g.Expect(decodeErr).NotTo(HaveOccurred())
				g.Expect(rc.Handler).To(Equal("runsc"))
			}

			By("verifying RuntimeClass exists")
			Eventually(checkHandler, 30*time.Second).Should(Succeed())
		})
	})

	Context("Pod with runtimeClassName", func() {
		// podName is regenerated for every spec so reruns never collide.
		var podName string

		BeforeEach(func() {
			podName = fmt.Sprintf("test-pod-gvisor-%d", time.Now().UnixNano())
		})

		AfterEach(func() {
			By("cleaning up Pod")
			if podName == "" {
				return
			}
			// Best-effort delete; the result is deliberately ignored.
			_, _ = runKubectl("delete", "pod", podName, "-n", testNamespace, "--ignore-not-found=true")
		})

		It("should create Pod with runtimeClassName", func() {
			// expectPodField builds a poll function that reads one jsonpath
			// expression from the pod and requires it to equal want.
			expectPodField := func(jsonPath, want string) func(g Gomega) {
				return func(g Gomega) {
					got, getErr := runKubectl("get", "pod", podName, "-n", testNamespace,
						"-o", jsonPath)
					g.Expect(getErr).NotTo(HaveOccurred())
					g.Expect(got).To(Equal(want))
				}
			}

			By("creating a Pod with runtimeClassName")
			manifest := fmt.Sprintf(`apiVersion: v1
kind: Pod
metadata:
  name: %s
  namespace: %s
spec:
  runtimeClassName: %s
  containers:
  - name: test-container
    image: %s
    command: ["sleep", "3600"]
`, podName, testNamespace, RuntimeClassName, utils.SandboxImage)

			// The manifest goes through a temp file that is removed when the spec returns.
			manifestPath := filepath.Join("/tmp", fmt.Sprintf("test-pod-%s.yaml", podName))
			writeErr := os.WriteFile(manifestPath, []byte(manifest), 0644)
			Expect(writeErr).NotTo(HaveOccurred())
			defer os.Remove(manifestPath)

			_, applyErr := runKubectl("apply", "-f", manifestPath)
			Expect(applyErr).NotTo(HaveOccurred(), "Failed to create Pod")

			By("verifying Pod has runtimeClassName set")
			Eventually(expectPodField("jsonpath={.spec.runtimeClassName}", RuntimeClassName),
				30*time.Second).Should(Succeed())

			By("verifying Pod is running with gVisor")
			Eventually(expectPodField("jsonpath={.status.phase}", "Running"),
				2*time.Minute).Should(Succeed())
		})
	})

	// Pool with gVisor RuntimeClass: creates a Pool whose pod template sets
	// runtimeClassName, waits for the Pool to report available pods, creates a
	// BatchSandbox that references the Pool, and then checks a pool pod's
	// runtimeClassName and phase plus the BatchSandbox ready count.
	Context("Pool with gVisor RuntimeClass", func() {
		// Both names are regenerated per spec from the current time.
		var poolName string
		var batchSandboxName string

		BeforeEach(func() {
			poolName = fmt.Sprintf("gvisor-pool-%d", time.Now().UnixNano())
			batchSandboxName = fmt.Sprintf("gvisor-bsbx-%d", time.Now().UnixNano())
		})

		// Teardown is best-effort: the BatchSandbox is deleted before the Pool
		// and kubectl results are ignored.
		AfterEach(func() {
			By("cleaning up BatchSandbox")
			if batchSandboxName != "" {
				_, _ = runKubectl("delete", "batchsandbox", batchSandboxName, "-n", testNamespace, "--ignore-not-found=true")
			}
			By("cleaning up Pool")
			if poolName != "" {
				_, _ = runKubectl("delete", "pool", poolName, "-n", testNamespace, "--ignore-not-found=true")
			}
		})

		It("should create Pool and allocate Pod with gVisor runtime", func() {
			By("creating a Pool with gVisor runtimeClassName")
			poolYAML := fmt.Sprintf(`apiVersion: sandbox.opensandbox.io/v1alpha1
kind: Pool
metadata:
  name: %s
  namespace: %s
spec:
  template:
    spec:
      runtimeClassName: %s
      containers:
        - name: sandbox-container
          image: %s
          command: ["sleep", "3600"]
  capacitySpec:
    bufferMax: 2
    bufferMin: 1
    poolMax: 5
    poolMin: 1
`, poolName, testNamespace, RuntimeClassName, utils.SandboxImage)

			// The manifest is written to a temp file that is removed when the spec returns.
			poolFile := filepath.Join("/tmp", fmt.Sprintf("test-pool-%s.yaml", poolName))
			err := os.WriteFile(poolFile, []byte(poolYAML), 0644)
			Expect(err).NotTo(HaveOccurred())
			defer os.Remove(poolFile)

			_, err = runKubectl("apply", "-f", poolFile)
			Expect(err).NotTo(HaveOccurred(), "Failed to create Pool")

			By("waiting for Pool to have available pods")
			Eventually(func(g Gomega) {
				output, err := runKubectl("get", "pool", poolName, "-n", testNamespace, "-o", "json")
				g.Expect(err).NotTo(HaveOccurred())

				// Only status.available is decoded from the Pool object.
				var poolObj struct {
					Status struct {
						Available int32 `json:"available"`
					} `json:"status"`
				}
				err = json.Unmarshal([]byte(output), &poolObj)
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(poolObj.Status.Available).To(BeNumerically(">", 0))
			}, 3*time.Minute).Should(Succeed())

			By("creating BatchSandbox with poolRef")
			bsbxYAML := fmt.Sprintf(`apiVersion: sandbox.opensandbox.io/v1alpha1
kind: BatchSandbox
metadata:
  name: %s
  namespace: %s
spec:
  replicas: 1
  poolRef: %s
`, batchSandboxName, testNamespace, poolName)

			bsbxFile := filepath.Join("/tmp", fmt.Sprintf("test-bsbx-%s.yaml", batchSandboxName))
			err = os.WriteFile(bsbxFile, []byte(bsbxYAML), 0644)
			Expect(err).NotTo(HaveOccurred())
			defer os.Remove(bsbxFile)

			_, err = runKubectl("apply", "-f", bsbxFile)
			Expect(err).NotTo(HaveOccurred(), "Failed to create BatchSandbox")

			By("waiting for BatchSandbox to allocate a Pod")
			// NOTE(review): this picks the first pod carrying the pool-name label.
			// That is a pod of the Pool, but not necessarily the one allocated to
			// this BatchSandbox — confirm whether a stricter selector is needed.
			var podName string
			Eventually(func(g Gomega) {
				output, err := runKubectl("get", "pods", "-n", testNamespace,
					"-l", fmt.Sprintf("sandbox.opensandbox.io/pool-name=%s", poolName),
					"-o", "jsonpath={.items[0].metadata.name}")
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(output).NotTo(BeEmpty())
				podName = output
			}, 2*time.Minute).Should(Succeed())

			By("verifying allocated Pod has runtimeClassName set to gVisor")
			Eventually(func(g Gomega) {
				output, err := runKubectl("get", "pod", podName, "-n", testNamespace,
					"-o", "jsonpath={.spec.runtimeClassName}")
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(output).To(Equal(RuntimeClassName))
			}, 30*time.Second).Should(Succeed())

			By("verifying Pod is running with gVisor")
			Eventually(func(g Gomega) {
				output, err := runKubectl("get", "pod", podName, "-n", testNamespace,
					"-o", "jsonpath={.status.phase}")
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(output).To(Equal("Running"))
			}, 2*time.Minute).Should(Succeed())

			By("verifying BatchSandbox status is ready")
			// status.ready is compared as text because kubectl jsonpath output is a string.
			Eventually(func(g Gomega) {
				output, err := runKubectl("get", "batchsandbox", batchSandboxName, "-n", testNamespace,
					"-o", "jsonpath={.status.ready}")
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(output).To(Equal("1"))
			}, 2*time.Minute).Should(Succeed())
		})
	})
})


================================================
FILE: kubernetes/test/e2e_runtime/gvisor/suite_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package gvisor

import (
	"fmt"
	"os"
	"os/exec"
	"testing"

	"github.com/alibaba/OpenSandbox/sandbox-k8s/test/utils"
	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

// RuntimeClassName is the name of the RuntimeClass for gVisor.
const RuntimeClassName = "gvisor"

// KindCluster is the name of the Kind cluster for gVisor tests.
// It reads from KIND_CLUSTER environment variable, defaulting to "gvisor-test".
var KindCluster = getKindCluster()

func getKindCluster() string {
	if v, ok := os.LookupEnv("KIND_CLUSTER"); ok {
		return v
	}
	return "gvisor-test"
}

// TestGVisorRuntimeClass runs the gVisor RuntimeClass end-to-end tests.
// These tests validate gVisor functionality with the Kind cluster
// configured specifically for gVisor (runsc) runtime.
func TestGVisorRuntimeClass(t *testing.T) {
	RegisterFailHandler(Fail)
	_, _ = fmt.Fprintf(GinkgoWriter, "Starting gVisor RuntimeClass E2E test suite\n")
	RunSpecs(t, "gVisor runtimeclass suite")
}

// Suite setup: build the task-executor image with make, then load it into the
// Kind cluster named by KindCluster. Either step failing aborts the suite with
// the command's combined output in the failure message.
var _ = BeforeSuite(func() {
	// Both commands run from the project root, reached relative to
	// test/e2e_runtime/gvisor.
	const projectRoot = "../../.."

	extraBuildArgs := os.Getenv("DOCKER_BUILD_ARGS")

	By("building task-executor image")
	targets := []string{"docker-build-task-executor", fmt.Sprintf("TASK_EXECUTOR_IMG=%s", utils.TaskExecutorImage)}
	if extraBuildArgs != "" {
		// Forward optional docker build arguments to make only when provided.
		targets = append(targets, fmt.Sprintf("DOCKER_BUILD_ARGS=%s", extraBuildArgs))
	}
	build := exec.Command("make", targets...)
	build.Dir = projectRoot
	buildOut, buildErr := build.CombinedOutput()
	ExpectWithOffset(1, buildErr).NotTo(HaveOccurred(), "Failed to build task-executor image: %s", string(buildOut))

	By("loading task-executor image on Kind")
	// kind is invoked directly, avoiding utils.GetProjectDir() path issues.
	load := exec.Command("kind", "load", "docker-image", "--name", KindCluster, utils.TaskExecutorImage)
	load.Dir = projectRoot
	loadOut, loadErr := load.CombinedOutput()
	ExpectWithOffset(1, loadErr).NotTo(HaveOccurred(), "Failed to load task-executor image into Kind: %s", string(loadOut))
})

// AfterSuite hook for the gVisor suite. The body is empty: no suite-level
// teardown is performed here.
var _ = AfterSuite(func() {
})


================================================
FILE: kubernetes/test/e2e_runtime/gvisor/testdata/gvisor.yaml.tmpl
================================================
# Kind cluster definition for the gVisor tests: one control-plane node and one
# worker node, each with the runsc binary and the containerd runsc shim mounted
# read-only from the host under test/kind/gvisor.
# The GVISOR_KIND_CLUSTER, GVISOR_KIND_IMAGE and PWD placeholders must be
# substituted before this file is handed to kind (presumably by an
# envsubst-style step; confirm against the build tooling).
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
name: ${GVISOR_KIND_CLUSTER}
# Configure containerd to use gVisor runsc runtime with containerd-shim-runsc-v1
containerdConfigPatches:
  - |-
    [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runsc]
      runtime_type = "io.containerd.runsc.v1"
      [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runsc.options]
        TypeUrl = "io.containerd.runsc.v1.options"
        ConfigPath = "/etc/containerd/runsc.toml"
nodes:
  - role: control-plane
    image: ${GVISOR_KIND_IMAGE}
    extraMounts:
      - hostPath: ${PWD}/test/kind/gvisor/runsc
        containerPath: /usr/local/bin/runsc
        readOnly: true
      - hostPath: ${PWD}/test/kind/gvisor/containerd-shim-runsc-v1
        containerPath: /usr/local/bin/containerd-shim-runsc-v1
        readOnly: true
  - role: worker
    image: ${GVISOR_KIND_IMAGE}
    extraMounts:
      - hostPath: ${PWD}/test/kind/gvisor/runsc
        containerPath: /usr/local/bin/runsc
        readOnly: true
      - hostPath: ${PWD}/test/kind/gvisor/containerd-shim-runsc-v1
        containerPath: /usr/local/bin/containerd-shim-runsc-v1
        readOnly: true


================================================
FILE: kubernetes/test/e2e_runtime/gvisor/testdata/runtimeclass.yaml
================================================
# RuntimeClass named "gvisor" that maps to the container runtime handler "runsc".
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  name: gvisor
handler: runsc
# Pods using this RuntimeClass are constrained to amd64 nodes.
scheduling:
  nodeSelector:
    kubernetes.io/arch: amd64


================================================
FILE: kubernetes/test/e2e_task/suite_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package e2e_task

import (
	"testing"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

// TestE2E is the `go test` entry point for the task-executor end-to-end suite.
// It registers Ginkgo's Fail as the Gomega failure handler and runs all specs
// registered in this package.
func TestE2E(t *testing.T) {
	RegisterFailHandler(Fail)
	RunSpecs(t, "Task Executor E2E Suite")
}


================================================
FILE: kubernetes/test/e2e_task/task_e2e_test.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package e2e_task

import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"time"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"

	api "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/task-executor"
)

// Fixed names for the docker resources created by this suite.
const (
	// ImageName is the tag given to the task-executor image built for the suite.
	ImageName = "task-executor-e2e"
	// TargetContainer is the name of the long-running container tasks run against.
	TargetContainer = "task-e2e-target"
	// ExecutorContainer is the name of the container running the task executor.
	ExecutorContainer = "task-e2e-executor"
	// VolumeName is the docker volume mounted at /tmp/tasks in both containers.
	VolumeName = "task-e2e-vol"
	// HostPort is the host port published to the executor's port 5758.
	HostPort = "5758"
)

var _ = Describe("Task Executor E2E", Ordered, func() {
	var client *api.Client

	BeforeAll(func() {
		// runStreaming executes c with its stdout/stderr attached to this
		// process so docker output shows up in the test log.
		runStreaming := func(c *exec.Cmd) error {
			c.Stdout = os.Stdout
			c.Stderr = os.Stderr
			return c.Run()
		}

		// Check docker. Despite the message wording, a missing docker binary
		// fails the suite here rather than skipping it.
		_, lookErr := exec.LookPath("docker")
		Expect(lookErr).NotTo(HaveOccurred(), "Docker not found, skipping E2E test")

		By("Building image")
		buildCmd := exec.Command("docker", "build",
			"--build-arg", "PACKAGE=cmd/task-executor/main.go",
			"-t", ImageName, "-f", "../../Dockerfile", "../../")
		Expect(runStreaming(buildCmd)).To(Succeed())

		By("Cleaning up previous runs")
		// Best-effort: leftovers may not exist, so errors are ignored.
		_ = exec.Command("docker", "rm", "-f", TargetContainer, ExecutorContainer).Run()
		_ = exec.Command("docker", "volume", "rm", VolumeName).Run()

		By("Creating shared volume")
		createVolume := exec.Command("docker", "volume", "create", VolumeName)
		Expect(createVolume.Run()).To(Succeed())

		// Both containers mount the same volume at /tmp/tasks.
		volumeMount := fmt.Sprintf("%s:/tmp/tasks", VolumeName)

		By("Starting target container")
		startTarget := exec.Command("docker", "run", "-d", "--name", TargetContainer,
			"-v", volumeMount,
			"-e", "SANDBOX_MAIN_CONTAINER=main",
			"-e", "TARGET_VAR=hello-from-target",
			"golang:1.24", "sleep", "infinity")
		Expect(runStreaming(startTarget)).To(Succeed())

		By("Starting executor container in Sidecar Mode")
		// The executor joins the target's PID namespace and publishes port 5758.
		startExecutor := exec.Command("docker", "run", "-d", "--name", ExecutorContainer,
			"-v", volumeMount,
			"--privileged",
			"-u", "0",
			"--pid=container:"+TargetContainer,
			"-p", HostPort+":5758",
			ImageName,
			"-enable-sidecar-mode=true",
			"-main-container-name=main",
			"-data-dir=/tmp/tasks")
		Expect(runStreaming(startExecutor)).To(Succeed())

		By("Waiting for executor to be ready")
		client = api.NewClient(fmt.Sprintf("http://127.0.0.1:%s", HostPort))
		probe := func() error {
			_, getErr := client.Get(context.Background())
			return getErr
		}
		Eventually(probe, 10*time.Second, 500*time.Millisecond).Should(Succeed(), "Executor failed to become ready")
	})

	AfterAll(func() {
		By("Cleaning up containers")
		// On failure, print the executor's docker logs before removing it.
		if report := CurrentSpecReport(); report.Failed() {
			By("Dumping logs")
			logs, _ := exec.Command("docker", "logs", ExecutorContainer).CombinedOutput()
			fmt.Printf("Executor Logs:\n%s\n", string(logs))
		}

		// Best-effort removal of both containers, then the shared volume.
		teardown := [][]string{
			{"rm", "-f", TargetContainer, ExecutorContainer},
			{"volume", "rm", VolumeName},
		}
		for _, dockerArgs := range teardown {
			_ = exec.Command("docker", dockerArgs...).Run()
		}
	})

	Context("When creating a short-lived task", func() {
		taskName := "e2e-test-1"

		It("should run and succeed", func() {
			By("Creating task")
			_, setErr := client.Set(context.Background(), &api.Task{
				Name: taskName,
				Process: &api.Process{
					Command: []string{"sleep", "2"},
				},
			})
			Expect(setErr).NotTo(HaveOccurred())

			By("Waiting for task to succeed")
			// Each failed expectation aborts the poll so Eventually retries
			// until the task reports a terminated state.
			pollSucceeded := func(g Gomega) {
				current, getErr := client.Get(context.Background())
				g.Expect(getErr).NotTo(HaveOccurred())
				g.Expect(current).NotTo(BeNil())
				g.Expect(current.Name).To(Equal(taskName))

				// Guard clauses: not terminated yet means "retry".
				status := current.ProcessStatus
				g.Expect(status).NotTo(BeNil(), "Task ProcessStatus is nil")
				g.Expect(status.Terminated).NotTo(BeNil(), "Task status: %v", status)

				// Verify state
				g.Expect(status.Terminated.ExitCode).To(BeZero())
				g.Expect(status.Terminated.Reason).To(Equal("Succeeded"))
			}
			Eventually(pollSucceeded, 10*time.Second, 1*time.Second).Should(Succeed())
		})

		It("should be deletable", func() {
			By("Deleting task")
			// Setting a nil task is how the current task is removed.
			_, setErr := client.Set(context.Background(), nil)
			Expect(setErr).NotTo(HaveOccurred())

			By("Verifying deletion")
			currentTask := func() *api.Task {
				current, _ := client.Get(context.Background())
				return current
			}
			Eventually(currentTask, 5*time.Second, 500*time.Millisecond).Should(BeNil())
		})
	})

	Context("When creating a task checking environment variables", func() {
		taskName := "e2e-env-test"

		It("should inherit environment variables from target container", func() {
			By("Creating task running 'env'")
			task := &api.Task{
				Name: taskName,
				Process: &api.Process{
					Command: []string{"env"},
				},
			}
			_, err := client.Set(context.Background(), task)
			Expect(err).NotTo(HaveOccurred())

			By("Waiting for task to succeed")
			Eventually(func(g Gomega) {
				got, err := client.Get(context.Background())
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(got).NotTo(BeNil())
				g.Expect(got.Name).To(Equal(taskName))
				// ProcessStatus is a pointer that may still be nil on an early
				// poll. Assert on it first so the poll is retried instead of
				// panicking on a nil dereference.
				g.Expect(got.ProcessStatus).NotTo(BeNil(), "Task ProcessStatus is nil")
				g.Expect(got.ProcessStatus.Terminated).NotTo(BeNil(), "Task status: %v", got.ProcessStatus)
				g.Expect(got.ProcessStatus.Terminated.ExitCode).To(BeZero())
			}, 10*time.Second, 1*time.Second).Should(Succeed())

			By("Verifying stdout contains target container env")
			// Read stdout.log from the executor container (which shares the volume)
			out, err := exec.Command("docker", "exec", ExecutorContainer, "cat", fmt.Sprintf("/tmp/tasks/%s/stdout.log", taskName)).CombinedOutput()
			Expect(err).NotTo(HaveOccurred(), "Failed to read stdout.log: %s", string(out))

			outputStr := string(out)
			Expect(outputStr).To(ContainSubstring("TARGET_VAR=hello-from-target"), "Task environment should inherit from target container")
		})

		It("should be deletable", func() {
			By("Deleting task")
			// Setting a nil task is how the current task is removed.
			_, err := client.Set(context.Background(), nil)
			Expect(err).NotTo(HaveOccurred())

			By("Verifying deletion")
			Eventually(func() *api.Task {
				got, _ := client.Get(context.Background())
				return got
			}, 5*time.Second, 500*time.Millisecond).Should(BeNil())
		})
	})

	Context("When creating a task with timeout", func() {
		taskName := "e2e-timeout-test"

		It("should timeout and be terminated", func() {
			By("Creating task with 5 second timeout that runs for 30 seconds")
			timeoutSec := int64(5)
			task := &api.Task{
				Name: taskName,
				Process: &api.Process{
					Command:        []string{"sleep", "30"},
					TimeoutSeconds: &timeoutSec,
				},
			}
			_, err := client.Set(context.Background(), task)
			Expect(err).NotTo(HaveOccurred())

			By("Waiting for task to be terminated (within 15 seconds)")
			// After timeout detection, Stop is called and the process is killed.
			// Once Stop completes, the exit file is written and state becomes Failed.
			Eventually(func(g Gomega) {
				got, err := client.Get(context.Background())
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(got).NotTo(BeNil())
				g.Expect(got.Name).To(Equal(taskName))

				// Not terminated yet: fail the poll so Eventually retries.
				g.Expect(got.ProcessStatus).NotTo(BeNil(), "Task ProcessStatus is nil")
				g.Expect(got.ProcessStatus.Terminated).NotTo(BeNil(), "Task status: %v", got.ProcessStatus)

				// Should be Terminated with exit code 137 (SIGKILL) or 143 (SIGTERM)
				// sleep responds to SIGTERM quickly, so we usually get 143
				g.Expect(got.ProcessStatus.Terminated.ExitCode).To(SatisfyAny(
					Equal(int32(137)), // SIGKILL
					Equal(int32(143)), // SIGTERM
				))
			}, 15*time.Second, 1*time.Second).Should(Succeed())

			By("Verifying the task was terminated")
			got, err := client.Get(context.Background())
			Expect(err).NotTo(HaveOccurred())
			// Guard every pointer on the path before dereferencing it, so an
			// unexpected response becomes a readable assertion failure rather
			// than a nil-pointer panic.
			Expect(got).NotTo(BeNil())
			Expect(got.ProcessStatus).NotTo(BeNil(), "Task ProcessStatus is nil")
			Expect(got.ProcessStatus.Terminated).NotTo(BeNil())
			Expect(got.ProcessStatus.Terminated.ExitCode).To(SatisfyAny(
				Equal(int32(137)), // SIGKILL
				Equal(int32(143)), // SIGTERM
			))
			// Reason could be "Failed" (after exit file written) or "TaskTimeout" (during stop)
			Expect(got.ProcessStatus.Terminated.Reason).To(SatisfyAny(
				Equal("Failed"),
				Equal("TaskTimeout"),
			))
		})

		It("should be deletable after timeout", func() {
			By("Deleting task")
			// Setting a nil task is how the current task is removed.
			_, err := client.Set(context.Background(), nil)
			Expect(err).NotTo(HaveOccurred())

			By("Verifying deletion")
			Eventually(func() *api.Task {
				got, _ := client.Get(context.Background())
				return got
			}, 5*time.Second, 500*time.Millisecond).Should(BeNil())
		})
	})

	Context("When creating a task that completes before timeout", func() {
		taskName := "e2e-no-timeout-test"

		It("should succeed without timeout", func() {
			By("Creating task with 60 second timeout that completes in 2 seconds")
			limit := int64(60)
			_, setErr := client.Set(context.Background(), &api.Task{
				Name: taskName,
				Process: &api.Process{
					Command:        []string{"sleep", "2"},
					TimeoutSeconds: &limit,
				},
			})
			Expect(setErr).NotTo(HaveOccurred())

			By("Waiting for task to succeed")
			// Each failed expectation aborts the poll so Eventually retries
			// until the task reports a terminated state.
			pollSucceeded := func(g Gomega) {
				current, getErr := client.Get(context.Background())
				g.Expect(getErr).NotTo(HaveOccurred())
				g.Expect(current).NotTo(BeNil())
				g.Expect(current.Name).To(Equal(taskName))

				// Guard clauses: not terminated yet means "retry".
				status := current.ProcessStatus
				g.Expect(status).NotTo(BeNil(), "Task ProcessStatus is nil")
				g.Expect(status.Terminated).NotTo(BeNil(), "Task status: %v", status)

				// Should succeed with exit code 0
				g.Expect(status.Terminated.ExitCode).To(BeZero())
				g.Expect(status.Terminated.Reason).To(Equal("Succeeded"))
			}
			Eventually(pollSucceeded, 10*time.Second, 1*time.Second).Should(Succeed())
		})

		It("should be deletable", func() {
			By("Deleting task")
			// Setting a nil task is how the current task is removed.
			_, setErr := client.Set(context.Background(), nil)
			Expect(setErr).NotTo(HaveOccurred())

			By("Verifying deletion")
			currentTask := func() *api.Task {
				current, _ := client.Get(context.Background())
				return current
			}
			Eventually(currentTask, 5*time.Second, 500*time.Millisecond).Should(BeNil())
		})
	})
})


================================================
FILE: kubernetes/test/utils/image.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package utils

import "os"

// ControllerImage is the controller manager image.
// Override it with the CONTROLLER_IMG environment variable.
var ControllerImage = getEnv("CONTROLLER_IMG", "controller:dev")

// TaskExecutorImage is the task-executor image.
// Override it with the TASK_EXECUTOR_IMG environment variable.
var TaskExecutorImage = getEnv("TASK_EXECUTOR_IMG", "task-executor:dev")

// SandboxImage is the image used for sandbox containers in tests.
// It always equals TaskExecutorImage to ensure the image is available in Kind.
var SandboxImage = TaskExecutorImage

func getEnv(key, defaultValue string) string {
	if v := os.Getenv(key); v != "" {
		return v
	}
	return defaultValue
}


================================================
FILE: kubernetes/test/utils/utils.go
================================================
// Copyright 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package utils

import (
	"bufio"
	"bytes"
	"fmt"
	"os"
	"os/exec"
	"strings"

	. "github.com/onsi/ginkgo/v2" // nolint:revive,staticcheck
)

// Pinned versions and download URLs for the third-party manifests these
// helpers install or remove with kubectl.
const (
	// prometheusOperatorVersion is the Prometheus Operator release to use.
	prometheusOperatorVersion = "v0.77.1"
	// prometheusOperatorURL is a format string; its %s is filled with
	// prometheusOperatorVersion to obtain the bundle manifest URL.
	prometheusOperatorURL     = "https://github.com/prometheus-operator/prometheus-operator/" +
		"releases/download/%s/bundle.yaml"

	// certmanagerVersion is the cert-manager release to use.
	certmanagerVersion = "v1.16.3"
	// certmanagerURLTmpl is a format string; its %s is filled with
	// certmanagerVersion to obtain the cert-manager manifest URL.
	certmanagerURLTmpl = "https://github.com/cert-manager/cert-manager/releases/download/%s/cert-manager.yaml"
)

// warnError writes err to GinkgoWriter as a "warning: ..." line. It is used
// for failures that are reported but should not abort the caller; a failure to
// write the warning itself is ignored.
func warnError(err error) {
	_, _ = fmt.Fprintf(GinkgoWriter, "warning: %v\n", err)
}

// Run executes cmd from the project directory and returns its combined
// stdout/stderr.
//
// Before running, it points cmd.Dir at the directory reported by
// GetProjectDir (an error from that lookup is ignored), changes the working
// directory of the current process to the same location (a failure is only
// logged to GinkgoWriter), and replaces cmd.Env with the current environment
// plus GO111MODULE=on. The command line is logged to GinkgoWriter.
//
// On failure the output is still returned, and the error wraps the underlying
// error together with the command line and the captured output.
func Run(cmd *exec.Cmd) (string, error) {
	projectDir, _ := GetProjectDir()
	cmd.Dir = projectDir
	if chdirErr := os.Chdir(projectDir); chdirErr != nil {
		_, _ = fmt.Fprintf(GinkgoWriter, "chdir dir: %q\n", chdirErr)
	}

	cmd.Env = append(os.Environ(), "GO111MODULE=on")

	command := strings.Join(cmd.Args, " ")
	_, _ = fmt.Fprintf(GinkgoWriter, "running: %q\n", command)

	raw, err := cmd.CombinedOutput()
	text := string(raw)
	if err == nil {
		return text, nil
	}
	return text, fmt.Errorf("%q failed with error %q: %w", command, text, err)
}

// InstallPrometheusOperator runs `kubectl create -f` on the Prometheus
// Operator bundle for prometheusOperatorVersion and returns the error from
// that command, if any.
func InstallPrometheusOperator() error {
	bundleURL := fmt.Sprintf(prometheusOperatorURL, prometheusOperatorVersion)
	_, err := Run(exec.Command("kubectl", "create", "-f", bundleURL))
	return err
}

// UninstallPrometheusOperator runs `kubectl delete -f` on the Prometheus
// Operator bundle for prometheusOperatorVersion. A failure is reported
// through warnError rather than returned.
func UninstallPrometheusOperator() {
	bundleURL := fmt.Sprintf(prometheusOperatorURL, prometheusOperatorVersion)
	_, err := Run(exec.Command("kubectl", "delete", "-f", bundleURL))
	if err != nil {
		warnError(err)
	}
}

// IsPrometheusCRDsInstalled reports whether at least one of the well-known
// Prometheus CRDs appears in the `kubectl get crds` listing.
// It returns false when the kubectl command itself fails.
func IsPrometheusCRDsInstalled() bool {
	listing, err := Run(exec.Command(
		"kubectl", "get", "crds", "-o", "custom-columns=NAME:.metadata.name",
	))
	if err != nil {
		return false
	}

	// Any single match is sufficient.
	knownCRDs := [...]string{
		"prometheuses.monitoring.coreos.com",
		"prometheusrules.monitoring.coreos.com",
		"prometheusagents.monitoring.coreos.com",
	}
	for _, row := range GetNonEmptyLines(listing) {
		for _, crdName := range knownCRDs {
			if strings.Contains(row, crdName) {
				return true
			}
		}
	}
	return false
}

// UninstallCertManager runs `kubectl delete -f` on the cert-manager manifest
// for certmanagerVersion. A failure is reported through warnError rather than
// returned.
func UninstallCertManager() {
	manifestURL := fmt.Sprintf(certmanagerURLTmpl, certmanagerVersion)
	_, err := Run(exec.Command("kubectl", "delete", "-f", manifestURL))
	if err != nil {
		warnError(err)
	}
}

// InstallCertManager applies the cert-manager manifest for certmanagerVersion
// and then waits up to five minutes for the cert-manager-webhook deployment
// to report condition=Available. It returns the first error from either step.
func InstallCertManager() error {
	manifestURL := fmt.Sprintf(certmanagerURLTmpl, certmanagerVersion)
	if _, applyErr := Run(exec.Command("kubectl", "apply", "-f", manifestURL)); applyErr != nil {
		return applyErr
	}

	// The webhook can take a while to become ready, notably when cert-manager
	// was re-installed after uninstalling on a cluster.
	waitArgs := []string{
		"wait", "deployment.apps/cert-manager-webhook",
		"--for", "condition=Available",
		"--namespace", "cert-manager",
		"--timeout", "5m",
	}
	_, waitErr := Run(exec.Command("kubectl", waitArgs...))
	return waitErr
}

// IsCertManagerCRDsInstalled reports whether at least one of the well-known
// cert-manager CRDs appears in the `kubectl get crds` output.
// It returns false when the kubectl command itself fails.
func IsCertManagerCRDsInstalled() bool {
	// Ask the cluster for every CRD it knows about.
	listing, err := Run(exec.Command("kubectl", "get", "crds"))
	if err != nil {
		return false
	}

	// Any single match is sufficient.
	knownCRDs := [...]string{
		"certificates.cert-manager.io",
		"issuers.cert-manager.io",
		"clusterissuers.cert-manager.io",
		"certificaterequests.cert-manager.io",
		"orders.acme.cert-manager.io",
		"challenges.acme.cert-manager.io",
	}
	for _, row := range GetNonEmptyLines(listing) {
		for _, crdName := range knownCRDs {
			if strings.Contains(row, crdName) {
				return true
			}
		}
	}
	return false
}

// LoadImageToKindClusterWithName runs `kind load docker-image` for the image
// called name. The target cluster is the value of the KIND_CLUSTER
// environment variable when that variable is present, and "kind" otherwise.
func LoadImageToKindClusterWithName(name string) error {
	clusterName, present := os.LookupEnv("KIND_CLUSTER")
	if !present {
		clusterName = "kind"
	}
	_, err := Run(exec.Command("kind", "load", "docker-image", name, "--name", clusterName))
	return err
}

// GetNonEmptyLines splits output on "\n" and returns the segments that are not
// the empty string, in their original order. The result is nil when there is
// no such segment.
func GetNonEmptyLines(output string) []string {
	var lines []string
	remaining := output
	for {
		line, tail, more := strings.Cut(remaining, "\n")
		if line != "" {
			lines = append(lines, line)
		}
		if !more {
			return lines
		}
		remaining = tail
	}
}

// GetProjectDir returns the current working directory with every occurrence
// of "/test/e2e" removed from the path.
// If the working directory cannot be determined, the error is wrapped and
// returned together with whatever path os.Getwd produced.
func GetProjectDir() (string, error) {
	cwd, err := os.Getwd()
	if err == nil {
		return strings.ReplaceAll(cwd, "/test/e2e", ""), nil
	}
	return cwd, fmt.Errorf("failed to get current working directory: %w", err)
}

// UncommentCode rewrites filename in place, stripping prefix from the start of
// each line of the first occurrence of target. The target may span multiple
// lines; content before and after the occurrence is copied through unchanged.
//
// It returns an error when the file cannot be read or written, or when target
// does not occur in the file. If target contains no lines at all (for example
// an empty string), the function returns nil without writing the file.
func UncommentCode(filename, target, prefix string) error {
	// false positive
	// nolint:gosec
	original, err := os.ReadFile(filename)
	if err != nil {
		return fmt.Errorf("failed to read file %q: %w", filename, err)
	}

	start := strings.Index(string(original), target)
	if start < 0 {
		return fmt.Errorf("unable to find the code %q to be uncomment", target)
	}
	end := start + len(target)

	var rewritten bytes.Buffer
	if _, err = rewritten.Write(original[:start]); err != nil {
		return fmt.Errorf("failed to write to output: %w", err)
	}

	// Emit the target line by line with the prefix stripped. Newlines are
	// written between lines only, never after the last one.
	lines := bufio.NewScanner(strings.NewReader(target))
	emitted := 0
	for ; lines.Scan(); emitted++ {
		if emitted > 0 {
			if _, err = rewritten.WriteString("\n"); err != nil {
				return fmt.Errorf("failed to write to output: %w", err)
			}
		}
		if _, err = rewritten.WriteString(strings.TrimPrefix(lines.Text(), prefix)); err != nil {
			return fmt.Errorf("failed to write to output: %w", err)
		}
	}
	if emitted == 0 {
		// Nothing to uncomment: leave the file untouched.
		return nil
	}

	if _, err = rewritten.Write(original[end:]); err != nil {
		return fmt.Errorf("failed to write to output: %w", err)
	}

	// false positive
	// nolint:gosec
	if err = os.WriteFile(filename, rewritten.Bytes(), 0644); err != nil {
		return fmt.Errorf("failed to write file %q: %w", filename, err)
	}

	return nil
}


================================================
FILE: oseps/0001-fqdn-based-egress-control.md
================================================
---
title: FQDN-based Egress Control
authors:
  - "@hittyt"
  - "@Pangjiping"
creation-date: 2025-12-27
last-updated: 2026-01-22
status: implemented
---

# OSEP-0001: FQDN-based Egress Control

<!-- toc -->
- [Summary](#summary)
- [Motivation](#motivation)
  - [Goals](#goals)
  - [Non-Goals](#non-goals)
- [Requirements](#requirements)
- [Proposal](#proposal)
  - [Notes/Constraints/Caveats](#notesconstraintscaveats)
  - [Risks and Mitigations](#risks-and-mitigations)
- [Design Details](#design-details)
  - [API Schema](#api-schema)
  - [Architecture Overview](#architecture-overview)
  - [Layer 1: DNS Proxy](#layer-1-dns-proxy)
  - [Layer 2: Network Filter](#layer-2-network-filter)
  - [Capability Detection and Graceful Degradation](#capability-detection-and-graceful-degradation)
  - [Enforcement Modes](#enforcement-modes)
  - [Component Changes](#component-changes)
- [Test Plan](#test-plan)
- [Drawbacks](#drawbacks)
- [Alternatives](#alternatives)
- [Infrastructure Needed](#infrastructure-needed)
- [Upgrade & Migration Strategy](#upgrade--migration-strategy)
<!-- /toc -->

## Summary

This proposal introduces domain-based (FQDN) egress control for OpenSandbox. It enables users to declaratively specify which external domains a sandbox can access, using a `network_policy` field in the Sandbox Lifecycle API. The implementation uses a two-layer approach (DNS-level filtering plus optional network-layer enforcement) delivered via a **sidecar** that shares the sandbox network namespace; the application container itself does not receive extra privileges.

## Motivation

In AI Agent scenarios (e.g., Coding Agents, Data Analysis Agents), sandboxes frequently need controlled access to external services such as `api.github.com`, `pypi.org`, or `api.openai.com`. Currently, OpenSandbox lacks fine-grained network egress control.

Existing industry solutions like E2B and Modal primarily rely on IP addresses or CIDR blocks for egress control. However, this approach has critical limitations:

**Dynamic IP Challenges**: Modern cloud services and CDNs frequently change their underlying IP addresses. Manually maintaining an IP allowlist for domains like `api.github.com` is operationally expensive and error-prone.

**Security Gaps**: IP-based rules can be bypassed if multiple services share the same IP address (common in virtual hosting). Without domain-level (L7) awareness, a sandbox allowed to access one service might inadvertently access others on the same host.

**Developer Experience (DX)**: It is much more intuitive for developers to declare "allow access to `openai.com`" than to perform DNS lookups and input CIDR ranges during sandbox creation.

OpenSandbox aims to be a universal AI sandbox platform. To meet enterprise-grade security and production requirements, it must support Domain-based (FQDN) Egress Control.

### Goals

1. **Declarative API**: Provide a `network_policy.egress` field in the Sandbox Lifecycle API that accepts domain-based allow/deny rules.
2. **Wildcard Support**: Support wildcard patterns (e.g., `*.pypi.org`) for flexible policy definition.
3. **Transparent to Applications**: Sandbox applications should not require any modification to work with egress policies.
4. **Graceful Degradation**: The system should work across different privilege levels, degrading gracefully when kernel-level enforcement is unavailable.
5. **Observable**: Provide clear visibility into the current enforcement mode and policy violations.
6. **Runtime Agnostic**: Sidecar-based implementation works identically for Docker (shared network namespace) and Kubernetes (same Pod). No application-container privilege elevation; NET_ADMIN is isolated to the sidecar.

### Non-Goals

1. **L7 Deep Packet Inspection**: This proposal does not include HTTPS content inspection or TLS termination (mitmproxy-style).
2. **Ingress Control**: This proposal focuses on egress (outbound) traffic only.
3. **Rate Limiting**: Traffic rate limiting or bandwidth control is out of scope.
4. **Per-Process Policies**: Policies apply to the entire sandbox, not individual processes.
5. **IPv6-first**: Initial implementation focuses on IPv4; IPv6 support is a future enhancement.
6. **External CRD Dependencies**: This proposal intentionally avoids depending on Kubernetes NetworkPolicy, CiliumNetworkPolicy, or other external resources. All enforcement happens inside a sidecar that shares the sandbox network namespace.
7. **eBPF-based Filtering**: While eBPF offers performance benefits, nftables provides sufficient functionality for Layer 2. eBPF support may be added as a future performance optimization.

## Requirements

| ID | Requirement | Priority |
|----|-------------|----------|
| R1 | Users can specify allowed domains via SDK/API | Must Have |
| R2 | Wildcard domain matching (e.g., `*.example.com`) | Must Have |
| R3 | Default deny policy when `network_policy` is specified | Must Have |
| R4 | NET_ADMIN confined to sidecar; application container runs without added privileges; if NET_ADMIN unavailable, policy disables with warning | Must Have |
| R5 | Full network isolation when `CAP_NET_ADMIN` is available | Should Have |
| R6 | Enforcement mode is observable via API | Should Have |
| R7 | Policy violations are logged | Should Have |
| R8 | IPv6 support | Could Have |

## Proposal

We propose a **two-layer architecture** for FQDN egress control:

```
┌─────────────────────────────────────────────────────────────────────┐
│                     Sandbox Pod / Net Namespace                     │
│                                                                     │
│   ┌───────────────────┐            ┌────────────────────────────┐   │
│   │ Application       │            │ Egress Sidecar             │   │
│   │ Container         │            │ (NET_ADMIN)                │   │
│   │ (no NET_ADMIN)    │            │ - Layer 1: DNS Proxy       │   │
│   │                   │            │   · Intercepts all DNS     │   │
│   └─────────┬─────────┘            │   · Applies policy         │   │
│             │ DNS Query            │   · Learns domain→IP       │   │
│             ▼                      │ - Layer 2: Network Filter  │   │
│      (shared network namespace)    │   · nftables allowlist     │   │
│                                    │   · Blocks others / DoH    │   │
│                                    └─────────────┬──────────────┘   │
│                                                  │                  │
│                                                  ▼                  │
│                                            External Network         │
└─────────────────────────────────────────────────────────────────────┘
```

**Layer 1 (DNS Proxy)** provides the user experience benefit and relies on `CAP_NET_ADMIN` for transparent iptables REDIRECT. If the capability is missing, DNS interception cannot be installed; the policy is skipped with a warning.

**Layer 2 (Network Filter)** provides true network isolation by enforcing that only IPs learned from Layer 1 are reachable. This layer requires `CAP_NET_ADMIN` and is optional.

### Notes/Constraints/Caveats

1. **DNS-only mode is a soft limit**: Without Layer 2 (nftables), applications can bypass DNS filtering by using direct IP connections (e.g., `curl http://140.82.114.6`) or DNS-over-HTTPS/TLS (DoH/DoT). Note: hardcoded DNS servers are NOT a bypass vector because iptables REDIRECT intercepts all port 53 traffic (when `CAP_NET_ADMIN` is present).

2. **Container startup order**: The DNS proxy must be ready before any application process starts to avoid race conditions.

3. **Localhost exemption**: `localhost`, `127.0.0.1`, and container-internal communication should always be allowed.

5. **Cross-platform considerations**: The two-layer architecture uses platform abstraction:
   - **Layer 1 (DNS Proxy)**: Core logic is cross-platform (pure Go). System resolver configuration requires platform-specific code (`/etc/resolv.conf` on Linux, `netsh` on Windows).
   - **Layer 2 (Network Filter)**: Requires platform-specific implementations (nftables on Linux, WFP on Windows, pf on macOS). The system gracefully degrades to DNS-only mode on platforms without Layer 2 support.

6. **No resolv.conf modification needed**: With the simplified CAP_NET_ADMIN approach, we use `iptables REDIRECT` to intercept DNS traffic. This avoids all resolv.conf-related issues:
   - Works in read-only `/etc/resolv.conf` scenarios (Kubernetes, hardened containers)
   - More powerful interception (catches applications that hardcode DNS servers)
   - Consistent behavior across all deployment modes
   - **Graceful degradation**: If iptables setup fails (e.g., missing `CAP_NET_ADMIN`), the sidecar logs a warning and continues without enforcement (network policy disabled)

7. **Simplified privilege model with CAP_NET_ADMIN**: When `network_policy` is specified, the runtime grants the `CAP_NET_ADMIN` capability to the egress sidecar container only; the application container's privileges are unchanged:
   - **No user switching required**: Container runs as the image's original user (root or non-root)
   - **iptables REDIRECT**: DNS traffic is intercepted via iptables, which works with CAP_NET_ADMIN regardless of user
   - **No resolv.conf modification needed**: iptables redirects port 53 traffic to DNS Proxy on a non-privileged port
   - **Unified Docker/K8s behavior**: Same simple logic for both runtimes

8. **CAP_NET_ADMIN security considerations**:
   - CAP_NET_ADMIN allows network configuration within the container's network namespace
   - Container network namespace isolation limits the impact (cannot affect host or other containers)
   - This is acceptable because the sandbox itself is the primary security boundary
   - For K8s clusters with `restricted` Pod Security Standards (which prohibit any capabilities), network_policy enforcement will degrade gracefully with a warning
9. **HostNetwork is unsupported when network_policy is enabled**:
   - K8s: if `hostNetwork=true`, the server MUST reject sandbox creation when `network_policy` is set, because NET_ADMIN in hostNetwork would affect the node.
   - Docker: if `--network host` is requested with `network_policy`, the request MUST be rejected.
   - Sidecar SHOULD self-check and refuse to start (logging a warning) if it detects host network mode, to avoid touching host iptables/nftables.
10. **No resolv.conf fallback**: We intentionally avoid rewriting `/etc/resolv.conf`. If `CAP_NET_ADMIN` is unavailable and iptables REDIRECT cannot be installed, DNS interception is not possible; network_policy is disabled and a warning is logged.

### Risks and Mitigations

| Risk | Impact | Mitigation |
|------|--------|------------|
| DNS-only bypass via direct IP | Security | Document limitation clearly; recommend `CAP_NET_ADMIN` for security-critical use cases |
| DoH/DoT bypass | Security | Layer 2 blocks port 443 to known DoH providers and port 853 (DoT) |
| Performance overhead | Reliability | DNS proxy adds <1ms latency; nftables is kernel-native with negligible overhead |
| Kernel compatibility | Compatibility | Runtime capability detection with graceful degradation |
| Application breaks due to DNS filtering | Usability | Clear error messages; policy validation at creation time |
| CAP_NET_ADMIN required | Privilege | Clear documentation; graceful degradation with warning when capability not available |
| K8s restricted PSS | Compatibility | Clusters with `restricted` Pod Security Standards prohibit capabilities; network_policy will degrade with warning |
| Malicious code with CAP_NET_ADMIN | Security | Container network namespace isolation limits impact; cannot affect host or other containers |

## Design Details

### Design Principle: Sidecar as the Egress Controller

A key design decision is that **all egress control logic resides in a dedicated sidecar** that shares the sandbox network namespace. The application container keeps its default privileges (no NET_ADMIN). This approach provides:

1. **Runtime Agnostic**: The same sidecar pattern works for Docker (network_mode: container) and Kubernetes (same Pod).
2. **Zero App-Container Privilege Elevation**: NET_ADMIN is confined to the sidecar; the application container runs unprivileged.
3. **Consistent Behavior**: Users get identical egress control behavior regardless of runtime when they opt into the sidecar.
4. **Operational Separation**: Network policy configuration, logging, and debugging are isolated in the sidecar; application image remains unchanged.

```
┌──────────────────────────────────────────────────────────────────┐
│                     OpenSandbox Server                           │
│  ┌─────────────────────┐    ┌───────────────────┐                │
│  │ DockerSandboxService│    │ K8sSandboxService │                │
│  └─────────┬───────────┘    └────────┬──────────┘                │
│            │                         │                           │
│            │ Pass network_policy     │ Pass network_policy       │
│            │ via env/config          │ via env/config            │
│            └───────────┬─────────────┘                           │
└────────────────────────┼─────────────────────────────────────────┘
                         ▼
┌──────────────────────────────────────────────────────────────────┐
│                    Sandbox (shared net namespace)                │
│                                                                  │
│  ┌───────────────────┐      ┌────────────────────────────────┐   │
│  │ Application       │      │ Egress Sidecar (NET_ADMIN)     │   │
│  │ Container         │      │ - DNS Proxy (Layer 1)          │   │
│  │ (no NET_ADMIN)    │      │ - Network Filter (Layer 2)     │   │
│  └─────────┬─────────┘      │ - Capability Detection         │   │
│            │ DNS Query      └────────────────────────────────┘   │
│            ▼                                                     │
│        (shared netns)                                            │
│                                                                  │
└──────────────────────────────────────────────────────────────────┘
```

### API Schema

Extension to `specs/sandbox-lifecycle.yml`:

```yaml
components:
  schemas:
    NetworkPolicy:
      type: object
      properties:
        egress:
          type: array
          items:
            $ref: '#/components/schemas/EgressRule'
        defaultAction:
          type: string
          enum: [allow, deny]
          default: deny
          description: Default action when no rules match
        require_full_isolation:
          type: boolean
          default: false
          description: If true, sandbox creation fails when network-layer enforcement is unavailable

    EgressRule:
      type: object
      required:
        - action
        - target
      properties:
        action:
          type: string
          enum: [allow, deny]
        target:
          type: string
          description: |
            Destination specification. Supports multiple formats:
            - FQDN: "api.github.com"
            - Wildcard domain: "*.pypi.org"
            - IP address: "10.0.0.5"
            - CIDR block: "10.0.0.0/8"
            
            Note: IP/CIDR rules require Layer 2 (nftables) to be effective.
            In dns-only mode, IP/CIDR rules will be ignored with a warning.

    CreateSandboxRequest:
      # ... existing fields ...
      properties:
        network_policy:
          $ref: '#/components/schemas/NetworkPolicy'
```

**SDK Usage Example (Python)**:

```python
from opensandbox import Sandbox, NetworkPolicy, EgressRule

sandbox = await Sandbox.create(
    image="python:3.11",
    network_policy=NetworkPolicy(
        egress=[
            # Domain rules (handled by DNS Proxy)
            EgressRule(action="allow", target="api.github.com"),
            EgressRule(action="allow", target="*.pypi.org"),
            
            # IP/CIDR rules (handled by nftables directly)
            EgressRule(action="allow", target="10.0.0.5"),       # Single IP
            EgressRule(action="allow", target="10.96.0.0/12"),   # K8s Service CIDR
        ],
        defaultAction="deny",
    ),
)
```

### Architecture Overview

```
┌─────────────────────────────────────────────────────────────────────────────┐
│                              Server (Python)                                │
│  ┌──────────────────────────────────────────────────────────────────────┐   │
│  │ CreateSandboxRequest                                                 │   │
│  │   network_policy:                                                    │   │
│  │     egress:                                                          │   │
│  │       - {action: allow, target: "api.github.com"}                    │   │
│  │       - {action: allow, target: "*.pypi.org"}                        │   │
│  └───────────────────────────────┬──────────────────────────────────────┘   │
│                                  │                                          │
│                                  ▼                                          │
│  ┌──────────────────────────────────────────────────────────────────────┐   │
│  │ DockerSandboxService / K8sSandboxService                             │   │
│  │   1. Start egress sidecar (CAP_NET_ADMIN) + app container (shared ns) │   │
│  │   2. Inject OPENSANDBOX_EGRESS_TOKEN into sidecar                    │   │
│  │   3. (Optional) Seed policy from env OPENSANDBOX_EGRESS_RULES        │   │
│  │   4. Wait for sidecar /healthz = 200                                 │   │
│  │   5. POST network_policy to /policy with header                      │   │
│  │      "OPENSANDBOX-EGRESS-AUTH: <token>"                              │   │
│  └───────────────────────────────┬──────────────────────────────────────┘   │
└──────────────────────────────────┼──────────────────────────────────────────┘
                                   │
                                   ▼
┌─────────────────────────────────────────────────────────────────────────────┐
│                           Sandbox (shared netns)                             │
│  ┌──────────────────────────────────────────────────────────────────────┐   │
│  │ Egress Sidecar (NET_ADMIN)                                           │   │
│  │   1. Load optional bootstrap from OPENSANDBOX_EGRESS_RULES (else deny-all)│   │
│  │   2. Accept updates via HTTP /policy (with auth header)              │   │
│  │   3. Start DNS Proxy on 127.0.0.1:15353 (non-privileged port)        │   │
│  │   4. Setup iptables REDIRECT 53→15353 (CAP_NET_ADMIN)                │   │
│  │   5. Probe nftables capability (fallback to dns-only)                │   │
│  │   6. Initialize network filter if available                          │   │
│  └──────────────────────────────────────────────────────────────────────┘   │
└─────────────────────────────────────────────────────────────────────────────┘
```

### Layer 1: DNS Proxy

The DNS proxy runs inside the egress sidecar and handles all DNS queries from the sandbox shared network namespace.

#### Listening Address Selection

With the iptables REDIRECT approach, the DNS proxy binds to a **non-privileged port** (15353) and iptables redirects traffic from port 53:

| Approach | Port | Privilege Needed | Notes |
|----------|------|-----------------|-------|
| iptables REDIRECT | `127.0.0.1:15353` | CAP_NET_ADMIN | ✅ **Recommended** - works without root |
| Direct binding | `127.0.0.1:53` | root user | ❌ Requires root to bind privileged port |
| Modify resolv.conf | `127.0.0.1:53` | writable resolv.conf | ❌ Not always writable (K8s, hardened) |

> **Note**: By using iptables REDIRECT, we avoid needing root to bind to port 53, and avoid needing to modify `/etc/resolv.conf`. All DNS traffic to port 53 is transparently redirected to our proxy on port 15353.

#### Startup Sequence

```go
func (p *DNSProxy) Start() error {
    // 1. Bind to non-privileged port (doesn't require root)
    addr := "127.0.0.1:15353"
    server := &dns.Server{Addr: addr, Net: "udp", Handler: p}
    
    go func() {
        if err := server.ListenAndServe(); err != nil {
            logs.Error("[dns] proxy server error: %v", err)
        }
    }()
    
    p.server = server
    return nil
}

func (c *Controller) setupIptablesRedirect() error {
    // 2. Setup iptables REDIRECT (requires CAP_NET_ADMIN, NOT root)
    rules := [][]string{
        {"iptables", "-t", "nat", "-A", "OUTPUT", "-p", "udp",
         "--dport", "53", "-j", "REDIRECT", "--to-port", "15353"},
        {"iptables", "-t", "nat", "-A", "OUTPUT", "-p", "tcp",
         "--dport", "53", "-j", "REDIRECT", "--to-port", "15353"},
    }
    
    for _, args := range rules {
        if output, err := exec.Command(args[0], args[1:]...).CombinedOutput(); err != nil {
            return fmt.Errorf("%v failed: %w (output: %s)", args, err, output)
        }
    }
    return nil
}
```

#### Discovering Upstream DNS Servers

The DNS Proxy needs to know where to forward allowed queries. It reads the container's original `/etc/resolv.conf` to discover upstream DNS servers:

```go
func (p *DNSProxy) initUpstream() error {
    // Read resolv.conf to discover upstream DNS servers
    content, err := os.ReadFile("/etc/resolv.conf")
    if err != nil {
        // Use fallback DNS servers if resolv.conf is unreadable
        p.upstream = []string{"8.8.8.8:53", "1.1.1.1:53"}
        return nil
    }
    
    // Parse nameserver entries
    p.upstream = parseNameservers(content)
    if len(p.upstream) == 0 {
        p.upstream = []string{"8.8.8.8:53", "1.1.1.1:53"}
    }
    return nil
}
```

> **Note**: With iptables REDIRECT, we don't modify `/etc/resolv.conf`. We only read it to discover upstream servers.

#### DNS Interception via iptables

**Simplified Approach (CAP_NET_ADMIN only)**:

With the simplified design, we only use iptables REDIRECT. The logic is straightforward:

```go
func (c *Controller) setupNetworkPolicy() error {
    // Setup iptables REDIRECT (requires CAP_NET_ADMIN)
    if err := c.setupIptablesRedirect(); err != nil {
        // Graceful degradation: log warning but don't fail
        logs.Warn("[egress] iptables setup failed: %v", err)
        logs.Warn("[egress] network_policy will NOT be enforced")
        logs.Warn("[egress] ensure container has CAP_NET_ADMIN capability")
        
        // Continue running sidecar (other functionality still works)
        // The sandbox still works, just without network policy enforcement
        return nil
    }
    
    logs.Info("[egress] network policy active (iptables REDIRECT mode)")
    return nil
}
```

**Key Design Decision**: Always degrade gracefully. If the iptables setup fails:
- Log clear warning messages
- Continue running sidecar (other functionality still works)
- User can see via logs/status that policy is not enforced
- No error thrown, no container crash

> **Note**: The `require_full_isolation` field in the API schema allows users to **opt-in** to strict mode where sandbox creation fails if policy cannot be enforced. But the default is graceful degradation.

**Implementation Details**:

```go
// pkg/egress/dns/proxy.go

package dns

import (
    "net"
    "strings"
    "sync"
    "time"

    "github.com/miekg/dns"
)

type DNSProxy struct {
    policy       *NetworkPolicy
    upstream     string              // e.g., "8.8.8.8:53"
    server       *dns.Server
    resolvedIPs  sync.Map            // domain -> []ResolvedIP
    onIPLearned  func(domain string, ips []net.IP)
}

// ResolvedIP tracks IPs learned from DNS queries
type ResolvedIP struct {
    IP net.IP
}

func (p *DNSProxy) ServeDNS(w dns.ResponseWriter, r *dns.Msg) {
    if len(r.Question) == 0 {
        p.refuseQuery(w, r)
        return
    }

    domain := strings.TrimSuffix(r.Question[0].Name, ".")
    
    // Always allow localhost
    if p.isLocalhost(domain) {
        p.forwardQuery(w, r)
        return
    }

    // Check policy
    action := p.policy.Evaluate(domain)
    if action == ActionDeny {
        p.logDenied(domain)
        p.respondNXDomain(w, r)
        return
    }

    // Forward to upstream and learn IPs
    resp, err := p.forwardAndLearn(r, domain)
    if err != nil {
        p.respondServerFailure(w, r)
        return
    }

    w.WriteMsg(resp)
}

func (p *DNSProxy) forwardAndLearn(r *dns.Msg, domain string) (*dns.Msg, error) {
    client := &dns.Client{Timeout: 5 * time.Second}
    resp, _, err := client.Exchange(r, p.upstream)
    if err != nil {
        return nil, err
    }

    // Extract IPs from response
    var ips []net.IP
    for _, rr := range resp.Answer {
        switch v := rr.(type) {
        case *dns.A:
            ips = append(ips, v.A)
        case *dns.AAAA:
            ips = append(ips, v.AAAA)
        }
    }

    // Store resolved IPs and notify network filter
    if len(ips) > 0 {
        p.storeResolvedIPs(domain, ips)
        
        // Notify network filter layer
        if p.onIPLearned != nil {
            p.onIPLearned(domain, ips)
        }
    }

    return resp, nil
}

func (p *DNSProxy) respondNXDomain(w dns.ResponseWriter, r *dns.Msg) {
    m := new(dns.Msg)
    m.SetRcode(r, dns.RcodeNameError)
    m.Authoritative = true
    w.WriteMsg(m)
}
```

**Policy Matching**:

```go
// pkg/egress/policy.go

type NetworkPolicy struct {
    Egress        []EgressRule `json:"egress"`
    DefaultAction Action       `json:"defaultAction"`
}

type TargetType int

const (
    TargetTypeDomain TargetType = iota  // FQDN or wildcard
    TargetTypeIP                         // Single IP address
    TargetTypeCIDR                       // CIDR block
)

type EgressRule struct {
    Action Action `json:"action"`
    Target string `json:"target"`
    
    // Parsed target (internal)
    targetType  TargetType
    domainRegex *regexp.Regexp  // for TargetTypeDomain
    ip          net.IP          // for TargetTypeIP
    cidr        *net.IPNet      // for TargetTypeCIDR
}

// Parse classifies r.Target and caches the parsed form on the rule.
//
// A target that parses as a CIDR block becomes TargetTypeCIDR, one that parses
// as a single IP address becomes TargetTypeIP, and anything else is treated as
// a domain (FQDN or wildcard). The returned error is always nil.
func (r *EgressRule) Parse() error {
    _, network, cidrErr := net.ParseCIDR(r.Target)
    addr := net.ParseIP(r.Target)

    switch {
    case cidrErr == nil:
        // CIDR takes precedence over the single-IP interpretation.
        r.targetType, r.cidr = TargetTypeCIDR, network
    case addr != nil:
        r.targetType, r.ip = TargetTypeIP, addr
    default:
        // Neither CIDR nor IP: fall back to a domain target.
        r.targetType = TargetTypeDomain
    }
    return nil
}

// Evaluate returns the action for domain: the Action of the first egress rule
// whose MatchesDomain reports true, or DefaultAction when none matches. The
// domain is lower-cased before matching.
func (p *NetworkPolicy) Evaluate(domain string) Action {
    name := strings.ToLower(domain)

    for i := range p.Egress {
        if candidate := &p.Egress[i]; candidate.MatchesDomain(name) {
            return candidate.Action
        }
    }

    return p.DefaultAction
}

// MatchesDomain reports whether domain is covered by this rule.
//
// Only rules whose parsed type is TargetTypeDomain can match. Comparison is
// case-insensitive and ignores a single trailing root dot on either side, so
// the fully-qualified form "example.com." (as DNS query names are commonly
// written) matches a rule written as "example.com" and vice versa.
//
// A pattern of the form "*.example.com" matches any name ending in
// ".example.com" and also the bare apex "example.com".
func (r *EgressRule) MatchesDomain(domain string) bool {
    if r.targetType != TargetTypeDomain {
        return false
    }

    // Normalize both sides: lower-case and drop the optional root dot.
    pattern := strings.TrimSuffix(strings.ToLower(r.Target), ".")
    domain = strings.TrimSuffix(strings.ToLower(domain), ".")

    // Exact match
    if pattern == domain {
        return true
    }

    // Wildcard match: *.example.com matches foo.example.com, bar.example.com
    // and the apex example.com itself.
    if strings.HasPrefix(pattern, "*.") {
        apex := pattern[2:] // "example.com"
        return domain == apex || strings.HasSuffix(domain, "."+apex)
    }

    return false
}

// MatchesIP reports whether ip is covered by this rule. A TargetTypeIP rule
// matches on address equality, a TargetTypeCIDR rule on containment; any other
// rule type never matches.
func (r *EgressRule) MatchesIP(ip net.IP) bool {
    if r.targetType == TargetTypeIP {
        return r.ip.Equal(ip)
    }
    if r.targetType == TargetTypeCIDR {
        return r.cidr.Contains(ip)
    }
    return false
}
```

**Static IP/CIDR Rules Initialization**:

At startup, the controller parses all rules and adds static IP/CIDR entries directly to nftables:

```go
// initializeStaticRules parses every egress rule in the controller's policy
// and pushes allow-rules with a static IP or CIDR target into the network
// filter.
//
// Rules are parsed in place (by index, not on a range copy) so the parsed
// target type, IP and CIDR remain available on c.policy.Egress afterwards.
// Deny rules and domain rules install nothing here. When no network filter is
// available, or the filter rejects an entry, a warning is logged and the
// remaining rules are still processed. Only a Parse failure is returned.
func (c *Controller) initializeStaticRules() error {
    for i := range c.policy.Egress {
        rule := &c.policy.Egress[i]
        if err := rule.Parse(); err != nil {
            return err
        }

        if rule.Action != ActionAllow {
            continue
        }

        switch rule.targetType {
        case TargetTypeIP:
            if c.netFilter == nil {
                logs.Warn("[egress] IP rule %s ignored (nftables unavailable)", rule.Target)
                continue
            }
            // Only report the IP as allowed once the filter has accepted it.
            if err := c.netFilter.AddAllowedIPs([]net.IP{rule.ip}); err != nil {
                logs.Warn("[egress] failed to allow static IP %s: %v", rule.ip, err)
                continue
            }
            logs.Info("[egress] static IP allowed: %s", rule.ip)

        case TargetTypeCIDR:
            if c.netFilter == nil {
                logs.Warn("[egress] CIDR rule %s ignored (nftables unavailable)", rule.Target)
                continue
            }
            if err := c.netFilter.AddAllowedCIDR(rule.cidr); err != nil {
                logs.Warn("[egress] failed to allow static CIDR %s: %v", rule.cidr, err)
                continue
            }
            logs.Info("[egress] static CIDR allowed: %s", rule.cidr)
        }
    }
    return nil
}
```

### Layer 2: Network Filter

When `CAP_NET_ADMIN` is available, the sidecar sets up kernel-level packet filtering.

**nftables Implementation**:

```go
// pkg/egress/netfilter/nftables.go

package netfilter

import (
    "net"
    "sync"

    "github.com/google/nftables"
    "github.com/google/nftables/expr"
)

// NftablesFilter holds the nftables connection and the table, chain and set
// objects created by initialize. mu is taken by AddAllowedIPs and
// AddAllowedCIDR around their changes to the rule set.
type NftablesFilter struct {
    conn       *nftables.Conn
    table      *nftables.Table
    chain      *nftables.Chain
    allowedSet *nftables.Set // set of allowed destination IPv4 addresses
    mu         sync.Mutex
}

// NewNftablesFilter opens an nftables connection and installs the base rule
// set through initialize. If initialization fails the connection is closed
// and the error is returned.
func NewNftablesFilter() (*NftablesFilter, error) {
    nl, openErr := nftables.New()
    if openErr != nil {
        return nil, openErr
    }

    filter := &NftablesFilter{conn: nl}
    initErr := filter.initialize()
    if initErr == nil {
        return filter, nil
    }

    // Do not leak the connection when setup could not complete.
    nl.CloseLasting()
    return nil, initErr
}

// initialize builds the base rule set: an IPv4 table named
// "opensandbox_egress", an "allowed_ips" address set, and an output-hook
// filter chain whose default policy is drop. Rules are then queued in this
// order: localhost, established connections, accept for destinations in
// allowed_ips, DoH provider blocks, and a block on port 853. Everything is
// committed by the final Flush, whose error is returned.
//
// NOTE(review): the table family is IPv4 only, so nothing in this function
// covers IPv6 traffic — confirm whether that is handled elsewhere.
// NOTE(review): the accept rule for allowed_ips is queued before the DoH/DoT
// block rules; if rules are evaluated in insertion order, an allowed IP would
// be accepted before those blocks are reached — confirm the intended order.
func (f *NftablesFilter) initialize() error {
    // Create table
    f.table = &nftables.Table{
        Family: nftables.TableFamilyIPv4,
        Name:   "opensandbox_egress",
    }
    // The result of AddTable is not checked here.
    f.conn.AddTable(f.table)

    // Create set for allowed IPs
    f.allowedSet = &nftables.Set{
        Table:   f.table,
        Name:    "allowed_ips",
        KeyType: nftables.TypeIPAddr,
    }
    // The set starts empty (nil initial elements); AddAllowedIPs fills it later.
    if err := f.conn.AddSet(f.allowedSet, nil); err != nil {
        return err
    }

    // Create output chain with default drop
    f.chain = &nftables.Chain{
        Name:     "output",
        Table:    f.table,
        Type:     nftables.ChainTypeFilter,
        Hooknum:  nftables.ChainHookOutput,
        Priority: nftables.ChainPriorityFilter,
        Policy:   nftables.ChainPolicyPtr(nftables.ChainPolicyDrop),
    }
    f.conn.AddChain(f.chain)

    // Allow localhost
    f.addLocalhostRules()

    // Allow established connections
    f.addEstablishedRule()

    // Allow IPs in the allowed set
    f.conn.AddRule(&nftables.Rule{
        Table: f.table,
        Chain: f.chain,
        Exprs: []expr.Any{
            // Match destination IP in allowed_ips set
            &expr.Payload{
                DestRegister: 1,
                Base:         expr.PayloadBaseNetworkHeader,
                Offset:       16, // dst IP offset in IPv4
                Len:          4,
            },
            &expr.Lookup{
                SourceRegister: 1,
                SetName:        f.allowedSet.Name,
            },
            &expr.Verdict{Kind: expr.VerdictAccept},
        },
    })

    // Block DoH (known providers on port 443)
    f.blockDoHProviders()

    // Block DoT (port 853)
    f.blockPort(853)

    // Commit everything queued above.
    return f.conn.Flush()
}

// AddAllowedIPs adds the IPv4 addresses in ips to the allowed_ips set and
// flushes the change. Addresses without an IPv4 form are skipped; when nothing
// remains the call returns nil without touching nftables.
func (f *NftablesFilter) AddAllowedIPs(ips []net.IP) error {
    f.mu.Lock()
    defer f.mu.Unlock()

    batch := make([]nftables.SetElement, 0, len(ips))
    for i := range ips {
        v4 := ips[i].To4()
        if v4 == nil {
            // Not representable as IPv4; the set only holds IPv4 keys.
            continue
        }
        batch = append(batch, nftables.SetElement{Key: v4})
    }

    if len(batch) > 0 {
        if err := f.conn.SetAddElements(f.allowedSet, batch); err != nil {
            return err
        }
        return f.conn.Flush()
    }

    return nil
}

// AddAllowedCIDR appends an accept rule for destinations inside cidr and
// flushes it.
//
// The rule loads the IPv4 destination address, ANDs it with the network mask
// and compares the result with the network address. Because the table is
// IPv4-only, a nil cidr or one that has no IPv4 form is skipped and nil is
// returned, mirroring how AddAllowedIPs skips non-IPv4 addresses. The compare
// operand is always the masked network address, so an IPNet whose IP has host
// bits set still matches its whole prefix.
func (f *NftablesFilter) AddAllowedCIDR(cidr *net.IPNet) error {
    if cidr == nil {
        return nil
    }
    ip4 := cidr.IP.To4()
    if ip4 == nil {
        // IPv6 prefix: cannot be expressed as a 4-byte payload match.
        return nil
    }

    // Reduce the mask to its 4-byte IPv4 form (an IPv4-mapped IPNet may carry
    // a 16-byte mask).
    mask := cidr.Mask
    if len(mask) == net.IPv6len {
        mask = mask[net.IPv6len-net.IPv4len:]
    }
    if len(mask) != net.IPv4len {
        return nil
    }
    network := ip4.Mask(mask)

    f.mu.Lock()
    defer f.mu.Unlock()

    f.conn.AddRule(&nftables.Rule{
        Table: f.table,
        Chain: f.chain,
        Exprs: []expr.Any{
            // Load the destination IP from the IPv4 header.
            &expr.Payload{
                DestRegister: 1,
                Base:         expr.PayloadBaseNetworkHeader,
                Offset:       16, // dst IP offset in IPv4
                Len:          4,
            },
            // dst & mask
            &expr.Bitwise{
                SourceRegister: 1,
                DestRegister:   1,
                Len:            4,
                Mask:           mask,
                Xor:            []byte{0, 0, 0, 0},
            },
            // (dst & mask) == network address
            &expr.Cmp{
                Op:       expr.CmpOpEq,
                Register: 1,
                Data:     network,
            },
            &expr.Verdict{Kind: expr.VerdictAccept},
        },
    })

    return f.conn.Flush()
}
```

### Capability Detection and Graceful Degradation

```go
// pkg/egress/controller.go

package egress

import (
    "errors"
    "net"
    "syscall"

    "github.com/beego/beego/v2/core/logs"
)

// EnforcementMode describes which enforcement layers are active.
type EnforcementMode int

const (
    ModeDisabled   EnforcementMode = iota // No network_policy configured
    ModeDNSOnly                           // DNS filtering only (soft limit)
    ModeNftables                          // DNS + nftables (full isolation)
)

// String returns the human-readable name of the mode. Values outside the
// declared constants yield "unknown" instead of panicking, so the method is
// safe to use in log statements with any value.
func (m EnforcementMode) String() string {
    switch m {
    case ModeDisabled:
        return "disabled"
    case ModeDNSOnly:
        return "dns-only"
    case ModeNftables:
        return "dns+nftables"
    default:
        return "unknown"
    }
}

// IsFullIsolation reports whether the mode includes nftables network
// filtering in addition to DNS filtering.
func (m EnforcementMode) IsFullIsolation() bool {
    return m == ModeNftables
}

// Controller ties together the policy, the DNS proxy and the optional
// network filter, and records which enforcement mode was selected.
type Controller struct {
    mode        EnforcementMode
    policy      *NetworkPolicy
    dnsProxy    *DNSProxy
    netFilter   NetFilter // nil when no network filter is available
}

// NetFilter is the network filter contract used by the controller.
type NetFilter interface {
    // AddAllowedIPs permits traffic to the given addresses.
    AddAllowedIPs(ips []net.IP) error
    AddAllowedCIDR(cidr *net.IPNet) error  // For static CIDR rules
    // Close releases the filter; the controller calls it from Stop.
    Close() error
}

// NewController builds a controller for policy.
//
// A nil policy, or one without egress rules, is replaced by a deny-all policy;
// that same effective policy is stored on the controller and given to the DNS
// proxy. The caller's RequireFullIsolation flag is read before the
// replacement, so a nil policy is never dereferenced.
//
// The enforcement mode comes from probeCapabilities. An error is returned when
// full isolation is required but unavailable, or when the DNS proxy cannot be
// created; in the latter case an already opened network filter is closed.
// When a network filter exists, addresses learned by the DNS proxy are
// forwarded to it.
func NewController(policy *NetworkPolicy) (*Controller, error) {
    // Capture the requirement up front: policy may be nil or replaced below.
    requireFull := policy != nil && policy.RequireFullIsolation

    // No policy = default deny-all fallback
    // - DNS proxy still runs with deny-all baseline
    // - No resolv.conf modification
    // - No network filtering if nftables unavailable
    // - External access denied unless rules are provided
    if policy == nil || len(policy.Egress) == 0 {
        policy = &NetworkPolicy{DefaultAction: ActionDeny}
        logs.Info("[egress] no network_policy configured; enforcing default deny-all")
    }

    ctrl := &Controller{policy: policy}

    // Probe capabilities in order of preference
    mode, netFilter := ctrl.probeCapabilities()
    ctrl.mode = mode
    ctrl.netFilter = netFilter

    // Fail if full isolation is required but unavailable
    if requireFull && !mode.IsFullIsolation() {
        return nil, errors.New("network_policy.require_full_isolation is true but CAP_NET_ADMIN is not available")
    }

    // Create the DNS proxy with the effective policy
    dnsProxy, err := NewDNSProxy(policy)
    if err != nil {
        // Do not leave the filter installed without a controller owning it.
        if netFilter != nil {
            netFilter.Close()
        }
        return nil, err
    }
    ctrl.dnsProxy = dnsProxy

    // Wire DNS proxy to network filter
    if netFilter != nil {
        dnsProxy.onIPLearned = func(domain string, ips []net.IP) {
            if err := netFilter.AddAllowedIPs(ips); err != nil {
                logs.Warn("[egress] failed to add IPs to filter: %v", err)
            }
        }
    }

    logs.Info("[egress] control mode: %s", mode)
    if !mode.IsFullIsolation() {
        logs.Warn("[egress] running in dns-only mode; direct IP connections can bypass policy")
        logs.Warn("[egress] for full isolation, run container with CAP_NET_ADMIN")
    }

    return ctrl, nil
}

// probeCapabilities tries to create an nftables filter. On success it returns
// ModeNftables with the filter; otherwise it logs the failure at debug level
// and falls back to ModeDNSOnly with no filter.
func (c *Controller) probeCapabilities() (EnforcementMode, NetFilter) {
    nft, probeErr := NewNftablesFilter()
    if probeErr != nil {
        logs.Debug("[egress] nftables probe failed: %v", probeErr)
        // Fallback to DNS-only (no Layer 2 protection)
        return ModeDNSOnly, nil
    }

    logs.Debug("[egress] nftables probe succeeded")
    return ModeNftables, nft
}

func isPermissionError(err error) bool {
    var errno syscall.Errno
    if errors.As(err, &errno) {
        return errno == syscall.EPERM || errno == syscall.EACCES
    }
    return false
}

// Mode reports the enforcement mode recorded on the controller.
func (c *Controller) Mode() EnforcementMode {
    current := c.mode
    return current
}

// Start launches the DNS proxy and returns its start error. In ModeDisabled
// nothing is started and nil is returned.
func (c *Controller) Start() error {
    if c.mode != ModeDisabled {
        return c.dnsProxy.Start()
    }
    return nil
}

// Stop shuts down the DNS proxy (if any) and then closes the network filter
// (if any). The error from closing the filter is returned to the caller
// instead of being discarded; nil is returned when there is no filter.
func (c *Controller) Stop() error {
    if c.dnsProxy != nil {
        c.dnsProxy.Stop()
    }
    if c.netFilter != nil {
        return c.netFilter.Close()
    }
    return nil
}
```

### Enforcement Modes

| Mode | DNS Filtering | Network Filtering | Bypass Possible | Privilege Required |
|------|--------------|-------------------|-----------------|-------------------|
| `disabled` | No | No | N/A | None |
| `dns-only` | Yes (iptables REDIRECT) | No | Yes (direct IP, DoH) | `CAP_NET_ADMIN` (if absent → falls back to `disabled` with warning) |
| `dns+nftables` | Yes (iptables REDIRECT) | Yes (nftables) | No | `CAP_NET_ADMIN` |

> **Note**: All enforcement modes (except `disabled`) require `CAP_NET_ADMIN` for iptables REDIRECT. The difference is whether nftables-based network filtering is available for full isolation.

### Cross-Platform Support

The implementation uses Go build tags to provide platform-specific implementations while maintaining a unified interface.

| Component | Linux | Windows | macOS | Notes |
|-----------|-------|---------|-------|-------|
| DNS Proxy Server | ✅ `miekg/dns` | ✅ `miekg/dns` | ✅ `miekg/dns` | Pure Go, cross-platform |
| Policy Matching | ✅ | ✅ | ✅ | Pure logic, no OS deps |
| DNS Interception | iptables REDIRECT | netsh / WFP (future) | pf (future) | Platform-specific |
| Network Filter | nftables | WFP (future) | pf (future) | Platform-specific |

**Implementation Strategy**:

```go
// pkg/egress/interception/interception.go
// Platform-specific DNS interception via build tags

//go:build linux
// SetupDNSInterception appends iptables nat/OUTPUT rules that redirect UDP
// and TCP traffic destined for port 53 to proxyPort.
//
// proxyPort must be a valid port number (1-65535); otherwise an error is
// returned before any command runs. If an iptables invocation fails, the
// returned error wraps the command error and includes the command's combined
// output. Rules already appended before a failure are left in place.
//
// NOTE(review): rules are appended with -A on every call, so calling this
// twice presumably installs duplicates — confirm callers invoke it once.
// NOTE(review): the redirect applies to the whole OUTPUT chain; verify that
// the proxy's own upstream queries to port 53 are exempted elsewhere.
func SetupDNSInterception(proxyPort int) error {
    if proxyPort < 1 || proxyPort > 65535 {
        return fmt.Errorf("invalid DNS proxy port %d: must be in 1-65535", proxyPort)
    }

    // iptables REDIRECT - requires CAP_NET_ADMIN, not root
    port := fmt.Sprintf("%d", proxyPort)
    for _, proto := range []string{"udp", "tcp"} {
        args := []string{"iptables", "-t", "nat", "-A", "OUTPUT", "-p", proto,
            "--dport", "53", "-j", "REDIRECT", "--to-port", port}
        if output, err := exec.Command(args[0], args[1:]...).CombinedOutput(); err != nil {
            return fmt.Errorf("%v failed: %w (output: %s)", args, err, output)
        }
    }
    return nil
}

//go:build windows
// SetupDNSInterception (Windows) runs netsh to set a static DNS server of
// 127.0.0.1 on the interface named "Ethernet" and returns the command's
// error. proxyPort is not used by this fallback (future: WFP).
func SetupDNSInterception(proxyPort int) error {
    netsh := exec.Command(
        "netsh", "interface", "ip", "set", "dns",
        "name=Ethernet", "static", "127.0.0.1",
    )
    return netsh.Run()
}
```

**Phased Platform Support**:

| Phase | Platform | Layer 1 | Layer 2 | Priority |
|-------|----------|---------|---------|----------|
| 1 | Linux | ✅ DNS Proxy | ✅ nftables | High (production) |
| 2 | Windows | ✅ DNS Proxy | ❌ DNS-only | Medium (Windows containers) |
| 3 | Windows | ✅ DNS Proxy | ✅ WFP | Low (full Windows support) |
| 4 | macOS | ✅ DNS Proxy | ❌ DNS-only | Low (dev environment) |

**Windows Platform Notes**:

The simplified CAP_NET_ADMIN approach is Linux-specific. Windows containers require different handling:

| Aspect | Linux | Windows |
|--------|-------|---------|
| Network Filter | nftables (CAP_NET_ADMIN) | Windows Filtering Platform (WFP) |
| DNS Config | Not needed (iptables REDIRECT) | netsh / Registry |
| Privilege Model | CAP_NET_ADMIN capability | Administrator privilege |

**Windows Strategy** (Future work):
- Use WFP APIs for network filtering (requires Administrator)
- DNS proxy with netsh configuration as fallback
- Windows container support is lower priority (Phase 3+)

### Simplified Privilege Model: CAP_NET_ADMIN confined to Sidecar

The sidecar holds the only elevated capability (`CAP_NET_ADMIN`) needed for iptables/nftables. The application container runs with its original user and no added capabilities. No `resolv.conf` modification is required; DNS is intercepted transparently in the shared network namespace.

#### Deployment Flow (Docker)

- Create an egress sidecar container with `--cap-add=NET_ADMIN` (no root required).
- Run the application container with `--network container:<sidecar>` so they share one network namespace.
- Sidecar starts DNS proxy on `127.0.0.1:15353`, installs iptables REDIRECT 53→15353, probes nftables.
- Server waits for sidecar `/healthz` 200, then POSTs the declared sandbox network policy to sidecar `/policy`
  with header `OPENSANDBOX-EGRESS-AUTH: <token>` (token injected via env `OPENSANDBOX_EGRESS_TOKEN`).
- No `OPENSANDBOX_NETWORK_POLICY` env/config injection path is used anymore.

#### Deployment Flow (Kubernetes)

- Pod spec includes two containers: `egress-sidecar` (with `capabilities.add: [NET_ADMIN]`) and the application container (no extra caps).
- Both containers share the pod network namespace by default; sidecar listens on `127.0.0.1:15353`.
- Server (inside cluster) waits for sidecar `/healthz` 200 on the Pod IP, then POSTs the sandbox `networkPolicy`
  to `/policy` with `OPENSANDBOX-EGRESS-AUTH` header. Token comes from `OPENSANDBOX_EGRESS_TOKEN` env on the sidecar.
- HostNetwork + network_policy is rejected.

#### Behavior When CAP_NET_ADMIN Is Unavailable

- Sidecar logs a warning and disables enforcement; application container still runs unprivileged.
- No resolv.conf fallback is attempted.

#### Sidecar Network Setup

```go
// pkg/egress/controller.go

// Start brings up the sidecar's DNS proxy on 127.0.0.1:15353 and then tries
// to install the iptables REDIRECT rules.
//
// A proxy start failure is returned wrapped. A failure to install the
// redirect is only logged: the function still returns nil so the sidecar
// keeps running, but the network policy is not enforced in that case.
func (c *Controller) Start() error {
    hasRules := c.policy != nil && len(c.policy.Egress) > 0
    if !hasRules {
        logs.Info("[egress] no network_policy, enforcing default deny-all")
    }

    // Non-privileged port, so the proxy itself needs no root.
    c.dnsProxy = NewDNSProxy(c.policy, "127.0.0.1:15353")
    if startErr := c.dnsProxy.Start(); startErr != nil {
        return fmt.Errorf("failed to start DNS proxy: %w", startErr)
    }

    // Redirect requires CAP_NET_ADMIN, NOT root.
    redirectErr := c.setupIptablesRedirect()
    if redirectErr == nil {
        logs.Info("[egress] network policy active (iptables REDIRECT mode)")
        return nil
    }

    logs.Warn("[egress] iptables setup failed: %v", redirectErr)
    logs.Warn("[egress] network_policy will NOT be enforced")
    logs.Warn("[egress] ensure sidecar has CAP_NET_ADMIN capability")
    return nil // Continue running sidecar (other functionality still works)
}

// setupIptablesRedirect appends nat/OUTPUT rules redirecting UDP and then TCP
// port 53 to port 15353. The first failing command aborts the sequence; its
// error is returned wrapped together with the command's combined output.
func (c *Controller) setupIptablesRedirect() error {
    for _, proto := range []string{"udp", "tcp"} {
        argv := []string{"iptables", "-t", "nat", "-A", "OUTPUT", "-p", proto,
            "--dport", "53", "-j", "REDIRECT", "--to-port", "15353"}

        out, runErr := exec.Command(argv[0], argv[1:]...).CombinedOutput()
        if runErr != nil {
            return fmt.Errorf("%v failed: %w (output: %s)", argv, runErr, out)
        }
    }
    return nil
}
```

#### Why iptables Works Without Root

| Requirement | Traditional (resolv.conf) | Simplified (iptables) |
|-------------|--------------------------|----------------------|
| Root user | ✅ Required | ❌ Not required |
| CAP_NET_ADMIN | Optional | ✅ Required |
| Modify filesystem | ✅ /etc/resolv.conf | ❌ No |
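| K8s PSS compatible | ❌ restricted prohibits root | ⚠️ `NET_ADMIN` is not in the baseline/restricted capability allow-list; the sidecar needs the privileged profile or a policy exemption |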

The key insight is that `CAP_NET_ADMIN` grants permission to modify network configuration (including iptables rules) **regardless of the user ID**. A non-root user with CAP_NET_ADMIN can successfully run iptables commands.

### Component Changes

#### 1. Server (`server/`)

**`server/src/api/schema.py`**: Add `NetworkPolicy` schema classes.

**`server/src/services/docker.py`** (sidecar pattern):
- Create an egress sidecar container when `network_policy` is present.
- Add `CAP_NET_ADMIN` only to the sidecar.
- Set `OPENSANDBOX_EGRESS_TOKEN` env (random per-sandbox) and optionally `OPENSANDBOX_EGRESS_HTTP_ADDR`.
- Run the application container with `network_mode: "container:<sidecar>"` (shared netns), no extra caps.
- Wait for sidecar `/healthz` 200, then POST `networkPolicy` to `/policy` with header `OPENSANDBOX-EGRESS-AUTH: <token>`.
- Reject `--network host` when `network_policy` is set (hostNetwork not supported).

**`server/src/services/k8s/batchsandbox_provider.py`** (Pod pattern):
- Pod spec includes `egress-sidecar` with `capabilities.add: [NET_ADMIN]` and the application container without extra caps.
- Sidecar env includes `OPENSANDBOX_EGRESS_TOKEN` (and `OPENSANDBOX_EGRESS_HTTP_ADDR` if non-default); may optionally seed `OPENSANDBOX_EGRESS_RULES`.
- Server (inside cluster) waits for `/healthz` on the Pod IP, then POSTs `networkPolicy` to `/policy` with header `OPENSANDBOX-EGRESS-AUTH`.
- Reject `hostNetwork=true` when `network_policy` is set.

#### 2. Sidecar Implementation

New packages:
- `pkg/egress/` - Main controller
- `pkg/egress/dns/` - DNS proxy implementation
- `pkg/egress/policy/` - Policy parsing and matching
- `pkg/egress/netfilter/` - nftables/iptables implementation

**Startup integration** (`main.go` or `bootstrap.sh`):

```go
// main starts the egress sidecar: it loads an optional initial policy from
// OPENSANDBOX_EGRESS_RULES, creates the DNS proxy, starts the policy HTTP
// server in the background, starts the proxy, installs the iptables redirect
// to port 15353, and then blocks until SIGINT or SIGTERM.
//
// If the initial policy cannot be loaded, the problem is logged and the proxy
// starts with no initial policy (default deny-all) rather than silently
// ignoring the error.
func main() {
    ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
    defer cancel()

    initial, err := dnsproxy.LoadPolicyFromEnvVar("OPENSANDBOX_EGRESS_RULES")
    if err != nil {
        // Fail closed: keep the deny-all baseline, but make the cause visible.
        log.Printf("ignoring OPENSANDBOX_EGRESS_RULES, starting with deny-all: %v", err)
        initial = nil
    }

    proxy, err := dnsproxy.New(initial, "") // start default deny-all if nil
    if err != nil { log.Fatal(err) }
    go startPolicyServer(ctx, proxy, os.Getenv("OPENSANDBOX_EGRESS_HTTP_ADDR"), os.Getenv("OPENSANDBOX_EGRESS_TOKEN"))

    if err := proxy.Start(ctx); err != nil { log.Fatal(err) }
    if err := iptables.SetupRedirect(15353); err != nil { log.Fatal(err) }

    <-ctx.Done()
}
```

#### 3. SDKs

**Python SDK** (`sdks/sandbox/python/`):

```python
# opensandbox/models.py

from typing import List, Literal
from pydantic import BaseModel

class EgressRule(BaseModel):
    """A single egress rule: an action applied to one target."""

    # Verdict for traffic matching the target: "allow" or "deny".
    action: Literal["allow", "deny"]
    target: str  # FQDN, wildcard, IP, or CIDR (e.g., "*.pypi.org", "10.0.0.0/8")

class NetworkPolicy(BaseModel):
    """Egress policy for a sandbox: a list of rules plus a default action."""

    # Rules to apply.
    egress: List[EgressRule]
    # Action used when no rule matches; defaults to "deny".
    defaultAction: Literal["allow", "deny"] = "deny"
    # Defaults to False. NOTE(review): presumably makes sandbox creation fail
    # when full isolation is unavailable — confirm against the server.
    require_full_isolation: bool = False
```

#### 4. Specs

Update `specs/sandbox-lifecycle.yml` with the schema defined in [API Schema](#api-schema).

## Test Plan

### Unit Tests

| Test Case | Description |
|-----------|-------------|
| Policy parsing | Valid/invalid policy JSON parsing |
| Domain matching | Exact match, wildcard match, case insensitivity |
| Target parsing | FQDN, wildcard, IP, CIDR format detection |
| DNS response handling | IP extraction and learning |

### Integration Tests

| Test Case | Description |
|-----------|-------------|
| DNS proxy blocks denied domain | Query for denied domain returns NXDOMAIN |
| DNS proxy allows permitted domain | Query for allowed domain returns real IPs |
| Network filter blocks direct IP | `curl http://<ip>` fails when domain not allowed |
| Graceful degradation | System falls back to dns-only mode when nftables is unavailable; without `CAP_NET_ADMIN` the sidecar logs a warning and enforcement is disabled |
| Enforcement mode observable | `/status` API returns correct mode |

### E2E Tests

| Test Case | Description |
|-----------|-------------|
| Python SDK with network_policy | Create sandbox with policy, verify curl behavior |
| Bypass attempt | Verify DoH/direct IP blocked with full isolation |
| Localhost access | Internal services (Jupyter, sidecar endpoints) still work |

## Drawbacks

1. **Increased Complexity**: Introduces a sidecar that must be deployed and operated, with multiple enforcement modes.
2. **Kernel Dependencies**: Full isolation requires nftables support in the kernel.
3. **DNS-only Limitations**: Security-conscious users must understand the bypass risks.
4. **Debugging Difficulty**: Network issues become harder to diagnose with filtering enabled.

## Alternatives

### Alternative 1: Sidecar Proxy (Envoy/mitmproxy)

**Approach**: Run a transparent proxy sidecar that intercepts all egress traffic.

**Pros**:
- L7 visibility (can inspect HTTP headers, TLS SNI)
- No kernel dependencies

**Cons**:
- Performance overhead (user-space proxy)
- TLS interception requires certificate injection
- Additional container resource usage
- Complex configuration

**Decision**: Rejected due to performance overhead and complexity for the common case.

### Alternative 2: External NetworkPolicy Controller (K8s only)

**Approach**: Generate Cilium/Calico NetworkPolicy CRDs instead of in-container enforcement.

**Pros**:
- Leverages existing K8s network policy infrastructure
- No container modifications needed

**Cons**:
- Kubernetes-only; doesn't work for Docker runtime
- Requires Cilium/Calico CNI with FQDN support
- Less portable
- Adds external dependencies and complexity
- Behavior may differ between runtimes

**Decision**: Rejected. The sidecar already provides a unified path across Docker and Kubernetes; adding an external NetworkPolicy controller would reintroduce runtime-specific dependencies.

### Alternative 3: LD_PRELOAD Hook

**Approach**: Inject a shared library that intercepts DNS-related libc calls.

**Pros**:
- Works without network privileges

**Cons**:
- Doesn't work with statically-linked binaries (Go, Rust)
- Fragile across different libc implementations
- Can be bypassed by direct syscalls

**Decision**: Rejected due to limited compatibility.

## Infrastructure Needed

- **Go Dependencies**:
  - `github.com/miekg/dns` - DNS server/client library (cross-platform)
  - `github.com/google/nftables` - nftables Go bindings (Linux only)
  - `golang.zx2c4.com/wireguard/windows` (future) - WFP bindings (Windows only)

- **Container Requirements**:

  | Requirement | When Needed | Notes |
  |-------------|-------------|-------|
  | `CAP_NET_ADMIN` | When `network_policy` specified | Enables iptables REDIRECT without root |
  | iptables binary | When `network_policy` specified | Usually present in Linux containers |
  | No filesystem write needed | N/A | iptables REDIRECT doesn't modify resolv.conf |

- **Build Requirements**:
  - Go 1.21+ with build tag support
  - Platform-specific files using `//go:build` tags following existing sidecar patterns

## Upgrade & Migration Strategy

### Backward Compatibility

- **Default baseline is deny-all**: egress sidecar enforces deny-all until explicit policy is provided.
- **Opt-in rules**: Users specify `network_policy` to open destinations (allow or explicit deny rules).
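- **Graceful degradation**: If `CAP_NET_ADMIN` is unavailable, DNS interception is skipped. The proxy keeps its deny-all default, but because queries are no longer redirected to it, the policy is not enforced and the sidecar logs a warning.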

**Behavior Matrix**:

| Scenario | DNS Proxy | iptables REDIRECT | CAP_NET_ADMIN | Network Filter | External Access |
|----------|-----------|-------------------|---------------|----------------|-----------------|
| No `network_policy` | ✅ On (:15353) | ⚠️ Attempted; warn if unavailable | ⚠️ Required for redirect | ⚠️ If capable | 🔒 Deny-all baseline |
| `network_policy` specified | ✅ On (:15353) | ✅ 53→15353 | ✅ Added | ⚡ If capable | 🔒 Policy-based |

### Migration Path

1. **Phase 1 (MVP)**: DNS Proxy with iptables REDIRECT for DNS interception
2. **Phase 2**: Add nftables-based network filtering (Layer 2) for full isolation

> **Note**: The same sidecar implementation works for both Docker and Kubernetes runtimes. No runtime-specific code paths are needed.

### Documentation Updates

- Add egress control section to SDK documentation
- Add security considerations page explaining enforcement modes
- Add troubleshooting guide for network policy issues


================================================
FILE: oseps/0002-kubernetes-sigs-agent-sandbox-support.md
================================================
---
title: kubernetes-sigs/agent-sandbox Support
authors:
  - "@jwx0925"
creation-date: 2026-01-23
last-updated: 2026-01-23
status: implemented
---

# OSEP-0002: kubernetes-sigs/agent-sandbox Support

<!-- toc -->
- [Summary](#summary)
- [Motivation](#motivation)
  - [Goals](#goals)
  - [Non-Goals](#non-goals)
- [Requirements](#requirements)
- [Proposal](#proposal)
  - [Notes/Constraints/Caveats](#notesconstraintscaveats)
  - [Risks and Mitigations](#risks-and-mitigations)
- [Design Details](#design-details)
- [Test Plan](#test-plan)
- [Drawbacks](#drawbacks)
- [Alternatives](#alternatives)
- [Infrastructure Needed](#infrastructure-needed)
- [Upgrade & Migration Strategy](#upgrade--migration-strategy)
<!-- /toc -->

## Summary

Add first-class support for `kubernetes-sigs/agent-sandbox` as a runtime backend
for OpenSandbox. This enables a Kubernetes-native sandbox lifecycle while
keeping the existing OpenSandbox SDK and API contract, and introduces a
dedicated ingress path for direct sandbox access.

## Motivation

OpenSandbox already provides a Kubernetes runtime roadmap and an SDK-first
experience, but users running `kubernetes-sigs/agent-sandbox` must currently
integrate it manually. A native integration unifies lifecycle management,
observability, and routing, letting teams adopt OpenSandbox without changing
their existing Kubernetes operational model.

### Goals

- Support creating, querying, and terminating sandboxes backed by
  `kubernetes-sigs/agent-sandbox` via the OpenSandbox server API.
- Provide two supported access paths:
  1) Biz -> OpenSandbox SDK -> OpenSandbox server -> K8s API server ->
     `agent-sandbox` pod.
  2) Biz -> OpenSandbox SDK -> OpenSandbox ingress
     (`components/ingress`) -> `agent-sandbox` pod.
- Preserve existing API and SDK behavior for non-agent-sandbox runtimes.

### Non-Goals

- Replacing or removing the existing Docker runtime.
- Implementing a full Kubernetes operator for OpenSandbox itself.
- Changing the sandbox lifecycle API or SDKs in a breaking way.

## Requirements

- Must use the existing OpenSandbox lifecycle API and SDKs without breaking
  changes.
- Must use the Kubernetes API server as the control plane for provisioning.
- Must support routing to sandbox pods through the existing ingress component.
- Must keep security posture aligned with current OpenSandbox auth and
  isolation requirements.

## Proposal

Introduce an `agent-sandbox` runtime implementation in the OpenSandbox server
that provisions sandboxes by creating and managing
`kubernetes-sigs/agent-sandbox` resources (and their resulting pods) through the
Kubernetes API server. The server remains the source of truth for sandbox
lifecycle and uses K8s-native status signals for reconciliation.

Implementation approach: extend the server with a new `agent-sandbox`
`SandboxService` implementation that reuses the existing Kubernetes helper code
in `server/services/k8s` as much as possible, since both flows submit resources
to the Kubernetes API server.

For access, support two primary chains:

1. Lifecycle API path
   - Biz -> OpenSandbox SDK -> OpenSandbox server -> K8s API server ->
     `agent-sandbox` pod
2. Direct ingress path
   - Biz -> OpenSandbox SDK -> OpenSandbox ingress
     (`components/ingress`) -> `agent-sandbox` pod

Both paths should expose the same sandbox endpoints (exec, file operations,
metrics) while allowing ingress routing policies to be configured per cluster.

```mermaid
flowchart LR
    A[Biz] --> B[OpenSandbox SDK]
    B --> C[OpenSandbox Server]
    C --> D[K8s API Server]
    D --> E[agent-sandbox Controller]
    E --> F[Sandbox Pod]
```

```mermaid
flowchart LR
    A[Biz] --> B[OpenSandbox SDK]
    B --> C[OpenSandbox Ingress]
    C --> D[Sandbox Pod]
```

### Notes/Constraints/Caveats

- The `agent-sandbox` controller lifecycle and CRD schema are external; the
  integration must track upstream changes.
- Sandbox pod images must include `execd` (or use an init/sidecar injection
  strategy consistent with existing runtimes).

### Risks and Mitigations

- Risk: K8s API latency or controller reconciliation delays cause slower
  sandbox readiness. Mitigation: asynchronous provisioning with explicit
  readiness checks and timeouts.
- Risk: CRD or API changes in `kubernetes-sigs/agent-sandbox` break integration.
  Mitigation: versioned runtime adapter and compatibility matrix in docs.
- Risk: ingress routing misconfiguration exposes pods. Mitigation: enforce
  namespace scoping, label selectors, and explicit port allowlists.

## Design Details

### Runtime Type and Configuration
- Add a new runtime type in server config, e.g. `runtime.type = agent-sandbox`.
- New config fields:
  - `runtime.kubernetes.kubeconfig` (optional; in-cluster supported)
  - `runtime.kubernetes.namespace`
  - `runtime.agent_sandbox.template` (CRD spec template or defaults)
  - `runtime.agent_sandbox.execd_mode` (embedded image vs init/sidecar)
  - `runtime.agent_sandbox.ingress_enabled` (default true)

### Lifecycle Flow
1. `POST /sandboxes`:
   - Validate request and build `agent-sandbox` CR or pod spec.
   - Create resource via K8s API server.
   - Persist sandbox record with runtime metadata and labels.
2. `GET /sandboxes/{id}`:
   - Read resource status and pod phase.
   - Map to OpenSandbox lifecycle states.
3. `DELETE /sandboxes/{id}`:
   - Delete `agent-sandbox` resource and cleanup related objects.

### Ingress Routing
- Extend `components/ingress` to recognize `agent-sandbox` pods through labels
  (e.g., `opensandbox.io/sandbox-id`).
- Map sandbox IDs and ports to ingress routes following existing router
  semantics.

### Observability and Metrics
- Surface pod readiness, node placement, and resource usage in server logs and
  metrics for troubleshooting.

### Implementation Plan
- Add a new `agent_sandbox` runtime module and a `SandboxService` implementation
  in the server layer.
- Reuse shared Kubernetes client setup, apply/delete helpers, and watch/status
  utilities from `server/services/k8s` to avoid duplicating API plumbing.
- Add a runtime adapter that maps OpenSandbox lifecycle states to
  `agent-sandbox` CRD/pod status, including readiness/termination conditions.
- Store the created resource name/namespace and labels in the sandbox metadata
  for reconciliation and cleanup.
- Extend server configuration to enable `agent-sandbox`, including CRD template
  or spec defaults and `execd` injection strategy (image vs init/sidecar).
- Add routing integration in `components/ingress` to discover pods by labels and
  publish routes for sandbox ports.
- Provide an example under `examples/` that creates a sandbox, executes a
  command, and tears it down using the SDK against the `agent-sandbox` runtime.

```mermaid
sequenceDiagram
    participant Biz
    participant SDK as OpenSandbox SDK
    participant Srv as OpenSandbox Server
    participant K8s as K8s API Server
    participant Ctrl as agent-sandbox Controller
    participant Pod as Sandbox Pod

    Biz->>SDK: create sandbox
    SDK->>Srv: POST /sandboxes
    Srv->>K8s: create agent-sandbox resource
    K8s->>Ctrl: reconcile CRD
    Ctrl->>Pod: create pod
    Srv->>K8s: watch status
    Srv-->>SDK: sandbox ready
```

## Test Plan

- Unit tests for runtime adapter: spec generation, status mapping, cleanup.
- Integration tests with a local K8s cluster and `agent-sandbox` installed:
  create/list/delete sandbox, exec command, file ops, metrics.
- Ingress tests: ensure routing to the correct sandbox pod and port.

## Drawbacks

- Adds dependency on `agent-sandbox` CRD stability and controller behavior.
- Operational complexity for teams without existing Kubernetes expertise.

## Alternatives

- Continue with a native OpenSandbox Kubernetes runtime only. Rejected because
  it does not meet the needs of users already standardized on `agent-sandbox`.
- Provide an external adapter service instead of embedding in the server.
  Rejected due to added operational components and split observability.

## Infrastructure Needed

- Kubernetes cluster with `kubernetes-sigs/agent-sandbox` installed for CI/E2E.
- Optional: test images that bundle `execd` for sandbox pods.

## Upgrade & Migration Strategy

- Backwards compatible; default runtime remains unchanged.
- Enable by configuration; no migration required for existing Docker runtime
  users.


================================================
FILE: oseps/0003-volume-and-volumebinding-support.md
================================================
---
title: Volume Support
authors:
  - "@hittyt"
creation-date: 2026-01-29
last-updated: 2026-02-11
status: implementing
---

# OSEP-0003: Volume Support

<!-- toc -->
- [Summary](#summary)
- [Motivation](#motivation)
  - [Goals](#goals)
  - [Non-Goals](#non-goals)
- [Requirements](#requirements)
- [Proposal](#proposal)
  - [Notes/Constraints/Caveats](#notesconstraintscaveats)
  - [Risks and Mitigations](#risks-and-mitigations)
- [Design Details](#design-details)
- [Test Plan](#test-plan)
- [Drawbacks](#drawbacks)
- [Alternatives](#alternatives)
- [Infrastructure Needed](#infrastructure-needed)
- [Upgrade & Migration Strategy](#upgrade--migration-strategy)
<!-- /toc -->

## Summary

Introduce a runtime-neutral volume model in the Lifecycle API to enable persistent storage mounts across Docker and Kubernetes sandboxes. The proposal adds explicit volume definitions, mount semantics, and security constraints so that artifacts can persist beyond sandbox lifecycles without relying on file transfers.

This proposal focuses on file persistence via filesystem mounts. It is not a general-purpose storage abstraction (e.g., block or object storage APIs); those are only supported indirectly when exposed as a filesystem by the runtime or host.

```text
Time --------------------------------------------------------------->

Volume lifecycle:  [provisioned]-------------------------[retained]--->
Sandbox lifecycle:           [create]---[running]---[stop/delete]
                              |                         |
                          bind volume              unbind volume
```

## Motivation

OpenSandbox users running long-lived agents need artifacts (web pages, images, reports) to persist after a sandbox is terminated or restarted. Today, the API only supports transient filesystem operations via upload/download and provides no mount semantics; as a result, users must move large outputs out-of-band. This proposal adds first-class storage semantics while maintaining runtime portability and security boundaries.

### Goals

- Add a volume mount field to the Lifecycle API without breaking existing clients.
- Support Docker bind mounts (local path), Docker named volumes, and OSS mounts as the initial MVP.
- Provide a runtime-neutral `pvc` backend that maps to Docker named volumes and Kubernetes PersistentVolumeClaims, enabling portable cross-container data sharing.
- Provide secure, explicit controls for read/write access and path isolation.
- Keep runtime-specific details out of the core API where possible.

### Non-Goals

- Full-featured storage orchestration (auto-provisioning, snapshots, backups).
- Automatic cross-sandbox sharing or locking semantics are out of scope; only explicit volume mounts are supported.
- Guaranteeing portability for every storage backend in every runtime.
- Managing backend storage lifecycle (provisioning, resizing, and cleanup) is out of scope; users own and manage underlying storage resources independently.

## Requirements

- Backward compatible with existing sandbox creation requests.
- Works with both Docker and Kubernetes runtimes.
- Enforces path safety and explicit read/write permissions.
- Supports per-sandbox isolation (via subPath or equivalent).
- Clear error messages when a runtime does not support a requested backend.

## Proposal

Add a new optional field to the Lifecycle API:
- `volumes[]`: defines storage mounts for the sandbox. Each entry includes a named backend-specific struct (e.g., `host`, `ossfs`, `pvc`, `nfs`) and common mount settings (`name`, `mountPath`, `readOnly`, `subPath`).

The core API describes what storage is required using strongly-typed backend definitions. Each backend type has its own dedicated struct with explicit fields, making the schema self-documenting and enabling compile-time validation in typed SDKs. Runtime providers translate the model into platform-specific mounts.

### Notes/Constraints/Caveats

- Sandbox runtime (Docker/Kubernetes) and storage backend (host/ossfs/pvc) are independent dimensions. The API is designed so the same SDK request can target different runtimes; if a runtime cannot support a backend, it must return a clear validation error.
- OSS/S3/GitFS are popular production backends; this proposal keeps the model extensible so these can be supported early by adding new backend structs.
- The MVP targets Docker with `host`, `pvc`, and `ossfs` backends, and Kubernetes with `host`, `ossfs`, and `pvc` backends. The `pvc` backend is runtime-neutral: it maps to Docker named volumes in Docker and PersistentVolumeClaims in Kubernetes. Other backends (e.g., `nfs`) are described for future extension and may be unsupported initially.
- Kubernetes template merging currently replaces lists; this proposal requires list-merge or append behavior for volumes/volumeMounts to preserve user input.
- Exactly one backend struct must be specified per volume entry; specifying zero or multiple backend structs is a validation error.

### Risks and Mitigations

- Security risk: Docker bind mounts of host paths (`host` backend) can expose host data. Mitigation: enforce allowlist prefixes, forbid path traversal, and mount with `readOnly: true` whenever write access is not required.
- Portability risk: different backends behave differently. Mitigation: keep core API minimal and require explicit backend selection.
- Operational risk: storage misconfiguration causes startup failures. Mitigation: validate mounts early and provide clear error responses.

## Design Details

### API schema changes
Add to `CreateSandboxRequest`:

```yaml
volumes:
  # Host path mount (read-write by default)
  - name: workdir
    host:
      path: "/data/opensandbox/user-a"
    mountPath: /mnt/work
    subPath: "task-001"

  # OSSFS mount
  - name: data
    ossfs:
      bucket: "my-bucket"
      endpoint: "oss-cn-hangzhou.aliyuncs.com"
      path: "/sandbox/user-a"
      accessKeyId: "AKIDEXAMPLE"
      accessKeySecret: "SECRETEXAMPLE"
      version: "2.0"
    mountPath: /mnt/data

  # PVC mount (platform-managed named volume, read-only)
  # Kubernetes: maps to PersistentVolumeClaim
  # Docker: maps to named volume
  - name: models
    pvc:
      claimName: "shared-models-pvc"
    mountPath: /mnt/models
    readOnly: true

  # NFS mount (future, read-only)
  - name: shared
    nfs:
      server: "nfs.example.com"
      path: "/exports/sandbox"
      options: "nfsvers=4.1,hard,timeo=600"
    mountPath: /mnt/shared
    readOnly: true
```

### Core semantics
- `volumes[]` declares storage mounts. Each volume entry contains:
  - `name`: unique identifier for the volume within the sandbox.
  - Exactly one backend struct (`host`, `ossfs`, `pvc`, `nfs`, etc.) with backend-specific typed fields.
  - `mountPath`: absolute path inside the container where the volume is mounted.
  - `readOnly` (optional): if true, the volume is mounted as read-only. Defaults to false (read-write).
  - `subPath` (optional): subdirectory under the backend path to mount.

### Backend struct definitions
Each backend type is defined as a distinct struct with explicit typed fields:

**`host`** - Host path bind mount:
| Field | Type | Required | Description |
|-------|------|----------|-------------|
| `path` | string | Yes | Absolute path on the host filesystem |

**`ossfs`** - Alibaba Cloud OSS mount via ossfs:
| Field | Type | Required | Description |
|-------|------|----------|-------------|
| `bucket` | string | Yes | OSS bucket name |
| `endpoint` | string | Yes | OSS endpoint URL (e.g., `oss-cn-hangzhou.aliyuncs.com`) |
| `accessKeyId` | string | Yes | Access key ID for inline authentication |
| `accessKeySecret` | string | Yes | Access key secret for inline authentication |
| `version` | string | No | ossfs version: `1.0` or `2.0` (default: `2.0`) |
| `options` | []string | No | Mount options list (e.g., `["allow_other", "umask=0022"]`) |

**`pvc`** - Platform-managed named volume:
| Field | Type | Required | Description |
|-------|------|----------|-------------|
| `claimName` | string | Yes | Name of the volume on the target platform (PVC name in Kubernetes, Docker volume name in Docker) |

The `pvc` backend is a runtime-neutral abstraction for referencing a pre-existing, platform-managed named volume. The semantics are identical across runtimes: claim an existing volume by name, mount it into the container, and leave volume lifecycle management to the user. In Kubernetes this maps to a PersistentVolumeClaim; in Docker this maps to a named volume (created via `docker volume create`).

**`nfs`** - NFS mount (future):
| Field | Type | Required | Description |
|-------|------|----------|-------------|
| `server` | string | Yes | NFS server hostname or IP |
| `path` | string | Yes | Absolute export path on the NFS server |
| `options` | string | No | Comma-separated mount options (e.g., `nfsvers=4.1,hard,timeo=600`) |

Additional backends (e.g., `s3`) can be added by defining new structs following this pattern.

### Backend constraints
Validation rules for each backend struct to reduce runtime-only failures:

- **`host`**: `path` must be an absolute path (e.g., `/data/opensandbox/user-a`). Reject relative paths and require normalization before validation.
- **`ossfs`**: `bucket` must be a valid bucket name. `endpoint` must be a valid OSS endpoint. `accessKeyId` and `accessKeySecret` are required in the current MVP. `version` must be `1.0` or `2.0`; if omitted, it defaults to `2.0`. For the `ossfs` backend, `subPath` is interpreted as a prefix within the bucket. The runtime performs the mount during sandbox creation.
- **`pvc`**: `claimName` must be a valid resource name (DNS label: lowercase alphanumeric and hyphens, max 63 characters). The volume identified by `claimName` must already exist on the target platform; the runtime validates existence before container creation. In Kubernetes, the PVC must exist in the same namespace as the sandbox pod. In Docker, a named volume with the given name must exist (created via `docker volume create`); if the volume does not exist, the request fails validation rather than auto-creating it, to maintain explicit volume lifecycle management.
- **`nfs`**: `server` must be a valid hostname or IP. `path` must be an absolute path (e.g., `/exports/sandbox`).

These constraints are enforced in request validation and surfaced as clear API errors; runtimes may apply stricter checks.

### Permissions and ownership
Volume permissions are a frequent source of runtime failures and must be explicit in the contract:
- Default behavior: OpenSandbox does not automatically fix ownership or permissions on mounted storage. Users are responsible for ensuring the backend target is writable by the sandbox process UID/GID.
- Docker `host`: host path permissions are enforced by the host filesystem. Even with `readOnly: false`, writes will fail if the host path is not writable by the container user.
- Docker `pvc` (named volume): Docker named volumes created with the default `local` driver are owned by root. If the container runs as a non-root user, write access depends on the volume's filesystem permissions. Users should ensure correct ownership when creating the volume or use an init process to fix permissions.
- Kubernetes: filesystem permissions vary by storage driver. Future enhancement: add optional `fsGroup` field to backend structs that support it for pod-level volume access control.

### Concurrency and isolation
SubPath provides path-level isolation, not concurrency control. If multiple sandboxes mount the same volume without distinct `subPath` values and use `readOnly: false`, they may overwrite each other. OpenSandbox does not provide file-locking or coordination; users are responsible for handling concurrent access safely.

### Docker mapping
- `host` backend maps to bind mounts. `host.path + subPath` resolves to a concrete host directory.
- The host config uses `mounts`/`binds` with `ReadOnly` set from `readOnly` field.
- If the resolved host path does not exist, the request fails validation (do not auto-create host directories in MVP to avoid permission and security pitfalls).
- Allowed host paths are restricted by a server-side allowlist; users must specify a `host.path` under permitted prefixes. The allowlist is an operator-configured policy and should be documented for users of a given deployment.
- `pvc` backend maps to Docker named volumes. `pvc.claimName` is used as the Docker volume name in the bind string (e.g., `my-volume:/mnt/data:rw`). Docker recognizes non-absolute-path sources as named volume references. The named volume must already exist (created via `docker volume create`); if it does not exist, the request fails validation. When `subPath` is specified, the runtime resolves the volume's host-side `Mountpoint` via `docker volume inspect` and appends the `subPath` to produce a standard bind mount (e.g., `/var/lib/docker/volumes/my-volume/_data/subdir:/mnt/data:rw`). This requires the volume to use the `local` driver; non-local drivers are rejected when `subPath` is present because their `Mountpoint` may not be a real filesystem path. The resolved path must exist on the host; if it does not, the request fails validation.
- `ossfs` backend requires the runtime to mount OSS via ossfs during sandbox creation. The current MVP uses inline credentials (`accessKeyId`/`accessKeySecret`). For the `ossfs` backend, `subPath` is treated as a bucket prefix and is resolved and validated on the host before being bind-mounted into the container. If the runtime does not support ossfs mounting, the request is rejected.

### Kubernetes mapping
- `pvc` backend maps to Kubernetes `persistentVolumeClaim` volume source: `pvc.claimName` → `volumes[].persistentVolumeClaim.claimName`.
- `nfs` backend maps to Kubernetes `nfs` volume source: `nfs.server` → `volumes[].nfs.server`, `nfs.path` → `volumes[].nfs.path`.
- `mountPath` maps to `volumeMounts.mountPath`.
- `subPath` maps to `volumeMounts.subPath`.
- `ossfs` backend maps to OSS CSI driver or equivalent runtime-specific mount configured with the struct fields.
- `host` backend maps to `hostPath` volume source and is node-local. For persistence guarantees in multi-node clusters, users must pin scheduling (node affinity) or use LocalPersistentVolume; otherwise data can disappear if the pod is rescheduled.

### Example: Host path mount
Create a sandbox that mounts a host directory:

```yaml
volumes:
  - name: workdir
    host:
      path: "/data/opensandbox/user-a"
    mountPath: /mnt/work
    subPath: "task-001"
```

Python SDK example (host):

```python
from opensandbox.api.lifecycle.client import AuthenticatedClient
from opensandbox.api.lifecycle.api.sandboxes import post_sandboxes
from opensandbox.api.lifecycle.models.create_sandbox_request import CreateSandboxRequest
from opensandbox.api.lifecycle.models.image_spec import ImageSpec
from opensandbox.api.lifecycle.models.resource_limits import ResourceLimits
from opensandbox.api.lifecycle.models.volume import Volume
from opensandbox.api.lifecycle.models.host import Host

client = AuthenticatedClient(base_url="https://api.opensandbox.io", token="YOUR_API_KEY")

resource_limits = ResourceLimits.from_dict({"cpu": "500m", "memory": "512Mi"})
request = CreateSandboxRequest(
    image=ImageSpec(uri="python:3.11"),
    timeout=3600,
    resource_limits=resource_limits,
    entrypoint=["python", "-c", "print('hello')"],
    volumes=[
        Volume(
            name="workdir",
            host=Host(
                path="/data/opensandbox/user-a",
            ),
            mount_path="/mnt/work",
            sub_path="task-001",
        )
    ],
)

post_sandboxes.sync(client=client, body=request)
```

### Example: OSSFS mount
Create a sandbox that mounts an OSS bucket via ossfs:

```yaml
volumes:
  - name: workdir
    ossfs:
      bucket: "my-bucket"
      endpoint: "oss-cn-hangzhou.aliyuncs.com"
      path: "/sandbox/user-a"
      accessKeyId: "AKIDEXAMPLE"
      accessKeySecret: "SECRETEXAMPLE"
      version: "2.0"
      options:
        - "allow_other"
        - "umask=0022"
    mountPath: /mnt/work
    subPath: "task-001"
```

Runtime mapping (Docker):
- host path: runtime resolves target path under configured mount root (e.g., `/mnt/ossfs/<bucket>/<path>`), performs on-demand mount (or reuses existing mount), then bind-mounts into the container
- container path: `/mnt/work`
- readOnly: false (default, read-write)

### Example: Python SDK (lifecycle client)
Use the Python SDK lifecycle client to create a sandbox with an OSSFS volume mount (future typed model):

```python
from opensandbox.api.lifecycle.client import AuthenticatedClient
from opensandbox.api.lifecycle.api.sandboxes import post_sandboxes
from opensandbox.api.lifecycle.models.create_sandbox_request import CreateSandboxRequest
from opensandbox.api.lifecycle.models.image_spec import ImageSpec
from opensandbox.api.lifecycle.models.resource_limits import ResourceLimits
from opensandbox.api.lifecycle.models.volume import Volume
from opensandbox.api.lifecycle.models.ossfs import OSSFS

client = AuthenticatedClient(base_url="https://api.opensandbox.io", token="YOUR_API_KEY")

resource_limits = ResourceLimits.from_dict({"cpu": "500m", "memory": "512Mi"})
request = CreateSandboxRequest(
    image=ImageSpec(uri="python:3.11"),
    timeout=3600,
    resource_limits=resource_limits,
    entrypoint=["python", "-c", "print('hello')"],
    volumes=[
        Volume(
            name="workdir",
            ossfs=OSSFS(
                bucket="my-bucket",
                endpoint="oss-cn-hangzhou.aliyuncs.com",
                path="/sandbox/user-a",
                access_key_id="AKIDEXAMPLE",
                access_key_secret="SECRETEXAMPLE",
                version="2.0",
                options=["allow_other", "umask=0022"],
            ),
            mount_path="/mnt/work",
            sub_path="task-001",
        )
    ],
)

post_sandboxes.sync(client=client, body=request)
```

### Example: PVC mount (cross-runtime)
The `pvc` backend provides a portable way to reference platform-managed named volumes. The same API request works on both Docker and Kubernetes:

```yaml
volumes:
  - name: shared-data
    pvc:
      claimName: "my-shared-volume"
    mountPath: /mnt/data
    subPath: "task-001"
```

Runtime mapping (Docker):
The `claimName` is used as the Docker named volume name. The volume must already exist (created via `docker volume create my-shared-volume`). When `subPath` is specified, the runtime resolves the volume's host-side `Mountpoint` via `docker volume inspect` and appends the subPath to produce a standard bind mount:
```text
# Docker bind string generated by the runtime (with subPath):
# Mountpoint = /var/lib/docker/volumes/my-shared-volume/_data
/var/lib/docker/volumes/my-shared-volume/_data/task-001:/mnt/data:rw

# Without subPath, the named volume is used directly:
# my-shared-volume:/mnt/data:rw
```

Runtime mapping (Kubernetes):
The `claimName` maps to a PersistentVolumeClaim in the same namespace.
```yaml
volumes:
  - name: shared-data
    persistentVolumeClaim:
      claimName: my-shared-volume
containers:
  - name: sandbox
    volumeMounts:
      - name: shared-data
        mountPath: /mnt/data
        subPath: task-001
```

Python SDK example (PVC):

```python
from opensandbox.api.lifecycle.client import AuthenticatedClient
from opensandbox.api.lifecycle.api.sandboxes import post_sandboxes
from opensandbox.api.lifecycle.models.create_sandbox_request import CreateSandboxRequest
from opensandbox.api.lifecycle.models.image_spec import ImageSpec
from opensandbox.api.lifecycle.models.resource_limits import ResourceLimits
from opensandbox.api.lifecycle.models.volume import Volume
from opensandbox.api.lifecycle.models.pvc import PVC

client = AuthenticatedClient(base_url="https://api.opensandbox.io", token="YOUR_API_KEY")

resource_limits = ResourceLimits.from_dict({"cpu": "500m", "memory": "512Mi"})
request = CreateSandboxRequest(
    image=ImageSpec(uri="python:3.11"),
    timeout=3600,
    resource_limits=resource_limits,
    entrypoint=["python", "-c", "print('hello')"],
    volumes=[
        Volume(
            name="shared-data",
            pvc=PVC(
                claim_name="my-shared-volume",
            ),
            mount_path="/mnt/data",
            sub_path="task-001",
        )
    ],
)

post_sandboxes.sync(client=client, body=request)
```

#### Cross-container data sharing with PVC (Docker)
Multiple sandboxes can share data through the same named volume. This is more convenient and secure than using host paths, as Docker manages the storage location and no host paths need to be exposed:

```python
# Sandbox A: writes data to the shared volume
sandbox_a = CreateSandboxRequest(
    image=ImageSpec(uri="python:3.11"),
    entrypoint=["python", "-c", "open('/mnt/shared/result.txt','w').write('hello')"],
    volumes=[
        Volume(name="shared", pvc=PVC(claim_name="team-data"), mount_path="/mnt/shared")
    ],
)

# Sandbox B: reads data from the same shared volume
sandbox_b = CreateSandboxRequest(
    image=ImageSpec(uri="python:3.11"),
    entrypoint=["python", "-c", "print(open('/mnt/shared/result.txt').read())"],
    volumes=[
        Volume(name="shared", pvc=PVC(claim_name="team-data"), mount_path="/mnt/shared")
    ],
)
```

### Example: Kubernetes NFS (future)
Create a sandbox that mounts an NFS export with subPath isolation (non-MVP):

```yaml
volumes:
  - name: workdir
    nfs:
      server: "nfs.example.com"
      path: "/exports/sandbox"
      options: "nfsvers=4.1,hard,timeo=600"
    mountPath: /mnt/work
    subPath: "task-001"
```

Runtime mapping (Kubernetes):
```yaml
volumes:
  - name: workdir
    nfs:
      server: nfs.example.com
      path: /exports/sandbox
containers:
  - name: sandbox
    volumeMounts:
      - name: workdir
        mountPath: /mnt/work
        readOnly: false
        subPath: task-001
```

Python SDK example (NFS, future):

```python
from opensandbox.api.lifecycle.client import AuthenticatedClient
from opensandbox.api.lifecycle.api.sandboxes import post_sandboxes
from opensandbox.api.lifecycle.models.create_sandbox_request import CreateSandboxRequest
from opensandbox.api.lifecycle.models.image_spec import ImageSpec
from opensandbox.api.lifecycle.models.resource_limits import ResourceLimits
from opensandbox.api.lifecycle.models.volume import Volume
from opensandbox.api.lifecycle.models.nfs import NFS

client = AuthenticatedClient(base_url="https://api.opensandbox.io", token="YOUR_API_KEY")

resource_limits = ResourceLimits.from_dict({"cpu": "500m", "memory": "512Mi"})
request = CreateSandboxRequest(
    image=ImageSpec(uri="python:3.11"),
    timeout=3600,
    resource_limits=resource_limits,
    entrypoint=["python", "-c", "print('hello')"],
    volumes=[
        Volume(
            name="workdir",
            nfs=NFS(
                server="nfs.example.com",
                path="/exports/sandbox",
                options="nfsvers=4.1,hard,timeo=600",
            ),
            mount_path="/mnt/work",
            sub_path="task-001",
        )
    ],
)

post_sandboxes.sync(client=client, body=request)
```

### Provider validation
- Reject unsupported backend types per runtime (e.g., `nfs` is only valid in Kubernetes).
- Validate that exactly one backend struct is specified per volume entry.
- Normalize and validate `subPath` against traversal; reject `..` and absolute path inputs.
- Enforce allowlist prefixes for `host.path` in Docker.
- For `ossfs` backend, validate required fields (`bucket`, `endpoint`, `accessKeyId`, `accessKeySecret`).
- For `pvc` backend, validate `claimName` is a valid DNS label (lowercase alphanumeric and hyphens, max 63 characters). In Kubernetes, validate the PVC exists in the same namespace. In Docker, validate the named volume exists via the Docker API (`docker volume inspect`).
- For `nfs` backend, validate required fields (`server`, `path`).
- `subPath` is created if missing under the resolved backend path; if creation fails due to permissions or policy, the request is rejected.

### Configuration (example)
Host path allowlists are configured by the control plane (server/execd) and enforced at validation time. Example `config.toml`:

```toml
[storage]
allow_host_paths = ["/data/opensandbox", "/tmp/sandbox"]
ossfs_mount_root = "/mnt/ossfs"
```

## Test Plan

- Unit tests for schema validation and path normalization.
- Unit tests for backend struct validation:
  - Reject volume entries with zero or multiple backend structs.
  - Validate required fields per backend type.
- Provider unit tests:
  - Docker `host`: bind mount generation, read-only enforcement, allowlist rejection.
  - Docker `pvc`: named volume bind generation, volume existence validation, read-only enforcement, `claimName` format validation, rejection when volume does not exist, `subPath` resolution via `Mountpoint` for `local` driver, rejection of `subPath` for non-local drivers, rejection when resolved subPath does not exist.
  - Docker `ossfs`: mount option validation, inline credential validation (`accessKeyId`/`accessKeySecret`), version validation (`1.0`/`2.0`), `subPath`-as-prefix resolution, mount failure handling.
  - Kubernetes `pvc`: PVC reference validation, volume mount generation.
- Integration tests:
  - Docker: sandbox creation with `host` volume, sandbox creation with `pvc` (named volume), `pvc` with `subPath` mount, cross-container data sharing via named volume.
  - Kubernetes: sandbox creation with `pvc`, sandbox creation with `host` volume.
- Negative tests for unsupported backends and invalid paths.

## Drawbacks

- Adds API surface area and increases runtime provider complexity.
- Docker bind mounts introduce security considerations and operational policy requirements.

## Alternatives

- Keep using file upload/download only: simpler but does not satisfy persistence requirements.
- Use runtime-specific `extensions` only: faster to ship but fractures API consistency and increases client complexity.

## Infrastructure Needed

The runtime must have the ability to perform filesystem mounts for the requested backend types. For `ossfs` backend, the runtime must have ossfs 1.0 or 2.0 installed; the MVP assumes the runtime can mount using the struct fields provided in the request.

## Upgrade & Migration Strategy

This change is additive: `volumes` is a new optional field, so existing sandbox creation requests continue to work unchanged. OSSFS mounts use inline credentials (`accessKeyId`/`accessKeySecret`). If a client submits volume fields to a runtime that does not support them, the API returns a clear validation error.

## Kubernetes Feasibility (Design Only)

Kubernetes runtime support for the `ossfs` backend is not implemented in this phase, but API compatibility is preserved by design:

- Keep request schema runtime-neutral: `volumes[].ossfs` has consistent shape across Docker and Kubernetes.
- Introduce runtime adapters:
  - Docker adapter performs host-side ossfs mount + bind using inline credentials.
  - Kubernetes adapter can map OSSFS fields to native Secret/CSI references in a future phase.
- Keep failure semantics aligned:
  - Missing credential reference -> validation error with shared error code family.
  - Runtime unsupported backend -> explicit `UNSUPPORTED_VOLUME_BACKEND`.
- Keep `subPath` semantics aligned:
  - API meaning remains "`subPath` is mounted under backend path".
  - Docker resolves to host path (`subPath` as OSS prefix); Kubernetes maps to `volumeMounts.subPath`.


================================================
FILE: oseps/0004-secure-container-runtime.md
================================================
---
title: Pluggable Secure Container Runtime Support
authors:
  - "@hittyt"
creation-date: 2026-02-05
last-updated: 2026-02-09
status: implementing
---

# OSEP-0004: Pluggable Secure Container Runtime Support

<!-- toc -->
- [Summary](#summary)
- [Motivation](#motivation)
  - [Goals](#goals)
  - [Non-Goals](#non-goals)
- [Requirements](#requirements)
- [Proposal](#proposal)
  - [Notes/Constraints/Caveats](#notesconstraintscaveats)
  - [Risks and Mitigations](#risks-and-mitigations)
- [Design Details](#design-details)
  - [API and SDK Impact](#api-and-sdk-impact)
  - [Server Configuration](#server-configuration)
  - [Infrastructure Prerequisites](#infrastructure-prerequisites)
  - [Runtime Resolver](#runtime-resolver)
  - [Startup Validation](#startup-validation)
  - [Docker Mode Implementation](#docker-mode-implementation)
  - [Kubernetes Mode Implementation](#kubernetes-mode-implementation)
    - [BatchSandboxProvider](#batchsandboxprovider)
    - [AgentSandboxProvider](#agentsandboxprovider)
    - [Pooled Sandbox Consistency](#pooled-sandbox-consistency)
- [Test Plan](#test-plan)
- [Drawbacks](#drawbacks)
- [Alternatives](#alternatives)
- [Infrastructure Needed](#infrastructure-needed)
- [Upgrade & Migration Strategy](#upgrade--migration-strategy)
<!-- /toc -->

## Summary

This proposal introduces secure container runtime support for OpenSandbox, enabling sandboxes to run in secure container runtimes such as gVisor, Firecracker, and Kata Containers. These runtimes provide stronger isolation than standard containers — a user-space kernel (gVisor) or hardware-level virtualization (Kata Containers, Firecracker) — for executing untrusted AI-generated code, protecting the host system from potentially malicious behavior.

The secure runtime is configured at the **server level**: administrators choose a single secure runtime in the server configuration, and all sandboxes on that server transparently use it. SDK users and API callers require no code changes — the isolation upgrade is entirely an infrastructure-level decision.

## Motivation

OpenSandbox is designed to execute untrusted code generated by AI models (such as Claude, GPT-4, Gemini). While standard container isolation (runc) provides process-level isolation, it may not be sufficient for scenarios where:

1. **Untrusted Code Execution**: AI-generated code could potentially contain malicious behavior, including container escape attempts
2. **Multi-tenant Environments**: Different users' sandboxes may require stronger isolation guarantees
3. **Compliance Requirements**: Some industries require hardware-level virtualization for security compliance

Secure container runtimes like gVisor, Firecracker, and Kata Containers provide additional isolation layers:

| Runtime | Isolation Mechanism | Use Case |
|---------|-------------------|----------|
| gVisor | User-space kernel (syscall interception) | General workloads, low overhead |
| Kata Containers (QEMU) | Full VM with QEMU hypervisor | Maximum isolation, compatibility |
| Kata Containers (Firecracker) | MicroVM with Firecracker hypervisor | High density, minimal footprint |
| Kata Containers (CLH) | Cloud Hypervisor | Balanced performance and isolation |

### Goals

1. **Server-Level Configuration**: Secure runtime is configured once at the server level; all sandboxes use the same runtime
2. **Transparent to SDK Users**: No SDK or API changes required — upgrading isolation is purely an infrastructure decision
3. **Dual-Mode Compatibility**: Work seamlessly in both Local Docker and Kubernetes deployment modes
4. **Graceful Fallback**: Default to standard runc when no secure runtime is configured
5. **Validation**: Verify runtime availability at server startup and before sandbox creation, with clear error messages

### Non-Goals

1. **Runtime Installation**: OpenSandbox will not install or configure secure container runtimes; this is the responsibility of infrastructure administrators
2. **Per-Request Runtime Selection**: SDK users cannot choose or override the secure runtime on a per-sandbox basis; this is an infrastructure-level decision managed by administrators
3. **Runtime-Specific Features**: Exposing all features of each secure runtime (e.g., gVisor platforms, Kata hypervisors) is out of scope for the initial implementation
4. **Performance Optimization**: Tuning secure runtimes for optimal performance is left to operators
5. **Multiple Runtimes on One Server**: A single server instance supports exactly one secure runtime; mixed runtimes require separate server deployments

## Requirements

| ID | Requirement | Priority |
|----|-------------|----------|
| R1 | Server configuration defines the secure runtime for all sandboxes | Must Have |
| R2 | Support gVisor, Kata (including Firecracker backend) as runtime types | Must Have |
| R3 | Validate runtime availability at server startup | Must Have |
| R4 | Work in both Docker and Kubernetes modes | Must Have |
| R5 | Default to runc when no secure runtime is configured | Must Have |
| R6 | Clear error messages when configured runtime is unavailable | Should Have |
| R7 | No SDK or API changes required for existing users | Should Have |

## Proposal

We propose adding a `[secure_runtime]` section to the server configuration file (`~/.sandbox.toml`). When configured, **all sandboxes** on that server transparently run in the specified secure runtime. No changes to the Sandbox Lifecycle API or SDKs are required.

```
Server Config                              Backend
┌──────────────────────┐                ┌─────────────────┐
│ [secure_runtime]     │                │ Docker:         │
│ type = "gvisor"      │     ┌────→     │   --runtime=    │
│ docker_runtime       │     │          │     runsc       │
│   = "runsc"          │─────┤          ├─────────────────┤
│ k8s_runtime_class    │     │          │ Kubernetes:     │
│   = "gvisor"         │     └────→     │   runtimeClass- │
│                      │                │     Name: gvisor│
└──────────────────────┘                └─────────────────┘
         ▲
         │ Infrastructure admin configures once
         │ SDK users require NO code changes
```

### Notes/Constraints/Caveats

1. **Infrastructure Dependency**: Secure runtimes must be pre-installed and configured on the host (Docker) or cluster (Kubernetes) before use

2. **Performance Overhead**: Secure runtimes add latency and resource overhead compared to runc:

     | Runtime | Isolation Mechanism | Startup Overhead | Memory Overhead | Best For |
     |---------|---------------------|------------------|-----------------|----------|
     | **runc** (default) | Process-level cgroups | ~0ms | Minimal | Trusted workloads, local development |
     | **gVisor** | User-space kernel (syscall interception) | ~10-50ms | ~50MB | General workloads with low overhead |
     | **Kata (QEMU)** | Full VM with QEMU hypervisor | ~500ms | ~20-50MB | Maximum compatibility and isolation |
     | **Kata (Firecracker)** | MicroVM with Firecracker hypervisor | ~125ms | ~5MB | High density, minimal footprint |
     | **Kata (CLH)** | Cloud Hypervisor | ~200ms | ~10-20MB | Balanced performance and isolation |

     Warm start performance (from pre-warmed Pool):

     | Runtime | Cold Start | Warm Start (from Pool) | Memory per Sandbox |
     |---------|-----------|------------------------|-------------------|
     | runc | ~500ms | ~50ms | ~5MB |
     | gVisor | ~550ms | ~100ms | ~50MB |
     | Kata (QEMU) | ~1000ms | ~200ms | ~20-50MB |
     | Kata (Firecracker) | ~625ms | ~125ms | ~5MB |

     For Kata-based runtimes in Kubernetes mode, the actual hypervisor is determined by the `RuntimeClass` handler configured by the SRE administrator (e.g., `kata-qemu`, `kata-clh`, `kata-fc`).

     > **Note**: Firecracker is not a standalone OCI runtime. In this OSEP, `[secure_runtime] type = "firecracker"` maps to Kata Containers with the Firecracker hypervisor backend (`kata-fc`). See [Server Configuration](#server-configuration) for details.

3. **Compatibility**: Not all container images work with all secure runtimes:
   - gVisor: Some syscalls may not be implemented; check [gVisor compatibility](https://gvisor.dev/docs/user_guide/compatibility/)
   - Kata (QEMU/CLH): Generally most compatible but highest overhead
   - Kata + Firecracker (`kata-fc`): Limited device support; some workloads requiring specific kernel features may not work

4. **execd Injection**: The execd binary injection mechanism must work within secure runtime constraints

5. **Pooled Sandbox Consistency (Kubernetes)**: In Kubernetes mode with resource pools (Pool CRD), the Pool's `runtimeClassName` must match the server's `[secure_runtime]` configuration. Since both are managed by the same SRE administrator, this is an operational requirement validated at server startup.

### Risks and Mitigations

| Risk | Impact | Mitigation |
|------|--------|------------|
| Runtime unavailable at creation time | Sandbox creation fails | Pre-validation with clear error messages |
| Syscall compatibility issues | Application may not work | Document known limitations per runtime |
| Performance degradation | Slower sandbox creation | Allow administrators to choose the runtime based on security/performance tradeoff |
| Configuration complexity | Operational burden | Provide sensible defaults and clear documentation |

## Design Details

> **Note**: Code snippets in this section are illustrative and demonstrate the design intent. Actual implementation may differ in structure and details.

### API and SDK Impact

**No changes to the Sandbox Lifecycle API or SDKs are required.**

The `CreateSandboxRequest` schema remains unchanged. The secure runtime is applied transparently by the server based on its configuration. Existing SDK code works as-is:

```python
# This code works identically whether the server uses runc or gVisor.
# The SDK user does not need to know or care about the secure runtime.
sandbox = await Sandbox.create(
    image="python:3.11",
    entrypoint=["python", "-c", "print('hello')"],
)
```

This is a key advantage of server-level configuration: upgrading from runc to gVisor is a pure infrastructure change that requires zero application code modifications.

### Server Configuration

Extension to `~/.sandbox.toml`. A single `[secure_runtime]` section configures the secure runtime for **all sandboxes** on this server:

```toml
[runtime]
type = "docker"  # or "kubernetes"
execd_image = "opensandbox/execd:v1.0.7"

# Secure container runtime configuration.
# When enabled, ALL sandboxes on this server use the specified runtime.
# Comment out or leave type empty to use standard runc.
[secure_runtime]
# Runtime type identifier. Supported values:
#   "gvisor"      - gVisor (runsc), user-space kernel isolation
#   "kata"        - Kata Containers (QEMU backend), VM-level isolation
#   "firecracker" - Kata Containers with Firecracker backend (K8s only)
#   ""            - Standard runc (default, no secure runtime)
type = ""

# Docker mode: --runtime parameter name
# Ignored when runtime.type = "kubernetes"
docker_runtime = "runsc"

# Kubernetes mode: pod.spec.runtimeClassName value
# Ignored when runtime.type = "docker"
k8s_runtime_class = "gvisor"
```

**Configuration examples** (pick ONE per server, these are separate config files):

Example 1 — gVisor on Docker:

```toml
# ~/.sandbox.toml
[runtime]
type = "docker"
execd_image = "opensandbox/execd:v1.0.7"

[secure_runtime]
type = "gvisor"
docker_runtime = "runsc"
k8s_runtime_class = "gvisor"
```

Example 2 — Kata Containers (QEMU) on Kubernetes:

```toml
# ~/.sandbox.toml
[runtime]
type = "kubernetes"
execd_image = "opensandbox/execd:v1.0.7"

[secure_runtime]
type = "kata"
docker_runtime = "kata-runtime"
k8s_runtime_class = "kata-qemu"
```

Example 3 — Kata + Firecracker on Kubernetes:

> Firecracker is a VMM, not an OCI runtime. It cannot serve as a CRI implementation directly. This OSEP recommends using Firecracker via Kata Containers (`kata-fc` handler), which is the mature, production-ready approach. The alternative (`firecracker-containerd`) is less actively maintained and not recommended.

```toml
# ~/.sandbox.toml
[runtime]
type = "kubernetes"
execd_image = "opensandbox/execd:v1.0.7"

[secure_runtime]
type = "firecracker"
docker_runtime = ""              # Not supported in Docker mode
k8s_runtime_class = "kata-fc"
```

### Infrastructure Prerequisites

OpenSandbox does not install secure runtimes. The following must be configured by infrastructure administrators.

#### Docker Mode - gVisor Setup

**Step 1: Install gVisor runsc**

For Docker mode, you only need to install the **runsc** OCI runtime:

```bash
# Ubuntu/Debian
curl -fsSL https://gvisor.dev/archive.key | sudo gpg --dearmor -o /usr/share/keyrings/gvisor-archive-keyring.gpg
echo "deb [signed-by=/usr/share/keyrings/gvisor-archive-keyring.gpg] https://storage.googleapis.com/gvisor/releases release main" | \
  sudo tee /etc/apt/sources.list.d/gvisor.list
sudo apt-get update && sudo apt-get install -y runsc

# Verify installation
runsc --version
```

> **Note**: For Docker mode, only `runsc` is required. The `containerd-shim-runsc-v1` is only needed for Kubernetes/containerd.

**Step 2: Configure Docker daemon**

Use the `runsc install` command to automatically configure Docker daemon:

```bash
sudo runsc install
```

Or manually edit `/etc/docker/daemon.json`:

```json
{
  "runtimes": {
    "runsc": {
      "path": "/usr/bin/runsc",
      "runtimeArgs": [
        "--platform=systrap",
        "--network=host"
      ]
    }
  }
}
```

```bash
sudo systemctl restart docker
```

**Step 3: Verify installation**

```bash
docker run --runtime=runsc hello-world
```

#### Docker Mode - Kata Containers Setup

##### System Requirements

Kata Containers requires hardware virtualization support. Verify your system meets the following requirements:

**Hardware Virtualization Support:**
```bash
# Check if CPU supports hardware virtualization (VT-x for Intel, AMD-V for AMD)
lscpu | grep Virtualization
# Expected output: Virtualization: VT-x (Intel) or AMD-V (AMD)

# Alternatively, check the CPU flags directly (works for both Intel and AMD)
grep -E --color=auto 'vmx|svm' /proc/cpuinfo
# Expected: vmx (Intel) or svm (AMD) flags present
```

**KVM Module:**
```bash
# Check if KVM module is loaded
lsmod | grep kvm
# Expected: kvm_intel (Intel) or kvm_amd (AMD)

# If not loaded, load KVM module
sudo modprobe kvm_intel  # For Intel
# or
sudo modprobe kvm_amd    # For AMD
```

**Kernel Requirements:**
- Linux kernel 5.10 or later recommended
- KVM enabled in kernel config

**Docker Requirements:**
- Docker 20.10 or later
- `/etc/docker/daemon.json` configured for Kata runtime

##### Installation

Download and install Kata Containers static binaries from GitHub releases:

```bash
# Find the latest release at https://github.com/kata-containers/kata-containers/releases
KATA_VERSION="3.27.0"
wget https://github.com/kata-containers/kata-containers/releases/download/${KATA_VERSION}/kata-static-${KATA_VERSION}-amd64.tar.zst

# Extract to root directory - Kata will be installed in /opt/kata
zstd -d kata-static-${KATA_VERSION}-amd64.tar.zst
sudo tar -xvf kata-static-${KATA_VERSION}-amd64.tar -C /

# Create symbolic links for PATH access
sudo ln -sf /opt/kata/bin/kata-runtime /usr/local/bin/kata-runtime
sudo ln -sf /opt/kata/bin/containerd-shim-kata-v2 /usr/local/bin/containerd-shim-kata-v2

# Verify installation
kata-runtime --version
```

##### Configure Docker Daemon

Edit `/etc/docker/daemon.json` to register Kata as a runtime. The runtime name used here (`kata-runtime`) must match the `docker_runtime` value in the server's `[secure_runtime]` configuration:

```json
{
  "default-runtime": "runc",
  "runtimes": {
    "kata-runtime": {
      "runtimeType": "io.containerd.kata.v2"
    }
  }
}
```

Restart Docker to apply changes:

```bash
sudo systemctl restart docker

# Verify Kata is available in Docker
docker info | grep -A5 Runtimes
# Expected output should list "kata-runtime" among the runtimes
```

#### Kubernetes Mode - RuntimeClass Setup

Cluster administrators must create RuntimeClass resources:

```yaml
# gVisor RuntimeClass
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  name: gvisor
handler: runsc  # Matches containerd handler name
scheduling:
  nodeSelector:
    kubernetes.io/arch: amd64

---
# Kata Containers (QEMU backend) RuntimeClass
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  name: kata-qemu
handler: kata-qemu

---
# Kata Containers (Firecracker backend) RuntimeClass
# This is what [secure_runtime] type = "firecracker" maps to
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  name: kata-fc
handler: kata-fc
```

containerd configuration (`/etc/containerd/config.toml`):

```toml
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runsc]
  runtime_type = "io.containerd.runsc.v1"
  [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runsc.options]
    TypeUrl = "io.containerd.runsc.v1.options"
    ConfigPath = "/etc/containerd/runsc.toml"

[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata-qemu]
  runtime_type = "io.containerd.kata-qemu.v2"

[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata-fc]
  runtime_type = "io.containerd.kata-fc.v2"
```

Create the gVisor configuration file:

```bash
sudo tee /etc/containerd/runsc.toml > /dev/null <<'EOF'
[runsc_config]
  platform = "systrap"
EOF
```

Restart containerd:

```bash
sudo systemctl restart containerd
```

##### Kata Containers on Kubernetes

Follow the [official Kata Containers installation guide](https://github.com/kata-containers/kata-containers/blob/main/tools/packaging/kata-deploy/helm-chart/README.md).

Quick installation using Helm:

```bash
# Install kata-deploy which will set up Kata Containers via DaemonSet
helm install kata-deploy "oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy" --version "3.27.0" --namespace kube-system --create-namespace

# Wait for kata-deploy pods to be ready
kubectl wait --for=condition=ready pod -l name=kata-deploy -n kube-system --timeout=300s
```

> **Note**: The `kata-deploy` DaemonSet will automatically configure containerd on all nodes. Manual containerd configuration is not required when using kata-deploy.

Verify installation:

```bash
# Check RuntimeClasses
kubectl get runtimeclass

# Expected output:
# NAME         HANDLER     AGE
# kata         kata-qemu   10m
# kata-qemu    kata-qemu   10m
# kata-clh     kata-clh    10m
# kata-fc      kata-fc     10m

# Test Kata with a simple pod
kubectl run test-kata --restart=Never --image=hello-world --overrides='{"apiVersion":"v1","spec":{"runtimeClassName":"kata-qemu"}}'
kubectl logs test-kata
kubectl delete pod test-kata
```

### Runtime Resolver

The server reads `[secure_runtime]` at startup and resolves it to the backend-specific identifier based on the deployment mode:

```python
class SecureRuntimeResolver:
    """Resolves secure runtime config to backend-specific parameters."""
    
    def __init__(self, config: AppConfig):
        self.secure_runtime = config.secure_runtime  # may be None
        self.runtime_mode = config.runtime.type       # "docker" or "kubernetes"
    
    def get_docker_runtime(self) -> Optional[str]:
        """Return Docker --runtime value, or None for runc."""
        if not self.secure_runtime or not self.secure_runtime.type:
            return None
        if not self.secure_runtime.docker_runtime:
            raise ConfigError(
                f"Secure runtime '{self.secure_runtime.type}' is not supported "
                f"in Docker mode (docker_runtime is empty)."
            )
        return self.secure_runtime.docker_runtime
    
    def get_k8s_runtime_class(self) -> Optional[str]:
        """Return K8s runtimeClassName, or None for cluster default."""
        if not self.secure_runtime or not self.secure_runtime.type:
            return None
        return self.secure_runtime.k8s_runtime_class
```

### Startup Validation

The server validates the configured secure runtime at startup, failing fast if the runtime is unavailable:

```python
def validate_secure_runtime_on_startup(config: AppConfig, docker_client=None, k8s_client=None):
    """Validate secure runtime availability at server startup."""
    sr = config.secure_runtime
    if not sr or not sr.type:
        logger.info("No secure runtime configured; using standard runc.")
        return
    
    if config.runtime.type == "docker":
        if not sr.docker_runtime:
            raise ConfigError(
                f"secure_runtime.type='{sr.type}' but docker_runtime is empty. "
                f"This runtime is not supported in Docker mode."
            )
        info = docker_client.info()
        available = info.get("Runtimes", {}).keys()
        if sr.docker_runtime not in available:
            raise ConfigError(
                f"Docker runtime '{sr.docker_runtime}' is not available. "
                f"Available runtimes: {list(available)}. "
                f"Please install and configure it in /etc/docker/daemon.json."
            )
    else:  # kubernetes
        try:
            k8s_client.read_runtime_class(sr.k8s_runtime_class)
        except ApiException as e:
            if e.status == 404:
                raise ConfigError(
                    f"RuntimeClass '{sr.k8s_runtime_class}' does not exist. "
                    f"Please create it in the cluster."
                )
            raise
    
    logger.info(f"Secure runtime '{sr.type}' validated successfully.")
```

### Docker Mode Implementation

Changes to `server/src/services/docker.py`. The runtime is read from server config, not from the request:

```python
class DockerSandboxService(SandboxService):
    def __init__(self, config: Optional[AppConfig] = None):
        # ... existing initialization ...
        self.resolver = SecureRuntimeResolver(self.app_config)
        # Runtime is resolved once at init; already validated at startup
        self.docker_runtime = self.resolver.get_docker_runtime()
    
    async def create_sandbox(self, request: CreateSandboxRequest) -> CreateSandboxResponse:
        # ... existing code ...
        
        container = self.docker_client.containers.run(
            image=request.image.uri,
            # ... other parameters ...
            runtime=self.docker_runtime,  # "runsc", "kata-runtime", or None
        )
```

### Kubernetes Mode Implementation

Both Kubernetes workload providers inject `runtimeClassName` from server config. The `runtimeClassName` is resolved once at service initialization (already validated at startup).

#### BatchSandboxProvider

Changes to `server/src/services/k8s/batchsandbox_provider.py`:

- **CRD**: `sandbox.opensandbox.io/v1alpha1` BatchSandbox
- **Pod spec path**: `spec.template.spec`

```python
class BatchSandboxProvider:
    def __init__(self, config: AppConfig, ...):
        # ... existing initialization ...
        self.resolver = SecureRuntimeResolver(config)
        self.runtime_class = self.resolver.get_k8s_runtime_class()
    
    def create_workload(self, request: CreateSandboxRequest, ...):
        # ... existing code ...

        if self.runtime_class:
            runtime_manifest["spec"]["template"]["spec"]["runtimeClassName"] = self.runtime_class
        
        # ... template merge ...
```

#### AgentSandboxProvider

Changes to `server/src/services/k8s/agent_sandbox_provider.py`:

- **CRD**: `agents.x-k8s.io/v1alpha1` Sandbox
- **Pod spec path**: `spec.podTemplate.spec`

```python
class AgentSandboxProvider:
    def __init__(self, config: AppConfig, ...):
        # ... existing initialization ...
        self.resolver = SecureRuntimeResolver(config)
        self.runtime_class = self.resolver.get_k8s_runtime_class()
    
    def create_workload(self, request: CreateSandboxRequest, ...):
        # ... existing code ...

        pod_spec = self._build_pod_spec(request, ...)
        if self.runtime_class:
            pod_spec["runtimeClassName"] = self.runtime_class

        runtime_manifest["spec"]["podTemplate"]["spec"] = pod_spec
        # ... template merge ...
```

#### Provider Comparison

| Aspect | BatchSandboxProvider | AgentSandboxProvider |
|--------|---------------------|---------------------|
| CRD Kind | `BatchSandbox` | `Sandbox` |
| Pod Spec Path | `spec.template.spec` | `spec.podTemplate.spec` |
| Pool Support | Yes (`poolRef`) | No |
| Runtime Source | Server config | Server config |

#### Pooled Sandbox Consistency

In Kubernetes mode with resource pools (Pool CRD), the Pool's `runtimeClassName` must match the server's `[secure_runtime]` configuration. Since both are managed by the same SRE administrator, this is an operational requirement.

**Pool configuration by SRE administrator:**

```yaml
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: Pool
metadata:
  name: gvisor-pool
spec:
  template:
    spec:
      runtimeClassName: "gvisor"  # Must match server's secure_runtime.k8s_runtime_class
      containers:
      - name: sandbox-container
        image: python:3.11
  capacitySpec:
    bufferMax: 10
    bufferMin: 2
    poolMax: 20
    poolMin: 5
```

The server validates this consistency at startup. If the Pool's `runtimeClassName` does not match the server config, the server logs a warning and refuses to use that pool.

### Compatibility Matrix

| Secure Runtime | Local Docker | Kubernetes | Notes |
|---------------|--------------|------------|-------|
| gVisor (runsc) | Full support | Full support | Docker `--runtime=runsc`; K8s via RuntimeClass |
| Kata Containers | Full support | Full support | Docker `--runtime=kata-runtime`; K8s via RuntimeClass |
| Firecracker | Not supported | Via Kata (`kata-fc`) | Not a Docker OCI runtime; use Kata with Firecracker hypervisor backend in K8s |
| Custom runtimes | Via config | Via RuntimeClass | Requires pre-installation |

## Test Plan

### Unit Tests

| Test Case | Description |
|-----------|-------------|
| Config parsing | Verify `SecureRuntimeConfig` correctly parses TOML |
| Resolver (Docker) | Verify `get_docker_runtime()` returns correct value or None |
| Resolver (K8s) | Verify `get_k8s_runtime_class()` returns correct value or None |
| Empty type handling | Verify fallback to runc when `type = ""` |
| Firecracker in Docker | Verify error when `docker_runtime` is empty in Docker mode |

### Integration Tests

| Test Case | Description |
|-----------|-------------|
| Startup validation (Docker) | Server fails to start when configured runtime not in Docker daemon |
| Startup validation (K8s) | Server fails to start when RuntimeClass doesn't exist |
| Docker + gVisor | Create sandbox on Docker host with `[secure_runtime] type = "gvisor"` |
| Docker + Kata | Create sandbox on Docker host with `[secure_runtime] type = "kata"` |
| K8s + gVisor | Create sandbox in cluster with gVisor RuntimeClass |
| K8s + kata-fc | Create sandbox in cluster with kata-fc RuntimeClass |
| Pool consistency | Server warns when Pool runtimeClassName doesn't match config |

### E2E Tests

| Test Case | Description |
|-----------|-------------|
| SDK unaware of runtime | SDK creates sandbox without any runtime parameter; runs in gVisor |
| Runtime isolation verification | Verify syscall interception in gVisor sandbox |
| Fallback behavior | Verify standard runc when `[secure_runtime]` not configured |
| execd injection under gVisor | Verify execd binary injection works within gVisor runtime |

## Drawbacks

1. **Operational Complexity**: Administrators must install and configure secure runtimes
2. **Performance Overhead**: Secure runtimes add startup latency and memory overhead
3. **Compatibility Issues**: Some workloads may not work with certain runtimes
4. **Documentation Burden**: Requires comprehensive setup guides for each runtime

## Alternatives

### Alternative 1: Per-Request Runtime Selection

**Approach**: Add a `secureRuntime` field to `CreateSandboxRequest`, allowing SDK users to choose the runtime per sandbox (e.g., `secure_runtime="gvisor"`).

**Pros**:
- Maximum flexibility for users
- Different sandboxes can use different runtimes on the same server
- Supports mixed security levels (trusted vs untrusted workloads)

**Cons**:
- Secure runtime is fundamentally an infrastructure decision, not a per-request decision
- API callers could potentially downgrade security
- Adds complexity to SDK and API surface
- Most deployments only use one runtime; per-request selection is rarely needed

**Decision**: Rejected. Secure runtime selection is an infrastructure-level concern that should be managed by administrators, consistent with how Docker (`daemon.json`) and Kubernetes (`RuntimeClass`) handle runtime configuration. Per-request selection may be revisited as a future enhancement if demand arises.

### Alternative 2: Automatic Runtime Detection

**Approach**: Automatically detect and use the most secure available runtime.

**Pros**:
- Zero configuration
- Always uses best available isolation

**Cons**:
- Unpredictable behavior across environments
- May break workloads with runtime incompatibilities
- Performance impact without administrator consent

**Decision**: Rejected. Explicit administrator choice is preferred for security/performance tradeoffs.

## Infrastructure Needed

- **Testing Environments**:
  - Docker host with gVisor (runsc) configured
  - Docker host with Kata Containers (kata-runtime) configured
  - Kubernetes cluster with gVisor RuntimeClass (`gvisor`, handler `runsc`)
  - Kubernetes cluster with Kata QEMU RuntimeClass (`kata-qemu`)
  - Kubernetes cluster with Kata + Firecracker RuntimeClass (`kata-fc`)

- **CI/CD Updates**:
  - Add integration tests for secure runtime validation
  - Add E2E tests with gVisor-enabled environment

- **Documentation**:
  - User guide: How to use secure runtimes
  - Admin guide: How to set up gVisor/Kata/Firecracker
  - Server configuration reference updates for `[secure_runtime]` (no API changes)

## Upgrade & Migration Strategy

### Backward Compatibility

- **No API breaking changes**: `CreateSandboxRequest` schema is unchanged
- **No SDK changes**: Existing SDK code works as-is
- **Default behavior unchanged**: Without `[secure_runtime]` config, sandboxes use standard runc
- **Existing configurations work**: The new `[secure_runtime]` section is optional

### Migration Path

1. **Phase 1**: Install and configure secure runtime on infrastructure (Docker daemon or K8s RuntimeClass)
2. **Phase 2**: Add `[secure_runtime]` section to server configuration
3. **Phase 3**: Restart server — all sandboxes now use the secure runtime
4. No SDK or application code changes required at any phase

### Documentation Updates

- Add infrastructure setup guide for gVisor/Kata/Firecracker
- Add server configuration reference for `[secure_runtime]`
- Add troubleshooting guide for runtime compatibility issues


================================================
FILE: oseps/0005-client-side-sandbox-pool.md
================================================
---
title: Client-Side Sandbox Pool
authors:
  - "@ninan"
creation-date: 2026-03-02
last-updated: 2026-03-06
status: implementing
---

# OSEP-0005: Client-Side Sandbox Pool

<!-- toc -->
- [Summary](#summary)
- [Motivation](#motivation)
  - [Goals](#goals)
  - [Non-Goals](#non-goals)
- [Requirements](#requirements)
- [Proposal](#proposal)
  - [Functional Boundaries](#functional-boundaries)
  - [Notes/Constraints/Caveats](#notesconstraintscaveats)
  - [Risks and Mitigations](#risks-and-mitigations)
- [Design Details](#design-details)
  - [Design Reading Guide](#design-reading-guide)
  - [Terminology](#terminology)
  - [Class Model](#class-model)
  - [Public API](#public-api)
  - [Core Model: Properties and Constraints](#core-model-properties-and-constraints)
  - [Configuration](#configuration)
  - [State Store Abstraction](#state-store-abstraction)
  - [Pool and Sandbox Lifecycle](#pool-and-sandbox-lifecycle)
    - [Lifecycle operation pseudocode](#lifecycle-operation-pseudocode)
  - [Acquire Flow and Method Semantics](#acquire-flow-and-method-semantics)
    - [Acquire pseudocode](#acquire-pseudocode)
    - [Acquire sequence (simplified)](#acquire-sequence-simplified)
  - [Reconcile Loop](#reconcile-loop)
    - [Reconcile pseudocode](#reconcile-pseudocode)
    - [Reconcile sequence (simplified)](#reconcile-sequence-simplified)
  - [Failure Handling and Recovery](#failure-handling-and-recovery)
    - [Failure and backoff pseudocode](#failure-and-backoff-pseudocode)
  - [Observability](#observability)
  - [Compatibility and Evolution](#compatibility-and-evolution)
- [Test Plan](#test-plan)
- [Drawbacks](#drawbacks)
- [Alternatives](#alternatives)
- [Infrastructure Needed](#infrastructure-needed)
- [Upgrade & Migration Strategy](#upgrade--migration-strategy)
<!-- /toc -->

## Summary

This proposal introduces a client-side `SandboxPool` in the SDK for acquiring
ready sandboxes with predictable latency. The pool is an SDK-local component,
strictly decoupled from runtime-side pooling and infrastructure internals.

Pool-managed sandboxes are created through standard lifecycle create APIs.
Idle records use a fixed key TTL of 24h in the state store and are naturally
evicted on expiry. Callers can specify sandbox timeout duration at `acquire`
time.

Sandboxes are still treated as ephemeral and non-reusable. The pool only
maintains an idle buffer target; runtime remains the source of truth for hard
resource limits.

## Motivation

Per-request sandbox creation introduces avoidable cold-start cost. A client-side
reserve of clean, ready sandboxes improves first-byte latency while preserving a
clear caller-owned capacity model.

### Goals

- Define a first-class SDK abstraction for idle-buffer sandbox pooling.
- Provide clear and deterministic acquire behavior both when idle sandboxes are available and when the idle buffer is empty.
- Unify single-node and distributed modes behind one storage interface.
- Keep runtime coupling out of pool control logic.
- Preserve compatibility with existing SDK usage.
- Make caller responsibility explicit for cost and fallback strategy.

### Non-Goals

- Introducing or modifying runtime-side pool implementations.
- Auto-discovering backend resource limits from runtime/infrastructure.
- Guaranteeing zero cold starts under unlimited burst.
- Coupling pool behavior to Kubernetes, Docker, or any specific backend.
- Shipping a built-in opinionated distributed backend (e.g., Redis/etcd/SQL).
- Building strict global capacity accounting in SDK.

## Requirements

- Must work using only existing lifecycle APIs.
- Must not assume runtime-specific capabilities.
- Must not require lifecycle OpenAPI schema changes.
- Must expose deterministic behavior when idle buffer is empty.
- Must keep config explicit and caller-controlled.
- Must expose pool health, counters, and acquire latency metrics.

## Proposal

Add SDK-level `SandboxPool` that pre-creates and manages a target idle buffer
of clean, borrowable sandboxes.

Callers:
- `acquire` a sandbox,
- optionally provide `sandboxTimeout` for the acquired sandbox,
- use the sandbox,
- terminate the sandbox via the existing `sandbox.kill()` when done.

The pool is treated as a purely client-layer construct:

- No runtime coupling in control logic.
- No runtime-specific optimization assumptions.
- No hidden server-side autoscaling behavior.

Idle buffering is caller-owned and best-effort:
- `maxIdle` is a standby target/cap (not strict guarantee).
- Runtime enforces hard resource/quota limits.

Create compatibility:
- Pool create paths use existing lifecycle create APIs directly.
- Pool does not require any special extension key/value convention.

### Functional Boundaries

This OSEP explicitly defines the following boundaries:

- **In scope**
  - SDK-side model, APIs, and control loop.
  - Deterministic pool behavior under normal and degraded conditions.
  - Idle-buffer management for clean, ready sandboxes.
  - A pluggable state-store interface used by both single-node and distributed modes.
- **Out of scope**
  - Runtime-side scheduler policy.
  - Backend capacity introspection.
  - Any specific distributed datastore implementation bundled by default.

### Notes/Constraints/Caveats

- Runtime-level pooling may coexist but is irrelevant to this SDK model.
- Sandboxes are ephemeral and non-reusable after use.
- Runtime is authoritative for capacity limits; SDK pool does not enforce global hard caps.

### Risks and Mitigations

- Risk: Frequent empty-idle events under burst traffic.
  Mitigation: configurable empty behavior (`DIRECT_CREATE` or `FAIL_FAST`) and metrics.
- Risk: Backend state/lifecycle changes break assumptions.
  Mitigation: connect-on-acquire validation, stale-id cleanup, and adapter-based
  state handling.
- Risk: Multi-process replenish may issue duplicate create attempts.
  Mitigation: distributed primary-lock ownership, idempotent store operations,
  backoff, and runtime-side quota protection.

## Design Details

### Design Reading Guide

Recommended reading order for implementation and review:

1. **Class Model + Public API**: understand responsibilities and entrypoints.
2. **State Store Abstraction**: lock down single-node/distributed correctness contracts.
3. **Acquire Flow**: understand foreground request behavior and deterministic outcomes.
4. **Reconcile Loop**: understand background convergence and recovery behavior.
5. **Failure Handling**: verify retry/degrade/backoff behavior and caller actions.

### Terminology

- **Idle sandbox**: healthy sandbox ID currently available for borrow.
- **Authoritative store**: the single source of truth for idle membership.
- **Best-effort maxIdle**: convergence target, not a strict availability guarantee.
- **Leader (Primary)**: current lock owner for one `poolName`; allowed to run
  reconcile maintenance write paths.
- **Follower (Non-Leader)**: node that does not currently hold leader lock.

### Class Model

```mermaid
classDiagram
    class SandboxPool {
      <<interface>>
      +start()
      +acquire(sandboxTimeout, policy) Sandbox
      +resize(maxIdle)
      +snapshot() PoolSnapshot
      +shutdown(graceful)
    }

    class DefaultSandboxPool {
      -config PoolConfig
      -reconciler PoolReconciler
      -stateStore PoolStateStore
      +start()
      +acquire(sandboxTimeout, policy) Sandbox
      +resize(maxIdle)
      +snapshot() PoolSnapshot
      +shutdown(graceful)
    }

    class Sandbox {
      +sandboxId String
    }

    class PoolConfig {
      +poolName String
      +ownerId String
      +maxIdle Int
      +warmupConcurrency Int
      +primaryLockTtl Duration
      +emptyBehavior EmptyBehavior
      +stateStore PoolStateStore
    }

    class PoolStateStore {
      <<interface>>
      +tryTakeIdle(poolName) String?
      +putIdle(poolName, sandboxId)
      +removeIdle(poolName, sandboxId)
      +tryAcquirePrimaryLock(poolName, ownerId, ttl) bool
      +renewPrimaryLock(poolName, ownerId, ttl) bool
      +releasePrimaryLock(poolName, ownerId)
      +reapExpiredIdle(poolName, now)
      +snapshotCounters(poolName) StoreCounters
    }

    class PoolSnapshot {
      +state PoolState
      +idleCount Int
      +lastError String
    }

    class PoolReconciler {
      +reconcileTick()
      +runPrimaryReplenishOnce()
      +retireExpiredIdle()
      +applyBackoff()
    }

    class AcquirePolicy {
      <<enumeration>>
      FAIL_FAST
      DIRECT_CREATE
    }

    class EmptyBehavior {
      <<enumeration>>
      FAIL_FAST
      DIRECT_CREATE
    }

    class PoolState {
      <<enumeration>>
      HEALTHY
      DEGRADED
      DRAINING
      STOPPED
    }

    SandboxPool <|.. DefaultSandboxPool
    DefaultSandboxPool --> PoolConfig : uses
    DefaultSandboxPool --> PoolReconciler : owns
    DefaultSandboxPool --> PoolSnapshot : returns
    DefaultSandboxPool --> Sandbox : returns
    DefaultSandboxPool --> AcquirePolicy : parameter
    DefaultSandboxPool --> PoolStateStore : persists state
    PoolSnapshot --> PoolState : includes
```

### Public API

Language-neutral contract (normative semantics, not tied to any SDK syntax):

```text
SandboxPool
  - start()
  - acquire(sandboxTimeout?, policy=DIRECT_CREATE) -> Sandbox
  - resize(maxIdle)
  - snapshot() -> PoolSnapshot
  - shutdown(graceful=true)

AcquirePolicy
  - FAIL_FAST
  - DIRECT_CREATE

PoolStateStore
  - tryTakeIdle(poolName) -> sandboxId?
  - putIdle(poolName, sandboxId)
  - removeIdle(poolName, sandboxId)
  - tryAcquirePrimaryLock(poolName, ownerId, ttl) -> bool
  - renewPrimaryLock(poolName, ownerId, ttl) -> bool
  - releasePrimaryLock(poolName, ownerId)
  - reapExpiredIdle(poolName, now)
  - snapshotCounters(poolName) -> StoreCounters
```

Method intent:
- `acquire`: primary pool operation; it takes/creates a sandbox ID internally and
  returns a connected sandbox instance (`Sandbox` in host SDK terms).
- `PoolStateStore`: stores only IDs and pool coordination state; it must not store
  language runtime sandbox objects.
- `runPrimaryReplenishOnce` (internal): primary-only maintenance write path;
  independent from caller-facing `acquire` flow.

### Core Model: Properties and Constraints

Model entities:
- **Sandbox**: connected sandbox client object created from `sandboxId` on demand.
- **Sandbox ID**: canonical identity managed by pool and store.
- **Idle reserve**: clean and borrowable sandboxes only.

Constraints:
- Soft target: pool tries to keep `idle` near `maxIdle`.
- Idle eligibility is validated at `acquire` connection time; stale IDs are
  removed and the pool falls back to direct create.
- Runtime authority: hard capacity/quota is enforced by runtime, not by SDK pool.

Counter transition rules:
- `acquire` from idle: `idle - 1`
- `replenish create success`: `idle + 1` (after persisted to `PoolStateStore`)
- `idle retire`: `idle - 1`

### Configuration

Configuration keys:
- `poolName` (required): user-defined readable name and namespace key for this logical pool.
- `ownerId` (required in distributed mode): unique process identity used for primary lock ownership.
- `maxIdle` (required): standby idle target/cap.
- `warmupConcurrency` (optional): max concurrent creation workers.
- `primaryLockTtl` (optional): lock TTL for distributed primary ownership.
- `emptyBehavior` (optional): behavior when idle buffer is empty (`DIRECT_CREATE` or `FAIL_FAST`).
- `stateStore` (required): injected implementation of `PoolStateStore`.

Default derivation (when omitted):
- `warmupConcurrency = max(1, ceil(maxIdle * 0.2))`
- `primaryLockTtl` should be larger than one reconcile tick interval.
- `idleTtl` is fixed at 24h (non-configurable in V1).
- `emptyBehavior = DIRECT_CREATE` (default). Caller may explicitly set
  `FAIL_FAST` for fail-fast semantics.
- `putIdle` may use an implementation-defined safety margin and write
  `effectiveIdleTtl = idleTtl - ttlSafetyMargin`; `effectiveIdleTtl` should stay
  greater than one reconcile tick interval.
- caller-provided numeric values override defaults for configurable keys.

### State Store Abstraction

The SDK pool logic is implementation-invariant and always uses a `PoolStateStore`
interface. Deployment mode is decided by which implementation is injected:

- `InMemoryPoolStateStore`: single-node/local mode.
- User-provided remote datastore implementation: distributed mode.

Contract semantics (normative):
- Pool scoping: all operations are namespaced by `poolName`; no cross-pool leakage.
- Atomic take: one idle sandbox can only be taken by one acquire operation.
- Idempotent put/remove operations for idle membership.
- Ordering: `tryTakeIdle` should prefer FIFO (oldest idle first) as a
  best-effort implementation goal. Strict FIFO is not required across all
  backends.
- Snapshot consistency: counters must be at least eventually consistent.

Lock semantics (normative):
- Primary lock semantics for distributed safety:
  - Only the current leader lock holder may execute **reconcile maintenance**
    writes (`putIdle`, `reapExpiredIdle`).
  - Foreground acquire-path write (`tryTakeIdle`) is allowed on **all** nodes,
    including leader and followers.
  - `removeIdle` on stale-id cleanup is an acquire-path cleanup write and is
    allowed on all nodes.
  - Lock ownership must be time-bounded (`ttl`) and renewable by owner only.
  - `tryAcquirePrimaryLock` is best-effort mutually exclusive by `poolName`.
  - Lock loss must cause immediate stop of replenish attempts on that node.

Idle TTL semantics (normative):
- Idle entries are written with logical `idleTtl=24h`.
- Store may apply a small `ttlSafetyMargin` when writing keys, as long as
  `effectiveIdleTtl > reconcileTickInterval`.
- Distributed stores should rely on backend TTL expiry.
- Single-node in-memory store must track `expiresAt` and evict expired entries
  via lazy-on-acquire and periodic sweep.
- `reapExpiredIdle` is a unified store hook invoked by reconcile:
  - In-memory store: performs active sweep.
  - TTL-capable distributed store: may be no-op.

Store data model scope:
- Store persists only `sandboxId` and idle/lock coordination metadata.
- Store must not require serialization of SDK language objects.

Implementation-owned settings:
- Any optional coordination/locking policy for distributed replenish is managed
  by each `PoolStateStore` implementation, not top-level `SandboxPool` config keys.

This keeps SDK behavior unified across modes while avoiding coupling to any
specific distributed system.

Distributed role boundary (normative):

| Responsibility area | Leader (lock owner) | Follower (non-leader) |
|---|---|---|
| Foreground `acquire` (`tryTakeIdle`) | Allowed | Allowed |
| Foreground stale-id cleanup (`removeIdle`) | Allowed | Allowed |
| Direct-create fallback in `acquire` | Allowed | Allowed |
| Reconcile replenish (`createSandbox` + `putIdle`) | Allowed | Not allowed |
| Reconcile TTL reap (`reapExpiredIdle`) | Allowed | Not allowed |
| Lock renew/release for reconcile ownership | Allowed | Not allowed (must fail/reject) |

Rule of thumb:
- Leader is a **background maintenance role**, not a request-routing role.
- Leader must continue serving foreground acquires exactly like any other node.
- Losing leader lock only stops reconcile maintenance on that node; it must not
  stop foreground acquire handling.

Pool naming rules:
- `poolName` is user-defined and human-readable.
- `poolName` must be stable for one logical pool lifecycle.
- Different business pools must use different `poolName` values.

#### PoolStateStore compliance matrix (required)

User-provided distributed stores must pass the following contract checks before
being considered production-ready:

| Contract area | Scenario | Expected result |
|---|---|---|
| Atomic idle take | Two concurrent `tryTakeIdle` requests target one idle `sandboxId` | Exactly one caller succeeds; the other receives empty result |
| Idempotent put | Duplicate `putIdle(poolName, sandboxId)` retries | Idle membership remains single-copy; counters do not overcount |
| Idempotent remove | Duplicate `removeIdle(poolName, sandboxId)` retries | Operation remains successful/no-op on second attempt |
| FIFO preference | Multiple idle entries with different insertion times | `tryTakeIdle` returns oldest-first as best effort (strict global FIFO not required) |
| Primary lock acquire | Multiple nodes call `tryAcquirePrimaryLock` concurrently | At most one node becomes current primary for that `poolName` window |
| Primary lock renew | Non-owner tries `renewPrimaryLock` | Renew is rejected; ownership is unchanged |
| Primary lock failover | Current primary crashes and lock TTL expires | Another node can acquire lock and continue replenish |
| Idle TTL expiry | Idle entry reaches 24h TTL | Entry is no longer borrowable and is removed/expired |
| Reconcile write ownership | Non-leader tries `putIdle` from reconcile path | Write is rejected (must not be applied) |
| Pool isolation | Same `sandboxId` key pattern used across different `poolName` values | No cross-pool take/remove visibility |
| Eventual counters | Mixed put/take/create/fail under load | `snapshotCounters` converges to actual membership within implementation SLA |

Implementation note:
- The SDK should provide a reusable compliance test suite that runs the above
  scenarios against any `PoolStateStore` implementation.

### Pool and Sandbox Lifecycle

Pool lifecycle:

```mermaid
stateDiagram-v2
    [*] --> Created
    Created --> Starting: start()
    Starting --> Running
    Running --> Draining: shutdown(graceful=true)
    Running --> Stopped: shutdown(graceful=false)
    Draining --> Stopped
    Stopped --> [*]
```

#### Lifecycle operation pseudocode

```text
function start(pool):
  if pool.state in [RUNNING, STARTING]:
    return
  pool.state = STARTING
  spawn reconcile worker (periodic tick)
  if pool.config.maxIdle > 0:
    trigger immediate reconcile tick for warmup
  pool.state = RUNNING

function resize(pool, newMaxIdle):
  validate newMaxIdle >= 0
  pool.config.maxIdle = newMaxIdle
  trigger reconcile tick (do not block caller on convergence)

function shutdown(pool, graceful=true):
  stop accepting new acquire requests
  if !graceful:
    stop reconcile worker immediately
    pool.state = STOPPED
    return

  pool.state = DRAINING
  stop reconcile worker
  // no force-return path: borrowed sandboxes remain caller-owned
  wait until in-flight pool operations finish or drainTimeout reached
  pool.state = STOPPED
```

Sandbox state model:

This is a runtime-facing reference model used by pool logic. It is descriptive,
not a strict SDK-owned lifecycle contract.

```mermaid
stateDiagram-v2
    [*] --> Creating
    Creating --> Ready: health check pass
    Creating --> Terminated: create/check failed
    Ready --> InUse: acquire
    InUse --> Terminated: sandbox.kill() or timeout
    InUse --> Terminated: unrecoverable runtime failure
    Ready --> Retiring: idle ttl exceeded
    Retiring --> Terminated
    Terminated --> [*]
```

### Acquire Flow and Method Semantics

`acquire` flow:

Diagram note: this flowchart is an overview. Normative behavior is defined by
the pseudocode and method semantics below.

```mermaid
flowchart TD
    A[Acquire request] --> B{Idle sandboxId available?}
    B -- yes --> C[Atomically take idle sandboxId]
    C --> C1{Connect succeeds?}
    C1 -- yes --> C2[Return connected sandbox instance]
    C1 -- no --> C3[Remove stale idle id and try direct create]
    B -- no --> D{Acquire policy}

    D -- FAIL_FAST --> E["Return SandboxException(code=POOL_EMPTY)"]
    D -- DIRECT_CREATE --> I[Attempt direct create -> connect -> optional renew]
    C3 --> I
    I --> J{Success?}
    J -- yes --> K[Return connected sandbox instance]
    J -- no --> H["Return original create/connect error"]
```

Method semantics:
- `acquire`: returns a connected sandbox instance. Internally it first tries atomic idle-take
  by `sandboxId`, validates by connect, cleans stale IDs on connect failure, then
  applies empty behavior (`DIRECT_CREATE` default, or `FAIL_FAST` if configured).
  It may apply `sandboxTimeout` by calling lifecycle `renew`.

#### Acquire pseudocode (normative)

```text
function acquire(pool, sandboxTimeout, policy):
  sandboxId = stateStore.tryTakeIdle(pool.config.poolName) // atomic
  if sandboxId != null:
    handle = null
    try:
      handle = lifecycle.connectById(sandboxId) // host SDK's connect equivalent
    catch e:
      // small-probability stale idle (killed externally/runtime reclaimed)
      // best-effort cleanup then fallback cold start
      stateStore.removeIdle(pool.config.poolName, sandboxId)
      lifecycle.tryKill(sandboxId)
    if handle != null:
      if sandboxTimeout != null:
        // kept outside the connect try/catch so the original timeout/renew
        // error propagates to the caller instead of being treated as a stale id
        lifecycle.renew(sandboxId, sandboxTimeout)
      return handle

  if policy == FAIL_FAST:
    throw SandboxException(code=POOL_EMPTY)

  // direct create uses standard create with 24h idle-style timeout.
  // create/connect failure handling and cleanup reuse existing lifecycle logic.
  createdId = lifecycle.createSandbox(timeout=24h)
  createdHandle = lifecycle.connectById(createdId)
  if sandboxTimeout != null:
    lifecycle.renew(createdId, sandboxTimeout)
  return createdHandle
```

#### Acquire sequence (simplified, informative)

```mermaid
sequenceDiagram
    participant Caller
    participant Pool as SandboxPool
    participant Store as PoolStateStore
    participant API as Lifecycle API

    Caller->>Pool: acquire(timeout, policy)
    Pool->>Store: tryTakeIdle(poolName)
    alt idle hit
        Store-->>Pool: sandboxId
        Pool->>API: connect(sandboxId)
        alt connect ok
            API-->>Pool: connected
            Pool-->>Caller: Sandbox
        else connect failed
            API-->>Pool: failed
            Pool->>Store: removeIdle(poolName, sandboxId)
            Pool->>API: create sandbox(timeout=24h)
            API-->>Pool: sandboxId / failure
            Pool->>API: connect(createdId)
            Pool->>API: renew(createdId, sandboxTimeout?) 
            API-->>Pool: connected / failed
            Pool-->>Caller: Sandbox or original create/connect error
        end
    else idle miss + FAIL_FAST
        Store-->>Pool: null
        Pool-->>Caller: POOL_EMPTY
    else idle miss + DIRECT_CREATE
        Store-->>Pool: null
        Pool->>API: create sandbox(timeout=24h)
        API-->>Pool: sandboxId / failure
        Pool->>API: connect(createdId)
        Pool->>API: renew(createdId, sandboxTimeout?)
        API-->>Pool: connected / failed
        Pool-->>Caller: Sandbox or original create/connect error
    end
```

Kill-only model:
- Pool does not expose return/finalize APIs.
- Caller ends sandbox lifecycle via existing `sandbox.kill()` (or runtime timeout).
- Pool does not track borrowed sandbox terminal state as a hard capacity source of truth.

Important behavior:
- Borrowing from idle at `idle == maxIdle` is expected and correct.
- Runtime capacity/quota remains authoritative under burst.
- 24h idle-key TTL reduces stale-id probability but does not guarantee runtime
  state is still `Running`; acquire handles this small-probability case by
  cleaning stale id and degrading to direct create.

### Reconcile Loop

The pool runs a background reconcile loop that fires on a periodic tick. Each
tick drives through four ordered phases:

Diagram note: this flowchart is an overview. Normative behavior is defined by
the pseudocode below.

```mermaid
flowchart TD
    A[Reconcile tick] --> B["Snapshot counters: idle"]
    B --> C["Rely on key TTL expiry (fixed 24h); optional local sweep in in-memory store"]
    C --> E["Compute deficit:
        target = maxIdle
        deficit = target − idle"]

    E --> F{deficit > 0?}
    F -- no --> J[Assess health]
    F -- yes --> G{In backoff?}
    G -- yes --> J
    G -- no --> H["Create min(deficit, warmupConcurrency) sandboxes"]
    H --> I{Create outcome}
    I -- all OK --> I1["New sandboxes → Ready → idle reserve
        idle ▲ — clear failure counter"]
    I -- partial / fail --> I2["Failed creates recorded
        increment failure counter"]
    I1 --> J
    I2 --> J

    J --> K{Consecutive failures > threshold?}
    K -- yes --> M["Pool state → DEGRADED
        Apply exponential backoff"]
    K -- "no — was DEGRADED" --> N["Pool state → HEALTHY
        Clear backoff"]
    K -- "no — already HEALTHY" --> O[No state change]
    M --> P[Schedule next tick]
    N --> P
    O --> P
```

#### Reconcile pseudocode (normative)

```text
function reconcileTick(poolName, cfg, now):
  // leader-gated scheduler: only current leader may run reconcile maintenance writes
  if !stateStore.tryAcquirePrimaryLock(poolName, cfg.ownerId, ttl=cfg.primaryLockTtl):
    return

  try:
    runPrimaryReplenishOnce(poolName, cfg, now)
  finally:
    stateStore.releasePrimaryLock(poolName, cfg.ownerId)

function runPrimaryReplenishOnce(poolName, cfg, now):
  // 1) idle keys use fixed 24h TTL and expire naturally in TTL-capable stores
  //    in-memory store may run local sweep/lazy eviction
  stateStore.reapExpiredIdle(poolName, now) // no-op allowed for TTL-capable backends
  counters = stateStore.snapshotCounters(poolName) // idle...

  // 2) replenish toward maxIdle, bounded by warmupConcurrency
  deficit = max(0, cfg.maxIdle - counters.idle)
  toCreate = min(deficit, cfg.warmupConcurrency)
  if toCreate == 0 or backoff.active():
    stateStore.renewPrimaryLock(poolName, cfg.ownerId, ttl=cfg.primaryLockTtl)
    return

  repeat toCreate times:
    if !stateStore.renewPrimaryLock(poolName, cfg.ownerId, ttl=cfg.primaryLockTtl):
      break // lock lost; stop creating immediately
    try:
      newId = lifecycle.createSandbox(timeout=24h)
      stateStore.putIdle(poolName, newId)
    catch e:
      recordFailureAndMaybeBackoff(e)
```

#### Reconcile sequence (simplified, informative)

```mermaid
sequenceDiagram
    participant Reconciler as PoolReconciler
    participant Store as PoolStateStore
    participant API as Lifecycle API

    Reconciler->>Store: try acquire leader lock
    alt lock not acquired
        Store-->>Reconciler: false
        Reconciler-->>Reconciler: skip this tick
    else lock acquired
        Store-->>Reconciler: true
        Reconciler-->>Reconciler: run replenish once
        Reconciler->>Store: reap expired idle
        Reconciler->>Store: snapshot counters
        loop create up to min(deficit, warmupConcurrency)
            Reconciler->>Store: renew leader lock
            Reconciler->>API: create sandbox with 24h timeout
            API-->>Reconciler: sandboxId / failure
            Reconciler->>Store: put idle on success
        end
        Reconciler->>Store: release leader lock
    end
```

Reconcile policy notes:
- Replenishment is background work to restore standby reserve.
- Under high foreground demand or runtime quota pressure, idle may drain below
  `maxIdle`; this is expected.
- In distributed mode, replenish is leader-gated: only the current leader lock
  holder performs reconcile maintenance create paths for a given `poolName`.
- Nodes that fail to acquire/renew primary lock skip replenish on that tick and
  retry lock acquisition on subsequent reconcile ticks.
- Caller-facing `acquire` path remains independent and is served by all nodes
  (leader included); it does not require leader ownership.
- Source of truth:
  - Single-node mode: in-memory state store is authoritative.
  - Distributed mode: centralized state store is authoritative.
- `PoolReconciler` never mutates state directly; all state changes go through
  `PoolStateStore`.

**Pool health state transitions:**

| From | To | Trigger |
|------|----|---------|
| `HEALTHY` | `DEGRADED` | Consecutive create failures exceed threshold |
| `DEGRADED` | `HEALTHY` | Probe or create succeeds, failure counter resets |
| `HEALTHY` / `DEGRADED` | `DRAINING` | `shutdown(graceful=true)` called |
| any | `STOPPED` | `shutdown(graceful=false)` or drain completes |

When `DEGRADED`, the reconciler applies exponential backoff to create attempts,
preventing cascading pressure on a failing backend while continuing to serve
from existing idle sandboxes (validated by connect-on-acquire).

### Failure Handling and Recovery

Expected deterministic outcomes:

- `FAIL_FAST`: no idle sandbox available -> `SandboxException(code=POOL_EMPTY)`.
- `DIRECT_CREATE`: no idle -> attempt direct create; create/connect failure ->
  propagate original lifecycle error code.
- `sandboxTimeout` application fails ->
  propagate original lifecycle timeout/apply error code.
- Backend quota/capacity errors -> typed create failures, no silent fallback.
- Empty idle + repeated replenish failure -> degraded pool with user-configured
  fallback (`DIRECT_CREATE` or `FAIL_FAST`).
- Idle connect failure on acquire -> remove stale idle ID and fall back to direct create.
- State-store contention on idle-take/put -> retry with bounded backoff.
- State-store unavailability -> degrade to policy-defined empty behavior.

Error-model alignment:
- SDK should surface pool failures through existing `SandboxException` hierarchy.
- Pool-specific error codes should be minimal and used only for pool-owned
  deterministic states (for example `POOL_EMPTY` under `FAIL_FAST`).
- Lifecycle create/connect/timeout failures should propagate original SDK/server
  error codes rather than being remapped into pool-only codes.

Minimal error-code contract (normative):

1. Pool may emit pool-specific codes only for pool-owned deterministic outcomes
   that lifecycle APIs cannot represent (for example `POOL_EMPTY`).
2. Pool must not wrap or remap lifecycle create errors.
3. Pool must not wrap or remap lifecycle connect errors.
4. Pool must not wrap or remap lifecycle timeout-apply/renew errors.
5. If pool performs best-effort cleanup (`removeIdle`, `tryKill`) after failure,
   cleanup errors must not replace the original lifecycle error returned to caller.
6. Store-layer failures may use pool/store-specific codes when no existing
   lifecycle error is applicable.

Error code action matrix:

| `error.code` | Typical trigger | Retryable | Caller action |
|---|---|---|---|
| `POOL_EMPTY` | `acquire` with `FAIL_FAST` and no idle sandbox available | No (for same call) | Fail request fast or retry later according to business SLA |
| `<existing lifecycle error codes>` | Direct create/connect/timeout apply path fails | Depends on specific error | Reuse existing caller retry/degrade policy for lifecycle errors |
| `POOL_STATE_STORE_UNAVAILABLE` | Store unavailable during idle take/put/lock operations | Yes | Apply bounded retry; if exhausted, follow `emptyBehavior` fallback |
| `POOL_STATE_STORE_CONTENTION` | Atomic take or lock-update conflicts | Yes | Retry with bounded backoff and jitter |

#### Failure and backoff pseudocode

```text
function handleCreateFailure(pool, err):
  pool.failureCount += 1
  emitCounter("create_failure_total", tags={code: classify(err)})
  if pool.failureCount > pool.config.degradedThreshold:
    pool.state = DEGRADED
    backoff.bump() // exponential: min(maxBackoff, base * 2^n)

function handleCreateSuccess(pool):
  pool.failureCount = 0
  if pool.state == DEGRADED:
    pool.state = HEALTHY
  backoff.reset()

function withStateStoreRetry(op):
  for attempt in 1..maxStoreRetries:
    try:
      return op()
    catch e if isContention(e) or isTransientStoreError(e):
      sleep(jitteredBackoff(attempt))
  throw SandboxException(code=POOL_STATE_STORE_UNAVAILABLE)
```

Recovery model:
- On repeated create failures: move to `DEGRADED`.
- Use exponential backoff for create/replenish attempts.
- Keep serving from existing idle when possible (validated by connect-on-acquire).
- Return to `HEALTHY` after successful probes/creates.

```mermaid
sequenceDiagram
    participant Pool
    participant API as Lifecycle API
    Pool->>API: create sandbox
    API-->>Pool: 5xx / quota error
    Pool->>Pool: mark DEGRADED + backoff
    loop retry with backoff
        Pool->>API: health check / create probe
        API-->>Pool: still failing
    end
    Pool->>API: probe
    API-->>Pool: success
    Pool->>Pool: clear DEGRADED, resume replenish
```

### Observability

Metrics and logs are emitted at SDK layer:

- Gauges: `pool_idle`.
- Timers: `acquire_latency_ms`, `create_latency_ms`.
- Counters: `pool_exhausted_total`, `create_failure_total`, `direct_create_total`, `direct_create_failure_total`.
- Structured logs include `pool_name`, `sandbox_id`, acquire policy, and state transitions.

### Compatibility and Evolution

- Existing `Sandbox.builder()` and `SandboxManager` flows remain unchanged.
- Pool feature is opt-in and additive.
- Single-node and distributed modes share the same SDK pool control logic and API.
- Mode selection is implementation-driven via `PoolStateStore` injection.
- SDK does not prescribe or bundle a specific distributed datastore backend.
- All store records and coordination are isolated by `poolName`.
- Runtime remains authoritative for hard capacity and quota limits.
- State handling is forward-compatible: unknown backend lifecycle states are treated
  conservatively (fallback to direct create on connect failure).
- Pool adapts through lifecycle adapters rather than runtime-specific paths.

## Test Plan

Test plan includes:

- Unit tests for state transitions and idle-buffer semantics.
- Concurrency tests for `acquire` and replenish races under empty-idle conditions.
- State-store contract tests (atomic idle-take, idempotent put/remove, pool scoping).
- Reference in-memory store tests and user-store compliance test suite.
- Idle TTL tests: fixed 24h expiry behavior for distributed TTL-backed stores and
  in-memory `expiresAt` sweep/lazy eviction.
- Acquire fallback tests: idle connect failure triggers stale-id cleanup and
  direct-create fallback path.
- Replenish boundedness tests: leader-only create path respects `warmupConcurrency`
  and allows small best-effort overshoot under concurrent acquire/reconcile races.
- Fault-injection tests for backend creation failures and timeouts.
- Integration tests in local and remote environments.
- Compatibility tests for non-pool SDK usage.
- Soak tests for leak/retire correctness.

## Drawbacks

- Additional SDK complexity and maintenance overhead.
- More caller-facing tuning knobs that can be misconfigured.
- No implicit protection from backend quota misalignment.

## Alternatives

- Keep per-request sandbox creation only.
- Build runtime-side pool controls into server APIs.
- Provide best-effort caching without explicit acquire policies.

## Infrastructure Needed

No new mandatory infrastructure is required. Optional benchmark and soak-test
environments are recommended for tuning default pool parameters.

## Upgrade & Migration Strategy

- Backward compatible: existing SDK usage remains unchanged.
- Pooling introduced as opt-in API.
- Start with conservative defaults and iterative tuning guidance.


================================================
FILE: oseps/0006-developer-console.md
================================================
---
title: Developer Console for Sandbox Operations
authors:
  - "@divyamagrawal06"
creation-date: 2026-03-05
last-updated: 2026-03-06
status: implementable
---

# OSEP-0006: Developer Console for Sandbox Operations with Phased Auth Model

<!-- toc -->

- [Summary](#summary)
- [Motivation](#motivation)
  - [Goals](#goals)
  - [Non-Goals](#non-goals)
- [Requirements](#requirements)
- [Proposal](#proposal)
  - [Notes/Constraints/Caveats](#notesconstraintscaveats)
  - [Risks and Mitigations](#risks-and-mitigations)
- [Design Details](#design-details)
  - [Current State](#current-state)
  - [Phase 1 (MVP): Console + Server-Side RBAC Without DB](#phase-1-mvp-console--server-side-rbac-without-db)
  - [Phase 2 (Hardening): OIDC/JWT + PostgreSQL RBAC and Audit](#phase-2-hardening-oidcjwt--postgresql-rbac-and-audit)
  - [Role and Permission Model](#role-and-permission-model)
  - [Ownership and Team Scoping Without Database](#ownership-and-team-scoping-without-database)
  - [Server Changes](#server-changes)
  - [Console Application Design](#console-application-design)
  - [API and Spec Changes](#api-and-spec-changes)
  - [Operational Rollout](#operational-rollout)
- [Test Plan](#test-plan)
- [Drawbacks](#drawbacks)
- [Alternatives](#alternatives)
- [Infrastructure Needed](#infrastructure-needed)
- [Upgrade & Migration Strategy](#upgrade--migration-strategy)
<!-- /toc -->

## Summary

This proposal is based on [#348](https://github.com/alibaba/OpenSandbox/issues/348), which outlines the need for a Developer Console for sandbox lifecycle operations with a phased auth model.

Phase 1 (MVP) adds a `console/` web app for day-to-day sandbox management (list, create, renew, delete, get endpoint, filtering) and a server-side auth/authz layer that does not break existing API key automation.

Phase 2 adds OIDC JWT validation, PostgreSQL RBAC bindings, and durable audit logs.

## Motivation

Today OpenSandbox exposes lifecycle APIs and Swagger docs, but developers/operators still need to manage sandbox resources via APIs. This creates friction for common workflows (search/create/renew/delete), weakens governance in multi-user environments, and raises onboarding cost for teams that are not API-first.

- Server auth today is global API key only (`server/src/middleware/auth.py` with `OPEN-SANDBOX-API-KEY`).
- Lifecycle operations already exist and are stable (`server/src/api/lifecycle.py`, `specs/sandbox-lifecycle.yml`).
- Filtering by state/metadata already exists (`GET /sandboxes` and `matches_filter`).
- Sandbox metadata already maps to labels in Docker/Kubernetes services and is returned in list/get responses.

This means a console can be built on top of what exists without touching core runtime behavior.

### Goals

1. Add a standalone React app under `console/` for sandbox lifecycle operations.
2. Cover the MVP flows called out in [#348](https://github.com/alibaba/OpenSandbox/issues/348):
   - list and detail views
   - create sandbox from image + basic runtime options
   - renew expiration, delete sandbox, get endpoint
   - filtering by state and metadata
3. Enforce role boundaries server-side (not only hidden in UI):
   - `read_only` role for read operations
   - `operator` role for mutating operations
4. Keep existing API key automation and SDK behavior backward compatible.
5. Ensure browser clients never receive server API keys.
6. Ensure that it is feasible and easy to scale to Phase 2:
   - OIDC login and JWT validation in server
   - PostgreSQL-backed RBAC and durable audit events

### Non-Goals

Per the [issue discussion](https://github.com/alibaba/OpenSandbox/issues/348), the following are out of scope:

1. Billing or chargeback portal.
2. Approval workflows for every operation.
3. Replacing existing SDK/CLI/API workflows.
4. Changing Docker/Kubernetes runtime internals.
5. Full enterprise IAM policy language in the MVP.

## Requirements

| ID  | Requirement                                                  | Priority    |
| --- | ------------------------------------------------------------ | ----------- |
| R1  | Console users can perform core lifecycle operations from UI  | Must Have   |
| R2  | Role-based authorization on server for each lifecycle action | Must Have   |
| R3  | Existing `OPEN-SANDBOX-API-KEY` flow continues unchanged     | Must Have   |
| R4  | No server API key is exposed to browser code                 | Must Have   |
| R5  | Phase 1 works without introducing a database                 | Must Have   |
| R6  | Ownership/team scoping via existing metadata/labels          | Should Have |
| R7  | OIDC JWT validation and PostgreSQL RBAC/audit in Phase 2     | Should Have |

## Proposal

Following the phased strategy suggested in [#348](https://github.com/alibaba/OpenSandbox/issues/348) ("Ship MVP fast, no DB, validate usage and workflows"):

1. **Phase 1 (MVP)**:
   - Add a `console/` React app.
   - Add a user-auth path in server (config-gated) suitable for console access without API keys in browser.
   - Add authorization checks on lifecycle operations.
   - Add metadata-based scoping using reserved metadata keys for owner/team.
   - Emit audit logs for mutating operations.
   - No new database dependency.

2. **Phase 2 (Hardening)**:
   - Add OIDC JWT validation in server.
   - Add PostgreSQL tables for RBAC bindings and audit events.
   - Add richer operational UX (bulk safeguards, failure insights).

```mermaid
flowchart LR
    A[Developer Browser] --> B[console React App]
    B --> C[OpenSandbox Server]
    C --> D[Lifecycle Service Layer]
    D --> E[Docker/Kubernetes Runtime]
```

```mermaid
flowchart LR
    A[SDK/Automation] -->|OPEN-SANDBOX-API-KEY| C[OpenSandbox Server]
    B[Console User] -->|User Auth Path| C
    C --> D[AuthN + AuthZ Enforcement]
    D --> E[Lifecycle Operations]
```

### Notes/Constraints/Caveats

1. Metadata values must satisfy label constraints already enforced in `ensure_metadata_labels`; owner/team values require canonicalization.
2. Kubernetes runtime currently does not support pause/resume (`501`); console must reflect runtime capability.
3. API key requests remain privileged for backward compatibility.
4. Phase 1 audit is log-based (non-durable). Durable queryable audit is planned for Phase 2.

### Risks and Mitigations

| Risk                                                    | Impact                      | Mitigation                                                                       |
| ------------------------------------------------------- | --------------------------- | -------------------------------------------------------------------------------- |
| Scope creep from simple console into full control plane | Delivery delay              | Strict phase gates; MVP only core operations                                     |
| Header spoofing if trusted-header mode is misconfigured | Security                    | Config-gated user auth mode, trusted deployment guidance, Phase 2 JWT validation |
| Metadata-based scoping collisions                       | Authorization bugs          | Reserve keys for access control and enforce server-side overwrite rules          |
| Claim values incompatible with label format             | Provisioning/authz mismatch | Canonicalization to label-safe owner/team tokens                                 |
| Breaking API automation                                 | Adoption risk               | Keep API key path as-is; add compatibility tests                                 |
| Lack of durable audit in MVP                            | Governance gap              | Structured mutation logs in Phase 1 + Phase 2 audit table plan                   |

## Design Details

### Current State

Quick summary of the relevant server code as it stands today:

- Auth middleware: API key only (`server/src/middleware/auth.py`, header `OPEN-SANDBOX-API-KEY`).
- Lifecycle routes: `server/src/api/lifecycle.py`.
- Service implementations: `server/src/services/docker.py` (Docker), `server/src/services/k8s/kubernetes_service.py` (Kubernetes).
- Filtering: `state` and `metadata` filters in route parsing, `matches_filter` helper.
- Metadata: already stored as Docker/Kubernetes labels.

There is no database for RBAC or audit today.

### Phase 1 (MVP): Console + Server-Side RBAC Without DB

1. Standalone React + TypeScript app under `console/`.
2. Config-gated user-auth mode on the server (no API key in the browser).
3. Authorization checks in the lifecycle API path.
4. Reuse metadata labels for owner/team scoping.
5. Structured audit logs for mutations (create, delete, renew, etc.).

### Phase 2 (Hardening): OIDC/JWT + PostgreSQL RBAC and Audit

1. Validate OIDC-issued JWT in server (issuer, audience, signature/JWKS, exp/nbf).
2. Replace static role mapping with PostgreSQL RBAC bindings.
3. Persist mutation audit events in PostgreSQL.
4. Add query APIs for audit and governance.

### Role and Permission Model

Three roles, matching the separation called for in [#348](https://github.com/alibaba/OpenSandbox/issues/348):

- `read_only`: list/get/get endpoint.
- `operator`: read_only + create/renew/delete (+ pause/resume where runtime supports).
- `service_admin`: API key automation role with full access (compatibility role).

Endpoint permissions by role:

| Endpoint                                | read_only | operator | service_admin |
| --------------------------------------- | --------- | -------- | ------------- |
| `GET /sandboxes`                        | yes       | yes      | yes           |
| `GET /sandboxes/{id}`                   | yes       | yes      | yes           |
| `GET /sandboxes/{id}/endpoints/{port}`  | yes       | yes      | yes           |
| `POST /sandboxes`                       | no        | yes      | yes           |
| `POST /sandboxes/{id}/renew-expiration` | no        | yes      | yes           |
| `DELETE /sandboxes/{id}`                | no        | yes      | yes           |
| `POST /sandboxes/{id}/pause`            | no        | yes      | yes           |
| `POST /sandboxes/{id}/resume`           | no        | yes      | yes           |

### Ownership and Team Scoping Without Database

Phase 1 scope source:

- `metadata["access.owner"]`
- `metadata["access.team"]`

How it works:

1. On create, server injects/overwrites reserved scope metadata from authenticated principal.
2. Non-admin users can only act on resources within their owner/team scope.
3. `service_admin` bypasses scope checks.
4. Existing user-provided metadata remains supported, but reserved keys are server-controlled.

Canonicalization:

- Principal identifiers from user auth claims/headers are transformed into label-safe tokens (length and charset compatible with existing metadata-label validators).
- Canonicalization must be deterministic to keep scope matching stable across requests.

### Server Changes

#### 1. Configuration

Extend `server/src/config.py` with auth/authz sections.

`auth.mode` controls high-level authentication behavior:

- `"api_key_only"`: current behavior; only `OPEN-SANDBOX-API-KEY` auth is accepted.
- `"api_key_and_user"`: dual path; API key auth remains for SDK/automation, and user-authenticated requests are also accepted for Console access.

`user_mode` controls how user identity is extracted when `auth.mode = "api_key_and_user"`:

- Phase 1 supports `"trusted_header"` only.
- When `auth.mode = "api_key_only"`, `user_mode` is ignored.

```toml
[auth]
# Allowed values:
# - "api_key_only"
# - "api_key_and_user"
mode = "api_key_only"

# Used only when auth.mode = "api_key_and_user".
# Phase 1 supports "trusted_header".
user_mode = "trusted_header"

[auth.trusted_header]
# Used when user_mode = "trusted_header".
user_header = "X-OpenSandbox-User"
team_header = "X-OpenSandbox-Team"
roles_header = "X-OpenSandbox-Roles"

[authz]
default_role = "read_only"
owner_metadata_key = "access.owner"
team_metadata_key = "access.team"
operator_subjects = []
read_only_subjects = []
```

Trusted-header failure behavior (Phase 1):

1. Applies when `auth.mode = "api_key_and_user"` and `user_mode = "trusted_header"`.
2. Requests on the user-auth path that are missing required trusted identity headers are treated as unauthenticated and rejected with `401 Unauthorized`.
3. The server must NOT fall back to anonymous/default user access when trusted headers are missing.
4. The server must NOT silently switch to another auth path unless the credential for that path is explicitly provided in the request (for example, `OPEN-SANDBOX-API-KEY` for API key auth).

Phase 2 adds:

```toml
[auth.oidc]
issuer = "https://accounts.google.com"           # or any OIDC provider
audience = "opensandbox-console"
jwks_url = "https://www.googleapis.com/oauth2/v3/certs"
```

#### 2. Authentication Middleware

Changes to `server/src/middleware/auth.py`:

1. Preserve current API key path exactly.
2. Add user principal extraction path (phase-gated by config).
3. Attach normalized principal to `request.state.principal`.
4. If trusted-header mode is active and required headers are missing, return `401 Unauthorized` (unauthenticated), not `403` (authenticated but forbidden).
5. Keep proxy path exemptions behavior unchanged for sandbox proxy route.

#### 3. Authorization Enforcement

New module `server/src/middleware/authorization.py` with a single entry point:

- `authorize_action(principal, action, sandbox=None)`.
- Scope checks for owner/team.

Integrate into `server/src/api/lifecycle.py` per route, before invoking the corresponding service operation (read or mutating).

For list operations:
Apply server-side scope filter in addition to client-provided filters.

For get/delete/renew/endpoint:
Resolve sandbox resource and evaluate scope before action.

#### 4. Mutation Audit Logging (Phase 1)

For mutating actions, log:

- request_id
- principal subject/team/role
- action
- sandbox_id
- outcome (success/error code)
- timestamp

This extends existing request-id logging without DB dependency.

### Console Application Design

Standalone React app living under `console/`. Pages map directly to the MVP scope from [#348](https://github.com/alibaba/OpenSandbox/issues/348):

1. **Sandbox List:** state + metadata filters, pagination.
2. **Sandbox Detail:** status, metadata, image, entrypoint, expiration.
3. **Create Sandbox:** image, entrypoint, timeout, resource limits, env vars, metadata.
4. **Operations:** renew expiration, delete, endpoint retrieval.

The UI should hide or disable controls the user's role cannot use (e.g., hide "Create" for `read_only`), but the server is always the final authority. The browser only uses the user-auth path; the API key is never shipped in frontend code.

If Console requests are rejected with `401` because trusted headers are missing, the Console should render an explicit "authentication required / auth proxy misconfiguration" state instead of retrying with anonymous assumptions.

### API and Spec Changes

Primary lifecycle endpoints MUST remain unchanged.

Updates to `specs/sandbox-lifecycle.yml`:

1. Document dual auth path (API key + user auth mode).
2. Add `401` responses for unauthenticated user-auth requests (including trusted-header mode with missing required headers).
3. Add `403` responses where role restrictions apply (e.g., create/renew/delete).
4. Clarify reserved metadata keys used for ownership/team scoping.
5. Add error codes for authentication and authorization failures.

### Operational Rollout

1. Phase 1 stays behind a config flag (`auth.mode = "api_key_and_user"`).
2. Deploy console + updated server in a non-prod environment first.
3. Validate role boundaries and scope filtering.
4. Phase 2 switches to OIDC JWT mode and runs PostgreSQL migrations.

## Test Plan

### Unit Tests

1. Auth middleware:
   - API key success/failure unchanged.
   - user principal extraction in enabled mode.
   - dual-mode conflict behavior.
   - trusted-header mode rejects missing required headers with `401`.
2. Authorization logic:
   - Each action against the role permission table.
   - Owner/team scope checks (allow and deny cases).
   - Reserved metadata injection on create.
3. Canonicalization:
   - deterministic label-safe owner/team tokens.

### Integration Tests (Server)

1. Route-level authz:
   - `read_only` can list/get/endpoint; gets 403 on create/renew/delete.
   - `operator` can do all MVP operations.
2. Backward compat:
   - Existing API key clients work exactly as before.
3. Scope filtering:
   - Users only see sandboxes matching their owner/team.
4. Runtime parity:
   - Scoped list/get/delete/renew behaves the same on Docker and Kubernetes.
5. Trusted-header deployment behavior:
   - direct Console-to-server request without proxy-injected headers returns `401`.
   - proxy misconfiguration (one or more missing identity headers) returns `401`.

### Console Tests

1. Page-level API integration tests for list/detail/create/renew/delete/endpoint flows.
2. Role-based UX tests (buttons disabled/hidden for read_only).
3. E2E smoke path from login context to sandbox operations.

### Phase 2 Tests

1. JWT signature and claim validation tests.
2. PostgreSQL RBAC lookup tests.
3. Durable audit write/read tests.

## Drawbacks

1. A second auth path in the server means more code to maintain and more surface to test.
2. Metadata-based scoping (Phase 1) is less flexible than a proper DB-backed policy.
3. Adding a React app introduces a frontend build/release cycle into the repo.
4. Durable audit and richer RBAC are punted to Phase 2.

## Alternatives

### Alternative 1: Keep API-only (no console)

Pros:

- Zero frontend maintenance.
- No auth model changes.

Cons:

- Does not address operator efficiency and onboarding needs.

Decision: Rejected as it does not solve the efficiency and onboarding problems raised in [#348](https://github.com/alibaba/OpenSandbox/issues/348).

### Alternative 2: Implement Full OIDC + DB in One Phase

Pros:

- Strongest model from day one.

Cons:

- Larger scope, slower delivery, higher integration risk.

Decision: Rejected in favor of phased delivery, as mentioned in the issue.

### Alternative 3: Expose the API key to the browser

Would need almost no server changes, but leaks the global API key to every console user and gives up per-user governance entirely. Rejected.

## Infrastructure Needed

Phase 1:

- Node.js (for building/testing the `console/` app).
- Existing OpenSandbox server runtime (Docker or Kubernetes).
- If using trusted-header mode: a reverse proxy (e.g., Nginx, Envoy) that sets the identity headers after authenticating the user.

Phase 2:

- An OIDC provider (e.g., Google, Keycloak, Auth0).
- PostgreSQL instance for RBAC bindings and audit events.
- A schema migration tool (e.g., Alembic).

## Upgrade & Migration Strategy

1. Backward compatibility is preserved by default:
   - `auth.mode = "api_key_only"` keeps existing behavior.
2. User auth path is opt-in through configuration.
3. Existing SDK/automation clients continue using `OPEN-SANDBOX-API-KEY`.
4. Enabling console/user auth does not require lifecycle API contract breaks.
5. Phase 2 DB migrations are additive:
   - static config role mapping can remain as fallback during cutover.


================================================
FILE: oseps/0007-fast-sandbox-runtime-support.md
================================================
---
title: Fast Sandbox Runtime Support
authors:
  - "@fengcone"
creation-date: 2026-02-08
last-updated: 2026-02-08
status: provisional
---
# OSEP-0007: Fast Sandbox Runtime Support

<!-- toc -->

- [Summary](#summary)
- [Motivation](#motivation)
  - [OpenSandbox's Existing Pool Optimization](#opensandboxs-existing-pool-optimization)
  - [Why Fast-Sandbox is Fast](#why-fast-sandbox-is-fast)
  - [Goals](#goals)
  - [Non-Goals](#non-goals)
- [Requirements](#requirements)
- [Proposal](#proposal)
  - [Notes/Constraints/Caveats](#notesconstraintscaveats)
  - [Risks and Mitigations](#risks-and-mitigations)
- [Design Details](#design-details)
  - [How Fast-Sandbox Achieves Millisecond-Scale Latency](#how-fast-sandbox-achieves-millisecond-scale-latency)
  - [Kubernetes Ecosystem Integration](#kubernetes-ecosystem-integration)
- [Test Plan](#test-plan)
- [Drawbacks](#drawbacks)
- [Alternatives](#alternatives)
- [Infrastructure Needed](#infrastructure-needed)
- [Upgrade & Migration Strategy](#upgrade--migration-strategy)

<!-- /toc -->

## Summary

Add first-class support for [fast-sandbox](https://github.com/fengcone/fast-sandbox) as a high-performance runtime backend for OpenSandbox. By leveraging fast-sandbox's gRPC Fast-Path API and pre-warmed Agent pools, OpenSandbox can achieve **millisecond-scale cold start latency** (compared to ~1 second with OpenSandbox's BatchSandbox pool, or 2-5 seconds with standard K8s runtime) for AI Agents, Serverless functions, and other latency-sensitive workloads while maintaining the existing SDK and API contract.

**Performance Characteristics** (with cached images on Agent nodes):

- **Fast Mode**: <50ms (container-first, async CRD)
- **Strong Mode**: ~50-100ms base + K8s API write latency (typically 20-50ms via etcd)

> **Note**: The millisecond-scale latency assumes the container image is already cached on the Agent's host node. Cold starts with uncached images incur additional image pull time.

## Motivation

OpenSandbox currently supports Docker and Kubernetes runtimes. While the Kubernetes runtime provides scalability, sandbox creation typically takes 2-5 seconds due to:

- K8s scheduler latency (~100-500ms)
- etcd write and watch propagation (~50-200ms)
- Kubelet pod creation and container runtime startup (~1-3s)
- Image pull when cache miss occurs (~1-10s)

### OpenSandbox's Existing Pool Optimization

OpenSandbox's Kubernetes runtime already supports a **pool-based optimization** via the `poolRef` field in BatchSandbox CRD. When `poolRef` is specified:

```yaml
apiVersion: sandbox.opensandbox.io/v1alpha1
kind: BatchSandbox
metadata:
  name: my-sandbox
spec:
  poolRef: my-pool              # Reference to pre-warmed pool
  taskTemplate:
    spec:
      process:
        command: ["python", "app.py"]
```

**How it works**:

- Users create a pool of pre-provisioned pods (managed by BatchSandbox controller)
- When creating a sandbox, OpenSandbox assigns a task from the pool
- Only `entrypoint` and `env` are customizable; image and resources are pre-defined
- Controller and OpenSandbox Server watch K8s API for state changes

**Performance with pool** (measured):

- Approximately **1 second** latency for pool-based allocation
- Eliminates scheduler wait and pod startup time
- Still requires K8s API write + watch propagation overhead
- Image must be pre-pulled in pool pods

This is an effective optimization for many use cases. However, fast-sandbox aims to push latency even lower through additional innovations described below.

For AI Agent and Serverless scenarios that require rapid sandbox provisioning, reducing even the K8s API overhead is valuable.

### Why Fast-Sandbox is Fast

fast-sandbox achieves millisecond-scale cold start through three key design innovations:

**Comparison: OpenSandbox Pool vs fast-sandbox**


| Aspect                          | OpenSandbox BatchSandbox Pool                      | fast-sandbox                                |
| ------------------------------- |----------------------------------------------------|---------------------------------------------|
| **Allocation mechanism**        | K8s API write → Controller watch → Task assignment | gRPC → in-memory Registry → Agent HTTP      |
| **Latency (with cached image)** | ~1 second (measured)                               | <50ms (Fast), ~50ms + API write (Strong)    |
| **Scheduling**                  | K8s Scheduler places pool pods (one-time)          | In-memory Registry with image affinity      |
| **Image awareness**             | Pool pods have fixed image                         | Registry scores by image cache availability |
| **Customization**               | entrypoint, env only                               | entrypoint, env, image, ports per request   |
| **Container creation**          | pre-warmed                                         | Direct containerd socket                    |
| **Consistency**                 | Strong (K8s etcd)                                  | Fast (eventual) or Strong (K8s etcd)        |
| **Failure recovery**            | K8s Controller reconciliation                      | Node Janitor + AutoRecreate policy          |

Both approaches use pre-provisioned resource pools to eliminate cold start overhead. fast-sandbox's key advantage is bypassing the K8s API path for container creation while maintaining visibility through async CRD writes.

#### 1. Direct API Allocation, Bypassing K8s Control Plane

Traditional K8s sandbox creation follows the slow path:

```
Client → K8s API Server → etcd → Scheduler → etcd → Kubelet → Container Runtime
 (~2-5 seconds total)
```

fast-sandbox uses a gRPC Fast-Path API that bypasses the K8s control plane:

**Fast Mode** (image cached on Agent node):

```
Client → gRPC Fast-Path → Registry (in-memory) → Agent HTTP → Containerd (<50ms)
```

**Strong Mode** (image cached on Agent node):

```
Client → gRPC Fast-Path → K8s API → etcd → Registry (in-memory) → Agent HTTP → Containerd
       ( <50ms base + 20-50ms API write)
```

**With uncached image** (both modes): Additional image pull time applies.

The Controller maintains an **in-memory Registry** for scheduling, eliminating:

- etcd write/read latency
- scheduler queue wait time
- watch propagation delays

This is similar to how "burst" instances work in cloud providers - resources are pre-provisioned and allocation happens at memory speed.

#### 2. In-Memory Scheduling with Image Affinity

fast-sandbox's Registry implements a smart scheduling algorithm:

```
score = allocated_count + (image_not_cached ? 1000 : 0)
```

Key characteristics:

- **In-memory allocation**: No disk I/O, no database queries (~1ms for 100 agents)
- **Image affinity scoring**: Prioritizes agents with cached images
- **Atomic slot management**: Avoids port conflicts through pre-reserved slots
- **Zero image pull latency**: When image is cached (common case), container starts immediately

This is fundamentally different from the K8s scheduler, which:

- Runs as a separate process with IPC overhead
- Doesn't track image cache state
- Schedules pods without considering image availability

#### 3. Kubernetes Ecosystem Reuse with Direct Containerd Access

fast-sandbox achieves speed while maintaining K8s compatibility:


| Aspect                     | fast-sandbox Approach                                          | K8s Benefit                               |
| -------------------------- | -------------------------------------------------------------- | ----------------------------------------- |
| **Resource Accounting**    | Agent Pods tracked in K8s                                      | Resource visibility via `kubectl get pods` |
| **Scheduling Constraints** | Node selectors, taints, tolerations via K8s                    | K8s scheduler places Agent Pods optimally |
| **Container Creation**     | Direct containerd socket access (bypasses kubelet)             | <10ms container creation vs ~500ms        |
| **Security Containers**    | Supports gVisor/Kata Containers via containerd runtime handler | Same workflow, different runtime class    |
| **Network Namespace**      | Reuses Agent Pod's network namespace                           | K8s CNI plugins work transparently        |

The key insight: **use K8s for what it's good at** (resource accounting, cluster management, scheduling constraints), but **bypass K8s for the hot path** (container creation).

### Goals

- Support creating, querying, and terminating sandboxes backed by fast-sandbox via the OpenSandbox server API
- Preserve existing OpenSandbox SDK and API behavior - no breaking changes
- Enable sub-100ms sandbox creation latency (strong consistency mode, with cached image) or sub-50ms (fast mode, with cached image)
- Support both Fast (ultra-low latency, eventual consistency) and Strong (guaranteed consistency) modes
- Provide flexible deployment: users can bring their own fast-sandbox or use OpenSandbox-provided charts

### Non-Goals

- Replacing or removing existing Docker or Kubernetes runtimes
- Implementing a full Kubernetes operator for fast-sandbox (it has its own controller)
- Changing the OpenSandbox sandbox lifecycle API or SDKs in a breaking way
- Direct management of fast-sandbox Agent Pods (handled by fast-sandbox controller)

## Requirements

- Must use the existing OpenSandbox lifecycle API and SDKs without breaking changes
- Must support OpenSandbox's execd-based command execution and file operations
- Must integrate with OpenSandbox's ingress component for routing
- Must support the standard OpenSandbox configuration model
- Must handle status mapping between fast-sandbox and OpenSandbox states

## Proposal

Introduce a `fast-sandbox` workload provider implementation that communicates directly with the fast-sandbox Controller via gRPC Fast-Path API. The provider is exposed as a new option under the Kubernetes runtime (`kubernetes.workload_provider = "fast-sandbox"`).

**Architecture Overview**:

```
+-------------------------------------------------------------------------+
|                        OpenSandbox Control Plane                        |
+-------------------------------------------------------------------------+
|                                                                         |
|   +--------------+    gRPC Fast-Path (9090)    +---------------------+  |
|   | OpenSandbox  | ------------------------>   |  fast-sandbox       |  |
|   |   Server     | <-------------------------  |  Controller         |  |
|   |              |    endpoints (IP:Port)      |                     |  |
|   +------+-------+                             +-------+-------------+  |
|          |                                             |                |
|          | SDK                                         | Registry       |
|          |                                             | (in-memory)    |
|          v                                             v                |
|   +--------------+    HTTP (5758)             +----------------------+  |
|   | OpenSandbox  | ---------------------->    |  Agent Pods          |  |
|   |  SDK         |    execd (44772)           |  (K8s Managed)       |  |
|   +--------------+                            +----------+-----------+  |
|                                                        |                |
|                                                        | containerd     |
|                                                        v                |
|                                                 +----------------+      |
|                                                 | User Container |      |
|                                                 | with execd     |      |
|                                                 +----------------+      |
|                                                                         |
+-------------------------------------------------------------------------+
                                ^
                                | K8s API Server (for Agent Pod mgmt only)
                                |
+-------------------------------------------------------------------------+
|                    Kubernetes Control Plane (async path)                |
|  - Agent Pod lifecycle (create/monitor/delete)                          |
|  - Resource accounting (CPU/memory requests visible in kubectl)         |
|  - Scheduling constraints (node selectors, taints, tolerations)         |
+-------------------------------------------------------------------------+
```

**Data Flow Comparison** (assuming cached image):

```
Standard K8s Runtime:
OpenSandbox Server → K8s API → etcd → Scheduler → etcd → Kubelet → containerd
      (2-5 seconds)

Fast-Sandbox Runtime (Fast Mode):
OpenSandbox Server → gRPC Fast-Path → Registry → Agent HTTP → containerd
      (<50ms, async CRD)

Fast-Sandbox Runtime (Strong Mode):
OpenSandbox Server → gRPC Fast-Path → K8s API → etcd → Watch → Agent → containerd
      (~50-100ms base + 20-50ms API write)
```

### Notes/Constraints/Caveats

- The fast-sandbox Controller and Agent Pods must be deployed separately (either by the user or via OpenSandbox-provided Helm charts)
- fast-sandbox uses its own CRD types (`Sandbox`, `SandboxPool`) for resource pool management - OpenSandbox does not manipulate these directly
- gRPC communication requires network reachability from OpenSandbox Server to fast-sandbox Controller
- The execd binary must be present in sandbox containers (typically via image or init container)

### Risks and Mitigations


| Risk                                                                     | Mitigation                                                                                                                         |
| ------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------- |
| fast-sandbox Controller becomes a single point of failure                | fast-sandbox Controller is designed for high availability; OpenSandbox can implement retries with fallback to standard K8s runtime |
| gRPC API changes in fast-sandbox could break integration                 | Version pinning in deployment; compatibility matrix documentation                                                                  |
| Network partition between OpenSandbox Server and fast-sandbox Controller | Configurable timeouts; health check endpoint integration                                                                           |
| State drift if sandboxes are managed outside OpenSandbox                 | OpenSandbox tracks sandbox IDs; periodic state reconciliation via gRPC GetSandbox                                                  |
| Fast mode orphaned containers                                            | fast-sandbox's Node Janitor DaemonSet cleans up orphaned resources                                                                 |

## Design Details

### How Fast-Sandbox Achieves Millisecond-Scale Latency

The fast-sandbox architecture is built around three performance-critical design choices:

#### 1. Bypassing K8s Control Plane for Hot Path

```
┌──────────────────────────────────────────────────────────────────────────┐
│                    Fast Mode Creation Flow (image cached)                │
├──────────────────────────────────────────────────────────────────────────┤
│                                                                          │
│  Prerequisite: Image is cached on Agent's host node (via containerd)     │
│                                                                          │
│  1. OpenSandbox Server → gRPC CreateSandbox request                      │
│     └─────────────────────────────────────────────────> ~1ms             │
│                                                                          │
│  2. Registry.Allocate() - in-memory scheduling                           │
│     └─────────────────────────────────────────────────> ~1ms             │
│     • Filter by pool, namespace, capacity, port conflicts                │
│     • Score by: allocated + (no_image_cache ? 1000 : 0)                  │
│     • Atomic mutex-based allocation                                      │
│                                                                          │
│  3. Controller → Agent HTTP POST /api/v1/agent/create                    │
│     └─────────────────────────────────────────────────> ~10-30ms         │
│                                                                          │
│  4. Agent → containerd.Create() with cached image                        │
│     └─────────────────────────────────────────────────> ~5-10ms          │
│     • Direct socket access to host containerd                            │
│     • No image pull (cached)                                             │
│     • Reuse Agent Pod's network namespace                                │
│                                                                          │
│  5. Controller returns response with endpoints                           │
│     <───────────────────────────────────────────────── ~1ms              │
│                                                                          │
│  Total: <50ms (end-to-end, with cached image)                            │
│                                                                          │
│  (Async: Controller creates K8s CRD for reconciliation/audit trail)      │
│                                                                          │
│  If image is NOT cached: Image pull time is added to step 4              │
└──────────────────────────────────────────────────────────────────────────┘
```

Compare to standard K8s:

```
1. API Server write to etcd              ~20ms
2. Scheduler watch and decision          ~100-500ms
3. Scheduler write to etcd               ~20ms
4. Kubelet watch and pod creation        ~50-200ms
5. Container runtime start               ~500ms-3s
6. Image pull (if cache miss)            ~1-10s
Total: 2-5s (best case, cache hit)
```

#### 2. Registry Scheduling Algorithm

```go
// From fast-sandbox internal/controller/agentpool/registry.go (simplified)

func Allocate(sandbox *Sandbox) (*AgentInfo, error) {
    var bestSlot *AgentInfo
    minScore := 1000000

    for _, agent := range registry.agents {
        // Skip if pool/namespace mismatch or at capacity
        if agent.PoolName != sandbox.PoolRef ||
           agent.Namespace != sandbox.Namespace ||
           agent.Allocated >= agent.Capacity {
            continue
        }
        // Check port conflicts
        if contains(agent.UsedPorts, sandbox.ExposedPorts) {
            continue
        }
        // Score: prefer lower allocation + cached image
        score := agent.Allocated
        if !contains(agent.Images, sandbox.Image) {
            score += 1000  // Heavy penalty for uncached image
        }

        if score < minScore {
            minScore = score
            bestSlot = agent
        }
    }

    return bestSlot, nil
}
```

**Performance characteristics** (from fast-sandbox benchmarks):

- 100 Agents: ~1.3ms allocation time
- 1000 Agents: ~14ms allocation time

#### 3. Direct Containerd Integration

Agent Pods run with privileged access to host containerd socket:

```go
// From fast-sandbox internal/agent/runtime/containerd_runtime.go

client, _ := containerd.New("/run/containerd/containerd.sock",
    containerd.WithDefaultNamespace("k8s.io"))

// Direct container creation - bypasses kubelet entirely
container, _ := client.NewContainer(
    ctx,
    sandboxID,
    containerd.WithImage(image),           // Already cached
    containerd.WithNewSnapshot(...),       // Instant with cache
    containerd.WithRuntime("runc", nil),   // Or "io.containerd.runsc.v2" for gVisor
)

task, _ := container.NewTask(ctx, cio.NewCreator(...))
task.Start(ctx)
```

This approach:

- Eliminates kubelet API overhead (~50-200ms)
- Enables image cache reuse (Agent Pod shares node's containerd image store)
- Supports alternative runtimes (gVisor, Kata Containers) via runtime handler

### Kubernetes Ecosystem Integration

Despite bypassing the K8s control plane for the hot path, fast-sandbox maintains full compatibility:

#### Resource Accounting via K8s Pods

Agent Pods are normal K8s Pods:

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: fast-sandbox-agent-node-1
  labels:
    app: fast-sandbox-agent
    pool-ref: default-pool
spec:
  containers:
  - name: agent
    image: fast-sandbox/agent:latest
    resources:
      requests:
        cpu: "2000m"
        memory: "4Gi"
      limits:
        cpu: "4000m"
        memory: "8Gi"
    volumeMounts:
    - name: containerd-socket
      mountPath: /run/containerd/containerd.sock
  volumes:
  - name: containerd-socket
    hostPath:
      path: /run/containerd/containerd.sock
```

These Pods are visible in `kubectl get pods` and count against:

- Node resource allocation (visible to cluster autoscaler)
- Resource quotas (namespace limits enforced)
- Scheduler decisions (node affinity, taints, tolerations)

#### CRD for Reconciliation and Auditing

fast-sandbox defines two CRDs:

```yaml
# SandboxPool - manages Agent Pod lifecycle
apiVersion: sandbox.fast.io/v1alpha1
kind: SandboxPool
metadata:
  name: default-pool
  namespace: default
spec:
  capacity:
    poolMin: 2
    poolMax: 10
    bufferMin: 1
    bufferMax: 3
  maxSandboxesPerPod: 5
  runtimeType: container           # or "gvisor" for secure containers
  agentTemplate:
    spec:
      containers:
      - name: agent
        image: fast-sandbox/agent:latest
        imagePullPolicy: IfNotPresent
        env:
        - name: AGENT_CAPACITY
          value: "5"
        volumeMounts:
        - name: containerd-socket
          mountPath: /run/containerd/containerd.sock
      volumes:
      - name: containerd-socket
        hostPath:
          path: /run/containerd/containerd.sock

---
# Sandbox - audit trail for sandbox creation
apiVersion: sandbox.fast.io/v1alpha1
kind: Sandbox
metadata:
  name: my-sandbox
  namespace: default
  labels:
    sandbox.fast.io/created-by: fastpath-fast  # or fastpath-strong
spec:
  image: python:3.11
  poolRef: default-pool
  command: ["python", "-m", "http.server", "8000"]
  exposedPorts: [8000]
  failurePolicy: AutoRecreate         # or "Manual"
  recoveryTimeoutSeconds: 60
status:
  phase: Running
  sandboxID: abc123...               # Actual container ID
  assignedPod: fast-sandbox-agent-node-1
  nodeName: node-1
  endpoints:
  - "10.244.1.5:8000"
```

These CRDs serve as:

- **Audit trail**: Reconciliation between gRPC state and K8s
- **Self-healing**: Controller can detect and clean up orphaned sandboxes
- **Observability**: Standard K8s tools (kubectl, metrics-server) work

#### Security Container Support

fast-sandbox supports gVisor/Kata Containers via containerd runtime handlers:

```go
// Fast mode: runc (default)
containerd.WithRuntime("runc", nil)

// Secure mode: gVisor
containerd.WithRuntime("io.containerd.runsc.v2", nil)

// VM mode: Kata Containers
containerd.WithRuntime("io.containerd.kata.v2", nil)
```

This allows OpenSandbox to offer different security isolation levels without changing the integration layer.

#### Node Janitor: Orphan Container Cleanup

Fast mode creates containers before writing the CRD, which can result in orphaned containers if:
- Agent Pod is unexpectedly deleted (crash, node drain, eviction)
- CRD write fails after container creation
- Network partition prevents CRD reconciliation

To handle these cases, fast-sandbox provides a **Node Janitor DaemonSet** that runs on each node.

**How Janitor detects orphans:**

| Orphan Type | Detection Method | Cleanup Trigger |
|-------------|-------------------|-----------------|
| Agent Pod disappeared | Pod UID not found in K8s API | Immediate (after timeout) |
| Sandbox CRD deleted | CRD not found by sandbox name | Immediate (after timeout) |
| UID mismatch (recreated CRD) | Container label ≠ CRD UID | Immediate (after timeout) |
| Fast mode timeout | Container created > 10s ago without CRD | After orphan timeout |

**Janitor scan process:**

1. List all containers with label `fast-sandbox.io/managed=true` via containerd
2. For each container, check:
   - Agent Pod exists (via K8s API)
   - Sandbox CRD exists with matching UID
   - Container age > orphan timeout (default 10s for Fast mode)
3. If orphan detected: enqueue cleanup task
4. Cleanup process:
   - SIGKILL the task
   - Delete task from containerd
   - Delete container with snapshot cleanup
   - Remove FIFO files from `/run/containerd/fifo/`

**Configuration parameters:**

| Parameter | Default | Description |
|-----------|---------|-------------|
| `--scan-interval` | 2m | Full container scan interval |
| `--orphan-timeout` | 10s | Wait before treating Fast-mode container as orphan |
| `NODE_NAME` | (required) | Node this janitor pod runs on |

**Why the timeout?** Fast mode creates containers before CRD writes. The 10-second (configurable) timeout allows time for the async CRD write to complete, preventing false positives during normal operation.

### Configuration Extension

Add `FastSandboxRuntimeConfig` to `server/src/config.py`:

```python
class FastSandboxRuntimeConfig(BaseModel):
    """fast-sandbox runtime configuration."""

    controller_endpoint: str = Field(
        default="localhost:9090",
        description="fast-sandbox Controller gRPC endpoint.",
    )
    default_pool_ref: str = Field(
        default="default-pool",
        description="Default SandboxPool name for sandbox allocation.",
    )
    default_consistency_mode: Literal["fast", "strong"] = Field(
        default="strong",
        description=(
            "Default consistency mode. 'fast' = sub-50ms with cached image, eventual consistency. "
            "'strong' = ~50-100ms base + K8s API write latency (typically 20-50ms), guaranteed consistency."
        ),
    )
    execd_port: int = Field(
        default=44772,
        description="execd port in sandbox containers.",
    )
```

Update `AppConfig` to include the new config block and validation logic.

### TOML Configuration Example

```toml
[server]
host = "0.0.0.0"
port = 8080
api_key = "your-secret-key"

[runtime]
type = "kubernetes"
execd_image = "opensandbox/execd:v1.0.7"

[kubernetes]
namespace = "default"
workload_provider = "fast-sandbox"

[fast_sandbox]
controller_endpoint = "fast-sandbox-controller.opensandbox.svc:9090"
default_pool_ref = "default-pool"
default_consistency_mode = "strong"  # "fast" = sub-50ms (cached), "strong" = ~50-100ms + API write
execd_port = 44772
```

### New Code Structure

```
server/src/services/k8s/
├── fastsandbox_provider.py      # New: FastSandboxProvider WorkloadProvider implementation
├── fastsandbox_client.py        # New: gRPC client wrapper for fast-sandbox Controller
├── provider_factory.py          # Modified: Register "fast-sandbox" provider
└── ...
```

### API Mapping


| OpenSandbox API                         | fast-sandbox gRPC  | Description                       |
| --------------------------------------- | ------------------ | --------------------------------- |
| `POST /sandboxes`                       | `CreateSandbox`    | Create sandbox, returns endpoints |
| `GET /sandboxes/{id}`                   | `GetSandbox`       | Query sandbox status              |
| `DELETE /sandboxes/{id}`                | `DeleteSandbox`    | Delete sandbox                    |
| `POST /sandboxes/{id}/renew-expiration` | `UpdateSandbox`    | Update expiration time            |
| `GET /sandboxes/{id}/endpoints/{port}`  | (local resolution) | Resolve from CreateResponse       |

### Request Parameter Mapping

```python
# OpenSandbox CreateSandboxRequest → fast-sandbox CreateRequest
{
    "image": {"uri": "python:3.11"},              # → image
    "entrypoint": ["python", "-m", "http.server"], # → command
    "env": {"PYTHONUNBUFFERED": "1"},             # → envs
    "resourceLimits": {"cpu": "500m"},            # → (Agent pool capacity)
    "timeout": 3600,                             # → expireTimeSeconds
    "extensions": {
        "pool_ref": "default-pool",              # → poolRef
        "consistency_mode": "strong",            # → consistencyMode (override)
        "failure_policy": "auto_recreate"        # → failurePolicy
    }
}
```

### Status Mapping


| fast-sandbox Phase | OpenSandbox State |
| ------------------ | ----------------- |
| Running            | Running           |
| Pending / Creating | Pending           |
| Failed / Lost      | Failed            |
| (deleted)          | Terminated        |

### Extensions Field Support

The `extensions` field in `CreateSandboxRequest` supports fast-sandbox specific options:


| Extension Key      | Type                       | Description                                 |
| ------------------ | -------------------------- | ------------------------------------------- |
| `pool_ref`         | string                     | Target SandboxPool name (overrides default) |
| `consistency_mode` | "fast" \| "strong"         | Consistency mode (overrides default)        |
| `failure_policy`   | "manual" \| "auto_recreate" | Failure recovery policy                     |

## Test Plan

- **Unit Tests**: FastSandboxClient gRPC wrapper, request/response mapping, status translation
- **Integration Tests**: Deploy fast-sandbox in Kind cluster, test create/get/delete/renew flows
- **E2E Tests**: Full OpenSandbox SDK flow using fast-sandbox runtime
- **Performance Tests**: Measure sandbox creation latency vs standard K8s runtime

### Test Scenarios

1. Basic lifecycle: create → status query → delete
2. Expiration renewal
3. Fast vs Strong consistency modes
4. Pool selection via extensions
5. Image affinity: second sandbox on same node (should be faster)
6. Failure: controller unavailable, invalid pool ref
7. execd connectivity after sandbox creation
8. Concurrent sandbox creation (stress test)

### Performance Benchmarks

Target metrics (to be verified in tests):


| Scenario                              | Target Latency         | Notes                                           |
| ------------------------------------- | ---------------------- | ----------------------------------------------- |
| OpenSandbox BatchSandbox Pool         | ~1 second              | Measured with K8s API + watch overhead          |
| Cold start, image cached, Fast mode   | <50ms                  | Container-first, async CRD                      |
| Cold start, image cached, Strong mode | ~50-100ms + API write  | CRD-first, ~20-50ms additional for K8s API/etcd |
| Cold start, image NOT cached          | Base + image pull time | Image pull depends on size and network          |
| Warm start (reuse same Agent)         | <30ms                  | Agent already allocated                         |
| Registry allocation (100 Agents)      | ~1.3ms                 | In-memory scheduling                            |
| Registry allocation (1000 Agents)     | ~14ms                  | In-memory scheduling                            |

> **Important**: The millisecond-scale latencies above assume the container image is already cached on the Agent's host node. In production, pre-pulling images or using a common set of base images is recommended for consistent performance.

## Drawbacks

- **Added Dependency**: Requires deploying and managing the fast-sandbox Controller, Agent Pods, and Node Janitor DaemonSet
- **Operational Complexity**: Teams need to understand both OpenSandbox and fast-sandbox concepts
- **gRPC Protocol**: Introduces gRPC dependency (vs pure HTTP/REST for K8s API)
- **Limited Ecosystem**: fast-sandbox is a newer project with smaller community than vanilla K8s
- **Fast Mode Orphans**: Fast consistency mode can create orphaned containers if CRD write fails (mitigated by Node Janitor)

## Alternatives

1. **Continue with standard K8s runtime only**: Rejected due to 2-5s cold start latency
2. **Use only fast-sandbox CRD path (via K8s API)**: Rejected because it loses the Fast-Path gRPC performance benefit
3. **Build OpenSandbox-native fast-path**: Rejected due to reinventing complex scheduling and container management logic
4. **External adapter service**: Rejected due to additional operational components

## Infrastructure Needed

- **CI/CD**: Kind cluster with fast-sandbox installed for integration tests
- **Documentation**: Deployment guide for fast-sandbox + OpenSandbox integration
- **Helm Charts** (optional): Unified charts deploying OpenSandbox Server + fast-sandbox components

## Upgrade & Migration Strategy

- **Backwards Compatible**: Default runtime unchanged; opt-in via configuration
- **No Migration**: Existing Docker/K8s runtime users unaffected
- **Enable by Config**: Simply set `kubernetes.workload_provider = "fast-sandbox"` and add `[fast_sandbox]` block
- **Rollback**: Revert `kubernetes.workload_provider` to the previous provider (or switch the runtime type to `docker`) with no data loss


================================================
FILE: oseps/0008-pause-resume-rootfs-snapshot.md
================================================
---
title: Pause and Resume via Rootfs Snapshot
authors:
  - "@fengcone"
creation-date: 2026-03-11
last-updated: 2026-03-13
status: implementing
---

# OSEP-0008: Pause and Resume via Rootfs Snapshot

<!-- toc -->
- [Summary](#summary)
- [Motivation](#motivation)
  - [Goals](#goals)
  - [Non-Goals](#non-goals)
- [Requirements](#requirements)
- [Proposal](#proposal)
  - [API Overview](#api-overview)
  - [Kubernetes Resource Overview](#kubernetes-resource-overview)
  - [Component Interaction Overview](#component-interaction-overview)
  - [Notes/Constraints/Caveats](#notesconstraintscaveats)
  - [Risks and Mitigations](#risks-and-mitigations)
- [Design Details](#design-details)
- [Test Plan](#test-plan)
- [Drawbacks](#drawbacks)
- [Alternatives](#alternatives)
- [Infrastructure Needed](#infrastructure-needed)
- [Upgrade & Migration Strategy](#upgrade--migration-strategy)
<!-- /toc -->

## Summary

This proposal introduces pause and resume semantics for Kubernetes-backed
sandboxes by persisting the sandbox root filesystem as an OCI image. On pause,
the server creates a `SandboxSnapshot` CR for the running sandbox Pod, a
dedicated controller creates a commit Job on the same node, and the rootfs is
committed and pushed to a registry. After the snapshot becomes ready, the
original `BatchSandbox` is removed so compute resources are released.

Resume is intentionally simpler. The server resolves the single retained
snapshot for the stable `sandboxId`, then creates a new `BatchSandbox` with
`replicas = 1` from the snapshot image. The public `sandboxId` remains stable
across pause and resume.

```text
Time ------------------------------------------------------------------------>

Sandbox lifecycle:   [Running]--[Pausing]--[Paused]--[Resuming]--[Running]
                         |                     |
                  commit + push            create new BatchSandbox
                  delete old BatchSandbox  from snapshot image
```

## Motivation

OpenSandbox users often need to temporarily stop a sandbox without losing the
filesystem state that has accumulated during a long-running task. Typical cases
include releasing cluster resources overnight, pausing an agent before a risky
step, or resuming a workspace later from the same working directory.

Today, Kubernetes runtime returns `HTTP 501 Not Implemented` for both `pause`
and `resume`. Docker supports cgroup freeze, but that does not survive restart
or migration. Rootfs snapshot is the practical middle ground in the persistence
roadmap:

- Phase 1: persistent volumes preserve explicit mounts.
- Phase 2: rootfs snapshot preserves the container filesystem.
- Phase 3: VM or process checkpoint preserves memory and execution state.

This OSEP deliberately chooses a simple architecture:

- keep `BatchSandbox` as the runtime workload resource used by the server today
- add a single `SandboxSnapshot` CR per `sandboxId`
- do not introduce a new per-instance lifecycle CR
- do not support multiple retained snapshots in v1

### Goals

- Implement `pause` for Kubernetes sandboxes by committing a running sandbox Pod
  rootfs into an OCI image and pushing it to a configurable registry.
- Keep the public `sandboxId` stable across pause and resume.
- Release compute resources after pause by deleting the original
  `BatchSandbox`.
- Implement `resume` by creating a new `BatchSandbox` with `replicas = 1` from
  the retained snapshot image.
- Expose `Pausing`, `Paused`, and `Resuming` through the existing Lifecycle API.
- Keep the design minimal by retaining only one snapshot per sandbox.

### Non-Goals

- Preserving in-memory process state, open sockets, or CPU registers.
- Supporting multiple historical snapshots per sandbox.
- Adding `GET /sandboxes/{sandboxId}/snapshots` in v1.
- Designing a general multi-instance pause model for `BatchSandbox` with
  `replicas > 1`.
- Extending Docker runtime to rootfs snapshot.
- Implementing automatic scheduled snapshots.

## Requirements

- Public `sandboxId` must remain unchanged after pause and resume.
- A sandbox has at most one retained snapshot in v1.
- Pause must work from the currently running sandbox Pod and record the concrete
  `podName`, `containerName`, and `nodeName` that are being snapshotted.
- The commit Job must run on the same node as the source Pod.
- Pause must complete `commit -> push` before the original `BatchSandbox` is
  deleted.
- At most one pause operation may be in progress for a given `sandboxId`.
- Resume must work when the original `BatchSandbox` no longer exists.
- `GET /sandboxes/{sandboxId}` must still return `200` and state `Paused` while
  the sandbox is represented only by a `SandboxSnapshot`.
- `DELETE /sandboxes/{sandboxId}` must delete both the live workload and any
  retained `SandboxSnapshot` for that sandbox.
- Registry credentials must be referenced via Kubernetes Secret, not inline API
  credentials.
- `SandboxSnapshot` must carry enough policy and workload reconstruction data to
  resume even after the original `BatchSandbox` has been deleted.
- The API shape must leave room for future snapshot backends, especially VM
  snapshot, even though this revision only implements rootfs snapshot.
- The design must remain compatible with the current server behavior where
  Kubernetes sandboxes are created as `BatchSandbox` with `replicas = 1`.

## Proposal

Pause and resume are modeled around two resources:

- `BatchSandbox`: runtime workload resource used for the live sandbox
- `SandboxSnapshot`: persisted snapshot state for one stable `sandboxId`

The public API stays sandbox-oriented, and the server remains the orchestrator.
The snapshot controller only handles snapshot execution.

### API Overview

```text
POST /sandboxes/{sandboxId}/pause   -> create or update SandboxSnapshot, return 202
POST /sandboxes/{sandboxId}/resume  -> create new BatchSandbox from snapshot, return 202
GET  /sandboxes/{sandboxId}         -> returns Running / Pausing / Paused / Resuming
```

There is no `GET /sandboxes/{sandboxId}/snapshots` endpoint in this version
because each sandbox retains only one snapshot.

### Kubernetes Resource Overview

```text
BatchSandbox (existing)
  |- used by Server as the live workload resource
  |- created with replicas = 1 for public sandbox lifecycle API
  `- deleted after pause succeeds

SandboxSnapshot (new, one per sandboxId)
  |- metadata.name = <sandboxId>
  |- spec.sandboxId
  |- spec.policy.type              # Rootfs today, reserved for VMSnapshot later
  |- spec.sourceBatchSandboxName
  |- spec.sourcePodName
  |- spec.sourceContainerName
  |- spec.sourceNodeName
  |- spec.imageUri
  |- spec.snapshotPushSecretName
  |- spec.resumeImagePullSecretName
  |- spec.resumeTemplate
  |- status.phase                 # Pending | Committing | Pushing | Ready | Failed
  |- status.readyAt
  `- status.message
```

The `SandboxSnapshot` name is deterministic and equal to `sandboxId`, which
enforces the “one sandbox, one snapshot” rule.

### Component Interaction Overview

Pause flow:

```mermaid
sequenceDiagram
    participant Client
    participant Server
    participant Batch as BatchSandbox
    participant Snapshot as SandboxSnapshot
    participant Ctrl as SandboxSnapshotController
    participant Job as Commit Job Pod
    participant Registry

    Client->>Server: POST /sandboxes/{id}/pause
    Server->>Batch: Read live BatchSandbox and Pod info
    Server->>Snapshot: Create/Update SandboxSnapshot\n(sandboxId, podName, containerName, nodeName, imageUri, pushSecretRef, resumePullSecretRef)
    Server-->>Client: 202 Accepted
    Ctrl->>Job: Create same-node commit Job Pod
    Job->>Registry: Push snapshot image
    Job-->>Ctrl: Commit/push succeeded
    Ctrl->>Snapshot: status.phase = Ready
    Server->>Batch: Delete original BatchSandbox after snapshot Ready
```

Resume flow:

```mermaid
sequenceDiagram
    participant Client
    participant Server
    participant Snapshot as SandboxSnapshot
    participant Batch as New BatchSandbox
    participant Ctrl as BatchSandboxController
    participant Pod as Sandbox Pod

    Client->>Server: POST /sandboxes/{id}/resume
    Server->>Snapshot: Lookup SandboxSnapshot by sandboxId
    Server->>Snapshot: Validate snapshot status.phase == Ready
    Server->>Batch: Create new BatchSandbox<br/>(replicas=1, image=snapshot.imageUri, sandboxId unchanged)
    Server-->>Client: 202 Accepted
    Ctrl->>Pod: Create sandbox Pod
    Pod-->>Ctrl: Pod becomes Running and Ready
    Server->>Server: Aggregate state as Resuming -> Running
```

### Notes/Constraints/Caveats

- `BatchSandbox` still supports broader semantics in the platform, but this
  proposal only targets the current public server path where a sandbox maps to a
  `BatchSandbox` with `replicas = 1`.
- The old `BatchSandbox` is deleted after a successful pause, so the paused
  state exists only in `SandboxSnapshot`.
- The server remains the orchestration owner for pause and resume. The
  snapshot controller is not responsible for creating or deleting
  `BatchSandbox`.
- `SandboxSnapshot.spec.policy.type` is reserved for future snapshot backends.
  This revision only supports `Rootfs`.
- Snapshot image URI should be stable for the single retained snapshot, for
  example `<snapshotRegistry>/<sandboxId>:snapshot`. This v1 design therefore
  assumes a registry/tag policy that allows replacing the retained snapshot
  image for a sandbox.
- Snapshot push authentication and resume-time image pull authentication are
  modeled separately. They may reference the same Kubernetes Secret in some
  deployments, but the design must not assume they are identical.
- Because the original `BatchSandbox` is deleted, resume cannot rely on
  `imageUri` alone. `SandboxSnapshot` must retain enough `resumeTemplate`
  information for the server to reconstruct a new `BatchSandbox`.
- Registries with immutable tags are not compatible with this simplified
  single-snapshot design unless the implementation changes the tag strategy in a
  future revision.
- Resume creates a new `BatchSandbox`; it does not resurrect the previous one.

### Risks and Mitigations

| Risk | Mitigation |
|------|------------|
| Pause succeeds in commit but old workload is deleted too early | Delete the original `BatchSandbox` only after `SandboxSnapshot.status.phase == Ready`. |
| Commit job lands on the wrong node | Store `sourceNodeName` in `SandboxSnapshot.spec` and pin the commit Job Pod to that node. |
| Server cannot represent a paused sandbox once `BatchSandbox` is gone | Use `SandboxSnapshot` as the source of truth for paused state in `GET /sandboxes/{sandboxId}`. |
| Repeated pause requests cause inconsistent state | Allow only one in-flight pause per `sandboxId`; return `409` if snapshot phase is already `Pending`, `Committing`, or `Pushing`. |
| Snapshot image is unavailable on resume | Require `status.phase == Ready` before resume and surface image-pull failures through normal sandbox startup state. |
| Single-snapshot design loses rollback ability | Accept as an intentional simplification for v1; multi-snapshot support is a future extension. |

## Design Details

### 1. Public Lifecycle API changes

This OSEP keeps the public API minimal:

- `CreateSandboxRequest.pausePolicy` is added as an optional field.
- `POST /sandboxes/{sandboxId}/pause`
- `POST /sandboxes/{sandboxId}/resume`
- `GET /sandboxes/{sandboxId}`

There is no snapshots listing API in this version.

Suggested request shape:

```yaml
pausePolicy:
  snapshotType: Rootfs
  snapshotRegistry: registry.example.com/sandbox-snapshots
  snapshotPushSecretName: snapshot-registry-push-secret
  resumeImagePullSecretName: snapshot-registry-pull-secret
```

`pausePolicy.snapshotType` is reserved for future expansion and currently only
supports `Rootfs`. A later revision can add `VMSnapshot` without breaking the
API shape.

### 2. PausePolicy on BatchSandbox

Pause policy remains part of the live sandbox workload definition:

```go
type PausePolicy struct {
    SnapshotType              string `json:"snapshotType,omitempty"` // Rootfs today, VMSnapshot reserved
    SnapshotRegistry          string `json:"snapshotRegistry"`
    SnapshotPushSecretName    string `json:"snapshotPushSecretName,omitempty"`
    ResumeImagePullSecretName string `json:"resumeImagePullSecretName,omitempty"`
}

type BatchSandboxSpec struct {
    // existing fields...
    PausePolicy *PausePolicy `json:"pausePolicy,omitempty"`
}
```

This policy is used by the server when constructing `SandboxSnapshot`.

### 3. SandboxSnapshot CRD

Introduce `SandboxSnapshot` under `sandbox.opensandbox.io/v1alpha1`.

```go
type SandboxSnapshotPhase string

const (
    SandboxSnapshotPhasePending    SandboxSnapshotPhase = "Pending"
    SandboxSnapshotPhaseCommitting SandboxSnapshotPhase = "Committing"
    SandboxSnapshotPhasePushing    SandboxSnapshotPhase = "Pushing"
    SandboxSnapshotPhaseReady      SandboxSnapshotPhase = "Ready"
    SandboxSnapshotPhaseFailed     SandboxSnapshotPhase = "Failed"
)

type SandboxSnapshotSpec struct {
    SandboxID                 string                `json:"sandboxId"`
    Policy                    SnapshotPolicy        `json:"policy"`
    SourceBatchSandboxName    string                `json:"sourceBatchSandboxName"`
    SourcePodName             string                `json:"sourcePodName"`
    SourceContainerName       string                `json:"sourceContainerName"`
    SourceNodeName            string                `json:"sourceNodeName"`
    ImageURI                  string                `json:"imageUri"`
    SnapshotPushSecretName    string                `json:"snapshotPushSecretName,omitempty"`
    ResumeImagePullSecretName string                `json:"resumeImagePullSecretName,omitempty"`
    ResumeTemplate            *runtime.RawExtension `json:"resumeTemplate,omitempty"`
    PausedAt                  metav1.Time           `json:"pausedAt"`
}

type SandboxSnapshotStatus struct {
    Phase       SandboxSnapshotPhase `json:"phase,omitempty"`
    Message     string               `json:"message,omitempty"`
    ReadyAt     *metav1.Time         `json:"readyAt,omitempty"`
    ImageDigest string               `json:"imageDigest,omitempty"`
}

type SnapshotPolicy struct {
    Type string `json:"type"` // Rootfs today, VMSnapshot reserved
}
```

Key rules:

- `metadata.name = sandboxId`
- one namespace contains at most one `SandboxSnapshot` for a given `sandboxId`
- creating a new pause request overwrites the retained snapshot
- `policy.type` must be set to `Rootfs` in this revision
- `SourcePodName`, `SourceContainerName`, and `SourceNodeName` are mandatory
  because the commit workflow is bound to a concrete live container
- `SourceContainerName` identifies the main sandbox workload container whose
  rootfs is being snapshotted; init containers and sidecars are not committed
- `SnapshotPushSecretName` is used only for the in-container registry push
  performed by the commit Job
- `ResumeImagePullSecretName` is used only when reconstructing the resumed
  workload so kubelet can pull the retained snapshot image
- `ResumeTemplate` must preserve enough information to reconstruct a new
  `BatchSandbox` after the original workload has been deleted

### 4. Pause state model

State is derived from resource presence and snapshot phase:

- `BatchSandbox` exists and is ready, and no matching pause cleanup is pending
  -> `Running`
- `BatchSandbox` exists and snapshot phase is
  `Pending|Committing|Pushing|Ready`, and the live workload still matches
  `snapshot.spec.sourceBatchSandboxName` -> `Pausing`
- `BatchSandbox` is absent and snapshot phase is `Ready` -> `Paused`
- `BatchSandbox` exists and was created from snapshot but is not ready yet ->
  `Resuming`
- `SandboxSnapshot.status.phase == Failed` and no live replacement workload ->
  `Failed`

This means `GET /sandboxes/{sandboxId}` must consult both `BatchSandbox` and
`SandboxSnapshot`.

### 5. Pause flow

The pause flow is:

```text
1. Client  POST /sandboxes/{sandboxId}/pause
2. Server  Resolve current BatchSandbox and running Pod for sandboxId
3. Server  Validate:
           - workload exists
           - replicas == 1 for this server path
           - pausePolicy is configured
           - no existing snapshot for sandboxId is already in phase
             Pending|Committing|Pushing
4. Server  Create or replace SandboxSnapshot(name=sandboxId) with:
           - policy.type = Rootfs
           - sourceBatchSandboxName
           - sourcePodName
           - sourceContainerName
           - sourceNodeName
           - target imageUri
           - snapshotPushSecretName
           - resumeImagePullSecretName
           - resumeTemplate
           - pausedAt
           - status.phase = Pending
5. Snapshot controller creates a same-node commit Job Pod
6. Job Pod commits container rootfs and pushes image
7. Snapshot controller updates phase:
           Pending -> Committing -> Pushing -> Ready
8. Server-side pause orchestration deletes the original BatchSandbox
9. GET /sandboxes/{sandboxId} now returns Paused from SandboxSnapshot
```

Failure behavior:

- If commit or push fails, `SandboxSnapshot.status.phase = Failed`
- The original `BatchSandbox` is not deleted
- The sandbox remains `Running` or transitions to `Failed` based on the final
  server policy; this OSEP recommends keeping the workload running and exposing
  the snapshot failure in the message
- If another pause is already in progress for the same `sandboxId`, the server
  returns `409 Conflict`

### 6. Commit Job Pod

The snapshot controller creates one short-lived Job Pod:

```yaml
apiVersion: batch/v1
kind: Job
metadata:
  name: sbxsnap-commit-<sandboxId>
spec:
  ttlSecondsAfterFinished: 300
  template:
    spec:
      restartPolicy: Never
      nodeName: <sourceNodeName>
      containers:
        - name: committer
          image: <committerImage>
          command: ["/bin/sh", "-c"]
          args:
            - |
              snapshot-committer \
                --containerd-namespace k8s.io \
                --container-id <containerID> \
                --target-image <imageUri> \
                --registry-auth-file /var/run/opensandbox/registry/.dockerconfigjson
          volumeMounts:
            - name: containerd-sock
              mountPath: /run/containerd/containerd.sock
            - name: snapshot-push-auth
              mountPath: /var/run/opensandbox/registry
              readOnly: true
      volumes:
        - name: containerd-sock
          hostPath:
            path: /run/containerd/containerd.sock
            type: Socket
        - name: snapshot-push-auth
          secret:
            secretName: <snapshotPushSecretName>
```

The controller resolves the source container ID from `SourcePodName` and
`SourceContainerName`.

`snapshot-committer` in this example is a logical role, not a required product
name. The implementation may be a small in-house binary, a thin wrapper around
existing container tooling, or another committer client, as long as it
performs the following responsibilities explicitly:

- commit the source container rootfs into a snapshot image
- read the mounted registry auth config from the Secret volume
- push the snapshot image to `spec.imageUri`
- return a clear success/failure signal so the controller can update
  `SandboxSnapshot.status.phase`

Important auth semantics:

- `imagePullSecrets` on the Job Pod, if needed for the `committerImage`, only
  affects kubelet pulling the Job image. It does not authenticate registry
  operations performed by the process inside the container.
- `snapshotPushSecretName` is mounted into the committer container and must be
  consumed explicitly by the snapshot push client as registry auth config.
- `resumeImagePullSecretName` is not used by the commit Job. It is propagated
  to the resumed workload template so kubelet can pull `snapshot.spec.imageUri`
  during resume.

### 7. Resume flow

The resume flow is:

```text
1. Client  POST /sandboxes/{sandboxId}/resume
2. Server  Resolve SandboxSnapshot(name=sandboxId)
3. Server  Validate:
           - snapshot exists
           - snapshot status.phase == Ready
4. Server  Create a new BatchSandbox:
           - metadata.name reuses the same public sandbox identity mapping
           - replicas = 1
           - template reconstructed from snapshot.spec.resumeTemplate
           - template image = snapshot.spec.imageUri
           - template imagePullSecrets = snapshot.spec.resumeImagePullSecretName
           - labels preserve sandboxId
5. Server  Aggregate sandbox state as Resuming while the new BatchSandbox is
           starting
6. BatchSandbox controller creates the new Pod
7. Once the new Pod is running and ready, GET /sandboxes/{sandboxId} returns Running
```

The snapshot is retained after resume so the sandbox can be paused and resumed
again later, but only the latest snapshot is kept.

### 8. Stable sandbox ID

The public `sandboxId` is stable across three states:

- live workload exists: identify by `BatchSandbox` label `opensandbox.io/id`
- paused workload: identify by `SandboxSnapshot.metadata.name == sandboxId`
- resumed workload: identify by the new `BatchSandbox` label

The workload object identity may change, but the public sandbox identity does
not.

### 9. List and get semantics

`GET /sandboxes/{sandboxId}` must:

- first resolve the live `BatchSandbox`
- then resolve `SandboxSnapshot`
- merge both views into one lifecycle status

`GET /sandboxes` should include:

- running sandboxes from live `BatchSandbox` objects
- paused sandboxes from `SandboxSnapshot` objects that have no live
  `BatchSandbox`

This keeps paused sandboxes visible even though their workloads have been
deleted.

### 10. Delete semantics

`DELETE /sandboxes/{sandboxId}` must remove all Kubernetes state associated with
the public sandbox identity:

- delete the live `BatchSandbox` if it exists
- delete `SandboxSnapshot(name=sandboxId)` if it exists

Registry cleanup is best-effort in this revision:

- if the implementation can safely delete the retained snapshot image from the
  registry, it may do so
- registry image deletion failure must not block sandbox deletion success
- operators may rely on registry retention or garbage collection if image
  deletion is unavailable or undesirable

### 11. Configuration

Add a new server config section:

```toml
[pause]
default_snapshot_registry = ""
committer_image = "containerd/containerd:1.7"
```

Semantics:

- `default_snapshot_registry` is used when `pausePolicy.snapshotRegistry` is not
  explicitly set.
- `committer_image` is the image used by the commit Job Pod.

### 12. Security considerations

The commit Job mounts the node's container runtime socket to resolve the source
container and commit its root filesystem. This is a privileged operation with
node-level runtime access.

Operational constraints for this design:

- `committer_image` is selected by server or operator configuration, not by the
  public sandbox API
- the commit Job spec is not user-extensible in this revision
- operators should treat the snapshot controller and committer image as trusted
  infrastructure components, with tighter RBAC and supply-chain controls than
  ordinary sandbox workloads

## Test Plan

### Unit tests

- Pause request creates or replaces `SandboxSnapshot(name=sandboxId)`.
- `SandboxSnapshot` contains `sourcePodName`, `sourceContainerName`, and
  `sourceNodeName` from the live workload.
- Snapshot controller creates a Job pinned to the correct node.
- Server returns `Paused` when `BatchSandbox` is absent and snapshot is `Ready`.
- Server returns `Pausing` when snapshot is `Ready` but the source
  `BatchSandbox` still exists.
- Server returns `Resuming` after new `BatchSandbox` is created from snapshot but
  before readiness.
- Pause returns `409` when another pause is already in progress for the same
  `sandboxId`.
- Resume fails with `409` when snapshot is absent or not `Ready`.
- Delete removes `SandboxSnapshot` for paused sandboxes.

### Integration tests

- End-to-end pause:
  - running `BatchSandbox`
  - snapshot becomes `Ready`
  - original `BatchSandbox` is deleted
  - `GET /sandboxes/{id}` returns `Paused`
- End-to-end resume:
  - server finds snapshot by `sandboxId`
  - creates new `BatchSandbox`
  - new Pod comes up from snapshot image
  - `GET /sandboxes/{id}` returns `Running`
- Repeat pause after resume:
  - the same `SandboxSnapshot` resource is reused or replaced
  - only one snapshot remains
- Delete after pause:
  - paused sandbox is removed even when no live `BatchSandbox` exists
  - retained `SandboxSnapshot` is removed

### Manual and operator validation

- Confirm the committed image is present in the registry after pause.
- Confirm working directory contents survive pause and resume.
- Confirm CPU and memory are released after the old `BatchSandbox` is deleted.
- Confirm the commit Job Pod actually runs on the source node.
- Confirm the committed rootfs comes from the intended sandbox container rather
  than a sidecar.

## Drawbacks

- Only one snapshot is retained, so rollback to older states is impossible.
- The design assumes the server-side Kubernetes path uses `replicas = 1`.
- The paused state is split from the live workload and must be reconstructed by
  the server from multiple resources.
- Registries that enforce immutable tags are a poor fit for the simplified
  single-snapshot design.
- Commit still requires node-local runtime access.

## Alternatives

### Introduce a dedicated SandboxInstance CR

A more general design is possible, but rejected here because the user goal is a
simpler architecture aligned with the current server path. For v1, the single
snapshot CR plus existing `BatchSandbox` is sufficient.

### Store pause state directly on BatchSandbox

Rejected because the paused state must survive after the workload is deleted.
Once pause succeeds, the original `BatchSandbox` no longer exists.

### Support multiple snapshots and `/snapshots` API in v1

Rejected to keep the architecture minimal. Multi-snapshot history can be added
later by changing `SandboxSnapshot` naming and list semantics.

### Restore the old BatchSandbox instead of creating a new one

Rejected because pause deletes the original workload to release resources. Resume
is cleaner if it always creates a fresh `BatchSandbox` from the retained image.

## Infrastructure Needed

- An OCI registry reachable from cluster nodes.
- A registry credential Secret of type `kubernetes.io/dockerconfigjson`.
- A committer image that can access `containerd.sock` on the source node.
- RBAC for `SandboxSnapshot`, Jobs, and reads on Pods and `BatchSandbox`.

## Upgrade & Migration Strategy

This change is additive for the public API and simple for operators.

- Existing clients keep using the same sandbox lifecycle endpoints.
- Existing Kubernetes deployments without the new `SandboxSnapshot` CRD continue
  to return `501` for pause and resume.
- Rollout sequence:
  - install the `SandboxSnapshot` CRD
  - deploy the `SandboxSnapshotController`
  - deploy the updated server with pause/resume orchestration
- Existing running sandboxes do not require migration. Only new pause/resume
  operations use the new flow.


================================================
FILE: oseps/0009-auto-renew-sandbox-on-ingress-access.md
================================================
---
title: Auto-Renew Sandbox on Ingress Access
authors:
  - "@Pangjiping"
creation-date: 2026-03-15
last-updated: 2026-03-19
status: implementing
---

# OSEP-0009: Auto-Renew Sandbox on Ingress Access

<!-- toc -->
- [Summary](#summary)
- [Motivation](#motivation)
  - [Goals](#goals)
  - [Non-Goals](#non-goals)
- [Requirements](#requirements)
- [Proposal](#proposal)
  - [Notes/Constraints/Caveats](#notesconstraintscaveats)
  - [Risks and Mitigations](#risks-and-mitigations)
- [Design Details](#design-details)
  - [Scope: Supported Reverse Proxy Paths](#scope-supported-reverse-proxy-paths)
  - [Activation Model and Extensions Contract](#activation-model-and-extensions-contract)
  - [Control Strategy to Prevent Renewal Storms](#control-strategy-to-prevent-renewal-storms)
  - [Mode A: Server Proxy Path](#mode-a-server-proxy-path)
  - [Mode B: Ingress Gateway Path (Redis Queue)](#mode-b-ingress-gateway-path-redis-queue)
  - [Why Redis Between Ingress and Server](#why-redis-between-ingress-and-server)
  - [Redis Data Model](#redis-data-model)
  - [Configuration](#configuration)
- [Test Plan](#test-plan)
- [Drawbacks](#drawbacks)
- [Infrastructure Needed](#infrastructure-needed)
- [Upgrade & Migration Strategy](#upgrade--migration-strategy)
<!-- /toc -->

## Summary

Introduce an access-driven sandbox auto-renew mechanism for ingress traffic. When users access sandbox services through reverse proxy paths, OpenSandbox can automatically extend sandbox expiration for sandboxes that explicitly opt in to this capability.

This proposal only supports two proxy paths that can observe access traffic: server proxy and ingress gateway. Docker direct access is explicitly out of scope because no reverse proxy request can be reliably captured there.

## Motivation

Today users must renew expiration explicitly through `POST /sandboxes/{id}/renew-expiration`. For interactive workloads (IDE, notebook, web app), request traffic already implies sandbox activity, but expiration still depends on explicit lifecycle API calls from clients.

This creates two practical issues:

- User sessions can be interrupted even while ingress traffic is still active.
- Naively triggering renewal on every ingress request would create renewal storms under high QPS.

An access-driven renewal mechanism is needed, but it must be strongly rate-controlled and deduplicated.

### Goals

- Automatically renew sandbox expiration on observed ingress access for explicitly opted-in sandboxes.
- Support exactly two existing reverse proxy implementations:
  - server proxy path
  - ingress gateway path
- Use direct self-call renewal in server proxy mode.
- Use Redis-backed queue forwarding in ingress gateway mode.
- Require explicit capability enablement at three levels: server, ingress, and sandbox creation request.
- Strictly control actual renewal API calls to avoid excessive renew traffic.
- Preserve existing lifecycle API semantics and backward compatibility.

### Non-Goals

- Supporting Docker direct exposure mode for auto-renew triggers.
- Replacing manual renewal API (`renew-expiration`) behavior.
- Introducing per-request guaranteed renewal (best-effort under policy control is sufficient).
- Building a generic event bus for all lifecycle actions.

## Requirements

- The implementation must work with existing lifecycle API and runtime providers.
- Reverse proxy traffic must be the only trigger source for this proposal.
- Auto-renew must be disabled unless all three conditions are met:
  - server supports and enables auto-renew-on-access,
  - ingress supports and enables renew-intent signaling (for ingress mode),
  - sandbox creation request explicitly opts in via `extensions`.
- Renewal requests must be bounded by deduplication and throttling controls.
- Ingress gateway mode must use Redis as the forwarding queue.
- Renewal must be idempotent from the caller perspective (repeated access events do not imply repeated renew calls).
- The design must remain safe under burst traffic and multi-replica deployments.

## Proposal

Add an "access renew controller" that converts proxy access signals into controlled renewal attempts.

- In server proxy mode, the server path handling proxied traffic submits local renew intents and performs internal renewal calls.
- In ingress gateway mode, ingress publishes renew intents into Redis; OpenSandbox server consumes and executes controlled renewals.
- Both modes share the same renewal gate logic: opt-in check, eligibility window, cooldown, and per-sandbox in-flight deduplication.

At a high level, access traffic indicates activity, but only eligible events produce actual `renew-expiration` operations.

### Notes/Constraints/Caveats

- This OSEP applies to reverse proxy captured traffic only.
- If a deployment bypasses proxy (direct pod/container access), no automatic renewal signal is available.
- Ingress-mode auto-renew is best-effort and depends on Redis availability.
- Renewal policy is intentionally conservative to prioritize control-plane stability.

### Risks and Mitigations

| Risk | Mitigation |
| --- | --- |
| Renewal storms under high ingress QPS | Multi-stage gating: renew-window check + cooldown + in-flight dedupe |
| Duplicate renewals across server replicas | Redis lock keys for distributed dedupe in ingress mode; local dedupe in server proxy path |
| Redis backlog growth in traffic spikes | Queue TTL, bounded consumer concurrency, and drop-on-overload policy |
| False negatives (active sandbox not renewed) | Configurable renew window and cooldown; metrics/alerts for missed renew opportunities |
| Added operational complexity | Feature flag rollout, default-off mode, and explicit docs/runbooks |

## Design Details

### Scope: Supported Reverse Proxy Paths

Only these two paths are supported:

1. **Server proxy path**
   - Access route: `/sandboxes/{sandbox_id}/proxy/{port}/...`
   - Traffic is observed inside OpenSandbox server directly.
2. **Ingress gateway path**
   - Access is observed by ingress/gateway implementation (wildcard/header/uri routing modes).
   - Signals are forwarded through Redis queue to server workers.

Explicitly unsupported:

- **Docker direct mode** (client accesses container endpoint directly):
  - No mandatory reverse proxy hop exists.
  - OpenSandbox cannot reliably observe all access requests.

### Activation Model and Extensions Contract

This feature uses explicit "three-party handshake" activation.

1. **Server-side capability switch**
   - `server.auto_renew_on_access.enabled = true` must be set (stored under `ServerConfig`).
2. **Ingress-side capability switch** (ingress mode only)
   - ingress must be configured to publish renew-intents (`server.auto_renew_on_access.redis.enabled = true` and ingress integration enabled).
3. **Sandbox-level opt-in and duration**
   - sandbox must declare in `CreateSandboxRequest.extensions` how long each automatic renewal extends expiration (see below). Presence of a valid value opts the sandbox in.

If any condition is missing, access events are ignored for renewal.

Given current API schema (`extensions: Dict[str, str]`), this OSEP proposes:

- `extensions["access.renew.extend.seconds"]` = positive integer string (e.g. `"1800"`)

**Meaning:** When auto-renew on access is triggered for this sandbox, each renewal extends expiration by this many seconds. The key thus both opts the sandbox in and defines the per-renewal extension duration.

**Behavior rules:**

- Missing key or invalid value (anything other than a positive integer string) means no auto-renew on access for that sandbox.
- Valid value (e.g. `"1800"`) enables auto-renew subject to policy gating; each successful renewal uses `new_expires_at = now + (value of access.renew.extend.seconds)`.
- Invalid values are rejected at sandbox creation time with 4xx validation error.

### Control Strategy to Prevent Renewal Storms

Both modes share the same strict control policy. An access event triggers renewal only when all checks pass:

1. **Opt-in check**: sandbox has a valid positive `access.renew.extend.seconds` in extensions.
2. **Sandbox state check**: sandbox must be `Running`.
3. **Renew window check**: remaining TTL must be below `before_expiration_seconds`.
4. **Cooldown check**: no successful renewal for this sandbox within `min_interval_seconds`.
5. **In-flight dedupe**: at most one renewal task per sandbox at a time.

If any check fails, the event is acknowledged and dropped without a renewal call.

Renew target time:

- `new_expires_at = now + (value of extensions["access.renew.extend.seconds"])`; the server may enforce a cap or default.
- `new_expires_at` must also satisfy `new_expires_at > current_expires_at` before the renew API is called.

This guarantees bounded renewal frequency even for very hot sandboxes.

### Mode A: Server Proxy Path

For requests handled by server proxy:

```
Client --> OpenSandbox Server Proxy --> Sandbox Service
              |
              +--> AccessRenewController (local signal)
                        |
                        +--> eligibility + cooldown + in-flight checks
                                |
                                +--> internal renew call (server -> own renew handler)
```

Implementation notes:

- Trigger point: after sandbox resolution and before/after proxy forward (implementation-defined), with non-blocking behavior.
- Renewal execution must not increase proxy path latency materially; use async/background task dispatch.
- Internal renewal uses existing service-level renewal logic to avoid API divergence.

### Mode B: Ingress Gateway Path (Redis Queue)

For requests first seen by ingress:

```
Client --> Ingress/Gateway
             |
             +--> publish renew-intent to Redis (sandbox_id, ts, route info)
                           |
                           v
                  OpenSandbox Renew Worker
                           |
                           +--> eligibility + cooldown + distributed dedupe
                                   |
                                   +--> renew call
```

Redis usage:

- Queue: **Redis List only** (required). Ingress pushes with LPUSH; server workers pop with BRPOP. No ack—best-effort delivery. Keeps the model simple and avoids Stream/consumer-group complexity.
- Intent payload (one JSON string per list element):
  - `sandbox_id` (string, required)
  - `observed_at` (string, required; RFC3339 or RFC3339Nano)
  - `port` (int, optional) — sandbox port accessed
  - `request_uri` (string, optional) — path forwarded to the sandbox
- Ingress may apply a **client-side throttle** (e.g. min interval per sandbox) so not every request produces an intent; queue key and optional list cap are configurable.
- Distributed dedupe lock key (server side):
  - `opensandbox:renew:lock:{sandbox_id}` with short TTL

Worker behavior:

- One or more workers block on BRPOP; on pop, parse payload, drop if stale, then run gate checks and maybe renew (with lock). No requeue on failure—best-effort.
- On publish/consume failures, log and drop.

### Why Redis Between Ingress and Server

Redis is selected for ingress -> server renew-intent delivery to decouple data-plane bursts from control-plane renew execution.

Compared with ingress directly calling server renew APIs:

- **Backpressure isolation**: ingress can LPUSH quickly; server workers process at their own pace.
- **Latency protection**: ingress request path does not wait on renew execution.
- **Multi-replica friendliness**: multiple server instances can BRPOP from the same list (competing consumers); each message is taken by one worker.
- **Failure containment**: when server is transiently unhealthy, intents can sit in the list briefly instead of ingress retrying synchronously.

Compared with other MQs (Kafka/NATS/Pulsar):

- **Scope fit**: best-effort, short-lived; Redis List is the minimal option and avoids Stream/consumer-group complexity.
- **Operational cost**: Redis is commonly available; List is the simplest structure.
- **Implementation speed**: LPUSH + BRPOP + lock is enough; no XREADGROUP/XACK or group management.

### Redis Data Model

This OSEP uses a Redis List for renew-intent events plus a lock key for per-sandbox dedupe (server side).

**Keys:**

- **Intent list key**: configurable, default `opensandbox:renew:intent` (Redis List)
- **Per-sandbox lock key**: `opensandbox:renew:lock:{sandbox_id}` (server consumer only)

**Intent payload** (single JSON string per list element):

| Field          | Type   | Required | Description                          |
|----------------|--------|----------|--------------------------------------|
| `sandbox_id`   | string | yes      | Sandbox identifier                   |
| `observed_at`  | string | yes      | Time of access (RFC3339 or RFC3339Nano) |
| `port`         | int    | no       | Sandbox port accessed                |
| `request_uri`  | string | no       | Path forwarded to the sandbox        |

Producer (ingress):

- Push with `LPUSH <queue_key> <serialized-json>`.
- Optional: cap list length (`LTRIM <queue_key> 0 max_len-1` after LPUSH); overflow is best-effort drop.
- Ingress may throttle: e.g. at most one intent per sandbox per N seconds (client-side) to limit queue growth.

Consumer (server):

- One or more workers block with `BRPOP <queue_key> <timeout>` (default key: `opensandbox:renew:intent`).
- On pop: parse payload; if `now - observed_at > event_ttl_seconds`, drop and continue.
- Acquire lock: `SET opensandbox:renew:lock:{sandbox_id} <value> NX EX lock_ttl_seconds`.
- If lock acquired: run gate checks (opt-in, state, window, cooldown) and maybe renew; then lock expires by TTL.
- If lock not acquired: treat as in-flight dedupe, drop.
- No ack or requeue: if the worker crashes after pop, that intent is lost (best-effort).

Notes:

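- Lock TTL must be short, but longer than the worst-case duration of the renew critical section; otherwise the lock can expire mid-renew and a second worker may renew the same sandbox concurrently.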
- Implementations must use Redis List; this LPUSH/BRPOP + lock flow is the only specified processing model.

### Configuration

Use `server` configuration namespace; no independent top-level config block is required:

```toml
[server]
auto_renew_on_access.enabled = false
auto_renew_on_access.before_expiration_seconds = 300
auto_renew_on_access.extension_seconds = 1800
auto_renew_on_access.min_interval_seconds = 60

# auto-detected by request path:
# - server-proxy path uses local trigger
# - ingress path uses redis trigger

auto_renew_on_access.redis.enabled = false
auto_renew_on_access.redis.url = "redis://127.0.0.1:6379/0"
auto_renew_on_access.redis.queue_key = "opensandbox:renew:intent"
auto_renew_on_access.redis.lock_ttl_seconds = 10
auto_renew_on_access.redis.event_ttl_seconds = 30
auto_renew_on_access.redis.consumer_concurrency = 8
```

Configuration rules:

- `server.auto_renew_on_access.enabled=false` means feature fully disabled.
- Ingress path renewal requires the Redis block to be enabled and Redis to be reachable from the server; the **ingress component** uses its own config (e.g. CLI flags: `--renew-intent-enabled`, `--renew-intent-redis-dsn`, `--renew-intent-queue-key`, `--renew-intent-queue-max-len`, `--renew-intent-min-interval`) to connect to Redis and publish intents. The ingress queue key must match the server's `auto_renew_on_access.redis.queue_key` (default `opensandbox:renew:intent`); otherwise the server consumer never sees the intents.
- Server proxy path can run without Redis.
- Feature is applied per sandbox only when `extensions["access.renew.extend.seconds"]` is present and a valid positive integer string.
- Docker runtime direct mode remains unsupported regardless of this config.

Create request example:

```json
{
  "image": { "uri": "python:3.11-slim" },
  "entrypoint": ["python", "-m", "http.server", "8000"],
  "timeout": 3600,
  "extensions": {
    "access.renew.extend.seconds": "1800"
  }
}
```

## Test Plan

- **Unit Tests**
  - Extension validation for auto-renew opt-in keys and values
  - Renew eligibility function (window/cooldown/state checks)
  - In-flight dedupe behavior under concurrent signals
  - Renew target time calculation and monotonicity checks
- **Integration Tests (Server Proxy)**
  - Non-opt-in sandbox never triggers renew under access traffic
  - Opt-in sandbox triggers bounded renew calls under same traffic
  - High-frequency proxy requests only trigger bounded renew calls
  - Proxy request path remains successful when renew path fails transiently
- **Integration Tests (Ingress + Redis)**
  - Non-opt-in sandbox intents are ignored at consumer side
  - Ingress event publish -> worker consume -> renew success
  - Duplicate events for same sandbox are coalesced
  - Redis unavailable path follows best-effort drop semantics
- **Stress Tests**
  - N sandboxes x high QPS access confirms renew call count stays within policy bound

Success criteria:

- Renewal request rate remains proportional to policy limits, not ingress QPS.
- Active sandboxes in supported proxy paths are renewed before expiration under normal operating conditions.

## Drawbacks

- Adds background components and policy tuning complexity.
- Ingress mode introduces a hard dependency on Redis availability.
- Conservative gating may skip some renew opportunities under extreme failure conditions.

## Infrastructure Needed

- Redis service for ingress gateway mode.
- Ingress (or gateway) that publishes renew intents (e.g. OpenSandbox Ingress with `--renew-intent-enabled`, Redis DSN, optional queue key / list cap / client-side per-sandbox min-interval throttle).

## Upgrade & Migration Strategy

- Backward compatible and disabled by default.
- Rollout order:
  1. Deploy server with feature flag off.
  2. Enable in server proxy path for canary validation.
  3. Enable ingress + Redis path progressively.
- Rollback:
  - Disable `server.auto_renew_on_access.enabled` (and `server.auto_renew_on_access.redis.enabled` for ingress mode).
  - Existing manual renewal flow remains unchanged.


================================================
FILE: oseps/0010-opentelemetry-instrumentation.md
================================================
---
title: OpenTelemetry Instrumentation (execd, egress, and ingress)
authors:
  - "@Pangjiping"
creation-date: 2026-03-18
last-updated: 2026-03-18
status: draft
---

# OSEP-0010: OpenTelemetry Instrumentation (execd, egress, and ingress)

<!-- toc -->
- [Summary](#summary)
- [Motivation](#motivation)
  - [Goals](#goals)
  - [Non-Goals](#non-goals)
- [Requirements](#requirements)
- [Proposal](#proposal)
  - [Notes/Constraints/Caveats](#notesconstraintscaveats)
  - [Risks and Mitigations](#risks-and-mitigations)
- [Design Details](#design-details)
  - [1. Metrics](#1-metrics)
    - [1.1 execd metrics](#11-execd-metrics)
    - [1.2 egress metrics](#12-egress-metrics)
    - [1.3 ingress metrics](#13-ingress-metrics)
  - [2. Logging](#2-logging)
  - [3. Tracing](#3-tracing)
  - [4. Initialization and configuration](#4-initialization-and-configuration)
- [Test Plan](#test-plan)
- [Drawbacks](#drawbacks)
- [Infrastructure Needed](#infrastructure-needed)
- [Upgrade & Migration Strategy](#upgrade--migration-strategy)
<!-- /toc -->

## Summary

This proposal introduces unified **OpenTelemetry** instrumentation for OpenSandbox’s three Go components—**execd**, **egress**, and **ingress**—covering **Metrics**, **Logs**, and **Distributed Traces**. With OTLP export, configurable sampling, and environment-based configuration, operators and developers can observe request flows, resource usage, policy enforcement, and ingress proxy traffic in production and integrate with existing observability stacks (e.g., Jaeger, Prometheus, Grafana Loki).

## Motivation

Today execd, egress, and ingress have partial observability (e.g., execd’s HTTP API and `GetMetrics`/`WatchMetrics`, zap-based loggers in egress and ingress) but lack:

- **Standardized metrics**: No Prometheus/OpenTelemetry-style HTTP QPS, latency, status codes; no unified metrics for execd code execution and Jupyter sessions, egress DNS/policy, or ingress proxy requests and routing.
- **Distributed tracing**: No way to correlate requests, code execution, DNS lookups, policy evaluation, and ingress proxy forwarding in a single trace.
- **Log–trace correlation**: Logs do not include `trace_id`/`span_id`, making it hard to jump from logs to traces.
- **Unified export**: No OTLP endpoint or sampling configuration, so integration with a central observability platform is difficult.

Adopting OpenTelemetry allows the three components to gain consistent metrics, logs, and tracing without changing core logic, with the ability to disable or tune sampling via environment variables for production.

### Goals

- Integrate the OpenTelemetry SDK (Go) into execd, egress, and ingress to emit **Metrics**, **Logs**, and **Traces**.
- **Metrics**: Cover HTTP, code execution, Jupyter, filesystem operations, and system resources (execd); DNS, policy, nftables, and system resources (egress); HTTP/WebSocket proxy requests, routing resolution, status codes, and system resources (ingress).
- **Logging**: Extend the existing zap logger to automatically add `trace_id` and `span_id`, with context-aware logging.
- **Tracing**: Instrument key paths (HTTP requests, code execution, DNS lookups, policy evaluation, ingress proxy and routing) with spans.
- **Configuration**: Provide full initialization and support for OTLP exporters, sampling, and environment variables; default to no export or low sampling so deployments without observability backends are unaffected.

### Non-Goals

- Do not replace existing execd HTTP metric endpoints such as `GetMetrics`/`WatchMetrics`; they can coexist with OpenTelemetry metrics.
- Do not implement OpenTelemetry on the server (Python) in this proposal; scope is limited to the three Go components (execd, egress, ingress).
- Do not commit to vendor-specific backends (e.g., Datadog, New Relic); export is via the standard OTLP protocol only.
- Do not require a Collector; both direct OTLP and via-Collector export are supported.

## Requirements

| ID | Requirement | Priority |
|----|-------------|----------|
| R1 | execd/egress/ingress support exporting Metrics, Logs, and Traces via OTLP | Must Have |
| R2 | Metrics cover all execd, egress, and ingress metric items listed in this proposal | Must Have |
| R3 | Logs automatically include trace_id and span_id, with values taken from context | Must Have |
| R4 | Key paths (HTTP, code execution, DNS, policy, ingress proxy) have trace spans | Must Have |
| R5 | Configuration via environment variables (endpoint, sampling, toggles) without code changes | Must Have |
| R6 | Default or unset config results in no export or low sampling to avoid impacting deployments without observability | Should Have |
| R7 | Compatible with existing zap Logger interface; no breaking changes to Logger abstraction | Should Have |

## Proposal

Introduce an **OpenTelemetry initialization module** in the main startup of execd, egress, and ingress that:

1. Creates and registers a **MeterProvider** and **MetricReader** (e.g., OTLP exporter).
2. Creates a **TracerProvider** with a sampler such as **TraceIDRatioBased** and registers an OTLP trace exporter.
3. Optionally sets up a **LoggerProvider** or **zap enhancement** so that log fields include trace/span information.
4. Reads OTLP endpoint, sampling rate, service name, etc., from environment variables (or config files).

Application code records metrics and spans on critical paths and, when logging, extracts the current span’s trace_id/span_id from `context.Context` into zap fields. Metrics, logs, and traces then align semantically and can be exported to the same observability platform via OTLP. Egress and ingress both use the standard library `net/http` (egress for the policy API ServeMux, ingress for the proxy Handler); wrap the `Handler` or use middleware such as otelhttp to create a span and context per request. Execd uses Gin and can use the otelgin middleware.

### Notes/Constraints/Caveats

- OpenTelemetry Go SDK version and stability must match the project’s Go version; prefer the stable API (e.g., `go.opentelemetry.io/otel` v1).
- Metric and span names should follow OpenTelemetry semantic conventions (e.g., HTTP attributes, metric units) for compatibility with generic dashboards.
- egress may run as a sidecar in the same Pod as the workload; keep sampling and export batching configurable to limit sidecar CPU/memory.
- Log enhancements apply only to code paths using the shared Logger; code that uses the standard `log` package is out of scope for this proposal but can be migrated later.

### Risks and Mitigations

| Risk | Mitigation |
|------|------------|
| OTLP export failures or unreachable endpoint cause blocking or retry storms | Use async export, configurable timeouts and queue limits; on failure only log locally and do not affect the main flow |
| High sampling rate produces too much trace data | Default to low or no sampling; configure via environment; recommend ≤ 0.1 in production |
| High metric cardinality (e.g., per sandbox_id or raw URL path) | Avoid high-cardinality dimensions: only use aggregated dimensions such as status_code, operation; **HTTP metrics must use the route template `http.route`** (e.g. `/code/contexts/:contextId`), not the raw request path, or execd routes with path parameters will produce high-cardinality series that are hard to operate |
| Divergence from existing metrics APIs | Leave existing HTTP metric endpoints unchanged; OpenTelemetry metrics are additive |

## Design Details

### 1. Metrics

#### 1.1 execd metrics

| Category | Metric name (suggested) | Type | Description |
|----------|-------------------------|------|-------------|
| **HTTP** | `execd.http.request.count` | Counter | Request count by method, **http.route (route template)**, status_code (QPS derivable) |
| | `execd.http.request.duration` | Histogram | Request latency (s or ms) by method, **http.route (route template)** |
| **Code execution** | `execd.execution.count` | Counter | Execution count by result (success/failure) |
| | `execd.execution.duration` | Histogram | Duration per execution |
| | `execd.execution.memory_bytes` | Histogram / Gauge | Memory usage during execution (if available) |
| **Jupyter sessions** | `execd.jupyter.sessions.active` | UpDownCounter / Gauge | Current active sessions |
| | `execd.jupyter.sessions.created_total` | Counter | Sessions created |
| | `execd.jupyter.sessions.deleted_total` | Counter | Sessions deleted |
| **Filesystem** | `execd.filesystem.operations.count` | Counter | Operation count by type (upload/download/list/delete, etc.) |
| | `execd.filesystem.operations.duration` | Histogram | Operation duration |
| **System** | `execd.system.cpu.usage` | Gauge | Process or host CPU usage (optional) |
| | `execd.system.memory.usage_bytes` | Gauge | Memory usage |
| | `execd.system.process.count` | Gauge | Current number of processes in the system |

All metrics are created via the OpenTelemetry Meter; units and attributes follow [OpenTelemetry semantic conventions](https://opentelemetry.io/docs/specs/semconv/).

**Execd HTTP dimensions:** Several execd routes embed identifiers in the URL (e.g. `/code/contexts/:contextId`, `/session/:sessionId/run`, `/command/status/:id` in `components/execd/pkg/web/router.go`). Using the raw request path as a metric dimension would create high-cardinality time series and make OTLP/Prometheus metrics hard to operate. Therefore **the route template must be used as the dimension**: `http.route` (e.g. `/code/contexts/:contextId`), not the actual request path (e.g. `/code/contexts/abc-123`). Gin and middleware such as otelgin should be configured to record the matched route pattern as `http.route`.

#### 1.2 egress metrics

| Category | Metric name (suggested) | Type | Description |
|----------|-------------------------|------|-------------|
| **DNS** | `egress.dns.queries.count` | Counter | DNS query count (QPS derivable) |
| | `egress.dns.query.duration` | Histogram | Per-query latency |
| | `egress.dns.cache.hits_total` | Counter | Cache hits |
| | `egress.dns.cache.misses_total` | Counter | Cache misses (hit rate = hits / (hits + misses)) |
| **Policy** | `egress.policy.evaluations.count` | Counter | Evaluations by action (allow/deny) |
| | `egress.policy.denied_total` | Counter | Denials; block rate derivable with evaluations |
| **nftables** | `egress.nftables.rules.count` | Gauge | Current rule count |
| | `egress.nftables.updates.count` | Counter | Rule update count (update frequency observable) |
| **System** | `egress.system.cpu.usage` | Gauge | CPU usage |
| | `egress.system.memory.usage_bytes` | Gauge | Memory usage |

#### 1.3 ingress metrics

| Category | Metric name (suggested) | Type | Description |
|----------|-------------------------|------|-------------|
| **HTTP** | `ingress.http.request.count` | Counter | Request count by method, status_code, proxy_type (http/websocket) (QPS derivable) |
| | `ingress.http.request.duration` | Histogram | Request duration (including routing and proxy) by method, proxy_type |
| **Routing** | `ingress.routing.resolutions.count` | Counter | Resolutions by result (success/not_found/not_ready/error) |
| | `ingress.routing.resolution.duration` | Histogram | Time to resolve sandbox target (from cache or API) |
| **Proxy type** | `ingress.proxy.http.requests_total` | Counter | HTTP proxy request count |
| | `ingress.proxy.websocket.connections_total` | Counter | WebSocket connection count |
| **System** | `ingress.system.cpu.usage` | Gauge | CPU usage |
| | `ingress.system.memory.usage_bytes` | Gauge | Memory usage |

Note: Ingress typically returns 200 (success), 400 (bad request), 404 (sandbox not found), 502 (upstream error), 503 (sandbox not ready); aggregate by `http.status_code` for error-rate monitoring.

Metric namespaces are `execd.*`, `egress.*`, and `ingress.*` for easy filtering in a shared backend.

**Custom metric dimensions (env):** Provide an env-based hook so users can define **extra metric dimensions** (not limited to sandbox_id). For example, support **`OPENSANDBOX_OTEL_METRICS_EXTRA_ATTRIBUTES`** (or equivalent): a comma-separated list of attribute names (e.g. `sandbox_id`, `tenant_id`, or custom keys). When recording metrics, if the current context or request carries those attributes, they are reported as dimensions on that data point; when unset or empty, no extra dimensions are added. This lets users opt in to “aggregate by sandbox_id” or any custom dimension and accept the cardinality and cost. Implementations must document that this option increases cardinality and should be used only when entity count is bounded.

### 2. Logging

- **Zap enhancement**: In `components/internal/logger` (zap implementation), add the ability to read the current span’s `TraceID` and `SpanID` from `context.Context` and inject them as zap fields, e.g.:
  - Add `LoggerWithContext(ctx context.Context) Logger`, or at call sites use `logger.With(Field{Key: "trace_id", Value: trace.SpanFromContext(ctx).SpanContext().TraceID().String()})` (and similarly for span_id).
- **Context-aware**: Handlers and middleware that receive `context.Context` should use a logger that has trace/span injected so all logs for the same request share the same trace_id.
- **Filter/query by sandbox_id**: When a request or operation is associated with a sandbox (e.g. execd handling a request for that sandbox, ingress proxying to that sandbox), log records **must** include a filterable sandbox identifier (recommend a consistent attribute name such as `opensandbox.sandbox_id` or `sandbox_id`) so that log backends can filter and query by sandbox_id for per-sandbox debugging.
- **Correlation**: If OTLP Logs are used, log records can carry trace_id/span_id and link to the Traces backend for “click from log to trace” workflows.

Implementation options:

- A zap `Core` or `Hook` that reads span from `context.Context` and adds fields (requires middleware to propagate context with span through the request path).
- A `log.Ctx(ctx).Infof(...)`-style helper that gets span from ctx and calls zap.

The existing `Logger` interface (`Infof`, `With`, `Named`) stays unchanged; only context-based construction or trace-field helpers are added.

### 3. Tracing

- **HTTP (execd: Gin)**
  execd uses Gin (`components/execd/pkg/web/router.go`). Register OpenTelemetry HTTP middleware (e.g., `otelgin`) on its routes so each request gets a span with `http.method`, `http.route`, `http.status_code`, etc. When the request is associated with a sandbox (e.g. API call for that sandbox), the span **must** include `sandbox_id` (or a consistent name such as `opensandbox.sandbox_id`) so that traces can be filtered and queried by sandbox_id in Jaeger and similar backends. Pass the request’s context downstream so business logic and logging use the same trace.

- **HTTP (egress: net/http)**
  egress exposes its policy API from a net/http `ServeMux` (`components/egress/policy_server.go`), not Gin. Instrument egress’s HTTP entry points the same way as ingress: wrap the `http.Handler` or use net/http-compatible middleware (e.g., `go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp`) to create a span per request and pass context with span downstream, so that R1/R4 are met for egress HTTP.

- **ingress HTTP**  
  Ingress uses `net/http`. Wrap the `http.Handler` (or use middleware) to create a root span per request (e.g., `ingress.proxy`) with `http.method`, `http.route`, `http.status_code`, `ingress.mode` (header/uri). When the request is routed to a sandbox, the span **must** include `sandbox_id` (or `opensandbox.sandbox_id`) so that traces can be filtered and queried by sandbox_id. Pass context with span from `ServeHTTP` into the proxy for logs and child spans.

- **ingress proxy forwarding**  
  When forwarding to the target sandbox, create a child span (e.g., `ingress.forward`) with target host and proxy_type (http/websocket). Sandbox resolution (sandbox_id → backend address from sandbox provider) can be a separate child span (e.g., `ingress.resolve`) with attribute resolution_result (success/not_found/not_ready/error) to distinguish 404/503/502 in the trace.

- **Code execution**  
  At execd’s execution entry (e.g., `ExecuteCode`/run), create a child span such as `execution.run` with attributes like `execd.operation=execute` and result. If there are multiple steps (prepare, run, cleanup), add child spans per step.

- **DNS query**  
  In egress DNS proxy, create a span per query (e.g., `dns.query`) with domain, result (allow/deny), cache hit/miss.

- **Policy evaluation**  
  In egress policy evaluation, create a span (e.g., `policy.evaluate`) with target (domain/IP) and action (allow/deny).

All spans are children of the HTTP request span when entered via HTTP, so the full call tree is visible in UIs like Jaeger.

### 4. Initialization and configuration

- **Initialization**  
  Implement `InitOpenTelemetry(ctx context.Context, opts InitOptions) (shutdown func(), err error)` in main for execd, egress, and ingress (or in a shared `pkg/telemetry`):
  - Create `MeterProvider` and register an OTLP metric exporter (e.g., `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp` or gRPC).
  - Create `TracerProvider` with a `TraceIDRatioBased` sampler and register an OTLP trace exporter.
  - Optionally create `LoggerProvider` and register an OTLP log exporter; otherwise rely on zap enhancement and the standard Logs Bridge.
  - Set the global providers via `otel.SetMeterProvider`, `otel.SetTracerProvider`, etc., and return a `shutdown` function that calls `ForceFlush` and then `Shutdown` on each provider; call it on process exit so buffered telemetry is not lost.

- **OTLP exporter**  
  Support HTTP and gRPC OTLP endpoints via environment variables:
  - `OTEL_EXPORTER_OTLP_ENDPOINT` (or per-signal `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT`, `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT`).
  - If unset, do not export: skip exporter creation or install no-op providers so that no connection attempts (and therefore no connection errors) occur.

- **Sampling**  
  - Use `OTEL_TRACES_SAMPLER_ARG` (0.0–1.0 for ratio sampler).
  - Or `OTEL_TRACES_SAMPLER=parentbased_traceidratio` with `OTEL_TRACES_SAMPLER_ARG=0.1`.

- **Environment variables**  
  Support at least (names follow OpenTelemetry conventions):
  - `OTEL_SERVICE_NAME`: service name (execd / egress / ingress).
  - `OTEL_EXPORTER_OTLP_ENDPOINT` (or per-signal endpoints).
  - `OTEL_TRACES_SAMPLER`, `OTEL_TRACES_SAMPLER_ARG`.
  - `OTEL_METRICS_EXPORTER`, `OTEL_LOGS_EXPORTER` (e.g., `none` to disable).
  - `OTEL_RESOURCE_ATTRIBUTES`: comma-separated `key=value` pairs for resource attributes (e.g., `deployment.environment=prod`).
  - **`OPENSANDBOX_OTEL_METRICS_EXTRA_ATTRIBUTES`**: comma-separated list of **custom metric dimension** attribute names (e.g. `sandbox_id`, `tenant_id`, or custom keys). When recording metrics, if the context or request carries an attribute with that name, it is added as an extra dimension on the data point; when unset or empty, no extra dimensions are added. This allows opt-in “aggregate by sandbox_id” or other custom dimensions; users assume cardinality and cost. Document that this increases cardinality and is best when entity count is bounded.

Optionally read some of these from existing config or flags and allow environment variables to override.

## Test Plan

- **Unit tests**
  - Metrics: Create a MeterProvider with an in-memory or mock exporter, run business logic, assert exported metric count and key attributes.
  - Logging: Build context with a span, call LoggerWithContext and log, assert output contains trace_id and span_id.
  - Tracing: Use sdktrace.NewTracerProvider with a SpanRecorder or in-memory exporter, run one request flow, assert span names and parent-child relationships.
- **Integration tests**
  - Start execd/egress/ingress with OTLP endpoint pointing at a test Collector or mock; send HTTP requests and trigger execution/DNS/policy/proxy; verify OTLP payloads contain expected metrics and traces.
- **Configuration**
  - When `OTEL_EXPORTER_OTLP_*` is unset, no connection is made and no error is raised.
  - When sampling rate is 0, no spans are produced.
  - Environment variables override config file where applicable.

Acceptance: With OTLP enabled and sampling configured, Jaeger shows full traces (HTTP → execution/DNS/policy/ingress proxy); Prometheus or the backend shows all execd, egress, and ingress metrics listed above; log lines include trace_id/span_id that link to traces.

## Drawbacks

- Additional dependencies and binary size (OpenTelemetry SDK and OTLP exporters).
- Under high QPS, even with low sampling, tracing and metrics add some CPU/memory cost; control via sampling and aggregation dimensions.
- Correct log–trace correlation requires passing `context.Context` through the call chain; some legacy code may need small changes.

## Infrastructure Needed

- **Go dependencies**
  - `go.opentelemetry.io/otel`
  - `go.opentelemetry.io/otel/sdk`
  - `go.opentelemetry.io/otel/exporters/otlp/...` (metrics/traces/logs, HTTP or gRPC as needed)
  - Optional: `go.opentelemetry.io/contrib/instrumentation/github.com/gin-gonic/gin/otelgin` (execd only); egress and ingress use net/http and require wrapping the Handler with e.g. `go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp`.
- **Runtime**
  - For direct OTLP: a reachable OTLP endpoint (e.g., OpenTelemetry Collector, Jaeger, or an OTLP-capable backend).
  - For “no export” mode: no extra infrastructure.

## Upgrade & Migration Strategy

- **Backward compatibility**: No changes to existing HTTP metric endpoints or Logger interface; only new initialization and optional env vars. With OpenTelemetry unconfigured, behavior is unchanged.
- **Rollout**
  1. Ship initialization and config code with OTLP endpoint unset (noop).
  2. Enable OTLP and low sampling in test; verify metrics and traces.
  3. Add metric and span instrumentation in execd/egress/ingress handlers and zap trace injection.
  4. Enable in production and tune sampling and endpoint as needed.
- **Rollback**: Unset or clear `OTEL_EXPORTER_OTLP_*` to stop export; no code change required.


================================================
FILE: oseps/CONTRIBUTING.md
================================================
# OSEP (OpenSandbox Enhancement Proposals)

Use this directory to draft, review, and store enhancement proposals before they
undergo broader discussion.

> [!NOTE]
> The OSEP process and template structure are inspired by
> [Tekton Enhancement Proposals (TEPs)](https://github.com/tektoncd/community/tree/main/teps).

> [!IMPORTANT]
> **When is an OSEP required?**
>
> Use the OSEP process for changes that:
> - Introduce new features or major enhancements
> - Modify the core sandbox API or runtime behavior
> - Affect the security model or isolation guarantees
>
> Small bug fixes, documentation updates, and minor refactors can be submitted
> directly as Pull Requests without an OSEP.

## Getting started

1. Run the init script to create a new proposal:

   ```bash
   oseps/init-osep.sh "Proposal Title"
   ```

   This copies the template, fills in metadata, and creates a sequentially
   numbered `0001-proposal-title.md` draft.

2. Fill in each section from the template (`Summary`, `Motivation`, …).
3. Once ready, submit the resulting file in a PR for community review.

**Available options:**

```bash
oseps/init-osep.sh --help
oseps/init-osep.sh --status provisional --author "@username" "My Feature"
```

## Template

The template used for new proposals lives at `oseps/osep-template.md.template`
and mirrors Tekton's TEP structure while capturing the key sections needed
for OpenSandbox planning. Each generated file starts with YAML front matter
followed by the title and TOC:

```yaml
---
title: My First Proposal
authors:
  - "@your-github-handle"
creation-date: 2025-12-21
last-updated: 2025-12-21
status: draft
---

# OSEP-0001: My First Proposal

<!-- toc -->
- [Summary](#summary)
...
<!-- /toc -->
```

This YAML front matter renders as a table on GitHub and keeps the proposal
metadata (status, authors, dates) visible at the top of the document.

## Status lifecycle

| Status | Description |
|--------|-------------|
| `draft` | Work in progress; not yet under formal review. |
| `provisional` | Maintainers agree with the direction; design details still pending. |
| `implementable` | Design approved and compliance checks passed; ready for implementation. |
| `implementing` | Code is being merged and SDKs are being synchronized. |
| `implemented` | Feature has reached GA status with complete documentation. |
| `withdrawn` | Author has withdrawn the proposal. |
| `rejected` | Maintainers have declined the proposal. |


================================================
FILE: oseps/README.md
================================================
# OpenSandbox Enhancement Proposals

See the [OSEP contributing guide](CONTRIBUTING.md) for information on OSEPs and how to create and merge them.

This is the complete list of OpenSandbox Enhancement Proposals:

|                            OSEP                            |                   Title                    |    Status     | Last Updated |
|:----------------------------------------------------------:|:------------------------------------------:|:-------------:|:------------:|
|       [OSEP-0001](0001-fqdn-based-egress-control.md)       |         FQDN-based Egress Control          |  implemented  |  2026-01-22  |
| [OSEP-0002](0002-kubernetes-sigs-agent-sandbox-support.md) |   kubernetes-sigs/agent-sandbox Support    |  implemented  |  2026-01-23  |
|   [OSEP-0003](0003-volume-and-volumebinding-support.md)    |               Volume Support               | implementing  |  2026-02-11  |
|       [OSEP-0004](0004-secure-container-runtime.md)        | Pluggable Secure Container Runtime Support | implementing  |  2026-02-09  |
|       [OSEP-0005](0005-client-side-sandbox-pool.md)        |          Client-Side Sandbox Pool          | implementing  |  2026-03-09  |
|           [OSEP-0006](0006-developer-console.md)           |  Developer Console for Sandbox Operations  | implementable |  2026-03-06  |
|     [OSEP-0007](0007-fast-sandbox-runtime-support.md)      |        Fast Sandbox Runtime Support        |  provisional  |  2026-02-08  |
|   [OSEP-0008](0008-pause-resume-rootfs-snapshot.md)        |     Pause and Resume via Rootfs Snapshot   |     draft     |  2026-03-13  |
| [OSEP-0009](0009-auto-renew-sandbox-on-ingress-access.md)  |    Auto-Renew Sandbox on Ingress Access    | implementing  |  2026-03-18  |

================================================
FILE: oseps/init-osep.sh
================================================
#!/usr/bin/env bash

# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Helper to bootstrap a new OpenSandbox Enhancement Proposal (OSEP).

# Abort on command errors, on expansion of unset variables, and on a failure
# anywhere inside a pipeline.
set -o errexit -o nounset -o pipefail

# Absolute path of the directory that contains this script, and the template
# file expected to sit next to it.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
TEMPLATE="${SCRIPT_DIR}/osep-template.md.template"

# Space-separated list of the status values this script accepts.
VALID_STATUSES="draft provisional implementable implementing implemented withdrawn rejected"

usage() {
    # Print the command-line help text to stdout. The program name shown in
    # the text is the basename of $0, resolved once and reused.
    local prog
    prog=$(basename "$0") || true
    cat <<EOF
Usage: ${prog} [OPTIONS] <title>

Create a new OpenSandbox Enhancement Proposal

Arguments:
  title                 Proposal title (will appear in the document header)

Options:
  -s, --status STATUS   Initial status of the proposal (default: draft)
                        Valid: draft, provisional, implementable, implementing,
                               implemented, withdrawn, rejected
  -a, --author AUTHOR   Author(s) to attribute in the new proposal
  -o, --output PATH     Explicit path to write the new proposal
  -h, --help            Show this help message

Examples:
  ${prog} "Network Control"
  ${prog} --status provisional --author "@user" "New Feature"
EOF
}

slugify() {
    # Convert a free-form title into a file-name slug:
    #   1. lowercase it,
    #   2. drop every character other than a-z, 0-9, space, "_" and "-",
    #   3. collapse each run of spaces/underscores/hyphens into one hyphen,
    #   4. strip hyphens from both ends.
    # The slug is written to stdout.
    local raw="$1"
    echo "$raw" \
        | tr '[:upper:]' '[:lower:]' \
        | sed -E \
            -e 's/[^a-z0-9 _-]//g' \
            -e 's/[ _-]+/-/g' \
            -e 's/^-+|-+$//g'
}

default_author() {
    # Print the author to use when none was given on the command line.
    # Tried in order: git user.name, git user.email, $USER, and finally the
    # literal "Unknown Author". A failing or missing git is tolerated.
    local who
    who=$(git config user.name 2>/dev/null || true)
    [[ -n "$who" ]] || who=$(git config user.email 2>/dev/null || true)
    [[ -n "$who" ]] || who="${USER:-Unknown Author}"
    echo "$who"
}

next_sequence() {
    # Print one more than the largest four-digit prefix found among the
    # "NNNN-*.md" files in $SCRIPT_DIR (prints 1 when there are none).
    local max_seen=0 path prefix
    for path in "$SCRIPT_DIR"/[0-9][0-9][0-9][0-9]-*.md; do
        # An unmatched glob is left as a literal string; skip non-files.
        if [[ ! -f "$path" ]]; then
            continue
        fi
        prefix="${path##*/}"      # file name without its directory
        prefix="${prefix%%-*}"    # text before the first hyphen
        # Force base 10 so a prefix such as 0008 is not read as octal.
        prefix=$((10#$prefix))
        (( prefix <= max_seen )) || max_seen=$prefix
    done
    echo $((max_seen + 1))
}

validate_status() {
    # Return 0 when $1 is exactly one of the words in $VALID_STATUSES.
    # Otherwise print an error to stderr and terminate the script with 1.
    local wanted="$1" candidate
    for candidate in $VALID_STATUSES; do
        [[ "$candidate" != "$wanted" ]] || return 0
    done
    {
        echo "Error: Invalid status '$wanted'"
        echo "Valid statuses: $VALID_STATUSES"
    } >&2
    exit 1
}

# Parse arguments
#
# Results are left in four globals:
#   TITLE   the single positional argument (required, checked later)
#   STATUS  value of -s/--status, lowercased (default: draft)
#   AUTHOR  value of -a/--author (default: empty)
#   OUTPUT  value of -o/--output (default: empty)
TITLE=""
STATUS="draft"
AUTHOR=""
OUTPUT=""

while [[ $# -gt 0 ]]; do
    case "$1" in
        -h|--help)
            usage
            exit 0
            ;;
        -s|--status|-a|--author|-o|--output)
            # Each of these options consumes the next argument. Without this
            # check, a trailing option would expand an unset $2 under
            # `set -u` and abort with a raw "unbound variable" message.
            if [[ $# -lt 2 ]]; then
                echo "Error: Option $1 requires a value" >&2
                usage >&2
                exit 1
            fi
            case "$1" in
                -s|--status)
                    # Status matching is case-insensitive: normalize here.
                    STATUS=$(printf '%s' "$2" | tr '[:upper:]' '[:lower:]')
                    ;;
                -a|--author)
                    AUTHOR="$2"
                    ;;
                -o|--output)
                    OUTPUT="$2"
                    ;;
            esac
            shift 2
            ;;
        -*)
            echo "Error: Unknown option $1" >&2
            usage >&2
            exit 1
            ;;
        *)
            # Only one positional argument (the title) is accepted.
            if [[ -z "$TITLE" ]]; then
                TITLE="$1"
            else
                echo "Error: Unexpected argument '$1'" >&2
                usage >&2
                exit 1
            fi
            shift
            ;;
    esac
done

# Validate required arguments: the title is the only mandatory input.
[[ -n "$TITLE" ]] || {
    echo "Error: title is required" >&2
    usage >&2
    exit 1
}

# Validate status (validate_status exits the script on an unknown value).
validate_status "$STATUS"

# Set defaults: use default_author only when --author was not supplied.
[[ -n "$AUTHOR" ]] || AUTHOR=$(default_author)

# Values substituted into the template later on.
DATE=$(date +%Y-%m-%d)
SLUG=$(slugify "$TITLE")

# Determine destination
#
# Without --output the proposal is written next to this script as
# "<NNNN>-<slug>.md", where NNNN is the next free sequence number.
# With --output the given path is used verbatim.
if [[ -z "$OUTPUT" ]]; then
    # A title with no characters that survive slugify (for example one
    # written entirely in a non-Latin script) would otherwise produce a file
    # literally named "NNNN-.md".
    if [[ -z "$SLUG" ]]; then
        echo "Error: title '$TITLE' has no characters usable in a file name; use --output to choose a path" >&2
        exit 1
    fi

    SEQ=$(next_sequence)
    PROPOSAL_ID=$(printf "%04d" "$SEQ")
    DESTINATION="$SCRIPT_DIR/${PROPOSAL_ID}-${SLUG}.md"

    # Ensure unique filename
    while [[ -f "$DESTINATION" ]]; do
        SEQ=$((SEQ + 1))
        PROPOSAL_ID=$(printf "%04d" "$SEQ")
        DESTINATION="$SCRIPT_DIR/${PROPOSAL_ID}-${SLUG}.md"
    done
else
    DESTINATION="$OUTPUT"
    # Reuse a leading "<digits>-" from the file name as the proposal id.
    # When the name has no such prefix, fall back to the next sequence number
    # instead of leaking the whole file name into the "OSEP-<id>" heading.
    # NOTE(review): the fallback choice is an assumption - confirm it matches
    # how maintainers number out-of-tree drafts.
    OUTPUT_NAME="${DESTINATION##*/}"
    if [[ "$OUTPUT_NAME" =~ ^([0-9]+)- ]]; then
        PROPOSAL_ID="${BASH_REMATCH[1]}"
    else
        PROPOSAL_ID=$(printf "%04d" "$(next_sequence)")
    fi
fi

# Check if destination exists
if [[ -f "$DESTINATION" ]]; then
    echo "Refusing to overwrite existing proposal at $DESTINATION" >&2
    exit 1
fi

# Verify template exists
if [[ ! -f "$TEMPLATE" ]]; then
    echo "Error: OSEP template not found at $TEMPLATE" >&2
    exit 1
fi

# Render template using pure bash substitution (avoids sed escaping issues).
#
# Bash 5.2 and later enable the `patsub_replacement` option by default, which
# makes an unquoted "&" in the replacement text expand to the matched pattern.
# A title such as "Upgrade & Migration" would then be rendered with the
# literal placeholder in place of the ampersand. Turn the option off so the
# replacement is always taken verbatim; older bash versions do not know the
# option, so a failure of `shopt` is ignored.
shopt -u patsub_replacement 2>/dev/null || true

# $(<file) drops trailing newlines; printf below adds exactly one back.
content=$(<"$TEMPLATE")
content="${content//\{\{title\}\}/$TITLE}"
content="${content//\{\{author\}\}/$AUTHOR}"
content="${content//\{\{status_metadata\}\}/$STATUS}"
content="${content//\{\{date\}\}/$DATE}"
content="${content//\{\{proposal_id\}\}/$PROPOSAL_ID}"
printf '%s\n' "$content" > "$DESTINATION"

echo "Created $DESTINATION"


================================================
FILE: oseps/osep-template.md.template
================================================
---
title: {{title}}
authors:
  - "{{author}}"
creation-date: {{date}}
last-updated: {{date}}
status: {{status_metadata}}
---

# OSEP-{{proposal_id}}: {{title}}

<!-- toc -->
- [Summary](#summary)
- [Motivation](#motivation)
  - [Goals](#goals)
  - [Non-Goals](#non-goals)
- [Requirements](#requirements)
- [Proposal](#proposal)
  - [Notes/Constraints/Caveats](#notesconstraintscaveats)
  - [Risks and Mitigations](#risks-and-mitigations)
- [Design Details](#design-details)
- [Test Plan](#test-plan)
- [Drawbacks](#drawbacks)
- [Alternatives](#alternatives)
- [Infrastructure Needed](#infrastructure-needed)
- [Upgrade & Migration Strategy](#upgrade--migration-strategy)
<!-- /toc -->

## Summary

<!--
Brief summary of the proposal. Describe the feature/change and why it matters.
Aim for 2-3 sentences that explain the problem and proposed solution.
-->

## Motivation

<!--
Explain why this work matters and the problem it solves.
What is the current pain point? Why should OpenSandbox care?
-->

### Goals

<!--
Specific, measurable objectives. What does success look like?
Examples:
- Reduce deployment time by X%
- Enable feature Y for users
- Improve reliability of Z
-->

### Non-Goals

<!--
Clarify what is intentionally out of scope.
What will NOT be addressed by this proposal?
-->

## Requirements

<!--
List any constraints that must be satisfied.
What are the hard requirements vs nice-to-haves?
-->

## Proposal

<!--
High-level description of the proposed approach.
Focus on what, not how. Avoid implementation details.
Include diagrams or examples if helpful.
-->

### Notes/Constraints/Caveats

<!--
(Optional) Any additional context that reviewers should know up front.
-->

### Risks and Mitigations

<!--
Highlight critical risks and how they will be managed.
Consider: performance, security, compatibility, operational aspects.
-->

## Design Details

<!--
Detailed implementation specifics:
- APIs and interfaces
- Data models and schema changes
- Algorithm or logic flow
- Configuration changes
-->

## Test Plan

<!--
Outline how the change will be verified and tested.
Include: unit tests, integration tests, QA/E2E tests, manual testing.
What scenarios must be covered?
-->

## Drawbacks

<!--
What arguments exist against this direction?
What are the trade-offs?
-->

## Alternatives

<!--
Summarize other approaches that were evaluated.
Why was this proposal chosen over alternatives?
-->

## Infrastructure Needed

<!--
(Optional) List any new tooling, repos, or environments required.
Do we need new services, storage, or third-party dependencies?
-->

## Upgrade & Migration Strategy

<!--
(Optional) Describe the migration path for users/operators if needed.
How will existing setups be upgraded? Are there breaking changes?
-->


================================================
FILE: sandboxes/code-interpreter/Dockerfile
================================================
# syntax=docker/dockerfile:1
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


FROM opensandbox/code-interpreter-base:latest

# NOTE(review): the RUN steps below use bash-only syntax (arrays, `pipefail`).
# This file sets no SHELL itself; presumably the bash shell declared by
# Dockerfile_base is inherited through the base image - confirm.
#
# Why every loop ends in `|| exit 1`: each `for` loop is a non-final operand
# of an `&&` list, and bash ignores `set -e` for everything executed in that
# position. Without the explicit exit, a failure in any iteration except the
# last would be silently skipped and the image would be built incomplete.

# Install Python kernels
RUN set -euo pipefail \
    && echo "Setting up ipykernel for Python 3.10, 3.11, 3.12, 3.13, 3.14" \
    && versions=("3.10" "3.11" "3.12" "3.13" "3.14") \
    && for version in "${versions[@]}"; do \
        echo "Setting up ipykernel for Python $version" \
        && . /opt/opensandbox/code-interpreter-env.sh python $version \
        && python3 --version \
        && python3 -m pip install ipykernel jupyter bash_kernel --break-system-packages \
        || exit 1; \
    done \
    && echo "Setting up ipykernel complete"

# Install Java kernel
# -f makes curl fail on an HTTP error instead of saving the error page as the zip.
RUN set -euo pipefail \
    && echo "Setting up IJava kernel" \
    && curl -fL https://github.com/SpencerPark/IJava/releases/download/v1.3.0/ijava-1.3.0.zip -o /tmp/ijava.zip \
    && mkdir -p /opt/ijava \
    && unzip -o /tmp/ijava.zip -d /opt/ijava \
    && rm -rf /tmp/ijava.zip \
    && echo "Setting up IJava kernel done"

# Install tslab for Node.js versions
RUN set -euo pipefail \
    && echo "Setting up tslab kernel" \
    && versions=("18" "20" "22") \
    && for version in "${versions[@]}"; do \
        echo "Setting up tslab for Node $version" \
        && . /opt/opensandbox/code-interpreter-env.sh node $version \
        && node --version \
        && npm --version \
        && npm install -g tslab \
        || exit 1; \
    done \
    && echo "Setting up tslab kernel done"

# Install Go tooling for gonb
RUN set -euo pipefail \
    && echo "Setting up gonb" \
    && versions=("1.25" "1.24" "1.23") \
    && for version in "${versions[@]}"; do \
        echo "Setting up gonb for Go $version" \
        && . /opt/opensandbox/code-interpreter-env.sh go $version \
        && go version \
        && go install github.com/janpfeifer/gonb@latest \
        && go install golang.org/x/tools/cmd/goimports@latest \
        && go install golang.org/x/tools/gopls@latest \
        || exit 1; \
    done \
    && echo "Setting up gonb done"

# Jupyter connection settings and the language versions selected by default.
# All of these can be overridden at `docker run` time with -e.
ENV JUPYTER_HOST=http://127.0.0.1:44771 \
    JUPYTER_PORT=44771 \
    JUPYTER_TOKEN=opensandboxcodeinterpreterjupyter \
    PYTHON_VERSION=3.14 \
    NODE_VERSION=22 \
    GO_VERSION=1.25 \
    JAVA_VERSION=21

# Startup script (used as the entrypoint) and the Jupyter configuration.
COPY scripts/code-interpreter.sh /opt/opensandbox/code-interpreter.sh
COPY scripts/jupyter_notebook_config.py /root/.jupyter/
RUN chmod +x /opt/opensandbox/code-interpreter.sh

WORKDIR /workspace
ENTRYPOINT ["/opt/opensandbox/code-interpreter.sh"]


================================================
FILE: sandboxes/code-interpreter/Dockerfile_base
================================================
# syntax=docker/dockerfile:1
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


FROM ubuntu:24.04

# Set by BuildKit to the target architecture (e.g. amd64, arm64). The Node and
# Go steps fall back to `uname -m` when it is empty.
ARG TARGETARCH

# Use bash for RUN commands to support source and arrays
SHELL ["/bin/bash", "-c"]

# Tool locations and the exact patch releases downloaded below.
ENV DEBIAN_FRONTEND=noninteractive \
    LANG=C.UTF-8 \
    MAVEN_VERSION=3.9.2 \
    MAVEN_HOME=/opt/maven \
    UV_PYTHON_INSTALL_DIR=/opt/python/versions \
    NODE_ROOT=/opt/node \
    GO_ROOT=/opt/go \
    NODE_V18=18.20.3 \
    NODE_V20=20.14.0 \
    NODE_V22=22.2.0 \
    GO_V1_25=1.25.5 \
    GO_V1_24=1.24.11 \
    GO_V1_23=1.23.12

# 1. Install common tools
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates curl wget git vim nano unzip zip tar build-essential \
    software-properties-common gnupg lsb-release \
    && rm -rf /var/lib/apt/lists/*

# 2. Install Java (8, 11, 17, 21)
RUN add-apt-repository universe && apt-get update && apt-get install -y --no-install-recommends \
    openjdk-8-jdk openjdk-11-jdk openjdk-17-jdk openjdk-21-jdk \
    && rm -rf /var/lib/apt/lists/*

# 3. Install Maven
RUN mkdir -p ${MAVEN_HOME} && \
    curl -fsSL https://archive.apache.org/dist/maven/maven-3/${MAVEN_VERSION}/binaries/apache-maven-${MAVEN_VERSION}-bin.tar.gz \
    | tar -xzC ${MAVEN_HOME} --strip-components=1 && \
    ln -s ${MAVEN_HOME}/bin/mvn /usr/local/bin/mvn

# 4. Install Python (3.10 - 3.14) using uv
# 3.14 is best-effort: a failed install is reported but does not fail the build.
RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
    mv /root/.local/bin/uv /usr/local/bin/uv && \
    mv /root/.local/bin/uvx /usr/local/bin/uvx && \
    mkdir -p /opt/python/versions && \
    uv python install 3.10 3.11 3.12 3.13 && \
    (uv python install 3.14 || echo "Python 3.14 skipped")

# 5. Install Node.js (18, 20, 22)
# Each release is unpacked to ${NODE_ROOT}/v<full version>.
# The loop body ends in `|| exit 1` because a `for` loop only reports the
# status of its last iteration: without it, a failed download of an earlier
# version would go unnoticed and the image would silently lack that version.
RUN mkdir -p ${NODE_ROOT} && \
    ARCH="" && \
    if [ -z "${TARGETARCH}" ]; then \
      case "$(uname -m)" in \
        x86_64) TARGETARCH="amd64" ;; \
        aarch64) TARGETARCH="arm64" ;; \
        *) echo "Unsupported architecture: $(uname -m)"; exit 1 ;; \
      esac; \
    fi && \
    case "${TARGETARCH}" in \
      "amd64") ARCH="x64" ;; \
      "arm64") ARCH="arm64" ;; \
      *) echo "Unsupported architecture: ${TARGETARCH}"; exit 1 ;; \
    esac && \
    cd ${NODE_ROOT} && \
    for v in ${NODE_V18} ${NODE_V20} ${NODE_V22}; do \
        curl -fsSL https://nodejs.org/dist/v${v}/node-v${v}-linux-${ARCH}.tar.xz | tar -xJ && \
        mv node-v${v}-linux-${ARCH} v${v} || exit 1; \
    done

# 6. Install Go (latest three major versions)
# Each release is unpacked to ${GO_ROOT}/<full version>. As in step 5, the
# explicit `|| exit 1` stops the build on the first version that fails.
RUN mkdir -p ${GO_ROOT} && \
    ARCH="" && \
    if [ -z "${TARGETARCH}" ]; then \
      case "$(uname -m)" in \
        x86_64) TARGETARCH="amd64" ;; \
        aarch64) TARGETARCH="arm64" ;; \
        *) echo "Unsupported architecture: $(uname -m)"; exit 1 ;; \
      esac; \
    fi && \
    case "${TARGETARCH}" in \
      "amd64") ARCH="amd64" ;; \
      "arm64") ARCH="arm64" ;; \
      *) echo "Unsupported architecture: ${TARGETARCH}"; exit 1 ;; \
    esac && \
    cd ${GO_ROOT} && \
    for v in ${GO_V1_25} ${GO_V1_24} ${GO_V1_23}; do \
        curl -fsSL https://go.dev/dl/go${v}.linux-${ARCH}.tar.gz | tar -xz && \
        mv go ${v} || exit 1; \
    done

# 7. Configure defaults & env script
# Node 22 and Go 1.25 are put on PATH as the image-level defaults.
ENV GOROOT=${GO_ROOT}/${GO_V1_25} \
    PATH="${NODE_ROOT}/v${NODE_V22}/bin:${GO_ROOT}/${GO_V1_25}/bin:${PATH}"

RUN mkdir -p /opt/opensandbox
COPY scripts/code-interpreter-env.sh /opt/opensandbox/code-interpreter-env.sh
RUN chmod +x /opt/opensandbox/code-interpreter-env.sh

CMD ["/bin/bash"]


================================================
FILE: sandboxes/code-interpreter/README.md
================================================
# OpenSandbox Code Interpreter Environment

English | [中文](README_zh.md)

This directory contains the Docker build files for the Code Interpreter sandbox. The image is based on `Ubuntu 24.04`
and comes pre-installed with multiple mainstream programming languages and their multi-version environments, designed to
provide an out-of-the-box multi-language code execution environment.

## Features

- **Multi-Language Support**: Pre-installed Python, Java, Node.js, and Go with multiple versions
- **Version Switching**: Easy runtime version switching without rebuilding
- **Jupyter Integration**: Built-in Jupyter Notebook with multi-language kernels
- **Multi-Architecture**: Supports both amd64 and arm64 architectures
- **Production Ready**: Optimized for containerized execution environments

## Supported Languages & Versions

The image comes pre-installed with the following languages and versions:

| Language    | Supported Versions            | Installation Path      | Notes                                    |
|:------------|:------------------------------|:-----------------------|:-----------------------------------------|
| **Python**  | 3.10, 3.11, 3.12, 3.13, 3.14* | `/opt/python/versions` | Installed via `uv`; 3.14 is experimental |
| **Java**    | 8, 11, 17, 21                 | `/usr/lib/jvm`         | OpenJDK; includes Maven 3.9.2            |
| **Node.js** | v18, v20, v22                 | `/opt/node`            | Official Linux binaries                  |
| **Go**      | 1.23, 1.24, 1.25              | `/opt/go`              | Official Linux binaries                  |

*> Note: Version numbers may be updated to the latest patch versions at build time.*

## Quick Start

### 1. Build the Image

Since multi-architecture (amd64/arm64) is supported, it's recommended to use Docker Buildx:

```bash
# Navigate to the directory
cd sandboxes/code-interpreter

# Build local image
docker build -t opensandbox/code-interpreter:latest .

# For multi-architecture builds (requires Docker Buildx)
docker buildx build --platform linux/amd64,linux/arm64 \
  -t opensandbox/code-interpreter:latest .
```

### 2. Run the Container

**With Custom Version Selection:**

```bash
docker run -it --rm \
  -e PYTHON_VERSION=3.11 \
  -e JAVA_VERSION=17 \
  -e NODE_VERSION=20 \
  -e GO_VERSION=1.24 \
  opensandbox/code-interpreter:latest
```

## Version Switching

The image includes a built-in version switching script `/opt/opensandbox/code-interpreter-env.sh`. You need to use the
`source` command to load it to modify the current shell's environment variables.

### Basic Usage

```bash
source /opt/opensandbox/code-interpreter-env.sh <language> <version>
```

### Examples

**Switch Python Version:**

```bash
# Switch to Python 3.11
source /opt/opensandbox/code-interpreter-env.sh python 3.11
python3 --version
# Output: Python 3.11.x
```

**Switch Java Version:**

```bash
# Switch to Java 8
source /opt/opensandbox/code-interpreter-env.sh java 8
java -version
```

**Switch Node.js Version:**

```bash
# Switch to Node 22
source /opt/opensandbox/code-interpreter-env.sh node 22
node -v
```

**Switch Go Version:**

```bash
# Switch to Go 1.25
source /opt/opensandbox/code-interpreter-env.sh go 1.25
go version
```

### List Available Versions

If you don't specify a version number, the script will list all available versions installed in the current image:

```bash
# List all Python versions
source /opt/opensandbox/code-interpreter-env.sh python

# List all Java versions
source /opt/opensandbox/code-interpreter-env.sh java

# List all Node.js versions
source /opt/opensandbox/code-interpreter-env.sh node

# List all Go versions
source /opt/opensandbox/code-interpreter-env.sh go
```

## Default Versions

The default version configuration when the container starts:

- **Python**: 3.14
- **Java**: 21
- **Node.js**: 22
- **Go**: 1.25

To permanently change the defaults at the Dockerfile level, adjust the `PYTHON_VERSION`, `JAVA_VERSION`,
`NODE_VERSION`, and `GO_VERSION` values in the `ENV` block near the bottom of the `Dockerfile`.

## Jupyter Notebook Integration

### Available Kernels

The image comes with pre-configured Jupyter kernels for all supported languages:

- **Python**: ipykernel for all Python versions
- **Java**: IJava kernel
- **TypeScript/JavaScript**: tslab kernel
- **Go**: gonb kernel
- **Bash**: bash_kernel

### Starting Jupyter

```bash
/opt/opensandbox/code-interpreter.sh
```

### Environment Variables

- `JUPYTER_HOST`: Jupyter server host (default: `http://127.0.0.1:44771`)
- `JUPYTER_PORT`: Jupyter server port (default: `44771`)
- `JUPYTER_TOKEN`: Access token (default: `opensandboxcodeinterpreterjupyter`)

## Advanced Usage

### Persistent Workspace

Mount a local directory to persist your work:

```bash
docker run -it --rm \
  -v $(pwd)/workspace:/workspace \
  opensandbox/code-interpreter:latest
```

### Custom Configuration

Override Jupyter configuration:

```bash
docker run -it --rm \
  -v $(pwd)/jupyter_config.py:/root/.jupyter/jupyter_notebook_config.py \
  opensandbox/code-interpreter:latest
```

### Install Additional Packages

**Python:**

```bash
python3 -m pip install pandas numpy --break-system-packages
```

**Node.js:**

```bash
npm install -g typescript
```

**Go:**

```bash
go install github.com/user/package@latest
```

**Java:**

```bash
mvn install dependency:copy-dependencies
```

## Architecture

```
code-interpreter/
├── Dockerfile                          # Main build file
├── Dockerfile_base                     # Base build file
├── README.md                           # This file
├── README_zh.md                        # Chinese README
└── scripts/
    ├── code-interpreter-env.sh         # Version switching script
    ├── code-interpreter.sh             # Jupyter startup script
    └── jupyter_notebook_config.py      # Jupyter configuration
```

## Troubleshooting

If a specific version is not found, list available versions:

```bash
source /opt/opensandbox/code-interpreter-env.sh <language>
```

## License

This project is part of the OpenSandbox suite. See the main [LICENSE](../../LICENSE) file for details.

## Support

For issues and questions:

- GitHub Issues: [OpenSandbox Issues](https://github.com/alibaba/OpenSandbox/issues)

## Related Projects

- [OpenSandbox](../../) - Main project
- [Server](../../server/) - Server implementation
- [Execd](../../components/execd/) - Runtime execution engine


================================================
FILE: sandboxes/code-interpreter/README_zh.md
================================================
# OpenSandbox Code Interpreter 环境

中文 | [English](README.md)

这个目录包含了 Code Interpreter 沙箱的 Docker 构建文件。该镜像基于 `Ubuntu 24.04`，并预装了多种主流编程语言及其多版本环境，旨在提供一个开箱即用的多语言代码执行环境。

## 特性

- **多语言支持**：预装 Python、Java、Node.js 和 Go 及其多个版本
- **版本切换**：无需重新构建，支持运行时快速切换版本
- **Jupyter 集成**：内置 Jupyter Notebook 并支持多语言内核
- **多架构支持**：同时支持 amd64 和 arm64 架构
- **生产就绪**：针对容器化执行环境进行了优化

## 支持的语言与版本

镜像内预置了以下语言和版本：

| 语言          | 支持版本                          | 安装路径                   | 备注                     |
|:------------|:------------------------------|:-----------------------|:-----------------------|
| **Python**  | 3.10, 3.11, 3.12, 3.13, 3.14* | `/opt/python/versions` | 使用 `uv` 安装；3.14 为实验性版本 |
| **Java**    | 8, 11, 17, 21                 | `/usr/lib/jvm`         | OpenJDK; 含 Maven 3.9.2 |
| **Node.js** | v18, v20, v22                 | `/opt/node`            | 官方 Linux 二进制包          |
| **Go**      | 1.23, 1.24, 1.25              | `/opt/go`              | 官方 Linux 二进制包          |

*> 注意: 版本号可能会随构建时间更新至小版本的最新版。*

## 快速开始

### 1. 构建镜像

由于支持多架构（amd64/arm64），建议使用 Docker Buildx 构建：

```bash
# 进入目录
cd sandboxes/code-interpreter

# 构建本地镜像
docker build -t sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:latest .

# 多架构构建（需要 Docker Buildx）
docker buildx build --platform linux/amd64,linux/arm64 \
  -t sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:latest .
```

### 2. 运行容器

**指定自定义版本：**

```bash
docker run -it --rm \
  -e PYTHON_VERSION=3.11 \
  -e JAVA_VERSION=17 \
  -e NODE_VERSION=20 \
  -e GO_VERSION=1.24 \
  sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:latest
```

## 如何切换版本

镜像内置了一个环境切换脚本 `/opt/opensandbox/code-interpreter-env.sh`，你需要使用 `source` 命令加载它来修改当前 Shell
的环境变量。

### 基本用法

```bash
source /opt/opensandbox/code-interpreter-env.sh <language> <version>
```

### 示例

**切换 Python 版本：**

```bash
# 切换到 Python 3.11
source /opt/opensandbox/code-interpreter-env.sh python 3.11
python3 --version
# Output: Python 3.11.x
```

**切换 Java 版本：**

```bash
# 切换到 Java 8
source /opt/opensandbox/code-interpreter-env.sh java 8
java -version
```

**切换 Node.js 版本：**

```bash
# 切换到 Node 22
source /opt/opensandbox/code-interpreter-env.sh node 22
node -v
```

**切换 Go 版本：**

```bash
# 切换到 Go 1.25
source /opt/opensandbox/code-interpreter-env.sh go 1.25
go version
```

### 查看可用版本

如果不指定版本号，脚本会列出当前镜像内已安装的可用版本：

```bash
# 查看所有 Python 版本
source /opt/opensandbox/code-interpreter-env.sh python

# 查看所有 Java 版本
source /opt/opensandbox/code-interpreter-env.sh java

# 查看所有 Node.js 版本
source /opt/opensandbox/code-interpreter-env.sh node

# 查看所有 Go 版本
source /opt/opensandbox/code-interpreter-env.sh go
```

## 默认版本

容器启动时的默认版本配置如下：

- **Python**: 3.14
- **Java**: 21
- **Node.js**: 22
- **Go**: 1.25

如需在 Dockerfile 层面永久修改默认版本，请调整 `Dockerfile` 底部 `ENV` 块中的 `PYTHON_VERSION`、`JAVA_VERSION`、`NODE_VERSION` 和 `GO_VERSION`。

## Jupyter Notebook 集成

### 可用内核

镜像预装了所有支持语言的 Jupyter 内核：

- **Python**：所有 Python 版本的 ipykernel
- **Java**：IJava 内核
- **TypeScript/JavaScript**：tslab 内核
- **Go**：gonb 内核
- **Bash**：bash_kernel

### 启动 Jupyter

```bash
/opt/opensandbox/code-interpreter.sh
```

### 环境变量

- `JUPYTER_HOST`：Jupyter 服务器地址（默认：`http://127.0.0.1:44771`）
- `JUPYTER_PORT`：Jupyter 服务器端口（默认：`44771`）
- `JUPYTER_TOKEN`：访问令牌（默认：`opensandboxcodeinterpreterjupyter`）

## 高级用法

### 持久化工作空间

挂载本地目录以持久化您的工作：

```bash
docker run -it --rm \
  -v $(pwd)/workspace:/workspace \
  sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:latest
```

### 自定义配置

覆盖 Jupyter 配置：

```bash
docker run -it --rm \
  -v $(pwd)/jupyter_config.py:/root/.jupyter/jupyter_notebook_config.py \
  sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:latest
```

### 安装额外的包

**Python：**

```bash
python3 -m pip install pandas numpy --break-system-packages
```

**Node.js：**

```bash
npm install -g typescript
```

**Go：**

```bash
go install github.com/user/package@latest
```

**Java：**

```bash
mvn install dependency:copy-dependencies
```

## 架构说明

```
code-interpreter/
├── Dockerfile                          # 镜像Dockerfile
├── Dockerfile_base                     # 基础镜像Dockerfile
├── README.md                           # 英文文档
├── README_zh.md                        # 本文件
└── scripts/
    ├── code-interpreter-env.sh         # 版本切换脚本
    ├── code-interpreter.sh             # Jupyter 启动脚本
    └── jupyter_notebook_config.py      # Jupyter 配置文件
```

## 许可证

此项目是 OpenSandbox 套件的一部分。详情请参阅主 [LICENSE](../../LICENSE) 文件。

## 支持

问题和疑问：

- GitHub Issues: [OpenSandbox Issues](https://github.com/alibaba/OpenSandbox/issues)

## 相关项目

- [OpenSandbox](../../) - 主项目
- [Server](../../server/) - 服务器实现
- [Execd](../../components/execd/) - 运行时执行引擎


================================================
FILE: sandboxes/code-interpreter/build.sh
================================================
#!/bin/bash
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Build and push the multi-architecture code-interpreter image.
# -e: stop at the first failing command; -x: echo each command as it runs.
set -ex

# Image tag to publish; override with e.g. `TAG=v1.2.3 ./build.sh`.
TAG=${TAG:-latest}

# Start from a fresh buildx builder. Removal fails when no builder with this
# name exists yet, which is fine, hence `|| true`.
docker buildx rm code-interpreter-builder || true

docker buildx create --use --name code-interpreter-builder

# Start the builder and print its details.
docker buildx inspect --bootstrap

docker buildx ls

# Base image build, currently disabled. Uncomment to rebuild and push
# opensandbox/code-interpreter-base from Dockerfile_base.
#docker buildx build -t opensandbox/code-interpreter-base:${TAG} \
#  --platform linux/amd64,linux/arm64 \
#  -f Dockerfile_base \
#  --push \
#  .

# Build ./Dockerfile for amd64 and arm64 and push it under both image names.
docker buildx build \
  -t opensandbox/code-interpreter:${TAG} \
  -t sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:${TAG} \
  --platform linux/amd64,linux/arm64 \
  --push \
  .


================================================
FILE: sandboxes/code-interpreter/scripts/code-interpreter-env.sh
================================================
#!/bin/bash
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script is used to switch versions of different languages in the OpenSandbox environment
# Usage: source /opt/opensandbox/code-interpreter-env.sh <language> <version>
# Examples:
#   source /opt/opensandbox/code-interpreter-env.sh python 3.13
#   source /opt/opensandbox/code-interpreter-env.sh java 21
#   source /opt/opensandbox/code-interpreter-env.sh node 22
#   source /opt/opensandbox/code-interpreter-env.sh go 1.25

function usage() {
	# Print a two-line synopsis: how to source this script and which
	# languages it can switch.
	printf '%s\n' \
		"Usage: source code-interpreter-env.sh <language> <version>" \
		"Supported languages: python, java, node, go"
}

# Per-language fallback versions. A value that is already set and non-empty
# when this script is sourced is kept; otherwise the default below is assigned.
: "${DEFAULT_PY_VERSION:=3.13}"
: "${DEFAULT_JAVA_VERSION:=21}"
: "${DEFAULT_NODE_VERSION:=22}"
: "${DEFAULT_GO_VERSION:=1.25}"

append_env_if_needed() {
	# Append a "KEY=VALUE" line to the file named by $EXECD_ENVS.
	#   $1  variable name
	#   $2  variable value
	# Does nothing when EXECD_ENVS is unset or empty. Creating the parent
	# directory and writing the line are both best-effort: errors are
	# discarded and never reported to the caller.
	local name=$1
	local val=$2
	local sink="${EXECD_ENVS:-}"

	[ -n "$sink" ] || return 0

	mkdir -p "$(dirname "$sink")" 2>/dev/null || true
	printf '%s=%s\n' "$name" "$val" >>"$sink" 2>/dev/null || true
}

function switch_python() {
	# Put the requested Python on PATH, or list installed versions.
	#   $1  version to select, e.g. "3.13" or a full "3.13.1".
	#       When empty, the installed versions are printed instead.
	# On success PATH is updated (and recorded via append_env_if_needed);
	# when no installation matches, a message is printed and PATH is untouched.
	local version=${1:-}
	if [ -z "$version" ]; then
		echo "Available Python versions:"
		find /opt/python/versions -maxdepth 1 -name "cpython-*" -type d -printf "%f\n" | cut -d'-' -f2 | sort -V
		return
	fi

	# Find matching version directory. The requested version must be followed
	# by "." (more version components) or "-" (the platform suffix), so that
	# "3.1" does not match 3.10 through 3.14. With several matches the highest
	# one in version order wins.
	local target_dir
	target_dir=$(find /opt/python/versions -maxdepth 1 -type d \
		\( -name "cpython-${version}.*" -o -name "cpython-${version}-*" \) | sort -V | tail -n 1) || true

	if [ -d "$target_dir" ]; then
		export PATH="$target_dir/bin:$PATH"
		append_env_if_needed PATH "$PATH"
		echo "Switched to Python $(python3 --version)"
	else
		echo "Python version $version not found."
	fi
}

function switch_java() {
	# Select an installed OpenJDK, or list installed major versions.
	#   $1  Java major version, e.g. "17". When empty, the versions found
	#       under /usr/lib/jvm are printed instead.
	# On success JAVA_HOME and PATH are exported and recorded via
	# append_env_if_needed; otherwise a "not found" message is printed.
	local version=$1
	if [ -z "$version" ]; then
		echo "Available Java versions:"
		ls /usr/lib/jvm/ | grep -E '^java-[0-9]+-openjdk' | cut -d'-' -f2 | sort -V | uniq
		return
	fi

	# The JVM directory name carries an architecture suffix: try amd64 first,
	# then arm64.
	local java_home="" arch candidate
	for arch in amd64 arm64; do
		candidate="/usr/lib/jvm/java-${version}-openjdk-${arch}"
		if [ -d "$candidate" ]; then
			java_home="$candidate"
			break
		fi
	done

	if [ -z "$java_home" ]; then
		echo "Java version $version not found."
		return
	fi

	export JAVA_HOME="$java_home"
	export PATH="$JAVA_HOME/bin:$PATH"
	append_env_if_needed JAVA_HOME "$JAVA_HOME"
	append_env_if_needed PATH "$PATH"
	echo "Switched to Java $version ($JAVA_HOME)"
}

function switch_node() {
	# Put the requested Node.js on PATH, or list installed versions.
	#   $1  version to select, e.g. "22", "22.2" or a full "22.2.0".
	#       When empty, the contents of /opt/node are printed instead.
	# On success PATH is updated (and recorded via append_env_if_needed);
	# when no installation matches, a message is printed and PATH is untouched.
	local version=${1:-}
	if [ -z "$version" ]; then
		echo "Available Node versions:"
		ls /opt/node/
		return
	fi

	# Find matching version (e.g. 18 -> v18.x.x). Accept either the exact
	# directory name or the requested version followed by ".", so that "2"
	# does not match v20/v22 and "1" does not match v18. With several matches
	# the highest one in version order wins.
	local target_dir
	target_dir=$(find /opt/node -maxdepth 1 -type d \
		\( -name "v${version}" -o -name "v${version}.*" \) | sort -V | tail -n 1) || true

	if [ -d "$target_dir" ]; then
		export PATH="$target_dir/bin:$PATH"
		append_env_if_needed PATH "$PATH"
		echo "Switched to Node $(node --version)"
	else
		echo "Node version $version not found."
	fi
}

# Activate a Go toolchain from /opt/go for the current shell.
#   $1 - version prefix matched against directory names under /opt/go;
#        when empty, the contents of /opt/go are listed instead.
# When several installs match, the highest one (version sort) wins.
# GOROOT and PATH are exported and handed to append_env_if_needed.
function switch_go() {
	local requested=$1
	if [ -z "$requested" ]; then
		echo "Available Go versions:"
		ls /opt/go/
		return
	fi

	# Highest installed directory whose name starts with <requested>
	local install_dir=$(find /opt/go -maxdepth 1 -type d -name "${requested}*" | sort -V | tail -n 1)

	if [ ! -d "$install_dir" ]; then
		echo "Go version $requested not found."
		return
	fi

	export GOROOT="$install_dir"
	export PATH="$GOROOT/bin:$PATH"
	append_env_if_needed GOROOT "$GOROOT"
	append_env_if_needed PATH "$PATH"
	echo "Switched to Go $(go version)"
}

# Main logic
#
# Usage (sourced): source code-interpreter-env.sh <python|java|node|go> [version]
#
# This file is sourced (note the top-level `return` below), so every variable
# assigned here lands in the caller's shell. The language argument is therefore
# kept in TARGET_LANG rather than LANG: LANG is the locale variable, and
# overwriting it with e.g. "python" would change the locale of the calling shell
# and of everything it launches afterwards.
TARGET_LANG=$1
VER=$2

if [ -z "$TARGET_LANG" ]; then
	usage
	return
fi

# When no version is given, fall back to the per-language default.
case $TARGET_LANG in
python)
	if [ -z "$VER" ]; then
		VER=$DEFAULT_PY_VERSION
	fi
	switch_python "$VER"
	;;
java)
	if [ -z "$VER" ]; then
		VER=$DEFAULT_JAVA_VERSION
	fi
	switch_java "$VER"
	;;
node)
	if [ -z "$VER" ]; then
		VER=$DEFAULT_NODE_VERSION
	fi
	switch_node "$VER"
	;;
go)
	if [ -z "$VER" ]; then
		VER=$DEFAULT_GO_VERSION
	fi
	switch_go "$VER"
	;;
*)
	echo "Unsupported language: $TARGET_LANG"
	usage
	;;
esac


================================================
FILE: sandboxes/code-interpreter/scripts/code-interpreter.sh
================================================
#!/bin/bash
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# PIDs of the background kernel-setup jobs started further down in this script.
declare -a pids=()
# Shell rc file that receives the persisted runtime selections; can be
# overridden through the BASHRC_FILE environment variable.
BASHRC_FILE=${BASHRC_FILE:-/root/.bashrc}

# Persist a runtime selection in $BASHRC_FILE so that new shells re-apply it.
#   $1 - language name passed to code-interpreter-env.sh
#   $2 - version to pin; nothing is written when it is empty
# Any earlier `source ... <language>` line for the same language is dropped and
# the new line is appended at the end of the file.
record_env_selection() {
	local runtime=$1
	local pinned=$2

	[ -n "$pinned" ] || return 0

	local source_cmd="source /opt/opensandbox/code-interpreter-env.sh ${runtime}"
	local scratch
	scratch=$(mktemp)

	if [ -f "$BASHRC_FILE" ]; then
		# grep exits non-zero when every line is filtered out; that is fine here.
		grep -vE "^${source_cmd}(\\s|$)" "$BASHRC_FILE" >"$scratch" || true
	else
		: >"$scratch"
	fi

	echo "${source_cmd} ${pinned}" >>"$scratch"
	mv "$scratch" "$BASHRC_FILE"
}

# Activate each runtime in a fixed order (python, java, node, go). When the
# matching <LANG>_VERSION variable is set and non-empty, that version is
# activated and recorded in the rc file; otherwise the env script is sourced
# with the language name only.
for runtime_spec in python:PYTHON_VERSION java:JAVA_VERSION node:NODE_VERSION go:GO_VERSION; do
	runtime_name=${runtime_spec%%:*}
	runtime_version_var=${runtime_spec#*:}
	runtime_version=${!runtime_version_var:-}

	if [ -z "$runtime_version" ]; then
		source /opt/opensandbox/code-interpreter-env.sh "$runtime_name"
		continue
	fi

	source /opt/opensandbox/code-interpreter-env.sh "$runtime_name" "$runtime_version"
	record_env_selection "$runtime_name" "$runtime_version"
done

# Print the active Python version, then register the "Python" Jupyter kernel
# (timed).
setup_python() {
	python --version
	time python3 -m ipykernel install --name python --display-name "Python"
}

# Run the IJava installer script with --sys-prefix (timed).
setup_java() {
	time python3 /opt/ijava/install.py --sys-prefix
}

# setup node
# Install tslab globally via npm, then run its install step (timed together).
setup_node() {
	time {
		npm install -g tslab
		tslab install
	}
}

# Run the gonb installer (timed).
setup_go() {
	time gonb --install
}

# Run the bash_kernel installer module (timed).
setup_bash() {
	time python3 -m bash_kernel.install
}

# Put binaries installed under GOPATH on PATH, and mirror the final PATH into
# the EXECD_ENVS file when that variable is set (best effort: failures ignored).
export PATH="$(go env GOPATH)/bin:$PATH"
if [ -n "${EXECD_ENVS:-}" ]; then
	mkdir -p "$(dirname "$EXECD_ENVS")" 2>/dev/null || true
	printf 'PATH=%s\n' "$PATH" >>"$EXECD_ENVS" 2>/dev/null || true
fi

# Start every kernel installer in the background and remember its PID.
for setup_task in setup_python setup_java setup_node setup_go setup_bash; do
	"$setup_task" &
	pids+=($!)
done

# Run Jupyter in the foreground; only its stdout is redirected to the log file.
jupyter notebook \
	--ip=127.0.0.1 \
	--port="${JUPYTER_PORT:-44771}" \
	--allow-root \
	--no-browser \
	--NotebookApp.token="${JUPYTER_TOKEN:-opensandboxcodeinterpreterjupyter}" \
	>/opt/opensandbox/jupyter.log


================================================
FILE: sandboxes/code-interpreter/scripts/jupyter_notebook_config.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This code is based on or derived from Jupyter Notebook
# Copyright (c) 2015 Jupyter Development Team
# Licensed under BSD 3-Clause License
# https://github.com/jupyter/notebook/blob/main/LICENSE

# Configuration file for notebook.

# `get_config` is neither defined nor imported in this file; it is presumably
# injected by Jupyter's config loader when the file is executed, which would
# explain the `noqa` marker — TODO confirm. No options are set on `c` here.
c = get_config()  #noqa


================================================
FILE: scripts/add-license.sh
================================================
#!/bin/bash

# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Add Apache 2.0 license headers to source files that are missing them.
# Usage: run from repo root: ./scripts/add-license.sh

set -euo pipefail

# Year stamped into newly added headers.
LICENSE_YEAR="$(date +%Y)"
LICENSE_OWNER="Alibaba Group Holding Ltd."
# Extended regex that identifies an existing header: any 4-digit year followed
# by the owner name.
LICENSE_MARKER_REGEX="Copyright [0-9]{4} ${LICENSE_OWNER}"
# Uncommented header text; __YEAR__ is substituted when a header is added.
LICENSE_TEXT_TEMPLATE="$(
  cat <<'EOF'
Copyright __YEAR__ Alibaba Group Holding Ltd.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
EOF
)"

# Basenames to include regardless of extension.
INCLUDE_BASENAMES=(
  "Dockerfile"
)

# Paths to ignore (matched against the repo-relative path and its sub-paths).
IGNORES=(
  ".git" "node_modules" ".venv" "venv"
  "dist" "build" "__pycache__"
  "LICENSE" "NOTICE"
  "README.md" "README_zh.md"
  "scripts/spec-doc/index.html"
)

# Succeed when one of the first 40 lines of a file matches LICENSE_MARKER_REGEX.
#   $1 - path of the file to inspect
# `head` is fed through process substitution instead of a pipeline: with
# `pipefail` enabled, `head | grep -q` can report failure when grep exits on its
# first match and head is then killed by SIGPIPE, which would make a licensed
# file look unlicensed.
has_license() {
  local file="$1"
  grep -Eq "$LICENSE_MARKER_REGEX" < <(head -n 40 "$file")
}

# Render header text as a comment in the requested style and print it.
#   $1 - style: "line:#", "line://", "block:html" or "block:css"
#   $2 - uncommented header text (may span several lines)
# Line styles prefix every line (blank ones included) with the marker and a
# space; block styles wrap the text in the opening/closing delimiters.
# Returns 1 without printing anything for an unknown style.
comment_header() {
  local style="$1"
  local text="$2"
  local marker line

  case "$style" in
    "line:#") marker="# " ;;
    "line://") marker="// " ;;
    "block:html")
      printf "<!--\n%s\n-->\n" "$text"
      return
      ;;
    "block:css")
      printf "/*\n%s\n*/\n" "$text"
      return
      ;;
    *) return 1 ;;
  esac

  while IFS= read -r line; do
    printf '%s%s\n' "$marker" "$line"
  done <<<"$text"
}

# Succeed when a path equals an IGNORES entry or lies underneath one.
#   $1 - repo-relative path
# Entries are compared literally against the start of the path, so an entry
# only matches at the root of the path, not in nested directories.
should_ignore_path() {
  local path="$1"
  local entry
  for entry in "${IGNORES[@]}"; do
    case "$path" in
      "$entry" | "$entry"/*) return 0 ;;
    esac
  done
  return 1
}

# Succeed when a path is a Go mock under kubernetes/internal, i.e. it either
# ends with _mock.go or is a .go file below a /mock/ directory.
#   $1 - repo-relative path (an empty or missing argument yields failure)
is_k8s_mock_go() {
  local path="${1-}"

  # Only paths below kubernetes/internal are candidates.
  case "$path" in
    kubernetes/internal/*) ;;
    *) return 1 ;;
  esac

  case "$path" in
    *"_mock.go" | */mock/*.go) return 0 ;;
  esac
  return 1
}

# Succeed when a Go file looks generated: either its basename follows a
# generated-file naming pattern, or one of its first five lines contains
# "code generated" (case-insensitive).
#   $1 - path of the file (an empty path or a non-.go extension yields failure)
is_generated_go() {
  local path="${1-}"
  if [[ -z "$path" || "${path##*.}" != "go" ]]; then
    return 1
  fi

  local name
  name="$(basename "$path")"
  if [[ "$name" == zz_generated.* || "$name" == *.pb.go || "$name" == *_pb.go ||
    "$name" == *_gen.go || "$name" == *_generated.go ]]; then
    return 0
  fi

  if head -n 5 "$path" | grep -qi "code generated"; then
    return 0
  else
    return 1
  fi
}

# Print the comment style to use for a file, or an empty line when the file
# type is not handled.
#   $1 - path of the file
# The extension decides first; files without a known extension fall back to a
# basename lookup in INCLUDE_BASENAMES, which maps to "line:#".
style_for_file() {
  local path="${1-}"
  local chosen=""

  if [[ -n "$path" ]]; then
    local name suffix candidate
    name="$(basename "$path")"
    suffix="${path##*.}"

    case "$suffix" in
      sh|py|toml|tf|sql) chosen="line:#" ;;
      go|java|kt|kts|ts|tsx|js|jsx|mjs|cjs|mts|cts) chosen="line://" ;;
      css) chosen="block:css" ;;
      html) chosen="block:html" ;;
      *)
        for candidate in "${INCLUDE_BASENAMES[@]}"; do
          if [[ "$name" == "$candidate" ]]; then
            chosen="line:#"
            break
          fi
        done
        ;;
    esac
  fi

  echo "$chosen"
}

# Add the license header to a single file when it needs one.
#   $1 - repo-relative path of the file
# The file is left untouched when it is ignored, is a kubernetes/internal Go
# mock, has no known comment style, looks generated, or already has a header.
# Otherwise the header is inserted after the shebang line when there is one,
# or at the very top of the file, and "Added license: <file>" is printed.
process_file() {
  local file="${1-}"
  [[ -z "$file" ]] && return
  local style

  if should_ignore_path "$file"; then
    return
  fi

  if is_k8s_mock_go "$file"; then
    return
  fi

  style="$(style_for_file "$file")"
  [[ -z "$style" ]] && return

  if is_generated_go "$file"; then
    return
  fi

  if has_license "$file"; then
    return
  fi

  local license_text header
  license_text="${LICENSE_TEXT_TEMPLATE/__YEAR__/$LICENSE_YEAR}"
  header="$(comment_header "$style" "$license_text")"

  # Build the new content in a scratch file by streaming the original body.
  # Capturing the body with $(...) would strip its trailing newline(s), leaving
  # the rewritten file without a final newline.
  local first_line scratch
  first_line="$(head -n1 "$file")"
  scratch="$(mktemp)"

  if [[ "$first_line" == '#!'* ]]; then
    # Respect shebang: keep it on line 1 and insert the header right after it.
    {
      printf '%s\n\n%s\n\n' "$first_line" "$header"
      tail -n +2 "$file"
    } >"$scratch"
  else
    # Everything else, HTML starting with a DOCTYPE included, gets the header
    # prepended at the very top.
    {
      printf '%s\n\n' "$header"
      cat "$file"
    } >"$scratch"
  fi

  # Overwrite in place (rather than mv) so the file keeps its mode and owner.
  cat "$scratch" >"$file"
  rm -f "$scratch"
  echo "Added license: $file"
}

# Entry point: run process_file over the git-tracked files.
#   $@ - optional pathspecs limiting the files; with none, every tracked file
#        is processed.
# The NUL byte is only emitted when `git ls-files` succeeds, so `read` fails
# (and `set -e` aborts the script) when git itself fails.
main() {
  local -a tracked
  local path
  # With no arguments "$@" expands to nothing and `git ls-files --` lists all
  # tracked files, so one invocation covers both cases.
  IFS=$'\n' read -r -d '' -a tracked < <(git ls-files -- "$@" && printf '\0')
  for path in "${tracked[@]}"; do
    process_file "$path"
  done
}

main "$@"


================================================
FILE: scripts/bump-component-version.sh
================================================
#!/bin/bash
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Bump the egress, execd, or code-interpreter image version across the entire project.
# Usage: from repo root:
#   ./scripts/bump-component-version.sh egress v1.0.2
#   ./scripts/bump-component-version.sh execd v1.0.7
#   ./scripts/bump-component-version.sh v1.0.2              # same as: egress v1.0.2

set -euo pipefail

# Always operate from the repository root, wherever the script is called from.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$REPO_ROOT"

# Parse args: [egress|execd|code-interpreter] NEW_VERSION  or  NEW_VERSION (default egress)
COMPONENT=""
NEW_VERSION=""
if [ $# -eq 1 ]; then
  COMPONENT="egress"
  NEW_VERSION="$1"
elif [ $# -eq 2 ]; then
  COMPONENT="$1"
  NEW_VERSION="$2"
else
  echo "Usage: $0 [egress|execd|code-interpreter] NEW_VERSION" >&2
  echo "       $0 NEW_VERSION   # bumps egress" >&2
  echo "Example: $0 egress v1.0.2" >&2
  echo "Example: $0 execd 1.0.7" >&2
  exit 1
fi

case "$COMPONENT" in
  egress|execd|code-interpreter) ;;
  *)
    echo "Error: unsupported component: $COMPONENT" >&2
    # Fail with a non-zero status so callers and CI notice the bad argument.
    exit 1
    ;;
esac

# Normalize version: ensure 'v' prefix
if [[ ! "$NEW_VERSION" =~ ^v ]]; then
  NEW_VERSION="v${NEW_VERSION}"
fi

# Pattern and replacement for this component (e.g. egress:vX.Y.Z -> egress:NEW_VERSION)
PATTERN="${COMPONENT}:v[0-9]+\.[0-9]+\.[0-9]+"
REPLACEMENT="${COMPONENT}:${NEW_VERSION}"

# Collect every file that mentions <component>:vX.Y.Z, skipping VCS metadata
# and dependency/cache directories.
files=()
while IFS= read -r f; do
  [ -n "$f" ] && files+=("$f")
done < <(grep -rEl --exclude-dir=.git --exclude-dir=__pycache__ --exclude-dir=.venv --exclude-dir=node_modules "$PATTERN" . 2>/dev/null || true)

# Check the count up front: expanding an empty array under `set -u` is an
# "unbound variable" error on bash older than 4.4.
if [ "${#files[@]}" -eq 0 ]; then
  echo "No files were updated (no matches for $PATTERN)." >&2
  exit 1
fi

updated=0
for f in "${files[@]}"; do
  [ -f "$f" ] || continue
  # Hand pattern and replacement to perl through the environment instead of
  # splicing them into the program text, so characters such as '/', '$' or '@'
  # in the version can neither break nor alter the substitution.
  if PATTERN="$PATTERN" REPLACEMENT="$REPLACEMENT" \
    perl -i -pe 's/$ENV{PATTERN}/$ENV{REPLACEMENT}/g' "$f" 2>/dev/null; then
    echo "Updated $f"
    updated=$((updated + 1))
  fi
done

if [ "$updated" -eq 0 ]; then
  echo "No files were updated (no matches for $PATTERN)." >&2
  exit 1
fi

echo "Done. Bumped $COMPONENT version to $NEW_VERSION in $updated file(s)."


================================================
FILE: scripts/csharp-e2e.sh
================================================
#!/bin/bash
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -euxo pipefail

# Tag of the code-interpreter image pulled from the registry for the tests.
TAG=${TAG:-latest}

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
# Every relative path below (Dockerfile, server/, tests/) is relative to the
# repository root, so switch there instead of relying on the caller's cwd.
cd "${REPO_ROOT}"

# build execd image locally (context must include internal/)
docker build -f components/execd/Dockerfile -t opensandbox/execd:local "${REPO_ROOT}"

# prepare required images from registry
docker pull "opensandbox/code-interpreter:${TAG}"
echo "-------- Eval test images --------"
docker images

# prepare hostpath volume for e2e test
mkdir -p /tmp/opensandbox-e2e/host-volume-test
mkdir -p /tmp/opensandbox-e2e/logs
echo "opensandbox-e2e-marker" > /tmp/opensandbox-e2e/host-volume-test/marker.txt
chmod -R 755 /tmp/opensandbox-e2e

# prepare Docker named volume for pvc e2e test
docker volume rm opensandbox-e2e-pvc-test 2>/dev/null || true
docker volume create opensandbox-e2e-pvc-test
# seed the named volume with a marker file and subpath test data via a temporary container
docker run --rm -v opensandbox-e2e-pvc-test:/data alpine sh -c "\
  echo 'pvc-marker-data' > /data/marker.txt && \
  mkdir -p /data/datasets/train && \
  echo 'pvc-subpath-marker' > /data/datasets/train/marker.txt"
echo "-------- CSHARP E2E test logs for execd --------" > /tmp/opensandbox-e2e/logs/execd.log

# setup server: start it in the background with all output (dependency sync
# included) captured in server/server.log
cd server
: > server.log
(uv sync && uv run python -m src.main) > server.log 2>&1 &
cd ..

# wait for server (fixed delay; there is no readiness probe here)
sleep 10

# test env for C# fixture
export OPENSANDBOX_TEST_DOMAIN="localhost:8080"
export OPENSANDBOX_TEST_PROTOCOL="http"
export OPENSANDBOX_TEST_API_KEY=""
export OPENSANDBOX_SANDBOX_DEFAULT_IMAGE="opensandbox/code-interpreter:${TAG}"

mkdir -p tests/csharp/build/test-results
dotnet restore "tests/csharp/OpenSandbox.E2ETests/OpenSandbox.E2ETests.csproj"
dotnet test "tests/csharp/OpenSandbox.E2ETests/OpenSandbox.E2ETests.csproj" \
  --configuration Release \
  --no-restore \
  --results-directory "tests/csharp/build/test-results" \
  --logger "trx;LogFileName=csharp-e2e.trx"


================================================
FILE: scripts/java-e2e.sh
================================================
#!/bin/bash
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -euxo pipefail

# Tag of the code-interpreter image pulled from the registry for the tests.
TAG=${TAG:-latest}

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
# Every relative path below (Dockerfile, server/, sdks/, tests/) is relative to
# the repository root, so switch there instead of relying on the caller's cwd.
cd "${REPO_ROOT}"

# build execd image locally (context must include internal/)
docker build -f components/execd/Dockerfile -t opensandbox/execd:local "${REPO_ROOT}"

# prepare required images from registry
docker pull "opensandbox/code-interpreter:${TAG}"
echo "-------- Eval test images --------"
docker images

# prepare hostpath volume for e2e test
mkdir -p /tmp/opensandbox-e2e/host-volume-test
mkdir -p /tmp/opensandbox-e2e/logs
echo "opensandbox-e2e-marker" > /tmp/opensandbox-e2e/host-volume-test/marker.txt
chmod -R 755 /tmp/opensandbox-e2e

# prepare Docker named volume for pvc e2e test
docker volume rm opensandbox-e2e-pvc-test 2>/dev/null || true
docker volume create opensandbox-e2e-pvc-test
# seed the named volume with a marker file and subpath test data via a temporary container
docker run --rm -v opensandbox-e2e-pvc-test:/data alpine sh -c "\
  echo 'pvc-marker-data' > /data/marker.txt && \
  mkdir -p /data/datasets/train && \
  echo 'pvc-subpath-marker' > /data/datasets/train/marker.txt"
echo "-------- JAVA E2E test logs for execd --------" > /tmp/opensandbox-e2e/logs/execd.log

# setup server: the whole `uv sync && uv run ...` list runs in the background;
# only the server's own output is redirected to server/server.log
cd server
uv sync && uv run python -m src.main > server.log 2>&1 &
cd ..

# wait for server (fixed delay; there is no readiness probe here)
sleep 10

# publish the Kotlin SDKs to the local Maven repository for the tests
cd sdks/sandbox/kotlin
./gradlew clean publishToMavenLocal --no-build-cache
cd ../../../

cd sdks/code-interpreter/kotlin
./gradlew clean publishToMavenLocal --no-build-cache -PuseMavenLocal
cd ../../../

# run Java e2e
cd tests/java
./gradlew test


================================================
FILE: scripts/javascript-e2e.sh
================================================
#!/bin/bash
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -euxo pipefail

# Tag of the code-interpreter image pulled from the registry for the tests.
TAG=${TAG:-latest}

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
# Every relative path below (Dockerfile, server/, tests/) is relative to the
# repository root, so switch there instead of relying on the caller's cwd.
cd "${REPO_ROOT}"

# build execd image locally (context must include internal/)
docker build -f components/execd/Dockerfile -t opensandbox/execd:local "${REPO_ROOT}"

# prepare required images from registry
docker pull "opensandbox/code-interpreter:${TAG}"
echo "-------- Eval test images --------"
docker images

# prepare hostpath volume for e2e test
mkdir -p /tmp/opensandbox-e2e/host-volume-test
mkdir -p /tmp/opensandbox-e2e/logs
echo "opensandbox-e2e-marker" > /tmp/opensandbox-e2e/host-volume-test/marker.txt
chmod -R 755 /tmp/opensandbox-e2e

# prepare Docker named volume for pvc e2e test
docker volume rm opensandbox-e2e-pvc-test 2>/dev/null || true
docker volume create opensandbox-e2e-pvc-test
# seed the named volume with a marker file and subpath test data via a temporary container
docker run --rm -v opensandbox-e2e-pvc-test:/data alpine sh -c "\
  echo 'pvc-marker-data' > /data/marker.txt && \
  mkdir -p /data/datasets/train && \
  echo 'pvc-subpath-marker' > /data/datasets/train/marker.txt"
echo "-------- JAVASCRIPT E2E test logs for execd --------" > /tmp/opensandbox-e2e/logs/execd.log

# setup server: the whole `uv sync && uv run ...` list runs in the background;
# only the server's own output is redirected to server/server.log
cd server
uv sync && uv run python -m src.main > server.log 2>&1 &
cd ..

# wait for server (fixed delay; there is no readiness probe here)
sleep 10

# run JavaScript/TypeScript e2e (SDK builds are handled by the test script)
cd tests/javascript

# Pin pnpm via corepack (repo expects pnpm@9.x)
corepack enable
corepack prepare pnpm@9.15.0 --activate

pnpm install

# Ensure SDK workspace deps exist before running build steps (CI does not have prebuilt node_modules).
pnpm -C ../../sdks install --frozen-lockfile

# Align with other E2E jobs: local server does not require API key by default.
# Ensure tests do not send an auth header.
export OPENSANDBOX_TEST_API_KEY=""
export OPENSANDBOX_SANDBOX_DEFAULT_IMAGE="opensandbox/code-interpreter:${TAG}"

pnpm test:ci


================================================
FILE: scripts/python-e2e.sh
================================================
#!/bin/bash
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script runs the Python end-to-end tests: it builds the execd image,
# prepares the test volumes, starts a local server in the background, generates
# the Python SDK API code, and then runs the test suite under tests/python.

set -euxo pipefail

# Tag of the code-interpreter image pulled from the registry for the tests.
TAG=${TAG:-latest}

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
# Every relative path below (Dockerfile, server/, sdks/, tests/) is relative to
# the repository root, so switch there instead of relying on the caller's cwd.
cd "${REPO_ROOT}"

# build execd image locally (context must include internal/)
docker build -f components/execd/Dockerfile -t opensandbox/execd:local "${REPO_ROOT}"

# prepare required images from registry
docker pull "opensandbox/code-interpreter:${TAG}"
echo "-------- Eval test images --------"
docker images

# prepare hostpath volume for e2e test
mkdir -p /tmp/opensandbox-e2e/host-volume-test
mkdir -p /tmp/opensandbox-e2e/logs
echo "opensandbox-e2e-marker" > /tmp/opensandbox-e2e/host-volume-test/marker.txt
chmod -R 755 /tmp/opensandbox-e2e

# prepare Docker named volume for pvc e2e test
docker volume rm opensandbox-e2e-pvc-test 2>/dev/null || true
docker volume create opensandbox-e2e-pvc-test
# seed the named volume with a marker file and subpath test data via a temporary container
docker run --rm -v opensandbox-e2e-pvc-test:/data alpine sh -c "\
  echo 'pvc-marker-data' > /data/marker.txt && \
  mkdir -p /data/datasets/train && \
  echo 'pvc-subpath-marker' > /data/datasets/train/marker.txt"
echo "-------- PYTHON E2E test logs for execd --------" > /tmp/opensandbox-e2e/logs/execd.log

# setup server: the whole `uv sync && uv run ...` list runs in the background;
# only the server's own output is redirected to server/server.log
cd server
uv sync && uv run python -m src.main > server.log 2>&1 &
cd ..

# wait for server (fixed delay; there is no readiness probe here)
sleep 10

# build local api
cd sdks/sandbox/python && make generate-api
cd ../../..

# run real python e2e
cd tests/python
uv sync --all-extras --refresh && make test


================================================
FILE: scripts/spec-doc/generate-spec.js
================================================
#!/usr/bin/env node
/**
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


/**
 * Generate spec-inline.js from sandbox-lifecycle.yml
 *
 * Usage:
 *   node scripts/spec-doc/generate-spec.js
 *   node scripts/spec-doc/generate-spec.js --output docs/public/api/spec-inline.js
 *
 * This script:
 * 1. Reads specs/sandbox-lifecycle.yml
 * 2. Escapes backticks
 * 3. Wraps in JavaScript template literal
 * 4. Writes to docs/public/api/spec-inline.js (by default)
 */

const fs = require('fs');
const path = require('path');

// Find project root
function findProjectRoot() {
  let dir = __dirname;
  while (dir !== path.dirname(dir)) {
    if (fs.existsSync(path.join(dir, 'specs', 'sandbox-lifecycle.yml'))) {
      return dir;
    }
    dir = path.dirname(dir);
  }
  throw new Error('Could not find project root (sandbox-lifecycle.yml not found)');
}

/**
 * Resolve the output file path from the command line.
 *
 * Without `--output`, the default docs/public/api/spec-inline.js under the
 * project root is used. An absolute `--output` value is returned unchanged; a
 * relative one is joined onto the project root.
 *
 * @param {string} projectRoot Directory that relative paths are based on.
 * @returns {string} Path of the file to write.
 * @throws {Error} When `--output` is the last argument or its value is empty.
 */
function parseOutputPathArg(projectRoot) {
  const args = process.argv;
  const flagAt = args.indexOf('--output');

  if (flagAt < 0) {
    return path.join(projectRoot, 'docs', 'public', 'api', 'spec-inline.js');
  }

  const requested = args[flagAt + 1];
  if (!requested) {
    throw new Error('Missing value for --output');
  }

  return path.isAbsolute(requested) ? requested : path.join(projectRoot, requested);
}

/**
 * Escape text so it can be embedded verbatim inside a JavaScript template
 * literal: backslashes, backticks and `${` sequences each get a leading
 * backslash. Escaping backticks alone is not enough, because backslashes
 * would be consumed as escape sequences and `${...}` would be interpolated
 * when the generated file is evaluated.
 *
 * @param {string} text Raw text.
 * @returns {string} Text that evaluates back to `text` inside backticks.
 */
function escapeForTemplateLiteral(text) {
  return text.replace(/[\\`]|\$\{/g, '\\$&');
}

/**
 * Generate the inline spec file: read specs/sandbox-lifecycle.yml, wrap it in
 * a `const OPENAPI_SPEC_YAML = \`...\`;` declaration, write it to the output
 * path and re-read the result as a sanity check.
 *
 * Exits the process with status 0 on success and 1 on any error.
 */
function main() {
  try {
    const projectRoot = findProjectRoot();
    const yamlPath = path.join(projectRoot, 'specs', 'sandbox-lifecycle.yml');
    const outputPath = parseOutputPathArg(projectRoot);

    // Validate input file exists
    if (!fs.existsSync(yamlPath)) {
      throw new Error(`YAML file not found: ${yamlPath}`);
    }

    console.log('Generating spec-inline.js...');
    console.log(`   Input:  ${yamlPath}`);
    console.log(`   Output: ${outputPath}`);
    fs.mkdirSync(path.dirname(outputPath), { recursive: true });

    // Read YAML (sizes are character counts, rounded to KB)
    const yamlContent = fs.readFileSync(yamlPath, 'utf-8');
    const yamlSize = Math.round(yamlContent.length / 1024);

    // Escape everything that is special inside a template literal
    const escapedYaml = escapeForTemplateLiteral(yamlContent);

    // Generate JavaScript
    const jsContent = `const OPENAPI_SPEC_YAML = \`${escapedYaml}\`;`;
    const jsSize = Math.round(jsContent.length / 1024);

    // Write output
    fs.writeFileSync(outputPath, jsContent, 'utf-8');

    console.log('\nSuccessfully generated spec-inline.js');
    console.log(`   YAML size: ${yamlSize} KB`);
    console.log(`   JS size:   ${jsSize} KB`);
    // Skip the ratio for inputs that round down to 0 KB (division by zero).
    if (yamlSize > 0) {
      console.log(`   Compression ratio: ${((jsSize / yamlSize) * 100).toFixed(1)}%`);
    }

    // Verify that the written file starts with the expected declaration
    const generated = fs.readFileSync(outputPath, 'utf-8');
    if (generated.startsWith('const OPENAPI_SPEC_YAML = `')) {
      console.log('\nFile validated successfully');
      process.exit(0);
    } else {
      throw new Error('Generated file validation failed');
    }
  } catch (error) {
    console.error(`\nError: ${error.message}`);
    console.error(error.stack);
    process.exit(1);
  }
}

// Run
main();


================================================
FILE: scripts/spec-doc/index.html
================================================
<!DOCTYPE html>
<!--
 Copyright 2025 Alibaba Group Holding Ltd.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->

<html>
<head>
  <title>OpenSandbox Lifecycle API Documentation</title>
  <meta charset="utf-8"/>
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <link href="https://fonts.googleapis.com/css?family=Montserrat:300,400,700|Roboto:300,400,700" rel="stylesheet">
  <style>
    body {
      margin: 0;
      padding: 0;
      font-family: 'Roboto', sans-serif;
    }
  </style>
</head>
<body>
  <div id="redoc-container"></div>

  <!-- Load js-yaml first for YAML parsing -->
  <script src="https://cdn.jsdelivr.net/npm/js-yaml@4/dist/js-yaml.min.js"></script>

  <!-- Load ReDoc (tracks the latest published release; pin a version for reproducible output) -->
  <script src="https://cdn.jsdelivr.net/npm/redoc@latest/bundles/redoc.standalone.js"></script>

  <!-- Load generated inline spec -->
  <script src="../../docs/public/api/spec-inline.js"></script>

  <!-- Initialize ReDoc after all libraries are loaded -->
  <script>
    console.log('📄 OpenSandbox API Documentation - Initializing');

    // Runs once every <script> above has loaded: checks that Redoc, js-yaml and
    // the inlined spec are present, parses the YAML, and renders it with Redoc.
    window.addEventListener('load', function() {
      const container = document.getElementById('redoc-container');

      // Render an error box inside the container. Text is inserted through
      // textContent / text nodes so that parser messages (which can quote
      // fragments of the spec) are never interpreted as HTML.
      function showError(message, detail) {
        const box = document.createElement('div');
        box.style.padding = '20px';
        box.style.color = 'red';

        const label = document.createElement('strong');
        label.textContent = '❌ Error:';
        box.appendChild(label);
        box.appendChild(document.createTextNode(' ' + message));

        if (detail !== undefined) {
          box.appendChild(document.createElement('br'));
          const code = document.createElement('code');
          code.textContent = detail;
          box.appendChild(code);
        }

        // Drop anything already rendered before showing the error.
        container.innerHTML = '';
        container.appendChild(box);
      }

      console.log('🔍 Page fully loaded, checking dependencies...');
      console.log('   - Redoc available?', typeof Redoc !== 'undefined');
      console.log('   - jsyaml available?', typeof jsyaml !== 'undefined');
      console.log('   - OPENAPI_SPEC_YAML available?', typeof OPENAPI_SPEC_YAML !== 'undefined');

      // Check if Redoc is available
      if (typeof Redoc === 'undefined') {
        console.error('❌ Redoc library failed to load');
        showError('Redoc library failed to load.');
        return;
      }

      // Check if js-yaml is available
      if (typeof jsyaml === 'undefined') {
        console.error('❌ js-yaml library failed to load');
        showError('js-yaml library failed to load.');
        return;
      }

      // Check if spec is available
      if (typeof OPENAPI_SPEC_YAML === 'undefined') {
        console.error('❌ API specification not loaded');
        showError('API specification not loaded. Make sure spec-inline.js exists.');
        return;
      }

      try {
        console.log('📝 Parsing OpenAPI YAML spec...');
        const spec = jsyaml.load(OPENAPI_SPEC_YAML);
        console.log('✅ YAML parsed successfully');

        // Log spec info
        const pathCount = Object.keys(spec.paths || {}).length;
        console.log(`📊 Spec contains ${pathCount} paths`);

        // Check for endpoints/{port}
        if (spec.paths && spec.paths['/sandboxes/{sandboxId}/endpoints/{port}']) {
          console.log('✨ Found /sandboxes/{sandboxId}/endpoints/{port} route');
        } else {
          console.warn('⚠️  /sandboxes/{sandboxId}/endpoints/{port} route not found in spec');
        }

        console.log('🎨 Initializing Redoc...');
        Redoc.init(spec, {
          scrollYOffset: 50,
          theme: {
            colors: {
              primary: {
                main: '#1f2937'
              }
            }
          }
        }, container);

        console.log('✅ Redoc initialized successfully!');
      } catch (error) {
        console.error('❌ Error during initialization:', error);
        // Fall back to the stringified value when the thrown object carries no message.
        const detail = error && error.message ? error.message : String(error);
        showError('Failed to parse OpenAPI specification.', detail);
      }
    });
  </script>
</body>
</html>


================================================
FILE: scripts/verify-license.sh
================================================
#!/bin/bash
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script verifies that required files contain the Apache 2.0 license header.
# It scans tracked source files and fails with a list of violations if any header
# is missing.

set -euo pipefail

# Repository root, resolved relative to this script's own location.
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
# Accepted copyright years are MIN_YEAR..CURRENT_YEAR inclusive.
CURRENT_YEAR="$(date +%Y)"
MIN_YEAR="2025"
LICENSE_OWNER="Alibaba Group Holding Ltd."
# Extended regex for the copyright line. Dots in the owner name are wrapped in
# a bracket expression so they match a literal "." rather than any character.
LICENSE_REGEX="Copyright [0-9]{4} ${LICENSE_OWNER//./[.]}"

# Extensions (text after the last dot) whose files must carry the header.
LICENSE_EXTS=(
  go
  py
  sh
  kt
  kts
  java
  ts
  tsx
  js
  jsx
  toml
  html
  css
  sql
  tf
)

# Exact file names that must carry the header regardless of extension.
LICENSE_BASENAMES=(Dockerfile)

# Paths excluded from the check. An entry ending in "/" excludes everything
# beneath that directory; any other entry must equal the path exactly.
IGNORED_PATHS=(
  "LICENSE"
  "NOTICE"
  "docs/"
  "scripts/spec-doc/index.html" # Generated doc
)

# Succeeds (status 0) when the given path is a Go mock under kubernetes/internal/:
# either its name ends in "_mock.go" or it is a .go file below a /mock/ directory.
# Fails (status 1) for an empty/missing argument or any other path.
is_k8s_mock_go() {
  local path="${1-}"
  # Reject empty input and anything outside kubernetes/internal/ up front.
  [[ -n "$path" && "$path" == kubernetes/internal/* ]] || return 1
  # The status of this test is the function's result.
  [[ "$path" == *"_mock.go" || "$path" == */mock/*.go ]]
}

# Succeeds (status 0) when the path ends in "deepcopy.go", the only generated
# file pattern skipped here; fails (status 1) otherwise.
is_generated_to_skip() {
  local candidate="$1"
  case "$candidate" in
    *deepcopy.go) return 0 ;;
    *) return 1 ;;
  esac
}

cd "$REPO_ROOT"

# Succeeds (status 0) when the path is covered by IGNORED_PATHS.
# Entries ending in "/" match any path that starts with that prefix;
# all other entries must equal the path exactly.
is_ignored() {
  local target="$1"
  local entry
  for entry in "${IGNORED_PATHS[@]}"; do
    case "$entry" in
      */)
        # Directory entry: prefix match.
        [[ "$target" != "$entry"* ]] || return 0
        ;;
      *)
        # File entry: exact match.
        [[ "$target" != "$entry" ]] || return 0
        ;;
    esac
  done
  return 1
}

# Succeeds (status 0) when the file's extension is listed in LICENSE_EXTS.
# The extension is the text after the last dot of the file NAME; a name with
# no dot has no extension, so a dotless file called e.g. "go" or "sh" is not
# mistaken for a source file of that type.
has_expected_extension() {
  local file="$1"
  # Strip directories first so dots in parent directory names are irrelevant.
  local base="${file##*/}"
  [[ "$base" == *.* ]] || return 1
  local ext="${base##*.}"
  local candidate
  for candidate in "${LICENSE_EXTS[@]}"; do
    if [[ "$ext" == "$candidate" ]]; then
      return 0
    fi
  done
  return 1
}

# Succeeds (status 0) when the file's basename equals one of the names in
# LICENSE_BASENAMES; fails (status 1) otherwise.
has_expected_basename() {
  local path="$1"
  local name known
  name="$(basename "$path")"
  for known in "${LICENSE_BASENAMES[@]}"; do
    [[ "$name" != "$known" ]] || return 0
  done
  return 1
}

# Paths whose header is absent or carries a year outside MIN_YEAR..CURRENT_YEAR.
missing=()

# Walk every tracked path. The listing is NUL-delimited (-z) so git emits raw
# path names: without it, names containing non-ASCII or special characters are
# C-quoted, fail the extension filter, and silently escape the check.
while IFS= read -r -d '' file; do
  # Skip ignored paths
  if is_ignored "$file"; then
    continue
  fi
  # Skip kubernetes internal mock go files
  if is_k8s_mock_go "$file"; then
    continue
  fi
  # Skip generated files
  if is_generated_to_skip "$file"; then
    continue
  fi

  # Only check files with expected extensions or basenames
  if ! has_expected_extension "$file" && ! has_expected_basename "$file"; then
    continue
  fi

  # Tracked entries that are not regular files in the work tree (deleted but
  # not yet staged, broken symlinks, submodule directories) have no header to
  # read; reading them would abort the whole run under `set -e`.
  if [[ ! -f "$file" ]]; then
    continue
  fi

  # Limit scan to the first 25 lines to allow shebangs/DOCTYPE above the header.
  header="$(head -n 25 "$file")"

  # Here-strings instead of `echo | grep | head` keep the result independent
  # of SIGPIPE/pipefail interactions. `|| true` turns "no match" into an empty
  # string rather than a fatal non-zero status.
  matches="$(grep -Eo "$LICENSE_REGEX" <<<"$header" || true)"
  if [[ -z "$matches" ]]; then
    missing+=("$file")
    continue
  fi

  # Only the first matching copyright line determines the year.
  first_match="${matches%%$'\n'*}"
  found_year=""
  if [[ "$first_match" =~ [0-9]{4} ]]; then
    found_year="${BASH_REMATCH[0]}"
  fi

  # 10# forces base-10 so a year with a leading zero is not parsed as octal.
  if [[ -z "$found_year" ]] || ((10#$found_year > 10#$CURRENT_YEAR || 10#$found_year < 10#$MIN_YEAR)); then
    missing+=("$file")
  fi
done < <(git -C "$REPO_ROOT" ls-files -z)

# Report every offending file and fail, or confirm success.
if ((${#missing[@]} > 0)); then
  echo "Missing license header in the following files:"
  printf ' - %s\n' "${missing[@]}"
  exit 1
fi

echo "License headers verified."


================================================
FILE: sdks/Directory.Build.props
================================================
<Project>
  <PropertyGroup>
    <!-- Shared C# SDK packaging metadata -->
    <OpenSandboxPackageVersion>0.1.0</OpenSandboxPackageVersion>
    <OpenSandboxCodeInterpreterPackageVersion>0.1.0</OpenSandboxCodeInterpreterPackageVersion>
    <OpenSandboxDependencyVersionRange>[$(OpenSandboxPackageVersion),0.2.0)</OpenSandboxDependencyVersionRange>
  </PropertyGroup>
</Project>


================================================
FILE: sdks/code-interpreter/csharp/OpenSandbox.CodeInterpreter.sln
================================================

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.0.31903.59
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "OpenSandbox.CodeInterpreter", "src\OpenSandbox.CodeInterpreter\OpenSandbox.CodeInterpreter.csproj", "{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "OpenSandbox.CodeInterpreter.Tests", "tests\OpenSandbox.CodeInterpreter.Tests\OpenSandbox.CodeInterpreter.Tests.csproj", "{B2C3D4E5-F6A7-8901-BCDE-F12345678901}"
EndProject
Global
	GlobalSection(SolutionConfigurationPlatforms) = preSolution
		Debug|Any CPU = Debug|Any CPU
		Release|Any CPU = Release|Any CPU
	EndGlobalSection
	GlobalSection(ProjectConfigurationPlatforms) = postSolution
		{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Release|Any CPU.Build.0 = Release|Any CPU
		{B2C3D4E5-F6A7-8901-BCDE-F12345678901}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{B2C3D4E5-F6A7-8901-BCDE-F12345678901}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{B2C3D4E5-F6A7-8901-BCDE-F12345678901}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{B2C3D4E5-F6A7-8901-BCDE-F12345678901}.Release|Any CPU.Build.0 = Release|Any CPU
	EndGlobalSection
EndGlobal


================================================
FILE: sdks/code-interpreter/csharp/README.md
================================================
# OpenSandbox Code Interpreter SDK for C#

English | [中文](README_zh.md)

A C# SDK for code interpretation with OpenSandbox. Provides high-level APIs for executing code in multiple languages (Python, JavaScript, TypeScript, Go, Java, Bash) within secure sandbox environments.

## Prerequisites

This SDK requires a Docker image containing the Code Interpreter runtime environment. You must use
`opensandbox/code-interpreter` (or a derivative image) with pre-installed runtimes for Python, Java, Go,
Node.js, and others.

For supported languages and versions, see the
[Environment Documentation](../../../sandboxes/code-interpreter/README.md).

## Installation

```bash
dotnet add package Alibaba.OpenSandbox.CodeInterpreter
```

## Quick Start

```csharp
using OpenSandbox;
using OpenSandbox.CodeInterpreter;
using OpenSandbox.CodeInterpreter.Models;
using OpenSandbox.Config;
using OpenSandbox.Core;

var config = new ConnectionConfig(new ConnectionConfigOptions
{
    Domain = "api.opensandbox.io",
    ApiKey = "your-api-key"
});

try
{
    // Create sandbox with code-interpreter runtime image and entrypoint.
    await using var sandbox = await Sandbox.CreateAsync(new SandboxCreateOptions
    {
        ConnectionConfig = config,
        Image = "opensandbox/code-interpreter:v1.0.2",
        Entrypoint = new[] { "/opt/opensandbox/code-interpreter.sh" },
        Env = new Dictionary<string, string>
        {
            ["PYTHON_VERSION"] = "3.11",
            ["JAVA_VERSION"] = "17",
            ["NODE_VERSION"] = "20",
            ["GO_VERSION"] = "1.24"
        },
        TimeoutSeconds = 15 * 60
    });

    var interpreter = await CodeInterpreter.CreateAsync(sandbox);
    var execution = await interpreter.Codes.RunAsync(
        "print('Hello, World!')",
        new RunCodeOptions { Language = SupportedLanguage.Python });

    foreach (var msg in execution.Logs.Stdout)
    {
        Console.Write(msg.Text);
    }

    await sandbox.KillAsync();
}
catch (SandboxException ex)
{
    Console.Error.WriteLine($"Sandbox Error: [{ex.Error.Code}] {ex.Error.Message}");
}
```

## Logging (ILogger)

The SDK uses `Microsoft.Extensions.Logging` abstractions. Pass your own `ILoggerFactory`
through diagnostics options when creating the sandbox/code interpreter:

```csharp
using Microsoft.Extensions.Logging;
using OpenSandbox.Config;

using var loggerFactory = LoggerFactory.Create(builder =>
{
    builder.SetMinimumLevel(LogLevel.Debug);
    builder.AddConsole();
});

await using var sandbox = await Sandbox.CreateAsync(new SandboxCreateOptions
{
    ConnectionConfig = new ConnectionConfig(),
    Image = "opensandbox/code-interpreter:v1.0.2",
    Entrypoint = new[] { "/opt/opensandbox/code-interpreter.sh" },
    Diagnostics = new SdkDiagnosticsOptions
    {
        LoggerFactory = loggerFactory
    }
});

var interpreter = await CodeInterpreter.CreateAsync(sandbox, new CodeInterpreterCreateOptions
{
    Diagnostics = new SdkDiagnosticsOptions
    {
        LoggerFactory = loggerFactory
    }
});
```

## Runtime Configuration

### Docker Image

The Code Interpreter SDK relies on a specialized runtime image. Ensure your sandbox provider has
`opensandbox/code-interpreter` available.

### Language Version Selection

You can specify language versions through environment variables when creating the sandbox:

| Language | Environment Variable | Example Value | Default (if unset) |
| --- | --- | --- | --- |
| Python | `PYTHON_VERSION` | `3.11` | Image default |
| Java | `JAVA_VERSION` | `17` | Image default |
| Node.js | `NODE_VERSION` | `20` | Image default |
| Go | `GO_VERSION` | `1.24` | Image default |

## Features

### Run with `Language` (default language context)

If you do not need explicit context IDs, run code by setting only `Language`.
When `Context` is omitted, execd creates/reuses a default session for that language, so state can persist across runs.

```csharp
await interpreter.Codes.RunAsync(
    "x = 42",
    new RunCodeOptions { Language = SupportedLanguage.Python });

var execution = await interpreter.Codes.RunAsync(
    "result = x\nresult",
    new RunCodeOptions { Language = SupportedLanguage.Python });

Console.WriteLine(execution.Results.FirstOrDefault()?.Text); // "42"
```

### Supported Languages

- Python (`SupportedLanguage.Python`)
- JavaScript (`SupportedLanguage.JavaScript`)
- TypeScript (`SupportedLanguage.TypeScript`)
- Go (`SupportedLanguage.Go`)
- Java (`SupportedLanguage.Java`)
- Bash (`SupportedLanguage.Bash`)

### Context Management

Contexts allow you to maintain state between code executions:

```csharp
// Create a context for Python
var context = await interpreter.Codes.CreateContextAsync(SupportedLanguage.Python);

// Run code in the context - variables persist
await interpreter.Codes.RunAsync("x = 42", new RunCodeOptions { Context = context });
var result = await interpreter.Codes.RunAsync("print(x)", new RunCodeOptions { Context = context });
// Output: 42

// List contexts for a specific language
var pythonContexts = await interpreter.Codes.ListContextsAsync(SupportedLanguage.Python);

// Delete a specific context
await interpreter.Codes.DeleteContextAsync(context.Id!);

// Delete all contexts for a language
await interpreter.Codes.DeleteContextsAsync(SupportedLanguage.Python);
```

### Streaming Execution

For real-time output, use streaming:

```csharp
var request = new RunCodeRequest
{
    Code = "for i in range(5): print(i)",
    Context = new CodeContext { Language = SupportedLanguage.Python }
};

await foreach (var ev in interpreter.Codes.RunStreamAsync(request))
{
    switch (ev.Type)
    {
        case "stdout":
            Console.Write(ev.Text);
            break;
        case "stderr":
            Console.Error.Write(ev.Text);
            break;
        case "result":
            var text = ev.Results != null
                && ev.Results.TryGetValue("text/plain", out var value)
                ? value?.ToString()
                : null;
            Console.WriteLine($"Result: {text ?? "(no text/plain)"}");
            break;
        case "error":
            Console.WriteLine($"Error: {ev.Error}");
            break;
    }
}
```

### Event Handlers

Use handlers for fine-grained control over execution events:

```csharp
var execution = await interpreter.Codes.RunAsync(
    "print('Hello')\nprint('World')",
    new RunCodeOptions
    {
        Language = SupportedLanguage.Python,
        Handlers = new ExecutionHandlers
        {
            OnStdout = async msg => Console.Write($"[OUT] {msg.Text}"),
            OnStderr = async msg => Console.Error.Write($"[ERR] {msg.Text}"),
            OnResult = async result => Console.WriteLine($"[RESULT] {result.Text}"),
            OnError = async error => Console.WriteLine($"[ERROR] {error.Name}: {error.Value}"),
            OnExecutionComplete = async complete => Console.WriteLine($"[DONE] Took {complete.ExecutionTimeMs}ms")
        }
    });
```

### Interrupt Execution

Stop a running code execution:

```csharp
var context = await interpreter.Codes.CreateContextAsync(SupportedLanguage.Python);

// Start a long-running task
var task = interpreter.Codes.RunAsync(
    "import time\nwhile True: time.sleep(1)",
    new RunCodeOptions { Context = context });

// Interrupt after some time
await Task.Delay(2000);
await interpreter.Codes.InterruptAsync(context.Id!);
```

### Access Sandbox Services

The code interpreter provides convenient access to underlying sandbox services:

```csharp
// File operations
await interpreter.Files.WriteFilesAsync(new[]
{
    new WriteEntry { Path = "/tmp/data.txt", Data = "Hello, World!" }
});
var content = await interpreter.Files.ReadFileAsync("/tmp/data.txt");

// Shell commands
var commandExecution = await interpreter.Commands.RunAsync("ls -la /tmp");
foreach (var msg in commandExecution.Logs.Stdout)
{
    Console.Write(msg.Text);
}

// Metrics
var metrics = await interpreter.Sandbox.GetMetricsAsync();
Console.WriteLine($"CPU: {metrics.CpuUsedPercentage}%, Memory: {metrics.MemoryUsedMiB}MiB");
```

## API Reference

### CodeInterpreter

| Method | Description |
|--------|-------------|
| `CreateAsync(sandbox, options?)` | Creates a code interpreter from a sandbox |

| Property | Description |
|----------|-------------|
| `Sandbox` | The underlying sandbox instance |
| `Codes` | The codes service for code execution |
| `Id` | The sandbox ID |
| `Files` | File system operations |
| `Commands` | Shell command execution |
| `Metrics` | Resource metrics |

### ICodes

| Method | Description |
|--------|-------------|
| `CreateContextAsync(language)` | Creates a new execution context |
| `GetContextAsync(contextId)` | Gets an existing context |
| `ListContextsAsync(language)` | Lists contexts for a specific language |
| `DeleteContextAsync(contextId)` | Deletes a specific context |
| `DeleteContextsAsync(language)` | Deletes all contexts for a language |
| `RunAsync(code, options?)` | Executes code and returns the result |
| `RunStreamAsync(request)` | Executes code with streaming output |
| `InterruptAsync(contextId)` | Interrupts a running execution |

> All async methods support `CancellationToken`.

## Requirements

- .NET Standard 2.0+ / .NET 6.0+
- OpenSandbox Sandbox SDK (`Alibaba.OpenSandbox`)

## Notes

- **Lifecycle**: `CodeInterpreter` wraps an existing `Sandbox` and reuses its connection and services.
- **Default context behavior**: `RunAsync(..., new RunCodeOptions { Language = ... })` uses the language default context.
- **Cleanup**: `DisposeAsync` only cleans local resources. Call `KillAsync()` to terminate the remote sandbox instance.

## License

Apache License 2.0


================================================
FILE: sdks/code-interpreter/csharp/README_zh.md
================================================
# Alibaba Code Interpreter SDK for C#

[English](README.md) | 中文

一个用于在安全隔离沙箱中执行代码的 C# SDK。它提供了高级 API，用于安全地运行 Python、Java、Go、TypeScript 等语言，并支持代码执行上下文管理。

## 前置条件

此 SDK 需要包含 Code Interpreter 运行时环境的 Docker 镜像。您必须使用 `opensandbox/code-interpreter` 镜像（或其衍生版本），该镜像预装了 Python、Java、Go、Node.js 等运行时。

有关支持的语言和版本的详细信息，请参阅[环境文档](../../../sandboxes/code-interpreter/README.md)。

## 安装

### NuGet

```bash
dotnet add package Alibaba.OpenSandbox.CodeInterpreter
```

### PackageReference

```xml
<PackageReference Include="Alibaba.OpenSandbox.CodeInterpreter" Version="0.1.0" />
```

## 快速开始

以下示例演示如何创建具有特定运行时配置的沙箱并执行简单脚本。

> **注意**：运行此示例之前，请确保 OpenSandbox 服务正在运行。有关启动说明，请参阅根目录的 [README.md](../../../README.md)。

```csharp
using OpenSandbox;
using OpenSandbox.CodeInterpreter;
using OpenSandbox.CodeInterpreter.Models;
using OpenSandbox.Config;

// 1. 配置连接
var config = new ConnectionConfig(new ConnectionConfigOptions
{
    Domain = "api.opensandbox.io",
    ApiKey = "your-api-key"
});

// 2. 创建带有 code-interpreter 镜像和运行时版本的 Sandbox
await using var sandbox = await Sandbox.CreateAsync(new SandboxCreateOptions
{
    ConnectionConfig = config,
    Image = "opensandbox/code-interpreter:v1.0.2",
    Entrypoint = new[] { "/opt/opensandbox/code-interpreter.sh" },
    Env = new Dictionary<string, string>
    {
        ["PYTHON_VERSION"] = "3.11",
        ["JAVA_VERSION"] = "17",
        ["NODE_VERSION"] = "20",
        ["GO_VERSION"] = "1.24"
    },
    TimeoutSeconds = 15 * 60
});

// 3. 创建 CodeInterpreter 包装器
var ci = await CodeInterpreter.CreateAsync(sandbox);

// 4. 创建执行上下文 (Python)
var ctx = await ci.Codes.CreateContextAsync(SupportedLanguage.Python);

// 5. 运行代码
var result = await ci.Codes.RunAsync(
    "import sys\nprint(sys.version)\nresult = 2 + 2\nresult",
    new RunCodeOptions { Context = ctx });

// 6. 打印输出
Console.WriteLine(result.Results.FirstOrDefault()?.Text);

// 7. 清理远程实例（可选但推荐）
await sandbox.KillAsync();
```

## 日志（ILogger）

SDK 使用 `Microsoft.Extensions.Logging` 抽象。创建 Sandbox/CodeInterpreter 时可通过
diagnostics 传入你自己的 `ILoggerFactory`：

```csharp
using Microsoft.Extensions.Logging;
using OpenSandbox.Config;

using var loggerFactory = LoggerFactory.Create(builder =>
{
    builder.SetMinimumLevel(LogLevel.Debug);
    builder.AddConsole();
});

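await using var sandbox = await Sandbox.CreateAsync(new SandboxCreateOptions
{
    ConnectionConfig = new ConnectionConfig(),
    Image = "opensandbox/code-interpreter:v1.0.2",
    Entrypoint = new[] { "/opt/opensandbox/code-interpreter.sh" },
    Diagnostics = new SdkDiagnosticsOptions
    {
        LoggerFactory = loggerFactory
    }
});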

var ci = await CodeInterpreter.CreateAsync(sandbox, new CodeInterpreterCreateOptions
{
    Diagnostics = new SdkDiagnosticsOptions
    {
        LoggerFactory = loggerFactory
    }
});
```

## 运行时配置

### Docker 镜像

Code Interpreter SDK 依赖于专门的环境。请确保您的沙箱提供者有可用的 `opensandbox/code-interpreter` 镜像。

### 语言版本选择

您可以通过在创建 `Sandbox` 时设置相应的环境变量来指定所需的编程语言版本。

| 语言 | 环境变量 | 示例值 | 默认值（如未设置） |
| --- | --- | --- | --- |
| Python | `PYTHON_VERSION` | `3.11` | 镜像默认值 |
| Java | `JAVA_VERSION` | `17` | 镜像默认值 |
| Node.js | `NODE_VERSION` | `20` | 镜像默认值 |
| Go | `GO_VERSION` | `1.24` | 镜像默认值 |

```csharp
await using var sandbox = await Sandbox.CreateAsync(new SandboxCreateOptions
{
    ConnectionConfig = config,
    Image = "opensandbox/code-interpreter:v1.0.2",
    Entrypoint = new[] { "/opt/opensandbox/code-interpreter.sh" },
    Env = new Dictionary<string, string>
    {
        ["JAVA_VERSION"] = "17",
        ["GO_VERSION"] = "1.24"
    }
});
```

## 使用示例

### 0. 使用 `Language` 运行（默认语言上下文）

如果您不需要管理显式的上下文 ID，可以仅通过指定 `Language` 来运行代码。
当省略 `Context.Id` 时，execd 可以为该语言创建/重用默认会话，因此状态可以在多次运行之间持久化。

```csharp
await ci.Codes.RunAsync("x = 42", new RunCodeOptions { Language = SupportedLanguage.Python });
var execution = await ci.Codes.RunAsync("result = x\nresult", new RunCodeOptions { Language = SupportedLanguage.Python });
Console.WriteLine(execution.Results.FirstOrDefault()?.Text); // "42"
```

### 0.1 上下文管理（列出/获取/删除）

您可以显式管理上下文（与 Python/Kotlin SDK 对齐）：

```csharp
var ctx = await ci.Codes.CreateContextAsync(SupportedLanguage.Python);

var same = await ci.Codes.GetContextAsync(ctx.Id!);
Console.WriteLine($"{same.Id}, {same.Language}");

var pyOnly = await ci.Codes.ListContextsAsync(SupportedLanguage.Python);

await ci.Codes.DeleteContextAsync(ctx.Id!);
await ci.Codes.DeleteContextsAsync(SupportedLanguage.Python); // 批量清理
```

### 1. Java 代码执行

```csharp
var javaCtx = await ci.Codes.CreateContextAsync(SupportedLanguage.Java);
var execution = await ci.Codes.RunAsync(
    @"System.out.println(""Calculating sum..."");
int a = 10;
int b = 20;
int sum = a + b;
System.out.println(""Sum: "" + sum);
sum",
    new RunCodeOptions { Context = javaCtx });

foreach (var msg in execution.Logs.Stdout)
{
    Console.WriteLine(msg.Text);
}
```

### 2. 流式输出处理

实时处理 stdout/stderr 和执行事件。

```csharp
using OpenSandbox.Models;

var handlers = new ExecutionHandlers
{
    OnStdout = async msg => Console.WriteLine($"STDOUT: {msg.Text}"),
    OnStderr = async msg => Console.Error.WriteLine($"STDERR: {msg.Text}"),
    OnResult = async r => Console.WriteLine($"RESULT: {r.Text}")
};

var pyCtx = await ci.Codes.CreateContextAsync(SupportedLanguage.Python);
await ci.Codes.RunAsync(
    "import time\nfor i in range(5):\n    print(i)\n    time.sleep(0.2)",
    new RunCodeOptions { Context = pyCtx, Handlers = handlers });
```

### 3. 使用 IAsyncEnumerable 流式处理

```csharp
var request = new RunCodeRequest
{
    Code = "for i in range(10): print(i)",
    Context = new CodeContext { Language = SupportedLanguage.Python }
};

await foreach (var ev in ci.Codes.RunStreamAsync(request))
{
    switch (ev.Type)
    {
        case "stdout":
            Console.Write(ev.Text);
            break;
        case "stderr":
            Console.Error.Write(ev.Text);
            break;
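        case "result":
            var text = ev.Results != null
                && ev.Results.TryGetValue("text/plain", out var value)
                ? value?.ToString()
                : null;
            Console.WriteLine($"结果: {text ?? "(无 text/plain)"}");
            break;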
        case "error":
            Console.WriteLine($"错误: {ev.Error}");
            break;
    }
}
```

### 4. 中断执行

```csharp
var ctx = await ci.Codes.CreateContextAsync(SupportedLanguage.Python);

// 启动长时间运行的任务
var task = ci.Codes.RunAsync(
    "import time\nwhile True: time.sleep(1)",
    new RunCodeOptions { Context = ctx });

// 一段时间后中断
await Task.Delay(2000);
await ci.Codes.InterruptAsync(ctx.Id!);
```

## API 参考

### CodeInterpreter

| 方法 | 描述 |
|------|------|
| `CreateAsync(sandbox, options?)` | 从沙箱创建代码解释器 |

| 属性 | 描述 |
|------|------|
| `Sandbox` | 底层沙箱实例 |
| `Codes` | 代码执行服务 |
| `Id` | 沙箱 ID |
| `Files` | 文件系统操作 |
| `Commands` | Shell 命令执行 |
| `Metrics` | 资源指标 |

### ICodes

| 方法 | 描述 |
|------|------|
| `CreateContextAsync(language)` | 创建新的执行上下文 |
| `GetContextAsync(contextId)` | 获取现有上下文 |
| `ListContextsAsync(language)` | 列出指定语言的上下文 |
| `DeleteContextAsync(contextId)` | 删除特定上下文 |
| `DeleteContextsAsync(language)` | 删除某语言的所有上下文 |
| `RunAsync(code, options?)` | 执行代码并返回结果 |
| `RunStreamAsync(request)` | 执行代码并流式输出 |
| `InterruptAsync(contextId)` | 中断正在运行的执行 |

## 注意事项

- **生命周期**：`CodeInterpreter` 包装现有的 `Sandbox` 实例并重用其连接配置。完成后调用 `sandbox.KillAsync()` 以释放资源。
- **默认上下文**：`Codes.RunAsync(..., new RunCodeOptions { Language = ... })` 使用语言默认上下文（状态可以在多次运行之间持久化）。
- **取消支持**：所有异步方法都支持 `CancellationToken`。

## 系统要求

- .NET Standard 2.0+ / .NET 6.0+
- OpenSandbox Sandbox SDK (`Alibaba.OpenSandbox`)

## 许可证

Apache License 2.0


================================================
FILE: sdks/code-interpreter/csharp/src/OpenSandbox.CodeInterpreter/Adapters/CodesAdapter.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System.Runtime.CompilerServices;
using System.Text;
using System.Text.Json;
using OpenSandbox.Adapters;
using OpenSandbox.CodeInterpreter.Models;
using OpenSandbox.CodeInterpreter.Services;
using OpenSandbox.Core;
using OpenSandbox.Internal;
using OpenSandbox.Models;
using Microsoft.Extensions.Logging;

namespace OpenSandbox.CodeInterpreter.Adapters;

/// <summary>
/// Adapter implementation for the codes service, backed by the execd HTTP API.
/// </summary>
/// <remarks>
/// Context management calls go through the supplied <see cref="HttpClientWrapper"/>. Streaming runs are
/// posted on the separate SSE <see cref="HttpClient"/> and decoded with <see cref="SseParser"/>.
/// </remarks>
internal sealed class CodesAdapter : ICodes
{
    private readonly HttpClientWrapper _client;
    private readonly HttpClient _sseHttpClient;
    private readonly string _baseUrl;
    private readonly IReadOnlyDictionary<string, string> _headers;
    private readonly ILogger _logger;

    // Serializer settings for the streaming request body: camelCase property names, null members omitted.
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        PropertyNameCaseInsensitive = true,
        DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull
    };

    /// <summary>
    /// Creates the adapter.
    /// </summary>
    /// <param name="client">Wrapper used for the non-streaming context endpoints.</param>
    /// <param name="sseHttpClient">HTTP client used for the streaming run endpoint.</param>
    /// <param name="baseUrl">Base URL of the streaming endpoint; trailing slashes are trimmed.</param>
    /// <param name="headers">Headers added to every streaming request; <c>null</c> is treated as empty.</param>
    /// <param name="logger">Logger for adapter diagnostics.</param>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="client"/>, <paramref name="sseHttpClient"/>, <paramref name="baseUrl"/>
    /// or <paramref name="logger"/> is null.
    /// </exception>
    public CodesAdapter(
        HttpClientWrapper client,
        HttpClient sseHttpClient,
        string baseUrl,
        IReadOnlyDictionary<string, string> headers,
        ILogger logger)
    {
        _client = client ?? throw new ArgumentNullException(nameof(client));
        _sseHttpClient = sseHttpClient ?? throw new ArgumentNullException(nameof(sseHttpClient));
        _baseUrl = baseUrl?.TrimEnd('/') ?? throw new ArgumentNullException(nameof(baseUrl));
        _headers = headers ?? new Dictionary<string, string>();
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    public async Task<CodeContext> CreateContextAsync(string language, CancellationToken cancellationToken = default)
    {
        RequireNonBlank(language, "Language cannot be empty");

        var request = new CreateContextRequest { Language = language };
        _logger.LogDebug("Creating code context (language={Language})", language);
        var response = await _client.PostAsync<CodeContext>("/code/context", request, cancellationToken).ConfigureAwait(false);

        // A context without a language is treated as a malformed response.
        if (response == null || string.IsNullOrEmpty(response.Language))
        {
            throw UnexpectedResponse("Create code context failed: unexpected response shape");
        }

        return response;
    }

    /// <inheritdoc />
    public async Task<CodeContext> GetContextAsync(string contextId, CancellationToken cancellationToken = default)
    {
        RequireNonBlank(contextId, "contextId cannot be empty");

        _logger.LogDebug("Fetching code context: {ContextId}", contextId);

        // The id is placed in the URL path, so it is escaped.
        var response = await _client.GetAsync<CodeContext>($"/code/contexts/{Uri.EscapeDataString(contextId)}", cancellationToken: cancellationToken).ConfigureAwait(false);

        if (response == null || string.IsNullOrEmpty(response.Language))
        {
            throw UnexpectedResponse("Get code context failed: unexpected response shape");
        }

        return response;
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<CodeContext>> ListContextsAsync(string language, CancellationToken cancellationToken = default)
    {
        RequireNonBlank(language, "Language cannot be empty");

        _logger.LogDebug("Listing code contexts (language={Language})", language);
        var queryParams = new Dictionary<string, string?> { ["language"] = language };

        var response = await _client.GetAsync<List<CodeContext>>("/code/contexts", queryParams, cancellationToken).ConfigureAwait(false);

        if (response == null)
        {
            throw UnexpectedResponse("List code contexts failed: unexpected response shape");
        }

        return response;
    }

    /// <inheritdoc />
    public async Task DeleteContextAsync(string contextId, CancellationToken cancellationToken = default)
    {
        RequireNonBlank(contextId, "contextId cannot be empty");

        _logger.LogInformation("Deleting code context: {ContextId}", contextId);
        await _client.DeleteAsync($"/code/contexts/{Uri.EscapeDataString(contextId)}", cancellationToken: cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task DeleteContextsAsync(string language, CancellationToken cancellationToken = default)
    {
        RequireNonBlank(language, "Language cannot be empty");

        _logger.LogInformation("Deleting code contexts (language={Language})", language);
        var queryParams = new Dictionary<string, string?> { ["language"] = language };
        await _client.DeleteAsync("/code/contexts", queryParams, cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task InterruptAsync(string contextId, CancellationToken cancellationToken = default)
    {
        RequireNonBlank(contextId, "contextId cannot be empty");

        _logger.LogInformation("Interrupting code execution for context: {ContextId}", contextId);

        // The interrupt is a DELETE on "/code" with the context id passed as the "id" query parameter.
        var queryParams = new Dictionary<string, string?> { ["id"] = contextId };
        await _client.DeleteAsync("/code", queryParams, cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    /// <remarks>
    /// Arguments are validated when this method is called rather than when enumeration starts, so an
    /// invalid request fails at the call site. The HTTP request is only sent once the returned sequence
    /// is enumerated.
    /// </remarks>
    public IAsyncEnumerable<ServerStreamEvent> RunStreamAsync(
        RunCodeRequest request,
        CancellationToken cancellationToken = default)
    {
        if (request == null)
        {
            throw new InvalidArgumentException("request cannot be null");
        }

        RequireNonBlank(request.Code, "Code cannot be empty");

        // The iterator lives in a separate method: an async iterator body does not run until the first
        // MoveNextAsync, which would otherwise defer the checks above.
        return RunStreamCoreAsync(request, cancellationToken);
    }

    /// <summary>
    /// Posts the run request to <c>{baseUrl}/code</c> and yields each parsed server-sent event.
    /// </summary>
    /// <param name="request">An already validated run request.</param>
    /// <param name="cancellationToken">Token passed to the HTTP send and to the event-stream parser.</param>
    private async IAsyncEnumerable<ServerStreamEvent> RunStreamCoreAsync(
        RunCodeRequest request,
        [EnumeratorCancellation] CancellationToken cancellationToken)
    {
        var url = $"{_baseUrl}/code";
        _logger.LogDebug("Running code stream (codeLength={CodeLength})", request.Code.Length);
        var json = JsonSerializer.Serialize(request, JsonOptions);

        using var httpRequest = new HttpRequestMessage(HttpMethod.Post, url)
        {
            Content = new StringContent(json, Encoding.UTF8, "application/json")
        };

        httpRequest.Headers.Accept.Add(new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue("text/event-stream"));

        foreach (var header in _headers)
        {
            httpRequest.Headers.TryAddWithoutValidation(header.Key, header.Value);
        }

        // ResponseHeadersRead returns as soon as headers arrive so the body can be consumed as a stream.
        using var response = await _sseHttpClient.SendAsync(httpRequest, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);

        await foreach (var ev in SseParser.ParseJsonEventStreamAsync<ServerStreamEvent>(response, "Run code failed", cancellationToken).ConfigureAwait(false))
        {
            yield return ev;
        }
    }

    /// <inheritdoc />
    /// <remarks>
    /// <c>options.Context</c> and <c>options.Language</c> are mutually exclusive. When neither is set the
    /// code runs in a context built from <see cref="SupportedLanguage.Python"/>.
    /// </remarks>
    public async Task<Execution> RunAsync(string code, RunCodeOptions? options = null, CancellationToken cancellationToken = default)
    {
        RequireNonBlank(code, "Code cannot be empty");

        if (options?.Context != null && options.Language != null)
        {
            throw new InvalidArgumentException("Provide either options.Context or options.Language, not both");
        }

        // Precedence: explicit context, then a context built from the requested language, then Python.
        var context = options?.Context
            ?? new CodeContext { Language = options?.Language ?? SupportedLanguage.Python };

        var request = new RunCodeRequest
        {
            Code = code,
            Context = context
        };

        var execution = new Execution();
        _logger.LogDebug("Running code (codeLength={CodeLength})", code.Length);
        var dispatcher = new ExecutionEventDispatcher(execution, options?.Handlers);

        // Each streamed event is handed to the dispatcher, which was constructed over `execution`.
        await foreach (var ev in RunStreamAsync(request, cancellationToken).ConfigureAwait(false))
        {
            await dispatcher.DispatchAsync(ev).ConfigureAwait(false);
        }

        return execution;
    }

    /// <summary>
    /// Throws <see cref="InvalidArgumentException"/> with <paramref name="message"/> when
    /// <paramref name="value"/> is null, empty or whitespace.
    /// </summary>
    private static void RequireNonBlank(string? value, string message)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            throw new InvalidArgumentException(message);
        }
    }

    /// <summary>
    /// Builds the exception used when a response body is missing or lacks the expected fields.
    /// </summary>
    private static SandboxApiException UnexpectedResponse(string message)
    {
        return new SandboxApiException(
            message: message,
            error: new SandboxError(SandboxErrorCodes.UnexpectedResponse, message));
    }
}


================================================
FILE: sdks/code-interpreter/csharp/src/OpenSandbox.CodeInterpreter/CodeInterpreter.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.CodeInterpreter.Factory;
using OpenSandbox.CodeInterpreter.Services;
using OpenSandbox.Config;
using OpenSandbox.Core;
using OpenSandbox.Services;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;

namespace OpenSandbox.CodeInterpreter;

/// <summary>
/// Options for creating a code interpreter.
/// </summary>
/// <remarks>
/// Every property is optional; <c>CodeInterpreter.CreateAsync</c> supplies a fallback for each one that is left null.
/// </remarks>
public class CodeInterpreterCreateOptions
{
    /// <summary>
    /// Gets or sets the adapter factory used to build the codes service.
    /// If not provided, <c>DefaultCodeInterpreterAdapterFactory.Create()</c> is used.
    /// </summary>
    public ICodeInterpreterAdapterFactory? AdapterFactory { get; set; }

    /// <summary>
    /// Gets or sets diagnostics options such as logging.
    /// When this or its logger factory is null, the sandbox's shared logger factory is used,
    /// and a no-op logger factory if that is null as well.
    /// </summary>
    public SdkDiagnosticsOptions? Diagnostics { get; set; }
}

/// <summary>
/// Code interpreter facade for executing code in multiple languages.
/// </summary>
/// <remarks>
/// This class wraps an existing <see cref="Sandbox"/> and provides a high-level API for code execution.
/// Use <see cref="Codes"/> to create contexts and run code.
/// <see cref="Files"/>, <see cref="Commands"/>, and <see cref="Metrics"/> are exposed for convenience
/// and are the same instances as on the underlying <see cref="Sandbox"/>.
/// This type does not own the remote sandbox lifecycle. Call <see cref="Sandbox.KillAsync"/> when you want to terminate
/// the remote instance. Dispose the wrapped <see cref="Sandbox"/> to release local SDK resources.
/// </remarks>
public sealed class CodeInterpreter
{
    /// <summary>
    /// Gets the underlying sandbox instance.
    /// </summary>
    public Sandbox Sandbox { get; }

    /// <summary>
    /// Gets the codes service for code execution operations.
    /// </summary>
    public ICodes Codes { get; }

    /// <summary>
    /// Gets the sandbox ID.
    /// </summary>
    public string Id => Sandbox.Id;

    /// <summary>
    /// Gets the filesystem service.
    /// </summary>
    public ISandboxFiles Files => Sandbox.Files;

    /// <summary>
    /// Gets the command execution service.
    /// </summary>
    public IExecdCommands Commands => Sandbox.Commands;

    /// <summary>
    /// Gets the metrics service.
    /// </summary>
    public IExecdMetrics Metrics => Sandbox.Metrics;

    private readonly ILogger _logger;

    // Private: instances are only created through CreateAsync, which resolves the execd endpoint first.
    private CodeInterpreter(Sandbox sandbox, ICodes codes, ILogger logger)
    {
        Sandbox = sandbox ?? throw new ArgumentNullException(nameof(sandbox));
        Codes = codes ?? throw new ArgumentNullException(nameof(codes));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _logger.LogDebug("Code interpreter initialized for sandbox: {SandboxId}", sandbox.Id);
    }

    /// <summary>
    /// Creates a new code interpreter from an existing sandbox.
    /// </summary>
    /// <remarks>
    /// The execd endpoint is looked up on the sandbox, and the codes service is built against it using the
    /// sandbox's connection configuration, shared HTTP client provider and the resolved logger factory.
    /// Headers returned with the endpoint override connection-level headers of the same name
    /// (names are compared case-insensitively).
    /// </remarks>
    /// <param name="sandbox">The sandbox to wrap.</param>
    /// <param name="options">Optional creation options.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>A new code interpreter instance.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when <paramref name="sandbox"/> is null.</exception>
    /// <exception cref="SandboxException">Thrown when endpoint discovery or adapter initialization fails.</exception>
    public static async Task<CodeInterpreter> CreateAsync(
        Sandbox sandbox,
        CodeInterpreterCreateOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        if (sandbox == null)
        {
            throw new InvalidArgumentException("sandbox cannot be null");
        }

        // Logger factory precedence: explicit diagnostics option, then the sandbox's shared factory, then a no-op factory.
        var loggerFactory = options?.Diagnostics?.LoggerFactory ?? sandbox.SharedLoggerFactory ?? NullLoggerFactory.Instance;
        var logger = loggerFactory.CreateLogger("OpenSandbox.CodeInterpreter.CodeInterpreter");
        var endpoint = await sandbox.GetEndpointAsync(Constants.DefaultExecdPort, cancellationToken).ConfigureAwait(false);
        logger.LogInformation("Creating code interpreter for sandbox: {SandboxId}", sandbox.Id);

        // The URL scheme follows the sandbox's connection protocol.
        var protocol = sandbox.ConnectionConfig.Protocol == ConnectionProtocol.Https ? "https" : "http";
        var execdBaseUrl = $"{protocol}://{endpoint.EndpointAddress}";
        var execdHeaders = MergeHeaders(sandbox.ConnectionConfig.Headers, endpoint.Headers);
        var adapterFactory = options?.AdapterFactory ?? DefaultCodeInterpreterAdapterFactory.Create();

        var codes = adapterFactory.CreateCodes(new CreateCodesStackOptions
        {
            ConnectionConfig = sandbox.ConnectionConfig,
            ExecdBaseUrl = execdBaseUrl,
            ExecdHeaders = execdHeaders,
            HttpClientProvider = sandbox.SharedHttpClientProvider,
            LoggerFactory = loggerFactory
        });

        return new CodeInterpreter(sandbox, codes, logger);
    }

    /// <summary>
    /// Combines two header sets into one, with <paramref name="overrideHeaders"/> winning on conflicts.
    /// </summary>
    /// <param name="baseHeaders">Headers applied first.</param>
    /// <param name="overrideHeaders">Headers applied second; may be null.</param>
    /// <returns>
    /// A new dictionary keyed case-insensitively. When an override replaces an existing entry the value is
    /// updated and the spelling of the first-seen name is kept.
    /// </returns>
    private static IReadOnlyDictionary<string, string> MergeHeaders(
        IReadOnlyDictionary<string, string> baseHeaders,
        IReadOnlyDictionary<string, string>? overrideHeaders)
    {
        // HTTP field names are case-insensitive. With a case-sensitive merge, "X-Token" and "x-token"
        // would both survive and both values would be sent, so the override would not actually override.
        var merged = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

        foreach (var source in new[] { baseHeaders, overrideHeaders })
        {
            if (source == null)
            {
                continue;
            }

            foreach (var header in source)
            {
                // Indexer assignment: later sources replace earlier values instead of throwing on duplicates.
                merged[header.Key] = header.Value;
            }
        }

        return merged;
    }
}


================================================
FILE: sdks/code-interpreter/csharp/src/OpenSandbox.CodeInterpreter/Factory/DefaultCodeInterpreterAdapterFactory.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.CodeInterpreter.Adapters;
using OpenSandbox.CodeInterpreter.Services;
using OpenSandbox.Core;
using OpenSandbox.Internal;
using Microsoft.Extensions.Logging;

namespace OpenSandbox.CodeInterpreter.Factory;

/// <summary>
/// Built-in <see cref="ICodeInterpreterAdapterFactory"/> that wires a <see cref="CodesAdapter"/>
/// to the HTTP clients supplied in the options.
/// </summary>
public class DefaultCodeInterpreterAdapterFactory : ICodeInterpreterAdapterFactory
{
    /// <summary>
    /// Returns a fresh default factory.
    /// </summary>
    /// <returns>A new factory instance.</returns>
    public static DefaultCodeInterpreterAdapterFactory Create()
    {
        return new DefaultCodeInterpreterAdapterFactory();
    }

    /// <inheritdoc />
    /// <remarks>
    /// Every member of <paramref name="options"/> must be populated; the first missing one is reported
    /// through an <see cref="InvalidArgumentException"/>.
    /// </remarks>
    public ICodes CreateCodes(CreateCodesStackOptions options)
    {
        // Guards run in declaration order so the reported member is always the first missing one.
        _ = options ?? throw new InvalidArgumentException("options cannot be null");
        _ = options.ConnectionConfig ?? throw new InvalidArgumentException("options.ConnectionConfig cannot be null");

        if (string.IsNullOrWhiteSpace(options.ExecdBaseUrl))
        {
            throw new InvalidArgumentException("options.ExecdBaseUrl cannot be null or empty");
        }

        _ = options.ExecdHeaders ?? throw new InvalidArgumentException("options.ExecdHeaders cannot be null");
        _ = options.HttpClientProvider ?? throw new InvalidArgumentException("options.HttpClientProvider cannot be null");
        _ = options.LoggerFactory ?? throw new InvalidArgumentException("options.LoggerFactory cannot be null");

        // Request/response wrapper for the non-streaming endpoints.
        var plainHttpClient = options.HttpClientProvider.HttpClient;
        var wrapperLogger = options.LoggerFactory.CreateLogger("OpenSandbox.HttpClientWrapper");
        var wrapper = new HttpClientWrapper(
            plainHttpClient,
            options.ExecdBaseUrl,
            options.ExecdHeaders,
            wrapperLogger);

        // The adapter also gets the SSE client, plus the same base URL and headers, for streaming runs.
        var streamingHttpClient = options.HttpClientProvider.SseHttpClient;
        var adapterLogger = options.LoggerFactory.CreateLogger("OpenSandbox.CodeInterpreter.CodesAdapter");

        return new CodesAdapter(
            wrapper,
            streamingHttpClient,
            options.ExecdBaseUrl,
            options.ExecdHeaders,
            adapterLogger);
    }
}


================================================
FILE: sdks/code-interpreter/csharp/src/OpenSandbox.CodeInterpreter/Factory/ICodeInterpreterAdapterFactory.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.CodeInterpreter.Services;
using OpenSandbox.Config;
using OpenSandbox;
using Microsoft.Extensions.Logging;

namespace OpenSandbox.CodeInterpreter.Factory;

/// <summary>
/// Options for creating a codes service stack.
/// </summary>
/// <remarks>
/// All members are <c>required</c>. <c>CodeInterpreter.CreateAsync</c> populates them from the wrapped sandbox
/// before calling <see cref="ICodeInterpreterAdapterFactory.CreateCodes"/>.
/// </remarks>
public class CreateCodesStackOptions
{
    /// <summary>
    /// Gets or sets the connection configuration.
    /// </summary>
    public required ConnectionConfig ConnectionConfig { get; set; }

    /// <summary>
    /// Gets or sets the execd API base URL (scheme plus endpoint address).
    /// </summary>
    public required string ExecdBaseUrl { get; set; }

    /// <summary>
    /// Gets or sets headers to apply to execd requests.
    /// </summary>
    public required IReadOnlyDictionary<string, string> ExecdHeaders { get; set; }

    /// <summary>
    /// Gets or sets the HTTP client provider for this SDK instance.
    /// The default factory reads both its regular and its SSE HTTP client.
    /// </summary>
    public required HttpClientProvider HttpClientProvider { get; set; }

    /// <summary>
    /// Gets or sets the logger factory for this SDK instance.
    /// </summary>
    public required ILoggerFactory LoggerFactory { get; set; }
}

/// <summary>
/// Factory interface for creating code interpreter adapters.
/// </summary>
/// <remarks>
/// Supply a custom implementation through <c>CodeInterpreterCreateOptions.AdapterFactory</c> to replace
/// the built-in adapter, for example in tests.
/// </remarks>
public interface ICodeInterpreterAdapterFactory
{
    /// <summary>
    /// Creates a codes service instance.
    /// </summary>
    /// <param name="options">The creation options (connection settings, execd URL and headers, HTTP clients, logging).</param>
    /// <returns>The codes service.</returns>
    ICodes CreateCodes(CreateCodesStackOptions options);
}


================================================
FILE: sdks/code-interpreter/csharp/src/OpenSandbox.CodeInterpreter/Models/CodeModels.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System.Text.Json.Serialization;

namespace OpenSandbox.CodeInterpreter.Models;

/// <summary>
/// Supported programming languages for code execution.
/// </summary>
/// <remarks>
/// The values are the lowercase identifiers used for the <c>language</c> field of contexts and
/// context requests. They are plain string constants, so any API taking a language accepts them directly.
/// </remarks>
public static class SupportedLanguage
{
    /// <summary>
    /// Python language (<c>"python"</c>).
    /// </summary>
    public const string Python = "python";

    /// <summary>
    /// Java language (<c>"java"</c>).
    /// </summary>
    public const string Java = "java";

    /// <summary>
    /// Go language (<c>"go"</c>).
    /// </summary>
    public const string Go = "go";

    /// <summary>
    /// TypeScript language (<c>"typescript"</c>).
    /// </summary>
    public const string TypeScript = "typescript";

    /// <summary>
    /// JavaScript language (<c>"javascript"</c>).
    /// </summary>
    public const string JavaScript = "javascript";

    /// <summary>
    /// Bash shell (<c>"bash"</c>).
    /// </summary>
    public const string Bash = "bash";
}

/// <summary>
/// Represents a code execution context.
/// </summary>
public class CodeContext
{
    /// <summary>
    /// Gets or sets the context ID (JSON property <c>id</c>).
    /// May be null, for example on a context built locally from a language only.
    /// </summary>
    [JsonPropertyName("id")]
    public string? Id { get; set; }

    /// <summary>
    /// Gets or sets the programming language (JSON property <c>language</c>).
    /// See <see cref="SupportedLanguage"/> for the predefined identifiers.
    /// </summary>
    [JsonPropertyName("language")]
    public required string Language { get; set; }
}

/// <summary>
/// Request to run code. Both members are required.
/// </summary>
public class RunCodeRequest
{
    /// <summary>
    /// Gets or sets the code to execute (JSON property <c>code</c>).
    /// </summary>
    [JsonPropertyName("code")]
    public required string Code { get; set; }

    /// <summary>
    /// Gets or sets the execution context (JSON property <c>context</c>).
    /// </summary>
    [JsonPropertyName("context")]
    public required CodeContext Context { get; set; }
}

/// <summary>
/// Options for running code.
/// </summary>
/// <remarks>
/// Set at most one of <see cref="Context"/> and <see cref="Language"/>.
/// When both are null, the built-in adapter runs the code as <see cref="SupportedLanguage.Python"/>.
/// </remarks>
public class RunCodeOptions
{
    /// <summary>
    /// Gets or sets the execution context. If provided, code runs in this context.
    /// </summary>
    public CodeContext? Context { get; set; }

    /// <summary>
    /// Gets or sets the language whose default context should run the code.
    /// Cannot be used together with Context.
    /// </summary>
    /// <remarks>
    /// When only <see cref="Language"/> is provided and <see cref="Context"/> is null, execd creates or reuses
    /// a default session for that language, so state can persist across runs.
    /// </remarks>
    public string? Language { get; set; }

    /// <summary>
    /// Gets or sets the execution event handlers. May be null when no callbacks are needed.
    /// </summary>
    public OpenSandbox.Models.ExecutionHandlers? Handlers { get; set; }
}

/// <summary>
/// Request to create a code context. Internal wire model; callers pass the language to
/// <c>ICodes.CreateContextAsync</c> instead of building this type.
/// </summary>
internal class CreateContextRequest
{
    /// <summary>
    /// Gets or sets the programming language (JSON property <c>language</c>).
    /// </summary>
    [JsonPropertyName("language")]
    public required string Language { get; set; }
}


================================================
FILE: sdks/code-interpreter/csharp/src/OpenSandbox.CodeInterpreter/OpenSandbox.CodeInterpreter.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <!-- Multi-targeted so the package serves both .NET Standard consumers and current .NET runtimes. -->
    <TargetFrameworks>netstandard2.0;netstandard2.1;net6.0;net7.0;net8.0;net9.0;net10.0</TargetFrameworks>
    <LangVersion>latest</LangVersion>
    <Nullable>enable</Nullable>
    <ImplicitUsings>enable</ImplicitUsings>
    <RootNamespace>OpenSandbox.CodeInterpreter</RootNamespace>
    <AssemblyName>OpenSandbox.CodeInterpreter</AssemblyName>

    <!-- Package Information -->
    <PackageId>Alibaba.OpenSandbox.CodeInterpreter</PackageId>
    <!-- The version is not hard-coded here; it comes from the OpenSandboxCodeInterpreterPackageVersion property. -->
    <Version>$(OpenSandboxCodeInterpreterPackageVersion)</Version>
    <Authors>Alibaba Group</Authors>
    <Company>Alibaba Group Holding Ltd.</Company>
    <Product>OpenSandbox Code Interpreter SDK</Product>
    <Description>A C# SDK for code interpretation with OpenSandbox. Provides high-level APIs for executing code in multiple languages (Python, JavaScript, TypeScript, Go, Java, Bash) within secure sandbox environments.</Description>
    <Copyright>Copyright 2026 Alibaba Group Holding Ltd.</Copyright>
    <PackageLicenseExpression>Apache-2.0</PackageLicenseExpression>
    <PackageProjectUrl>https://open-sandbox.ai</PackageProjectUrl>
    <RepositoryUrl>https://github.com/alibaba/OpenSandbox.git</RepositoryUrl>
    <RepositoryType>git</RepositoryType>
    <PackageTags>sandbox;code-interpreter;execution;opensandbox;alibaba;python;javascript</PackageTags>
    <PackageReadmeFile>README.md</PackageReadmeFile>

    <!-- Build Settings -->
    <GenerateDocumentationFile>true</GenerateDocumentationFile>
    <!-- CS1591 (missing XML comment on a publicly visible member) is suppressed although documentation output is on. -->
    <NoWarn>$(NoWarn);CS1591</NoWarn>
    <!-- Warnings fail the build in Release only; other configurations still compile with warnings. -->
    <TreatWarningsAsErrors Condition="'$(Configuration)' == 'Release'">true</TreatWarningsAsErrors>

    <!--
      Default to local source reference for development.
      For release packing with NuGet version range, run:
      dotnet pack -c Release -p:UseLocalOpenSandboxProjectReference=false
    -->
    <UseLocalOpenSandboxProjectReference Condition="'$(UseLocalOpenSandboxProjectReference)' == ''">true</UseLocalOpenSandboxProjectReference>
  </PropertyGroup>

  <!-- Expose internals to test project -->
  <ItemGroup>
    <InternalsVisibleTo Include="OpenSandbox.CodeInterpreter.Tests" />
  </ItemGroup>

  <!-- Use local project reference for day-to-day development -->
  <ItemGroup Condition="'$(UseLocalOpenSandboxProjectReference)' == 'true'">
    <ProjectReference Include="..\..\..\..\sandbox\csharp\src\OpenSandbox\OpenSandbox.csproj" />
  </ItemGroup>

  <!-- Use NuGet dependency range for release packaging -->
  <ItemGroup Condition="'$(UseLocalOpenSandboxProjectReference)' != 'true'">
    <PackageReference Include="Alibaba.OpenSandbox" Version="$(OpenSandboxDependencyVersionRange)" />
  </ItemGroup>

  <!-- Common Dependencies -->
  <ItemGroup>
    <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="8.0.2" />
    <PackageReference Include="System.Text.Json" Version="8.0.5" />
    <!-- PolySharp is referenced with PrivateAssets=all, so it does not flow to consumers of this package. -->
    <PackageReference Include="PolySharp" Version="1.14.1">
      <PrivateAssets>all</PrivateAssets>
      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
    </PackageReference>
  </ItemGroup>

  <!-- .NET Standard 2.0 specific dependencies -->
  <ItemGroup Condition="'$(TargetFramework)' == 'netstandard2.0'">
    <!-- Supplies the async-stream interfaces (IAsyncEnumerable and friends) that netstandard2.0 lacks. -->
    <PackageReference Include="Microsoft.Bcl.AsyncInterfaces" Version="8.0.0" />
  </ItemGroup>

  <!-- Package files -->
  <ItemGroup>
    <!-- Packs the README two directories up into the package root, matching PackageReadmeFile above. -->
    <None Include="..\..\README.md" Pack="true" PackagePath="\" />
  </ItemGroup>

</Project>


================================================
FILE: sdks/code-interpreter/csharp/src/OpenSandbox.CodeInterpreter/Services/ICodes.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.CodeInterpreter.Models;
using OpenSandbox.Core;
using OpenSandbox.Models;

namespace OpenSandbox.CodeInterpreter.Services;

/// <summary>
/// Service interface for code execution operations: managing execution contexts, running code
/// (buffered or streamed) and interrupting a running execution.
/// </summary>
public interface ICodes
{
    /// <summary>
    /// Creates a new code execution context for the specified language.
    /// </summary>
    /// <param name="language">The programming language (use <see cref="SupportedLanguage"/> constants).</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The created context.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when <paramref name="language"/> is null, empty or whitespace.</exception>
    /// <exception cref="SandboxException">Thrown when the sandbox service request fails.</exception>
    Task<CodeContext> CreateContextAsync(string language, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets an existing context by ID.
    /// </summary>
    /// <param name="contextId">The context ID.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The context.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when <paramref name="contextId"/> is null, empty or whitespace.</exception>
    /// <exception cref="SandboxException">Thrown when the sandbox service request fails.</exception>
    Task<CodeContext> GetContextAsync(string contextId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Lists active contexts for the specified language.
    /// </summary>
    /// <param name="language">Required language filter.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>List of contexts.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when <paramref name="language"/> is null, empty or whitespace.</exception>
    /// <exception cref="SandboxException">Thrown when the sandbox service request fails.</exception>
    Task<IReadOnlyList<CodeContext>> ListContextsAsync(string language, CancellationToken cancellationToken = default);

    /// <summary>
    /// Deletes a context by ID.
    /// </summary>
    /// <param name="contextId">The context ID.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <exception cref="InvalidArgumentException">Thrown when <paramref name="contextId"/> is null, empty or whitespace.</exception>
    /// <exception cref="SandboxException">Thrown when the sandbox service request fails.</exception>
    Task DeleteContextAsync(string contextId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Deletes all contexts for the specified language.
    /// </summary>
    /// <param name="language">The programming language.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <exception cref="InvalidArgumentException">Thrown when <paramref name="language"/> is null, empty or whitespace.</exception>
    /// <exception cref="SandboxException">Thrown when the sandbox service request fails.</exception>
    Task DeleteContextsAsync(string language, CancellationToken cancellationToken = default);

    /// <summary>
    /// Runs code to completion and returns the collected execution result.
    /// </summary>
    /// <param name="code">The code to execute.</param>
    /// <param name="options">
    /// Optional execution options. <see cref="RunCodeOptions.Context"/> and <see cref="RunCodeOptions.Language"/>
    /// are alternatives; the built-in adapter rejects a call that sets both.
    /// </param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The execution result.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when required request fields are missing.</exception>
    /// <exception cref="SandboxException">Thrown when the sandbox service request fails.</exception>
    Task<Execution> RunAsync(string code, RunCodeOptions? options = null, CancellationToken cancellationToken = default);

    /// <summary>
    /// Runs code and streams execution events as they arrive.
    /// </summary>
    /// <param name="request">The run code request.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>An async enumerable of server stream events.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when the request is invalid.</exception>
    /// <exception cref="SandboxException">Thrown when the sandbox service request fails.</exception>
    IAsyncEnumerable<ServerStreamEvent> RunStreamAsync(RunCodeRequest request, CancellationToken cancellationToken = default);

    /// <summary>
    /// Interrupts a running code execution.
    /// </summary>
    /// <param name="contextId">The context ID to interrupt.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <exception cref="InvalidArgumentException">Thrown when <paramref name="contextId"/> is null, empty or whitespace.</exception>
    /// <exception cref="SandboxException">Thrown when the sandbox service request fails.</exception>
    Task InterruptAsync(string contextId, CancellationToken cancellationToken = default);
}


================================================
FILE: sdks/code-interpreter/csharp/tests/OpenSandbox.CodeInterpreter.Tests/CodeInterpreterTests.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.CodeInterpreter.Models;
using OpenSandbox.Core;
using Xunit;

namespace OpenSandbox.CodeInterpreter.Tests;

/// <summary>
/// Unit tests covering <c>CodeInterpreter.CreateAsync</c> argument validation and the
/// <c>CodeInterpreterCreateOptions</c> property bag.
/// </summary>
public class CodeInterpreterTests
{
    /// <summary>A null sandbox must be rejected with <c>InvalidArgumentException</c>.</summary>
    [Fact]
    public async Task CreateAsync_ThrowsOnNullSandbox()
    {
        Func<Task> createWithNullSandbox = () => CodeInterpreter.CreateAsync(null!);

        await Assert.ThrowsAsync<InvalidArgumentException>(createWithNullSandbox);
    }

    /// <summary>A freshly constructed options object carries no adapter factory.</summary>
    [Fact]
    public void CodeInterpreterCreateOptions_DefaultsAreNull()
    {
        Assert.Null(new CodeInterpreterCreateOptions().AdapterFactory);
    }

    /// <summary>The adapter factory assigned through the initializer is returned unchanged.</summary>
    [Fact]
    public void CodeInterpreterCreateOptions_CanSetAdapterFactory()
    {
        var expectedFactory = new TestAdapterFactory();

        var options = new CodeInterpreterCreateOptions { AdapterFactory = expectedFactory };

        Assert.Same(expectedFactory, options.AdapterFactory);
    }

    /// <summary>
    /// Placeholder factory used only for reference-identity checks; it is never asked to build anything.
    /// </summary>
    private class TestAdapterFactory : Factory.ICodeInterpreterAdapterFactory
    {
        public Services.ICodes CreateCodes(Factory.CreateCodesStackOptions options)
            => throw new NotImplementedException();
    }
}


================================================
FILE: sdks/code-interpreter/csharp/tests/OpenSandbox.CodeInterpreter.Tests/CodesAdapterTests.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System.Net;
using System.Net.Http.Headers;
using System.Text;
using OpenSandbox.CodeInterpreter.Adapters;
using OpenSandbox.CodeInterpreter.Models;
using OpenSandbox.Core;
using OpenSandbox.Internal;
using OpenSandbox.Models;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using Xunit;

namespace OpenSandbox.CodeInterpreter.Tests;

/// <summary>
/// Tests for <c>CodesAdapter</c> driven entirely through stubbed HTTP handlers: one handler backs the
/// regular HTTP client and a second one backs the SSE client.
/// </summary>
public class CodesAdapterTests
{
    /// <summary>A whitespace-only language is rejected with <c>InvalidArgumentException</c>.</summary>
    [Fact]
    public async Task ListContextsAsync_ThrowsOnEmptyLanguage()
    {
        var adapter = CreateAdapter(OkHandler(), OkHandler());

        Func<Task> listWithBlankLanguage = () => adapter.ListContextsAsync(" ");

        await Assert.ThrowsAsync<InvalidArgumentException>(listWithBlankLanguage);
    }

    /// <summary>The language filter is sent as a query-string parameter and the JSON array is parsed.</summary>
    [Fact]
    public async Task ListContextsAsync_SendsLanguageQuery()
    {
        var restHandler = new StubHttpMessageHandler((_, _) =>
            Task.FromResult(new HttpResponseMessage(HttpStatusCode.OK)
            {
                Content = new StringContent(
                    "[{\"id\":\"ctx-1\",\"language\":\"python\"}]",
                    Encoding.UTF8,
                    "application/json")
            }));
        var adapter = CreateAdapter(restHandler, OkHandler());

        var contexts = await adapter.ListContextsAsync("python");

        Assert.Single(contexts);
        Assert.Equal("python", contexts[0].Language);
        Assert.Contains(
            restHandler.RequestUris,
            uri => uri.Contains("/code/contexts?language=python", StringComparison.Ordinal));
    }

    /// <summary>Whitespace-only code is rejected; the stream is drained so the failure is observed.</summary>
    [Fact]
    public async Task RunStreamAsync_ThrowsOnEmptyCode()
    {
        var adapter = CreateAdapter(OkHandler(), OkHandler());
        var blankCodeRequest = new RunCodeRequest
        {
            Code = "   ",
            Context = new CodeContext { Language = SupportedLanguage.Python }
        };

        Func<Task> drainStream = () => DrainAsync(adapter.RunStreamAsync(blankCodeRequest));

        await Assert.ThrowsAsync<InvalidArgumentException>(drainStream);
    }

    /// <summary>A single SSE <c>data:</c> frame becomes one stdout event, requested with an SSE Accept header.</summary>
    [Fact]
    public async Task RunStreamAsync_ParsesSseEvent()
    {
        var sseHandler = new StubHttpMessageHandler((_, _) =>
            Task.FromResult(new HttpResponseMessage(HttpStatusCode.OK)
            {
                Content = new StringContent(
                    "data: {\"type\":\"stdout\",\"text\":\"hello\",\"timestamp\":1}\n\n",
                    Encoding.UTF8,
                    "text/event-stream")
            }));
        var adapter = CreateAdapter(OkHandler(), sseHandler);
        var request = new RunCodeRequest
        {
            Code = "print('hello')",
            Context = new CodeContext { Language = SupportedLanguage.Python }
        };

        var events = await CollectAsync(adapter.RunStreamAsync(request));

        var onlyEvent = Assert.Single(events);
        Assert.Equal(ServerStreamEventTypes.Stdout, onlyEvent.Type);
        Assert.Equal("hello", onlyEvent.Text);
        Assert.Contains(
            sseHandler.AcceptHeaders,
            value => value.Contains("text/event-stream", StringComparison.OrdinalIgnoreCase));
    }

    /// <summary>Enumerates <paramref name="source"/> to completion, discarding every item.</summary>
    private static async Task DrainAsync<T>(IAsyncEnumerable<T> source)
    {
        await foreach (var _ in source)
        {
        }
    }

    /// <summary>Enumerates <paramref name="source"/> to completion and returns the items in arrival order.</summary>
    private static async Task<List<T>> CollectAsync<T>(IAsyncEnumerable<T> source)
    {
        var items = new List<T>();
        await foreach (var item in source)
        {
            items.Add(item);
        }

        return items;
    }

    /// <summary>Creates a stub handler that answers every request with an empty 200 OK response.</summary>
    private static StubHttpMessageHandler OkHandler()
    {
        return new StubHttpMessageHandler((_, _) => Task.FromResult(new HttpResponseMessage(HttpStatusCode.OK)));
    }

    /// <summary>
    /// Builds a <c>CodesAdapter</c> whose regular and SSE HTTP clients are backed by the supplied handlers.
    /// </summary>
    private static CodesAdapter CreateAdapter(HttpMessageHandler httpHandler, HttpMessageHandler sseHandler)
    {
        const string baseUrl = "http://execd.local";
        var headers = new Dictionary<string, string> { ["X-Test"] = "true" };
        var logger = NullLoggerFactory.Instance.CreateLogger("CodesAdapterTests");

        var restClient = new HttpClientWrapper(new HttpClient(httpHandler), baseUrl, headers);
        var sseClient = new HttpClient(sseHandler);

        return new CodesAdapter(restClient, sseClient, baseUrl, headers, logger);
    }

    /// <summary>
    /// Message handler that records each request's URI and Accept media types, then delegates the
    /// response to the callback supplied at construction.
    /// </summary>
    private sealed class StubHttpMessageHandler : HttpMessageHandler
    {
        private readonly Func<HttpRequestMessage, CancellationToken, Task<HttpResponseMessage>> _respond;

        public StubHttpMessageHandler(Func<HttpRequestMessage, CancellationToken, Task<HttpResponseMessage>> handler)
            => _respond = handler;

        /// <summary>Absolute URIs of the requests seen so far (empty string when a request had none).</summary>
        public List<string> RequestUris { get; } = new();

        /// <summary>Per request, the comma-joined media types from its Accept header.</summary>
        public List<string> AcceptHeaders { get; } = new();

        protected override async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
        {
            var requestUri = request.RequestUri is null ? string.Empty : request.RequestUri.ToString();
            RequestUris.Add(requestUri);

            var acceptedTypes = request.Headers.Accept.Select(static header => header.MediaType ?? string.Empty);
            AcceptHeaders.Add(string.Join(",", acceptedTypes));

            return await _respond(request, cancellationToken).ConfigureAwait(false);
        }
    }
}


================================================
FILE: sdks/code-interpreter/csharp/tests/OpenSandbox.CodeInterpreter.Tests/FactoryTests.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.CodeInterpreter.Factory;
using OpenSandbox.Core;
using OpenSandbox;
using Microsoft.Extensions.Logging.Abstractions;
using Xunit;

namespace OpenSandbox.CodeInterpreter.Tests;

/// <summary>
/// Tests for <c>DefaultCodeInterpreterAdapterFactory</c> argument validation and for the
/// <c>CreateCodesStackOptions</c> initializer.
/// </summary>
public class FactoryTests
{
    /// <summary><c>Create()</c> yields a non-null instance of the default factory type.</summary>
    [Fact]
    public void DefaultCodeInterpreterAdapterFactory_Create_ReturnsInstance()
    {
        var created = DefaultCodeInterpreterAdapterFactory.Create();

        Assert.NotNull(created);
        Assert.IsType<DefaultCodeInterpreterAdapterFactory>(created);
    }

    /// <summary>Null options are rejected with <c>InvalidArgumentException</c>.</summary>
    [Fact]
    public void DefaultCodeInterpreterAdapterFactory_CreateCodes_ThrowsOnNullOptions()
    {
        var factory = DefaultCodeInterpreterAdapterFactory.Create();

        Assert.Throws<InvalidArgumentException>(() => factory.CreateCodes(null!));
    }

    /// <summary>Options lacking a connection config are rejected with <c>InvalidArgumentException</c>.</summary>
    [Fact]
    public void DefaultCodeInterpreterAdapterFactory_CreateCodes_ThrowsOnNullConnectionConfig()
    {
        var factory = DefaultCodeInterpreterAdapterFactory.Create();
        var optionsWithoutConfig = BuildOptions(connectionConfig: null, execdBaseUrl: "http://localhost:44772");

        Assert.Throws<InvalidArgumentException>(() => factory.CreateCodes(optionsWithoutConfig));
    }

    /// <summary>Options with an empty execd base URL are rejected with <c>InvalidArgumentException</c>.</summary>
    [Fact]
    public void DefaultCodeInterpreterAdapterFactory_CreateCodes_ThrowsOnEmptyBaseUrl()
    {
        var factory = DefaultCodeInterpreterAdapterFactory.Create();
        var optionsWithoutUrl = BuildOptions(new OpenSandbox.Config.ConnectionConfig(), execdBaseUrl: "");

        Assert.Throws<InvalidArgumentException>(() => factory.CreateCodes(optionsWithoutUrl));
    }

    /// <summary>Every property set through the initializer can be read back.</summary>
    [Fact]
    public void CreateCodesStackOptions_RequiredProperties()
    {
        var options = BuildOptions(
            new OpenSandbox.Config.ConnectionConfig(),
            "http://test:8080",
            new Dictionary<string, string> { ["X-Test"] = "value" });

        Assert.NotNull(options.ConnectionConfig);
        Assert.Equal("http://test:8080", options.ExecdBaseUrl);
        Assert.Equal("value", options.ExecdHeaders["X-Test"]);
        Assert.NotNull(options.HttpClientProvider);
        Assert.NotNull(options.LoggerFactory);
    }

    /// <summary>
    /// Assembles a <c>CreateCodesStackOptions</c> with a null-logging HTTP client provider and logger factory.
    /// </summary>
    /// <param name="connectionConfig">Connection config to store; may be null to exercise validation.</param>
    /// <param name="execdBaseUrl">Value for <c>ExecdBaseUrl</c>.</param>
    /// <param name="execdHeaders">Headers to store; an empty dictionary is used when omitted.</param>
    private static CreateCodesStackOptions BuildOptions(
        OpenSandbox.Config.ConnectionConfig? connectionConfig,
        string execdBaseUrl,
        Dictionary<string, string>? execdHeaders = null)
    {
        return new CreateCodesStackOptions
        {
            ConnectionConfig = connectionConfig!,
            ExecdBaseUrl = execdBaseUrl,
            ExecdHeaders = execdHeaders ?? new Dictionary<string, string>(),
            HttpClientProvider = new HttpClientProvider(new OpenSandbox.Config.ConnectionConfig(), NullLoggerFactory.Instance),
            LoggerFactory = NullLoggerFactory.Instance
        };
    }
}


================================================
FILE: sdks/code-interpreter/csharp/tests/OpenSandbox.CodeInterpreter.Tests/ModelsTests.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System.Text.Json;
using OpenSandbox.CodeInterpreter.Models;
using Xunit;

namespace OpenSandbox.CodeInterpreter.Tests;

/// <summary>
/// Tests for the Code Interpreter model types: language constants, JSON round-tripping of
/// <c>CodeContext</c> and <c>RunCodeRequest</c>, and the <c>RunCodeOptions</c> property bag.
/// </summary>
public class ModelsTests
{
    /// <summary>Each language constant maps to its lowercase wire name.</summary>
    [Fact]
    public void SupportedLanguage_HasCorrectValues()
    {
        var expectations = new (string Expected, string Actual)[]
        {
            ("python", SupportedLanguage.Python),
            ("java", SupportedLanguage.Java),
            ("go", SupportedLanguage.Go),
            ("typescript", SupportedLanguage.TypeScript),
            ("javascript", SupportedLanguage.JavaScript),
            ("bash", SupportedLanguage.Bash),
        };

        foreach (var (expected, actual) in expectations)
        {
            Assert.Equal(expected, actual);
        }
    }

    /// <summary>Serialization emits the lowercase <c>id</c> and <c>language</c> members.</summary>
    [Fact]
    public void CodeContext_SerializesToJson()
    {
        var serialized = JsonSerializer.Serialize(new CodeContext
        {
            Id = "ctx-123",
            Language = SupportedLanguage.Python
        });

        Assert.Contains("\"id\":\"ctx-123\"", serialized);
        Assert.Contains("\"language\":\"python\"", serialized);
    }

    /// <summary>Both members are populated when present in the payload.</summary>
    [Fact]
    public void CodeContext_DeserializesFromJson()
    {
        var parsed = JsonSerializer.Deserialize<CodeContext>("{\"id\":\"ctx-456\",\"language\":\"javascript\"}");

        Assert.NotNull(parsed);
        Assert.Equal("ctx-456", parsed.Id);
        Assert.Equal("javascript", parsed.Language);
    }

    /// <summary>A payload without <c>id</c> leaves <c>Id</c> null.</summary>
    [Fact]
    public void CodeContext_DeserializesWithNullId()
    {
        var parsed = JsonSerializer.Deserialize<CodeContext>("{\"language\":\"python\"}");

        Assert.NotNull(parsed);
        Assert.Null(parsed.Id);
        Assert.Equal("python", parsed.Language);
    }

    /// <summary>The request serializes its code plus the nested context members.</summary>
    [Fact]
    public void RunCodeRequest_SerializesToJson()
    {
        var nestedContext = new CodeContext
        {
            Id = "ctx-789",
            Language = SupportedLanguage.Python
        };

        var serialized = JsonSerializer.Serialize(new RunCodeRequest
        {
            Code = "print(\"hello\")",
            Context = nestedContext
        });

        var expectedFragments = new[]
        {
            "\"code\":",
            "print",
            "\"context\":",
            "\"id\":\"ctx-789\"",
            "\"language\":\"python\"",
        };
        foreach (var fragment in expectedFragments)
        {
            Assert.Contains(fragment, serialized);
        }
    }

    /// <summary>The code and the nested context are both restored from JSON.</summary>
    [Fact]
    public void RunCodeRequest_DeserializesFromJson()
    {
        var parsed = JsonSerializer.Deserialize<RunCodeRequest>(
            "{\"code\":\"console.log('test')\",\"context\":{\"id\":\"ctx-abc\",\"language\":\"javascript\"}}");

        Assert.NotNull(parsed);
        Assert.Equal("console.log('test')", parsed.Code);
        Assert.NotNull(parsed.Context);
        Assert.Equal("ctx-abc", parsed.Context.Id);
        Assert.Equal("javascript", parsed.Context.Language);
    }

    /// <summary>All optional members start out null.</summary>
    [Fact]
    public void RunCodeOptions_DefaultsAreNull()
    {
        var defaults = new RunCodeOptions();

        Assert.Null(defaults.Context);
        Assert.Null(defaults.Language);
        Assert.Null(defaults.Handlers);
    }

    /// <summary>Context and handlers are stored by reference.</summary>
    [Fact]
    public void RunCodeOptions_CanSetProperties()
    {
        var expectedHandlers = new OpenSandbox.Models.ExecutionHandlers();
        var expectedContext = new CodeContext { Language = SupportedLanguage.Go };

        var options = new RunCodeOptions
        {
            Context = expectedContext,
            Handlers = expectedHandlers
        };

        Assert.Same(expectedContext, options.Context);
        Assert.Same(expectedHandlers, options.Handlers);
    }

    /// <summary>Setting only the language leaves the context unset.</summary>
    [Fact]
    public void RunCodeOptions_CanSetLanguageOnly()
    {
        var options = new RunCodeOptions { Language = SupportedLanguage.TypeScript };

        Assert.Null(options.Context);
        Assert.Equal(SupportedLanguage.TypeScript, options.Language);
    }
}


================================================
FILE: sdks/code-interpreter/csharp/tests/OpenSandbox.CodeInterpreter.Tests/OpenSandbox.CodeInterpreter.Tests.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <TargetFramework>net10.0</TargetFramework>
    <LangVersion>latest</LangVersion>
    <Nullable>enable</Nullable>
    <ImplicitUsings>enable</ImplicitUsings>
    <IsPackable>false</IsPackable>
    <IsTestProject>true</IsTestProject>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.11.1" />
    <PackageReference Include="xunit" Version="2.9.2" />
    <PackageReference Include="xunit.runner.visualstudio" Version="2.8.2">
      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
      <PrivateAssets>all</PrivateAssets>
    </PackageReference>
    <PackageReference Include="coverlet.collector" Version="6.0.2">
      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
      <PrivateAssets>all</PrivateAssets>
    </PackageReference>
  </ItemGroup>

  <ItemGroup>
    <ProjectReference Include="..\..\src\OpenSandbox.CodeInterpreter\OpenSandbox.CodeInterpreter.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: sdks/code-interpreter/javascript/.nvmrc
================================================
20


================================================
FILE: sdks/code-interpreter/javascript/README.md
================================================
# Alibaba Code Interpreter SDK for JavaScript/TypeScript

English | [中文](README_zh.md)

A TypeScript/JavaScript SDK for executing code in secure, isolated sandboxes. It provides a high-level API for running Python, Java, Go, TypeScript, and other languages safely, with support for code execution contexts.

## Prerequisites

This SDK requires a Docker image containing the Code Interpreter runtime environment. You must use the `opensandbox/code-interpreter` image (or a derivative), which includes pre-installed runtimes for Python, Java, Go, Node.js, etc.

For detailed information about supported languages and versions, please refer to the [Environment Documentation](../../../sandboxes/code-interpreter/README.md).

## Installation

### npm

```bash
npm install @alibaba-group/opensandbox-code-interpreter
```

### pnpm

```bash
pnpm add @alibaba-group/opensandbox-code-interpreter
```

### yarn

```bash
yarn add @alibaba-group/opensandbox-code-interpreter
```

## Quick Start

The following example demonstrates how to create a sandbox with a specific runtime configuration and execute a simple script.

> **Note**: Before running this example, ensure the OpenSandbox service is running. See the root [README.md](../../../README.md) for startup instructions.

```ts
import { ConnectionConfig, Sandbox } from "@alibaba-group/opensandbox";
import { CodeInterpreter, SupportedLanguages } from "@alibaba-group/opensandbox-code-interpreter";

// 1. Configure connection
const config = new ConnectionConfig({
  domain: "api.opensandbox.io",
  apiKey: "your-api-key",
});

// 2. Create a Sandbox with the code-interpreter image + runtime versions
const sandbox = await Sandbox.create({
  connectionConfig: config,
  image: "opensandbox/code-interpreter:v1.0.2",
  entrypoint: ["/opt/opensandbox/code-interpreter.sh"],
  env: {
    PYTHON_VERSION: "3.11",
    JAVA_VERSION: "17",
    NODE_VERSION: "20",
    GO_VERSION: "1.24",
  },
  timeoutSeconds: 15 * 60,
});

// 3. Create CodeInterpreter wrapper
const ci = await CodeInterpreter.create(sandbox);

// 4. Create an execution context (Python)
const ctx = await ci.codes.createContext(SupportedLanguages.PYTHON);

// 5. Run code
const result = await ci.codes.run("import sys\nprint(sys.version)\nresult = 2 + 2\nresult", {
  context: ctx,
});

// 6. Print output
console.log(result.result[0]?.text);

// 7. Cleanup remote instance (optional but recommended)
await sandbox.kill();
await sandbox.close();
```

## Runtime Configuration

### Docker Image

The Code Interpreter SDK relies on a specialized environment. Ensure your sandbox provider has the `opensandbox/code-interpreter` image available.

### Language Version Selection

You can specify the desired version of a programming language by setting the corresponding environment variable when creating the `Sandbox`.

| Language | Environment Variable | Example Value | Default (if unset) |
| --- | --- | --- | --- |
| Python | `PYTHON_VERSION` | `3.11` | Image default |
| Java | `JAVA_VERSION` | `17` | Image default |
| Node.js | `NODE_VERSION` | `20` | Image default |
| Go | `GO_VERSION` | `1.24` | Image default |

```ts
const sandbox = await Sandbox.create({
  connectionConfig: config,
  image: "opensandbox/code-interpreter:v1.0.2",
  entrypoint: ["/opt/opensandbox/code-interpreter.sh"],
  env: {
    JAVA_VERSION: "17",
    GO_VERSION: "1.24",
  },
});
```

## Usage Examples

### 0. Run with `language` (default language context)

If you don't need to manage explicit context IDs, you can run code by specifying only `language`.
When `context.id` is omitted, execd can create/reuse a default session for that language, so state can persist across runs.

```ts
import { SupportedLanguages } from "@alibaba-group/opensandbox-code-interpreter";

await ci.codes.run("x = 42", { language: SupportedLanguages.PYTHON });
const execution = await ci.codes.run("result = x\nresult", { language: SupportedLanguages.PYTHON });
console.log(execution.result[0]?.text); // "42"
```

### 0.1 Context management (list/get/delete)

You can manage contexts explicitly (aligned with Python/Kotlin SDKs):

```ts
const ctx = await ci.codes.createContext(SupportedLanguages.PYTHON);

const same = await ci.codes.getContext(ctx.id!);
console.log(same.id, same.language);

const all = await ci.codes.listContexts();
const pyOnly = await ci.codes.listContexts(SupportedLanguages.PYTHON);

await ci.codes.deleteContext(ctx.id!);
await ci.codes.deleteContexts(SupportedLanguages.PYTHON); // bulk cleanup
```

### 1. Java Code Execution

```ts
import { SupportedLanguages } from "@alibaba-group/opensandbox-code-interpreter";

const javaCtx = await ci.codes.createContext(SupportedLanguages.JAVA);
const execution = await ci.codes.run(
  [
    'System.out.println("Calculating sum...");',
    "int a = 10;",
    "int b = 20;",
    "int sum = a + b;",
    'System.out.println("Sum: " + sum);',
    "sum",
  ].join("\n"),
  { context: javaCtx },
);
console.log(execution.logs.stdout.map((m) => m.text));
```

### 2. Streaming Output Handling

Handle stdout/stderr and execution events in real-time.

```ts
import type { ExecutionHandlers } from "@alibaba-group/opensandbox";
import { SupportedLanguages } from "@alibaba-group/opensandbox-code-interpreter";

const handlers: ExecutionHandlers = {
  onStdout: (m) => console.log("STDOUT:", m.text),
  onStderr: (m) => console.error("STDERR:", m.text),
  onResult: (r) => console.log("RESULT:", r.text),
};

const pyCtx = await ci.codes.createContext(SupportedLanguages.PYTHON);
await ci.codes.run("import time\nfor i in range(5):\n    print(i)\n    time.sleep(0.2)", {
  context: pyCtx,
  handlers,
});
```

## Notes

- **Lifecycle**: `CodeInterpreter` wraps an existing `Sandbox` instance and reuses its connection configuration. Each sandbox instance clones the transport via `ConnectionConfig.withTransportIfMissing()`, so call `sandbox.close()` when you are finished to release the Node.js keep-alive agent and avoid resource leaks.
- **Default context**: `codes.run(..., { language })` uses a language default context (state can persist across runs).


================================================
FILE: sdks/code-interpreter/javascript/README_zh.md
================================================
# Alibaba Code Interpreter JavaScript/TypeScript SDK

中文 | [English](README.md)

一个用于在安全、隔离的沙箱环境中执行代码的 TypeScript/JavaScript SDK。该 SDK 提供了高级 API，支持安全地运行 Python、Java、Go、TypeScript 等语言，并具备“代码执行上下文（Context）”能力。

## 前置要求

本 SDK 需要配合包含 Code Interpreter 运行时环境的特定 Docker 镜像使用。请务必使用 `opensandbox/code-interpreter` 镜像（或其衍生镜像），其中预装了 Python、Java、Go、Node.js 等语言的运行环境。

关于支持的语言与具体版本信息，请参考 [环境文档](../../../sandboxes/code-interpreter/README_zh.md)。

## 安装指南

### npm

```bash
npm install @alibaba-group/opensandbox-code-interpreter
```

### pnpm

```bash
pnpm add @alibaba-group/opensandbox-code-interpreter
```

### yarn

```bash
yarn add @alibaba-group/opensandbox-code-interpreter
```

## 快速开始

以下示例展示了如何创建带指定运行时配置的 Sandbox，并执行一段简单脚本。

> **注意**: 在运行此示例之前，请确保 OpenSandbox 服务已启动。服务启动请参考根目录的 [README_zh.md](../../../docs/README_zh.md)。

```ts
import { ConnectionConfig, Sandbox } from "@alibaba-group/opensandbox";
import { CodeInterpreter, SupportedLanguages } from "@alibaba-group/opensandbox-code-interpreter";

// 1. 配置连接信息
const config = new ConnectionConfig({
  domain: "api.opensandbox.io",
  apiKey: "your-api-key",
});

// 2. 创建 Sandbox（必须使用 code-interpreter 镜像），并指定语言版本
const sandbox = await Sandbox.create({
  connectionConfig: config,
  image: "opensandbox/code-interpreter:v1.0.2",
  entrypoint: ["/opt/opensandbox/code-interpreter.sh"],
  env: {
    PYTHON_VERSION: "3.11",
    JAVA_VERSION: "17",
    NODE_VERSION: "20",
    GO_VERSION: "1.24",
  },
  timeoutSeconds: 15 * 60,
});

// 3. 创建 CodeInterpreter 包装器
const ci = await CodeInterpreter.create(sandbox);

// 4. 创建执行上下文（Python）
const ctx = await ci.codes.createContext(SupportedLanguages.PYTHON);

// 5. 运行代码
const result = await ci.codes.run("import sys\nprint(sys.version)\nresult = 2 + 2\nresult", {
  context: ctx,
});

// 6. 打印输出
console.log(result.result[0]?.text);

// 7. 清理远程实例（可选，但推荐）
await sandbox.kill();
await sandbox.close();
```

## 运行时配置

### Docker 镜像

Code Interpreter SDK 依赖于特定的运行环境。请确保你的沙箱服务提供商支持 `opensandbox/code-interpreter` 镜像。

### 语言版本选择

你可以在创建 `Sandbox` 时通过环境变量指定所需的编程语言版本。

| 语言 | 环境变量 | 示例值 | 默认值（若不设置） |
| --- | --- | --- | --- |
| Python | `PYTHON_VERSION` | `3.11` | 镜像默认值 |
| Java | `JAVA_VERSION` | `17` | 镜像默认值 |
| Node.js | `NODE_VERSION` | `20` | 镜像默认值 |
| Go | `GO_VERSION` | `1.24` | 镜像默认值 |

```ts
const sandbox = await Sandbox.create({
  connectionConfig: config,
  image: "opensandbox/code-interpreter:v1.0.2",
  entrypoint: ["/opt/opensandbox/code-interpreter.sh"],
  env: {
    JAVA_VERSION: "17",
    GO_VERSION: "1.24",
  },
});
```

## 核心功能示例

### 0. 直接传 `language`（使用该语言默认上下文）

如果你不需要显式管理 context id，可以只传 `language` 来执行代码。
当 `context.id` 省略时，execd 可以为该语言创建/复用默认 session，因此状态可以跨次执行保持：

```ts
import { SupportedLanguages } from "@alibaba-group/opensandbox-code-interpreter";

await ci.codes.run("x = 42", { language: SupportedLanguages.PYTHON });
const execution = await ci.codes.run("result = x\nresult", { language: SupportedLanguages.PYTHON });
console.log(execution.result[0]?.text); // "42"
```

### 0.1 Context 管理（list/get/delete）

你也可以显式管理 context（与 Python/Kotlin SDK 对齐）：

```ts
const ctx = await ci.codes.createContext(SupportedLanguages.PYTHON);

const same = await ci.codes.getContext(ctx.id!);
console.log(same.id, same.language);

const all = await ci.codes.listContexts();
const pyOnly = await ci.codes.listContexts(SupportedLanguages.PYTHON);

await ci.codes.deleteContext(ctx.id!);
await ci.codes.deleteContexts(SupportedLanguages.PYTHON); // 批量清理
```

### 1. Java 代码执行

```ts
import { SupportedLanguages } from "@alibaba-group/opensandbox-code-interpreter";

const javaCtx = await ci.codes.createContext(SupportedLanguages.JAVA);
const execution = await ci.codes.run(
  [
    'System.out.println("Calculating sum...");',
    "int a = 10;",
    "int b = 20;",
    "int sum = a + b;",
    'System.out.println("Sum: " + sum);',
    "sum",
  ].join("\n"),
  { context: javaCtx },
);
console.log(execution.logs.stdout.map((m) => m.text));
```

### 2. 流式输出处理

实时处理 stdout/stderr 等事件。

```ts
import type { ExecutionHandlers } from "@alibaba-group/opensandbox";
import { SupportedLanguages } from "@alibaba-group/opensandbox-code-interpreter";

const handlers: ExecutionHandlers = {
  onStdout: (m) => console.log("STDOUT:", m.text),
  onStderr: (m) => console.error("STDERR:", m.text),
  onResult: (r) => console.log("RESULT:", r.text),
};

const pyCtx = await ci.codes.createContext(SupportedLanguages.PYTHON);
await ci.codes.run("import time\nfor i in range(5):\n    print(i)\n    time.sleep(0.2)", {
  context: pyCtx,
  handlers,
});
```

## 说明

- **生命周期**：`CodeInterpreter` 基于既有的 `Sandbox` 实例进行包装，并复用其连接配置。SDK 会通过 `ConnectionConfig.withTransportIfMissing()` 为每个实例复刻 Transport，完成交互后请调用 `sandbox.close()` 释放 Node.js 的 keep-alive agent，以避免资源泄漏。
- **默认上下文**：`codes.run(..., { language })` 会使用语言默认 context（同语言的状态可跨次执行保持）。


================================================
FILE: sdks/code-interpreter/javascript/eslint.config.mjs
================================================
import path from "node:path";
import { fileURLToPath } from "node:url";
import { createBaseConfig } from "../../eslint.base.mjs";

// Directory that contains this config file; ES modules do not define `__dirname`,
// so it is derived from the module URL.
const configDir = path.dirname(fileURLToPath(import.meta.url));

// ESLint configuration for this package, built from the shared base config factory.
const config = createBaseConfig({
  tsconfigRootDir: configDir,
  tsconfigPath: "./tsconfig.json",
  extraIgnores: ["src/**/*.d.ts", "src/**/*.js"],
});

export default config;


================================================
FILE: sdks/code-interpreter/javascript/package.json
================================================
{
  "name": "@alibaba-group/opensandbox-code-interpreter",
  "version": "0.1.3",
  "description": "OpenSandbox Code Interpreter TypeScript/JavaScript SDK",
  "license": "Apache-2.0",
  "type": "module",
  "main": "./dist/index.js",
  "types": "./dist/index.d.ts",
  "exports": {
    ".": {
      "types": "./dist/index.d.ts",
      "import": "./dist/index.js",
      "require": "./dist/cjs/index.cjs",
      "default": "./dist/index.js"
    }
  },
  "browser": "./dist/index.js",
  "sideEffects": false,
  "repository": {
    "type": "git",
    "url": "https://github.com/alibaba/OpenSandbox.git"
  },
  "bugs": {
    "url": "https://github.com/alibaba/OpenSandbox/issues"
  },
  "homepage": "https://open-sandbox.ai",
  "files": [
    "dist",
    "src"
  ],
  "engines": {
    "node": ">=20"
  },
  "packageManager": "pnpm@9.15.0",
  "scripts": {
    "build": "tsup",
    "test": "pnpm run build && node --test tests/*.test.mjs",
    "lint": "eslint src --max-warnings 0",
    "clean": "rm -rf dist"
  },
  "dependencies": {
    "@alibaba-group/opensandbox": "workspace:^"
  },
  "devDependencies": {
    "@eslint/js": "^9.39.2",
    "eslint": "^9.39.2",
    "tsup": "^8.5.0",
    "typescript": "^5.7.2",
    "typescript-eslint": "^8.52.0"
  }
}


================================================
FILE: sdks/code-interpreter/javascript/src/adapters/codesAdapter.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import type { ExecdClient, ExecdPaths } from "@alibaba-group/opensandbox/internal";
import type { ServerStreamEvent } from "@alibaba-group/opensandbox";
import type { Execution, ExecutionHandlers } from "@alibaba-group/opensandbox";
import {
  ExecutionEventDispatcher,
  InvalidArgumentException,
} from "@alibaba-group/opensandbox";

import type { Codes } from "../services/codes.js";
import type { CodeContext, SupportedLanguage } from "../models.js";
import { throwOnOpenApiFetchError } from "./openapiError.js";
import { parseJsonEventStream } from "./sse.js";

type ApiCreateContextRequest =
  ExecdPaths["/code/context"]["post"]["requestBody"]["content"]["application/json"];
type ApiCreateContextOk =
  ExecdPaths["/code/context"]["post"]["responses"][200]["content"]["application/json"];
type ApiGetContextOk =
  ExecdPaths["/code/contexts/{context_id}"]["get"]["responses"][200]["content"]["application/json"];
type ApiListContextsOk =
  ExecdPaths["/code/contexts"]["get"]["responses"][200]["content"]["application/json"];
type ApiRunCodeRequest =
  ExecdPaths["/code"]["post"]["requestBody"]["content"]["application/json"];

/*
 * Module overview: single-layer codes adapter for the Code Interpreter SDK.
 *
 * - Handles HTTP/SSE streaming via the underlying execd adapter
 * - Builds the structured Execution result for `run(...)`
 */

/**
 * Joins a base URL and a path into one URL string.
 *
 * A single trailing "/" is dropped from `baseUrl` (if present) and a leading
 * "/" is prepended to `pathname` (if missing) before the two are concatenated.
 */
function joinUrl(baseUrl: string, pathname: string): string {
  // Non-multiline `$` only matches at the very end, so at most one slash is removed.
  const trimmedBase = baseUrl.replace(/\/$/, "");
  const rootedPath = pathname[0] === "/" ? pathname : "/" + pathname;
  return trimmedBase + rootedPath;
}

/**
 * {@link Codes} implementation that talks to the sandbox's execd HTTP API.
 *
 * - Context management and interrupts go through the typed OpenAPI `client`.
 * - `runStream` issues a raw `fetch` POST to `/code` and parses the streamed events.
 * - `run` folds the streamed events into a structured {@link Execution}.
 */
export class CodesAdapter implements Codes {
  private readonly fetch: typeof fetch;

  /**
   * @param client Typed execd OpenAPI client used for the non-streaming calls.
   * @param opts.baseUrl Base URL that `/code` is appended to for streaming runs.
   * @param opts.fetch Fetch implementation for streaming runs; defaults to the global `fetch`.
   * @param opts.headers Extra headers sent with streaming runs.
   */
  constructor(
    private readonly client: ExecdClient,
    private readonly opts: { baseUrl: string; fetch?: typeof fetch; headers?: Record<string, string> },
  ) {
    this.fetch = opts.fetch ?? fetch;
  }

  /**
   * Creates a new execution context for `language`.
   *
   * @throws SandboxApiException (via `throwOnOpenApiFetchError`) when the API reports an error.
   * @throws Error when the response is not an object or carries no `language`.
   */
  async createContext(language: SupportedLanguage): Promise<CodeContext> {
    const body: ApiCreateContextRequest = { language };
    const { data, error, response } = await this.client.POST("/code/context", {
      body,
    });
    throwOnOpenApiFetchError({ error, response }, "Create code context failed");
    const ok = data as ApiCreateContextOk | undefined;
    if (!ok || typeof ok !== "object") {
      throw new Error("Create code context failed: unexpected response shape");
    }
    if (typeof ok.language !== "string" || !ok.language) {
      throw new Error("Create code context failed: missing language");
    }
    return { id: ok.id, language: ok.language };
  }

  /**
   * Fetches an existing context by id.
   *
   * @throws InvalidArgumentException when `contextId` is empty or whitespace-only.
   * @throws Error when the response is not an object or carries no `language`.
   */
  async getContext(contextId: string): Promise<CodeContext> {
    if (!contextId?.trim()) {
      throw new InvalidArgumentException({ message: "contextId cannot be empty" });
    }
    const { data, error, response } = await this.client.GET("/code/contexts/{context_id}", {
      params: { path: { context_id: contextId } },
    });
    throwOnOpenApiFetchError({ error, response }, "Get code context failed");
    const ok = data as ApiGetContextOk | undefined;
    if (!ok || typeof ok !== "object") {
      throw new Error("Get code context failed: unexpected response shape");
    }
    // Single loosely-typed view instead of repeating the cast on every access.
    const ctx = ok as any;
    if (typeof ctx.language !== "string" || !ctx.language) {
      throw new Error("Get code context failed: missing language");
    }
    return { id: ctx.id, language: ctx.language };
  }

  /**
   * Lists contexts, optionally restricted to `language` through the `language` query parameter.
   *
   * Non-object entries in the response array are dropped.
   *
   * @throws Error when the response body is not an array.
   */
  async listContexts(language?: SupportedLanguage): Promise<CodeContext[]> {
    const { data, error, response } = await this.client.GET("/code/contexts", {
      params: language ? { query: { language } } : undefined,
    } as any);
    throwOnOpenApiFetchError({ error, response }, "List code contexts failed");
    const ok = data as ApiListContextsOk | undefined;
    if (!Array.isArray(ok)) {
      throw new Error("List code contexts failed: unexpected response shape");
    }
    return ok
      .filter((c) => c && typeof c === "object")
      .map((c: any) => ({ id: c.id, language: c.language as any }));
  }

  /**
   * Deletes one context by id.
   *
   * @throws InvalidArgumentException when `contextId` is empty or whitespace-only.
   */
  async deleteContext(contextId: string): Promise<void> {
    if (!contextId?.trim()) {
      throw new InvalidArgumentException({ message: "contextId cannot be empty" });
    }
    const { error, response } = await this.client.DELETE("/code/contexts/{context_id}", {
      params: { path: { context_id: contextId } },
    });
    throwOnOpenApiFetchError({ error, response }, "Delete code context failed");
  }

  /** Deletes the contexts selected by the `language` query parameter. */
  async deleteContexts(language: SupportedLanguage): Promise<void> {
    const { error, response } = await this.client.DELETE("/code/contexts", {
      params: { query: { language } },
    });
    throwOnOpenApiFetchError({ error, response }, "Delete code contexts failed");
  }

  /**
   * Requests an interrupt by sending `contextId` as the `id` query parameter of `DELETE /code`.
   *
   * @throws InvalidArgumentException when `contextId` is empty or whitespace-only.
   */
  async interrupt(contextId: string): Promise<void> {
    // Same guard as getContext/deleteContext: fail fast instead of sending a blank id.
    if (!contextId?.trim()) {
      throw new InvalidArgumentException({ message: "contextId cannot be empty" });
    }
    const { error, response } = await this.client.DELETE("/code", {
      params: { query: { id: contextId } },
    });
    throwOnOpenApiFetchError({ error, response }, "Interrupt code failed");
  }

  /**
   * Posts `req` to `/code` and yields each event parsed from the streamed response.
   *
   * `opts.headers` are spread after the default `accept`/`content-type` headers,
   * so they take precedence on conflict.
   *
   * @param signal Optional abort signal forwarded to `fetch`.
   */
  async *runStream(req: ApiRunCodeRequest, signal?: AbortSignal): AsyncIterable<ServerStreamEvent> {
    const url = joinUrl(this.opts.baseUrl, "/code");
    const body = JSON.stringify(req);
    // Call through a local binding: invoking the platform fetch as a method
    // (`this.fetch(...)`) sets `this` to the adapter, which browsers reject
    // with "Illegal invocation".
    const doFetch = this.fetch;
    const res = await doFetch(url, {
      method: "POST",
      headers: {
        "accept": "text/event-stream",
        "content-type": "application/json",
        ...(this.opts.headers ?? {}),
      },
      body,
      signal,
    });

    for await (const ev of parseJsonEventStream<ServerStreamEvent>(res, { fallbackErrorMessage: "Run code failed" })) {
      yield ev;
    }
  }

  /**
   * Runs `code` and collects the streamed events into an {@link Execution}.
   *
   * Exactly one of `opts.context` / `opts.language` may be given; with neither,
   * the request uses `{ language: "python" }` without a context id.
   *
   * @throws InvalidArgumentException when `code` is blank or both `context` and `language` are set.
   */
  async run(
    code: string,
    opts: { context?: CodeContext; language?: SupportedLanguage; handlers?: ExecutionHandlers; signal?: AbortSignal } = {},
  ): Promise<Execution> {
    if (!code.trim()) {
      throw new InvalidArgumentException({ message: "Code cannot be empty" });
    }

    if (opts.context && opts.language) {
      throw new InvalidArgumentException({ message: "Provide either opts.context or opts.language, not both" });
    }

    const context: CodeContext =
      opts.context ??
      (opts.language
        ? { language: opts.language }
        : { language: "python" });

    // Make the OpenAPI contract explicit so backend schema changes surface quickly.
    const req: ApiRunCodeRequest = {
      code,
      context: { id: context.id, language: context.language },
    };

    const execution: Execution = {
      logs: { stdout: [], stderr: [] },
      result: [],
    };
    // The dispatcher receives `execution` and the caller's handlers; every streamed event is fed to it in order.
    const dispatcher = new ExecutionEventDispatcher(execution, opts.handlers);

    for await (const ev of this.runStream(req, opts.signal)) {
      await dispatcher.dispatch(ev as any);
    }

    return execution;
  }
}

================================================
FILE: sdks/code-interpreter/javascript/src/adapters/openapiError.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { SandboxApiException, SandboxError } from "@alibaba-group/opensandbox";

/**
 * Throws a {@link SandboxApiException} when an openapi-fetch call failed.
 *
 * A call counts as failed when `result.error` is truthy OR the HTTP response
 * reports `ok === false`. The second check matters because an error response
 * with an empty body can surface as an empty (falsy) `error`, which would
 * otherwise be mistaken for success.
 *
 * The message is taken from `error.message`, then `error.error.message`, then
 * `fallbackMessage`. The error code is taken from `error.code`, then
 * `error.error.code`, falling back to `SandboxError.UNEXPECTED_RESPONSE`.
 *
 * @param result The `{ error, response }` pair returned by the OpenAPI client.
 * @param fallbackMessage Message used when the error body carries none.
 * @throws SandboxApiException when the call failed; returns normally otherwise.
 */
export function throwOnOpenApiFetchError(
  result: { error?: unknown; response: Response },
  fallbackMessage: string,
): void {
  const { error, response } = result;

  // `ok === false` (not `!ok`) so a response stub without an `ok` field keeps the old behavior.
  const httpFailed = response?.ok === false;
  if (!error && !httpFailed) return;

  const requestId = response?.headers?.get("x-request-id") ?? undefined;
  const status = (response as any)?.status ?? 0;

  const err = error as any;
  const msg = err?.message ?? err?.error?.message ?? fallbackMessage;
  const code = err?.code ?? err?.error?.code;

  throw new SandboxApiException({
    message: msg,
    statusCode: status,
    requestId,
    error: code
      ? new SandboxError(String(code), String(msg ?? ""))
      : new SandboxError(SandboxError.UNEXPECTED_RESPONSE, String(msg ?? "")),
    rawBody: error,
  });
}

================================================
FILE: sdks/code-interpreter/javascript/src/adapters/sse.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { SandboxApiException, SandboxError } from "@alibaba-group/opensandbox";

/**
 * Parses `line` as JSON without throwing.
 *
 * @returns The parsed value, or `undefined` when `line` is not valid JSON.
 */
function tryParseJson(line: string): unknown | undefined {
  let value: unknown;
  try {
    value = JSON.parse(line);
  } catch {
    // Invalid JSON is reported as "no value" rather than an exception.
    value = undefined;
  }
  return value;
}

/**
 * Parses an SSE-like stream that may be either:
 * - standard SSE frames (`data: {...}\n\n`)
 * - newline-delimited JSON (one JSON object per line)
 *
 * Each line is handled independently: blank lines, SSE comments (`:`), and
 * `event:` / `id:` / `retry:` fields are skipped; an optional `data:` prefix is
 * stripped; lines that are not valid JSON (or parse to a falsy value) are ignored.
 *
 * The response body reader is always cancelled and released when iteration
 * ends, including when the consumer stops early or an error propagates.
 *
 * @param res Fetch response to read from.
 * @param opts.fallbackErrorMessage Message used for non-OK responses whose body has no `message`.
 * @throws SandboxApiException when `res.ok` is false.
 */
export async function* parseJsonEventStream<T>(
  res: Response,
  opts?: { fallbackErrorMessage?: string },
): AsyncIterable<T> {
  if (!res.ok) {
    const text = await res.text().catch(() => "");
    const parsed = tryParseJson(text);
    const err = parsed && typeof parsed === "object" ? (parsed as any) : undefined;
    const requestId = res.headers.get("x-request-id") ?? undefined;
    const message = err?.message ?? opts?.fallbackErrorMessage ?? `Stream request failed (status=${res.status})`;
    const code = err?.code ? String(err.code) : SandboxError.UNEXPECTED_RESPONSE;
    throw new SandboxApiException({
      message,
      statusCode: res.status,
      requestId,
      error: new SandboxError(code, err?.message ? String(err.message) : message),
      rawBody: parsed ?? text,
    });
  }

  if (!res.body) return;

  // Turns one raw line into its JSON payload; `undefined` means "nothing to emit".
  const parseLine = (rawLine: string): unknown | undefined => {
    const line = rawLine.trim();
    if (!line) return undefined;

    // SSE comments and non-data fields carry no payload.
    if (line.startsWith(":")) return undefined;
    if (line.startsWith("event:") || line.startsWith("id:") || line.startsWith("retry:")) return undefined;

    // Support standard SSE "data:" prefix
    const jsonLine = line.startsWith("data:") ? line.slice("data:".length).trim() : line;
    if (!jsonLine) return undefined;

    // Falsy parse results (invalid JSON, null, 0, "", false) are not events.
    return tryParseJson(jsonLine) || undefined;
  };

  const reader = res.body.getReader();
  const decoder = new TextDecoder("utf-8");
  let buf = "";

  try {
    while (true) {
      const { value, done } = await reader.read();
      if (done) break;

      buf += decoder.decode(value, { stream: true });
      let idx: number;

      while ((idx = buf.indexOf("\n")) >= 0) {
        const rawLine = buf.slice(0, idx);
        buf = buf.slice(idx + 1);

        const parsed = parseLine(rawLine);
        if (parsed !== undefined) yield parsed as T;
      }
    }

    // Flush bytes the streaming decoder is still holding, then the last unterminated line.
    buf += decoder.decode();
    const parsed = parseLine(buf);
    if (parsed !== undefined) yield parsed as T;
  } finally {
    // Best-effort cleanup: cancel() is a no-op on a finished stream and may
    // reject on an errored one; either way the lock must be released.
    await reader.cancel().catch(() => undefined);
    reader.releaseLock();
  }
}

================================================
FILE: sdks/code-interpreter/javascript/src/factory/adapterFactory.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import type { Sandbox } from "@alibaba-group/opensandbox";
import type { Codes } from "../services/codes.js";

/**
 * Inputs handed to {@link AdapterFactory.createCodes}.
 */
export interface CreateCodesStackOptions {
  /** Sandbox the code service is being created for. */
  sandbox: Sandbox;
  /** Base URL of the sandbox's execd endpoint (scheme and host, no path). */
  execdBaseUrl: string;
  /** Optional headers that accompany the resolved execd endpoint. */
  endpointHeaders?: Record<string, string>;
}

/**
 * Factory abstraction for Code Interpreter SDK to decouple from concrete adapters/clients.
 *
 * Implementations return the {@link Codes} service for the sandbox and execd
 * endpoint described by the options.
 */
export interface AdapterFactory {
  /** Builds the {@link Codes} service from the given sandbox, base URL, and optional endpoint headers. */
  createCodes(opts: CreateCodesStackOptions): Codes;
}


================================================
FILE: sdks/code-interpreter/javascript/src/factory/defaultAdapterFactory.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { createExecdClient } from "@alibaba-group/opensandbox/internal";
import type { AdapterFactory, CreateCodesStackOptions } from "./adapterFactory.js";
import { CodesAdapter } from "../adapters/codesAdapter.js";
import type { Codes } from "../services/codes.js";

/**
 * Default {@link AdapterFactory}: pairs an execd OpenAPI client with a {@link CodesAdapter}.
 */
export class DefaultAdapterFactory implements AdapterFactory {
  /**
   * Builds the codes service for one sandbox endpoint.
   *
   * The same merged header set is given to both the OpenAPI client and the
   * streaming adapter.
   */
  createCodes(opts: CreateCodesStackOptions): Codes {
    const { sandbox, execdBaseUrl, endpointHeaders } = opts;
    const connection = sandbox.connectionConfig;

    // Endpoint headers are spread last, so they win over connection-level headers on conflict.
    const mergedHeaders: Record<string, string> = {
      ...(connection.headers ?? {}),
      ...(endpointHeaders ?? {}),
    };

    const execdClient = createExecdClient({
      baseUrl: execdBaseUrl,
      headers: mergedHeaders,
      fetch: connection.fetch,
    });

    const adapterOptions = {
      baseUrl: execdBaseUrl,
      headers: mergedHeaders,
      // Streaming calls (SSE) use a dedicated fetch, aligned with Kotlin/Python SDKs.
      fetch: connection.sseFetch,
    };
    return new CodesAdapter(execdClient, adapterOptions);
  }
}

/** Returns a fresh {@link DefaultAdapterFactory}, typed as the {@link AdapterFactory} interface. */
export function createDefaultAdapterFactory(): AdapterFactory {
  const factory: AdapterFactory = new DefaultAdapterFactory();
  return factory;
}


================================================
FILE: sdks/code-interpreter/javascript/src/index.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

export { CodeInterpreter } from "./interpreter.js";
export type { CodeInterpreterCreateOptions } from "./interpreter.js";

export type { AdapterFactory } from "./factory/adapterFactory.js";
export { DefaultAdapterFactory, createDefaultAdapterFactory } from "./factory/defaultAdapterFactory.js";

export type { CodeContext, SupportedLanguage } from "./models.js";
export { SupportedLanguage as SupportedLanguages } from "./models.js";

export type { Codes } from "./services/codes.js";

export type {
  Execution,
  ExecutionComplete,
  ExecutionError,
  ExecutionHandlers,
  ExecutionInit,
  ExecutionResult,
  OutputMessage,
} from "@alibaba-group/opensandbox";

================================================
FILE: sdks/code-interpreter/javascript/src/interpreter.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { DEFAULT_EXECD_PORT } from "@alibaba-group/opensandbox";
import type { Sandbox } from "@alibaba-group/opensandbox";

import { createDefaultAdapterFactory } from "./factory/defaultAdapterFactory.js";
import type { AdapterFactory } from "./factory/adapterFactory.js";
import type { Codes } from "./services/codes.js";

/**
 * Options accepted by `CodeInterpreter.create`.
 */
export interface CodeInterpreterCreateOptions {
  /** Factory used to build the codes service; the default adapter factory is used when omitted. */
  adapterFactory?: AdapterFactory;
}

/**
 * Code interpreter facade (JS/TS) layered on an existing {@link Sandbox}.
 *
 * - {@link codes} is the entry point for creating contexts and running code.
 * - {@link id}, {@link files}, {@link commands}, and {@link metrics} simply
 *   forward to the wrapped {@link Sandbox}, so they are the same instances.
 *
 * Instances are obtained through {@link CodeInterpreter.create}; the
 * constructor is private.
 */
export class CodeInterpreter {
  private constructor(
    readonly sandbox: Sandbox,
    readonly codes: Codes,
  ) {}

  /**
   * Resolves the sandbox's execd endpoint and builds the codes service for it.
   *
   * @param sandbox Sandbox to wrap.
   * @param opts Optional overrides; `adapterFactory` replaces the default factory.
   */
  static async create(sandbox: Sandbox, opts: CodeInterpreterCreateOptions = {}): Promise<CodeInterpreter> {
    const execdEndpoint = await sandbox.getEndpoint(DEFAULT_EXECD_PORT);
    const { protocol } = sandbox.connectionConfig;
    const factory = opts.adapterFactory ?? createDefaultAdapterFactory();

    // Endpoint-level headers travel with the base URL so the factory can merge them.
    const codesService = factory.createCodes({
      sandbox,
      execdBaseUrl: `${protocol}://${execdEndpoint.endpoint}`,
      endpointHeaders: execdEndpoint.headers,
    });

    return new CodeInterpreter(sandbox, codesService);
  }

  /** Id of the wrapped sandbox. */
  get id() {
    const { id } = this.sandbox;
    return id;
  }

  /** The wrapped sandbox's `files` service. */
  get files() {
    const { files } = this.sandbox;
    return files;
  }

  /** The wrapped sandbox's `commands` service. */
  get commands() {
    const { commands } = this.sandbox;
    return commands;
  }

  /** The wrapped sandbox's `metrics` service. */
  get metrics() {
    const { metrics } = this.sandbox;
    return metrics;
  }
}


================================================
FILE: sdks/code-interpreter/javascript/src/models.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * Named constants for the language identifiers this SDK knows about.
 *
 * Declared `as const` so each value keeps its string-literal type.
 */
export const SupportedLanguage = {
  PYTHON: "python",
  JAVA: "java",
  GO: "go",
  TYPESCRIPT: "typescript",
  JAVASCRIPT: "javascript",
  BASH: "bash",
} as const;

/**
 * Union of the string values of the {@link SupportedLanguage} constant
 * (`"python" | "java" | "go" | "typescript" | "javascript" | "bash"`).
 */
export type SupportedLanguage =
  (typeof SupportedLanguage)[keyof typeof SupportedLanguage];

/**
 * Identifies where code runs: a language plus an optional context id.
 */
export interface CodeContext {
  /** Context id; optional, so a context may be described by language alone. */
  id?: string;
  /**
   * Language of the context. Accepts the known {@link SupportedLanguage}
   * literals as well as any other string (the `string & {}` arm).
   */
  language: SupportedLanguage | (string & {});
}

/**
 * Payload for a streamed code run: the source text and the context to run it in.
 */
export interface RunCodeRequest {
  /** Source code to execute. */
  code: string;
  /** Context (language and optional id) the code runs in. */
  context: CodeContext;
}

================================================
FILE: sdks/code-interpreter/javascript/src/services/codes.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import type { ServerStreamEvent } from "@alibaba-group/opensandbox";
import type { Execution, ExecutionHandlers } from "@alibaba-group/opensandbox";
import type { CodeContext, RunCodeRequest, SupportedLanguage } from "../models.js";

/**
 * Code execution service: manages execution contexts and runs code in them.
 */
export interface Codes {
  /**
   * Create a new context for the given language/runtime.
   */
  createContext(language: SupportedLanguage): Promise<CodeContext>;
  /**
   * Get an existing context by id.
   */
  getContext(contextId: string): Promise<CodeContext>;
  /**
   * List active contexts. If language is provided, filters by language/runtime.
   */
  listContexts(language?: SupportedLanguage): Promise<CodeContext[]>;
  /**
   * Delete a context by id.
   */
  deleteContext(contextId: string): Promise<void>;
  /**
   * Delete all contexts under the specified language/runtime.
   */
  deleteContexts(language: SupportedLanguage): Promise<void>;

  /**
   * Run code and resolve with the collected {@link Execution} once the run ends.
   *
   * `opts.context` and `opts.language` select where the code runs,
   * `opts.handlers` receive events while the run is in progress, and
   * `opts.signal` allows the caller to abort.
   * NOTE(review): the bundled CodesAdapter rejects blank code, rejects
   * passing both `context` and `language`, and falls back to "python" when
   * neither is given; other implementations should confirm they match.
   */
  run(
    code: string,
    opts?: { context?: CodeContext; language?: SupportedLanguage; handlers?: ExecutionHandlers; signal?: AbortSignal },
  ): Promise<Execution>;

  /**
   * Run code and yield the raw server events as they arrive.
   *
   * `signal` allows the caller to abort the underlying request.
   */
  runStream(
    req: RunCodeRequest,
    signal?: AbortSignal,
  ): AsyncIterable<ServerStreamEvent>;

  /**
   * Interrupt a running execution identified by the given id.
   * NOTE(review): the bundled CodesAdapter sends this value as the `id` query
   * parameter of `DELETE /code`; confirm whether the server expects a context
   * id or an execution id.
   */
  interrupt(contextId: string): Promise<void>;
}

================================================
FILE: sdks/code-interpreter/javascript/tests/defaultAdapterFactory.headers.test.mjs
================================================
import assert from "node:assert/strict";
import test from "node:test";

import { DefaultAdapterFactory } from "../dist/index.js";

test("DefaultAdapterFactory merges sandbox and endpoint headers for code requests", async () => {
  const baseUrl = "http://sandbox.internal:3456";
  const requests = [];

  // Canned stream body: two newline-delimited JSON events with no trailing
  // newline, so the final event is only delivered by the parser's tail flush.
  const streamBody = [
    { type: "stdout", text: "hello", timestamp: 1 },
    { type: "execution_complete", execution_time: 2, timestamp: 2 },
  ]
    .map((event) => JSON.stringify(event))
    .join("\n");

  // One fake fetch serves both the OpenAPI client and the streaming path and
  // logs every request it sees.
  async function fakeFetch(input, init = {}) {
    const request = input instanceof Request ? input : new Request(input, init);
    const { pathname } = new URL(request.url);
    requests.push({
      url: request.url,
      method: request.method,
      headers: Object.fromEntries(request.headers.entries()),
    });

    if (pathname === "/code/context") {
      const contextBody = JSON.stringify({ id: "ctx-1", language: "python" });
      return new Response(contextBody, {
        status: 200,
        headers: { "content-type": "application/json" },
      });
    }

    return new Response(streamBody, {
      status: 200,
      headers: { "content-type": "text/event-stream" },
    });
  }

  // Minimal sandbox stand-in: only the connection settings the factory reads.
  const sandbox = {
    connectionConfig: {
      headers: { "x-global": "global" },
      fetch: fakeFetch,
      sseFetch: fakeFetch,
    },
  };

  const codes = new DefaultAdapterFactory().createCodes({
    sandbox,
    execdBaseUrl: baseUrl,
    endpointHeaders: { "x-endpoint": "endpoint" },
  });

  const context = await codes.createContext("python");
  assert.equal(context.id, "ctx-1");

  const execution = await codes.run("print('hello')");
  assert.equal(execution.logs.stdout[0]?.text, "hello");

  // Exactly one request per call, in call order.
  assert.equal(requests.length, 2);
  const [contextCall, runCall] = requests;

  assert.equal(contextCall.url, `${baseUrl}/code/context`);
  assert.equal(contextCall.headers["x-global"], "global");
  assert.equal(contextCall.headers["x-endpoint"], "endpoint");

  assert.equal(runCall.url, `${baseUrl}/code`);
  assert.equal(runCall.headers["x-global"], "global");
  assert.equal(runCall.headers["x-endpoint"], "endpoint");
  assert.equal(runCall.headers.accept, "text/event-stream");
});


================================================
FILE: sdks/code-interpreter/javascript/tests/interpreter.headers.test.mjs
================================================
import assert from "node:assert/strict";
import test from "node:test";

import { CodeInterpreter } from "../dist/index.js";
import { DEFAULT_EXECD_PORT } from "../../../sandbox/javascript/dist/index.js";

test("CodeInterpreter.create forwards endpoint headers to adapter factory", async () => {
  const factoryCalls = [];
  const endpointHeaders = { "x-endpoint": "endpoint" };

  // Sandbox stand-in exposing only what CodeInterpreter.create touches.
  const sandbox = {
    connectionConfig: {
      protocol: "https",
      headers: { "x-global": "global" },
    },
    getEndpoint: async (port) => {
      // The interpreter must ask for the execd port specifically.
      assert.equal(port, DEFAULT_EXECD_PORT);
      return { endpoint: "sandbox.internal:3456", headers: endpointHeaders };
    },
  };

  // Sentinel object: the interpreter should expose exactly what the factory returned.
  const codes = { kind: "codes" };
  const adapterFactory = {
    createCodes: (opts) => {
      factoryCalls.push(opts);
      return codes;
    },
  };

  const interpreter = await CodeInterpreter.create(sandbox, { adapterFactory });

  assert.equal(interpreter.codes, codes);
  assert.equal(factoryCalls.length, 1);

  const [received] = factoryCalls;
  assert.equal(received.execdBaseUrl, "https://sandbox.internal:3456");
  assert.deepEqual(received.endpointHeaders, { "x-endpoint": "endpoint" });
});


================================================
FILE: sdks/code-interpreter/javascript/tsconfig.json
================================================
{
  "extends": "../../tsconfig.base.json",
  "compilerOptions": {
    "outDir": "dist",
    "rootDir": "src"
  },
  "include": ["src"],
  "exclude": ["node_modules", "dist", "**/*.test.ts"]
}

================================================
FILE: sdks/code-interpreter/javascript/tsup.config.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { defineConfig } from "tsup";

// Single entry point shared by both build configurations below.
const entries = ["src/index.ts"];

// Two builds from the same entry: ESM into `dist`, CommonJS into `dist/cjs`.
export default defineConfig([
  {
    // ESM build: also emits type declarations (`dts`) and cleans `dist` first.
    entry: entries,
    format: ["esm"],
    dts: true,
    outDir: "dist",
    clean: true,
    sourcemap: true,
    target: "es2022",
  },
  {
    // CommonJS build: no declarations, no cleaning of its output directory.
    entry: entries,
    format: ["cjs"],
    outDir: "dist/cjs",
    clean: false,
    sourcemap: true,
    target: "es2022",
    // Emit `.cjs` files instead of the default `.js` extension.
    outExtension: () => ({ js: ".cjs" }),
  },
]);


================================================
FILE: sdks/code-interpreter/kotlin/LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: sdks/code-interpreter/kotlin/README.md
================================================
# Alibaba Code Interpreter SDK for Kotlin

English | [中文](README_zh.md)

A powerful Kotlin SDK for executing code in secure, isolated sandboxes. This SDK provides a high-level API for running Python, Java, Go, TypeScript, and other languages safely, with support for code execution contexts.

## Prerequisites

This SDK requires a specific Docker image containing the Code Interpreter runtime environment. You must use the `opensandbox/code-interpreter` image (or a derivative) which includes pre-installed runtimes for Python, Java, Go, Node.js, etc.

## Installation

### Gradle (Kotlin DSL)

```kotlin
dependencies {
    implementation("com.alibaba.opensandbox:code-interpreter:{latest_version}")
}
```

### Maven

```xml
<dependency>
    <groupId>com.alibaba.opensandbox</groupId>
    <artifactId>code-interpreter</artifactId>
    <version>{latest_version}</version>
</dependency>
```

## Quick Start

The following example creates a sandbox pinned to a specific Python version, wraps it in a `CodeInterpreter`, and executes a simple script.

```java
import com.alibaba.opensandbox.codeinterpreter.CodeInterpreter;
import com.alibaba.opensandbox.codeinterpreter.domain.models.execd.executions.CodeContext;
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.Execution;
import com.alibaba.opensandbox.codeinterpreter.domain.models.execd.executions.RunCodeRequest;
import com.alibaba.opensandbox.codeinterpreter.domain.models.execd.executions.SupportedLanguage;
import com.alibaba.opensandbox.sandbox.Sandbox;
import com.alibaba.opensandbox.sandbox.config.ConnectionConfig;
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxException;

public class QuickStart {
    public static void main(String[] args) {
        // 1. Configure connection
        ConnectionConfig config = ConnectionConfig.builder()
            .domain("api.opensandbox.io")
            .apiKey("your-api-key")
            .build();

        // 2. Create a Sandbox with specific runtime configuration
        // Note: You must use the code-interpreter image
        // Use try-with-resources to ensure sandbox is closed
        try (Sandbox sandbox = Sandbox.builder()
                .connectionConfig(config)
                .image("opensandbox/code-interpreter:v1.0.2")
                .entrypoint("/opt/opensandbox/code-interpreter.sh")
                .env("PYTHON_VERSION", "3.11") // Select specific language version
                .build()) {

            // 3. Create CodeInterpreter wrapper
            CodeInterpreter interpreter = CodeInterpreter.builder()
                .fromSandbox(sandbox)
                .build();

            // 4. Create an execution context (Python)
            CodeContext context = interpreter.codes().createContext(SupportedLanguage.PYTHON);

            // 5. Run code
            Execution result = interpreter.codes().run(
                RunCodeRequest.builder()
                    .code("import sys; print(f'Running on Python {sys.version}')")
                    .context(context)
                    .build()
            );

            // 6. Print output
            if (!result.getLogs().getStdout().isEmpty()) {
                System.out.println(result.getLogs().getStdout().get(0).getText());
            }

            // 7. Cleanup
            // Note: kill() terminates the remote instance; close() (auto-called) cleans up local resources
            sandbox.kill();
        } catch (SandboxException e) {
            // Handle Sandbox specific exceptions
            System.err.println("Sandbox Error: [" + e.getError().getCode() + "] " + e.getError().getMessage());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
```

## Runtime Configuration

### Docker Image

The Code Interpreter SDK relies on a specialized environment. Ensure your sandbox provider has the `opensandbox/code-interpreter` image available.

For detailed information about supported languages and versions, please refer to the [Environment Documentation](../../../sandboxes/code-interpreter/README.md).

### Language Version Selection

You can specify the desired version of a programming language by setting the corresponding environment variable when building the `Sandbox`.

| Language | Environment Variable | Example Value | Default (if unset) |
| -------- | -------------------- | ------------- | ------------------ |
| Python   | `PYTHON_VERSION`     | `3.11`        | Image default      |
| Java     | `JAVA_VERSION`       | `17`          | Image default      |
| Node.js  | `NODE_VERSION`       | `20`          | Image default      |
| Go       | `GO_VERSION`         | `1.24`        | Image default      |

```java
Sandbox sandbox = Sandbox.builder()
    .image("opensandbox/code-interpreter:v1.0.2")
    .entrypoint("/opt/opensandbox/code-interpreter.sh")
    .env("JAVA_VERSION", "17")
    .env("GO_VERSION", "1.23")
    .build();
```

## Usage Examples

### 0. Run with `language` (default language context)

If you don't need to manage explicit session IDs, you can run code by specifying only `language`.
When `context.id` is omitted, **execd will create/reuse a default session for that language**, so
state can persist across runs:

```java
import com.alibaba.opensandbox.codeinterpreter.domain.models.execd.executions.SupportedLanguage;

// Default Python context: state persists across runs
interpreter.codes().run("x = 42", SupportedLanguage.PYTHON);
Execution execution = interpreter.codes().run("result = x\nresult", SupportedLanguage.PYTHON);
System.out.println(execution.getResult().get(0).getText()); // 42
```

### 1. Java Code Execution

Execute Java code snippets dynamically.

```java
CodeContext javaContext = interpreter.codes().createContext(SupportedLanguage.JAVA);

RunCodeRequest request = RunCodeRequest.builder()
    .code(
        "System.out.println(\"Calculating sum...\");\n" +
        "int a = 10;\n" +
        "int b = 20;\n" +
        "int sum = a + b;\n" +
        "System.out.println(\"Sum: \" + sum);\n" +
        "sum" // Return value
    )
    .context(javaContext)
    .build();

Execution execution = interpreter.codes().run(request);

// Handle results
System.out.println("Execution ID: " + execution.getId());
execution.getLogs().getStdout().forEach(log -> System.out.println(log.getText()));
```

### 2. Python with State Persistence

Variables defined in one execution are available in subsequent executions within the same context.

```java
CodeContext pythonContext = interpreter.codes().createContext(SupportedLanguage.PYTHON);

// Step 1: Define variables
RunCodeRequest step1 = RunCodeRequest.builder()
    .code(
        "users = ['Alice', 'Bob', 'Charlie']\n" +
        "print(f'Initialized {len(users)} users')"
    )
    .context(pythonContext)
    .build();
interpreter.codes().run(step1);

// Step 2: Use variables from previous step
RunCodeRequest step2 = RunCodeRequest.builder()
    .code(
        "users.append('Dave')\n" +
        "print(f'Updated users: {users}')"
    )
    .context(pythonContext)
    .build();

Execution result = interpreter.codes().run(step2);
// Output: Updated users: ['Alice', 'Bob', 'Charlie', 'Dave']
```

### 3. Streaming Output Handling

Handle standard output, error output, and execution events in real-time. This example reuses the `pythonContext` created in the previous example.

```java
ExecutionHandlers handlers = ExecutionHandlers.builder()
    .onStdout(msg -> System.out.println("STDOUT: " + msg.getText()))
    .onStderr(msg -> System.err.println("STDERR: " + msg.getText()))
    .onResult(res -> System.out.println("Result: " + res.getText()))
    .onError(err -> System.err.println("Error: " + err.getValue()))
    .onExecutionComplete(complete ->
        System.out.println("Finished in " + complete.getExecutionTimeInMillis() + "ms")
    )
    .build();

RunCodeRequest request = RunCodeRequest.builder()
    .code("import time\nfor i in range(5):\n    print(i)\n    time.sleep(0.5)")
    .context(pythonContext)
    .handlers(handlers)
    .build();

interpreter.codes().run(request);
```

### 4. Multi-Language Context Isolation

Different languages run in isolated environments.

```java
CodeContext pyCtx = interpreter.codes().createContext(SupportedLanguage.PYTHON);
CodeContext goCtx = interpreter.codes().createContext(SupportedLanguage.GO);

// Python Context
interpreter.codes().run(
    RunCodeRequest.builder()
        .code("print('Running in Python')")
        .context(pyCtx)
        .build()
);

// Go Context
interpreter.codes().run(
    RunCodeRequest.builder()
        .code(
            "package main\n" +
            "func main() { println(\"Running in Go\") }"
        )
        .context(goCtx)
        .build()
);
```


================================================
FILE: sdks/code-interpreter/kotlin/README_zh.md
================================================
# Alibaba Code Interpreter SDK for Kotlin

中文 | [English](README.md)

一个用于在安全、隔离的沙箱环境中执行代码的 Kotlin SDK。该 SDK 提供了高级 API，支持安全地运行 Python、Java、Go、TypeScript 等语言，并具备代码执行上下文（Context）能力。

## 前置要求

本 SDK 需要配合包含 Code Interpreter 运行时环境的特定 Docker 镜像使用。请务必使用 `opensandbox/code-interpreter` 镜像（或其衍生镜像），其中预装了 Python、Java、Go、Node.js 等语言的运行环境。

## 安装指南

### Gradle (Kotlin DSL)

```kotlin
dependencies {
    implementation("com.alibaba.opensandbox:code-interpreter:{latest_version}")
}
```

### Maven

```xml
<dependency>
    <groupId>com.alibaba.opensandbox</groupId>
    <artifactId>code-interpreter</artifactId>
    <version>{latest_version}</version>
</dependency>
```

## 快速开始

以下示例展示了如何初始化客户端，指定 Python 版本并执行一段简单的脚本。

```java
import com.alibaba.opensandbox.codeinterpreter.CodeInterpreter;
import com.alibaba.opensandbox.codeinterpreter.domain.models.execd.executions.CodeContext;
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.Execution;
import com.alibaba.opensandbox.codeinterpreter.domain.models.execd.executions.RunCodeRequest;
import com.alibaba.opensandbox.codeinterpreter.domain.models.execd.executions.SupportedLanguage;
import com.alibaba.opensandbox.sandbox.Sandbox;
import com.alibaba.opensandbox.sandbox.config.ConnectionConfig;
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxException;

public class QuickStart {
    public static void main(String[] args) {
        // 1. 配置连接信息
        ConnectionConfig config = ConnectionConfig.builder()
            .domain("api.opensandbox.io")
            .apiKey("your-api-key")
            .build();

        // 2. 创建 Sandbox 实例
        // 注意: 必须使用 code-interpreter 专用镜像
        // 使用 try-with-resources 确保资源正确关闭
        try (Sandbox sandbox = Sandbox.builder()
                .connectionConfig(config)
                .image("sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2")
                .entrypoint("/opt/opensandbox/code-interpreter.sh")
                .env("PYTHON_VERSION", "3.11") // 指定语言版本
                .build()) {

            // 3. 创建 CodeInterpreter 包装器
            CodeInterpreter interpreter = CodeInterpreter.builder()
                .fromSandbox(sandbox)
                .build();

            // 4. 创建执行上下文 (Python)
            CodeContext context = interpreter.codes().createContext(SupportedLanguage.PYTHON);

            // 5. 运行代码
            Execution result = interpreter.codes().run(
                RunCodeRequest.builder()
                    .code("import sys; print(f'Running on Python {sys.version}')")
                    .context(context)
                    .build()
            );

            // 6. 打印输出
            if (!result.getLogs().getStdout().isEmpty()) {
                System.out.println(result.getLogs().getStdout().get(0).getText());
            }

            // 7. 清理资源
            // 注意: kill() 会立即终止远程沙箱实例；try-with-resources 会自动调用 close() 清理本地资源
            sandbox.kill();
        } catch (SandboxException e) {
            // 处理 Sandbox 特定异常
            System.err.println("沙箱错误: [" + e.getError().getCode() + "] " + e.getError().getMessage());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
```

## 运行时配置

### Docker 镜像

Code Interpreter SDK 依赖于特定的运行环境。请确保你的沙箱服务提供商支持 `opensandbox/code-interpreter` 镜像。

关于支持的语言和具体版本的详细信息，请参考 [环境文档](../../../sandboxes/code-interpreter/README_zh.md)。

### 语言版本选择

你可以在创建 `Sandbox` 时通过环境变量指定所需的编程语言版本。

| 语言    | 环境变量         | 示例值 | 默认值 (若不设置) |
| ------- | ---------------- | ------ | ----------------- |
| Python  | `PYTHON_VERSION` | `3.11` | 镜像默认值        |
| Java    | `JAVA_VERSION`   | `17`   | 镜像默认值        |
| Node.js | `NODE_VERSION`   | `20`   | 镜像默认值        |
| Go      | `GO_VERSION`     | `1.24` | 镜像默认值        |

```java
Sandbox sandbox = Sandbox.builder()
    .image("opensandbox/code-interpreter:v1.0.2")
    .entrypoint("/opt/opensandbox/code-interpreter.sh")
    .env("JAVA_VERSION", "17")
    .env("GO_VERSION", "1.23")
    .build();
```

## 核心功能示例

### 0. 直接传 `language`（使用该语言默认上下文）

如果你不需要显式管理 session id，可以只传 `language` 来执行代码。
当 `context.id` 省略时，**execd 会为该语言创建/复用默认 session**，因此状态可以跨次执行保持：

```java
import com.alibaba.opensandbox.codeinterpreter.domain.models.execd.executions.SupportedLanguage;

// Python 默认上下文：状态会在多次 run 之间保持
interpreter.codes().run("x = 42", SupportedLanguage.PYTHON);
Execution execution = interpreter.codes().run("result = x\nresult", SupportedLanguage.PYTHON);
System.out.println(execution.getResult().get(0).getText()); // 42
```

### 1. Java 代码执行

动态执行 Java 代码片段。

```java
CodeContext javaContext = interpreter.codes().createContext(SupportedLanguage.JAVA);

RunCodeRequest request = RunCodeRequest.builder()
    .code(
        "System.out.println(\"Calculating sum...\");\n" +
        "int a = 10;\n" +
        "int b = 20;\n" +
        "int sum = a + b;\n" +
        "System.out.println(\"Sum: \" + sum);\n" +
        "sum" // 返回值
    )
    .context(javaContext)
    .build();

Execution execution = interpreter.codes().run(request);

// 处理结果
System.out.println("Execution ID: " + execution.getId());
execution.getLogs().getStdout().forEach(log -> System.out.println(log.getText()));
```

### 2. Python 持久化状态

在同一个上下文中，变量状态可以跨次执行保持。

```java
CodeContext pythonContext = interpreter.codes().createContext(SupportedLanguage.PYTHON);

// 步骤 1: 定义变量
RunCodeRequest step1 = RunCodeRequest.builder()
    .code(
        "users = ['Alice', 'Bob', 'Charlie']\n" +
        "print(f'Initialized {len(users)} users')"
    )
    .context(pythonContext)
    .build();
interpreter.codes().run(step1);

// 步骤 2: 使用上一步的变量
RunCodeRequest step2 = RunCodeRequest.builder()
    .code(
        "users.append('Dave')\n" +
        "print(f'Updated users: {users}')"
    )
    .context(pythonContext)
    .build();

Execution result = interpreter.codes().run(step2);
// 输出: Updated users: ['Alice', 'Bob', 'Charlie', 'Dave']
```

### 3. 流式输出处理

实时处理标准输出、错误输出和执行事件。本示例复用了上一个示例中创建的 `pythonContext`。

```java
ExecutionHandlers handlers = ExecutionHandlers.builder()
    .onStdout(msg -> System.out.println("STDOUT: " + msg.getText()))
    .onStderr(msg -> System.err.println("STDERR: " + msg.getText()))
    .onResult(res -> System.out.println("Result: " + res.getText()))
    .onError(err -> System.err.println("Error: " + err.getValue()))
    .onExecutionComplete(complete ->
        System.out.println("Finished in " + complete.getExecutionTimeInMillis() + "ms")
    )
    .build();

RunCodeRequest request = RunCodeRequest.builder()
    .code("import time\nfor i in range(5):\n    print(i)\n    time.sleep(0.5)")
    .context(pythonContext)
    .handlers(handlers)
    .build();

interpreter.codes().run(request);
```

### 4. 多语言上下文隔离

不同语言在隔离的环境中运行。

```java
CodeContext pyCtx = interpreter.codes().createContext(SupportedLanguage.PYTHON);
CodeContext goCtx = interpreter.codes().createContext(SupportedLanguage.GO);

// Python 上下文
interpreter.codes().run(
    RunCodeRequest.builder()
        .code("print('Running in Python')")
        .context(pyCtx)
        .build()
);

// Go 上下文
interpreter.codes().run(
    RunCodeRequest.builder()
        .code(
            "package main\n" +
            "func main() { println(\"Running in Go\") }"
        )
        .context(goCtx)
        .build()
);
```


================================================
FILE: sdks/code-interpreter/kotlin/build.gradle.kts
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

@file:Suppress("UnstableApiUsage")

import org.gradle.api.GradleException
import org.jetbrains.kotlin.gradle.dsl.KotlinJvmProjectExtension

/**
 * Extracts a version string from the CI ref name when it is a tag with the expected prefix.
 *
 * The ref is read from the `GITHUB_REF_NAME` environment variable; when that variable is not
 * set, `GITHUB_REF` is used with a leading `refs/tags/` removed.
 *
 * @param expectedTagPrefix prefix the (trimmed) ref must start with, e.g. `java/code-interpreter/v`
 * @return the trimmed remainder of the ref after [expectedTagPrefix], or `null` when no ref is
 *   available, the ref does not start with the prefix, or the remainder is empty
 */
fun Project.resolveVersionFromTag(expectedTagPrefix: String): String? {
    val rawRef = System.getenv("GITHUB_REF_NAME") ?: System.getenv("GITHUB_REF")?.removePrefix("refs/tags/")
    val ref = rawRef?.trim()
    if (ref == null || !ref.startsWith(expectedTagPrefix)) {
        return null
    }

    val version = ref.removePrefix(expectedTagPrefix).trim()
    return if (version.isEmpty()) null else version
}

// Classpath for the build script itself: resolves the Jackson bundle declared in the
// version catalog from Maven Central / the Gradle Plugin Portal.
buildscript {
    repositories {
        mavenCentral()
        gradlePluginPortal()
    }

    dependencies {
        classpath(libs.bundles.jackson.build)
    }
}

// Plugins marked `apply false` are only loaded here; the `subprojects` block further down
// applies them by id. Spotless is the only plugin applied to this root project.
plugins {
    alias(libs.plugins.kotlin.jvm) apply false
    alias(libs.plugins.kotlin.serialization) apply false
    alias(libs.plugins.dokka) apply false
    alias(libs.plugins.spotless)
    alias(libs.plugins.mavenPublish) apply false
}

// Version declared via the `project.version` Gradle property (gradle.properties).
// `findProperty` returns null when the property is absent, so fail fast with an explicit
// message instead of the opaque cast failure a bare `as String` would produce.
val manualProjectVersion =
    project.findProperty("project.version") as? String
        ?: throw GradleException("Missing required Gradle property 'project.version' (expected in gradle.properties).")

// Version encoded in the CI tag, or null when the build is not running for a matching tag.
val tagVersion =
    project.resolveVersionFromTag(
        expectedTagPrefix = "java/code-interpreter/v",
    )

// A release tag must agree with the declared version; otherwise abort the build.
if (tagVersion != null && tagVersion != manualProjectVersion) {
    throw GradleException(
        "Ref/tag version mismatch: expected version '$manualProjectVersion' from gradle.properties, " +
            "but got '$tagVersion' from tag 'java/code-interpreter/v...'. Please align the tag and project.version.",
    )
}

// Publish the validated version as an extra property on the root project.
extra["project.version"] = manualProjectVersion

// Give the root project and every subproject the same group, version and repository.
allprojects {
    val configuredGroup = findProperty("project.group") as String

    group = configuredGroup
    version = manualProjectVersion

    repositories.mavenCentral()
}

// Spotless formatting rules: run ktlint over Kotlin sources and Gradle Kotlin DSL scripts.
configure<com.diffplug.gradle.spotless.SpotlessExtension> {
    kotlin {
        target("**/*.kt")
        // Skip build output, IDE output and generated sources.
        targetExclude("**/build/**/*.kt", "**/bin/**/*.kt", "**/generated/**/*.kt")
        ktlint()
    }
    kotlinGradle {
        target("**/*.gradle.kts")
        ktlint()
    }
}

// Plugin ids resolved from the version catalog so they can be applied by id inside
// the `subprojects` block below.
val kotlinJvmId = libs.plugins.kotlin.jvm.get().pluginId
val kotlinSerializationId = libs.plugins.kotlin.serialization.get().pluginId
val dokkaId = libs.plugins.dokka.get().pluginId
val mavenPublishId = libs.plugins.mavenPublish.get().pluginId

// Shared configuration for every subproject: publishing for all of them, and the Kotlin
// toolchain/plugins for everything except the `code-interpreter-bom` project.
subprojects {
    apply(plugin = mavenPublishId)
    if (name != "code-interpreter-bom") {
        apply(plugin = kotlinJvmId)
        apply(plugin = kotlinSerializationId)
        apply(plugin = dokkaId)

        configure<KotlinJvmProjectExtension> {
            // Main sources use a Java 8 toolchain.
            jvmToolchain(8)
            compilerOptions {
                javaParameters.set(true)
                freeCompilerArgs.add("-Xjvm-default=all")
            }
        }
    }

    // Include license file in published artifacts (jars/sources jars) for compliance and clarity.
    tasks.withType<Jar>().configureEach {
        from(rootProject.file("LICENSE")) {
            into("META-INF")
        }
    }

    configure<com.vanniktech.maven.publish.MavenPublishBaseExtension> {
        coordinates(project.group.toString(), project.name, project.version.toString())
        publishToMavenCentral()
        // Sign publications unless one of the requested task names contains "publishToMavenLocal".
        if (!gradle.startParameter.taskNames.any { it.contains("publishToMavenLocal") }) {
            signAllPublications()
        }
        // POM metadata attached to every published module.
        pom {
            name.set(project.name)
            description.set("Alibaba Code Interpreter SDK")
            inceptionYear.set("2025")
            url.set("https://github.com/alibaba/OpenSandbox")
            licenses {
                license {
                    name.set("The Apache License, Version 2.0")
                    url.set("https://www.apache.org/licenses/LICENSE-2.0.txt")
                    distribution.set("repo")
                }
            }
            developers {
                developer {
                    id.set("alibaba")
                    name.set("Alibaba Group")
                    url.set("https://github.com/alibaba")
                    email.set("ninan.nn@alibaba-inc.com")
                }
            }
            scm {
                url.set("https://github.com/alibaba/OpenSandbox")
                connection.set("scm:git:https://github.com/alibaba/OpenSandbox.git")
                developerConnection.set("scm:git:ssh://git@github.com/alibaba/OpenSandbox.git")
            }
        }
    }
}


================================================
FILE: sdks/code-interpreter/kotlin/code-interpreter/build.gradle.kts
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Repositories for this module: optional Maven Local, Maven Central, and the Sonatype
// snapshot repository when the sandbox dependency version is a SNAPSHOT.
repositories {
    val useLocalRepository = project.hasProperty("useMavenLocal")
    if (useLocalRepository) {
        mavenLocal()
    }
    mavenCentral()

    val dependsOnSnapshotSandbox = libs.versions.sandbox.get().contains("SNAPSHOT", ignoreCase = true)
    if (dependsOnSnapshotSandbox) {
        maven {
            url = uri("https://central.sonatype.com/repository/maven-snapshots/")
            // Only snapshot artifacts are looked up in this repository.
            mavenContent { snapshotsOnly() }
        }
    }
}

// `api` dependencies are exposed to consumers of this module; `implementation`
// dependencies stay internal to it.
dependencies {
    api(libs.sandbox)
    implementation(libs.sandbox.api)

    api(libs.kotlin.stdlib)
    api(libs.slf4j.api)

    // OkHttp, OkHttp logging and the serialization bundle from the version catalog.
    implementation(libs.okhttp)
    implementation(libs.okhttp.logging)
    implementation(libs.bundles.serialization)

    // Test-only dependencies.
    testImplementation(libs.bundles.testing)
    testRuntimeOnly(libs.junit.platform.launcher)
}

// Configure test tasks to use JDK 17:
// every Test task is launched with a Java 17 toolchain launcher and runs on the JUnit Platform.
tasks.withType<Test> {
    javaLauncher.set(
        javaToolchains.launcherFor {
            languageVersion.set(JavaLanguageVersion.of(17))
        },
    )
    useJUnitPlatform()
}

// Configure test compilation to use JDK 17:
// only Kotlin compile tasks whose name contains "test" (case-insensitive) get the
// JVM 17 target; other Kotlin compile tasks are left untouched by this block.
tasks.withType<org.jetbrains.kotlin.gradle.tasks.KotlinCompile> {
    if (name.contains("test", ignoreCase = true)) {
        compilerOptions {
            jvmTarget.set(org.jetbrains.kotlin.gradle.dsl.JvmTarget.JVM_17)
        }
    }
}

// Java compile tasks whose name contains "test" (case-insensitive) are compiled with a
// Java 17 toolchain compiler; other Java compile tasks are left untouched by this block.
tasks.withType<JavaCompile> {
    if (name.contains("test", ignoreCase = true)) {
        javaCompiler.set(
            javaToolchains.compilerFor {
                languageVersion.set(JavaLanguageVersion.of(17))
            },
        )
    }
}

// For every Dokka task, set the module name of the "main" Dokka source set to "CodeInterpreter".
tasks.withType<org.jetbrains.dokka.gradle.DokkaTask>().configureEach {
    dokkaSourceSets {
        named("main") {
            moduleName.set("CodeInterpreter")
        }
    }
}


================================================
FILE: sdks/code-interpreter/kotlin/code-interpreter/src/main/kotlin/com/alibaba/opensandbox/codeinterpreter/CodeInterpreter.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.codeinterpreter

import com.alibaba.opensandbox.codeinterpreter.domain.services.Codes
import com.alibaba.opensandbox.codeinterpreter.infrastructure.factory.AdapterFactory
import com.alibaba.opensandbox.sandbox.Sandbox
import com.alibaba.opensandbox.sandbox.domain.exceptions.InvalidArgumentException
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxException
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxInternalException
import com.alibaba.opensandbox.sandbox.domain.models.execd.DEFAULT_EXECD_PORT
import org.slf4j.LoggerFactory

/**
 * Code Interpreter SDK providing secure, isolated code execution capabilities.
 *
 * This class wraps an existing [Sandbox] and adds specialized code execution features on top
 * of it, exposed through the [Codes] service returned by [codes].
 *
 * ## Key Features
 *
 * - **Multi-language Code Execution**: Python, Java, Go, TypeScript, Bash, JavaScript
 *   (see `SupportedLanguage`)
 * - **Session Management**: Persistent execution contexts with variable state
 * - **Sandbox Integration**: Full access to underlying sandbox file system and command execution
 * - **Streaming Execution**: Real-time code execution with output streaming
 *
 * ## Usage Example
 *
 * ```kotlin
 * // First create a sandbox instance
 * val sandbox = Sandbox.builder()
 *     .image("python:3.11")
 *     .resource { put("memory", "2Gi") }
 *     .build()
 *
 * // Then wrap it with code interpreter capabilities
 * val interpreter = CodeInterpreter.builder()
 *     .fromSandbox(sandbox)
 *     .build()
 *
 * // Execute code with context
 * val context = interpreter.codes().createContext(SupportedLanguage.PYTHON)
 * val result = interpreter.codes().run(
 *     RunCodeRequest.builder()
 *         .code("print('Hello World')")
 *         .context(context)
 *         .build()
 * )
 * println(result.stdout) // Output: Hello World
 *
 * // Access underlying sandbox for file operations
 * interpreter.sandbox().files().writeFile("data.txt", "Hello")
 * val fileResult = interpreter.codes().run(
 *     RunCodeRequest.builder()
 *         .code("with open('data.txt') as f: print(f.read())")
 *         .context(context)
 *         .build()
 * )
 *
 * // Always clean up resources; lifecycle is managed through the underlying sandbox
 * interpreter.sandbox().kill()
 * interpreter.sandbox().close()
 * ```
 */
class CodeInterpreter internal constructor(
    private val sandbox: Sandbox,
    private val codeService: Codes,
) {
    /**
     * Provides access to the underlying sandbox instance.
     */
    fun sandbox(): Sandbox = sandbox

    /**
     * Gets the unique identifier of this code interpreter (same as underlying sandbox ID).
     */
    val id: String get() = sandbox.id

    /**
     * Provides access to file system operations within the sandbox.
     *
     * Delegates to the underlying sandbox's file service.
     *
     * @return Service for filesystem manipulation
     */
    fun files() = sandbox.files()

    /**
     * Provides access to command execution operations.
     *
     * Delegates to the underlying sandbox's command service.
     *
     * @return Service for command execution
     */
    fun commands() = sandbox.commands()

    /**
     * Provides access to sandbox metrics and monitoring.
     *
     * Delegates to the underlying sandbox's metrics service.
     *
     * @return Service for metrics retrieval
     */
    fun metrics() = sandbox.metrics()

    /**
     * Provides access to code execution operations.
     *
     * This service enables:
     * - Multi-language code execution (Python, JavaScript, Bash, etc.)
     * - Execution context management
     * - Interruption of running executions
     *
     * @return Service for code execution with context support
     */
    fun codes() = codeService

    companion object {
        private val logger = LoggerFactory.getLogger(CodeInterpreter::class.java)

        /**
         * Creates a new [Builder] for creating CodeInterpreter instances.
         *
         * CodeInterpreter instances must be created from existing Sandbox instances
         * using the fromSandbox() method on the builder.
         *
         * @return A new Builder instance
         */
        @JvmStatic
        fun builder(): Builder = Builder()

        /**
         * Creates a CodeInterpreter from an existing Sandbox instance.
         *
         * Resolves the sandbox endpoint for [DEFAULT_EXECD_PORT] and builds the code
         * execution service on top of it using the sandbox's HTTP client provider.
         *
         * @param sandbox Existing sandbox instance to wrap with code execution capabilities
         * @return CodeInterpreter instance wrapping the sandbox
         * @throws SandboxException if endpoint resolution or service creation fails with a sandbox error
         * @throws SandboxInternalException if any other exception occurs during initialization
         */
        internal fun create(sandbox: Sandbox): CodeInterpreter {
            logger.info("Creating code interpreter from existing sandbox: {}", sandbox.id)

            val factory = AdapterFactory(sandbox.httpClientProvider())

            try {
                // Connect to the execd daemon endpoint for code execution services
                val codeInterpreterEndpoint = sandbox.getEndpoint(DEFAULT_EXECD_PORT)
                val codeExecutionService = factory.createCodes(codeInterpreterEndpoint)

                logger.info("Code interpreter {} created from sandbox successfully", sandbox.id)

                return CodeInterpreter(sandbox, codeExecutionService)
            } catch (e: Exception) {
                // Sandbox exceptions pass through unchanged; anything else is wrapped.
                throw when (e) {
                    is SandboxException -> e
                    else -> SandboxInternalException("Failed to create code interpreter from sandbox: ${e.message}", e)
                }
            }
        }
    }

    /**
     * Builder for creating CodeInterpreter instances from existing Sandbox instances.
     *
     * CodeInterpreter must be created by wrapping an existing Sandbox instance with
     * code execution capabilities. This design ensures clear separation of concerns:
     * - Sandbox handles infrastructure (containers, resources, networking)
     * - CodeInterpreter adds code execution capabilities on top
     *
     * ## Usage Example
     *
     * ```kotlin
     * // First create a sandbox with desired configuration
     * val sandbox = Sandbox.builder()
     *     .image("python:3.11")
     *     .resource { put("memory", "4Gi") }
     *     .env { put("PYTHONPATH", "/custom/path") }
     *     .build()
     *
     * // Then wrap it with code interpreter capabilities
     * val interpreter = CodeInterpreter.builder()
     *     .fromSandbox(sandbox)
     *     .build()
     *
     * // Use the interpreter
     * val result = interpreter.codes().run(RunCodeRequest.builder().code("print('Hello World!')").build())
     * ```
     */
    class Builder internal constructor() {
        private var sandbox: Sandbox? = null

        /**
         * Specifies the Sandbox instance to wrap with code interpreter capabilities.
         *
         * This is the only way to create a CodeInterpreter - by extending an existing
         * Sandbox instance with code execution functionality. Calling this method again
         * replaces the previously configured sandbox.
         *
         * @param sandbox Existing sandbox instance to wrap
         * @return This builder for method chaining
         */
        fun fromSandbox(sandbox: Sandbox): Builder {
            this.sandbox = sandbox
            return this
        }

        /**
         * Creates the CodeInterpreter instance from the configured sandbox.
         *
         * @return CodeInterpreter instance wrapping the specified sandbox
         * @throws InvalidArgumentException if no sandbox was specified via fromSandbox()
         * @throws SandboxException if the code interpreter cannot be initialized
         */
        fun build(): CodeInterpreter {
            val sandboxInstance =
                sandbox ?: throw InvalidArgumentException(
                    "Sandbox instance must be specified via fromSandbox(). " +
                        "Create a Sandbox first, then wrap it with CodeInterpreter.",
                )
            return create(sandboxInstance)
        }
    }
}


================================================
FILE: sdks/code-interpreter/kotlin/code-interpreter/src/main/kotlin/com/alibaba/opensandbox/codeinterpreter/domain/models/execd/executions/CodeModels.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.codeinterpreter.domain.models.execd.executions

import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.ExecutionHandlers

/**
 * Language identifiers accepted by the code interpreter.
 *
 * Each constant is the string value used to name a language when creating or
 * selecting an execution context. When adding a new language, make sure a matching
 * execution environment is available.
 */
object SupportedLanguage {
    // JVM / compiled languages
    const val JAVA = "java"
    const val GO = "go"

    // Scripting languages
    const val PYTHON = "python"
    const val BASH = "bash"

    // JavaScript family
    const val JAVASCRIPT = "javascript"
    const val TYPESCRIPT = "typescript"
}

/**
 * Represents an execution context for code interpretation.
 *
 * A CodeContext identifies the execution environment for a specific programming
 * language. It is an immutable value holding the context id (if one has been assigned)
 * and the language; instances are created through [builder].
 *
 * ## Usage
 *
 * ```kotlin
 * val context = CodeContext.builder()
 *     .id("session-123")
 *     .language(SupportedLanguage.PYTHON)
 *     .build()
 * ```
 *
 * @property id Unique identifier for this execution context, or `null` when no id is set
 * @property language Programming language for this context (e.g., "python", "javascript")
 */
class CodeContext private constructor(
    val id: String?,
    val language: String,
) {
    companion object {
        /**
         * Creates a new [Builder] with default values.
         *
         * @return A new Builder instance
         */
        @JvmStatic
        fun builder(): Builder = Builder()
    }

    /**
     * Builder for [CodeContext].
     *
     * Defaults: `language` is [SupportedLanguage.PYTHON] and `id` is `null`.
     */
    class Builder {
        private var language: String = SupportedLanguage.PYTHON

        private var id: String? = null

        /**
         * Sets the context id.
         *
         * @param id Context id, or `null` to leave it unset
         * @return This builder for method chaining
         */
        fun id(id: String?): Builder {
            this.id = id
            return this
        }

        /**
         * Sets the language of the context.
         *
         * @param language Language identifier (e.g., "python")
         * @return This builder for method chaining
         */
        fun language(language: String): Builder {
            this.language = language
            return this
        }

        /**
         * Builds the [CodeContext] from the values configured so far.
         *
         * @return A new CodeContext
         */
        fun build(): CodeContext {
            return CodeContext(
                id = id,
                language = language,
            )
        }
    }
}

/**
 * Request model for executing code within a specific context.
 *
 * This model encapsulates the information needed to execute a piece of code:
 * the code itself, the execution context, and optional execution event handlers.
 *
 * ## Usage Patterns
 *
 * ### Simple Execution
 * ```kotlin
 * val request = RunCodeRequest.builder()
 *     .code("print('Hello World')")
 *     .build()
 * ```
 *
 * ### Context-Aware Execution
 * ```kotlin
 * val context = CodeContext.builder()
 *     .id("session-123")
 *     .language("python")
 *     .build()
 * val request = RunCodeRequest.builder()
 *     .code("import pandas as pd; df = pd.read_csv('data.csv')")
 *     .context(context)
 *     .build()
 * ```
 *
 * @property code The source code to execute
 * @property context Execution context. When not set on the builder, a default
 * context built by `CodeContext.builder().build()` is used
 * @property handlers Optional execution event handlers, or `null` when none are set
 */
class RunCodeRequest private constructor(
    val code: String,
    val context: CodeContext,
    val handlers: ExecutionHandlers?,
) {
    companion object {
        /**
         * Creates a new [Builder].
         *
         * @return A new Builder instance
         */
        @JvmStatic
        fun builder(): Builder = Builder()
    }

    /**
     * Builder for [RunCodeRequest].
     *
     * `code` is mandatory; `context` defaults to `CodeContext.builder().build()` and
     * `handlers` defaults to `null`.
     */
    class Builder {
        private var code: String? = null
        private var context: CodeContext = CodeContext.builder().build()
        private var handlers: ExecutionHandlers? = null

        /**
         * Sets the source code to execute.
         *
         * @param code Source code; must not be blank
         * @return This builder for method chaining
         * @throws IllegalArgumentException if [code] is blank
         */
        fun code(code: String): Builder {
            require(code.isNotBlank()) { "Code cannot be blank" }
            this.code = code
            return this
        }

        /**
         * Sets the execution context, replacing the default one.
         *
         * @param context Context to execute the code in
         * @return This builder for method chaining
         */
        fun context(context: CodeContext): Builder {
            this.context = context
            return this
        }

        /**
         * Sets the execution event handlers.
         *
         * @param handlers Handlers to use, or `null` for none
         * @return This builder for method chaining
         */
        fun handlers(handlers: ExecutionHandlers?): Builder {
            this.handlers = handlers
            return this
        }

        /**
         * Builds the request.
         *
         * @return A new RunCodeRequest
         * @throws IllegalArgumentException if no code was specified
         */
        fun build(): RunCodeRequest {
            val codeValue = requireNotNull(code) { "Code must be specified" }
            return RunCodeRequest(
                code = codeValue,
                context = context,
                handlers = handlers,
            )
        }
    }
}


================================================
FILE: sdks/code-interpreter/kotlin/code-interpreter/src/main/kotlin/com/alibaba/opensandbox/codeinterpreter/domain/services/Codes.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.codeinterpreter.domain.services

import com.alibaba.opensandbox.codeinterpreter.domain.models.execd.executions.CodeContext
import com.alibaba.opensandbox.codeinterpreter.domain.models.execd.executions.RunCodeRequest
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.Execution
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.ExecutionHandlers

/**
 * Service contract for running code and managing execution contexts.
 *
 * Implementations provide context management (get, list, create, delete), code execution,
 * and interruption of running executions. The `run` overloads taking plain arguments are
 * conveniences that assemble a [RunCodeRequest] and delegate to [run] with that request.
 */
interface Codes {
    /**
     * Looks up an existing execution context.
     *
     * A [CodeContext] represents an execution session that can be reused across
     * multiple executions.
     *
     * @param id Execution context id
     * @return The existing [CodeContext]
     */
    fun getContext(id: String): CodeContext

    /**
     * Lists the execution contexts available for a language/runtime.
     *
     * Useful for debugging, monitoring, or cleaning up leaked contexts.
     *
     * @param language Execution runtime (e.g., "python", "bash", "java")
     * @return List of [CodeContext] currently available for the given language
     */
    fun listContexts(language: String): List<CodeContext>

    /**
     * Creates a new execution context.
     *
     * @param language The programming language for this context (e.g., "python", "javascript")
     * @return The newly created [CodeContext]
     */
    fun createContext(language: String): CodeContext

    /**
     * Deletes a single execution context.
     *
     * @param id Execution context id to delete
     */
    fun deleteContext(id: String)

    /**
     * Deletes every execution context of a language/runtime.
     *
     * This is a bulk cleanup operation intended for context management.
     *
     * @param language Target execution runtime whose contexts should be deleted
     */
    fun deleteContexts(language: String)

    /**
     * Executes the code described by [request].
     *
     * @param request The code execution request containing code, context and optional handlers
     * @return The resulting [Execution]
     */
    fun run(request: RunCodeRequest): Execution

    /**
     * Executes [code] in [context], reporting execution events to [handlers].
     *
     * @param code The code to run
     * @param context The context to run the code in
     * @param handlers Execution event handlers
     * @return The resulting [Execution]
     */
    fun run(
        code: String,
        context: CodeContext,
        handlers: ExecutionHandlers,
    ): Execution {
        val request =
            RunCodeRequest.builder()
                .code(code)
                .context(context)
                .handlers(handlers)
                .build()
        return run(request)
    }

    /**
     * Executes [code] in [context] without event handlers.
     *
     * @param code The code to run
     * @param context The context to run the code in
     * @return The resulting [Execution]
     */
    fun run(
        code: String,
        context: CodeContext,
    ): Execution {
        val request =
            RunCodeRequest.builder()
                .code(code)
                .context(context)
                .build()
        return run(request)
    }

    /**
     * Executes [code] in a context built only from [language] (no context id),
     * reporting execution events to [handlers].
     *
     * @param code The code to run
     * @param language The language of the code
     * @param handlers Execution event handlers
     * @return The resulting [Execution]
     */
    fun run(
        code: String,
        language: String,
        handlers: ExecutionHandlers,
    ): Execution {
        val languageContext = CodeContext.builder().language(language).build()
        val request =
            RunCodeRequest.builder()
                .code(code)
                .context(languageContext)
                .handlers(handlers)
                .build()
        return run(request)
    }

    /**
     * Executes [code] in a context built only from [language] (no context id),
     * without event handlers.
     *
     * @param code The code to run
     * @param language The language of the code
     * @return The resulting [Execution]
     */
    fun run(
        code: String,
        language: String,
    ): Execution {
        val languageContext = CodeContext.builder().language(language).build()
        val request =
            RunCodeRequest.builder()
                .code(code)
                .context(languageContext)
                .build()
        return run(request)
    }

    /**
     * Interrupts a currently running code execution.
     *
     * @param executionId The unique identifier of the execution to interrupt
     */
    fun interrupt(executionId: String)
}


================================================
FILE: sdks/code-interpreter/kotlin/code-interpreter/src/main/kotlin/com/alibaba/opensandbox/codeinterpreter/infrastructure/adapters/converter/CodeExecutionConverter.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.codeinterpreter.infrastructure.adapters.converter

import com.alibaba.opensandbox.codeinterpreter.domain.models.execd.executions.CodeContext
import com.alibaba.opensandbox.codeinterpreter.domain.models.execd.executions.RunCodeRequest
import com.alibaba.opensandbox.sandbox.api.models.execd.CodeContext as ApiCodeContext
import com.alibaba.opensandbox.sandbox.api.models.execd.RunCodeRequest as ApiRunCodeRequest

/**
 * Extension functions converting between the SDK's domain models and the
 * generated execd API models for code execution.
 */
object CodeExecutionConverter {
    /**
     * Converts a domain [RunCodeRequest] into the API request model.
     *
     * Only `code` and `context` are copied; the request's handlers are not part of
     * the API payload.
     *
     * @return API request carrying the code and the converted context
     */
    fun RunCodeRequest.toApiRunCodeRequest(): ApiRunCodeRequest {
        return ApiRunCodeRequest(
            code = this.code,
            // `context` is non-null on the domain model, so no safe call is needed.
            context = this.context.toApiCodeContext(),
        )
    }

    /**
     * Converts a domain [CodeContext] into the API context model.
     *
     * @return API context with the same id and language
     */
    fun CodeContext.toApiCodeContext(): ApiCodeContext {
        return ApiCodeContext(
            id = this.id,
            language = this.language,
        )
    }

    /**
     * Converts an API context model into a domain [CodeContext].
     *
     * @return Domain context with the same id and language
     */
    fun ApiCodeContext.toCodeContext(): CodeContext {
        return CodeContext.builder()
            .id(this.id)
            .language(this.language)
            .build()
    }
}


================================================
FILE: sdks/code-interpreter/kotlin/code-interpreter/src/main/kotlin/com/alibaba/opensandbox/codeinterpreter/infrastructure/adapters/service/CodesAdapter.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.codeinterpreter.infrastructure.adapters.service

import com.alibaba.opensandbox.codeinterpreter.domain.models.execd.executions.CodeContext
import com.alibaba.opensandbox.codeinterpreter.domain.models.execd.executions.RunCodeRequest
import com.alibaba.opensandbox.codeinterpreter.domain.services.Codes
import com.alibaba.opensandbox.codeinterpreter.infrastructure.adapters.converter.CodeExecutionConverter.toApiRunCodeRequest
import com.alibaba.opensandbox.codeinterpreter.infrastructure.adapters.converter.CodeExecutionConverter.toCodeContext
import com.alibaba.opensandbox.sandbox.HttpClientProvider
import com.alibaba.opensandbox.sandbox.api.execd.CodeInterpretingApi
import com.alibaba.opensandbox.sandbox.api.models.execd.EventNode
import com.alibaba.opensandbox.sandbox.domain.exceptions.InvalidArgumentException
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxApiException
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxError
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxError.Companion.UNEXPECTED_RESPONSE
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.Execution
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxEndpoint
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.ExecutionEventDispatcher
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.jsonParser
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.parseSandboxError
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.toSandboxException
import okhttp3.Headers.Companion.toHeaders
import okhttp3.MediaType.Companion.toMediaType
import okhttp3.Request
import okhttp3.RequestBody.Companion.toRequestBody
import org.slf4j.LoggerFactory
import com.alibaba.opensandbox.sandbox.api.models.execd.CodeContextRequest as ApiCodeContextRequest

/**
 * [Codes] implementation backed by the execd HTTP API of a sandbox.
 *
 * Context management and interruption go through the generated [CodeInterpretingApi]
 * client; code execution posts to [RUN_CODE_PATH] and consumes the streamed response
 * line by line. Every failure is logged and rethrown after conversion via
 * `toSandboxException()`.
 *
 * @param execdEndpoint Endpoint (address and headers) of the sandbox's execd service
 * @param httpClientProvider Provider of the HTTP clients and connection configuration
 */
class CodesAdapter(
    private val execdEndpoint: SandboxEndpoint,
    private val httpClientProvider: HttpClientProvider,
) : Codes {
    companion object {
        /** Path of the code execution endpoint, appended to the base URL. */
        private const val RUN_CODE_PATH = "/code"
    }

    private val logger = LoggerFactory.getLogger(CodesAdapter::class.java)
    private val baseUrl = "${httpClientProvider.config.protocol}://${execdEndpoint.endpoint}"

    // Client derived from the shared HTTP client that adds the endpoint headers to every request.
    private val apiClient =
        httpClientProvider.httpClient.newBuilder()
            .addInterceptor { chain ->
                val requestBuilder = chain.request().newBuilder()
                execdEndpoint.headers.forEach { (key, value) ->
                    requestBuilder.header(key, value)
                }
                chain.proceed(requestBuilder.build())
            }
            .build()
    private val api =
        CodeInterpretingApi(baseUrl, apiClient)

    /**
     * Fetches an existing execution context by id.
     *
     * @param id Execution context id
     * @return The matching [CodeContext]
     */
    override fun getContext(id: String): CodeContext {
        try {
            val result = api.getContext(id)
            return result.toCodeContext()
        } catch (e: Exception) {
            logger.error("Failed to get context", e)
            throw e.toSandboxException()
        }
    }

    /**
     * Lists the execution contexts of the given language.
     *
     * @param language Execution runtime (e.g., "python")
     * @return Contexts returned by the API, converted to domain models
     */
    override fun listContexts(language: String): List<CodeContext> {
        try {
            val list = api.listContexts(language)
            return list.map { it.toCodeContext() }
        } catch (e: Exception) {
            logger.error("Failed to list contexts", e)
            throw e.toSandboxException()
        }
    }

    /**
     * Creates a new execution context for the given language.
     *
     * @param language Programming language of the new context
     * @return The created [CodeContext]
     */
    override fun createContext(language: String): CodeContext {
        try {
            val request = ApiCodeContextRequest(language = language)
            val result = api.createCodeContext(request)
            return result.toCodeContext()
        } catch (e: Exception) {
            logger.error("Failed to create context", e)
            throw e.toSandboxException()
        }
    }

    /**
     * Deletes a single execution context.
     *
     * @param id Execution context id to delete
     */
    override fun deleteContext(id: String) {
        try {
            api.deleteContext(id)
        } catch (e: Exception) {
            logger.error("Failed to delete context", e)
            throw e.toSandboxException()
        }
    }

    /**
     * Deletes all execution contexts of the given language.
     *
     * @param language Execution runtime whose contexts should be deleted
     */
    override fun deleteContexts(language: String) {
        try {
            // Must go through the API client: calling deleteContexts(language) here would
            // recurse into this method without bound and end in a StackOverflowError.
            api.deleteContextsByLanguage(language)
        } catch (e: Exception) {
            logger.error("Failed to delete contexts", e)
            throw e.toSandboxException()
        }
    }

    /**
     * Executes code and collects the streamed execution events.
     *
     * The request is posted as JSON to the run-code endpoint. On a successful response the
     * body is read line by line; blank lines are skipped and every other line is decoded as
     * an [EventNode] and dispatched to an [ExecutionEventDispatcher] that updates the returned
     * [Execution] and the request's handlers. Lines that fail to decode or dispatch are
     * logged and skipped.
     *
     * @param request Code, context and optional handlers for this execution
     * @return The [Execution] populated from the received events
     * @throws InvalidArgumentException if the request's code is empty
     */
    override fun run(request: RunCodeRequest): Execution {
        if (request.code.isEmpty()) {
            throw InvalidArgumentException("Code cannot be empty")
        }
        try {
            val apiRequest = request.toApiRunCodeRequest()
            val httpRequest =
                Request.Builder()
                    .url("$baseUrl$RUN_CODE_PATH")
                    .post(
                        jsonParser.encodeToString(apiRequest).toRequestBody("application/json".toMediaType()),
                    )
                    .headers(execdEndpoint.headers.toHeaders())
                    .build()

            val execution = Execution()

            httpClientProvider.sseClient.newCall(httpRequest).execute().use { response ->
                if (!response.isSuccessful) {
                    // Surface the error body and request id of a non-2xx response.
                    val errorBodyString = response.body?.string()
                    val sandboxError = parseSandboxError(errorBodyString)
                    val message = "Failed to run code. Status code: ${response.code}, Body: $errorBodyString"
                    throw SandboxApiException(
                        message = message,
                        statusCode = response.code,
                        error = sandboxError ?: SandboxError(UNEXPECTED_RESPONSE),
                        requestId = response.header("X-Request-ID"),
                    )
                }

                response.body?.byteStream()?.bufferedReader(Charsets.UTF_8)?.use { reader ->
                    val dispatcher = ExecutionEventDispatcher(execution, request.handlers)
                    reader.lineSequence()
                        .filter(String::isNotBlank)
                        .forEach { line ->
                            try {
                                val eventNode = jsonParser.decodeFromString<EventNode>(line)
                                dispatcher.dispatch(eventNode)
                            } catch (e: Exception) {
                                // A single bad line must not abort the whole stream.
                                logger.error("Failed to parse SSE line: {}", line, e)
                            }
                        }
                }
            }

            return execution
        } catch (e: Exception) {
            logger.error("Failed to run code (length: {})", request.code.length, e)
            throw e.toSandboxException()
        }
    }

    /**
     * Interrupts a running code execution.
     *
     * @param executionId Identifier of the execution to interrupt
     */
    override fun interrupt(executionId: String) {
        try {
            api.interruptCode(executionId)
        } catch (e: Exception) {
            logger.error("Failed to interrupt code execution", e)
            throw e.toSandboxException()
        }
    }
}


================================================
FILE: sdks/code-interpreter/kotlin/code-interpreter/src/main/kotlin/com/alibaba/opensandbox/codeinterpreter/infrastructure/factory/AdapterFactory.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.codeinterpreter.infrastructure.factory

import com.alibaba.opensandbox.codeinterpreter.domain.services.Codes
import com.alibaba.opensandbox.codeinterpreter.infrastructure.adapters.service.CodesAdapter
import com.alibaba.opensandbox.sandbox.HttpClientProvider
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxEndpoint

/**
 * Creates code-interpreter service adapters that all share one [HttpClientProvider].
 *
 * @property httpClientProvider provider passed to every adapter built by this factory
 */
class AdapterFactory(
    private val httpClientProvider: HttpClientProvider,
) {
    /**
     * Builds the [Codes] service for the given sandbox [endpoint].
     *
     * @param endpoint sandbox endpoint the returned service talks to
     * @return a [CodesAdapter] bound to [endpoint] and this factory's HTTP client provider
     */
    fun createCodes(endpoint: SandboxEndpoint): Codes = CodesAdapter(endpoint, httpClientProvider)
}


================================================
FILE: sdks/code-interpreter/kotlin/code-interpreter/src/test/kotlin/com/alibaba/opensandbox/codeinterpreter/CodeInterpreterTest.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.codeinterpreter

import com.alibaba.opensandbox.codeinterpreter.domain.services.Codes
import com.alibaba.opensandbox.sandbox.Sandbox
import com.alibaba.opensandbox.sandbox.domain.services.Commands
import com.alibaba.opensandbox.sandbox.domain.services.Filesystem
import com.alibaba.opensandbox.sandbox.domain.services.Metrics
import io.mockk.every
import io.mockk.impl.annotations.MockK
import io.mockk.junit5.MockKExtension
import io.mockk.mockk
import io.mockk.verify
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertSame
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.extension.ExtendWith

/**
 * Unit tests for [CodeInterpreter].
 *
 * Both collaborators are MockK mocks; the tests check that the interpreter
 * exposes the sandbox id, the sandbox itself and the code service, and that the
 * files/commands/metrics accessors hand back whatever the sandbox returns.
 */
@ExtendWith(MockKExtension::class)
class CodeInterpreterTest {
    @MockK
    lateinit var sandbox: Sandbox

    @MockK
    lateinit var codeService: Codes

    private val expectedId = "sandbox-id"
    private lateinit var interpreter: CodeInterpreter

    /** Stubs the sandbox id and builds a fresh interpreter around the mocks. */
    @BeforeEach
    fun setUp() {
        every { sandbox.id } returns expectedId
        interpreter = CodeInterpreter(sandbox, codeService)
    }

    @Test
    fun `id should return sandbox id`() {
        assertEquals(expectedId, interpreter.id)
    }

    @Test
    fun `sandbox should return underlying sandbox`() {
        assertSame(sandbox, interpreter.sandbox())
    }

    @Test
    fun `files should delegate to sandbox files`() {
        val stubbedFiles = mockk<Filesystem>()
        every { sandbox.files() } returns stubbedFiles

        assertSame(stubbedFiles, interpreter.files())
        verify { sandbox.files() }
    }

    @Test
    fun `commands should delegate to sandbox commands`() {
        val stubbedCommands = mockk<Commands>()
        every { sandbox.commands() } returns stubbedCommands

        assertSame(stubbedCommands, interpreter.commands())
        verify { sandbox.commands() }
    }

    @Test
    fun `metrics should delegate to sandbox metrics`() {
        val stubbedMetrics = mockk<Metrics>()
        every { sandbox.metrics() } returns stubbedMetrics

        assertSame(stubbedMetrics, interpreter.metrics())
        verify { sandbox.metrics() }
    }

    @Test
    fun `codes should return code service`() {
        assertSame(codeService, interpreter.codes())
    }
}


================================================
FILE: sdks/code-interpreter/kotlin/code-interpreter/src/test/kotlin/com/alibaba/opensandbox/codeinterpreter/infrastructure/adapters/service/CodesAdapterTest.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.codeinterpreter.infrastructure.adapters.service

import com.alibaba.opensandbox.codeinterpreter.domain.models.execd.executions.RunCodeRequest
import com.alibaba.opensandbox.sandbox.HttpClientProvider
import com.alibaba.opensandbox.sandbox.config.ConnectionConfig
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxApiException
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.ExecutionHandlers
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxEndpoint
import okhttp3.mockwebserver.MockResponse
import okhttp3.mockwebserver.MockWebServer
import org.junit.jupiter.api.AfterEach
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertThrows
import org.junit.jupiter.api.Assertions.assertTrue
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.Test
import java.util.concurrent.CountDownLatch
import java.util.concurrent.TimeUnit

/**
 * Tests [CodesAdapter] against a local [MockWebServer].
 *
 * Each test enqueues a canned HTTP response, drives the adapter, and then
 * asserts on the adapter's result and/or on the request the server recorded.
 */
class CodesAdapterTest {
    private lateinit var mockWebServer: MockWebServer
    private lateinit var codesAdapter: CodesAdapter
    private lateinit var httpClientProvider: HttpClientProvider

    // Canned event lines used as the streaming response body of `run`.
    private val stdoutEvent = """{"type":"stdout","text":"Hello World","timestamp":1672531200000}"""
    private val completeEvent = """{"type":"execution_complete","execution_time":100,"timestamp":1672531201000}"""

    /** Starts the mock server and wires a default adapter (no extra endpoint headers) to it. */
    @BeforeEach
    fun setUp() {
        mockWebServer = MockWebServer()
        mockWebServer.start()

        httpClientProvider = HttpClientProvider(serverConnectionConfig())
        codesAdapter = CodesAdapter(SandboxEndpoint(serverAddress()), httpClientProvider)
    }

    @AfterEach
    fun tearDown() {
        // Close the HTTP client even if stopping the server throws; otherwise a
        // failed shutdown would leave the provider open.
        try {
            mockWebServer.shutdown()
        } finally {
            httpClientProvider.close()
        }
    }

    /** Returns the `host:port` address of the running mock server. */
    private fun serverAddress() = "${mockWebServer.hostName}:${mockWebServer.port}"

    /** Builds a plain-HTTP connection config that targets the mock server. */
    private fun serverConnectionConfig() =
        ConnectionConfig.builder()
            .domain(serverAddress())
            .protocol("http")
            .build()

    /** Builds an endpoint for the mock server that carries the custom `X-Endpoint` header. */
    private fun endpointWithHeader() = SandboxEndpoint(serverAddress(), mapOf("X-Endpoint" to "endpoint"))

    @Test
    fun `createContext should send correct request`() {
        mockWebServer.enqueue(
            MockResponse()
                .setResponseCode(200)
                .setBody("""{"id":"ctx-123", "language":"python"}"""),
        )

        val context = codesAdapter.createContext("python")

        assertEquals("ctx-123", context.id)
        assertEquals("python", context.language)

        val request = mockWebServer.takeRequest()
        assertEquals("POST", request.method)
        assertEquals("/code/context", request.path)
    }

    @Test
    fun `createContext should include endpoint headers`() {
        mockWebServer.enqueue(
            MockResponse()
                .setResponseCode(200)
                .setBody("""{"id":"ctx-123", "language":"python"}"""),
        )

        // A dedicated provider/adapter pair is used so the endpoint can carry headers.
        HttpClientProvider(serverConnectionConfig()).use { provider ->
            val adapter = CodesAdapter(endpointWithHeader(), provider)
            adapter.createContext("python")
        }

        val request = mockWebServer.takeRequest()
        assertEquals("endpoint", request.getHeader("X-Endpoint"))
    }

    @Test
    fun `run should stream events correctly`() {
        // Response body: one JSON event per line (no `data:` framing).
        val responseBody = "$stdoutEvent\n$completeEvent\n"

        mockWebServer.enqueue(
            MockResponse()
                .setResponseCode(200)
                .setBody(responseBody),
        )

        val receivedOutput = StringBuilder()
        val latch = CountDownLatch(1)
        var executionTime = -1L

        val handlers =
            ExecutionHandlers.builder()
                .onStdout { msg -> receivedOutput.append(msg.text) }
                .onExecutionComplete { complete ->
                    executionTime = complete.executionTimeInMillis
                    latch.countDown()
                }
                .build()

        val request =
            RunCodeRequest.builder()
                .code("print('Hello World')")
                .handlers(handlers)
                .build()

        codesAdapter.run(request)

        // The latch is released by the completion handler; waiting on it also
        // guards the assertions below against reading state too early.
        assertTrue(latch.await(2, TimeUnit.SECONDS), "Timed out waiting for completion")
        assertEquals("Hello World", receivedOutput.toString())
        assertEquals(100L, executionTime)

        val recordedRequest = mockWebServer.takeRequest()
        assertEquals("/code", recordedRequest.path)
        assertEquals("POST", recordedRequest.method)
    }

    @Test
    fun `run should include endpoint headers`() {
        mockWebServer.enqueue(
            MockResponse()
                .setResponseCode(200)
                .setBody("$stdoutEvent\n$completeEvent\n"),
        )

        HttpClientProvider(serverConnectionConfig()).use { provider ->
            val adapter = CodesAdapter(endpointWithHeader(), provider)
            val request =
                RunCodeRequest.builder()
                    .code("print('Hello World')")
                    .handlers(ExecutionHandlers.builder().build())
                    .build()

            adapter.run(request)
        }

        val recordedRequest = mockWebServer.takeRequest()
        assertEquals("endpoint", recordedRequest.getHeader("X-Endpoint"))
    }

    @Test
    fun `interrupt should send correct request`() {
        mockWebServer.enqueue(MockResponse().setResponseCode(204))

        codesAdapter.interrupt("exec-123")

        val request = mockWebServer.takeRequest()
        assertEquals("DELETE", request.method)
        assertEquals("/code", request.requestUrl?.encodedPath)
        assertEquals("exec-123", request.requestUrl?.queryParameter("id"))
    }

    @Test
    fun `run should expose request id on api exception`() {
        mockWebServer.enqueue(
            MockResponse()
                .setResponseCode(500)
                .addHeader("X-Request-ID", "req-kotlin-code-123")
                .setBody("""{"code":"INTERNAL_ERROR","message":"boom"}"""),
        )

        val request = RunCodeRequest.builder().code("print('boom')").build()
        val ex = assertThrows(SandboxApiException::class.java) { codesAdapter.run(request) }

        assertEquals(500, ex.statusCode)
        assertEquals("req-kotlin-code-123", ex.requestId)
    }
}


================================================
FILE: sdks/code-interpreter/kotlin/code-interpreter-bom/build.gradle.kts
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Build script for the `code-interpreter-bom` module: it applies the
// `java-platform` plugin and declares dependency constraints only.
plugins {
    `java-platform`
}

dependencies {
    constraints {
        // The SDK module built by this project.
        api(project(":code-interpreter"))

        // Third-party libraries, taken from the `libs` version catalog.
        api(libs.kotlin.stdlib)
        api(libs.okhttp)
        api(libs.okhttp.logging)
        api(libs.kotlinx.serialization.json)
        api(libs.slf4j.api)
    }
}


================================================
FILE: sdks/code-interpreter/kotlin/gradle/libs.versions.toml
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Version numbers meant to be referenced from [libraries] and [plugins] via `version.ref`.
[versions]
# Language and runtime libraries
kotlin = "2.2.21"
kotlinx-serialization = "1.9.0"
okhttp = "4.12.0"
slf4j = "2.0.9"
# Test libraries
junit = "5.10.1"
mockk = "1.13.8"
# Gradle plugins
spotless = "6.23.3"
maven-publish = "0.35.0"
dokka = "1.9.10"
# Build-time only (see the Jackson group in [libraries])
jackson = "2.18.2"
# OpenSandbox artifacts (sandbox, sandbox-api)
sandbox = "1.0.5"
# Intended for junit-platform-launcher
junit-platform = "1.13.4"

[libraries]
# Kotlin
kotlin-stdlib = { module = "org.jetbrains.kotlin:kotlin-stdlib", version.ref = "kotlin" }

# HTTP
okhttp = { module = "com.squareup.okhttp3:okhttp", version.ref = "okhttp" }
okhttp-logging = { module = "com.squareup.okhttp3:logging-interceptor", version.ref = "okhttp" }
okhttp-mockwebserver = { module = "com.squareup.okhttp3:mockwebserver", version.ref = "okhttp" }

# Serialization
kotlinx-serialization-json = { module = "org.jetbrains.kotlinx:kotlinx-serialization-json", version.ref = "kotlinx-serialization" }

# Logging
slf4j-api = { module = "org.slf4j:slf4j-api", version.ref = "slf4j" }

# Testing
junit-jupiter = { module = "org.junit.jupiter:junit-jupiter", version.ref = "junit" }
mockk = { module = "io.mockk:mockk", version.ref = "mockk" }
# `version.ref` looks up the `junit-platform` entry in [versions]; a plain
# `version = "junit-platform"` would request the literal version string "junit-platform".
junit-platform-launcher = { module = "org.junit.platform:junit-platform-launcher", version.ref = "junit-platform" }

# Jackson(build-time)
jackson-core = { module = "com.fasterxml.jackson.core:jackson-core", version.ref = "jackson" }
jackson-databind = { module = "com.fasterxml.jackson.core:jackson-databind", version.ref = "jackson" }
jackson-yaml = { module = "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml", version.ref = "jackson" }
jackson-kotlin = { module = "com.fasterxml.jackson.module:jackson-module-kotlin", version.ref = "jackson" }

# sandbox
sandbox = { module = "com.alibaba.opensandbox:sandbox", version.ref = "sandbox" }
sandbox-api = { module = "com.alibaba.opensandbox:sandbox-api", version.ref = "sandbox" }

[plugins]
kotlin-jvm = { id = "org.jetbrains.kotlin.jvm", version.ref = "kotlin" }
kotlin-serialization = { id = "org.jetbrains.kotlin.plugin.serialization", version.ref = "kotlin" }
spotless = { id = "com.diffplug.spotless", version.ref = "spotless" }
mavenPublish = { id = "com.vanniktech.maven.publish", version.ref = "maven-publish" }
dokka = { id = "org.jetbrains.dokka", version.ref = "dokka" }

[bundles]
serialization = ["kotlinx-serialization-json"]
testing = ["junit-jupiter", "mockk", "okhttp-mockwebserver"]
jackson-build = ["jackson-core", "jackson-databind", "jackson-yaml", "jackson-kotlin"]


================================================
FILE: sdks/code-interpreter/kotlin/gradle/wrapper/gradle-wrapper.properties
================================================
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-9.2.1-all.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists


================================================
FILE: sdks/code-interpreter/kotlin/gradle.properties
================================================
# Build optimization
org.gradle.jvmargs=-Xmx4g -XX:MaxMetaspaceSize=512m -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8
org.gradle.caching=true
org.gradle.parallel=true

# Project metadata
project.group=com.alibaba.opensandbox
project.version=1.0.5
project.description=A Kotlin SDK for Code Interpreter


================================================
FILE: sdks/code-interpreter/kotlin/gradlew
================================================
#!/bin/sh

#
# Copyright © 2015 the original authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
#

##############################################################################
#
#   Gradle start up script for POSIX generated by Gradle.
#
#   Important for running:
#
#   (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
#       noncompliant, but you have some other compliant shell such as ksh or
#       bash, then to run this script, type that shell name before the whole
#       command line, like:
#
#           ksh Gradle
#
#       Busybox and similar reduced shells will NOT work, because this script
#       requires all of these POSIX shell features:
#         * functions;
#         * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
#           «${var#prefix}», «${var%suffix}», and «$( cmd )»;
#         * compound commands having a testable exit status, especially «case»;
#         * various built-in commands including «command», «set», and «ulimit».
#
#   Important for patching:
#
#   (2) This script targets any POSIX shell, so it avoids extensions provided
#       by Bash, Ksh, etc; in particular arrays are avoided.
#
#       The "traditional" practice of packing multiple parameters into a
#       space-separated string is a well documented source of bugs and security
#       problems, so this is (mostly) avoided, by progressively accumulating
#       options in "$@", and eventually passing that to Java.
#
#       Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
#       and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
#       see the in-line comments for details.
#
#       There are tweaks for specific operating systems such as AIX, CygWin,
#       Darwin, MinGW, and NonStop.
#
#   (3) This script is generated from the Groovy template
#       https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
#       within the Gradle project.
#
#       You can find Gradle at https://github.com/gradle/gradle/.
#
##############################################################################

# Attempt to set APP_HOME

# Resolve links: $0 may be a link
app_path=$0

# Need this for daisy-chained symlinks.
while
    APP_HOME=${app_path%"${app_path##*/}"}  # leaves a trailing /; empty if no leading path
    [ -h "$app_path" ]
do
    ls=$( ls -ld "$app_path" )
    link=${ls#*' -> '}
    case $link in             #(
      /*)   app_path=$link ;; #(
      *)    app_path=$APP_HOME$link ;;
    esac
done

# This is normally unused
# shellcheck disable=SC2034
APP_BASE_NAME=${0##*/}
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s\n' "$PWD" ) || exit

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD=maximum

# Print all arguments, joined into one message, on stderr.
# The `>&2` after the closing brace redirects the whole function body.
warn () {
    echo "$*"
} >&2

# Print all arguments as one message on stderr, framed by blank lines,
# then terminate the script with exit status 1.
die () {
    echo
    echo "$*"
    echo
    exit 1
} >&2

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "$( uname )" in                #(
  CYGWIN* )         cygwin=true  ;; #(
  Darwin* )         darwin=true  ;; #(
  MSYS* | MINGW* )  msys=true    ;; #(
  NONSTOP* )        nonstop=true ;;
esac


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD=$JAVA_HOME/jre/sh/java
    else
        JAVACMD=$JAVA_HOME/bin/java
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD=java
    if ! command -v java >/dev/null 2>&1
    then
        die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
fi

# Increase the maximum file descriptors if we can.
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
    case $MAX_FD in #(
      max*)
        # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
        # shellcheck disable=SC2039,SC3045
        MAX_FD=$( ulimit -H -n ) ||
            warn "Could not query maximum file descriptor limit"
    esac
    case $MAX_FD in  #(
      '' | soft) :;; #(
      *)
        # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
        # shellcheck disable=SC2039,SC3045
        ulimit -n "$MAX_FD" ||
            warn "Could not set maximum file descriptor limit to $MAX_FD"
    esac
fi

# Collect all arguments for the java command, stacking in reverse order:
#   * args from the command line
#   * the main class name
#   * -classpath
#   * -D...appname settings
#   * --module-path (only if needed)
#   * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.

# For Cygwin or MSYS, switch paths to Windows format before running java
if "$cygwin" || "$msys" ; then
    APP_HOME=$( cygpath --path --mixed "$APP_HOME" )

    JAVACMD=$( cygpath --unix "$JAVACMD" )

    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    for arg do
        if
            case $arg in                                #(
              -*)   false ;;                            # don't mess with options #(
              /?*)  t=${arg#/} t=/${t%%/*}              # looks like a POSIX filepath
                    [ -e "$t" ] ;;                      #(
              *)    false ;;
            esac
        then
            arg=$( cygpath --path --ignore --mixed "$arg" )
        fi
        # Roll the args list around exactly as many times as the number of
        # args, so each arg winds up back in the position where it started, but
        # possibly modified.
        #
        # NB: a `for` loop captures its iteration list before it begins, so
        # changing the positional parameters here affects neither the number of
        # iterations, nor the values presented in `arg`.
        shift                   # remove old arg
        set -- "$@" "$arg"      # push replacement arg
    done
fi


# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Collect all arguments for the java command:
#   * DEFAULT_JVM_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
#     and any embedded shellness will be escaped.
#   * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
#     treated as '${Hostname}' itself on the command line.

set -- \
        "-Dorg.gradle.appname=$APP_BASE_NAME" \
        -jar "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" \
        "$@"

# Stop when "xargs" is not available.
if ! command -v xargs >/dev/null 2>&1
then
    die "xargs is not available"
fi

# Use "xargs" to parse quoted args.
#
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
#
# In Bash we could simply go:
#
#   readarray ARGS < <( xargs -n1 <<<"$var" ) &&
#   set -- "${ARGS[@]}" "$@"
#
# but POSIX shell has neither arrays nor process substitution, so instead we
# post-process each arg (as a line of input to sed) to backslash-escape any
# character that might be a shell metacharacter, then use eval to reverse
# that process (while maintaining the separation between arguments), and wrap
# the whole thing up as a single "set" statement.
#
# This will of course break if any of these variables contains a newline or
# an unmatched quote.
#

eval "set -- $(
        printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
        xargs -n1 |
        sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
        tr '\n' ' '
    )" '"$@"'

exec "$JAVACMD" "$@"


================================================
FILE: sdks/code-interpreter/kotlin/settings.gradle.kts
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Name of the root project of this multi-module build.
rootProject.name = "code-interpreter-parent"

plugins {
    // Foojay toolchain resolver convention plugin.
    // NOTE(review): presumably enables automatic JDK toolchain download — confirm.
    id("org.gradle.toolchains.foojay-resolver-convention") version("1.0.0")
}

// Modules included in the build: the SDK itself and its BOM.
include(":code-interpreter")
include(":code-interpreter-bom")


================================================
FILE: sdks/code-interpreter/python/LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: sdks/code-interpreter/python/Makefile
================================================
# Every target in this file names a command, not a file it produces, so all of
# them are declared phony -- including the workflow and Docker targets. Without
# this, a stray file called e.g. `check` or `ci` would silently disable them.
.PHONY: help install dev-install format lint type-check test test-cov clean docs build publish
.PHONY: check ci docker-build docker-test

# Default target
# Lists every target that carries a trailing `## description` comment.
help: ## Show this help message
	@echo "Available commands:"
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-15s\033[0m %s\n", $$1, $$2}'

install: ## Install package dependencies
	uv sync

dev-install: ## Install package with development dependencies
	uv sync --group dev

format: ## Format code with ruff
	uv run ruff format .

lint: ## Run linting with ruff
	uv run ruff check .

type-check: ## Run type checking with pyright
	uv run pyright

test: ## Run tests
	uv run pytest

# Coverage must point at this package's own sources (src/code_interpreter),
# not at the sandbox SDK's src/opensandbox, which does not exist here.
test-cov: ## Run tests with coverage
	uv run pytest --cov=src/code_interpreter --cov-report=html --cov-report=term

clean: ## Clean build artifacts
	rm -rf build/
	rm -rf dist/
	rm -rf *.egg-info/
	rm -rf .pytest_cache/
	rm -rf .coverage
	rm -rf htmlcov/
	find . -type d -name __pycache__ -exec rm -rf {} +
	find . -name "*.pyc" -delete

docs: ## Generate documentation
	cd docs && uv run sphinx-build -b html . _build/html

build: ## Build package
	uv build

publish: ## Publish to PyPI (requires authentication)
	uv publish

# Development workflow targets
check: format lint type-check ## Run all code quality checks

ci: dev-install check test ## Run CI pipeline locally

# Docker targets
docker-build: ## Build Docker image for development
	docker build -t opensandbox-code-interpreter-dev .

# $(CURDIR) is set by make itself to its working directory, so the mount stays
# correct under `make -C <dir>`; $(PWD) is only inherited from the shell.
docker-test: ## Run tests in Docker container
	docker run --rm -v "$(CURDIR)":/app opensandbox-code-interpreter-dev make test


================================================
FILE: sdks/code-interpreter/python/README.md
================================================
# OpenSandbox Code Interpreter SDK for Python

English | [中文](README_zh.md)

A Python SDK for executing code in secure, isolated sandboxes. It provides a high-level API for running Python, Java,
Go, TypeScript, and other languages safely, with support for code execution contexts.

## Prerequisites

This SDK requires a Docker image containing the Code Interpreter runtime environment. You must use the
`opensandbox/code-interpreter` image (or a derivative) which includes pre-installed runtimes for Python, Java, Go,
Node.js, etc.

For detailed information about supported languages and versions, refer to the
[Environment Documentation](../../../sandboxes/code-interpreter/README.md).

## Installation

### pip

```bash
pip install opensandbox-code-interpreter
```

### uv

```bash
uv add opensandbox-code-interpreter
```

## Quick Start

The following example demonstrates how to create a sandbox with a specific runtime configuration and execute a simple
script.

```python
import asyncio
from datetime import timedelta

from code_interpreter import CodeInterpreter, SupportedLanguage
from opensandbox import Sandbox
from opensandbox.config import ConnectionConfig


async def main() -> None:
    # 1. Configure connection
    config = ConnectionConfig(
        domain="api.opensandbox.io",
        api_key="your-api-key",
        request_timeout=timedelta(seconds=60),
    )

    # 2. Create a Sandbox with the code-interpreter image + runtime versions
    sandbox = await Sandbox.create(
        "opensandbox/code-interpreter:v1.0.2",
        connection_config=config,
        entrypoint=["/opt/opensandbox/code-interpreter.sh"],
        env={
            "PYTHON_VERSION": "3.11",
            "JAVA_VERSION": "17",
            "NODE_VERSION": "20",
            "GO_VERSION": "1.24",
        },
    )

    # 3. Use async context manager to ensure local resources are cleaned up
    async with sandbox:
        # 4. Create CodeInterpreter wrapper
        interpreter = await CodeInterpreter.create(sandbox=sandbox)

        # 5. Create an execution context (Python)
        context = await interpreter.codes.create_context(SupportedLanguage.PYTHON)

        # 6. Run code
        result = await interpreter.codes.run(
            "import sys\nprint(sys.version)\nresult = 2 + 2\nresult",
            context=context,
        )

        # Alternatively, you can pass a language directly (recommended: SupportedLanguage.*).
        # This uses the default context for that language (state can persist across runs).
        # result = await interpreter.codes.run("print('hi')", language=SupportedLanguage.PYTHON)

        # 7. Print output
        if result.result:
            print(result.result[0].text)

        # 8. Clean up the remote instance (optional but recommended)
        await sandbox.kill()


if __name__ == "__main__":
    asyncio.run(main())
```

### Synchronous Quick Start

If you prefer a synchronous API, use `SandboxSync` + `CodeInterpreterSync`:

```python
from datetime import timedelta

import httpx
from code_interpreter import CodeInterpreterSync
from opensandbox import SandboxSync
from opensandbox.config import ConnectionConfigSync

config = ConnectionConfigSync(
    domain="api.opensandbox.io",
    api_key="your-api-key",
    request_timeout=timedelta(seconds=60),
    transport=httpx.HTTPTransport(limits=httpx.Limits(max_connections=20)),
)

sandbox = SandboxSync.create(
    "opensandbox/code-interpreter:v1.0.2",
    connection_config=config,
    entrypoint=["/opt/opensandbox/code-interpreter.sh"],
    env={"PYTHON_VERSION": "3.11"},
)
with sandbox:
    interpreter = CodeInterpreterSync.create(sandbox=sandbox)
    result = interpreter.codes.run("result = 2 + 2\nresult")
    if result.result:
        print(result.result[0].text)
    sandbox.kill()
```

### Installing Python packages at runtime

You can install packages directly via `sandbox.commands.run(...)`:

```python
execution = await sandbox.commands.run("pip install pandas numpy")
```

## Runtime Configuration

### Docker Image

The Code Interpreter SDK relies on a specialized environment. Ensure your sandbox provider has the
`opensandbox/code-interpreter` image available.

### Language Version Selection

You can specify the desired version of a programming language by setting the corresponding environment variable when
creating the `Sandbox`.

| Language | Environment Variable | Example Value | Default (if unset) |
| -------- | -------------------- | ------------- | ------------------ |
| Python   | `PYTHON_VERSION`     | `3.11`        | Image default      |
| Java     | `JAVA_VERSION`       | `17`          | Image default      |
| Node.js  | `NODE_VERSION`       | `20`          | Image default      |
| Go       | `GO_VERSION`         | `1.24`        | Image default      |

## Usage Examples

### 0. Run with `language` (default language context)

You can pass `language` directly (recommended: `SupportedLanguage.*`) and skip `create_context`.
When `context.id` is omitted, **execd will create/reuse a default session for that language**, so
state can persist across runs:

```python
from code_interpreter import SupportedLanguage

execution = await interpreter.codes.run(
    "result = 2 + 2\nresult",
    language=SupportedLanguage.PYTHON,
)
assert execution.result and execution.result[0].text == "4"
```

State persistence example (default Python context):

```python
from code_interpreter import SupportedLanguage

await interpreter.codes.run("x = 42", language=SupportedLanguage.PYTHON)
execution = await interpreter.codes.run("result = x\nresult", language=SupportedLanguage.PYTHON)
assert execution.result and execution.result[0].text == "42"
```

### 1. Java Code Execution

```python
from code_interpreter import SupportedLanguage

ctx = await interpreter.codes.create_context(SupportedLanguage.JAVA)
execution = await interpreter.codes.run(
    (
        'System.out.println("Calculating sum...");\n'
        + "int a = 10;\n"
        + "int b = 20;\n"
        + "int sum = a + b;\n"
        + 'System.out.println("Sum: " + sum);\n'
        + "sum"
    ),
    context=ctx,
)

print(execution.id)
for msg in execution.logs.stdout:
    print(msg.text)
```

### 2. Python with State Persistence

Variables defined in one execution are available in subsequent executions within the same context.

```python
from code_interpreter import SupportedLanguage

ctx = await interpreter.codes.create_context(SupportedLanguage.PYTHON)

await interpreter.codes.run(
    "users = ['Alice', 'Bob', 'Charlie']\nprint(len(users))",
    context=ctx,
)

result = await interpreter.codes.run(
    "users.append('Dave')\nprint(users)\nresult = users\nresult",
    context=ctx,
)
```

### 3. Streaming Output Handling

Handle stdout/stderr and execution events in real-time.

```python
from opensandbox.models.execd import ExecutionHandlers
from code_interpreter import SupportedLanguage

async def on_stdout(msg):
    print("STDOUT:", msg.text)

async def on_stderr(msg):
    print("STDERR:", msg.text)

handlers = ExecutionHandlers(on_stdout=on_stdout, on_stderr=on_stderr)

ctx = await interpreter.codes.create_context(SupportedLanguage.PYTHON)
await interpreter.codes.run(
    "import time\nfor i in range(5):\n    print(i)\n    time.sleep(0.5)",
    context=ctx,
    handlers=handlers,
)
```

### 4. Multi-Language Context Isolation

Different languages run in isolated environments.

```python
from code_interpreter import SupportedLanguage

py_ctx = await interpreter.codes.create_context(SupportedLanguage.PYTHON)
go_ctx = await interpreter.codes.create_context(SupportedLanguage.GO)

await interpreter.codes.run("print('Running in Python')", context=py_ctx)
await interpreter.codes.run(
    "package main\nfunc main() { println(\"Running in Go\") }",
    context=go_ctx,
)
```

## Notes

- **Lifecycle**: `CodeInterpreter` wraps an existing `Sandbox` instance and reuses its connection configuration.
- **Asyncio/event loop**: avoid sharing long-lived clients or transports across multiple event loops (for example, pytest-asyncio creates a new event loop for each test by default).


================================================
FILE: sdks/code-interpreter/python/README_zh.md
================================================
# OpenSandbox Code Interpreter SDK for Python

中文 | [English](README.md)

一个用于在安全、隔离的沙箱环境中执行代码的 Python SDK。该 SDK 提供了高级 API，支持安全地运行 Python、Java、Go、TypeScript
等语言，并具备“代码执行上下文（Context）”能力。

## 前置要求

本 SDK 需要配合包含 Code Interpreter 运行时环境的特定 Docker 镜像使用。请务必使用 `opensandbox/code-interpreter` 镜像（或其衍生镜像），其中预装了 Python、Java、Go、Node.js 等语言的运行环境。

关于支持的语言与具体版本信息，请参考 [环境文档](../../../sandboxes/code-interpreter/README_zh.md)。

## 安装指南

### pip

```bash
pip install opensandbox-code-interpreter
```

### uv

```bash
uv add opensandbox-code-interpreter
```

## 快速开始

以下示例展示了如何创建带指定运行时配置的 Sandbox，并执行一段简单脚本。

```python
import asyncio
from datetime import timedelta

from code_interpreter import CodeInterpreter, SupportedLanguage
from opensandbox import Sandbox
from opensandbox.config import ConnectionConfig


async def main() -> None:
    # 1. 配置连接信息
    config = ConnectionConfig(
        domain="api.opensandbox.io",
        api_key="your-api-key",
        request_timeout=timedelta(seconds=60),
    )

    # 2. 创建 Sandbox（必须使用 code-interpreter 镜像），并指定语言版本
    sandbox = await Sandbox.create(
        "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2",
        connection_config=config,
        entrypoint=["/opt/opensandbox/code-interpreter.sh"],
        env={
            "PYTHON_VERSION": "3.11",
            "JAVA_VERSION": "17",
            "NODE_VERSION": "20",
            "GO_VERSION": "1.24",
        },
    )

    # 3. 使用异步上下文管理器，确保本地资源正确清理
    async with sandbox:
        # 4. 创建 CodeInterpreter 包装器
        interpreter = await CodeInterpreter.create(sandbox=sandbox)

        # 5. 创建执行上下文（Python）
        context = await interpreter.codes.create_context(SupportedLanguage.PYTHON)

        # 6. 运行代码
        result = await interpreter.codes.run(
            "import sys\nprint(sys.version)\nresult = 2 + 2\nresult",
            context=context,
        )

        # 或者：直接传入 language（推荐使用 SupportedLanguage.*），使用该语言默认上下文执行（可跨次保持状态）
        # result = await interpreter.codes.run("print('hi')", language=SupportedLanguage.PYTHON)

        # 7. 打印输出
        if result.result:
            print(result.result[0].text)

        # 8. 清理远程实例（可选，但推荐）
        await sandbox.kill()


if __name__ == "__main__":
    asyncio.run(main())
```

### 同步版本快速开始

如果你更偏好同步 API，可以使用 `SandboxSync` + `CodeInterpreterSync`：

```python
from datetime import timedelta

import httpx
from code_interpreter import CodeInterpreterSync
from opensandbox import SandboxSync
from opensandbox.config import ConnectionConfigSync

config = ConnectionConfigSync(
    domain="api.opensandbox.io",
    api_key="your-api-key",
    request_timeout=timedelta(seconds=60),
    transport=httpx.HTTPTransport(limits=httpx.Limits(max_connections=20)),
)

sandbox = SandboxSync.create(
    "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:v1.0.2",
    connection_config=config,
    entrypoint=["/opt/opensandbox/code-interpreter.sh"],
    env={"PYTHON_VERSION": "3.11"},
)
with sandbox:
    interpreter = CodeInterpreterSync.create(sandbox=sandbox)
    result = interpreter.codes.run("result = 2 + 2\nresult")
    if result.result:
        print(result.result[0].text)
    sandbox.kill()
```

### 运行时安装 Python 依赖

可以直接通过 `sandbox.commands.run(...)` 安装依赖：

```python
execution = await sandbox.commands.run("pip install pandas numpy")
```

## 运行时配置

### Docker 镜像

Code Interpreter SDK 依赖于特定的运行环境。请确保你的沙箱服务提供商支持 `opensandbox/code-interpreter` 镜像。

### 语言版本选择

你可以在创建 `Sandbox` 时通过环境变量指定所需的编程语言版本。

| 语言    | 环境变量         | 示例值 | 默认值（若不设置） |
| ------- | ---------------- | ------ | ------------------ |
| Python  | `PYTHON_VERSION` | `3.11` | 镜像默认值         |
| Java    | `JAVA_VERSION`   | `17`   | 镜像默认值         |
| Node.js | `NODE_VERSION`   | `20`   | 镜像默认值         |
| Go      | `GO_VERSION`     | `1.24` | 镜像默认值         |

## 核心功能示例

### 0. 直接传 `language`（使用该语言默认上下文）

可以直接传入 `language`（推荐：`SupportedLanguage.*`），跳过 `create_context`。
当 `context.id` 省略时，**execd 会为该语言创建/复用默认 session**，因此状态可以跨次执行保持：

```python
from code_interpreter import SupportedLanguage

execution = await interpreter.codes.run(
    "result = 2 + 2\nresult",
    language=SupportedLanguage.PYTHON,
)
assert execution.result and execution.result[0].text == "4"
```

状态持久化示例（Python 默认上下文）：

```python
from code_interpreter import SupportedLanguage

await interpreter.codes.run("x = 42", language=SupportedLanguage.PYTHON)
execution = await interpreter.codes.run("result = x\nresult", language=SupportedLanguage.PYTHON)
assert execution.result and execution.result[0].text == "42"
```

### 1. Java 代码执行

```python
from code_interpreter import SupportedLanguage

ctx = await interpreter.codes.create_context(SupportedLanguage.JAVA)
execution = await interpreter.codes.run(
    (
        'System.out.println("Calculating sum...");\n'
        + "int a = 10;\n"
        + "int b = 20;\n"
        + "int sum = a + b;\n"
        + 'System.out.println("Sum: " + sum);\n'
        + "sum"
    ),
    context=ctx,
)

print(execution.id)
for msg in execution.logs.stdout:
    print(msg.text)
```

### 2. Python 持久化状态

在同一个上下文中，变量状态可以跨次执行保持。

```python
from code_interpreter import SupportedLanguage

ctx = await interpreter.codes.create_context(SupportedLanguage.PYTHON)

await interpreter.codes.run(
    "users = ['Alice', 'Bob', 'Charlie']\nprint(len(users))",
    context=ctx,
)

result = await interpreter.codes.run(
    "users.append('Dave')\nprint(users)\nresult = users\nresult",
    context=ctx,
)
```

### 3. 流式输出处理

实时处理 stdout/stderr 等事件。

```python
from opensandbox.models.execd import ExecutionHandlers
from code_interpreter import SupportedLanguage

async def on_stdout(msg):
    print("STDOUT:", msg.text)

async def on_stderr(msg):
    print("STDERR:", msg.text)

handlers = ExecutionHandlers(on_stdout=on_stdout, on_stderr=on_stderr)

ctx = await interpreter.codes.create_context(SupportedLanguage.PYTHON)
await interpreter.codes.run(
    "import time\nfor i in range(5):\n    print(i)\n    time.sleep(0.5)",
    context=ctx,
    handlers=handlers,
)
```

### 4. 多语言上下文隔离

不同语言在隔离的环境中运行。

```python
from code_interpreter import SupportedLanguage

py_ctx = await interpreter.codes.create_context(SupportedLanguage.PYTHON)
go_ctx = await interpreter.codes.create_context(SupportedLanguage.GO)

await interpreter.codes.run("print('Running in Python')", context=py_ctx)
await interpreter.codes.run(
    "package main\nfunc main() { println(\"Running in Go\") }",
    context=go_ctx,
)
```

## 说明

- **生命周期**：`CodeInterpreter` 基于既有的 `Sandbox` 实例进行包装，并复用其连接配置。
- **Asyncio/event loop**：避免在多个 event loop 间共享长生命周期的 client/transport（例如 pytest-asyncio 默认行为）。


================================================
FILE: sdks/code-interpreter/python/pyproject.toml
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[build-system]
requires = ["hatchling", "hatch-vcs"]
build-backend = "hatchling.build"

[project]
name = "opensandbox-code-interpreter"
dynamic = ["version"]
description = "OpenSandbox Code Interpreter Python SDK - Advanced code execution with persistent contexts"
authors = [
    { name = "OpenSandbox Team", email = "ninan.nn@alibaba-inc.com" }
]
license = { file = "LICENSE" }
readme = "README.md"
requires-python = ">=3.10"
keywords = ["sandbox", "code-interpreter", "code-execution", "sdk", "opensandbox"]
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3 :: Only",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: Software Development :: Libraries",
    "Typing :: Typed",
]
dependencies = [
    "pydantic>=2.4.2,<3.0",
    "opensandbox>=0.1.5,<0.2.0",
]

[project.urls]
Homepage = "https://open-sandbox.ai"
Repository = "https://github.com/alibaba/OpenSandbox"
Documentation = "https://open-sandbox.ai"
Issues = "https://github.com/alibaba/OpenSandbox/issues"

[tool.hatch.version]
source = "vcs"

[tool.hatch.version.raw-options]
# This package is in a subdirectory; explicitly point setuptools-scm at the git root.
root = "../../.."
tag_regex = "^python/code-interpreter/v(?P<version>\\d+\\.\\d+\\.\\d+(?:[\\.\\w\\+\\-]*)?)$"
git_describe_command = 'git describe --dirty --tags --long --match "python/code-interpreter/v*"'
fallback_version = "0.1.0"

[tool.hatch.build]
include = [
    "LICENSE",
    "src/**/py.typed",
    "src/code_interpreter"
]

[dependency-groups]
dev = [
    "pytest>=7.0.0",
    "pytest-asyncio>=0.21.0",
    "pytest-cov>=4.0.0",
    "ruff>=0.14.8",
    "pyright>=1.1.407",
]

[tool.hatch.build.targets.wheel]
packages = ["src/code_interpreter"]

[tool.ruff]
target-version = "py310"
line-length = 88

[tool.ruff.lint]
select = [
    "E",  # pycodestyle errors
    "W",  # pycodestyle warnings
    "F",  # pyflakes
    "I",  # isort
    "B",  # flake8-bugbear
    "C4", # flake8-comprehensions
    "UP", # pyupgrade
]
ignore = [
    "E501", # line too long, handled by the formatter (ruff format)
    "B008", # do not perform function calls in argument defaults
    "C901", # too complex
]

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]

[tool.pyright]
typeCheckingMode = "standard"
pythonVersion = "3.10"
pythonPlatform = "All"

include = ["src"]

exclude = [
    "**/node_modules",
    "**/__pycache__",
    "src/opensandbox/api/**",
]

venvPath = "."
venv = ".venv"

reportMissingImports = true
reportMissingTypeStubs = false

[tool.pytest.ini_options]
minversion = "6.0"
addopts = "-ra -q --strict-markers --strict-config"
testpaths = ["tests"]
python_files = ["test_*.py", "*_test.py"]
asyncio_mode = "auto"

[tool.coverage.run]
source = ["src"]
branch = true


[tool.uv.sources]
opensandbox = { path = "../../sandbox/python", editable = true }


================================================
FILE: sdks/code-interpreter/python/src/code_interpreter/__init__.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
OpenSandbox Code Interpreter SDK.

This package provides secure, isolated code execution capabilities built on top
of the OpenSandbox infrastructure. It supports multiple programming languages,
session management, and variable persistence across executions.
"""

from importlib.metadata import PackageNotFoundError
from importlib.metadata import version as _pkg_version

from code_interpreter.code_interpreter import CodeInterpreter
from code_interpreter.models.code import (
    CodeContext,
    SupportedLanguage,
)
from code_interpreter.sync.code_interpreter import CodeInterpreterSync

__all__ = [
    "CodeInterpreter",
    "CodeInterpreterSync",
    "CodeContext",
    "SupportedLanguage",
]

# Start from the placeholder version and overwrite it with the installed
# distribution's version when package metadata can be found.
__version__ = "0.0.0"
try:
    __version__ = _pkg_version("opensandbox-code-interpreter")
except PackageNotFoundError:  # pragma: no cover
    # No distribution metadata available (e.g. running from an uninstalled
    # source checkout): keep the placeholder assigned above.
    pass


================================================
FILE: sdks/code-interpreter/python/src/code_interpreter/adapters/__init__.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Adapter implementations for code execution services.
"""

from code_interpreter.adapters.code_adapter import CodesAdapter
from code_interpreter.adapters.converter.code_execution_converter import (
    CodeExecutionConverter,
)
from code_interpreter.adapters.factory import AdapterFactory

__all__ = [
    "CodesAdapter",
    "CodeExecutionConverter",
    "AdapterFactory",
]


================================================
FILE: sdks/code-interpreter/python/src/code_interpreter/adapters/code_adapter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Adapter implementation for code execution service.

Provides the concrete implementation of Codes by wrapping auto-generated
API clients and handling SSE streaming for real-time code execution.
"""

import json
import logging
import time

import httpx
from opensandbox.adapters.converter.event_node import EventNode
from opensandbox.adapters.converter.exception_converter import (
    ExceptionConverter,
)
from opensandbox.adapters.converter.execution_event_dispatcher import (
    ExecutionEventDispatcher,
)
from opensandbox.adapters.converter.response_handler import (
    extract_request_id,
    handle_api_error,
    require_parsed,
)
from opensandbox.config import ConnectionConfig
from opensandbox.exceptions import InvalidArgumentException, SandboxApiException
from opensandbox.models.execd import Execution, ExecutionHandlers
from opensandbox.models.sandboxes import SandboxEndpoint

from code_interpreter.adapters.converter.code_execution_converter import (
    CodeExecutionConverter,
)
from code_interpreter.models.code import CodeContext, SupportedLanguage
from code_interpreter.services.code import Codes

logger = logging.getLogger(__name__)


def _normalize_sse_event(event_dict: dict) -> dict:
    if "type" in event_dict and "timestamp" in event_dict:
        return event_dict
    if "code" in event_dict and "message" in event_dict:
        return {
            "type": "error",
            "timestamp": int(time.time() * 1000),
            "error": {
                "ename": str(event_dict["code"]),
                "evalue": str(event_dict["message"]),
                "traceback": [],
            },
        }
    return event_dict


class CodesAdapter(Codes):
    """
    Concrete ``Codes`` implementation backed by the execd HTTP API.

    Context management and interruption go through the auto-generated execd
    API client, while ``run`` talks to the endpoint directly with httpx so the
    SSE response can be consumed line by line. Failures are re-raised through
    ``ExceptionConverter.to_sandbox_exception``.
    """

    RUN_CODE_PATH = "/code"
    CREATE_CONTEXT_PATH = "/code/context"

    def __init__(
        self, execd_endpoint: SandboxEndpoint, connection_config: ConnectionConfig
    ) -> None:
        """
        Build the generated API client and the two httpx clients used by the adapter.

        Args:
            execd_endpoint: Endpoint (address and extra headers) of the execd daemon
            connection_config: Shared connection settings (protocol, transport,
                headers, user agent, request timeout)
        """
        self.execd_endpoint = execd_endpoint
        self.connection_config = connection_config
        from opensandbox.api.execd import Client

        base_url = f"{connection_config.protocol}://{execd_endpoint.endpoint}"
        request_timeout_s = connection_config.request_timeout.total_seconds()
        default_timeout = httpx.Timeout(request_timeout_s)

        # Later sources win on key clashes: endpoint headers override the
        # connection-level headers, which in turn override the User-Agent entry.
        common_headers = {"User-Agent": connection_config.user_agent}
        common_headers.update(connection_config.headers)
        common_headers.update(execd_endpoint.headers)

        # The generated client is created without credentials; the adapter-owned
        # httpx client carrying the headers is injected right after.
        self._client = Client(base_url=base_url, timeout=default_timeout)
        self._httpx_client = httpx.AsyncClient(
            base_url=base_url,
            headers=common_headers,
            timeout=default_timeout,
            transport=connection_config.transport,
        )
        self._client.set_async_httpx_client(self._httpx_client)

        # Streaming client: no read or pool timeout, so a long-running
        # execution can keep the SSE response open.
        streaming_timeout = httpx.Timeout(
            connect=request_timeout_s,
            read=None,
            write=request_timeout_s,
            pool=None,
        )
        self._sse_client = httpx.AsyncClient(
            headers={
                **common_headers,
                "Accept": "text/event-stream",
                "Cache-Control": "no-cache",
            },
            timeout=streaming_timeout,
            transport=connection_config.transport,
        )

    async def _get_client(self):
        """Hand back the generated execd API client built in ``__init__``."""
        return self._client

    def _get_execd_url(self, path: str) -> str:
        """Compose an absolute execd URL from the configured protocol, endpoint and ``path``."""
        scheme = self.connection_config.protocol
        host = self.execd_endpoint.endpoint
        return f"{scheme}://{host}{path}"

    async def _get_sse_client(self) -> httpx.AsyncClient:
        """Hand back the httpx client configured for SSE streaming (no read timeout)."""
        return self._sse_client

    @staticmethod
    def _strip_sse_prefix(line: str) -> str:
        """Drop a leading ``data:`` marker and surrounding whitespace, if the marker is present."""
        if line.startswith("data:"):
            return line[5:].strip()
        return line

    async def create_context(self, language: str) -> CodeContext:
        """
        Create an execution context for ``language`` through the generated API client.

        Args:
            language: Language the new context should execute

        Returns:
            The created context converted to the domain model

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure.
        """
        try:
            from opensandbox.api.execd.api.code_interpreting import create_code_context
            from opensandbox.api.execd.models.code_context import (
                CodeContext as ApiCodeContext,
            )
            from opensandbox.api.execd.models.code_context_request import (
                CodeContextRequest,
            )

            api_client = await self._get_client()
            response = await create_code_context.asyncio_detailed(
                client=api_client,
                body=CodeContextRequest(language=language),
            )

            handle_api_error(response, "Create code context")
            api_context = require_parsed(response, ApiCodeContext, "Create code context")
            return CodeExecutionConverter.from_api_code_context(api_context)

        except Exception as exc:
            logger.error("Failed to create context", exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    async def get_context(self, context_id: str) -> CodeContext:
        """
        Fetch the context identified by ``context_id`` and convert it to the domain model.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure.
        """
        try:
            from opensandbox.api.execd.api.code_interpreting import get_context
            from opensandbox.api.execd.models.code_context import (
                CodeContext as ApiCodeContext,
            )

            api_client = await self._get_client()
            response = await get_context.asyncio_detailed(
                client=api_client,
                context_id=context_id,
            )
            handle_api_error(response, "Get code context")
            api_context = require_parsed(response, ApiCodeContext, "Get code context")
            return CodeExecutionConverter.from_api_code_context(api_context)
        except Exception as exc:
            logger.error("Failed to get context", exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    async def list_contexts(self, language: str) -> list[CodeContext]:
        """
        List the contexts reported for ``language``, each converted to the domain model.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure.
        """
        try:
            from opensandbox.api.execd.api.code_interpreting import list_contexts

            api_client = await self._get_client()
            response = await list_contexts.asyncio_detailed(
                client=api_client,
                language=language,
            )
            handle_api_error(response, "List code contexts")
            api_contexts = require_parsed(response, list, "List code contexts")

            contexts = []
            for api_context in api_contexts:
                contexts.append(CodeExecutionConverter.from_api_code_context(api_context))
            return contexts
        except Exception as exc:
            logger.error("Failed to list contexts", exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    async def delete_context(self, context_id: str) -> None:
        """
        Delete the context identified by ``context_id``.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure.
        """
        try:
            from opensandbox.api.execd.api.code_interpreting import delete_context

            api_client = await self._get_client()
            response = await delete_context.asyncio_detailed(
                client=api_client,
                context_id=context_id,
            )
            handle_api_error(response, "Delete code context")
        except Exception as exc:
            logger.error("Failed to delete context", exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    async def delete_contexts(self, language: str) -> None:
        """
        Delete the contexts belonging to ``language``.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure.
        """
        try:
            from opensandbox.api.execd.api.code_interpreting import (
                delete_contexts_by_language,
            )

            api_client = await self._get_client()
            response = await delete_contexts_by_language.asyncio_detailed(
                client=api_client,
                language=language,
            )
            handle_api_error(response, "Delete code contexts by language")
        except Exception as exc:
            logger.error("Failed to delete contexts", exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    async def run(
        self,
        code: str,
        *,
        language: str | None = None,
        context: CodeContext | None = None,
        handlers: ExecutionHandlers | None = None,
    ) -> Execution:
        """
        Execute ``code`` and collect the streamed events into an ``Execution``.

        The request is POSTed to ``RUN_CODE_PATH`` with the streaming client and
        the response is read line by line; every decodable line is dispatched
        through ``ExecutionEventDispatcher``.

        Args:
            code: Source code to execute; must contain non-whitespace characters
            language: Optional language; must equal ``context.language`` when
                both are given
            context: Optional execution context; when omitted a context without
                id is built for ``language`` (Python if ``language`` is unset)
            handlers: Optional handlers passed to the event dispatcher

        Returns:
            The ``Execution`` object populated by the dispatcher

        Raises:
            InvalidArgumentException: If ``code`` is blank (raised directly).
            Other failures, including a language/context mismatch and non-200
            responses, are re-raised through
            ``ExceptionConverter.to_sandbox_exception``.
        """
        if not code.strip():
            raise InvalidArgumentException("Code cannot be empty")

        try:
            if context is None:
                # No explicit context: send only the language and let the server
                # pick its default context for it.
                context = CodeContext(language=language or SupportedLanguage.PYTHON)
            elif language is not None and context.language != language:
                raise InvalidArgumentException(
                    f"language '{language}' must match context.language '{context.language}'"
                )

            request_body = CodeExecutionConverter.to_api_run_code_request(code, context)
            run_url = self._get_execd_url(self.RUN_CODE_PATH)
            execution = Execution(id=None, execution_count=None, result=[], error=None)
            sse_client = await self._get_sse_client()

            async with sse_client.stream("POST", run_url, json=request_body) as response:
                status = response.status_code
                if status != 200:
                    # The body has to be read before ``response.text`` is available
                    # on a streamed response.
                    await response.aread()
                    logger.error(
                        "Failed to run code. Status: %s, Body: %s",
                        status,
                        response.text,
                    )
                    raise SandboxApiException(
                        message=f"Failed to run code. Status code: {status}",
                        status_code=status,
                        request_id=extract_request_id(response.headers),
                    )

                dispatcher = ExecutionEventDispatcher(execution, handlers)

                async for raw_line in response.aiter_lines():
                    if not raw_line.strip():
                        continue

                    payload = self._strip_sse_prefix(raw_line)

                    # A bad line must not abort the stream: log it and move on.
                    try:
                        event = EventNode(**_normalize_sse_event(json.loads(payload)))
                        await dispatcher.dispatch(event)
                    except json.JSONDecodeError:
                        logger.debug("Failed to parse SSE line: %s", raw_line)
                    except Exception as exc:
                        logger.error("Error processing event: %s", payload, exc_info=exc)

            return execution

        except Exception as exc:
            logger.error(
                "Failed to run code (length: %s)", len(code), exc_info=exc
            )
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    async def interrupt(self, execution_id: str) -> None:
        """
        Ask execd to interrupt the execution identified by ``execution_id``.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure.
        """
        try:
            from opensandbox.api.execd.api.code_interpreting import interrupt_code

            api_client = await self._get_client()
            response = await interrupt_code.asyncio_detailed(
                client=api_client,
                id=execution_id,
            )
            handle_api_error(response, "Interrupt code execution")

        except Exception as exc:
            logger.error("Failed to interrupt code execution", exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc


================================================
FILE: sdks/code-interpreter/python/src/code_interpreter/adapters/converter/__init__.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Model converters for code execution adapters.
"""

from code_interpreter.adapters.converter.code_execution_converter import (
    CodeExecutionConverter,
)

__all__ = [
    "CodeExecutionConverter",
]


================================================
FILE: sdks/code-interpreter/python/src/code_interpreter/adapters/converter/code_execution_converter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Converter for code execution models between domain and API layers.

Handles the transformation of code execution requests and contexts
between the domain model and auto-generated API client models.
"""

from typing import Any

from opensandbox.api.execd.models import CodeContext as ApiCodeContext

from code_interpreter.models.code import CodeContext


class CodeExecutionConverter:
    """
    Stateless helpers translating code-execution models between the domain
    layer and the execd API layer.
    """

    @staticmethod
    def to_api_run_code_request(code: str, context: CodeContext | None) -> dict[str, Any]:
        """
        Build the request body for a run-code call.

        Args:
            code: Source code to execute
            context: Execution context to attach; omitted from the body when None

        Returns:
            Dict with a ``code`` entry and, when a context is given, a
            ``context`` entry
        """
        payload: dict[str, Any] = {"code": code}
        if context is None:
            return payload

        payload["context"] = CodeExecutionConverter.to_api_code_context(context)
        return payload

    @staticmethod
    def to_api_code_context(context: CodeContext) -> dict[str, Any]:
        """
        Serialize a domain ``CodeContext`` into its API dictionary form.

        Args:
            context: Domain model code context

        Returns:
            Dict with ``language`` and, only when the context id is truthy, ``id``
        """
        api_context: dict[str, Any] = {"language": context.language}
        if not context.id:
            return api_context

        api_context["id"] = context.id
        return api_context

    @staticmethod
    def from_api_code_context(api_context: ApiCodeContext) -> CodeContext:
        """
        Turn a generated-client code context into the domain ``CodeContext``.

        Args:
            api_context: Context object returned by the generated API client

        Returns:
            Domain model code context; an ``Unset`` id is mapped to None
        """
        from opensandbox.api.execd.types import Unset

        raw_id = api_context.id
        return CodeContext(
            id=None if isinstance(raw_id, Unset) else raw_id,
            language=api_context.language,
        )

    @staticmethod
    def from_api_code_context_dict(api_context: dict[str, Any]) -> CodeContext:
        """
        Turn a raw API dictionary into the domain ``CodeContext``.

        Args:
            api_context: Dictionary holding the context data

        Returns:
            Domain model code context; a missing ``language`` key falls back
            to ``"python"`` and a missing ``id`` key to None
        """
        context_id = api_context.get("id")
        language = api_context.get("language", "python")
        return CodeContext(id=context_id, language=language)


================================================
FILE: sdks/code-interpreter/python/src/code_interpreter/adapters/factory.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Factory for creating code interpreter services.

Provides a centralized way to create and configure code execution services
with proper dependency injection and configuration management.
"""

from opensandbox.config import ConnectionConfig
from opensandbox.models.sandboxes import SandboxEndpoint

from code_interpreter.adapters.code_adapter import CodesAdapter
from code_interpreter.services.code import Codes


class AdapterFactory:
    """
    Builds code interpreter services that all share one connection configuration.

    The factory only stores the configuration; each service it creates is
    handed that configuration together with the endpoint it should talk to.
    """

    def __init__(self, connection_config: ConnectionConfig) -> None:
        """
        Remember the connection configuration to pass to created services.

        Args:
            connection_config: Shared connection configuration (transport, headers, timeouts)
        """
        self.connection_config = connection_config

    def create_code_execution_service(self, endpoint: SandboxEndpoint) -> Codes:
        """
        Instantiate a ``CodesAdapter`` bound to ``endpoint``.

        Args:
            endpoint: Sandbox endpoint the code execution service should use.

        Returns:
            Code service wired to the endpoint and the shared configuration.
        """
        return CodesAdapter(
            execd_endpoint=endpoint,
            connection_config=self.connection_config,
        )


================================================
FILE: sdks/code-interpreter/python/src/code_interpreter/code_interpreter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Code Interpreter SDK providing secure, isolated code execution capabilities.

This module provides the main CodeInterpreter class that extends basic Sandbox
functionality with specialized code execution features, including multi-language
support, session management, and variable persistence.
"""

import logging

from opensandbox.exceptions import (
    InvalidArgumentException,
    SandboxException,
    SandboxInternalException,
)
from opensandbox.sandbox import Sandbox

from code_interpreter.adapters.factory import AdapterFactory
from code_interpreter.services.code import Codes

logger = logging.getLogger(__name__)


class CodeInterpreter:
    """
    Code execution facade layered on top of an existing ``Sandbox``.

    A CodeInterpreter pairs a sandbox with a ``Codes`` service. The sandbox
    keeps owning files, commands and metrics (re-exposed here as properties),
    while the ``codes`` service adds context-based code execution.

    The language identifiers shipped with this SDK are listed on
    ``code_interpreter.models.code.SupportedLanguage``.

    Usage Example:

    ```python
    # First create a sandbox instance

    sandbox = await Sandbox.create(
        "python:3.11",
        resource={"cpu": "1", "memory": "2Gi"}
    )

    # Then wrap it with a code interpreter
    interpreter = await CodeInterpreter.create(sandbox=sandbox)

    # Execute code inside a dedicated context
    from code_interpreter.models.code import SupportedLanguage
    context = await interpreter.codes.create_context(SupportedLanguage.PYTHON)
    result = await interpreter.codes.run("print('Hello World')", context=context)
    print(result.logs.stdout)  # Output: Hello World

    # The underlying sandbox stays reachable for file operations
    await interpreter.sandbox.files.write_files([
        WriteEntry(path="data.txt", data="Hello")
    ])
    file_result = await interpreter.codes.run(
        "with open('data.txt') as f: print(f.read())",
        context=context,
    )

    # Always clean up resources
    await sandbox.kill()
    await sandbox.close()
    ```
    """

    def __init__(self, sandbox: Sandbox, code_service: Codes) -> None:
        """
        Store the sandbox and the code service.

        Intended for internal use; prefer ``CodeInterpreter.create()``.

        Args:
            sandbox: Underlying sandbox instance
            code_service: Code execution implementation
        """
        self._sandbox = sandbox
        self._code_service = code_service

    @property
    def sandbox(self) -> Sandbox:
        """
        The sandbox this interpreter wraps.

        Returns:
            The underlying sandbox instance
        """
        return self._sandbox

    @property
    def id(self) -> str:
        """
        Identifier of this interpreter, taken from the wrapped sandbox.

        Returns:
            ID of the code interpreter/sandbox
        """
        return self._sandbox.id

    @property
    def files(self):
        """
        Shortcut to the wrapped sandbox's ``files`` service.

        Returns:
            Service for filesystem manipulation
        """
        return self._sandbox.files

    @property
    def commands(self):
        """
        Shortcut to the wrapped sandbox's ``commands`` service.

        Returns:
            Service for command execution
        """
        return self._sandbox.commands

    @property
    def metrics(self):
        """
        Shortcut to the wrapped sandbox's ``metrics`` service.

        Returns:
            Service for metrics retrieval
        """
        return self._sandbox.metrics

    @property
    def codes(self) -> Codes:
        """
        The code execution service: context management, running code with
        streamed output, and interrupting executions.

        Returns:
            Service for code execution with context support
        """
        return self._code_service

    @classmethod
    async def create(cls, sandbox: Sandbox) -> "CodeInterpreter":
        """
        Wrap an existing ``Sandbox`` with code execution capabilities.

        Resolves the sandbox endpoint for ``DEFAULT_EXECD_PORT`` and builds the
        code execution service against it through ``AdapterFactory``.

        Args:
            sandbox: Existing sandbox instance to wrap

        Returns:
            CodeInterpreter instance wrapping the sandbox

        Raises:
            InvalidArgumentException: If sandbox is None
            SandboxException: Propagated unchanged when raised during setup
            SandboxInternalException: Wraps any other error raised during setup
        """
        if sandbox is None:
            raise InvalidArgumentException("Sandbox instance must be provided")

        logger.info("Creating code interpreter from sandbox: %s", sandbox.id)
        adapter_factory = AdapterFactory(sandbox.connection_config)

        try:
            # Code execution is served on the execd daemon's port of the sandbox.
            from opensandbox.constants import DEFAULT_EXECD_PORT

            execd_endpoint = await sandbox.get_endpoint(DEFAULT_EXECD_PORT)
            codes = adapter_factory.create_code_execution_service(execd_endpoint)

            logger.info("Code interpreter %s created successfully", sandbox.id)
            return cls(sandbox, codes)
        except SandboxException:
            # Already a sandbox-level error: let it through untouched.
            raise
        except Exception as exc:
            raise SandboxInternalException(
                f"Failed to create code interpreter: {exc}", cause=exc
            ) from exc


================================================
FILE: sdks/code-interpreter/python/src/code_interpreter/models/__init__.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Data models for code execution and interpretation.
"""

from code_interpreter.models.code import (
    CodeContext,
    SupportedLanguage,
)

__all__ = [
    "CodeContext",
    "SupportedLanguage",
]


================================================
FILE: sdks/code-interpreter/python/src/code_interpreter/models/code.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Code execution models.

Models for code contexts, execution requests, and language support.
"""


from pydantic import BaseModel, ConfigDict, Field, field_validator


class SupportedLanguage:
    """
    String identifiers for the languages this SDK names explicitly.

    Each attribute is the plain string passed as the ``language`` of a code
    context. Listed alphabetically.
    """

    BASH = "bash"
    GO = "go"
    JAVA = "java"
    JAVASCRIPT = "javascript"
    PYTHON = "python"
    TYPESCRIPT = "typescript"


class CodeContext(BaseModel):
    """
    Identifies an execution context for code interpretation.

    A context is described by its programming language and, once it exists
    on the server, by an id. Instances without an id carry only the language.

    The ``language`` field is validated on construction: a value that is
    empty or whitespace-only is rejected.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    id: str | None = Field(default=None, description="Unique identifier for this execution context")
    language: str = Field(description="Programming language for this context (e.g., 'python', 'javascript')")

    @field_validator('language')
    @classmethod
    def language_must_not_be_empty(cls, v: str) -> str:
        """Accept ``v`` unchanged when it has non-whitespace content, otherwise raise ValueError."""
        if v.strip():
            return v
        raise ValueError("Language cannot be blank")


================================================
FILE: sdks/code-interpreter/python/src/code_interpreter/models/code_sync.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous code execution models for Code Interpreter SDK.
"""

from pydantic import BaseModel, Field, field_validator


class SupportedLanguageSync:
    """
    Language identifiers for the synchronous SDK.

    Kept for symmetry with ``SupportedLanguage``; the attribute names and
    values mirror that class one-to-one, including ``JAVASCRIPT``, which was
    previously missing here.
    """

    PYTHON = "python"
    JAVA = "java"
    GO = "go"
    TYPESCRIPT = "typescript"
    BASH = "bash"
    JAVASCRIPT = "javascript"


class CodeContextSync(BaseModel):
    """
    Execution context model for the synchronous SDK: a language plus an
    optional context id.

    The ``language`` field is validated on construction: a value that is
    empty or whitespace-only is rejected.
    """

    id: str | None = Field(default=None)
    language: str = Field(description="Programming language for this context")

    @field_validator("language")
    @classmethod
    def language_must_not_be_empty(cls, v: str) -> str:
        """Accept ``v`` unchanged when it has non-whitespace content, otherwise raise ValueError."""
        if v.strip():
            return v
        raise ValueError("Language cannot be blank")


================================================
FILE: sdks/code-interpreter/python/src/code_interpreter/py.typed
================================================


================================================
FILE: sdks/code-interpreter/python/src/code_interpreter/services/__init__.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Services for code execution and interpretation.
"""

from code_interpreter.services.code import Codes

__all__ = [
    "Codes",
]


================================================
FILE: sdks/code-interpreter/python/src/code_interpreter/services/code.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Code execution service interface.

Defines the contract for multi-language code interpretation with context management,
session persistence, and real-time execution capabilities.
"""

from typing import Protocol, overload

from opensandbox.models.execd import Execution, ExecutionHandlers

from code_interpreter.models.code import CodeContext


class Codes(Protocol):
    """
    Code execution service for multi-language code interpretation (async).

    This is a structural interface (``typing.Protocol``): every method is a coroutine
    stub and concrete adapters provide the behaviour. The blocking counterpart is
    :class:`code_interpreter.sync.services.code.CodesSync`.

    The service provides code execution with context management, session persistence,
    and multi-language support. It extends basic command execution with
    interpreter-specific features such as persistent execution contexts.

    Supported Languages:

    - Python: Full Python 3.x support with package management
    - JavaScript/Node.js: ES6+ with npm package support
    - Bash: Shell scripting with full system access
    - Java: Compilation and execution with classpath management
    - Kotlin: Script and compiled Kotlin execution

    Key Features:

    - Execution Contexts: Isolated environments with persistent state
    - Variable Persistence: Variables and imports persist across executions
    - Real-time Interruption: Stop long-running code execution safely
    - Output Streaming: Real-time stdout/stderr with proper buffering
    - Error Handling: Language-specific error parsing and reporting

    Usage Example:

    ```python
    # Create execution context
    context = await code_service.create_context(SupportedLanguage.PYTHON)

    # Execute code with persistent state
    result1 = await code_service.run(
        "import numpy as np; x = 42",
        context=context,
    )

    result2 = await code_service.run(
        "print(f'Value: {x}, NumPy version: {np.__version__}')",
        context=context,
    )
    # Variables 'x' and 'np' persist between executions

    # Alternatively, select only a language and let the default context be used
    result3 = await code_service.run("echo hello", language="bash")
    ```
    """

    async def create_context(self, language: str) -> CodeContext:
        """
        Creates a new execution context for code interpretation.

        An execution context maintains the state of variables, imports, and working
        directory across multiple code executions. This allows for interactive
        programming sessions where subsequent code can reference previously
        defined variables and functions.

        Args:
            language: The programming language for this context (e.g., "python", "javascript")

        Returns:
            A new CodeContext with the specified configuration

        Raises:
            SandboxException: If the language is not supported or context creation fails
        """
        ...

    async def get_context(self, context_id: str) -> CodeContext:
        """
        Get an existing execution context by id.

        Args:
            context_id: Context/session id

        Returns:
            The existing CodeContext
        """
        ...

    async def list_contexts(self, language: str) -> list[CodeContext]:
        """
        List active contexts under a given language/runtime.

        Args:
            language: Execution runtime (e.g. "python", "bash")

        Returns:
            List of contexts
        """
        ...

    async def delete_context(self, context_id: str) -> None:
        """
        Delete an execution context by id.

        Args:
            context_id: Context/session id to delete
        """
        ...

    async def delete_contexts(self, language: str) -> None:
        """
        Delete all execution contexts under a given language/runtime.

        Args:
            language: Execution runtime (e.g. "python", "bash")
        """
        ...

    # Typed overload 1: run inside an explicit context.
    @overload
    async def run(
        self,
        code: str,
        *,
        context: CodeContext,
        handlers: ExecutionHandlers | None = None,
    ) -> Execution: ...

    # Typed overload 2: run in the default context of the given language.
    @overload
    async def run(
        self,
        code: str,
        *,
        language: str,
        handlers: ExecutionHandlers | None = None,
    ) -> Execution: ...

    async def run(
        self,
        code: str,
        *,
        language: str | None = None,
        context: CodeContext | None = None,
        handlers: ExecutionHandlers | None = None,
    ) -> Execution:
        """
        Executes code within the specified context.

        This method runs the provided code string in the language interpreter,
        capturing all output, errors, and execution metadata. The execution
        happens within the context's environment, preserving variable state
        and working directory.

        The two typed overloads accept either ``context`` or ``language``; this
        implementation signature accepts both as optional keyword-only arguments.

        Execution Behavior:

        - Asynchronous: Non-blocking execution with proper async handling
        - Stateful: Variables and imports persist in the context
        - Streaming: Output is captured in real-time as it's produced
        - Interruptible: Can be stopped using the ``interrupt()`` method

        Args:
            code: Source code to execute.
            language: Convenience language selector for this run. If provided and ``context`` is None,
                a **default context for this language** is used (execd will create/reuse a default
                session when ``context.id`` is omitted). If both ``language`` and ``context`` are
                provided, they must match.
            context: Execution context (language + optional id). If both ``context`` and ``language``
                are None, the default Python context is used.
            handlers: Optional streaming handlers for stdout/stderr/events.

        Returns:
            Execution with stdout, stderr, exit code, and execution metadata

        Raises:
            SandboxException: If execution fails or times out
        """
        ...

    async def interrupt(self, execution_id: str) -> None:
        """
        Interrupts a currently running code execution.

        This method safely terminates a running code execution, cleaning up
        resources and ensuring the interpreter remains in a consistent state.
        The interruption is cooperative and may take some time to complete.

        Interruption Behavior:

        - Safe: Preserves interpreter state and doesn't corrupt the context
        - Cooperative: Respects language-specific interruption mechanisms
        - Timeout: Will force-kill after a reasonable timeout if needed

        Args:
            execution_id: The unique identifier of the execution to interrupt

        Raises:
            SandboxException: If interruption fails
        """
        ...


================================================
FILE: sdks/code-interpreter/python/src/code_interpreter/sync/__init__.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from code_interpreter.sync.code_interpreter import CodeInterpreterSync

# Public API of the ``code_interpreter.sync`` package.
__all__ = ["CodeInterpreterSync"]


================================================
FILE: sdks/code-interpreter/python/src/code_interpreter/sync/adapters/__init__.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Adapter implementations for Code Interpreter sync services.
"""

from code_interpreter.sync.adapters.code_adapter import CodesAdapterSync
from code_interpreter.sync.adapters.factory import AdapterFactorySync

# Public API of the ``code_interpreter.sync.adapters`` package.
__all__ = [
    "AdapterFactorySync",
    "CodesAdapterSync",
]


================================================
FILE: sdks/code-interpreter/python/src/code_interpreter/sync/adapters/code_adapter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous adapter for code execution service (including SSE streaming).
"""

import json
import logging
import time

import httpx
from opensandbox.adapters.converter.event_node import EventNode
from opensandbox.adapters.converter.exception_converter import (
    ExceptionConverter,
)
from opensandbox.adapters.converter.response_handler import (
    extract_request_id,
    handle_api_error,
    require_parsed,
)
from opensandbox.config.connection_sync import ConnectionConfigSync
from opensandbox.exceptions import InvalidArgumentException, SandboxApiException
from opensandbox.models.execd import Execution
from opensandbox.models.execd_sync import ExecutionHandlersSync
from opensandbox.models.sandboxes import SandboxEndpoint
from opensandbox.sync.adapters.converter.execution_event_dispatcher import (
    ExecutionEventDispatcherSync,
)

from code_interpreter.models.code_sync import CodeContextSync, SupportedLanguageSync
from code_interpreter.sync.services.code import CodesSync

logger = logging.getLogger(__name__)


def _normalize_sse_event(event_dict: dict) -> dict:
    if "type" in event_dict and "timestamp" in event_dict:
        return event_dict
    if "code" in event_dict and "message" in event_dict:
        return {
            "type": "error",
            "timestamp": int(time.time() * 1000),
            "error": {
                "ename": str(event_dict["code"]),
                "evalue": str(event_dict["message"]),
                "traceback": [],
            },
        }
    return event_dict


class CodesAdapterSync(CodesSync):
    """
    Blocking code execution service that talks to the execd daemon.

    Sync counterpart of :class:`code_interpreter.adapters.code_adapter.CodesAdapter`.
    Context management and interrupts go through the generated execd API client;
    ``run`` bypasses it and consumes a Server-Sent Events stream directly with ``httpx``.

    Notes:

    - ``run`` blocks the calling thread while it reads the stream opened with
      ``httpx.Client.stream``.
    - Every non-blank SSE line is decoded into an :class:`EventNode` and handed to an
      :class:`ExecutionEventDispatcherSync` that was built from the :class:`Execution`
      being returned and the caller-supplied handlers.
    """

    RUN_CODE_PATH = "/code"
    CREATE_CONTEXT_PATH = "/code/context"

    def __init__(self, execd_endpoint: SandboxEndpoint, connection_config: ConnectionConfigSync) -> None:
        """
        Build the HTTP clients used to reach execd (sync).

        Two ``httpx`` clients are created: one wired into the generated API client for
        ordinary request/response calls, and one dedicated to SSE streaming whose read
        timeout is ``None`` so a long-running stream is not cut off by it.

        Args:
            execd_endpoint: Endpoint for execd daemon connection
            connection_config: Shared connection configuration (transport, headers, timeouts)
        """
        self.execd_endpoint = execd_endpoint
        self.connection_config = connection_config
        from opensandbox.api.execd import Client

        cfg = self.connection_config
        execd_base_url = f"{cfg.protocol}://{self.execd_endpoint.endpoint}"
        request_timeout_s = cfg.request_timeout.total_seconds()
        default_timeout = httpx.Timeout(request_timeout_s)
        common_headers = {"User-Agent": cfg.user_agent, **cfg.headers}

        # Generated API client, backed by our own httpx client so that the shared
        # headers and transport apply to every non-streaming call.
        api_client = Client(base_url=execd_base_url, timeout=default_timeout)
        rest_client = httpx.Client(
            base_url=execd_base_url,
            headers=common_headers,
            timeout=default_timeout,
            transport=cfg.transport,
        )
        api_client.set_httpx_client(rest_client)
        self._client = api_client
        self._httpx_client = rest_client

        # Streaming client: connect/write keep the request timeout, read/pool are unbounded.
        streaming_timeout = httpx.Timeout(
            connect=request_timeout_s,
            read=None,
            write=request_timeout_s,
            pool=None,
        )
        self._sse_client = httpx.Client(
            headers={**common_headers, "Accept": "text/event-stream", "Cache-Control": "no-cache"},
            timeout=streaming_timeout,
            transport=cfg.transport,
        )

    def _get_execd_url(self, path: str) -> str:
        """Return the absolute execd URL for ``path`` (protocol + endpoint + path)."""
        scheme = self.connection_config.protocol
        host = self.execd_endpoint.endpoint
        return f"{scheme}://{host}{path}"

    def _dispatch_sse_line(self, line: str, dispatcher: ExecutionEventDispatcherSync) -> None:
        """
        Decode one raw SSE line and forward the resulting event to ``dispatcher``.

        Blank lines are ignored. A leading ``data:`` prefix is removed before JSON
        decoding. Lines that are not valid JSON are logged at debug level; any other
        failure while building or dispatching the event is logged as an error. In both
        cases the line is dropped so the caller can keep reading the stream.
        """
        if not (line and line.strip()):
            return

        payload = line[5:].strip() if line.startswith("data:") else line
        try:
            normalized = _normalize_sse_event(json.loads(payload))
            dispatcher.dispatch(EventNode(**normalized))
        except json.JSONDecodeError:
            logger.debug("Failed to parse SSE line: %s", line)
        except Exception as e:
            logger.error("Error processing event: %s", payload, exc_info=e)

    def create_context(self, language: str) -> CodeContextSync:
        """
        Create a new execution context for ``language`` (sync).

        Goes through the generated API client; no streaming is involved.

        Args:
            language: Language/runtime the new context should use.

        Returns:
            The created context; its ``id`` is ``None`` when the API response leaves it unset.

        Raises:
            SandboxException: Any failure, as converted by :class:`ExceptionConverter`.
        """
        try:
            from opensandbox.api.execd.api.code_interpreting import create_code_context
            from opensandbox.api.execd.models.code_context import (
                CodeContext as ApiCodeContext,
            )
            from opensandbox.api.execd.models.code_context_request import (
                CodeContextRequest,
            )
            from opensandbox.api.execd.types import Unset

            api_response = create_code_context.sync_detailed(
                client=self._client,
                body=CodeContextRequest(language=language),
            )
            handle_api_error(api_response, "Create code context")
            api_context = require_parsed(api_response, ApiCodeContext, "Create code context")
            if isinstance(api_context.id, Unset):
                return CodeContextSync(id=None, language=api_context.language)
            return CodeContextSync(id=api_context.id, language=api_context.language)
        except Exception as e:
            logger.error("Failed to create context", exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    def get_context(self, context_id: str) -> CodeContextSync:
        """
        Fetch an existing execution context by id (sync).

        Args:
            context_id: Context/session id to look up.

        Returns:
            The matching context; its ``id`` is ``None`` when the API response leaves it unset.

        Raises:
            SandboxException: Any failure, as converted by :class:`ExceptionConverter`.
        """
        try:
            from opensandbox.api.execd.api.code_interpreting import get_context
            from opensandbox.api.execd.models.code_context import (
                CodeContext as ApiCodeContext,
            )
            from opensandbox.api.execd.types import Unset

            api_response = get_context.sync_detailed(
                client=self._client,
                context_id=context_id,
            )
            handle_api_error(api_response, "Get code context")
            api_context = require_parsed(api_response, ApiCodeContext, "Get code context")
            if isinstance(api_context.id, Unset):
                return CodeContextSync(id=None, language=api_context.language)
            return CodeContextSync(id=api_context.id, language=api_context.language)
        except Exception as e:
            logger.error("Failed to get context", exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    def list_contexts(self, language: str) -> list[CodeContextSync]:
        """
        List the contexts execd reports for ``language`` (sync).

        Args:
            language: Language/runtime to list contexts for.

        Returns:
            One :class:`CodeContextSync` per API context, in the order returned by the API.

        Raises:
            SandboxException: Any failure, as converted by :class:`ExceptionConverter`.
        """
        try:
            from opensandbox.api.execd.api.code_interpreting import list_contexts
            from opensandbox.api.execd.types import UNSET

            api_response = list_contexts.sync_detailed(
                client=self._client,
                language=language,
            )
            handle_api_error(api_response, "List code contexts")
            api_contexts = require_parsed(api_response, list, "List code contexts")
            # Each item is an API CodeContext model; an unset id is mapped to None.
            return [
                CodeContextSync(
                    id=None if item.id is UNSET else item.id,
                    language=item.language,
                )
                for item in api_contexts
            ]
        except Exception as e:
            logger.error("Failed to list contexts", exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    def delete_context(self, context_id: str) -> None:
        """
        Delete a single execution context by id (sync).

        Args:
            context_id: Context/session id to delete.

        Raises:
            SandboxException: Any failure, as converted by :class:`ExceptionConverter`.
        """
        try:
            from opensandbox.api.execd.api.code_interpreting import delete_context

            api_response = delete_context.sync_detailed(
                client=self._client,
                context_id=context_id,
            )
            handle_api_error(api_response, "Delete code context")
        except Exception as e:
            logger.error("Failed to delete context", exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    def delete_contexts(self, language: str) -> None:
        """
        Delete every execution context of ``language`` (sync).

        Args:
            language: Language/runtime whose contexts should be deleted.

        Raises:
            SandboxException: Any failure, as converted by :class:`ExceptionConverter`.
        """
        try:
            from opensandbox.api.execd.api.code_interpreting import (
                delete_contexts_by_language,
            )

            api_response = delete_contexts_by_language.sync_detailed(
                client=self._client,
                language=language,
            )
            handle_api_error(api_response, "Delete code contexts by language")
        except Exception as e:
            logger.error("Failed to delete contexts", exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    def run(
        self,
        code: str,
        *,
        language: str | None = None,
        context: CodeContextSync | None = None,
        handlers: ExecutionHandlersSync | None = None,
    ) -> Execution:
        """
        Execute code through execd and consume its SSE stream (sync).

        Args:
            code: Source code to execute; must contain non-whitespace characters.
            language: Language selector used when ``context`` is None. When both are given
                they must name the same language.
            context: Execution context (language + optional id). When None, a context
                without an id is built for ``language`` (Python if ``language`` is also None);
                the id is then omitted from the request so execd picks its default session.
            handlers: Optional streaming handlers for stdout/stderr/events.

        Returns:
            The :class:`Execution` object that was handed to the event dispatcher while
            the stream was being read.

        Raises:
            InvalidArgumentException: if code is empty
            SandboxApiException: if execd returns a non-200 response
            SandboxException: for other errors converted by :class:`ExceptionConverter`
        """
        if not code.strip():
            raise InvalidArgumentException("Code cannot be empty")

        try:
            if context is None:
                # Default context: language default context (server-side behavior).
                # With no id in the request, execd creates/reuses a default session per language.
                context = CodeContextSync(language=language or SupportedLanguageSync.PYTHON)
            elif language is not None and context.language != language:
                raise InvalidArgumentException(
                    f"language '{language}' must match context.language '{context.language}'"
                )

            context_payload = {"language": context.language}
            if context.id:
                context_payload["id"] = context.id
            api_request = {"code": code, "context": context_payload}

            run_url = self._get_execd_url(self.RUN_CODE_PATH)
            execution = Execution(id=None, execution_count=None, result=[], error=None)
            dispatcher = ExecutionEventDispatcherSync(execution, handlers)

            with self._sse_client.stream("POST", run_url, json=api_request) as response:
                status = response.status_code
                if status != 200:
                    # Read the body before raising, while the stream is still open.
                    response.read()
                    raise SandboxApiException(
                        message=f"Failed to run code. Status code: {status}",
                        status_code=status,
                        request_id=extract_request_id(response.headers),
                    )

                for raw_line in response.iter_lines():
                    self._dispatch_sse_line(raw_line, dispatcher)

            return execution
        except Exception as e:
            logger.error("Failed to run code (length: %s)", len(code), exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    def interrupt(self, execution_id: str) -> None:
        """
        Ask execd to interrupt a running code execution (sync).

        Args:
            execution_id: Execution id returned by execd for the running code execution

        Raises:
            SandboxException: Any failure, as converted by :class:`ExceptionConverter`.
        """
        try:
            from opensandbox.api.execd.api.code_interpreting import interrupt_code

            api_response = interrupt_code.sync_detailed(client=self._client, id=execution_id)
            handle_api_error(api_response, "Interrupt code execution")
        except Exception as e:
            logger.error("Failed to interrupt code execution", exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e


================================================
FILE: sdks/code-interpreter/python/src/code_interpreter/sync/adapters/factory.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Factory for creating Code Interpreter sync services.
"""

from opensandbox.config.connection_sync import ConnectionConfigSync
from opensandbox.models.sandboxes import SandboxEndpoint

from code_interpreter.sync.adapters.code_adapter import CodesAdapterSync
from code_interpreter.sync.services.code import CodesSync


class AdapterFactorySync:
    """
    Builds the sync services of the Code Interpreter SDK.

    Keeping construction in one place means every service created here receives the
    same connection configuration (transport, headers, timeouts).
    """

    def __init__(self, connection_config: ConnectionConfigSync) -> None:
        """
        Remember the connection configuration to hand to created services (sync).

        Args:
            connection_config: Shared connection configuration (transport, headers, timeouts).
        """
        self.connection_config = connection_config

    def create_code_execution_service(self, endpoint: SandboxEndpoint) -> CodesSync:
        """
        Build a code execution service bound to ``endpoint`` (sync).

        Args:
            endpoint: Sandbox endpoint for code execution services.

        Returns:
            A :class:`CodesAdapterSync` configured with this factory's connection settings.
        """
        shared_config = self.connection_config
        service = CodesAdapterSync(endpoint, shared_config)
        return service


================================================
FILE: sdks/code-interpreter/python/src/code_interpreter/sync/code_interpreter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous Code Interpreter SDK.
"""

import logging

from opensandbox.constants import DEFAULT_EXECD_PORT
from opensandbox.exceptions import (
    InvalidArgumentException,
    SandboxException,
    SandboxInternalException,
)
from opensandbox.sync.sandbox import SandboxSync

from code_interpreter.sync.adapters.factory import AdapterFactorySync
from code_interpreter.sync.services.code import CodesSync

logger = logging.getLogger(__name__)


class CodeInterpreterSync:
    """
    Blocking Code Interpreter SDK entry point.

    Mirrors the async :class:`code_interpreter.code_interpreter.CodeInterpreter`; every
    operation here is **blocking** and runs in the calling thread.

    An instance wraps an existing :class:`opensandbox.sync.sandbox.SandboxSync` and adds a
    code execution service (contexts, run with SSE streaming, interrupts) next to the
    sandbox's own file, command and metrics services.

    Notes:

    - **Blocking**: Do not call these methods directly from an asyncio event loop thread.
      If you need non-blocking behavior, prefer the async :class:`~code_interpreter.code_interpreter.CodeInterpreter`.
    - **Lifecycle**: Remote lifecycle is owned by the underlying sandbox; call methods on
      ``interpreter.sandbox`` for pause/resume/kill/renew/metrics/info/endpoints.

    Usage Example:

    ```python
    from opensandbox.sync.sandbox import SandboxSync
    from code_interpreter.sync.code_interpreter import CodeInterpreterSync
    from code_interpreter.models.code_sync import SupportedLanguageSync

    sandbox = SandboxSync.create("python:3.11")
    interpreter = CodeInterpreterSync.create(sandbox=sandbox)

    ctx = interpreter.codes.create_context(SupportedLanguageSync.PYTHON)
    result = interpreter.codes.run("print('hi')", context=ctx)

    sandbox.kill()
    sandbox.close()
    ```
    """

    def __init__(self, sandbox: SandboxSync, code_service: CodesSync) -> None:
        """
        Store the wrapped sandbox and its code execution service.

        Note: This constructor is for internal use. Use :meth:`create` instead.

        Args:
            sandbox: Underlying sandbox instance
            code_service: Code execution service implementation (sync)
        """
        self._code_service = code_service
        self._sandbox = sandbox

    @property
    def sandbox(self) -> SandboxSync:
        """
        The sandbox this interpreter wraps.

        Returns:
            The underlying sandbox instance
        """
        return self._sandbox

    @property
    def id(self) -> str:
        """
        Identifier of this code interpreter, taken from the underlying sandbox.

        Returns:
            ID of the code interpreter/sandbox
        """
        return self._sandbox.id

    @property
    def files(self):
        """
        File system service of the underlying sandbox.

        Returns:
            Whatever ``sandbox.files`` exposes
        """
        return self._sandbox.files

    @property
    def commands(self):
        """
        Command execution service of the underlying sandbox.

        Returns:
            Whatever ``sandbox.commands`` exposes
        """
        return self._sandbox.commands

    @property
    def metrics(self):
        """
        Metrics service of the underlying sandbox.

        Returns:
            Whatever ``sandbox.metrics`` exposes
        """
        return self._sandbox.metrics

    @property
    def codes(self) -> CodesSync:
        """
        Code execution service (sync).

        This service enables:
        - Multi-language code execution (Python, JavaScript, Bash, etc.)
        - Execution context management with persistent variables
        - Real-time output streaming and interruption capabilities

        Returns:
            The code execution service passed to the constructor
        """
        return self._code_service

    @classmethod
    def create(cls, sandbox: SandboxSync) -> "CodeInterpreterSync":
        """
        Wrap an existing SandboxSync in a CodeInterpreterSync (blocking).

        Resolves the sandbox endpoint for the execd port and builds the code execution
        service with the sandbox's own connection configuration.

        Args:
            sandbox: Existing sandbox instance to wrap with code execution capabilities

        Returns:
            CodeInterpreterSync instance wrapping the sandbox

        Raises:
            InvalidArgumentException: If sandbox is not provided
            SandboxException: If creation fails
            SandboxInternalException: If internal service initialization fails
        """
        if sandbox is None:
            raise InvalidArgumentException("Sandbox instance must be provided")

        logger.info("Creating code interpreter from sandbox: %s", sandbox.id)
        factory = AdapterFactorySync(sandbox.connection_config)
        try:
            execd_endpoint = sandbox.get_endpoint(DEFAULT_EXECD_PORT)
            interpreter = cls(sandbox, factory.create_code_execution_service(execd_endpoint))
            logger.info("Code interpreter %s created successfully", sandbox.id)
            return interpreter
        except SandboxException:
            # SDK exceptions already carry the right type; let them through untouched.
            raise
        except Exception as e:
            raise SandboxInternalException(f"Failed to create code interpreter: {e}", cause=e) from e


================================================
FILE: sdks/code-interpreter/python/src/code_interpreter/sync/services/__init__.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous service interfaces (Protocols) for the Code Interpreter sync SDK.

These interfaces mirror the async interfaces under :mod:`code_interpreter.services`,
but are **blocking** and intended for use with :class:`code_interpreter.sync.code_interpreter.CodeInterpreterSync`.
"""

from code_interpreter.sync.services.code import CodesSync

# Public API of the ``code_interpreter.sync.services`` package.
__all__ = [
    "CodesSync",
]


================================================
FILE: sdks/code-interpreter/python/src/code_interpreter/sync/services/code.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous code execution service interface.

Defines the contract for multi-language code interpretation with context management,
session persistence, and real-time execution capabilities (SSE streaming), **in blocking form**.

This is the sync counterpart of :mod:`code_interpreter.services.code`.
"""

from typing import Protocol, overload

from opensandbox.models.execd import Execution
from opensandbox.models.execd_sync import ExecutionHandlersSync

from code_interpreter.models.code_sync import CodeContextSync


class CodesSync(Protocol):
    """
    Blocking contract for the multi-language code execution service.

    Implementations run source code inside *execution contexts*: isolated
    environments whose state survives from one run to the next.

    Languages typically available:
        - Python
        - JavaScript / TypeScript
        - Bash
        - Java
        - Kotlin (depending on server image)

    Capabilities:
        - Execution contexts: isolated environments with persistent state.
        - Variable persistence: variables and imports carry over between runs
          that share a context.
        - Interruption: a long-running execution can be stopped safely.
        - Output streaming: stdout/stderr are delivered in real time via SSE.

    Notes:
        - Every method **blocks** the calling thread until it completes.
        - The non-blocking counterpart is the async
          :class:`code_interpreter.services.code.Codes`.
    """

    def create_context(self, language: str) -> CodeContextSync:
        """
        Open a fresh execution context for ``language`` (blocking).

        The context keeps variables, imports and the working directory alive
        across runs, which is what makes interactive sessions possible.

        Args:
            language: Programming language of the new context, for example
                ``"python"`` or ``"typescript"``.

        Returns:
            The newly created CodeContextSync.

        Raises:
            SandboxException: If the language is not supported or context creation fails.
        """
        ...

    def get_context(self, context_id: str) -> CodeContextSync:
        """
        Look up an existing execution context (blocking).

        Args:
            context_id: Id of the context to fetch.

        Returns:
            The matching CodeContextSync.
        """
        ...

    def list_contexts(self, language: str) -> list[CodeContextSync]:
        """
        Enumerate the active contexts of one language/runtime (blocking).

        Args:
            language: Language/runtime whose contexts should be listed.

        Returns:
            The active contexts for that language/runtime.
        """
        ...

    def delete_context(self, context_id: str) -> None:
        """
        Remove a single execution context (blocking).

        Args:
            context_id: Id of the context to delete.
        """
        ...

    def delete_contexts(self, language: str) -> None:
        """
        Remove every context that belongs to a language/runtime (blocking).

        Args:
            language: Language/runtime whose contexts should be deleted.
        """
        ...

    @overload
    def run(
        self,
        code: str,
        *,
        context: CodeContextSync,
        handlers: ExecutionHandlersSync | None = None,
    ) -> Execution: ...

    @overload
    def run(
        self,
        code: str,
        *,
        language: str,
        handlers: ExecutionHandlersSync | None = None,
    ) -> Execution: ...

    def run(
        self,
        code: str,
        *,
        language: str | None = None,
        context: CodeContextSync | None = None,
        handlers: ExecutionHandlersSync | None = None,
    ) -> Execution:
        """
        Run ``code`` in the given context and wait for it to finish (blocking).

        The source string is handed to the language interpreter; output, errors
        and execution metadata are collected while it runs. Because execution
        happens inside the context's environment, variable state and the working
        directory are preserved.

        How a run behaves:
            - Blocking: control returns only once the stream has finished.
            - Stateful: variables and imports remain in the context afterwards.
            - Streaming: SSE events are processed incrementally as they arrive.
            - Interruptible: :meth:`interrupt` can stop the run.

        The typed overloads above accept either ``context`` or ``language`` as
        the keyword selecting where the code runs.

        Args:
            code: Source code to execute.
            language: Shorthand for choosing the language of this run. When given
                while ``context`` is None, the **default context for this language**
                is used (execd creates/reuses a default session when ``context.id``
                is omitted). When both ``language`` and ``context`` are given, they
                must match.
            context: Execution context (language + optional id). If None, the
                default Python context is used.
            handlers: Optional streaming handlers for stdout/stderr/events.

        Returns:
            Execution holding stdout/stderr/events together with execution metadata.

        Raises:
            SandboxException: If execution fails or times out.
        """
        ...

    def interrupt(self, execution_id: str) -> None:
        """
        Stop a code execution that is currently running.

        The call attempts a safe termination: resources are cleaned up and the
        interpreter is kept in a consistent state.

        Args:
            execution_id: Unique identifier of the execution to interrupt.

        Raises:
            SandboxException: If interruption fails.
        """
        ...


================================================
FILE: sdks/code-interpreter/python/tests/test_adapter_eager_init.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pytest
from opensandbox.config import ConnectionConfig
from opensandbox.models.sandboxes import SandboxEndpoint

from code_interpreter.adapters.code_adapter import CodesAdapter


@pytest.mark.asyncio
async def test_code_service_eager_init_and_client_available() -> None:
    """A freshly constructed adapter returns a non-None client from ``_get_client``."""
    adapter = CodesAdapter(
        SandboxEndpoint(endpoint="localhost:44772", port=44772),
        ConnectionConfig(protocol="http"),
    )

    assert (await adapter._get_client()) is not None


================================================
FILE: sdks/code-interpreter/python/tests/test_code_interpreter_create_and_delegation.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pytest
from opensandbox.config import ConnectionConfig
from opensandbox.exceptions import InvalidArgumentException
from opensandbox.models.sandboxes import SandboxEndpoint

from code_interpreter import CodeInterpreter


class _FakeSandbox:
    """
    In-memory sandbox double used by the tests in this module.

    It carries a random id, an HTTP connection config and opaque placeholder
    objects for the ``files`` / ``commands`` / ``metrics`` services. Methods the
    tests are not expected to reach raise ``RuntimeError("not used")``.
    """

    def __init__(self) -> None:
        from uuid import uuid4

        # A unique id per instance, stored as a string.
        self._id = str(uuid4())
        self.connection_config = ConnectionConfig(protocol="http")
        # Plain sentinels: callers can only compare them by identity.
        self.files = object()
        self.commands = object()
        self.metrics = object()

    @property
    def id(self):
        """Read-only view of the generated sandbox id."""
        return self._id

    async def get_endpoint(self, port: int) -> SandboxEndpoint:
        """Return a fixed local endpoint that echoes the requested ``port``."""
        return SandboxEndpoint(endpoint="localhost:44772", port=port)

    async def is_healthy(self) -> bool:
        """Always report the fake sandbox as healthy."""
        return True

    async def get_info(self):  # pragma: no cover
        """Placeholder; raises because these tests never call it."""
        raise RuntimeError("not used")

    async def get_metrics(self):  # pragma: no cover
        """Placeholder; raises because these tests never call it."""
        raise RuntimeError("not used")

    async def renew(self, timeout):  # pragma: no cover
        """Placeholder; raises because these tests never call it."""
        raise RuntimeError("not used")


@pytest.mark.asyncio
async def test_create_requires_sandbox() -> None:
    """``CodeInterpreter.create`` raises InvalidArgumentException when given no sandbox."""
    missing_sandbox = None
    with pytest.raises(InvalidArgumentException):
        await CodeInterpreter.create(sandbox=missing_sandbox)  # type: ignore[arg-type]


@pytest.mark.asyncio
async def test_create_wires_code_service_and_delegates_properties() -> None:
    """The interpreter exposes the sandbox's id and services and provides ``codes``."""
    fake = _FakeSandbox()
    interpreter = await CodeInterpreter.create(sandbox=fake)  # type: ignore[arg-type]

    assert interpreter.id == fake.id
    # The delegated services must be the very same objects held by the sandbox.
    for service_name in ("files", "commands", "metrics"):
        assert getattr(interpreter, service_name) is getattr(fake, service_name)

    # The codes service is wired up during create(); no network call is made here.
    assert interpreter.codes is not None


================================================
FILE: sdks/code-interpreter/python/tests/test_code_service_adapter_openapi_calls.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations

import pytest
from opensandbox.config import ConnectionConfig
from opensandbox.models.sandboxes import SandboxEndpoint

from code_interpreter.adapters.code_adapter import CodesAdapter


class _Resp:
    def __init__(self, *, status_code: int, parsed) -> None:
        self.status_code = status_code
        self.parsed = parsed


@pytest.mark.asyncio
async def test_create_context_uses_openapi_and_converts(monkeypatch: pytest.MonkeyPatch) -> None:
    """``create_context`` calls the generated OpenAPI function and converts its result."""
    from opensandbox.api.execd.models.code_context import CodeContext as ApiCodeContext

    async def _stub_create(*, client, body):
        # The adapter must forward the requested language in the request body.
        assert body.language == "python"
        api_context = ApiCodeContext(language="python", id="ctx-1")
        return _Resp(status_code=200, parsed=api_context)

    target = "opensandbox.api.execd.api.code_interpreting.create_code_context.asyncio_detailed"
    monkeypatch.setattr(target, _stub_create)

    endpoint = SandboxEndpoint(endpoint="localhost:44772", port=44772)
    adapter = CodesAdapter(endpoint, ConnectionConfig(protocol="http"))

    created = await adapter.create_context("python")
    assert created.id == "ctx-1"
    assert created.language == "python"


@pytest.mark.asyncio
async def test_interrupt_calls_openapi(monkeypatch: pytest.MonkeyPatch) -> None:
    """``interrupt`` passes the execution id through to the generated OpenAPI function."""
    received_id = None

    async def _stub_interrupt(*, client, id):
        # Remember the id of the most recent call so the test can inspect it.
        nonlocal received_id
        received_id = id
        return _Resp(status_code=204, parsed=None)

    target = "opensandbox.api.execd.api.code_interpreting.interrupt_code.asyncio_detailed"
    monkeypatch.setattr(target, _stub_interrupt)

    endpoint = SandboxEndpoint(endpoint="localhost:44772", port=44772)
    adapter = CodesAdapter(endpoint, ConnectionConfig(protocol="http"))

    await adapter.interrupt("exec-1")
    assert received_id == "exec-1"


================================================
FILE: sdks/code-interpreter/python/tests/test_code_service_adapter_streaming.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations

import json

import httpx
import pytest
from opensandbox.config import ConnectionConfig
from opensandbox.exceptions import InvalidArgumentException, SandboxApiException
from opensandbox.models.sandboxes import SandboxEndpoint

from code_interpreter.adapters.code_adapter import CodesAdapter
from code_interpreter.adapters.converter.code_execution_converter import (
    CodeExecutionConverter,
)
from code_interpreter.models.code import CodeContext, SupportedLanguage


class _SseTransport(httpx.AsyncBaseTransport):
    """
    Canned httpx transport for the streaming tests.

    A ``/code`` request whose JSON body has ``code`` equal to ``"print(1)"`` or
    ``"print(2)"`` is answered with a fixed SSE stream; any other request gets a
    400 response carrying an ``x-request-id`` header.
    """

    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
        raw = request.content
        text = raw.decode("utf-8") if isinstance(raw, (bytes, bytearray)) else ""
        payload = json.loads(text) if text else {}

        stream = None
        if request.url.path == "/code":
            code = payload.get("code")
            if code == "print(1)":
                stream = (
                    b'data: {"type":"init","text":"exec-1","timestamp":1}\n\n'
                    b'data: {"type":"stdout","text":"1\\n","timestamp":2}\n\n'
                    b'data: {"type":"execution_complete","timestamp":3,"execution_time":7}\n\n'
                )
            elif code == "print(2)":
                # This scenario additionally checks the language sent in the request context.
                assert payload["context"]["language"] == "go"
                stream = (
                    b'data: {"type":"init","text":"exec-2","timestamp":1}\n\n'
                    b'data: {"type":"stdout","text":"2\\n","timestamp":2}\n\n'
                    b'data: {"type":"execution_complete","timestamp":3,"execution_time":7}\n\n'
                )

        if stream is not None:
            return httpx.Response(
                200,
                headers={"Content-Type": "text/event-stream"},
                content=stream,
                request=request,
            )

        # Anything unrecognised is rejected with a traceable request id.
        return httpx.Response(
            400,
            headers={"x-request-id": "req-code-123"},
            content=b"bad",
            request=request,
        )


def test_code_execution_converter_includes_context() -> None:
    """The run-code request carries the code plus the context id and language."""
    context = CodeContext(id="c1", language=SupportedLanguage.PYTHON)
    request_body = CodeExecutionConverter.to_api_run_code_request("print(1)", context)

    assert request_body["code"] == "print(1)"
    serialized_context = request_body["context"]
    assert serialized_context["id"] == "c1"
    assert serialized_context["language"] == "python"


@pytest.mark.asyncio
async def test_run_code_streaming_happy_path_updates_execution() -> None:
    """A streamed run fills in the execution id and the first stdout entry."""
    adapter = CodesAdapter(
        SandboxEndpoint(endpoint="localhost:44772", port=44772),
        ConnectionConfig(protocol="http", transport=_SseTransport()),
    )

    result = await adapter.run("print(1)")

    assert result.id == "exec-1"
    first_stdout = result.logs.stdout[0]
    assert first_stdout.text == "1\n"


@pytest.mark.asyncio
async def test_run_code_can_accept_language_string_without_context() -> None:
    """``run`` works when only ``language`` is supplied and no context is given."""
    adapter = CodesAdapter(
        SandboxEndpoint(endpoint="localhost:44772", port=44772),
        ConnectionConfig(protocol="http", transport=_SseTransport()),
    )

    result = await adapter.run("print(2)", language=SupportedLanguage.GO)

    assert result.id == "exec-2"
    first_stdout = result.logs.stdout[0]
    assert first_stdout.text == "2\n"


@pytest.mark.asyncio
async def test_run_code_rejects_blank_code() -> None:
    """Whitespace-only code is refused with InvalidArgumentException."""
    adapter = CodesAdapter(
        SandboxEndpoint(endpoint="localhost:44772", port=44772),
        ConnectionConfig(protocol="http"),
    )
    blank_code = "   "

    with pytest.raises(InvalidArgumentException):
        await adapter.run(blank_code)


@pytest.mark.asyncio
async def test_run_code_rejects_mismatched_language_and_context() -> None:
    """Passing a Python context together with ``language=GO`` is rejected."""
    adapter = CodesAdapter(
        SandboxEndpoint(endpoint="localhost:44772", port=44772),
        ConnectionConfig(protocol="http", transport=_SseTransport()),
    )

    with pytest.raises(InvalidArgumentException):
        python_context = CodeContext(language=SupportedLanguage.PYTHON)
        await adapter.run("print(1)", context=python_context, language=SupportedLanguage.GO)


@pytest.mark.asyncio
async def test_run_code_non_200_raises_api_exception() -> None:
    """A non-200 reply surfaces as SandboxApiException carrying the request id."""
    adapter = CodesAdapter(
        SandboxEndpoint(endpoint="localhost:44772", port=44772),
        ConnectionConfig(protocol="http", transport=_SseTransport()),
    )

    with pytest.raises(SandboxApiException) as exc_info:
        await adapter.run("other")

    # The id comes from the ``x-request-id`` header of the fake 400 response.
    assert exc_info.value.request_id == "req-code-123"


================================================
FILE: sdks/eslint.base.mjs
================================================
import js from "@eslint/js";
import tseslint from "typescript-eslint";
import globals from "globals";

/**
 * Build the shared flat ESLint configuration used by the SDK packages.
 *
 * The result is, in order: the ignore list, the `@eslint/js` recommended rules,
 * the `typescript-eslint` recommended rules, a type-aware block for
 * `src/**` TypeScript sources and, optionally, a block for plain scripts.
 *
 * @param {object} [options]
 * @param {string} [options.tsconfigRootDir] Passed to the parser as `tsconfigRootDir`.
 * @param {string} [options.tsconfigPath="./tsconfig.json"] tsconfig given to the parser's `project` option.
 * @param {string[]} [options.extraIgnores=[]] Extra ignore globs appended after the built-in ones.
 * @param {boolean} [options.includeScripts=false] Whether to append the scripts block.
 * @param {string[]} [options.scriptGlobs] File globs matched by the scripts block.
 * @returns The value produced by `tseslint.config(...)`.
 */
export function createBaseConfig({
  tsconfigRootDir,
  tsconfigPath = "./tsconfig.json",
  extraIgnores = [],
  includeScripts = false,
  scriptGlobs = ["scripts/**/*.{js,mjs,cjs}"],
} = {}) {
  // Returns a fresh globals map on every call so config entries never share one object.
  const nodeGlobals = () => ({
    ...globals.nodeBuiltin,
    ...globals.node,
  });

  const ignoreEntry = {
    ignores: ["dist/**", "node_modules/**", "coverage/**", ...extraIgnores],
  };

  // Type-aware linting for the package's TypeScript sources.
  const sourceEntry = {
    files: ["src/**/*.{ts,mts,cts}"],
    languageOptions: {
      globals: nodeGlobals(),
      parserOptions: {
        project: [tsconfigPath],
        tsconfigRootDir,
      },
    },
    extends: [...tseslint.configs.stylisticTypeChecked],
    rules: {
      "@typescript-eslint/no-explicit-any": "off",
      // Names starting with an underscore are exempt from the unused check.
      "@typescript-eslint/no-unused-vars": [
        "error",
        { argsIgnorePattern: "^_", varsIgnorePattern: "^_" },
      ],
      "no-console": "warn",
      "no-debugger": "error",
      "no-constant-condition": "warn",
    },
  };

  // Scripts get Node globals and may use the console; only built when requested.
  const scriptEntries = includeScripts
    ? [
        {
          files: scriptGlobs,
          languageOptions: {
            globals: nodeGlobals(),
          },
          rules: {
            "no-console": "off",
          },
        },
      ]
    : [];

  return tseslint.config(
    ignoreEntry,
    js.configs.recommended,
    ...tseslint.configs.recommended,
    sourceEntry,
    ...scriptEntries,
  );
}


================================================
FILE: sdks/mcp/sandbox/python/LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: sdks/mcp/sandbox/python/README.md
================================================
# OpenSandbox MCP Sandbox Server

## 1. Overview

OpenSandbox MCP Server exposes the OpenSandbox Python SDK as MCP tools for
Claude Code, Cursor, and other MCP-capable clients. It provides focused
sandbox lifecycle management, command execution, and text file operations.

## 2. Installation & Startup

### Source

```bash
uv sync
uv run opensandbox-mcp
```

### Package

```bash
pip install opensandbox-mcp
opensandbox-mcp
```

### Configuration

Environment variables:

- `OPEN_SANDBOX_API_KEY`
- `OPEN_SANDBOX_DOMAIN`

CLI overrides:

```bash
opensandbox-mcp --api-key ... --domain ... --protocol https
```

Config fields:

- `api_key`: OpenSandbox API key for authentication.
- `domain`: OpenSandbox API domain, for example `api.opensandbox.io`.
- `protocol`: `http` or `https` for API requests.
- `request_timeout_seconds`: HTTP request timeout in seconds.
- `transport`: `stdio` by default, or `streamable-http`.

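### Streamable HTTP

Start the server with the `streamable-http` transport to serve MCP over HTTP
instead of the default `stdio` (the HTTP integration examples in section 3
connect to `http://localhost:8000/mcp`):
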
```bash
opensandbox-mcp \
  --transport streamable-http
```

## 3. Integrations

### Claude Code stdio

```bash
claude mcp add opensandbox-sandbox --transport stdio -- \
  opensandbox-mcp --api-key "$OPEN_SANDBOX_API_KEY" --domain "$OPEN_SANDBOX_DOMAIN"
```

### Claude Code http

```bash
claude mcp add opensandbox-sandbox --transport http http://localhost:8000/mcp
```

### Cursor stdio

```json
{
  "mcpServers": {
    "opensandbox-sandbox": {
      "command": "opensandbox-mcp",
      "args": [
        "--api-key",
        "${OPEN_SANDBOX_API_KEY}",
        "--domain",
        "${OPEN_SANDBOX_DOMAIN}"
      ]
    }
  }
}
```

### Cursor http

```json
{
  "mcpServers": {
    "opensandbox-sandbox": {
      "url": "http://localhost:8000/mcp"
    }
  }
}
```

## 4. Tools

Notes:

- All tools operate on a `sandbox_id` returned by `sandbox_create` or `sandbox_connect`.
- `file_read`/`file_write` are text-only; use `encoding` and `range_header` for large files.

### Sandbox

- `sandbox_create`: create a new sandbox and register it locally
- `sandbox_connect`: attach to an existing sandbox and register it locally
- `sandbox_kill`: terminate a sandbox by ID
- `sandbox_get_info`: fetch sandbox info by ID
- `sandbox_list`: list sandboxes with optional `filter` object
- `sandbox_renew`: extend sandbox expiration
- `sandbox_healthcheck`: check if sandbox is healthy
- `sandbox_get_metrics`: get resource metrics
- `sandbox_get_endpoint`: get network endpoint for a port

### Command Execution

- `command_run`: run a command inside a sandbox
- `command_interrupt`: interrupt a running command

### Filesystem

- `file_read`: read a text file
- `file_write`: write a text file
- `file_delete`: delete files
- `file_search`: search for files by glob
- `file_create_directories`: create directories
- `file_delete_directories`: delete directories
- `file_move`: move/rename files or directories
- `file_replace_contents`: replace file content

## 5. Minimal Workflow

1. `sandbox_create` -> keep the `sandbox_id`.
2. `file_write` code or assets into the sandbox.
3. `command_run` to execute, install dependencies, or start a service.
4. `sandbox_get_endpoint` if you expose a port.
5. `sandbox_kill` when finished.

## 6. Usage Examples

Here are some examples of what you can ask an LLM to do:

- "Create a Python sandbox and run a quick health command."
- "Write a Python script into the sandbox and run it."
- "Download a GitHub repo, install dependencies, and run its tests."
- "Generate a CSV file with fake sales data and run a simple summary script."
- "Start a tiny web server on port 8000 and return the public URL."
- "Build a minimal REST API (hello + health) and expose it on port 8000."
- "Create a tar.gz of /app and report the file size."
- "Build a simple Snake game and return the web endpoint where it can be accessed."


================================================
FILE: sdks/mcp/sandbox/python/README_zh.md
================================================
# OpenSandbox MCP 沙箱服务（Python）

## 1. 简介

OpenSandbox MCP Server 将 OpenSandbox Python SDK 以 MCP 工具形式暴露给
Claude Code、Cursor 等客户端，提供精简的沙箱生命周期、命令执行与文本文件操作能力。

## 2. 安装和启动

### 源码方式（本地开发）

```bash
uv sync
uv run opensandbox-mcp
```

### 下载包方式

```bash
pip install opensandbox-mcp
opensandbox-mcp
```

### 配置

环境变量：

- `OPEN_SANDBOX_API_KEY`
- `OPEN_SANDBOX_DOMAIN`

CLI 覆盖：

```bash
opensandbox-mcp --api-key ... --domain ... --protocol https
```

配置项说明：

- `api_key`：OpenSandbox API Key（鉴权）。
- `domain`：OpenSandbox API 域名（如 `api.opensandbox.io`）。
- `protocol`：`http` 或 `https`。
- `request_timeout_seconds`：HTTP 请求超时（秒）。
- `transport`：`stdio`（默认）或 `streamable-http`。

### Streamable HTTP

```bash
opensandbox-mcp \
  --transport streamable-http
```

## 3. 集成案例

### Claude Code stdio

```bash
claude mcp add opensandbox-sandbox --transport stdio -- \
  opensandbox-mcp --api-key "$OPEN_SANDBOX_API_KEY" --domain "$OPEN_SANDBOX_DOMAIN"
```

### Claude Code http

```bash
claude mcp add opensandbox-sandbox --transport http http://localhost:8000/mcp
```

### Cursor stdio

```json
{
  "mcpServers": {
    "opensandbox-sandbox": {
      "command": "opensandbox-mcp",
      "args": [
        "--api-key",
        "${OPEN_SANDBOX_API_KEY}",
        "--domain",
        "${OPEN_SANDBOX_DOMAIN}"
      ]
    }
  }
}
```

### Cursor http

```json
{
  "mcpServers": {
    "opensandbox-sandbox": {
      "url": "http://localhost:8000/mcp"
    }
  }
}
```

## 4. 工具描述

说明：

- 所有工具均使用 `sandbox_create` / `sandbox_connect` 返回的 `sandbox_id`。
- `file_read` / `file_write` 仅支持文本文件；可用 `encoding` 指定文本编码，读取大文件时可向 `file_read` 传入 `range_header`（如 `bytes=0-1023`）只读取部分内容。

### Sandbox 生命周期

- `sandbox_create`: 创建沙箱并注册到本地会话
- `sandbox_connect`: 连接已有沙箱并注册到本地会话
- `sandbox_get_info`: 获取沙箱信息
- `sandbox_list`: 使用 `filter` 列出沙箱
- `sandbox_renew`: 续期
- `sandbox_get_metrics`: 资源指标
- `sandbox_healthcheck`: 沙箱健康检查
- `sandbox_kill`: 终止沙箱
- `sandbox_get_endpoint`: 获取指定端口的访问地址

### 命令执行

- `command_run`: 在沙箱内执行命令
- `command_interrupt`: 中断命令

### 文件系统

- `file_read`: 读取文本文件
- `file_write`: 写文本文件
- `file_delete`: 删除文件
- `file_search`: 按 glob 搜索
- `file_create_directories`: 创建目录
- `file_delete_directories`: 删除目录
- `file_move`: 移动/重命名
- `file_replace_contents`: 替换文件内容

## 5. 最小流程

1. `sandbox_create` -> 记录 `sandbox_id`。
2. `file_write` 写入代码或资源。
3. `command_run` 执行、安装依赖或启动服务。
4. 对外暴露端口时使用 `sandbox_get_endpoint`。
5. 完成后 `sandbox_kill`。

## 6. 使用案例

下面是一些你可以让 LLM 完成的指令示例：

- "创建一个 Python 沙箱并执行健康检查命令。"
- "把一段 Python 脚本写入沙箱并执行。"
- "下载一个 GitHub 仓库，安装依赖并运行测试。"
- "生成一份销售数据 CSV，并运行简单统计脚本。"
- "启动一个 8000 端口的 Web 服务并返回公网链接。"
- "搭一个最小 REST API（hello + health）并对外暴露。"
- "把 /app 打包成 tar.gz 并报告文件大小。"
- "实现一个贪吃蛇小游戏，并返回可访问的 Web 链接。"


================================================
FILE: sdks/mcp/sandbox/python/pyproject.toml
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[build-system]
requires = ["hatchling", "hatch-vcs"]
build-backend = "hatchling.build"

[project]
name = "opensandbox-mcp"
dynamic = ["version"]
description = "OpenSandbox MCP Sandbox Server (Python)"
authors = [
    { name = "OpenSandbox Team", email = "ninan.nn@alibaba-inc.com" }
]
license = { file = "LICENSE" }
readme = "README.md"
requires-python = ">=3.10"
keywords = ["sandbox", "mcp", "sdk", "opensandbox", "server"]
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3 :: Only",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: Software Development :: Libraries",
    "Typing :: Typed",
]
dependencies = [
    "mcp[cli]",
    "opensandbox>=0.1.4,<0.2.0",
]

[project.urls]
Homepage = "https://open-sandbox.ai"
Repository = "https://github.com/alibaba/OpenSandbox"
Documentation = "https://open-sandbox.ai"
Issues = "https://github.com/alibaba/OpenSandbox/issues"

[project.scripts]
opensandbox-mcp = "opensandbox_mcp.__main__:main"

[tool.hatch.version]
source = "vcs"

[tool.hatch.version.raw-options]
# This package is in a subdirectory; explicitly point setuptools-scm at the git root.
root = "../../../.."
tag_regex = "^python/mcp/sandbox/v(?P<version>\\d+\\.\\d+\\.\\d+(?:[\\.\\w\\+\\-]*)?)$"
git_describe_command = 'git describe --dirty --tags --long --match "python/mcp/sandbox/v*"'
fallback_version = "0.1.0"

[tool.hatch.build]
include = [
    "LICENSE",
    "src/**/py.typed",
    "src/opensandbox_mcp",
]

[tool.hatch.build.targets.wheel]
packages = ["src/opensandbox_mcp"]

[dependency-groups]
dev = [
    "pytest>=7.0.0",
    "pytest-asyncio>=0.21.0",
    "pytest-cov>=4.0.0",
    "ruff>=0.14.8",
    "pyright>=1.1.0",
]

[tool.ruff]
target-version = "py310"
line-length = 88

[tool.ruff.lint]
select = [
    "E",  # pycodestyle errors
    "W",  # pycodestyle warnings
    "F",  # pyflakes
    "I",  # isort
    "B",  # flake8-bugbear
    "C4", # flake8-comprehensions
    "UP", # pyupgrade
]
ignore = [
    "E501", # line too long, handled by formatter
    "B008", # do not perform function calls in argument defaults
    "C901", # too complex
]

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]

[tool.pyright]
typeCheckingMode = "standard"
pythonVersion = "3.10"
pythonPlatform = "All"

include = ["src"]

exclude = [
    "**/node_modules",
    "**/__pycache__",
]

venvPath = "."
venv = ".venv"

reportMissingImports = true
reportMissingTypeStubs = false

[tool.pytest.ini_options]
minversion = "6.0"
addopts = "-ra -q --strict-markers --strict-config"
testpaths = ["tests"]
python_files = ["test_*.py", "*_test.py"]
asyncio_mode = "auto"

[tool.coverage.run]
source = ["src"]
branch = true

[tool.uv.sources]
opensandbox = { path = "../../../sandbox/python", editable = true }


================================================
FILE: sdks/mcp/sandbox/python/src/opensandbox_mcp/__init__.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from importlib.metadata import PackageNotFoundError
from importlib.metadata import version as _pkg_version

from opensandbox_mcp.server import create_server

# Public API of the package: only the server factory is re-exported.
__all__ = ["create_server"]

# Report the version recorded in the installed distribution metadata; when the
# distribution "opensandbox-mcp" cannot be found, use a placeholder instead.
try:
    __version__ = _pkg_version("opensandbox-mcp")
except PackageNotFoundError:  # pragma: no cover
    __version__ = "0.0.0"


================================================
FILE: sdks/mcp/sandbox/python/src/opensandbox_mcp/__main__.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import argparse
from datetime import timedelta

from opensandbox.config import ConnectionConfig

from opensandbox_mcp.server import create_server


def main(argv: list[str] | None = None) -> None:
    """Parse command-line options and run the OpenSandbox MCP server.

    Parameters:
        argv: Arguments to parse instead of ``sys.argv[1:]``. The console
            script calls ``main()`` without arguments, so the default keeps
            the existing behaviour; passing a list allows programmatic use.

    ``--api-key`` and ``--domain`` are forwarded to ``ConnectionConfig`` only
    when supplied. ``--protocol`` and ``--request-timeout-seconds`` always
    carry a value (their argparse defaults), so a ``ConnectionConfig`` is
    always built explicitly.

    Raises:
        SystemExit: Via argparse (status 2) for invalid options, including a
            request timeout that is not a positive, finite number.
    """
    parser = argparse.ArgumentParser(
        description="OpenSandbox MCP Sandbox server entrypoint."
    )
    parser.add_argument(
        "--transport",
        choices=("stdio", "streamable-http"),
        default="stdio",
        help="Transport to use (default: stdio).",
    )
    parser.add_argument(
        "--api-key",
        default=None,
        help="OpenSandbox API key (overrides OPEN_SANDBOX_API_KEY).",
    )
    parser.add_argument(
        "--domain",
        default=None,
        help="OpenSandbox API domain (overrides OPEN_SANDBOX_DOMAIN).",
    )
    parser.add_argument(
        "--protocol",
        choices=("http", "https"),
        default="http",
        help="Protocol to use for API requests.",
    )
    parser.add_argument(
        "--request-timeout-seconds",
        type=float,
        default=30,
        help="HTTP request timeout in seconds.",
    )

    args = parser.parse_args(argv)

    timeout_seconds = args.request_timeout_seconds
    # The chained comparison also rejects NaN, because every comparison with
    # NaN is False; NaN and infinity would otherwise crash inside timedelta().
    if not 0 < timeout_seconds < float("inf"):
        parser.error("--request-timeout-seconds must be a positive, finite number")

    config_values = {}
    # Empty strings are treated like "not given", as before.
    if args.api_key:
        config_values["api_key"] = args.api_key
    if args.domain:
        config_values["domain"] = args.domain
    # These two options have argparse defaults, so they are always present.
    config_values["protocol"] = args.protocol
    config_values["request_timeout"] = timedelta(seconds=timeout_seconds)

    mcp = create_server(connection_config=ConnectionConfig(**config_values))

    # argparse `choices` restricts the value to "stdio" or "streamable-http",
    # so it can be passed straight through without a fallback branch.
    mcp.run(transport=args.transport)


if __name__ == "__main__":
    main()


================================================
FILE: sdks/mcp/sandbox/python/src/opensandbox_mcp/py.typed
================================================


================================================
FILE: sdks/mcp/sandbox/python/src/opensandbox_mcp/server.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import asyncio
from dataclasses import dataclass, field
from datetime import timedelta

from mcp.server.fastmcp import Context, FastMCP
from mcp.server.session import ServerSession
from opensandbox import Sandbox, SandboxManager
from opensandbox.config import ConnectionConfig
from opensandbox.models.execd import Execution, RunCommandOpts
from opensandbox.models.filesystem import (
    ContentReplaceEntry,
    EntryInfo,
    MoveEntry,
    SearchEntry,
    WriteEntry,
)
from opensandbox.models.sandboxes import (
    NetworkPolicy,
    PagedSandboxInfos,
    SandboxEndpoint,
    SandboxFilter,
    SandboxImageAuth,
    SandboxImageSpec,
    SandboxInfo,
    SandboxMetrics,
    SandboxRenewResponse,
)
from pydantic import BaseModel, Field


@dataclass
class ServerState:
    """In-memory registry of sandbox handles shared by the registered tools.

    Attributes:
        sandboxes: Sandbox handles keyed by their ``id``.
        connection_config: Connection settings used when creating or
            connecting to sandboxes.
        lock: Guards every read and write of ``sandboxes``.
    """

    sandboxes: dict[str, Sandbox] = field(default_factory=dict)
    connection_config: ConnectionConfig = field(default_factory=ConnectionConfig)
    lock: asyncio.Lock = field(default_factory=asyncio.Lock)

    async def add(self, sandbox: Sandbox) -> None:
        """Store ``sandbox`` under its ``id``, replacing any previous entry."""
        async with self.lock:
            self.sandboxes.update({sandbox.id: sandbox})

    async def get(self, sandbox_id: str) -> Sandbox | None:
        """Return the handle registered for ``sandbox_id``, or ``None``."""
        async with self.lock:
            found = self.sandboxes.get(sandbox_id)
        return found

    async def remove(self, sandbox_id: str) -> Sandbox | None:
        """Drop and return the handle for ``sandbox_id``; ``None`` if absent."""
        async with self.lock:
            removed = self.sandboxes.pop(sandbox_id, None)
        return removed


# Acknowledgement payload for tools that perform an action without returning
# data, e.g. sandbox_kill ("killed"), file_write ("written"),
# command_interrupt ("interrupted").
class StatusResponse(BaseModel):
    status: str = Field(description="Operation status string.")

# One directory to create, as accepted by file_create_directories. Each entry
# is converted to the SDK's WriteEntry via model_dump(exclude_none=True), so
# owner/group are only forwarded when set.
class DirectoryEntryInput(BaseModel):
    path: str = Field(description="Directory path.")
    # NOTE(review): the default is the decimal integer 755, not the octal
    # literal 0o755 — presumably the SDK reads the digits as an octal mode;
    # confirm against WriteEntry.
    mode: int = Field(default=755, description="Unix permissions for the directory.")
    owner: str | None = Field(default=None, description="Owner username.")
    group: str | None = Field(default=None, description="Group name.")

# Result of sandbox_create and sandbox_connect: the sandbox id together with
# the info object fetched from the sandbox right after registration.
class SandboxInfoResponse(BaseModel):
    sandbox_id: str = Field(description="Sandbox identifier.")
    info: SandboxInfo = Field(description="Sandbox info payload.")

# Result of sandbox_healthcheck: echoes the requested sandbox id and carries
# the boolean returned by the sandbox's is_healthy() call.
class SandboxHealthResponse(BaseModel):
    sandbox_id: str = Field(description="Sandbox identifier.")
    healthy: bool = Field(description="Sandbox health status.")

# Result of file_read: the path that was requested and the text that was read.
class FileReadResponse(BaseModel):
    path: str = Field(description="File path.")
    content: str = Field(description="File content.")


def register_tools(
    mcp: FastMCP,
    *,
    prefix: str = "",
    state: ServerState | None = None,
    connection_config: ConnectionConfig | None = None,
) -> ServerState:
    """Register sandbox tools on a FastMCP instance."""
    config = (connection_config or ConnectionConfig()).with_transport_if_missing()
    state = state or ServerState(connection_config=config)
    name_prefix = f"{prefix}_" if prefix else ""

    def tool():
        # Decorator factory used as ``@tool()``: registers the decorated
        # function on ``mcp`` and, when a prefix is configured, renames it
        # first (presumably so the registered tool name carries the prefix).
        def decorator(func):
            if name_prefix:
                func.__name__ = name_prefix + func.__name__
            register = mcp.tool()
            return register(func)

        return decorator

    async def _get_or_connect_sandbox(
        sandbox_id: str,
        *,
        connect_if_missing: bool,
    ) -> Sandbox:
        # Resolve a sandbox handle for a tool call.
        #   - registry hit: return the stored handle;
        #   - registry miss with connect_if_missing=False: raise ValueError;
        #   - registry miss with connect_if_missing=True: connect with the
        #     server's connection config, register the handle, return it.
        handle = await state.get(sandbox_id)
        if handle is None and not connect_if_missing:
            raise ValueError(
                "Sandbox not found in local registry. Call sandbox_connect or "
                "set connect_if_missing=True with connection parameters."
            )
        if handle is None:
            handle = await Sandbox.connect(
                sandbox_id, connection_config=state.connection_config
            )
            await state.add(handle)
        return handle

    @tool()
    async def sandbox_create(
        image: str,
        ctx: Context[ServerSession, None] | None = None,
        *,
        auth_username: str | None = None,
        auth_password: str | None = None,
        timeout_seconds: float = 600,
        ready_timeout_seconds: float = 30,
        health_check_polling_interval_ms: int = 200,
        skip_health_check: bool = False,
        env: dict[str, str] | None = None,
        metadata: dict[str, str] | None = None,
        resource: dict[str, str] | None = None,
        network_policy: NetworkPolicy | None = None,
        extensions: dict[str, str] | None = None,
        entrypoint: list[str] | None = None,
    ) -> SandboxInfoResponse:
        """Create a sandbox and store it in the MCP server session.

        This allocates a new sandbox instance using the OpenSandbox API and
        tracks it locally so subsequent tool calls can reuse it.

        Parameters:
            image: Container image reference (e.g., "python:3.11").
            ctx: MCP context for progress reporting (optional).
            auth_username: Registry username for private images.
            auth_password: Registry password/token for private images.
            timeout_seconds: Sandbox lifetime in seconds (absolute TTL).
            ready_timeout_seconds: Max time to wait for readiness checks.
            health_check_polling_interval_ms: Interval between health checks in ms.
            skip_health_check: If True, return before readiness checks complete.
            env: Environment variables for the sandbox.
            metadata: Custom metadata for the sandbox (string map).
            resource: Resource limits (cpu/memory/etc.) as string map.
            network_policy: Optional egress network policy (NetworkPolicy model).
                Example: NetworkPolicy(
                    default_action="deny",
                    egress=[{"action": "allow", "target": "pypi.org"}],
                )
            extensions: Opaque extension parameters passed through to the server.
            entrypoint: Entrypoint command list.

        Returns:
            A dict with:
                sandbox_id: The new sandbox identifier.
                info: Sandbox info payload from the SDK.

        Raises:
            ValueError: If auth_username/auth_password are incomplete.
            Exception: If sandbox creation fails.

        Example:
            result = await sandbox_create(
                image="python:3.11",
                env={"PYTHONPATH": "/app"},
                resource={"cpu": "1", "memory": "2Gi"},
            )
        """

        async def notify(progress: float, message: str) -> None:
            # Progress reporting is optional: do nothing without an MCP context.
            if ctx:
                await ctx.report_progress(
                    progress=progress, total=1.0, message=message
                )

        await notify(0.1, "Validating input")

        # Registry credentials are all-or-nothing: one without the other is an error.
        registry_auth = None
        if auth_username or auth_password:
            if not (auth_username and auth_password):
                raise ValueError("auth_username and auth_password must be provided together")
            registry_auth = SandboxImageAuth(
                username=auth_username,
                password=auth_password,
            )
        spec = SandboxImageSpec(image=image, auth=registry_auth)

        await notify(0.3, "Creating sandbox")
        created = await Sandbox.create(
            spec,
            timeout=timedelta(seconds=timeout_seconds),
            ready_timeout=timedelta(seconds=ready_timeout_seconds),
            env=env,
            metadata=metadata,
            resource=resource,
            network_policy=network_policy,
            extensions=extensions,
            entrypoint=entrypoint,
            health_check_polling_interval=timedelta(
                milliseconds=health_check_polling_interval_ms
            ),
            skip_health_check=skip_health_check,
            connection_config=state.connection_config,
        )
        # Register the handle before fetching info so it stays tracked even if
        # the info request fails.
        await state.add(created)

        await notify(0.8, "Fetching sandbox info")
        details = await created.get_info()
        await notify(1.0, "Done")
        return SandboxInfoResponse(sandbox_id=created.id, info=details)

    @tool()
    async def sandbox_connect(
        sandbox_id: str,
        *,
        connect_timeout_seconds: float = 30,
        health_check_polling_interval_ms: int = 200,
        skip_health_check: bool = False,
    ) -> SandboxInfoResponse:
        """Connect to an existing sandbox and store it locally.

        Use this when a sandbox already exists and you want to use it in this
        MCP server session without creating a new one.

        Parameters:
            sandbox_id: Existing sandbox identifier.
            connect_timeout_seconds: Max time to wait for readiness.
            health_check_polling_interval_ms: Interval between health checks in ms.
            skip_health_check: If True, return before readiness checks complete.

        Returns:
            A dict with:
                sandbox_id: The sandbox identifier.
                info: Sandbox info payload from the SDK.

        Example:
            result = await sandbox_connect(sandbox_id="sbx_123")
        """
        # Convert the plain-number tool arguments into the timedeltas the SDK expects.
        wait_limit = timedelta(seconds=connect_timeout_seconds)
        poll_every = timedelta(milliseconds=health_check_polling_interval_ms)

        handle = await Sandbox.connect(
            sandbox_id,
            connection_config=state.connection_config,
            connect_timeout=wait_limit,
            health_check_polling_interval=poll_every,
            skip_health_check=skip_health_check,
        )
        # Always (re-)register: a later connect for the same id replaces the entry.
        await state.add(handle)
        details = await handle.get_info()
        return SandboxInfoResponse(sandbox_id=handle.id, info=details)

    @tool()
    async def sandbox_kill(
        sandbox_id: str,
    ) -> StatusResponse:
        """Terminate a sandbox by ID and remove it from local registry.

        Parameters:
            sandbox_id: Target sandbox identifier.

        Returns:
            {"status": "killed"} when successful.
        """
        # The registry entry is dropped first, whether or not the kill succeeds.
        local = await state.remove(sandbox_id)
        if local is not None:
            # Known handle: kill through it and always release its resources.
            try:
                await local.kill()
            finally:
                await local.close()
            return StatusResponse(status="killed")

        # Unknown locally: use a short-lived manager to kill by id.
        manager = await SandboxManager.create(
            connection_config=state.connection_config
        )
        try:
            await manager.kill_sandbox(sandbox_id)
        finally:
            await manager.close()
        return StatusResponse(status="killed")

    @tool()
    async def sandbox_get_info(
        sandbox_id: str,
    ) -> SandboxInfo:
        """Fetch sandbox info by ID.

        Parameters:
            sandbox_id: Target sandbox identifier.

        Returns:
            Sandbox info dict from the SDK.
        """
        local = await state.get(sandbox_id)
        if local is None:
            # Not registered here: query through a short-lived manager, which
            # is closed whether or not the lookup succeeds.
            manager = await SandboxManager.create(
                connection_config=state.connection_config
            )
            try:
                return await manager.get_sandbox_info(sandbox_id)
            finally:
                await manager.close()
        # Registered handle: ask it directly.
        return await local.get_info()

    @tool()
    async def sandbox_list(
        ctx: Context[ServerSession, None] | None = None,
        *,
        filter: SandboxFilter | None = None,
    ) -> PagedSandboxInfos:
        """List sandboxes matching filter criteria.

        Parameters:
            ctx: MCP context for progress reporting (optional).
            filter: SandboxFilter object (states, metadata, page, page_size).

        Returns:
            Paginated sandbox list.
        """
        if ctx:
            await ctx.report_progress(progress=0.1, total=1.0, message="Listing sandboxes")

        # A missing filter falls back to a default-constructed SandboxFilter.
        criteria = filter if filter else SandboxFilter()

        manager = await SandboxManager.create(
            connection_config=state.connection_config
        )
        try:
            page = await manager.list_sandbox_infos(criteria)
        finally:
            # The manager is only needed for this one call.
            await manager.close()

        if ctx:
            await ctx.report_progress(progress=1.0, total=1.0, message="Done")
        return page

    @tool()
    async def sandbox_renew(
        sandbox_id: str,
        *,
        timeout_seconds: float,
    ) -> SandboxRenewResponse:
        """Renew sandbox expiration time.

        Parameters:
            sandbox_id: Target sandbox identifier.
            timeout_seconds: Additional lifetime in seconds.

        Returns:
            Renew response dict including new expiration time.
        """
        local = await state.get(sandbox_id)
        if local is not None:
            # Registered handle: renew through it.
            return await local.renew(timedelta(seconds=timeout_seconds))

        # Not registered here: renew by id through a short-lived manager.
        manager = await SandboxManager.create(
            connection_config=state.connection_config
        )
        try:
            return await manager.renew_sandbox(
                sandbox_id, timedelta(seconds=timeout_seconds)
            )
        finally:
            await manager.close()

    @tool()
    async def sandbox_get_metrics(
        sandbox_id: str,
        *,
        connect_if_missing: bool = False,
    ) -> SandboxMetrics:
        """Get resource metrics for a sandbox.

        Parameters:
            sandbox_id: Target sandbox identifier.
            connect_if_missing: Connect if sandbox not in local registry.

        Returns:
            Metrics dict.
        """
        # Resolve the handle (raises ValueError on a registry miss unless
        # connect_if_missing is set), then forward the metrics unchanged.
        handle = await _get_or_connect_sandbox(
            sandbox_id, connect_if_missing=connect_if_missing
        )
        return await handle.get_metrics()

    @tool()
    async def sandbox_healthcheck(
        sandbox_id: str,
        *,
        connect_if_missing: bool = False,
    ) -> SandboxHealthResponse:
        """Check if a sandbox is healthy.

        Parameters:
            sandbox_id: Target sandbox identifier.
            connect_if_missing: Connect if sandbox not in local registry.

        Returns:
            {"sandbox_id": "...", "healthy": true|false}.
        """
        handle = await _get_or_connect_sandbox(
            sandbox_id, connect_if_missing=connect_if_missing
        )
        is_up = await handle.is_healthy()
        # The response echoes the id the caller passed in.
        return SandboxHealthResponse(sandbox_id=sandbox_id, healthy=is_up)

    @tool()
    async def command_run(
        sandbox_id: str,
        command: str,
        *,
        background: bool = False,
        working_directory: str | None = None,
        connect_if_missing: bool = False,
    ) -> Execution:
        """Run a command inside a sandbox.
        Parameters:
            sandbox_id: Target sandbox identifier.
            command: Shell command to execute (supports pipes/redirects).
            background: If True, run asynchronously and return immediately.
            working_directory: Working directory for the command.
            connect_if_missing: Connect if sandbox not in local registry.

        Returns:
            Execution result dict with id, exit_code, logs, and duration.

        Example:
            result = await command_run("sbx_123", "ls -la", working_directory="/")
        """
        handle = await _get_or_connect_sandbox(
            sandbox_id, connect_if_missing=connect_if_missing
        )
        # Bundle the optional execution settings into the SDK's options object.
        run_options = RunCommandOpts(
            background=background,
            working_directory=working_directory,
        )
        return await handle.commands.run(command, opts=run_options)

    @tool()
    async def command_interrupt(
        sandbox_id: str,
        execution_id: str,
        *,
        connect_if_missing: bool = False,
    ) -> StatusResponse:
        """Interrupt a running command execution.

        Parameters:
            sandbox_id: Target sandbox identifier.
            execution_id: Execution identifier to interrupt.
            connect_if_missing: Connect if sandbox not in local registry.

        Returns:
            {"status": "interrupted"} when successful.
        """
        handle = await _get_or_connect_sandbox(
            sandbox_id, connect_if_missing=connect_if_missing
        )
        # Any SDK error propagates; the status is only returned on success.
        await handle.commands.interrupt(execution_id)
        return StatusResponse(status="interrupted")

    @tool()
    async def file_read(
        sandbox_id: str,
        path: str,
        *,
        encoding: str = "utf-8",
        range_header: str | None = None,
        connect_if_missing: bool = False,
    ) -> FileReadResponse:
        """Read a text file from the sandbox.

        Parameters:
            sandbox_id: Target sandbox identifier.
            path: File path to read.
            encoding: Text encoding.
            range_header: Optional byte range header (e.g., "bytes=0-1023").
            connect_if_missing: Connect if sandbox not in local registry.

        Returns:
            {"path": "...", "content": "..."}.

        """
        handle = await _get_or_connect_sandbox(
            sandbox_id, connect_if_missing=connect_if_missing
        )
        # encoding and range_header are passed through to the SDK untouched.
        text = await handle.files.read_file(
            path,
            encoding=encoding,
            range_header=range_header,
        )
        return FileReadResponse(path=path, content=text)

    @tool()
    async def file_write(
        sandbox_id: str,
        path: str,
        content: str,
        *,
        encoding: str = "utf-8",
        mode: int = 755,
        owner: str | None = None,
        group: str | None = None,
        connect_if_missing: bool = False,
    ) -> StatusResponse:
        """Write a text file inside the sandbox.

        Parameters:
            sandbox_id: Target sandbox identifier.
            path: Destination file path.
            content: File content.
            encoding: Text encoding.
            mode: Unix file permissions.
            owner: Owner username.
            group: Group name.
            connect_if_missing: Connect if sandbox not in local registry.

        Returns:
            {"status": "written"} when successful.
        """
        handle = await _get_or_connect_sandbox(
            sandbox_id, connect_if_missing=connect_if_missing
        )
        # Permission and ownership settings are forwarded to the SDK as given.
        file_attrs = {"mode": mode, "owner": owner, "group": group}
        await handle.files.write_file(path, content, encoding=encoding, **file_attrs)
        return StatusResponse(status="written")

    @tool()
    async def file_delete(
        sandbox_id: str,
        paths: list[str],
        *,
        connect_if_missing: bool = False,
    ) -> StatusResponse:
        """Delete files inside the sandbox.

        Parameters:
            sandbox_id: Target sandbox identifier.
            paths: File paths to delete.
            connect_if_missing: Connect if sandbox not in local registry.

        Returns:
            {"status": "deleted"} when successful.
        """
        handle = await _get_or_connect_sandbox(
            sandbox_id, connect_if_missing=connect_if_missing
        )
        # The whole list is handed to the SDK in a single call.
        await handle.files.delete_files(paths)
        return StatusResponse(status="deleted")

    @tool()
    async def file_search(
        sandbox_id: str,
        path: str,
        pattern: str,
        *,
        connect_if_missing: bool = False,
    ) -> list[EntryInfo]:
        """Search for files matching a pattern.

        Parameters:
            sandbox_id: Target sandbox identifier.
            path: Base directory to search.
            pattern: Glob pattern (e.g., "*.py").
            connect_if_missing: Connect if sandbox not in local registry.

        Returns:
            List of entry info objects.
        """
        handle = await _get_or_connect_sandbox(
            sandbox_id, connect_if_missing=connect_if_missing
        )
        # Wrap the base directory and pattern in the SDK's search request type
        # and return whatever the SDK reports.
        return await handle.files.search(SearchEntry(path=path, pattern=pattern))

    @tool()
    async def file_create_directories(
        sandbox_id: str,
        entries: list[DirectoryEntryInput],
        *,
        connect_if_missing: bool = False,
    ) -> StatusResponse:
        """Create one or more directories in a sandbox.

        Parameters:
            sandbox_id: Identifier of the sandbox to operate on.
            entries: Directory descriptions (path, mode, owner, group).
            connect_if_missing: Connect to the sandbox when it is not in the
                local registry.

        Returns:
            {"status": "created"} once the create call has completed.
        """
        target = await _get_or_connect_sandbox(
            sandbox_id, connect_if_missing=connect_if_missing
        )
        # Convert each tool-level input into a WriteEntry, forwarding only the
        # fields that were actually provided (None values are dropped).
        directory_specs = []
        for item in entries:
            provided_fields = item.model_dump(exclude_none=True)
            directory_specs.append(WriteEntry(**provided_fields))
        await target.files.create_directories(directory_specs)
        return StatusResponse(status="created")

    @tool()
    async def file_delete_directories(
        sandbox_id: str,
        paths: list[str],
        *,
        connect_if_missing: bool = False,
    ) -> StatusResponse:
        """Remove one or more directories from a sandbox.

        Parameters:
            sandbox_id: Identifier of the sandbox to operate on.
            paths: Paths of the directories to remove.
            connect_if_missing: Connect to the sandbox when it is not in the
                local registry.

        Returns:
            {"status": "deleted"} once the delete call has completed.
        """
        target = await _get_or_connect_sandbox(
            sandbox_id, connect_if_missing=connect_if_missing
        )
        file_api = target.files
        await file_api.delete_directories(paths)
        return StatusResponse(status="deleted")

    @tool()
    async def file_move(
        sandbox_id: str,
        entries: list[MoveEntry],
        *,
        connect_if_missing: bool = False,
    ) -> StatusResponse:
        """Relocate or rename files and directories within a sandbox.

        Parameters:
            sandbox_id: Identifier of the sandbox to operate on.
            entries: Move descriptions (source, destination); they are handed
                to the sandbox file service unchanged.
            connect_if_missing: Connect to the sandbox when it is not in the
                local registry.

        Returns:
            {"status": "moved"} once the move call has completed.
        """
        target = await _get_or_connect_sandbox(
            sandbox_id, connect_if_missing=connect_if_missing
        )
        file_api = target.files
        await file_api.move_files(entries)
        return StatusResponse(status="moved")

    @tool()
    async def file_replace_contents(
        sandbox_id: str,
        entries: list[ContentReplaceEntry],
        *,
        connect_if_missing: bool = False,
    ) -> StatusResponse:
        """Substitute text inside existing sandbox files.

        Parameters:
            sandbox_id: Identifier of the sandbox to operate on.
            entries: Replacement descriptions (path, old_content, new_content).
            connect_if_missing: Connect to the sandbox when it is not in the
                local registry.

        Returns:
            {"status": "updated"} once the replace call has completed.
        """
        target = await _get_or_connect_sandbox(
            sandbox_id, connect_if_missing=connect_if_missing
        )
        # Re-create every entry from its non-None fields before forwarding it.
        replacements = []
        for item in entries:
            provided_fields = item.model_dump(exclude_none=True)
            replacements.append(ContentReplaceEntry(**provided_fields))
        await target.files.replace_contents(replacements)
        return StatusResponse(status="updated")

    @tool()
    async def sandbox_get_endpoint(
        sandbox_id: str,
        port: int,
        *,
        connect_if_missing: bool = False,
    ) -> SandboxEndpoint:
        """Look up the network endpoint a sandbox exposes for a given port.

        Parameters:
            sandbox_id: Identifier of the sandbox to operate on.
            port: Port number inside the sandbox.
            connect_if_missing: Connect to the sandbox when it is not in the
                local registry.

        Returns:
            The endpoint object returned by the sandbox for that port.
        """
        target = await _get_or_connect_sandbox(
            sandbox_id, connect_if_missing=connect_if_missing
        )
        return await target.get_endpoint(port)

    return state


def create_server(connection_config: ConnectionConfig | None = None) -> FastMCP:
    """Build the OpenSandbox MCP server and register the sandbox tools on it.

    Parameters:
        connection_config: Optional connection settings, forwarded unchanged
            to ``register_tools``.

    Returns:
        The FastMCP instance after tool registration.
    """
    # Guidance text handed to FastMCP as the server-level instructions.
    usage_notes = (
        "Use these tools to create and manage isolated sandboxes. "
        "Always keep track of the sandbox_id returned by sandbox_create/connect. "
        "Use command_run for execution, file_read/file_write for file IO, and "
        "sandbox_kill to terminate remote sandboxes. Use sandbox_get_endpoint to "
        "expose sandbox ports; for large files, prefer range reads."
    )
    server = FastMCP("OpenSandbox Sandbox", instructions=usage_notes)
    register_tools(server, connection_config=connection_config)
    return server


================================================
FILE: sdks/package.json
================================================
{
  "name": "opensandbox-sdks",
  "private": true,
  "packageManager": "pnpm@9.15.0",
  "scripts": {
    "build:js": "pnpm -r --filter @alibaba-group/opensandbox-code-interpreter... --sort run build",
    "lint:js": "pnpm -r --filter @alibaba-group/opensandbox-code-interpreter... run lint",
    "clean:js": "pnpm -r --filter @alibaba-group/opensandbox-code-interpreter... --sort run clean",
    "publish:js": "pnpm -r --filter @alibaba-group/opensandbox-code-interpreter... publish --access public --no-git-checks"
  },
  "devDependencies": {
    "@eslint/js": "^9.39.2",
    "eslint": "^9.39.2",
    "globals": "^17.0.0",
    "typescript": "^5.7.2",
    "typescript-eslint": "^8.52.0"
  }
}


================================================
FILE: sdks/pnpm-workspace.yaml
================================================
packages:
  - "sandbox/javascript"
  - "code-interpreter/javascript"


================================================
FILE: sdks/sandbox/csharp/.editorconfig
================================================
root = true

[*.cs]
charset = utf-8
end_of_line = lf
insert_final_newline = true
indent_style = space
indent_size = 4


================================================
FILE: sdks/sandbox/csharp/Directory.Build.props
================================================
<Project>
  <Import Project="$(MSBuildThisFileDirectory)..\..\Directory.Build.props" Condition="Exists('$(MSBuildThisFileDirectory)..\..\Directory.Build.props')" />

  <PropertyGroup>
    <EnableNETAnalyzers>true</EnableNETAnalyzers>
    <AnalysisLevel>latest</AnalysisLevel>
    <EnforceCodeStyleInBuild>true</EnforceCodeStyleInBuild>
  </PropertyGroup>
</Project>


================================================
FILE: sdks/sandbox/csharp/OpenSandbox.sln
================================================

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.0.31903.59
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "OpenSandbox", "src\OpenSandbox\OpenSandbox.csproj", "{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "OpenSandbox.Tests", "tests\OpenSandbox.Tests\OpenSandbox.Tests.csproj", "{B2C3D4E5-F6A7-8901-BCDE-F12345678901}"
EndProject
Global
	GlobalSection(SolutionConfigurationPlatforms) = preSolution
		Debug|Any CPU = Debug|Any CPU
		Release|Any CPU = Release|Any CPU
	EndGlobalSection
	GlobalSection(ProjectConfigurationPlatforms) = postSolution
		{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Release|Any CPU.Build.0 = Release|Any CPU
		{B2C3D4E5-F6A7-8901-BCDE-F12345678901}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{B2C3D4E5-F6A7-8901-BCDE-F12345678901}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{B2C3D4E5-F6A7-8901-BCDE-F12345678901}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{B2C3D4E5-F6A7-8901-BCDE-F12345678901}.Release|Any CPU.Build.0 = Release|Any CPU
	EndGlobalSection
EndGlobal


================================================
FILE: sdks/sandbox/csharp/OpenSandbox.sln.DotSettings.user
================================================
﻿<wpf:ResourceDictionary xml:space="preserve" xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml" xmlns:s="clr-namespace:System;assembly=mscorlib" xmlns:ss="urn:shemas-jetbrains-com:settings-storage-xaml" xmlns:wpf="http://schemas.microsoft.com/winfx/2006/xaml/presentation">
	<s:String x:Key="/Default/Environment/UnitTesting/UnitTestSessionStore/Sessions/=9dbda61d_002D52e7_002D49a4_002D99fc_002D81c70a977921/@EntryIndexedValue">&lt;SessionState ContinuousTestingMode="0" IsActive="True" Name="ConnectionConfigTests" xmlns="urn:schemas-jetbrains-com:jetbrains-ut-session"&gt;&#xD;
  &lt;TestAncestor&gt;&#xD;
    &lt;TestId&gt;xUnit::B2C3D4E5-F6A7-8901-BCDE-F12345678901::net8.0::OpenSandbox.Tests.ConnectionConfigTests&lt;/TestId&gt;&#xD;
    &lt;TestId&gt;xUnit::B2C3D4E5-F6A7-8901-BCDE-F12345678901::net8.0::OpenSandbox.Tests.SseParserTests&lt;/TestId&gt;&#xD;
  &lt;/TestAncestor&gt;&#xD;
&lt;/SessionState&gt;</s:String></wpf:ResourceDictionary>

================================================
FILE: sdks/sandbox/csharp/README.md
================================================
# OpenSandbox SDK for C#

English | [中文](README_zh.md)

A C# SDK for low-level interaction with OpenSandbox. It provides the ability to create, manage, and interact with secure sandbox environments, including executing shell commands, managing files, and reading resource metrics.

## Installation

### NuGet

```bash
dotnet add package Alibaba.OpenSandbox
```

### Package Manager

```powershell
Install-Package Alibaba.OpenSandbox
```

## Quick Start

The following example shows how to create a sandbox and execute a shell command.

> **Note**: Before running this example, ensure the OpenSandbox service is running. See the root [README.md](../../../README.md) for startup instructions.

```csharp
using OpenSandbox;
using OpenSandbox.Config;
using OpenSandbox.Core;

var config = new ConnectionConfig(new ConnectionConfigOptions
{
    Domain = "api.opensandbox.io",
    ApiKey = "your-api-key",
    // Protocol = ConnectionProtocol.Https,
    // RequestTimeoutSeconds = 60,
});

try
{
    await using var sandbox = await Sandbox.CreateAsync(new SandboxCreateOptions
    {
        ConnectionConfig = config,
        Image = "ubuntu",
        TimeoutSeconds = 10 * 60,
    });

    var execution = await sandbox.Commands.RunAsync("echo 'Hello Sandbox!'");
    Console.WriteLine(execution.Logs.Stdout.FirstOrDefault()?.Text);

    // Optional but recommended: terminate the remote instance when you are done.
    await sandbox.KillAsync();
}
catch (SandboxException ex)
{
    Console.Error.WriteLine($"Sandbox Error: [{ex.Error.Code}] {ex.Error.Message}");
    Console.Error.WriteLine($"Request ID: {ex.RequestId}");
}
```

## Usage Examples

### 1. Lifecycle Management

Manage the sandbox lifecycle, including renewal, pausing, and resuming.

```csharp
var info = await sandbox.GetInfoAsync();
Console.WriteLine($"State: {info.Status.State}");
Console.WriteLine($"Created: {info.CreatedAt}");
Console.WriteLine($"Expires: {info.ExpiresAt}"); // null when manual cleanup mode is used

await sandbox.PauseAsync();

// Resume returns a fresh, connected Sandbox instance.
var resumed = await sandbox.ResumeAsync();

// Renew: expiresAt = now + timeoutSeconds
await resumed.RenewAsync(30 * 60);
```

Create a non-expiring sandbox by setting `ManualCleanup = true`:

```csharp
var manual = await Sandbox.CreateAsync(new SandboxCreateOptions
{
    ConnectionConfig = config,
    Image = "ubuntu",
    ManualCleanup = true,
});
```

Note: unlike the Python, JavaScript, and Kotlin SDKs, the C# SDK uses an explicit
`ManualCleanup` flag instead of `TimeoutSeconds = null`. This is intentional:
`int?` in the current options model cannot reliably distinguish "unset, use the
default TTL" from "explicitly request manual cleanup" without making the default
creation path ambiguous.

### Connect to an Existing Sandbox

Use `ConnectAsync` when you already have a sandbox ID and need a new SDK instance bound to it.

```csharp
var connected = await Sandbox.ConnectAsync(new SandboxConnectOptions
{
    SandboxId = "existing-sandbox-id",
    ConnectionConfig = config
});
```

### 2. Custom Health Check

Define custom logic to determine whether the sandbox is ready/healthy.

```csharp
var sandbox = await Sandbox.CreateAsync(new SandboxCreateOptions
{
    ConnectionConfig = config,
    Image = "nginx:latest",
    HealthCheck = async (sbx) =>
    {
        // Example: consider the sandbox healthy when port 80 endpoint becomes available
        var ep = await sbx.GetEndpointAsync(80);
        return !string.IsNullOrEmpty(ep.EndpointAddress);
    },
});
```

### 3. Command Execution & Streaming

Execute commands and handle output streams in real-time.

```csharp
using OpenSandbox.Models;

var handlers = new ExecutionHandlers
{
    OnStdout = msg => { Console.WriteLine($"STDOUT: {msg.Text}"); return Task.CompletedTask; },
    OnStderr = msg => { Console.Error.WriteLine($"STDERR: {msg.Text}"); return Task.CompletedTask; },
    OnExecutionComplete = c => { Console.WriteLine($"Finished in {c.ExecutionTimeMs}ms"); return Task.CompletedTask; },
};

await sandbox.Commands.RunAsync(
    "for i in 1 2 3; do echo \"Count $i\"; sleep 0.2; done",
    handlers: handlers
);
```

For background commands, you can poll status and incremental logs:

```csharp
var execution = await sandbox.Commands.RunAsync(
    "python /app/server.py",
    options: new RunCommandOptions
    {
        Background = true,
        TimeoutSeconds = 120,
    });

var status = await sandbox.Commands.GetCommandStatusAsync(execution.Id!);
var logs = await sandbox.Commands.GetBackgroundCommandLogsAsync(execution.Id!, cursor: 0);
Console.WriteLine($"running={status.Running}, cursor={logs.Cursor}");
```

### 4. Comprehensive File Operations

Manage files and directories, including read, write, list/search, and delete.

```csharp
await sandbox.Files.CreateDirectoriesAsync(new[]
{
    new CreateDirectoryEntry { Path = "/tmp/demo", Mode = 755 }
});

await sandbox.Files.WriteFilesAsync(new[]
{
    new WriteEntry { Path = "/tmp/demo/hello.txt", Data = "Hello World", Mode = 644 }
});

var content = await sandbox.Files.ReadFileAsync("/tmp/demo/hello.txt");
Console.WriteLine($"Content: {content}");

var files = await sandbox.Files.SearchAsync(new SearchEntry { Path = "/tmp/demo", Pattern = "*.txt" });
foreach (var file in files)
{
    Console.WriteLine(file.Path);
}

// Delete one or more files directly.
await sandbox.Files.DeleteFilesAsync(new[] { "/tmp/demo/hello.txt" });

// Then remove the directory itself.
await sandbox.Files.DeleteDirectoriesAsync(new[] { "/tmp/demo" });
```

### 5. Endpoints

`GetEndpointAsync()` returns an endpoint **without a scheme** (for example `"localhost:44772"`). Use `GetEndpointUrlAsync()` if you want a ready-to-use absolute URL.

```csharp
var endpoint = await sandbox.GetEndpointAsync(44772);
Console.WriteLine(endpoint.EndpointAddress);

var url = await sandbox.GetEndpointUrlAsync(44772);
Console.WriteLine(url); // e.g., "http://localhost:44772"
```

### 6. Sandbox Management (Admin)

Use `SandboxManager` for administrative tasks and finding existing sandboxes.

```csharp
await using var manager = SandboxManager.Create(new SandboxManagerOptions
{
    ConnectionConfig = config
});

var list = await manager.ListSandboxInfosAsync(new SandboxFilter
{
    States = new[] { SandboxStates.Running },
    PageSize = 10
});

foreach (var s in list.Items)
{
    Console.WriteLine(s.Id);
}
```

## Configuration

### 1. Connection Configuration

The `ConnectionConfig` class manages API server connection settings.

| Parameter | Description | Default | Environment Variable |
| --- | --- | --- | --- |
| `ApiKey` | API key for authentication | Optional | `OPEN_SANDBOX_API_KEY` |
| `Domain` | Sandbox service domain (`host[:port]`) | `localhost:8080` | `OPEN_SANDBOX_DOMAIN` |
| `Protocol` | HTTP protocol (`Http`/`Https`) | `Http` | - |
| `RequestTimeoutSeconds` | Request timeout applied to SDK HTTP calls | `30` | - |
| `UseServerProxy` | Request server-proxied sandbox endpoint URLs | `false` | - |
| `Headers` | Extra headers applied to every request | `{}` | - |

```csharp
using OpenSandbox.Config;

// 1. Basic configuration
var config = new ConnectionConfig(new ConnectionConfigOptions
{
    Domain = "api.opensandbox.io",
    ApiKey = "your-key",
    RequestTimeoutSeconds = 60,
    // UseServerProxy = true, // Useful when the client cannot access sandbox endpoint directly
});

// 2. Advanced: custom headers
var config2 = new ConnectionConfig(new ConnectionConfigOptions
{
    Domain = "api.opensandbox.io",
    ApiKey = "your-key",
    Headers = new Dictionary<string, string>
    {
        ["X-Custom-Header"] = "value"
    },
});
```

### 2. Diagnostics and Logging

The SDK uses `Microsoft.Extensions.Logging` abstractions.

```csharp
using Microsoft.Extensions.Logging;
using OpenSandbox.Config;

using var loggerFactory = LoggerFactory.Create(builder =>
{
    builder.SetMinimumLevel(LogLevel.Debug);
    builder.AddConsole();
});

var sandbox = await Sandbox.CreateAsync(new SandboxCreateOptions
{
    Image = "python:3.11",
    ConnectionConfig = new ConnectionConfig(),
    Diagnostics = new SdkDiagnosticsOptions
    {
        LoggerFactory = loggerFactory
    }
});
```

### 3. Sandbox Creation Configuration

`Sandbox.CreateAsync()` allows configuring the sandbox environment.

| Parameter | Description | Default |
| --- | --- | --- |
| `Image` | Docker image to use | Required |
| `TimeoutSeconds` | Automatic termination timeout (server-side TTL) | 10 minutes |
| `Entrypoint` | Container entrypoint command | `["tail","-f","/dev/null"]` |
| `Resource` | CPU and memory limits (string map) | `{"cpu":"1","memory":"2Gi"}` |
| `Env` | Environment variables | `{}` |
| `Metadata` | Custom metadata tags | `{}` |
| `NetworkPolicy` | Optional outbound network policy (egress) | - |
| `Volumes` | Optional storage mounts (`Host` / `PVC`, supports `ReadOnly` and `SubPath`) | - |
| `Extensions` | Extra server-defined fields | `{}` |
| `SkipHealthCheck` | Skip readiness checks (`Running` + health check) | `false` |
| `HealthCheck` | Custom readiness check | - |
| `ReadyTimeoutSeconds` | Max time to wait for readiness | 30 seconds |
| `HealthCheckPollingInterval` | Poll interval while waiting (milliseconds) | 200 ms |

Note: metadata keys under `opensandbox.io/` are reserved for system-managed
labels and will be rejected by the server.

```csharp
var sandbox = await Sandbox.CreateAsync(new SandboxCreateOptions
{
    ConnectionConfig = config,
    Image = "python:3.11",
    NetworkPolicy = new NetworkPolicy
    {
        DefaultAction = NetworkRuleAction.Deny,
        Egress = new List<NetworkRule>
        {
            new() { Action = NetworkRuleAction.Allow, Target = "pypi.org" }
        }
    },
    Volumes = new[]
    {
        new Volume
        {
            Name = "workspace",
            Host = new Host { Path = "/tmp/opensandbox-e2e/host-volume-test" },
            MountPath = "/workspace",
            ReadOnly = false
        }
    }
});
```

### 4. Runtime Egress Policy Updates

Runtime egress reads and patches go directly to the sandbox egress sidecar.
The SDK first resolves the sandbox endpoint on port `18080`, then calls the sidecar `/policy` API.

Patch uses merge semantics:
- Incoming rules take priority over existing rules with the same `Target`.
- Existing rules for other targets remain unchanged.
- Within a single patch payload, the first rule for a `Target` wins.
- The current `DefaultAction` is preserved.

```csharp
var policy = await sandbox.GetEgressPolicyAsync();

await sandbox.PatchEgressRulesAsync(new[]
{
    new NetworkRule { Action = NetworkRuleAction.Allow, Target = "www.github.com" },
    new NetworkRule { Action = NetworkRuleAction.Deny, Target = "pypi.org" }
});
```

### 5. Timeout and Retry Behavior

- `ConnectionConfig.RequestTimeoutSeconds` controls timeout for SDK HTTP calls.
- `RunCommandOptions.TimeoutSeconds` controls command execution timeout for command runs.
- `SandboxCreateOptions.TimeoutSeconds` controls sandbox server-side TTL.
- `ReadyTimeoutSeconds` controls how long `CreateAsync` / `ConnectAsync` waits for readiness.
- The SDK does not automatically retry failed API requests; implement retries in caller code where appropriate.

### 6. Resource Cleanup

Both `Sandbox` and `SandboxManager` implement `IAsyncDisposable`. Use `await using` or call `DisposeAsync()` when done.

```csharp
await using var sandbox = await Sandbox.CreateAsync(options);
// ... use sandbox ...
// Automatically disposed when leaving scope
```

## Error Handling

The SDK throws `SandboxException` (and derived exceptions such as `SandboxApiException`,
`SandboxReadyTimeoutException`, and `InvalidArgumentException`) when operations fail.

```csharp
try
{
    var execution = await sandbox.Commands.RunAsync("echo 'Hello Sandbox!'");
    Console.WriteLine(execution.Logs.Stdout.FirstOrDefault()?.Text);
}
catch (SandboxReadyTimeoutException)
{
    Console.Error.WriteLine("Sandbox did not become ready before the configured timeout.");
}
catch (SandboxApiException ex)
{
    Console.Error.WriteLine($"API Error: status={ex.StatusCode}, requestId={ex.RequestId}, message={ex.Message}");
}
catch (SandboxException ex)
{
    Console.Error.WriteLine($"Sandbox Error: [{ex.Error.Code}] {ex.Error.Message}");
}
```

## Supported Frameworks

- .NET Standard 2.0 (for maximum compatibility with .NET Framework 4.6.1+, .NET Core 2.0+, Mono, Xamarin, etc.)
- .NET Standard 2.1
- .NET 6.0 (LTS)
- .NET 7.0
- .NET 8.0 (LTS)
- .NET 9.0
- .NET 10.0

## License

Apache License 2.0


================================================
FILE: sdks/sandbox/csharp/README_zh.md
================================================
# OpenSandbox SDK for C#

[English](README.md) | 中文

一个用于与 OpenSandbox 进行低级交互的 C# SDK。它提供了创建、管理和与安全沙箱环境交互的能力，包括执行 shell 命令、管理文件和读取资源指标。

## 安装

### NuGet

```bash
dotnet add package Alibaba.OpenSandbox
```

### Package Manager

```powershell
Install-Package Alibaba.OpenSandbox
```

## 快速开始

以下示例展示如何创建沙箱并执行 shell 命令。

> **注意**：运行此示例之前，请确保 OpenSandbox 服务正在运行。有关启动说明，请参阅根目录的 [README_zh.md](../../../docs/README_zh.md)。

```csharp
using OpenSandbox;
using OpenSandbox.Config;
using OpenSandbox.Core;

var config = new ConnectionConfig(new ConnectionConfigOptions
{
    Domain = "api.opensandbox.io",
    ApiKey = "your-api-key",
    // Protocol = ConnectionProtocol.Https,
    // RequestTimeoutSeconds = 60,
});

try
{
    await using var sandbox = await Sandbox.CreateAsync(new SandboxCreateOptions
    {
        ConnectionConfig = config,
        Image = "ubuntu",
        TimeoutSeconds = 10 * 60,
    });

    var execution = await sandbox.Commands.RunAsync("echo 'Hello Sandbox!'");
    Console.WriteLine(execution.Logs.Stdout.FirstOrDefault()?.Text);

    // 可选但推荐：完成后终止远程实例
    await sandbox.KillAsync();
}
catch (SandboxException ex)
{
    Console.Error.WriteLine($"沙箱错误: [{ex.Error.Code}] {ex.Error.Message}");
    Console.Error.WriteLine($"Request ID: {ex.RequestId}");
}
```

## 使用示例

### 1. 生命周期管理

管理沙箱生命周期，包括续期、暂停和恢复。

```csharp
var info = await sandbox.GetInfoAsync();
Console.WriteLine($"状态: {info.Status.State}");
Console.WriteLine($"创建时间: {info.CreatedAt}");
Console.WriteLine($"过期时间: {info.ExpiresAt}"); // 使用手动清理模式时为 null

await sandbox.PauseAsync();

// Resume 返回一个新的、已连接的 Sandbox 实例
var resumed = await sandbox.ResumeAsync();

// 续期: expiresAt = now + timeoutSeconds
await resumed.RenewAsync(30 * 60);
```

通过设置 `ManualCleanup = true` 创建一个不会自动过期的沙箱：

```csharp
var manual = await Sandbox.CreateAsync(new SandboxCreateOptions
{
    ConnectionConfig = config,
    Image = "ubuntu",
    ManualCleanup = true,
});
```

注意：与 Python、JavaScript、Kotlin SDK 不同，C# SDK 使用显式的
`ManualCleanup` 开关，而不是 `TimeoutSeconds = null`。这是有意的设计，
因为在当前的 options 模型里，`int?` 不能稳定地区分“未设置，沿用默认 TTL”
和“显式请求手动清理”。

### 2. 自定义健康检查

定义自定义逻辑来确定沙箱是否就绪/健康。

```csharp
var sandbox = await Sandbox.CreateAsync(new SandboxCreateOptions
{
    ConnectionConfig = config,
    Image = "nginx:latest",
    HealthCheck = async (sbx) =>
    {
        // 示例：当端口 80 端点可用时认为沙箱健康
        var ep = await sbx.GetEndpointAsync(80);
        return !string.IsNullOrEmpty(ep.EndpointAddress);
    },
});
```

### 3. 命令执行和流式处理

执行命令并实时处理输出流。

```csharp
using OpenSandbox.Models;

var handlers = new ExecutionHandlers
{
    OnStdout = msg => { Console.WriteLine($"STDOUT: {msg.Text}"); return Task.CompletedTask; },
    OnStderr = msg => { Console.Error.WriteLine($"STDERR: {msg.Text}"); return Task.CompletedTask; },
    OnExecutionComplete = c => { Console.WriteLine($"完成，耗时 {c.ExecutionTimeMs}ms"); return Task.CompletedTask; },
};

await sandbox.Commands.RunAsync(
    "for i in 1 2 3; do echo \"Count $i\"; sleep 0.2; done",
    handlers: handlers
);
```

对于后台命令，可以轮询状态和增量日志：

```csharp
var execution = await sandbox.Commands.RunAsync(
    "python /app/server.py",
    options: new RunCommandOptions
    {
        Background = true,
        TimeoutSeconds = 120,
    });

var status = await sandbox.Commands.GetCommandStatusAsync(execution.Id!);
var logs = await sandbox.Commands.GetBackgroundCommandLogsAsync(execution.Id!, cursor: 0);
Console.WriteLine($"running={status.Running}, cursor={logs.Cursor}");
```

### 4. 全面的文件操作

管理文件和目录，包括读取、写入、列出/搜索和删除。

```csharp
await sandbox.Files.CreateDirectoriesAsync(new[]
{
    new CreateDirectoryEntry { Path = "/tmp/demo", Mode = 755 }
});

await sandbox.Files.WriteFilesAsync(new[]
{
    new WriteEntry { Path = "/tmp/demo/hello.txt", Data = "Hello World", Mode = 644 }
});

var content = await sandbox.Files.ReadFileAsync("/tmp/demo/hello.txt");
Console.WriteLine($"内容: {content}");

var files = await sandbox.Files.SearchAsync(new SearchEntry { Path = "/tmp/demo", Pattern = "*.txt" });
foreach (var file in files)
{
    Console.WriteLine(file.Path);
}

await sandbox.Files.DeleteDirectoriesAsync(new[] { "/tmp/demo" });
```

### 5. 端点

`GetEndpointAsync()` 返回**不带协议**的端点（例如 `"localhost:44772"`）。如果需要可直接使用的绝对 URL，请使用 `GetEndpointUrlAsync()`。

```csharp
var endpoint = await sandbox.GetEndpointAsync(44772);
Console.WriteLine(endpoint.EndpointAddress);

var url = await sandbox.GetEndpointUrlAsync(44772);
Console.WriteLine(url); // 例如 "http://localhost:44772"
```

### 6. 沙箱管理（管理员）

使用 `SandboxManager` 进行管理任务和查找现有沙箱。

```csharp
await using var manager = SandboxManager.Create(new SandboxManagerOptions
{
    ConnectionConfig = config
});

var list = await manager.ListSandboxInfosAsync(new SandboxFilter
{
    States = new[] { SandboxStates.Running },
    PageSize = 10
});

foreach (var s in list.Items)
{
    Console.WriteLine(s.Id);
}
```

## 配置

### 1. 连接配置

`ConnectionConfig` 类管理 API 服务器连接设置。

| 参数 | 描述 | 默认值 | 环境变量 |
| --- | --- | --- | --- |
| `ApiKey` | 用于身份验证的 API 密钥 | 可选 | `OPEN_SANDBOX_API_KEY` |
| `Domain` | 沙箱服务域名 (`host[:port]`) | `localhost:8080` | `OPEN_SANDBOX_DOMAIN` |
| `Protocol` | HTTP 协议 (`Http`/`Https`) | `Http` | - |
| `RequestTimeoutSeconds` | 应用于 SDK HTTP 调用的请求超时 | `30` | - |
| `UseServerProxy` | 是否请求服务端代理的沙箱访问端点 URL | `false` | - |
| `Headers` | 应用于每个请求的额外头部 | `{}` | - |

```csharp
using OpenSandbox.Config;

// 1. 基本配置
var config = new ConnectionConfig(new ConnectionConfigOptions
{
    Domain = "api.opensandbox.io",
    ApiKey = "your-key",
    RequestTimeoutSeconds = 60,
    // UseServerProxy = true, // 当客户端无法直连沙箱 endpoint 时建议开启
});

// 2. 高级：自定义头部
var config2 = new ConnectionConfig(new ConnectionConfigOptions
{
    Domain = "api.opensandbox.io",
    ApiKey = "your-key",
    Headers = new Dictionary<string, string>
    {
        ["X-Custom-Header"] = "value"
    },
});
```

### 2. 诊断与日志

SDK 使用 `Microsoft.Extensions.Logging` 抽象。

```csharp
using Microsoft.Extensions.Logging;
using OpenSandbox.Config;

using var loggerFactory = LoggerFactory.Create(builder =>
{
    builder.SetMinimumLevel(LogLevel.Debug);
    builder.AddConsole();
});

var sandbox = await Sandbox.CreateAsync(new SandboxCreateOptions
{
    Image = "python:3.11",
    ConnectionConfig = new ConnectionConfig(),
    Diagnostics = new SdkDiagnosticsOptions
    {
        LoggerFactory = loggerFactory
    }
});
```

### 3. 沙箱创建配置

`Sandbox.CreateAsync()` 允许配置沙箱环境。

| 参数 | 描述 | 默认值 |
| --- | --- | --- |
| `Image` | 要使用的 Docker 镜像 | 必需 |
| `TimeoutSeconds` | 自动终止超时（服务器端 TTL） | 10 分钟 |
| `Entrypoint` | 容器入口点命令 | `["tail","-f","/dev/null"]` |
| `Resource` | CPU 和内存限制（字符串映射） | `{"cpu":"1","memory":"2Gi"}` |
| `Env` | 环境变量 | `{}` |
| `Metadata` | 自定义元数据标签 | `{}` |
| `NetworkPolicy` | 可选的出站网络策略（egress） | - |
| `Volumes` | 可选存储挂载（`Host` / `PVC`，支持 `ReadOnly` 与 `SubPath`） | - |
| `Extensions` | 额外的服务器定义字段 | `{}` |
| `SkipHealthCheck` | 跳过就绪检查（`Running` + 健康检查） | `false` |
| `HealthCheck` | 自定义就绪检查 | - |
| `ReadyTimeoutSeconds` | 等待就绪的最大时间 | 30 秒 |
| `HealthCheckPollingInterval` | 等待时的轮询间隔（毫秒） | 200 ms |

注意：`opensandbox.io/` 前缀下的 metadata key 属于系统保留标签，服务端会拒绝用户传入。

```csharp
var sandbox = await Sandbox.CreateAsync(new SandboxCreateOptions
{
    ConnectionConfig = config,
    Image = "python:3.11",
    NetworkPolicy = new NetworkPolicy
    {
        DefaultAction = NetworkRuleAction.Deny,
        Egress = new List<NetworkRule>
        {
            new() { Action = NetworkRuleAction.Allow, Target = "pypi.org" }
        }
    },
    Volumes = new[]
    {
        new Volume
        {
            Name = "workspace",
            Host = new Host { Path = "/tmp/opensandbox-e2e/host-volume-test" },
            MountPath = "/workspace",
            ReadOnly = false
        }
    }
});
```

### 4. 运行时 Egress 策略更新

运行时的 egress 查询和 patch 会直接访问沙箱内的 egress sidecar。
SDK 会先解析 `18080` 端口对应的 sandbox endpoint，再调用 sidecar 的 `/policy` API。

```csharp
var policy = await sandbox.GetEgressPolicyAsync();

await sandbox.PatchEgressRulesAsync(new[]
{
    new NetworkRule { Action = NetworkRuleAction.Allow, Target = "www.github.com" },
    new NetworkRule { Action = NetworkRuleAction.Deny, Target = "pypi.org" }
});
```

### 5. 资源清理

`Sandbox` 和 `SandboxManager` 都实现了 `IAsyncDisposable`。完成后使用 `await using` 或调用 `DisposeAsync()`。

```csharp
await using var sandbox = await Sandbox.CreateAsync(options);
// ... 使用沙箱 ...
// 离开作用域时自动释放
```

## 支持的框架

- .NET Standard 2.0（最大兼容性，支持 .NET Framework 4.6.1+、.NET Core 2.0+、Mono、Xamarin 等）
- .NET Standard 2.1
- .NET 6.0 (LTS)
- .NET 7.0
- .NET 8.0 (LTS)
- .NET 9.0
- .NET 10.0

## 许可证

Apache License 2.0


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Adapters/CommandsAdapter.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System.Runtime.CompilerServices;
using System.Text;
using System.Text.Json;
using OpenSandbox.Core;
using OpenSandbox.Internal;
using OpenSandbox.Models;
using OpenSandbox.Services;
using Microsoft.Extensions.Logging;

namespace OpenSandbox.Adapters;

/// <summary>
/// Adapter for the execd commands service.
/// </summary>
/// <remarks>
/// Streaming command execution is sent through the dedicated SSE <see cref="HttpClient"/>;
/// every other call goes through the shared <see cref="HttpClientWrapper"/>.
/// </remarks>
internal sealed class CommandsAdapter : IExecdCommands
{
    private readonly HttpClientWrapper _client;
    private readonly HttpClient _sseHttpClient;
    private readonly string _baseUrl;
    private readonly IReadOnlyDictionary<string, string> _headers;
    private readonly ILogger _logger;

    // Request bodies are written in camelCase and null members are omitted.
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull
    };

    /// <summary>
    /// Creates a new commands adapter.
    /// </summary>
    /// <param name="client">Wrapper used for the non-streaming endpoints.</param>
    /// <param name="sseHttpClient">HTTP client used for the streaming <c>/command</c> request.</param>
    /// <param name="baseUrl">Base URL of the service; trailing slashes are trimmed.</param>
    /// <param name="headers">Headers copied onto each streaming request; null is treated as empty.</param>
    /// <param name="logger">Logger for diagnostic messages.</param>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="client"/>, <paramref name="sseHttpClient"/>,
    /// <paramref name="baseUrl"/> or <paramref name="logger"/> is null.
    /// </exception>
    public CommandsAdapter(
        HttpClientWrapper client,
        HttpClient sseHttpClient,
        string baseUrl,
        IReadOnlyDictionary<string, string> headers,
        ILogger logger)
    {
        _client = client ?? throw new ArgumentNullException(nameof(client));
        _sseHttpClient = sseHttpClient ?? throw new ArgumentNullException(nameof(sseHttpClient));
        _baseUrl = baseUrl?.TrimEnd('/') ?? throw new ArgumentNullException(nameof(baseUrl));
        _headers = headers ?? new Dictionary<string, string>();
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Posts a command to <c>/command</c> and yields the server-sent events of the response.
    /// </summary>
    /// <param name="command">Command text to run.</param>
    /// <param name="options">Optional working directory, background flag, timeout, uid/gid and environment.</param>
    /// <param name="cancellationToken">Token used to cancel the request and the stream.</param>
    /// <returns>The parsed events, in the order the parser produces them.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="command"/> is null.</exception>
    /// <exception cref="InvalidArgumentException">
    /// Thrown when a gid is given without a uid, or when uid/gid is negative.
    /// </exception>
    /// <remarks>
    /// This is an async iterator, so validation runs when enumeration starts, not when the method is called.
    /// </remarks>
    public async IAsyncEnumerable<ServerStreamEvent> RunStreamAsync(
        string command,
        RunCommandOptions? options = null,
        [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        // Fail with a descriptive exception instead of a NullReferenceException from the log call below.
        if (command == null)
        {
            throw new ArgumentNullException(nameof(command));
        }

        if (options?.Gid.HasValue == true && options.Uid.HasValue != true)
        {
            throw new InvalidArgumentException("uid is required when gid is provided");
        }
        if (options?.Uid.HasValue == true && options.Uid.Value < 0)
        {
            throw new InvalidArgumentException("uid must be >= 0");
        }
        if (options?.Gid.HasValue == true && options.Gid.Value < 0)
        {
            throw new InvalidArgumentException("gid must be >= 0");
        }

        var url = $"{_baseUrl}/command";
        // Only the length is logged, never the command text itself.
        _logger.LogDebug("Running command stream (commandLength={CommandLength})", command.Length);
        var requestBody = new RunCommandRequest
        {
            Command = command,
            Cwd = options?.WorkingDirectory,
            Background = options?.Background,
            // The option is expressed in seconds; the request field is sent as seconds * 1000.
            Timeout = options?.TimeoutSeconds.HasValue == true ? options.TimeoutSeconds.Value * 1000L : null,
            Uid = options?.Uid,
            Gid = options?.Gid,
            Envs = options?.Envs
        };

        var json = JsonSerializer.Serialize(requestBody, JsonOptions);
        using var request = new HttpRequestMessage(HttpMethod.Post, url)
        {
            Content = new StringContent(json, Encoding.UTF8, "application/json")
        };

        request.Headers.Accept.Add(new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue("text/event-stream"));

        foreach (var header in _headers)
        {
            request.Headers.TryAddWithoutValidation(header.Key, header.Value);
        }

        // ResponseHeadersRead lets events be consumed as they arrive instead of buffering the whole body.
        using var response = await _sseHttpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);

        await foreach (var ev in SseParser.ParseJsonEventStreamAsync<ServerStreamEvent>(response, "Run command failed", cancellationToken).ConfigureAwait(false))
        {
            yield return ev;
        }
    }

    /// <summary>
    /// Runs a command, dispatches every streamed event to the handlers and returns the
    /// accumulated <see cref="Execution"/>.
    /// </summary>
    /// <param name="command">Command text to run.</param>
    /// <param name="options">Optional execution options, forwarded to <see cref="RunStreamAsync"/>.</param>
    /// <param name="handlers">Optional callbacks handed to the event dispatcher.</param>
    /// <param name="cancellationToken">Token used to cancel the request and the stream.</param>
    /// <returns>The execution populated by the dispatcher once the stream ends.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="command"/> is null.</exception>
    public async Task<Execution> RunAsync(
        string command,
        RunCommandOptions? options = null,
        ExecutionHandlers? handlers = null,
        CancellationToken cancellationToken = default)
    {
        if (command == null)
        {
            throw new ArgumentNullException(nameof(command));
        }

        _logger.LogDebug("Running command (commandLength={CommandLength})", command.Length);
        var execution = new Execution();
        var dispatcher = new ExecutionEventDispatcher(execution, handlers);

        await foreach (var ev in RunStreamAsync(command, options, cancellationToken).ConfigureAwait(false))
        {
            // Keep legacy behavior: if server sends "init" with empty id, preserve previous id
            if (ev.Type == ServerStreamEventTypes.Init && string.IsNullOrEmpty(ev.Text) && !string.IsNullOrEmpty(execution.Id))
            {
                ev.Text = execution.Id;
            }

            await dispatcher.DispatchAsync(ev).ConfigureAwait(false);
        }

        return execution;
    }

    /// <summary>
    /// Sends <c>DELETE /command?id=...</c> for the given identifier.
    /// </summary>
    /// <param name="sessionId">Identifier passed as the <c>id</c> query parameter.</param>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    public async Task InterruptAsync(string sessionId, CancellationToken cancellationToken = default)
    {
        _logger.LogInformation("Interrupting execution: {ExecutionId}", sessionId);
        var queryParams = new Dictionary<string, string?> { ["id"] = sessionId };
        await _client.DeleteAsync("/command", queryParams, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Fetches the status of a command from <c>/command/status/{executionId}</c>.
    /// </summary>
    /// <param name="executionId">Execution identifier; URL-escaped before use.</param>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    /// <exception cref="InvalidArgumentException">
    /// Thrown synchronously when <paramref name="executionId"/> is null, empty or whitespace.
    /// </exception>
    public Task<CommandStatus> GetCommandStatusAsync(string executionId, CancellationToken cancellationToken = default)
    {
        if (string.IsNullOrWhiteSpace(executionId))
        {
            throw new InvalidArgumentException("executionId cannot be empty");
        }

        _logger.LogDebug("Fetching command status: {ExecutionId}", executionId);
        return _client.GetAsync<CommandStatus>($"/command/status/{Uri.EscapeDataString(executionId)}", cancellationToken: cancellationToken);
    }

    /// <summary>
    /// Fetches log output from <c>/command/{executionId}/logs</c>.
    /// </summary>
    /// <param name="executionId">Execution identifier; URL-escaped before use.</param>
    /// <param name="cursor">Optional value sent as the <c>cursor</c> query parameter.</param>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    /// <returns>
    /// The response body as <c>Content</c>, plus the value of the
    /// <c>EXECD-COMMANDS-TAIL-CURSOR</c> response header when it is present and numeric.
    /// </returns>
    /// <exception cref="InvalidArgumentException">
    /// Thrown when <paramref name="executionId"/> is null, empty or whitespace.
    /// </exception>
    /// <exception cref="SandboxApiException">Thrown when the response status is not successful.</exception>
    public async Task<CommandLogs> GetBackgroundCommandLogsAsync(
        string executionId,
        long? cursor = null,
        CancellationToken cancellationToken = default)
    {
        if (string.IsNullOrWhiteSpace(executionId))
        {
            throw new InvalidArgumentException("executionId cannot be empty");
        }

        _logger.LogDebug("Fetching command logs: {ExecutionId} (cursor={Cursor})", executionId, cursor);

        // The cursor travels over the wire, so format and parse it independently of the thread culture.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        var path = $"/command/{Uri.EscapeDataString(executionId)}/logs";
        var query = cursor.HasValue ? $"?cursor={cursor.Value.ToString(invariant)}" : string.Empty;
        var url = $"{_baseUrl}{path}{query}";

        using var request = new HttpRequestMessage(HttpMethod.Get, url);
        using var response = await _client.SendAsync(request, cancellationToken).ConfigureAwait(false);

        var content = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
        if (!response.IsSuccessStatusCode)
        {
            throw CreateApiException(response, content);
        }

        var cursorHeader = response.Headers.TryGetValues("EXECD-COMMANDS-TAIL-CURSOR", out var values)
            ? values.FirstOrDefault()
            : null;
        // A missing or non-numeric header yields a null cursor rather than an error.
        var parsedCursor = long.TryParse(cursorHeader, System.Globalization.NumberStyles.Integer, invariant, out var c)
            ? c
            : (long?)null;

        return new CommandLogs
        {
            Content = content,
            Cursor = parsedCursor
        };
    }

    /// <summary>
    /// Builds a <see cref="SandboxApiException"/> from a failed response.
    /// </summary>
    /// <param name="response">The unsuccessful response; supplies status code and request id header.</param>
    /// <param name="content">The response body already read as text.</param>
    /// <returns>
    /// An exception whose message and error code come from the JSON body's string-valued
    /// <c>message</c> and <c>code</c> fields when available, with generic fallbacks otherwise.
    /// </returns>
    private static SandboxApiException CreateApiException(HttpResponseMessage response, string content)
    {
        var requestId = response.Headers.TryGetValues(Constants.RequestIdHeader, out var values)
            ? values.FirstOrDefault()
            : null;

        string? errorMessage = null;
        string? errorCode = null;
        object? rawBody = content;

        if (!string.IsNullOrEmpty(content))
        {
            try
            {
                var parsed = JsonSerializer.Deserialize<Dictionary<string, JsonElement>>(content, JsonOptions);
                if (parsed != null)
                {
                    rawBody = parsed;
                    // Each field is read independently so a non-string "message" cannot hide a valid "code".
                    errorMessage = ReadStringField(parsed, "message");
                    errorCode = ReadStringField(parsed, "code");
                }
            }
            catch
            {
                // Ignore JSON parse errors and fallback to raw body.
            }
        }

        var message = errorMessage ?? $"Request failed with status code {(int)response.StatusCode}";
        return new SandboxApiException(
            message: message,
            statusCode: (int)response.StatusCode,
            requestId: requestId,
            rawBody: rawBody,
            error: new SandboxError(errorCode ?? SandboxErrorCodes.UnexpectedResponse, errorMessage ?? message));
    }

    /// <summary>
    /// Returns the value of <paramref name="name"/> when it exists and is a JSON string; otherwise null.
    /// </summary>
    private static string? ReadStringField(Dictionary<string, JsonElement> body, string name)
    {
        return body.TryGetValue(name, out var value) && value.ValueKind == JsonValueKind.String
            ? value.GetString()
            : null;
    }
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Adapters/EgressAdapter.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System.Text.Json;
using System.Linq;
using OpenSandbox.Core;
using OpenSandbox.Internal;
using OpenSandbox.Models;
using OpenSandbox.Services;

namespace OpenSandbox.Adapters;

/// <summary>
/// Adapter that reads and patches the egress policy through the <c>/policy</c> endpoint.
/// </summary>
internal sealed class EgressAdapter : IEgress
{
    private readonly HttpClientWrapper _client;

    /// <summary>
    /// Creates the adapter on top of the given HTTP client wrapper.
    /// </summary>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="client"/> is null.</exception>
    public EgressAdapter(HttpClientWrapper client)
    {
        _client = client ?? throw new ArgumentNullException(nameof(client));
    }

    /// <summary>
    /// Fetches <c>/policy</c> and converts the <c>policy</c> object of the response.
    /// </summary>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    /// <exception cref="SandboxApiException">
    /// Thrown when the response is not a JSON object, has no object-valued <c>policy</c>
    /// member, or contains a malformed rule.
    /// </exception>
    public async Task<NetworkPolicy> GetPolicyAsync(CancellationToken cancellationToken = default)
    {
        var response = await _client.GetAsync<JsonElement>("/policy", cancellationToken: cancellationToken).ConfigureAwait(false);

        // TryGetProperty throws on non-object elements, so check the kind first.
        if (response.ValueKind != JsonValueKind.Object ||
            !response.TryGetProperty("policy", out var policyElement) ||
            policyElement.ValueKind != JsonValueKind.Object)
        {
            throw new SandboxApiException("Missing policy in egress response");
        }

        return ParseNetworkPolicy(policyElement);
    }

    /// <summary>
    /// Sends the given rules to <c>/policy</c> with a PATCH request.
    /// </summary>
    /// <param name="rules">Rules to send; each becomes an <c>action</c>/<c>target</c> pair.</param>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="rules"/> is null.</exception>
    public async Task PatchRulesAsync(
        IReadOnlyList<NetworkRule> rules,
        CancellationToken cancellationToken = default)
    {
        if (rules == null)
        {
            throw new ArgumentNullException(nameof(rules));
        }

        var normalizedRules = rules.Select(r => new Dictionary<string, object?>
        {
            ["action"] = r.Action == NetworkRuleAction.Allow ? "allow" : "deny",
            ["target"] = r.Target
        }).ToList();

        await _client.PatchAsync("/policy", normalizedRules, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Converts a policy JSON object; <c>defaultAction</c> and <c>egress</c> are optional and
    /// are only applied when they have the expected JSON kind.
    /// </summary>
    private static NetworkPolicy ParseNetworkPolicy(JsonElement element)
    {
        var policy = new NetworkPolicy();

        if (element.TryGetProperty("defaultAction", out var defaultAction) &&
            defaultAction.ValueKind == JsonValueKind.String)
        {
            policy.DefaultAction = ParseNetworkRuleAction(defaultAction.GetString());
        }

        if (element.TryGetProperty("egress", out var egress) &&
            egress.ValueKind == JsonValueKind.Array)
        {
            policy.Egress = egress.EnumerateArray().Select(ParseNetworkRule).ToList();
        }

        return policy;
    }

    /// <summary>
    /// Converts one rule object.
    /// </summary>
    /// <exception cref="SandboxApiException">
    /// Thrown when the element is not an object, when <c>action</c> is missing or not
    /// "allow"/"deny", or when <c>target</c> is missing or not a string.
    /// </exception>
    private static NetworkRule ParseNetworkRule(JsonElement element)
    {
        if (element.ValueKind != JsonValueKind.Object)
        {
            throw new SandboxApiException("Invalid network rule in egress response");
        }

        // Missing or non-string members become null here so the failures below are
        // reported as SandboxApiException instead of KeyNotFoundException/InvalidOperationException.
        var actionText = ReadString(element, "action");
        var target = ReadString(element, "target");
        return new NetworkRule
        {
            Action = ParseNetworkRuleAction(actionText),
            Target = target ?? throw new SandboxApiException("Missing target in network rule")
        };
    }

    /// <summary>
    /// Returns the string value of <paramref name="name"/>, or null when the member is absent
    /// or is not a JSON string. <paramref name="element"/> must be a JSON object.
    /// </summary>
    private static string? ReadString(JsonElement element, string name)
    {
        return element.TryGetProperty(name, out var value) && value.ValueKind == JsonValueKind.String
            ? value.GetString()
            : null;
    }

    /// <summary>
    /// Maps "allow"/"deny" (case-insensitive) to <see cref="NetworkRuleAction"/>.
    /// </summary>
    /// <exception cref="SandboxApiException">Thrown for null or any other text.</exception>
    private static NetworkRuleAction ParseNetworkRuleAction(string? action)
    {
        return action?.ToLowerInvariant() switch
        {
            "allow" => NetworkRuleAction.Allow,
            "deny" => NetworkRuleAction.Deny,
            _ => throw new SandboxApiException($"Invalid network rule action: {action ?? "<null>"}")
        };
    }
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Adapters/FilesystemAdapter.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System.Net.Http.Headers;
using System.Runtime.CompilerServices;
using System.Text;
using System.Text.Json;
using OpenSandbox.Core;
using OpenSandbox.Internal;
using OpenSandbox.Models;
using OpenSandbox.Services;

namespace OpenSandbox.Adapters;

/// <summary>
/// Adapter for the execd filesystem service.
/// </summary>
/// <remarks>
/// Upload and streaming download use the raw <see cref="HttpClient"/> with the configured
/// headers; the remaining calls go through <see cref="HttpClientWrapper"/>.
/// </remarks>
internal sealed class FilesystemAdapter : ISandboxFiles
{
    private readonly HttpClientWrapper _client;
    private readonly HttpClient _httpClient;
    private readonly string _baseUrl;
    private readonly IReadOnlyDictionary<string, string> _headers;

    // Used to serialize the per-file upload metadata part.
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        PropertyNameCaseInsensitive = true,
        DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull
    };

    /// <summary>
    /// Creates a new filesystem adapter.
    /// </summary>
    /// <param name="client">Wrapper used for the JSON endpoints.</param>
    /// <param name="httpClient">HTTP client used for upload and streaming download.</param>
    /// <param name="baseUrl">Base URL of the service; trailing slashes are trimmed.</param>
    /// <param name="headers">Headers copied onto raw requests; null is treated as empty.</param>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="client"/>, <paramref name="httpClient"/> or
    /// <paramref name="baseUrl"/> is null.
    /// </exception>
    public FilesystemAdapter(
        HttpClientWrapper client,
        HttpClient httpClient,
        string baseUrl,
        IReadOnlyDictionary<string, string> headers)
    {
        _client = client ?? throw new ArgumentNullException(nameof(client));
        _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
        _baseUrl = baseUrl?.TrimEnd('/') ?? throw new ArgumentNullException(nameof(baseUrl));
        _headers = headers ?? new Dictionary<string, string>();
    }

    /// <summary>
    /// Requests <c>/files/info</c> with one <c>path</c> query parameter per entry.
    /// </summary>
    /// <returns>File information keyed by the property names of the response object.</returns>
    public async Task<IReadOnlyDictionary<string, SandboxFileInfo>> GetFileInfoAsync(
        IEnumerable<string> paths,
        CancellationToken cancellationToken = default)
    {
        var pathWithQuery = BuildRepeatedPathQuery("/files/info", "path", paths);
        var response = await _client.GetAsync<JsonElement>(pathWithQuery, cancellationToken: cancellationToken).ConfigureAwait(false);
        return ParseFilesInfoResponse(response);
    }

    /// <summary>
    /// Requests <c>/files/search</c> with the entry's <c>path</c> and <c>pattern</c>.
    /// </summary>
    /// <returns>The parsed entries; empty when the response is not a JSON array.</returns>
    public async Task<IReadOnlyList<SandboxFileInfo>> SearchAsync(
        SearchEntry entry,
        CancellationToken cancellationToken = default)
    {
        var queryParams = new Dictionary<string, string?>
        {
            ["path"] = entry.Path,
            ["pattern"] = entry.Pattern
        };

        var response = await _client.GetAsync<JsonElement>("/files/search", queryParams, cancellationToken).ConfigureAwait(false);
        return ParseSearchFilesResponse(response);
    }

    /// <summary>
    /// Posts a path-to-permission map to <c>/directories</c>. Entries without a mode use 755.
    /// </summary>
    public async Task CreateDirectoriesAsync(
        IEnumerable<CreateDirectoryEntry> entries,
        CancellationToken cancellationToken = default)
    {
        var body = entries.ToDictionary(
            e => e.Path,
            e => new Permission
            {
                Mode = e.Mode ?? 755,
                Owner = e.Owner,
                Group = e.Group
            });

        await _client.PostAsync("/directories", body, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Sends <c>DELETE /directories</c> with one <c>path</c> query parameter per entry.
    /// </summary>
    public async Task DeleteDirectoriesAsync(
        IEnumerable<string> paths,
        CancellationToken cancellationToken = default)
    {
        var pathWithQuery = BuildRepeatedPathQuery("/directories", "path", paths);
        await _client.DeleteAsync(pathWithQuery, cancellationToken: cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Uploads the entries to <c>/files/upload</c> as one multipart form; each entry adds a
    /// <c>metadata</c> JSON part followed by a <c>file</c> part. An empty sequence is a no-op.
    /// </summary>
    /// <exception cref="InvalidArgumentException">Thrown when an entry's data has an unsupported type.</exception>
    /// <exception cref="SandboxApiException">Thrown when the response status is not successful.</exception>
    public async Task WriteFilesAsync(
        IEnumerable<WriteEntry> entries,
        CancellationToken cancellationToken = default)
    {
        var entryList = entries.ToList();
        if (entryList.Count == 0)
        {
            return;
        }
        var url = $"{_baseUrl}/files/upload";

        using var form = new MultipartFormDataContent();
        foreach (var entry in entryList)
        {
            var fileName = GetFileName(entry.Path);
            var metadata = new FileMetadata
            {
                Path = entry.Path,
                Mode = entry.Mode,
                Owner = entry.Owner,
                Group = entry.Group
            };

            var metadataJson = JsonSerializer.Serialize(metadata, JsonOptions);
            var metadataContent = new StringContent(metadataJson, Encoding.UTF8, "application/json");
            form.Add(metadataContent, "metadata", "metadata");

            var fileContent = CreateFileContent(entry.Data);
            form.Add(fileContent, "file", fileName);
        }

        using var request = new HttpRequestMessage(HttpMethod.Post, url)
        {
            Content = form
        };

        foreach (var header in _headers)
        {
            request.Headers.TryAddWithoutValidation(header.Key, header.Value);
        }

        using var response = await _httpClient.SendAsync(request, cancellationToken).ConfigureAwait(false);

        if (!response.IsSuccessStatusCode)
        {
            var content = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
            var requestId = response.Headers.TryGetValues(Constants.RequestIdHeader, out var values)
                ? values.FirstOrDefault()
                : null;

            throw new SandboxApiException(
                message: $"Upload failed (status={(int)response.StatusCode})",
                statusCode: (int)response.StatusCode,
                requestId: requestId,
                rawBody: content);
        }
    }

    /// <summary>
    /// Downloads a file and decodes it as text. The encoding defaults to UTF-8.
    /// </summary>
    public async Task<string> ReadFileAsync(
        string path,
        ReadFileOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        var bytes = await ReadBytesAsync(path, new ReadBytesOptions { Range = options?.Range }, cancellationToken).ConfigureAwait(false);
        var encoding = GetEncoding(options?.Encoding ?? "utf-8");
        return encoding.GetString(bytes);
    }

    /// <summary>
    /// Downloads a file from <c>/files/download</c> as a byte array. A non-empty range option
    /// is sent as the <c>Range</c> header.
    /// </summary>
    public async Task<byte[]> ReadBytesAsync(
        string path,
        ReadBytesOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        var headers = new Dictionary<string, string>();
        var range = options?.Range;
        if (range != null && range.Length > 0)
        {
            headers["Range"] = range;
        }

        var queryParams = new Dictionary<string, string?>
        {
            ["path"] = path
        };

        return await _client.GetBytesAsync("/files/download", queryParams, headers, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Downloads a file from <c>/files/download</c> and yields it in chunks of at most 8192 bytes.
    /// </summary>
    /// <exception cref="SandboxApiException">Thrown when the response status is not successful.</exception>
    public async IAsyncEnumerable<byte[]> ReadBytesStreamAsync(
        string path,
        ReadBytesOptions? options = null,
        [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        var url = $"{_baseUrl}/files/download?path={Uri.EscapeDataString(path)}";

        using var request = new HttpRequestMessage(HttpMethod.Get, url);
        foreach (var header in _headers)
        {
            request.Headers.TryAddWithoutValidation(header.Key, header.Value);
        }

        var range = options?.Range;
        if (range != null && range.Length > 0)
        {
            request.Headers.TryAddWithoutValidation("Range", range);
        }

        // ResponseHeadersRead avoids buffering the whole body before the first chunk is yielded.
        using var response = await _httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);

        if (!response.IsSuccessStatusCode)
        {
            var content = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
            var requestId = response.Headers.TryGetValues(Constants.RequestIdHeader, out var values)
                ? values.FirstOrDefault()
                : null;

            throw new SandboxApiException(
                message: "Download stream failed",
                statusCode: (int)response.StatusCode,
                requestId: requestId,
                rawBody: content);
        }

        // Dispose the stream explicitly rather than relying on the response's disposal.
        using var stream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false);
        var buffer = new byte[8192];
        int bytesRead;

        while ((bytesRead = await stream.ReadAsync(buffer, 0, buffer.Length, cancellationToken).ConfigureAwait(false)) > 0)
        {
            // Copy out of the reusable buffer so each yielded chunk is owned by the consumer.
            var chunk = new byte[bytesRead];
            Array.Copy(buffer, chunk, bytesRead);
            yield return chunk;
        }
    }

    /// <summary>
    /// Sends <c>DELETE /files</c> with one <c>path</c> query parameter per entry.
    /// </summary>
    public async Task DeleteFilesAsync(
        IEnumerable<string> paths,
        CancellationToken cancellationToken = default)
    {
        var pathWithQuery = BuildRepeatedPathQuery("/files", "path", paths);
        await _client.DeleteAsync(pathWithQuery, cancellationToken: cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Posts the source/destination pairs to <c>/files/mv</c>.
    /// </summary>
    public async Task MoveFilesAsync(
        IEnumerable<MoveEntry> entries,
        CancellationToken cancellationToken = default)
    {
        var body = entries.Select(e => new RenameFileItem
        {
            Src = e.Src,
            Dest = e.Dest
        }).ToList();

        await _client.PostAsync("/files/mv", body, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Posts a path-to-replacement map (old/new content) to <c>/files/replace</c>.
    /// </summary>
    public async Task ReplaceContentsAsync(
        IEnumerable<ContentReplaceEntry> entries,
        CancellationToken cancellationToken = default)
    {
        var body = entries.ToDictionary(
            e => e.Path,
            e => new ReplaceFileContentItem
            {
                Old = e.OldContent,
                New = e.NewContent
            });

        await _client.PostAsync("/files/replace", body, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Posts a path-to-permission map to <c>/files/permissions</c>.
    /// </summary>
    public async Task SetPermissionsAsync(
        IEnumerable<SetPermissionEntry> entries,
        CancellationToken cancellationToken = default)
    {
        var body = entries.ToDictionary(
            e => e.Path,
            e => new Permission
            {
                Mode = e.Mode,
                Owner = e.Owner,
                Group = e.Group
            });

        await _client.PostAsync("/files/permissions", body, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Wraps upload data in an <see cref="HttpContent"/>. Supports null (empty body), string
    /// (UTF-8), byte array and <see cref="Stream"/>.
    /// </summary>
    /// <exception cref="InvalidArgumentException">Thrown for any other data type.</exception>
    private static HttpContent CreateFileContent(object? data)
    {
        return data switch
        {
            null => new ByteArrayContent(Array.Empty<byte>()),
            string str => new StringContent(str, Encoding.UTF8),
            byte[] bytes => new ByteArrayContent(bytes),
            Stream stream => new StreamContent(stream),
            _ => throw new InvalidArgumentException($"Unsupported file data type: {data.GetType().FullName}")
        };
    }

    /// <summary>
    /// Returns the last segment of <paramref name="path"/> (split on '/' and '\'), or "file"
    /// when that segment is empty or whitespace.
    /// </summary>
    private static string GetFileName(string path)
    {
        // Split always returns at least one element, so only the segment's content needs checking.
        // A blank name (empty path or trailing separator) is rejected by MultipartFormDataContent.Add.
        var parts = path.Split('/', '\\');
        var name = parts[parts.Length - 1];
        return string.IsNullOrWhiteSpace(name) ? "file" : name;
    }

    /// <summary>
    /// Builds <c>route?key=v1&amp;key=v2...</c> with URL-escaped values. Null or empty values
    /// are skipped; when none remain, the bare route is returned.
    /// </summary>
    private static string BuildRepeatedPathQuery(string route, string key, IEnumerable<string> values)
    {
        var encodedValues = values
            .Where(v => !string.IsNullOrEmpty(v))
            .Select(v => $"{Uri.EscapeDataString(key)}={Uri.EscapeDataString(v)}")
            .ToList();

        if (encodedValues.Count == 0)
        {
            return route;
        }

        return $"{route}?{string.Join("&", encodedValues)}";
    }

    /// <summary>
    /// Resolves an encoding name, handling common aliases before falling back to
    /// <see cref="Encoding.GetEncoding(string)"/>.
    /// </summary>
    private static Encoding GetEncoding(string encodingName)
    {
        return encodingName.ToLowerInvariant() switch
        {
            "utf-8" or "utf8" => Encoding.UTF8,
            "ascii" => Encoding.ASCII,
            "utf-16" or "utf16" or "unicode" => Encoding.Unicode,
            "utf-32" or "utf32" => Encoding.UTF32,
            _ => Encoding.GetEncoding(encodingName)
        };
    }

    /// <summary>
    /// Converts a JSON object of file entries into a dictionary keyed by property name.
    /// Returns an empty dictionary when the element is not an object.
    /// </summary>
    private static IReadOnlyDictionary<string, SandboxFileInfo> ParseFilesInfoResponse(JsonElement element)
    {
        var result = new Dictionary<string, SandboxFileInfo>();

        if (element.ValueKind != JsonValueKind.Object)
            return result;

        foreach (var property in element.EnumerateObject())
        {
            result[property.Name] = ParseFileInfo(property.Value);
        }

        return result;
    }

    /// <summary>
    /// Converts a JSON array of file entries. Returns an empty list when the element is not an array.
    /// </summary>
    private static IReadOnlyList<SandboxFileInfo> ParseSearchFilesResponse(JsonElement element)
    {
        if (element.ValueKind != JsonValueKind.Array)
            return Array.Empty<SandboxFileInfo>();

        return element.EnumerateArray().Select(ParseFileInfo).ToList();
    }

    /// <summary>
    /// Converts one file entry. <c>path</c> is required; every other member is optional and
    /// is left null when absent or of an unexpected JSON kind.
    /// </summary>
    private static SandboxFileInfo ParseFileInfo(JsonElement element)
    {
        // Timestamps come from the server, so parse them independently of the thread culture.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        const System.Globalization.DateTimeStyles styles = System.Globalization.DateTimeStyles.None;

        return new SandboxFileInfo
        {
            Path = element.GetProperty("path").GetString() ?? string.Empty,
            Size = element.TryGetProperty("size", out var size) && size.ValueKind == JsonValueKind.Number
                ? size.GetInt64()
                : null,
            ModifiedAt = element.TryGetProperty("modified_at", out var modifiedAt) && modifiedAt.ValueKind == JsonValueKind.String
                ? DateTime.TryParse(modifiedAt.GetString(), invariant, styles, out var modDate) ? modDate : null
                : null,
            CreatedAt = element.TryGetProperty("created_at", out var createdAt) && createdAt.ValueKind == JsonValueKind.String
                ? DateTime.TryParse(createdAt.GetString(), invariant, styles, out var createDate) ? createDate : null
                : null,
            Mode = element.TryGetProperty("mode", out var mode) && mode.ValueKind == JsonValueKind.Number
                ? mode.GetInt32()
                : null,
            // GetString throws on non-string kinds other than null, so guard on the kind first.
            Owner = element.TryGetProperty("owner", out var owner) && owner.ValueKind == JsonValueKind.String
                ? owner.GetString()
                : null,
            Group = element.TryGetProperty("group", out var group) && group.ValueKind == JsonValueKind.String
                ? group.GetString()
                : null
        };
    }
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Adapters/HealthAdapter.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.Internal;
using OpenSandbox.Services;

namespace OpenSandbox.Adapters;

/// <summary>
/// Health-check adapter for the execd service, backed by its <c>/ping</c> endpoint.
/// </summary>
internal sealed class HealthAdapter : IExecdHealth
{
    private readonly HttpClientWrapper _client;

    /// <summary>
    /// Creates the adapter on top of the given HTTP client wrapper.
    /// </summary>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="client"/> is null.</exception>
    public HealthAdapter(HttpClientWrapper client)
    {
        if (client is null)
        {
            throw new ArgumentNullException(nameof(client));
        }

        _client = client;
    }

    /// <summary>
    /// Issues a GET to <c>/ping</c>.
    /// </summary>
    /// <param name="cancellationToken">Token forwarded to the request.</param>
    /// <returns>
    /// <c>true</c> when the request completes without throwing; <c>false</c> when it throws
    /// anything at all (this includes cancellation, which is not propagated).
    /// </returns>
    public async Task<bool> PingAsync(CancellationToken cancellationToken = default)
    {
        var reachable = false;

        try
        {
            await _client.GetAsync("/ping", cancellationToken: cancellationToken).ConfigureAwait(false);
            reachable = true;
        }
        catch
        {
            // Best-effort probe: every failure is reported as "not reachable".
        }

        return reachable;
    }
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Adapters/MetricsAdapter.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.Internal;
using OpenSandbox.Models;
using OpenSandbox.Services;

namespace OpenSandbox.Adapters;

/// <summary>
/// Metrics adapter for the execd service, backed by its <c>/metrics</c> endpoint.
/// </summary>
internal sealed class MetricsAdapter : IExecdMetrics
{
    private readonly HttpClientWrapper _client;

    /// <summary>
    /// Creates the adapter on top of the given HTTP client wrapper.
    /// </summary>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="client"/> is null.</exception>
    public MetricsAdapter(HttpClientWrapper client)
    {
        if (client is null)
        {
            throw new ArgumentNullException(nameof(client));
        }

        _client = client;
    }

    /// <summary>
    /// Fetches <c>/metrics</c> and converts the payload into <see cref="SandboxMetrics"/>.
    /// </summary>
    /// <param name="cancellationToken">Token forwarded to the request.</param>
    public async Task<SandboxMetrics> GetMetricsAsync(CancellationToken cancellationToken = default)
    {
        var raw = await _client
            .GetAsync<Metrics>("/metrics", cancellationToken: cancellationToken)
            .ConfigureAwait(false);

        return NormalizeMetrics(raw);
    }

    /// <summary>
    /// Copies the wire-level values into the public model, substituting 0 for any missing value.
    /// </summary>
    private static SandboxMetrics NormalizeMetrics(Metrics raw) => new SandboxMetrics
    {
        CpuCount = raw.CpuCount ?? 0,
        CpuUsedPercentage = raw.CpuUsedPct ?? 0,
        MemoryTotalMiB = raw.MemTotalMib ?? 0,
        MemoryUsedMiB = raw.MemUsedMib ?? 0,
        Timestamp = raw.Timestamp ?? 0
    };
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Adapters/SandboxesAdapter.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System.Text.Json;
using System.Linq;
using OpenSandbox.Core;
using OpenSandbox.Internal;
using OpenSandbox.Models;
using OpenSandbox.Services;

namespace OpenSandbox.Adapters;

/// <summary>
/// Adapter for the sandbox lifecycle service.
/// </summary>
/// <remarks>
/// Each public method issues one request through the injected <see cref="HttpClientWrapper"/>.
/// Responses are received as raw <see cref="JsonElement"/> values and mapped onto SDK models by the
/// private <c>Parse*</c> helpers at the bottom of this class.
/// </remarks>
internal sealed class SandboxesAdapter : ISandboxes
{
    private readonly HttpClientWrapper _client;

    // Within this class these options are only used to deserialize the optional image.auth object.
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        PropertyNameCaseInsensitive = true,
        DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull
    };

    /// <summary>
    /// Creates the adapter on top of an existing HTTP wrapper.
    /// </summary>
    /// <param name="client">Wrapper used to issue requests; must not be null.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="client"/> is null.</exception>
    public SandboxesAdapter(HttpClientWrapper client)
    {
        _client = client ?? throw new ArgumentNullException(nameof(client));
    }

    /// <summary>
    /// Sends <c>POST /sandboxes</c> with <paramref name="request"/> as the body.
    /// </summary>
    public async Task<CreateSandboxResponse> CreateSandboxAsync(
        CreateSandboxRequest request,
        CancellationToken cancellationToken = default)
    {
        var response = await _client.PostAsync<JsonElement>("/sandboxes", request, cancellationToken).ConfigureAwait(false);
        return ParseCreateSandboxResponse(response);
    }

    /// <summary>
    /// Sends <c>GET /sandboxes/{id}</c>; the id is URL-escaped before being placed in the path.
    /// </summary>
    public async Task<SandboxInfo> GetSandboxAsync(
        string sandboxId,
        CancellationToken cancellationToken = default)
    {
        var response = await _client.GetAsync<JsonElement>($"/sandboxes/{Uri.EscapeDataString(sandboxId)}", cancellationToken: cancellationToken).ConfigureAwait(false);
        return ParseSandboxInfo(response);
    }

    /// <summary>
    /// Sends <c>GET /sandboxes</c> with an optional query string built from <paramref name="params"/>.
    /// </summary>
    public async Task<ListSandboxesResponse> ListSandboxesAsync(
        ListSandboxesParams? @params = null,
        CancellationToken cancellationToken = default)
    {
        var queryParts = new List<string>();

        if (@params?.States != null && @params.States.Count > 0)
        {
            // The API expects repeated query params: ?state=Running&state=Paused
            queryParts.AddRange(@params.States.Select(state => $"state={Uri.EscapeDataString(state)}"));
        }

        if (@params?.Metadata != null && @params.Metadata.Count > 0)
        {
            // Metadata travels as ONE query parameter whose value is "k=v&k2=v2";
            // the whole value is escaped so its '&' and '=' do not split the outer query string.
            var metadataStr = string.Join("&", @params.Metadata.Select(kv => $"{kv.Key}={kv.Value}"));
            queryParts.Add($"metadata={Uri.EscapeDataString(metadataStr)}");
        }

        if (@params?.Page.HasValue == true)
        {
            queryParts.Add($"page={@params.Page.Value}");
        }

        if (@params?.PageSize.HasValue == true)
        {
            queryParts.Add($"pageSize={@params.PageSize.Value}");
        }

        var path = queryParts.Count > 0
            ? $"/sandboxes?{string.Join("&", queryParts)}"
            : "/sandboxes";

        var response = await _client.GetAsync<JsonElement>(path, cancellationToken: cancellationToken).ConfigureAwait(false);
        return ParseListSandboxesResponse(response);
    }

    /// <summary>
    /// Sends <c>DELETE /sandboxes/{id}</c>.
    /// </summary>
    public async Task DeleteSandboxAsync(
        string sandboxId,
        CancellationToken cancellationToken = default)
    {
        await _client.DeleteAsync($"/sandboxes/{Uri.EscapeDataString(sandboxId)}", cancellationToken: cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Sends <c>POST /sandboxes/{id}/pause</c> without a body.
    /// </summary>
    public async Task PauseSandboxAsync(
        string sandboxId,
        CancellationToken cancellationToken = default)
    {
        await _client.PostAsync($"/sandboxes/{Uri.EscapeDataString(sandboxId)}/pause", cancellationToken: cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Sends <c>POST /sandboxes/{id}/resume</c> without a body.
    /// </summary>
    public async Task ResumeSandboxAsync(
        string sandboxId,
        CancellationToken cancellationToken = default)
    {
        await _client.PostAsync($"/sandboxes/{Uri.EscapeDataString(sandboxId)}/resume", cancellationToken: cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Sends <c>POST /sandboxes/{id}/renew-expiration</c> with <paramref name="request"/> as the body.
    /// </summary>
    public async Task<RenewSandboxExpirationResponse> RenewSandboxExpirationAsync(
        string sandboxId,
        RenewSandboxExpirationRequest request,
        CancellationToken cancellationToken = default)
    {
        var response = await _client.PostAsync<JsonElement>(
            $"/sandboxes/{Uri.EscapeDataString(sandboxId)}/renew-expiration",
            request,
            cancellationToken).ConfigureAwait(false);

        return ParseRenewSandboxExpirationResponse(response);
    }

    /// <summary>
    /// Sends <c>GET /sandboxes/{id}/endpoints/{port}</c> with a <c>use_server_proxy</c> query flag.
    /// </summary>
    /// <exception cref="SandboxApiException">Thrown when the <c>endpoint</c> value in the response is JSON null.</exception>
    public async Task<Endpoint> GetSandboxEndpointAsync(
        string sandboxId,
        int port,
        bool useServerProxy = false,
        CancellationToken cancellationToken = default)
    {
        var queryParams = new Dictionary<string, string?>
        {
            ["use_server_proxy"] = useServerProxy ? "true" : "false"
        };

        var response = await _client.GetAsync<JsonElement>(
            $"/sandboxes/{Uri.EscapeDataString(sandboxId)}/endpoints/{port}",
            queryParams,
            cancellationToken: cancellationToken).ConfigureAwait(false);

        return new Endpoint
        {
            EndpointAddress = response.GetProperty("endpoint").GetString() ?? throw new SandboxApiException("Missing endpoint in response"),
            // A missing or non-object "headers" member yields an empty dictionary rather than null.
            Headers = ParseOptionalStringMap(response, "headers") ?? new Dictionary<string, string>()
        };
    }

    /// <summary>
    /// Parses a required timestamp string and returns it as UTC.
    /// </summary>
    /// <param name="fieldName">Field name used in error messages.</param>
    /// <param name="element">JSON element expected to hold the timestamp string.</param>
    /// <exception cref="SandboxApiException">Thrown when the value is null, empty, or not a parseable date.</exception>
    private static DateTime ParseIsoDate(string fieldName, JsonElement element)
    {
        var value = element.GetString();
        if (string.IsNullOrEmpty(value))
        {
            throw new SandboxApiException($"Invalid {fieldName}: expected ISO string, got null or empty");
        }

        // Parse with the invariant culture: the two-argument TryParse overload uses the current
        // thread culture, so the result of reading a server timestamp would depend on the host's
        // regional settings. AdjustToUniversal converts values carrying "Z" or an explicit offset
        // straight to UTC without a detour through local time.
        if (!DateTime.TryParse(
                value,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.AdjustToUniversal,
                out var date))
        {
            throw new SandboxApiException($"Invalid {fieldName}: {value}");
        }

        // Values without any zone designator come back as Unspecified; ToUniversalTime treats
        // those as local time, which matches the previous behavior for such inputs.
        return date.ToUniversalTime();
    }

    /// <summary>
    /// Like <see cref="ParseIsoDate"/>, but maps a JSON null to a null result.
    /// </summary>
    private static DateTime? ParseOptionalIsoDate(string fieldName, JsonElement element)
    {
        return element.ValueKind == JsonValueKind.Null ? null : ParseIsoDate(fieldName, element);
    }

    /// <summary>
    /// Maps a <c>status</c> object (<c>state</c> required, <c>reason</c>/<c>message</c> optional).
    /// </summary>
    /// <exception cref="SandboxApiException">Thrown when <c>state</c> is JSON null.</exception>
    private static SandboxStatus ParseStatus(JsonElement status)
    {
        return new SandboxStatus
        {
            State = status.GetProperty("state").GetString() ?? throw new SandboxApiException("Missing status.state in response"),
            Reason = status.TryGetProperty("reason", out var reason) ? reason.GetString() : null,
            Message = status.TryGetProperty("message", out var message) ? message.GetString() : null
        };
    }

    /// <summary>
    /// Reads an optional JSON object of string values; returns null when the member is absent or
    /// is not an object. JSON null values inside the object become empty strings.
    /// </summary>
    private static Dictionary<string, string>? ParseOptionalStringMap(JsonElement parent, string propertyName)
    {
        return parent.TryGetProperty(propertyName, out var map) && map.ValueKind == JsonValueKind.Object
            ? map.EnumerateObject().ToDictionary(p => p.Name, p => p.Value.GetString() ?? string.Empty)
            : null;
    }

    /// <summary>
    /// Reads the required <c>entrypoint</c> array; JSON null items become empty strings.
    /// </summary>
    private static List<string> ParseEntrypoint(JsonElement parent)
    {
        return parent.GetProperty("entrypoint").EnumerateArray().Select(e => e.GetString() ?? string.Empty).ToList();
    }

    /// <summary>
    /// Maps a sandbox JSON object onto <see cref="SandboxInfo"/>.
    /// </summary>
    private static SandboxInfo ParseSandboxInfo(JsonElement element)
    {
        var status = element.GetProperty("status");
        var image = element.GetProperty("image");

        return new SandboxInfo
        {
            Id = element.GetProperty("id").GetString() ?? throw new SandboxApiException("Missing id in response"),
            Image = new ImageSpec
            {
                Uri = image.GetProperty("uri").GetString() ?? throw new SandboxApiException("Missing image.uri in response"),
                Auth = image.TryGetProperty("auth", out var auth) && auth.ValueKind != JsonValueKind.Null
                    ? JsonSerializer.Deserialize<ImageAuth>(auth.GetRawText(), JsonOptions)
                    : null
            },
            Entrypoint = ParseEntrypoint(element),
            Metadata = ParseOptionalStringMap(element, "metadata"),
            Status = ParseStatus(status),
            CreatedAt = ParseIsoDate("createdAt", element.GetProperty("createdAt")),
            ExpiresAt = element.TryGetProperty("expiresAt", out var expiresAtElement)
                ? ParseOptionalIsoDate("expiresAt", expiresAtElement)
                : null
        };
    }

    /// <summary>
    /// Maps the create-sandbox JSON object onto <see cref="CreateSandboxResponse"/>.
    /// </summary>
    private static CreateSandboxResponse ParseCreateSandboxResponse(JsonElement element)
    {
        var status = element.GetProperty("status");

        return new CreateSandboxResponse
        {
            Id = element.GetProperty("id").GetString() ?? throw new SandboxApiException("Missing id in response"),
            Status = ParseStatus(status),
            Metadata = ParseOptionalStringMap(element, "metadata"),
            CreatedAt = ParseIsoDate("createdAt", element.GetProperty("createdAt")),
            ExpiresAt = element.TryGetProperty("expiresAt", out var expiresAtElement)
                ? ParseOptionalIsoDate("expiresAt", expiresAtElement)
                : null,
            Entrypoint = ParseEntrypoint(element)
        };
    }

    /// <summary>
    /// Maps the list response: a required <c>items</c> array plus an optional <c>pagination</c> object.
    /// </summary>
    private static ListSandboxesResponse ParseListSandboxesResponse(JsonElement element)
    {
        var items = element.GetProperty("items").EnumerateArray().Select(ParseSandboxInfo).ToList();

        PaginationInfo? pagination = null;
        if (element.TryGetProperty("pagination", out var paginationElement) && paginationElement.ValueKind == JsonValueKind.Object)
        {
            // Every pagination member is optional; absent numbers default to 0 and the flag to false.
            pagination = new PaginationInfo
            {
                Page = paginationElement.TryGetProperty("page", out var page) ? page.GetInt32() : 0,
                PageSize = paginationElement.TryGetProperty("pageSize", out var pageSize) ? pageSize.GetInt32() : 0,
                TotalItems = paginationElement.TryGetProperty("totalItems", out var totalItems) ? totalItems.GetInt32() : 0,
                TotalPages = paginationElement.TryGetProperty("totalPages", out var totalPages) ? totalPages.GetInt32() : 0,
                HasNextPage = paginationElement.TryGetProperty("hasNextPage", out var hasNextPage) && hasNextPage.GetBoolean()
            };
        }

        return new ListSandboxesResponse
        {
            Items = items,
            Pagination = pagination
        };
    }

    /// <summary>
    /// Maps the renew-expiration response; <c>expiresAt</c> is only read when it is a JSON string.
    /// </summary>
    private static RenewSandboxExpirationResponse ParseRenewSandboxExpirationResponse(JsonElement element)
    {
        DateTime? expiresAt = null;
        if (element.TryGetProperty("expiresAt", out var expiresAtElement) && expiresAtElement.ValueKind == JsonValueKind.String)
        {
            expiresAt = ParseIsoDate("expiresAt", expiresAtElement);
        }

        return new RenewSandboxExpirationResponse
        {
            ExpiresAt = expiresAt
        };
    }
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Adapters/SseParser.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System.Runtime.CompilerServices;
using System.Text;
using System.Text.Json;
using OpenSandbox.Core;

namespace OpenSandbox.Adapters;

/// <summary>
/// Parser for Server-Sent Events (SSE) streams.
/// Supports both standard SSE frames (data: {...}) and newline-delimited JSON.
/// </summary>
internal static class SseParser
{
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNameCaseInsensitive = true
    };

    /// <summary>
    /// Parses an SSE-like stream that may be either:
    /// - standard SSE frames (data: {...}\n\n)
    /// - newline-delimited JSON (one JSON object per line)
    /// </summary>
    /// <remarks>
    /// Each non-empty line is handled on its own: comment lines (leading ':') and
    /// <c>event:</c>/<c>id:</c>/<c>retry:</c> lines are skipped, a leading <c>data:</c> prefix is removed,
    /// and the remainder is deserialized as JSON. Lines that fail to deserialize are dropped silently.
    /// </remarks>
    /// <typeparam name="T">The type to deserialize each event to.</typeparam>
    /// <param name="response">The HTTP response to parse.</param>
    /// <param name="fallbackErrorMessage">Error message to use when a failed response carries no <c>message</c> field.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>An async enumerable of parsed events.</returns>
    /// <exception cref="SandboxApiException">Thrown when <paramref name="response"/> has a non-success status code.</exception>
    public static async IAsyncEnumerable<T> ParseJsonEventStreamAsync<T>(
        HttpResponseMessage response,
        string? fallbackErrorMessage = null,
        [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        if (!response.IsSuccessStatusCode)
        {
            throw await CreateStreamFailureAsync(response, fallbackErrorMessage, cancellationToken).ConfigureAwait(false);
        }

        // Honor the caller's token while obtaining the stream where the framework offers the overload.
#if NET5_0_OR_GREATER
        var stream = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false);
#else
        var stream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false);
#endif
        using var reader = new StreamReader(stream, Encoding.UTF8);

        while (true)
        {
            cancellationToken.ThrowIfCancellationRequested();

#if NET7_0_OR_GREATER
            var line = await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false);
#else
            var line = await reader.ReadLineAsync().ConfigureAwait(false);
#endif
            if (line == null)
                break;

            var trimmedLine = line.Trim();

            // Skip empty lines
            if (string.IsNullOrEmpty(trimmedLine))
                continue;

            // Skip SSE comments (ordinal check; trimmedLine is non-empty here)
            if (trimmedLine[0] == ':')
                continue;

            // Skip SSE metadata lines
            if (trimmedLine.StartsWith("event:", StringComparison.OrdinalIgnoreCase) ||
                trimmedLine.StartsWith("id:", StringComparison.OrdinalIgnoreCase) ||
                trimmedLine.StartsWith("retry:", StringComparison.OrdinalIgnoreCase))
                continue;

            // Extract JSON from SSE data line or use as-is for NDJSON
            var jsonLine = trimmedLine.StartsWith("data:", StringComparison.OrdinalIgnoreCase)
                ? trimmedLine.Substring(5).Trim()
                : trimmedLine;

            if (string.IsNullOrEmpty(jsonLine))
                continue;

            var parsedEvent = TryParseJson<T>(jsonLine);
            if (parsedEvent != null)
            {
                yield return parsedEvent;
            }
        }
    }

    /// <summary>
    /// Builds the exception for a non-success stream response from its status, request-id header and body.
    /// </summary>
    /// <param name="response">The failed HTTP response.</param>
    /// <param name="fallbackErrorMessage">Message used when the body has no <c>message</c> field.</param>
    /// <param name="cancellationToken">Token applied to the body read where the framework supports it.</param>
    /// <returns>The exception to throw; it is returned, not thrown, by this helper.</returns>
    private static async Task<SandboxApiException> CreateStreamFailureAsync(
        HttpResponseMessage response,
        string? fallbackErrorMessage,
        CancellationToken cancellationToken)
    {
#if NET5_0_OR_GREATER
        var text = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false);
#else
        var text = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
#endif
        var requestId = response.Headers.TryGetValues(Constants.RequestIdHeader, out var values)
            ? values.FirstOrDefault()
            : null;

        object? parsed = null;
        string? errorMessage = null;
        string? errorCode = null;

        if (!string.IsNullOrEmpty(text))
        {
            try
            {
                parsed = JsonSerializer.Deserialize<Dictionary<string, object>>(text, JsonOptions);
                if (parsed is Dictionary<string, object> dict)
                {
                    if (dict.TryGetValue("message", out var msg))
                        errorMessage = msg?.ToString();
                    if (dict.TryGetValue("code", out var code))
                        errorCode = code?.ToString();
                }
            }
            catch
            {
                // Deliberate best effort: a body that is not a JSON object must not hide the
                // HTTP failure itself, so the raw text is reported instead.
            }
        }

        var message = errorMessage ?? fallbackErrorMessage ?? $"Stream request failed (status={(int)response.StatusCode})";
        var sandboxErrorCode = errorCode ?? SandboxErrorCodes.UnexpectedResponse;

        return new SandboxApiException(
            message: message,
            statusCode: (int)response.StatusCode,
            requestId: requestId,
            rawBody: parsed ?? text,
            error: new SandboxError(sandboxErrorCode, errorMessage ?? message));
    }

    /// <summary>
    /// Deserializes one JSON line, returning <c>default</c> instead of throwing when that fails.
    /// </summary>
    private static T? TryParseJson<T>(string json)
    {
        try
        {
            return JsonSerializer.Deserialize<T>(json, JsonOptions);
        }
        catch
        {
            // Deliberate best effort: a malformed line is skipped so the rest of the stream can continue.
            return default;
        }
    }
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Config/ConnectionConfig.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.Core;

namespace OpenSandbox.Config;

/// <summary>
/// URL scheme used when talking to the OpenSandbox API.
/// </summary>
public enum ConnectionProtocol
{
    /// <summary>
    /// Plain-text HTTP.
    /// </summary>
    Http = 0,

    /// <summary>
    /// HTTP over TLS.
    /// </summary>
    Https = 1
}

/// <summary>
/// Options for configuring a <see cref="ConnectionConfig"/>.
/// </summary>
/// <remarks>
/// Every property is optional. The fallbacks described on each property are the ones applied by
/// the <see cref="ConnectionConfig"/> constructor when the property is left null.
/// </remarks>
public class ConnectionConfigOptions
{
    /// <summary>
    /// Gets or sets the API server domain (host[:port]) without scheme.
    /// Examples: "localhost:8080", "api.opensandbox.io"
    /// You may also pass a full URL (e.g. "http://localhost:8080" or "https://api.example.com").
    /// When null, the OPEN_SANDBOX_DOMAIN environment variable is used, then "localhost:8080".
    /// </summary>
    public string? Domain { get; set; }

    /// <summary>
    /// Gets or sets the connection protocol (http or https).
    /// A scheme embedded in <see cref="Domain"/> takes precedence over this value;
    /// when neither is given, HTTP is used.
    /// </summary>
    public ConnectionProtocol? Protocol { get; set; }

    /// <summary>
    /// Gets or sets the API key for authentication.
    /// When null, the OPEN_SANDBOX_API_KEY environment variable is used.
    /// </summary>
    public string? ApiKey { get; set; }

    /// <summary>
    /// Gets or sets additional headers to include in requests.
    /// The entries are copied into the configuration; an API-key header is added to the copy
    /// when an API key is available and that header is not already present.
    /// </summary>
    public Dictionary<string, string>? Headers { get; set; }

    /// <summary>
    /// Gets or sets the request timeout in seconds.
    /// Defaults to 30 seconds.
    /// </summary>
    public int? RequestTimeoutSeconds { get; set; }

    /// <summary>
    /// Gets or sets whether to use server-proxied endpoint URLs.
    /// Treated as false when null.
    /// </summary>
    public bool? UseServerProxy { get; set; }
}

/// <summary>
/// Configuration for connecting to the OpenSandbox API.
/// </summary>
/// <remarks>
/// This type is thread-safe for concurrent reads and lazy <see cref="GetHttpClient"/> initialization.
/// The HttpClient returned by <see cref="GetHttpClient"/> is shared per <see cref="ConnectionConfig"/> instance.
/// </remarks>
public sealed class ConnectionConfig
{
    // Used when neither the options nor the environment supply a usable domain.
    private const string FallbackDomain = "localhost:8080";

    /// <summary>
    /// Gets the connection protocol.
    /// </summary>
    public ConnectionProtocol Protocol { get; }

    /// <summary>
    /// Gets the API server domain.
    /// When the configured value was a full URL, this keeps the scheme and any path prefix
    /// (with a trailing "/v1" removed).
    /// </summary>
    public string Domain { get; }

    /// <summary>
    /// Gets the API key for authentication.
    /// </summary>
    public string? ApiKey { get; }

    /// <summary>
    /// Gets the additional headers to include in requests.
    /// </summary>
    public IReadOnlyDictionary<string, string> Headers { get; }

    /// <summary>
    /// Gets the request timeout in seconds.
    /// </summary>
    public int RequestTimeoutSeconds { get; }

    /// <summary>
    /// Gets whether server-proxied endpoint URLs should be requested.
    /// </summary>
    public bool UseServerProxy { get; }

    /// <summary>
    /// Gets the user agent string.
    /// </summary>
    public string UserAgent { get; } = Constants.DefaultUserAgent;

    private HttpClient? _httpClient;
    private readonly object _httpClientLock = new();

    /// <summary>
    /// Initializes a new instance of the <see cref="ConnectionConfig"/> class.
    /// </summary>
    /// <remarks>
    /// Resolution order for the domain: <see cref="ConnectionConfigOptions.Domain"/>, then the
    /// environment variable named by <see cref="Constants.EnvDomain"/>, then "localhost:8080".
    /// Null, empty and whitespace-only values are treated as "not provided".
    /// </remarks>
    /// <param name="options">The configuration options.</param>
    public ConnectionConfig(ConnectionConfigOptions? options = null)
    {
        options ??= new ConnectionConfigOptions();

        var envDomain = Environment.GetEnvironmentVariable(Constants.EnvDomain);
        var envApiKey = Environment.GetEnvironmentVariable(Constants.EnvApiKey);

        // An empty value (e.g. an exported-but-empty environment variable) must not win over
        // the next source: it would otherwise produce a base URL without a host.
        var rawDomain = FirstNonBlank(options.Domain, envDomain) ?? FallbackDomain;
        var (protocol, domainBase) = NormalizeDomainBase(rawDomain.Trim());

        // A scheme embedded in the domain wins over the explicit option.
        Protocol = protocol ?? options.Protocol ?? ConnectionProtocol.Http;
        Domain = domainBase;
        ApiKey = options.ApiKey ?? envApiKey;
        RequestTimeoutSeconds = options.RequestTimeoutSeconds ?? Constants.DefaultRequestTimeoutSeconds;
        UseServerProxy = options.UseServerProxy ?? false;

        var headers = new Dictionary<string, string>(options.Headers ?? new Dictionary<string, string>());

        // Add API key header if not already present. HTTP header names are case-insensitive,
        // so a caller-supplied "open-sandbox-api-key" counts as present too; adding a second
        // entry would put two values on the same header.
        if (!string.IsNullOrEmpty(ApiKey) && !ContainsHeader(headers, Constants.ApiKeyHeader))
        {
            headers[Constants.ApiKeyHeader] = ApiKey;
        }

        Headers = headers;
    }

    /// <summary>
    /// Gets the base URL for API requests.
    /// </summary>
    /// <returns>The base URL including the /v1 prefix.</returns>
    public string GetBaseUrl()
    {
        // Domain already carries its scheme when it was configured as a full URL.
        if (Domain.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
            Domain.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
        {
            return $"{StripV1Suffix(Domain)}/v1";
        }

        var scheme = Protocol == ConnectionProtocol.Https ? "https" : "http";
        return $"{scheme}://{StripV1Suffix(Domain)}/v1";
    }

    /// <summary>
    /// Gets or creates an HttpClient configured for this connection.
    /// </summary>
    /// <returns>A configured HttpClient instance.</returns>
    public HttpClient GetHttpClient()
    {
        if (_httpClient != null)
        {
            return _httpClient;
        }

        lock (_httpClientLock)
        {
            if (_httpClient != null)
            {
                return _httpClient;
            }

            _httpClient = CreateHttpClient();
            return _httpClient;
        }
    }

    /// <summary>
    /// Creates a new HttpClient configured for this connection.
    /// </summary>
    /// <returns>A new configured HttpClient instance.</returns>
    public HttpClient CreateHttpClient()
    {
        return BuildClient(TimeSpan.FromSeconds(RequestTimeoutSeconds));
    }

    /// <summary>
    /// Creates a new HttpClient configured for SSE (Server-Sent Events) streaming.
    /// This client has no timeout to allow for long-running streams.
    /// </summary>
    /// <returns>A new configured HttpClient instance for SSE.</returns>
    public HttpClient CreateSseHttpClient()
    {
        return BuildClient(System.Threading.Timeout.InfiniteTimeSpan);
    }

    /// <summary>
    /// Creates a client with GZip/Deflate decompression, the given timeout, the user agent and
    /// all configured default headers.
    /// </summary>
    /// <param name="timeout">Value assigned to <see cref="HttpClient.Timeout"/>.</param>
    private HttpClient BuildClient(TimeSpan timeout)
    {
        var handler = new HttpClientHandler
        {
            AutomaticDecompression = System.Net.DecompressionMethods.GZip | System.Net.DecompressionMethods.Deflate
        };

        var client = new HttpClient(handler);
        try
        {
            client.Timeout = timeout;

            // Set default headers
            client.DefaultRequestHeaders.UserAgent.ParseAdd(UserAgent);

            foreach (var header in Headers)
            {
                // Headers rejected here are skipped; the return value is intentionally ignored,
                // as it was before.
                client.DefaultRequestHeaders.TryAddWithoutValidation(header.Key, header.Value);
            }

            return client;
        }
        catch
        {
            // Do not leak the client (and the handler it owns) when configuration fails,
            // e.g. because of an out-of-range timeout.
            client.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Returns the first candidate that is neither null, empty nor whitespace-only, or null if none is.
    /// </summary>
    private static string? FirstNonBlank(params string?[] candidates)
    {
        foreach (var candidate in candidates)
        {
            if (!string.IsNullOrWhiteSpace(candidate))
            {
                return candidate;
            }
        }

        return null;
    }

    /// <summary>
    /// Checks for a header name using case-insensitive comparison.
    /// </summary>
    private static bool ContainsHeader(Dictionary<string, string> headers, string name)
    {
        foreach (var key in headers.Keys)
        {
            if (string.Equals(key, name, StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Splits the configured domain into an optional protocol and a base without trailing
    /// slashes or "/v1" suffix.
    /// </summary>
    private static (ConnectionProtocol?, string) NormalizeDomainBase(string input)
    {
        // Accept a full URL and preserve its path prefix (if any)
        if (input.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
            input.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
        {
            var uri = new Uri(input);
            var protocol = uri.Scheme.Equals("https", StringComparison.OrdinalIgnoreCase)
                ? ConnectionProtocol.Https
                : ConnectionProtocol.Http;

            // Query string and fragment are dropped; only scheme, authority and path are kept.
            var baseUrl = $"{uri.Scheme}://{uri.Authority}{uri.AbsolutePath}";
            return (protocol, StripV1Suffix(baseUrl.TrimEnd('/')));
        }

        // No scheme: treat as "host[:port]" or "host[:port]/prefix"
        return (null, StripV1Suffix(input.TrimEnd('/')));
    }

    /// <summary>
    /// Removes trailing slashes and one trailing "/v1" segment (case-insensitive), if present.
    /// </summary>
    private static string StripV1Suffix(string s)
    {
        var trimmed = s.TrimEnd('/');
        return trimmed.EndsWith("/v1", StringComparison.OrdinalIgnoreCase)
            ? trimmed[..^3]
            : trimmed;
    }
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Config/DiagnosticsOptions.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using Microsoft.Extensions.Logging;

namespace OpenSandbox.Config;

/// <summary>
/// Diagnostics options for SDK logging.
/// </summary>
public sealed class SdkDiagnosticsOptions
{
    /// <summary>
    /// Gets or sets the logger factory used by the SDK.
    /// No default is assigned here, so the value is null unless the caller sets it.
    /// </summary>
    public ILoggerFactory? LoggerFactory { get; set; }
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Core/Constants.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

namespace OpenSandbox.Core;

/// <summary>
/// Shared default values, environment variable names and header names for the OpenSandbox SDK.
/// </summary>
public static class Constants
{
    /// <summary>
    /// Port on which the execd service listens by default.
    /// </summary>
    public const int DefaultExecdPort = 44772;

    /// <summary>
    /// Port on which the egress sidecar service listens by default.
    /// </summary>
    public const int DefaultEgressPort = 18080;

    /// <summary>
    /// Entrypoint command used for sandbox containers by default (<c>tail -f /dev/null</c>).
    /// </summary>
    public static readonly string[] DefaultEntrypoint = { "tail", "-f", "/dev/null" };

    /// <summary>
    /// Resource limits applied to sandbox containers by default.
    /// </summary>
    public static readonly IReadOnlyDictionary<string, string> DefaultResourceLimits = new Dictionary<string, string>
    {
        { "cpu", "1" },
        { "memory", "2Gi" }
    };

    /// <summary>
    /// Sandbox timeout used by default, in seconds (10 minutes).
    /// </summary>
    public const int DefaultTimeoutSeconds = 600;

    /// <summary>
    /// Time to wait for a sandbox to become ready by default, in seconds.
    /// </summary>
    public const int DefaultReadyTimeoutSeconds = 30;

    /// <summary>
    /// Interval between health check polls by default, in milliseconds.
    /// </summary>
    public const int DefaultHealthCheckPollingIntervalMillis = 200;

    /// <summary>
    /// HTTP request timeout used by default, in seconds.
    /// </summary>
    public const int DefaultRequestTimeoutSeconds = 30;

    /// <summary>
    /// User agent string sent with SDK HTTP requests by default.
    /// </summary>
    public const string DefaultUserAgent = "OpenSandbox-CSharp-SDK/0.1.0";

    /// <summary>
    /// Name of the environment variable holding the OpenSandbox domain.
    /// </summary>
    public const string EnvDomain = "OPEN_SANDBOX_DOMAIN";

    /// <summary>
    /// Name of the environment variable holding the OpenSandbox API key.
    /// </summary>
    public const string EnvApiKey = "OPEN_SANDBOX_API_KEY";

    /// <summary>
    /// Name of the HTTP header carrying the API key.
    /// </summary>
    public const string ApiKeyHeader = "OPEN-SANDBOX-API-KEY";

    /// <summary>
    /// Name of the HTTP header carrying the request ID.
    /// </summary>
    public const string RequestIdHeader = "x-request-id";
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Core/Exceptions.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

namespace OpenSandbox.Core;

/// <summary>
/// Error codes used by the OpenSandbox SDK.
/// The values are plain strings intended for use as <c>SandboxError.Code</c>.
/// </summary>
public static class SandboxErrorCodes
{
    /// <summary>
    /// An internal unknown error occurred.
    /// <c>SandboxException</c> uses this code when it is constructed without a structured error.
    /// </summary>
    public const string InternalUnknownError = "INTERNAL_UNKNOWN_ERROR";

    /// <summary>
    /// Timeout waiting for sandbox to become ready.
    /// </summary>
    public const string ReadyTimeout = "READY_TIMEOUT";

    /// <summary>
    /// Sandbox is unhealthy.
    /// </summary>
    public const string Unhealthy = "UNHEALTHY";

    /// <summary>
    /// Invalid argument provided.
    /// </summary>
    public const string InvalidArgument = "INVALID_ARGUMENT";

    /// <summary>
    /// Unexpected response from the server.
    /// </summary>
    public const string UnexpectedResponse = "UNEXPECTED_RESPONSE";
}

/// <summary>
/// Immutable code/message pair describing an SDK error; carried by <see cref="SandboxException"/>.
/// </summary>
public sealed class SandboxError
{
    /// <summary>
    /// Gets the stable programmatic error code. Never null.
    /// </summary>
    public string Code { get; }

    /// <summary>
    /// Gets the human-readable error message, or null when none was supplied.
    /// </summary>
    public string? Message { get; }

    /// <summary>
    /// Creates an error from a required code and an optional message.
    /// </summary>
    /// <param name="code">The error code; must not be null.</param>
    /// <param name="message">The optional error message.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="code"/> is null.</exception>
    public SandboxError(string code, string? message = null)
    {
        if (code is null)
        {
            throw new ArgumentNullException(nameof(code));
        }

        Code = code;
        Message = message;
    }

    /// <summary>
    /// Formats the error as <c>[CODE] message</c>, or just <c>[CODE]</c> when there is no message.
    /// </summary>
    public override string ToString()
    {
        if (Message == null)
        {
            return $"[{Code}]";
        }

        return $"[{Code}] {Message}";
    }
}

/// <summary>
/// Root of the OpenSandbox SDK exception hierarchy.
/// </summary>
public class SandboxException : Exception
{
    /// <summary>
    /// Gets the structured error information. Never null: a default is synthesized when none is supplied.
    /// </summary>
    public SandboxError Error { get; }

    /// <summary>
    /// Gets the request ID from the server response when available.
    /// </summary>
    public string? RequestId { get; }

    /// <summary>
    /// Three-argument overload that forwards to the four-argument constructor with a null request ID.
    /// Kept for binary compatibility with previous SDK versions.
    /// </summary>
    /// <param name="message">The error message.</param>
    /// <param name="innerException">The inner exception.</param>
    /// <param name="error">The structured error information.</param>
    public SandboxException(
        string? message,
        Exception? innerException,
        SandboxError? error)
        : this(message, innerException, error, requestId: null)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="SandboxException"/> class.
    /// </summary>
    /// <remarks>
    /// The exception message is <paramref name="message"/> if given, otherwise the message of
    /// <paramref name="error"/>. When <paramref name="error"/> is null, <see cref="Error"/> is set to
    /// a new error with code <see cref="SandboxErrorCodes.InternalUnknownError"/> and
    /// <paramref name="message"/> as its message.
    /// </remarks>
    /// <param name="message">The error message.</param>
    /// <param name="innerException">The inner exception.</param>
    /// <param name="error">The structured error information.</param>
    /// <param name="requestId">The request ID.</param>
    public SandboxException(
        string? message = null,
        Exception? innerException = null,
        SandboxError? error = null,
        string? requestId = null)
        : base(message ?? error?.Message, innerException)
    {
        RequestId = requestId;
        Error = error is not null
            ? error
            : new SandboxError(SandboxErrorCodes.InternalUnknownError, message);
    }
}

/// <summary>
/// Raised when a call to the remote API does not succeed.
/// </summary>
public class SandboxApiException : SandboxException
{
    /// <summary>
    /// Initializes a new instance of the <see cref="SandboxApiException"/> class.
    /// </summary>
    /// <param name="message">The error message.</param>
    /// <param name="statusCode">The HTTP status code.</param>
    /// <param name="requestId">The request ID.</param>
    /// <param name="rawBody">The raw response body.</param>
    /// <param name="innerException">The inner exception.</param>
    /// <param name="error">
    /// The structured error information. When <c>null</c>, an error with
    /// <see cref="SandboxErrorCodes.UnexpectedResponse"/> and <paramref name="message"/> is used.
    /// </param>
    public SandboxApiException(
        string? message = null,
        int? statusCode = null,
        string? requestId = null,
        object? rawBody = null,
        Exception? innerException = null,
        SandboxError? error = null)
        : base(message, innerException, ResolveError(error, message), requestId)
    {
        RawBody = rawBody;
        StatusCode = statusCode;
    }

    /// <summary>
    /// Gets the HTTP status code of the failed request.
    /// </summary>
    public int? StatusCode { get; }

    /// <summary>
    /// Gets the request ID from the server response when available.
    /// Kept on the derived type for binary compatibility with older releases.
    /// </summary>
    public new string? RequestId
    {
        get { return base.RequestId; }
    }

    /// <summary>
    /// Gets the raw response body.
    /// </summary>
    public object? RawBody { get; }

    // Supplies the UNEXPECTED_RESPONSE fallback when the caller gave no structured error.
    private static SandboxError ResolveError(SandboxError? error, string? message)
    {
        if (error != null)
        {
            return error;
        }

        return new SandboxError(SandboxErrorCodes.UnexpectedResponse, message);
    }
}

/// <summary>
/// Signals a failure that originated inside the SDK itself.
/// </summary>
public class SandboxInternalException : SandboxException
{
    /// <summary>
    /// Creates the exception and tags it with <see cref="SandboxErrorCodes.InternalUnknownError"/>.
    /// </summary>
    /// <param name="message">The error message.</param>
    /// <param name="innerException">The inner exception.</param>
    public SandboxInternalException(string? message = null, Exception? innerException = null)
        : base(message, innerException, DescribeFailure(message))
    {
    }

    // Builds the structured error carried by this exception type.
    private static SandboxError DescribeFailure(string? message)
    {
        return new SandboxError(SandboxErrorCodes.InternalUnknownError, message);
    }
}

/// <summary>
/// Signals that a sandbox was found to be unhealthy.
/// </summary>
public class SandboxUnhealthyException : SandboxException
{
    /// <summary>
    /// Creates the exception and tags it with <see cref="SandboxErrorCodes.Unhealthy"/>.
    /// </summary>
    /// <param name="message">The error message.</param>
    /// <param name="innerException">The inner exception.</param>
    public SandboxUnhealthyException(string? message = null, Exception? innerException = null)
        : base(message, innerException, DescribeFailure(message))
    {
    }

    // Builds the structured error carried by this exception type.
    private static SandboxError DescribeFailure(string? message)
    {
        return new SandboxError(SandboxErrorCodes.Unhealthy, message);
    }
}

/// <summary>
/// Signals that the sandbox did not become ready before the wait timed out.
/// </summary>
public class SandboxReadyTimeoutException : SandboxException
{
    /// <summary>
    /// Creates the exception and tags it with <see cref="SandboxErrorCodes.ReadyTimeout"/>.
    /// </summary>
    /// <param name="message">The error message.</param>
    /// <param name="innerException">The inner exception.</param>
    public SandboxReadyTimeoutException(string? message = null, Exception? innerException = null)
        : base(message, innerException, DescribeFailure(message))
    {
    }

    // Builds the structured error carried by this exception type.
    private static SandboxError DescribeFailure(string? message)
    {
        return new SandboxError(SandboxErrorCodes.ReadyTimeout, message);
    }
}

/// <summary>
/// Signals that a caller supplied an argument the SDK cannot accept.
/// </summary>
public class InvalidArgumentException : SandboxException
{
    /// <summary>
    /// Creates the exception and tags it with <see cref="SandboxErrorCodes.InvalidArgument"/>.
    /// </summary>
    /// <param name="message">The error message.</param>
    /// <param name="innerException">The inner exception.</param>
    public InvalidArgumentException(string? message = null, Exception? innerException = null)
        : base(message, innerException, DescribeFailure(message))
    {
    }

    // Builds the structured error carried by this exception type.
    private static SandboxError DescribeFailure(string? message)
    {
        return new SandboxError(SandboxErrorCodes.InvalidArgument, message);
    }
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Factory/DefaultAdapterFactory.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.Adapters;
using OpenSandbox.Internal;
using Microsoft.Extensions.Logging;

namespace OpenSandbox.Factory;

/// <summary>
/// Default implementation of the adapter factory.
/// </summary>
/// <remarks>
/// Each stack is backed by its own HTTP client wrapper that is bound to the stack's base URL and
/// headers, while the underlying <see cref="HttpClient"/> instances come from the options'
/// HTTP client provider.
/// </remarks>
public sealed class DefaultAdapterFactory : IAdapterFactory
{
    // Logger category names, kept in one place so every stack logs under the same categories.
    private const string HttpClientWrapperLoggerCategory = "OpenSandbox.HttpClientWrapper";
    private const string CommandsAdapterLoggerCategory = "OpenSandbox.CommandsAdapter";

    /// <summary>
    /// Creates a new instance of the default adapter factory.
    /// </summary>
    /// <returns>A new adapter factory instance.</returns>
    public static IAdapterFactory Create() => new DefaultAdapterFactory();

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is <c>null</c>.</exception>
    public LifecycleStack CreateLifecycleStack(CreateLifecycleStackOptions options)
    {
        if (options == null)
        {
            throw new ArgumentNullException(nameof(options));
        }

        var clientWrapper = CreateClientWrapper(
            options.HttpClientProvider.HttpClient,
            options.LifecycleBaseUrl,
            options.ConnectionConfig.Headers,
            options.LoggerFactory);

        var sandboxes = new SandboxesAdapter(clientWrapper);

        return new LifecycleStack
        {
            Sandboxes = sandboxes
        };
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is <c>null</c>.</exception>
    public ExecdStack CreateExecdStack(CreateExecdStackOptions options)
    {
        if (options == null)
        {
            throw new ArgumentNullException(nameof(options));
        }

        // Execd-specific headers take precedence; otherwise fall back to the connection-wide headers.
        var headers = options.ExecdHeaders ?? options.ConnectionConfig.Headers;

        var clientWrapper = CreateClientWrapper(
            options.HttpClientProvider.HttpClient,
            options.ExecdBaseUrl,
            headers,
            options.LoggerFactory);

        var health = new HealthAdapter(clientWrapper);
        var metrics = new MetricsAdapter(clientWrapper);
        var files = new FilesystemAdapter(
            clientWrapper,
            options.HttpClientProvider.HttpClient,
            options.ExecdBaseUrl,
            headers);
        // Commands are the only adapter handed the provider's SSE client instead of the regular one.
        var commands = new CommandsAdapter(
            clientWrapper,
            options.HttpClientProvider.SseHttpClient,
            options.ExecdBaseUrl,
            headers,
            options.LoggerFactory.CreateLogger(CommandsAdapterLoggerCategory));

        return new ExecdStack
        {
            Commands = commands,
            Files = files,
            Health = health,
            Metrics = metrics
        };
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is <c>null</c>.</exception>
    public EgressStack CreateEgressStack(CreateEgressStackOptions options)
    {
        if (options == null)
        {
            throw new ArgumentNullException(nameof(options));
        }

        // Egress-specific headers take precedence; otherwise fall back to the connection-wide headers.
        var headers = options.EgressHeaders ?? options.ConnectionConfig.Headers;

        var clientWrapper = CreateClientWrapper(
            options.HttpClientProvider.HttpClient,
            options.EgressBaseUrl,
            headers,
            options.LoggerFactory);

        return new EgressStack
        {
            Egress = new EgressAdapter(clientWrapper)
        };
    }

    // Builds the HTTP client wrapper shared by the adapters of one stack.
    private static HttpClientWrapper CreateClientWrapper(
        HttpClient httpClient,
        string baseUrl,
        IReadOnlyDictionary<string, string>? headers,
        ILoggerFactory loggerFactory)
    {
        return new HttpClientWrapper(
            httpClient,
            baseUrl,
            headers,
            loggerFactory.CreateLogger(HttpClientWrapperLoggerCategory));
    }
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Factory/IAdapterFactory.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.Config;
using OpenSandbox.Services;
using OpenSandbox;
using Microsoft.Extensions.Logging;

namespace OpenSandbox.Factory;

/// <summary>
/// Options for creating a lifecycle service stack.
/// </summary>
/// <remarks>
/// Consumed by <see cref="IAdapterFactory.CreateLifecycleStack"/>. All members are required.
/// </remarks>
public class CreateLifecycleStackOptions
{
    /// <summary>
    /// Gets or sets the connection configuration.
    /// The default factory reads its headers and applies them to lifecycle requests.
    /// </summary>
    public required ConnectionConfig ConnectionConfig { get; set; }

    /// <summary>
    /// Gets or sets the lifecycle API base URL.
    /// </summary>
    public required string LifecycleBaseUrl { get; set; }

    /// <summary>
    /// Gets or sets the HTTP client provider for this SDK instance.
    /// </summary>
    public required HttpClientProvider HttpClientProvider { get; set; }

    /// <summary>
    /// Gets or sets the logger factory for this SDK instance.
    /// </summary>
    public required ILoggerFactory LoggerFactory { get; set; }
}

/// <summary>
/// Options for creating an execd service stack.
/// </summary>
/// <remarks>
/// Consumed by <see cref="IAdapterFactory.CreateExecdStack"/>. Every member except
/// <see cref="ExecdHeaders"/> is required.
/// </remarks>
public class CreateExecdStackOptions
{
    /// <summary>
    /// Gets or sets the connection configuration.
    /// </summary>
    public required ConnectionConfig ConnectionConfig { get; set; }

    /// <summary>
    /// Gets or sets the execd API base URL.
    /// </summary>
    public required string ExecdBaseUrl { get; set; }

    /// <summary>
    /// Gets or sets headers to apply to execd requests.
    /// If null, <see cref="ConnectionConfig.Headers"/> is used.
    /// </summary>
    public IReadOnlyDictionary<string, string>? ExecdHeaders { get; set; }

    /// <summary>
    /// Gets or sets the HTTP client provider for this SDK instance.
    /// The default factory uses both its regular and its streaming (SSE) client for the execd stack.
    /// </summary>
    public required HttpClientProvider HttpClientProvider { get; set; }

    /// <summary>
    /// Gets or sets the logger factory for this SDK instance.
    /// </summary>
    public required ILoggerFactory LoggerFactory { get; set; }
}

/// <summary>
/// Stack of lifecycle services.
/// </summary>
/// <remarks>
/// Returned by <see cref="IAdapterFactory.CreateLifecycleStack"/>; members can only be set during initialization.
/// </remarks>
public class LifecycleStack
{
    /// <summary>
    /// Gets the sandboxes service.
    /// </summary>
    public required ISandboxes Sandboxes { get; init; }
}

/// <summary>
/// Stack of execd services.
/// </summary>
/// <remarks>
/// Returned by <see cref="IAdapterFactory.CreateExecdStack"/>; members can only be set during initialization.
/// </remarks>
public class ExecdStack
{
    /// <summary>
    /// Gets the commands service.
    /// </summary>
    public required IExecdCommands Commands { get; init; }

    /// <summary>
    /// Gets the files service.
    /// </summary>
    public required ISandboxFiles Files { get; init; }

    /// <summary>
    /// Gets the health service.
    /// </summary>
    public required IExecdHealth Health { get; init; }

    /// <summary>
    /// Gets the metrics service.
    /// </summary>
    public required IExecdMetrics Metrics { get; init; }
}

/// <summary>
/// Options for creating an egress service stack.
/// </summary>
/// <remarks>
/// Consumed by <see cref="IAdapterFactory.CreateEgressStack"/>. Every member except
/// <see cref="EgressHeaders"/> is required.
/// </remarks>
public class CreateEgressStackOptions
{
    /// <summary>
    /// Gets or sets the connection configuration.
    /// </summary>
    public required ConnectionConfig ConnectionConfig { get; set; }

    /// <summary>
    /// Gets or sets the egress API base URL.
    /// </summary>
    public required string EgressBaseUrl { get; set; }

    /// <summary>
    /// Gets or sets headers to apply to egress requests.
    /// If null, the default factory uses the headers of <see cref="ConnectionConfig"/> instead.
    /// </summary>
    public IReadOnlyDictionary<string, string>? EgressHeaders { get; set; }

    /// <summary>
    /// Gets or sets the HTTP client provider for this SDK instance.
    /// </summary>
    public required HttpClientProvider HttpClientProvider { get; set; }

    /// <summary>
    /// Gets or sets the logger factory for this SDK instance.
    /// </summary>
    public required ILoggerFactory LoggerFactory { get; set; }
}

/// <summary>
/// Stack of egress services.
/// </summary>
/// <remarks>
/// Returned by <see cref="IAdapterFactory.CreateEgressStack"/>; members can only be set during initialization.
/// </remarks>
public class EgressStack
{
    /// <summary>
    /// Gets the egress service.
    /// </summary>
    public required IEgress Egress { get; init; }
}

/// <summary>
/// Factory interface for creating service adapters.
/// </summary>
/// <remarks>
/// Each method turns an options object into a stack object that groups the related services.
/// <see cref="DefaultAdapterFactory"/> is the implementation shipped with the SDK.
/// </remarks>
public interface IAdapterFactory
{
    /// <summary>
    /// Creates a lifecycle service stack.
    /// </summary>
    /// <param name="options">The creation options.</param>
    /// <returns>The lifecycle stack.</returns>
    LifecycleStack CreateLifecycleStack(CreateLifecycleStackOptions options);

    /// <summary>
    /// Creates an execd service stack.
    /// </summary>
    /// <param name="options">The creation options.</param>
    /// <returns>The execd stack.</returns>
    ExecdStack CreateExecdStack(CreateExecdStackOptions options);

    /// <summary>
    /// Creates an egress service stack.
    /// </summary>
    /// <param name="options">The creation options.</param>
    /// <returns>The egress stack.</returns>
    EgressStack CreateEgressStack(CreateEgressStackOptions options);
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/HttpClientProvider.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.Config;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;

namespace OpenSandbox;

/// <summary>
/// Provides the HTTP clients used by a sandbox SDK instance.
/// </summary>
/// <remarks>
/// The provider creates both clients in its constructor and disposes both in <see cref="Dispose"/>.
/// </remarks>
public sealed class HttpClientProvider : IDisposable
{
    private bool _disposed;
    private readonly ILogger _logger;

    /// <summary>
    /// Creates the regular and the streaming HTTP client from the given connection configuration.
    /// </summary>
    /// <param name="connectionConfig">The configuration used to create both clients.</param>
    /// <param name="loggerFactory">The logger factory; a no-op logger is used when it is <c>null</c>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="connectionConfig"/> is <c>null</c>.</exception>
    internal HttpClientProvider(ConnectionConfig connectionConfig, ILoggerFactory loggerFactory)
    {
        if (connectionConfig == null)
        {
            throw new ArgumentNullException(nameof(connectionConfig));
        }

        _logger = (loggerFactory ?? NullLoggerFactory.Instance).CreateLogger("OpenSandbox.HttpClientProvider");
        _logger.LogDebug("Creating HTTP clients for SDK instance");
        HttpClient = connectionConfig.CreateHttpClient();

        try
        {
            SseHttpClient = connectionConfig.CreateSseHttpClient();
        }
        catch
        {
            // The instance never reaches the caller when construction fails, so nobody could
            // dispose the first client later; release it here before propagating the failure.
            HttpClient.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Gets the HTTP client used for non-streaming requests.
    /// </summary>
    public HttpClient HttpClient { get; }

    /// <summary>
    /// Gets the HTTP client used for streaming requests.
    /// </summary>
    public HttpClient SseHttpClient { get; }

    /// <summary>
    /// Releases HTTP client resources. Calls after the first one are no-ops.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;
        _logger.LogDebug("Disposing HTTP clients for SDK instance");
        HttpClient.Dispose();
        SseHttpClient.Dispose();
    }
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Internal/ExecutionEventDispatcher.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.Models;

namespace OpenSandbox.Internal;

/// <summary>
/// Dispatches streamed execution events to handlers and builds the execution result.
/// </summary>
/// <remarks>
/// Every event is first recorded on the <see cref="Execution"/> passed to the constructor and is
/// then forwarded to the matching optional callback on <see cref="ExecutionHandlers"/>.
/// </remarks>
internal sealed class ExecutionEventDispatcher
{
    private readonly Execution _execution;
    private readonly ExecutionHandlers? _handlers;

    /// <summary>
    /// Initializes a new instance of the <see cref="ExecutionEventDispatcher"/> class.
    /// </summary>
    /// <param name="execution">The execution that accumulates the streamed state.</param>
    /// <param name="handlers">Optional callbacks invoked as events arrive.</param>
    /// <exception cref="ArgumentNullException"><paramref name="execution"/> is <c>null</c>.</exception>
    public ExecutionEventDispatcher(Execution execution, ExecutionHandlers? handlers = null)
    {
        _execution = execution ?? throw new ArgumentNullException(nameof(execution));
        _handlers = handlers;
    }

    /// <summary>
    /// Routes a single stream event to its handler based on the event type.
    /// Events whose type matches none of the known cases are ignored.
    /// </summary>
    /// <param name="ev">The event received from the server stream.</param>
    /// <returns>A task that completes once the event is recorded and any callback has finished.</returns>
    public async Task DispatchAsync(ServerStreamEvent ev)
    {
        // Events without a timestamp are stamped with the current UTC time in Unix milliseconds.
        var timestamp = ev.Timestamp ?? DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();

        switch (ev.Type)
        {
            case ServerStreamEventTypes.Init:
                await HandleInitAsync(ev, timestamp).ConfigureAwait(false);
                break;

            case ServerStreamEventTypes.Stdout:
                await HandleStdoutAsync(ev, timestamp).ConfigureAwait(false);
                break;

            case ServerStreamEventTypes.Stderr:
                await HandleStderrAsync(ev, timestamp).ConfigureAwait(false);
                break;

            case ServerStreamEventTypes.Result:
                await HandleResultAsync(ev, timestamp).ConfigureAwait(false);
                break;

            case ServerStreamEventTypes.ExecutionCount:
                HandleExecutionCount(ev);
                break;

            case ServerStreamEventTypes.ExecutionComplete:
                await HandleExecutionCompleteAsync(ev, timestamp).ConfigureAwait(false);
                break;

            case ServerStreamEventTypes.Error:
                await HandleErrorAsync(ev, timestamp).ConfigureAwait(false);
                break;
        }
    }

    // Records the execution id (when the event carries a non-empty one) and notifies OnInit.
    private async Task HandleInitAsync(ServerStreamEvent ev, long timestamp)
    {
        var id = ev.Text ?? string.Empty;
        if (!string.IsNullOrEmpty(id))
        {
            _execution.Id = id;
        }

        var init = new ExecutionInit
        {
            Id = id,
            Timestamp = timestamp
        };

        if (_handlers?.OnInit != null)
        {
            await _handlers.OnInit(init).ConfigureAwait(false);
        }
    }

    // Appends a stdout message to the execution logs and notifies OnStdout.
    private async Task HandleStdoutAsync(ServerStreamEvent ev, long timestamp)
    {
        var msg = new OutputMessage
        {
            Text = ev.Text ?? string.Empty,
            Timestamp = timestamp,
            IsError = false
        };

        _execution.Logs.Stdout.Add(msg);

        if (_handlers?.OnStdout != null)
        {
            await _handlers.OnStdout(msg).ConfigureAwait(false);
        }
    }

    // Appends a stderr message to the execution logs and notifies OnStderr.
    private async Task HandleStderrAsync(ServerStreamEvent ev, long timestamp)
    {
        var msg = new OutputMessage
        {
            Text = ev.Text ?? string.Empty,
            Timestamp = timestamp,
            IsError = true
        };

        _execution.Logs.Stderr.Add(msg);

        if (_handlers?.OnStderr != null)
        {
            await _handlers.OnStderr(msg).ConfigureAwait(false);
        }
    }

    // Appends a result (plain text plus a copy of the raw result map) and notifies OnResult.
    private async Task HandleResultAsync(ServerStreamEvent ev, long timestamp)
    {
        var text = ExtractText(ev.Results);
        var result = new ExecutionResult
        {
            Text = text,
            Timestamp = timestamp,
            Raw = ev.Results?.ToDictionary(kv => kv.Key, kv => (object)kv.Value)
        };

        _execution.Results.Add(result);

        if (_handlers?.OnResult != null)
        {
            await _handlers.OnResult(result).ConfigureAwait(false);
        }
    }

    // Stores the execution count when the event carries one; no callback is involved.
    private void HandleExecutionCount(ServerStreamEvent ev)
    {
        if (ev.ExecutionCount.HasValue)
        {
            _execution.ExecutionCount = ev.ExecutionCount.Value;
        }
    }

    // Marks the execution complete (a missing execution time is recorded as 0) and notifies OnExecutionComplete.
    private async Task HandleExecutionCompleteAsync(ServerStreamEvent ev, long timestamp)
    {
        var complete = new ExecutionComplete
        {
            Timestamp = timestamp,
            ExecutionTimeMs = ev.ExecutionTime ?? 0
        };

        _execution.Complete = complete;

        if (_handlers?.OnExecutionComplete != null)
        {
            await _handlers.OnExecutionComplete(complete).ConfigureAwait(false);
        }
    }

    // Converts the error payload into an ExecutionError and notifies OnError.
    // Error events without a payload are ignored.
    private async Task HandleErrorAsync(ServerStreamEvent ev, long timestamp)
    {
        if (ev.Error == null)
            return;

        var error = new ExecutionError
        {
            // Two key spellings are accepted for each field: "ename"/"name" and "evalue"/"value".
            Name = GetStringValue(ev.Error, "ename") ?? GetStringValue(ev.Error, "name") ?? string.Empty,
            Value = GetStringValue(ev.Error, "evalue") ?? GetStringValue(ev.Error, "value") ?? string.Empty,
            Timestamp = timestamp,
            Traceback = GetStringArrayValue(ev.Error, "traceback") ?? Array.Empty<string>()
        };

        _execution.Error = error;

        if (_handlers?.OnError != null)
        {
            await _handlers.OnError(error).ConfigureAwait(false);
        }
    }

    // Returns the textual representation of a result, probing "text/plain", "text" and
    // "textPlain" in that order; the first key that is present wins even when its value is null.
    private static string? ExtractText(Dictionary<string, object>? results)
    {
        if (results == null)
            return null;

        if (results.TryGetValue("text/plain", out var textPlain))
            return textPlain?.ToString();

        if (results.TryGetValue("text", out var text))
            return text?.ToString();

        if (results.TryGetValue("textPlain", out var textPlain2))
            return textPlain2?.ToString();

        return null;
    }

    // Returns the string form of dict[key], or null when the key is absent or its value is null.
    private static string? GetStringValue(Dictionary<string, object> dict, string key)
    {
        if (dict.TryGetValue(key, out var value))
            return value?.ToString();
        return null;
    }

    // Returns dict[key] as a list of strings when it is an object sequence or a JSON array;
    // returns null when the key is absent or the value has any other shape.
    private static IReadOnlyList<string>? GetStringArrayValue(Dictionary<string, object> dict, string key)
    {
        if (!dict.TryGetValue(key, out var value))
            return null;

        if (value is IEnumerable<object> enumerable)
            return enumerable.Select(x => x?.ToString() ?? string.Empty).ToList();

        if (value is System.Text.Json.JsonElement jsonElement && jsonElement.ValueKind == System.Text.Json.JsonValueKind.Array)
            return jsonElement.EnumerateArray().Select(JsonElementToText).ToList();

        return null;
    }

    // Converts one JSON array element to text without throwing.
    // JsonElement.GetString() is only valid for String and Null elements, so any other kind
    // (number, boolean, object, nested array) is rendered through ToString() instead.
    private static string JsonElementToText(System.Text.Json.JsonElement element)
    {
        switch (element.ValueKind)
        {
            case System.Text.Json.JsonValueKind.String:
                return element.GetString() ?? string.Empty;

            case System.Text.Json.JsonValueKind.Null:
            case System.Text.Json.JsonValueKind.Undefined:
                return string.Empty;

            default:
                return element.ToString() ?? string.Empty;
        }
    }
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Internal/HttpClientWrapper.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System.Text;
using System.Text.Json;
using OpenSandbox.Core;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;

namespace OpenSandbox.Internal;

/// <summary>
/// Internal HTTP client wrapper for making API requests.
/// </summary>
internal sealed class HttpClientWrapper
{
    // Client used to send every request; supplied by the caller of the constructor.
    private readonly HttpClient _httpClient;
    // Base URL with trailing '/' characters removed (see the constructor).
    private readonly string _baseUrl;
    // Headers added to each outgoing request unless the request already carries the same header name.
    private readonly IReadOnlyDictionary<string, string> _defaultHeaders;
    private readonly ILogger _logger;

    // JSON settings used when serializing request bodies: camelCase property names,
    // case-insensitive property matching, and null-valued properties omitted on write.
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        PropertyNameCaseInsensitive = true,
        DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull
    };

    /// <summary>
    /// Creates a wrapper bound to one base URL and one set of default headers.
    /// </summary>
    /// <param name="httpClient">The client used to send requests.</param>
    /// <param name="baseUrl">The base URL; trailing slashes are removed.</param>
    /// <param name="defaultHeaders">Headers applied to every request; an empty set is used when <c>null</c>.</param>
    /// <param name="logger">The logger; a no-op logger is used when <c>null</c>.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="httpClient"/> or <paramref name="baseUrl"/> is <c>null</c>.
    /// </exception>
    public HttpClientWrapper(
        HttpClient httpClient,
        string baseUrl,
        IReadOnlyDictionary<string, string>? defaultHeaders = null,
        ILogger? logger = null)
    {
        if (httpClient == null)
        {
            throw new ArgumentNullException(nameof(httpClient));
        }

        if (baseUrl == null)
        {
            throw new ArgumentNullException(nameof(baseUrl));
        }

        _httpClient = httpClient;
        _baseUrl = baseUrl.TrimEnd('/');

        if (defaultHeaders != null)
        {
            _defaultHeaders = defaultHeaders;
        }
        else
        {
            _defaultHeaders = new Dictionary<string, string>();
        }

        _logger = logger != null ? logger : NullLogger.Instance;
    }

    /// <summary>
    /// Gets the base URL this wrapper sends requests to, without trailing slashes.
    /// </summary>
    public string BaseUrl => _baseUrl;

    /// <summary>
    /// Sends a GET request and hands the response to the typed response handler.
    /// </summary>
    /// <typeparam name="T">The type produced by the response handler.</typeparam>
    /// <param name="path">The path appended to the base URL.</param>
    /// <param name="queryParams">Optional query parameters; entries with a <c>null</c> value are skipped.</param>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    /// <returns>The value produced by the response handler.</returns>
    public async Task<T> GetAsync<T>(
        string path,
        Dictionary<string, string?>? queryParams = null,
        CancellationToken cancellationToken = default)
    {
        var requestUrl = BuildUrl(path, queryParams);
        _logger.LogDebug("HTTP GET {Url}", requestUrl);

        using (var message = new HttpRequestMessage(HttpMethod.Get, requestUrl))
        {
            ApplyDefaultHeaders(message);

            using (var reply = await _httpClient.SendAsync(message, cancellationToken).ConfigureAwait(false))
            {
                var parsed = await HandleResponseAsync<T>(reply, cancellationToken).ConfigureAwait(false);
                return parsed;
            }
        }
    }

    /// <summary>
    /// Sends a GET request and only verifies that it succeeded; no response value is returned.
    /// </summary>
    /// <param name="path">The path appended to the base URL.</param>
    /// <param name="queryParams">Optional query parameters; entries with a <c>null</c> value are skipped.</param>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    public async Task GetAsync(
        string path,
        Dictionary<string, string?>? queryParams = null,
        CancellationToken cancellationToken = default)
    {
        var requestUrl = BuildUrl(path, queryParams);
        _logger.LogDebug("HTTP GET {Url}", requestUrl);

        using (var message = new HttpRequestMessage(HttpMethod.Get, requestUrl))
        {
            ApplyDefaultHeaders(message);

            using (var reply = await _httpClient.SendAsync(message, cancellationToken).ConfigureAwait(false))
            {
                await EnsureSuccessAsync(reply, cancellationToken).ConfigureAwait(false);
            }
        }
    }

    /// <summary>
    /// Sends a POST request with an optional JSON body and hands the response to the typed response handler.
    /// </summary>
    /// <typeparam name="T">The type produced by the response handler.</typeparam>
    /// <param name="path">The path appended to the base URL.</param>
    /// <param name="body">The object serialized as the JSON body; no content is sent when <c>null</c>.</param>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    /// <returns>The value produced by the response handler.</returns>
    public async Task<T> PostAsync<T>(
        string path,
        object? body = null,
        CancellationToken cancellationToken = default)
    {
        var requestUrl = BuildUrl(path);
        _logger.LogDebug("HTTP POST {Url}", requestUrl);

        using (var message = new HttpRequestMessage(HttpMethod.Post, requestUrl))
        {
            ApplyDefaultHeaders(message);

            if (body is not null)
            {
                message.Content = new StringContent(
                    JsonSerializer.Serialize(body, JsonOptions),
                    Encoding.UTF8,
                    "application/json");
            }

            using (var reply = await _httpClient.SendAsync(message, cancellationToken).ConfigureAwait(false))
            {
                var parsed = await HandleResponseAsync<T>(reply, cancellationToken).ConfigureAwait(false);
                return parsed;
            }
        }
    }

    /// <summary>
    /// Sends a POST request with an optional JSON body and only verifies that it succeeded.
    /// </summary>
    /// <param name="path">The path appended to the base URL.</param>
    /// <param name="body">The object serialized as the JSON body; no content is sent when <c>null</c>.</param>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    public async Task PostAsync(
        string path,
        object? body = null,
        CancellationToken cancellationToken = default)
    {
        var requestUrl = BuildUrl(path);
        _logger.LogDebug("HTTP POST {Url}", requestUrl);

        using (var message = new HttpRequestMessage(HttpMethod.Post, requestUrl))
        {
            ApplyDefaultHeaders(message);

            if (body is not null)
            {
                message.Content = new StringContent(
                    JsonSerializer.Serialize(body, JsonOptions),
                    Encoding.UTF8,
                    "application/json");
            }

            using (var reply = await _httpClient.SendAsync(message, cancellationToken).ConfigureAwait(false))
            {
                await EnsureSuccessAsync(reply, cancellationToken).ConfigureAwait(false);
            }
        }
    }

    /// <summary>
    /// Sends a PATCH request with an optional JSON body and hands the response to the typed response handler.
    /// </summary>
    /// <typeparam name="T">The type produced by the response handler.</typeparam>
    /// <param name="path">The path appended to the base URL.</param>
    /// <param name="body">The object serialized as the JSON body; no content is sent when <c>null</c>.</param>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    /// <returns>The value produced by the response handler.</returns>
    public async Task<T> PatchAsync<T>(
        string path,
        object? body = null,
        CancellationToken cancellationToken = default)
    {
        var requestUrl = BuildUrl(path);
        _logger.LogDebug("HTTP PATCH {Url}", requestUrl);

        using (var message = new HttpRequestMessage(HttpMethod.Patch, requestUrl))
        {
            ApplyDefaultHeaders(message);

            if (body is not null)
            {
                message.Content = new StringContent(
                    JsonSerializer.Serialize(body, JsonOptions),
                    Encoding.UTF8,
                    "application/json");
            }

            using (var reply = await _httpClient.SendAsync(message, cancellationToken).ConfigureAwait(false))
            {
                var parsed = await HandleResponseAsync<T>(reply, cancellationToken).ConfigureAwait(false);
                return parsed;
            }
        }
    }

    /// <summary>
    /// Sends a PATCH request with an optional JSON body and only verifies that it succeeded.
    /// </summary>
    /// <param name="path">The path appended to the base URL.</param>
    /// <param name="body">The object serialized as the JSON body; no content is sent when <c>null</c>.</param>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    public async Task PatchAsync(
        string path,
        object? body = null,
        CancellationToken cancellationToken = default)
    {
        var requestUrl = BuildUrl(path);
        _logger.LogDebug("HTTP PATCH {Url}", requestUrl);

        using (var message = new HttpRequestMessage(HttpMethod.Patch, requestUrl))
        {
            ApplyDefaultHeaders(message);

            if (body is not null)
            {
                message.Content = new StringContent(
                    JsonSerializer.Serialize(body, JsonOptions),
                    Encoding.UTF8,
                    "application/json");
            }

            using (var reply = await _httpClient.SendAsync(message, cancellationToken).ConfigureAwait(false))
            {
                await EnsureSuccessAsync(reply, cancellationToken).ConfigureAwait(false);
            }
        }
    }

    /// <summary>
    /// Sends a DELETE request and hands the response to the typed response handler.
    /// </summary>
    /// <typeparam name="T">The type produced by the response handler.</typeparam>
    /// <param name="path">The path appended to the base URL.</param>
    /// <param name="queryParams">Optional query parameters; entries with a <c>null</c> value are skipped.</param>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    /// <returns>The value produced by the response handler.</returns>
    public async Task<T> DeleteAsync<T>(
        string path,
        Dictionary<string, string?>? queryParams = null,
        CancellationToken cancellationToken = default)
    {
        var requestUrl = BuildUrl(path, queryParams);
        _logger.LogDebug("HTTP DELETE {Url}", requestUrl);

        using (var message = new HttpRequestMessage(HttpMethod.Delete, requestUrl))
        {
            ApplyDefaultHeaders(message);

            using (var reply = await _httpClient.SendAsync(message, cancellationToken).ConfigureAwait(false))
            {
                var parsed = await HandleResponseAsync<T>(reply, cancellationToken).ConfigureAwait(false);
                return parsed;
            }
        }
    }

    /// <summary>
    /// Sends a DELETE request and only verifies that it succeeded; no response value is returned.
    /// </summary>
    /// <param name="path">The path appended to the base URL.</param>
    /// <param name="queryParams">Optional query parameters; entries with a <c>null</c> value are skipped.</param>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    public async Task DeleteAsync(
        string path,
        Dictionary<string, string?>? queryParams = null,
        CancellationToken cancellationToken = default)
    {
        var requestUrl = BuildUrl(path, queryParams);
        _logger.LogDebug("HTTP DELETE {Url}", requestUrl);

        using (var message = new HttpRequestMessage(HttpMethod.Delete, requestUrl))
        {
            ApplyDefaultHeaders(message);

            using (var reply = await _httpClient.SendAsync(message, cancellationToken).ConfigureAwait(false))
            {
                await EnsureSuccessAsync(reply, cancellationToken).ConfigureAwait(false);
            }
        }
    }

    /// <summary>
    /// Sends a caller-built request after adding the default headers, returning as soon as the
    /// response headers have been read.
    /// </summary>
    /// <param name="request">The request to send; default headers are added to it in place.</param>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    /// <returns>The response message; this method does not check the status code or dispose the response.</returns>
    public async Task<HttpResponseMessage> SendAsync(
        HttpRequestMessage request,
        CancellationToken cancellationToken = default)
    {
        _logger.LogDebug("HTTP {Method} {Url}", request.Method, request.RequestUri);
        ApplyDefaultHeaders(request);

        var pendingResponse = _httpClient.SendAsync(
            request,
            HttpCompletionOption.ResponseHeadersRead,
            cancellationToken);
        var reply = await pendingResponse.ConfigureAwait(false);
        return reply;
    }

    /// <summary>
    /// Sends a GET request and returns the whole response body as a byte array.
    /// </summary>
    /// <param name="path">The path appended to the base URL.</param>
    /// <param name="queryParams">Optional query parameters; entries with a <c>null</c> value are skipped.</param>
    /// <param name="headers">Optional extra request headers, added after the default headers.</param>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    /// <returns>The response body bytes.</returns>
    public async Task<byte[]> GetBytesAsync(
        string path,
        Dictionary<string, string?>? queryParams = null,
        Dictionary<string, string>? headers = null,
        CancellationToken cancellationToken = default)
    {
        var url = BuildUrl(path, queryParams);
        // Log the request like the other verb helpers do, so byte downloads show up in debug traces too.
        _logger.LogDebug("HTTP GET {Url}", url);
        using var request = new HttpRequestMessage(HttpMethod.Get, url);
        ApplyDefaultHeaders(request);

        if (headers != null)
        {
            foreach (var header in headers)
            {
                request.Headers.TryAddWithoutValidation(header.Key, header.Value);
            }
        }

        using var response = await _httpClient.SendAsync(request, cancellationToken).ConfigureAwait(false);
        await EnsureSuccessAsync(response, cancellationToken).ConfigureAwait(false);
        return await response.Content.ReadAsByteArrayAsync().ConfigureAwait(false);
    }

    /// <summary>
    /// Sends a GET request and returns the response body as a stream, without buffering the body first.
    /// </summary>
    /// <remarks>
    /// On success the response is intentionally not disposed here because the returned stream
    /// reads from it; the caller is responsible for disposing the stream.
    /// </remarks>
    /// <param name="path">The path appended to the base URL.</param>
    /// <param name="queryParams">Optional query parameters; entries with a <c>null</c> value are skipped.</param>
    /// <param name="headers">Optional extra request headers, added after the default headers.</param>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    /// <returns>A stream over the response body.</returns>
    public async Task<Stream> GetStreamAsync(
        string path,
        Dictionary<string, string?>? queryParams = null,
        Dictionary<string, string>? headers = null,
        CancellationToken cancellationToken = default)
    {
        var url = BuildUrl(path, queryParams);
        // Log the request like the other verb helpers do, so streamed downloads show up in debug traces too.
        _logger.LogDebug("HTTP GET {Url}", url);
        using var request = new HttpRequestMessage(HttpMethod.Get, url);
        ApplyDefaultHeaders(request);

        if (headers != null)
        {
            foreach (var header in headers)
            {
                request.Headers.TryAddWithoutValidation(header.Key, header.Value);
            }
        }

        var response = await _httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);
        try
        {
            await EnsureSuccessAsync(response, cancellationToken).ConfigureAwait(false);
            return await response.Content.ReadAsStreamAsync().ConfigureAwait(false);
        }
        catch
        {
            // No stream reaches the caller on failure, so nothing else would ever release the
            // response (and the connection it holds); dispose it before propagating the error.
            response.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Joins the base URL with <paramref name="path"/> and appends an escaped query string.
    /// </summary>
    /// <param name="path">Request path; a "/" separator is inserted when it does not start with one.</param>
    /// <param name="queryParams">Optional query parameters; entries whose value is null are omitted.</param>
    /// <returns>The combined URL, without a "?" when no parameter is left to emit.</returns>
    private string BuildUrl(string path, Dictionary<string, string?>? queryParams = null)
    {
        var separator = path.StartsWith("/") ? string.Empty : "/";
        var endpoint = $"{_baseUrl}{separator}{path}";

        if (queryParams is null || queryParams.Count == 0)
        {
            return endpoint;
        }

        var encodedPairs = new List<string>();
        foreach (var parameter in queryParams)
        {
            // Null values mean "parameter not set" and are left out entirely.
            if (parameter.Value is string parameterValue)
            {
                encodedPairs.Add($"{Uri.EscapeDataString(parameter.Key)}={Uri.EscapeDataString(parameterValue)}");
            }
        }

        if (encodedPairs.Count == 0)
        {
            return endpoint;
        }

        return $"{endpoint}?{string.Join("&", encodedPairs)}";
    }

    /// <summary>
    /// Copies each configured default header onto the request unless the request already
    /// carries a header with the same name.
    /// </summary>
    /// <param name="request">The request whose headers are completed in place.</param>
    private void ApplyDefaultHeaders(HttpRequestMessage request)
    {
        var requestHeaders = request.Headers;

        foreach (var defaultHeader in _defaultHeaders)
        {
            // Headers already present on the request take precedence over the defaults.
            if (requestHeaders.Contains(defaultHeader.Key))
            {
                continue;
            }

            requestHeaders.TryAddWithoutValidation(defaultHeader.Key, defaultHeader.Value);
        }
    }

    /// <summary>
    /// Reads the response body as text and deserializes it to <typeparamref name="T"/>,
    /// throwing an API exception for failure statuses, empty bodies and malformed JSON.
    /// </summary>
    /// <typeparam name="T">The type the JSON body is deserialized into.</typeparam>
    /// <param name="response">The response to inspect and read.</param>
    /// <param name="cancellationToken">Accepted for signature symmetry; not used by the current body.</param>
    /// <returns>The deserialized body.</returns>
    private async Task<T> HandleResponseAsync<T>(HttpResponseMessage response, CancellationToken cancellationToken)
    {
        // The body is read first because it is needed on both the failure and the success path.
        var body = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
        var status = (int)response.StatusCode;

        if (!response.IsSuccessStatusCode)
        {
            LogHttpFailure(response);
            ThrowApiException(response, body);
        }

        if (string.IsNullOrEmpty(body))
        {
            var emptyBodyError = new SandboxError(SandboxErrorCodes.UnexpectedResponse, "Unexpected empty response body");
            throw new SandboxApiException(
                message: "Unexpected empty response body",
                statusCode: status,
                error: emptyBodyError,
                rawBody: body);
        }

        try
        {
            var result = JsonSerializer.Deserialize<T>(body, JsonOptions);
            return result!;
        }
        catch (JsonException jsonFailure)
        {
            throw new SandboxApiException(
                message: $"Failed to deserialize response: {jsonFailure.Message}",
                statusCode: status,
                rawBody: body,
                innerException: jsonFailure);
        }
    }

    /// <summary>
    /// Does nothing for a successful response; otherwise reads the body, logs the failure
    /// and throws the corresponding API exception.
    /// </summary>
    /// <param name="response">The response to check.</param>
    /// <param name="cancellationToken">Accepted for signature symmetry; not used by the current body.</param>
    private async Task EnsureSuccessAsync(HttpResponseMessage response, CancellationToken cancellationToken)
    {
        if (response.IsSuccessStatusCode)
        {
            return;
        }

        // The error body is read before logging so the exception can carry its details.
        var errorBody = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
        LogHttpFailure(response);
        ThrowApiException(response, errorBody);
    }

    /// <summary>
    /// Writes an error log entry describing a failed HTTP exchange: method, URL, numeric
    /// status code and the request-id response header (empty when absent).
    /// </summary>
    /// <param name="response">The failed response; its originating request may be null.</param>
    private void LogHttpFailure(HttpResponseMessage response)
    {
        string? requestId = null;
        if (response.Headers.TryGetValues(Constants.RequestIdHeader, out var headerValues))
        {
            requestId = headerValues.FirstOrDefault();
        }

        // Fall back to a placeholder when the response is not linked to a request.
        var originalRequest = response.RequestMessage;
        var methodName = originalRequest?.Method.Method ?? "UNKNOWN";
        var requestUrl = originalRequest?.RequestUri?.ToString() ?? "UNKNOWN";
        var statusCode = (int)response.StatusCode;

        _logger.LogError(
            "HTTP request failed: method={Method}, url={Url}, status={StatusCode}, requestId={RequestId}",
            methodName,
            requestUrl,
            statusCode,
            requestId ?? string.Empty);
    }

    /// <summary>
    /// Builds and throws a <c>SandboxApiException</c> for a failed response. This method never
    /// returns normally.
    /// </summary>
    /// <remarks>
    /// When the body is a JSON object, the message is taken from the top-level <c>message</c>
    /// field, falling back to <c>error.message</c>; the code is taken from <c>error.code</c>,
    /// falling back to the top-level <c>code</c>. Only JSON string values are used; a field of
    /// any other kind is treated as absent so that one malformed field cannot prevent the
    /// remaining fields from being extracted. If the body is not a JSON object the raw text is
    /// attached unchanged and a generic message containing the status code is used.
    /// </remarks>
    /// <param name="response">The failed response (status code and request-id header are read).</param>
    /// <param name="content">The response body text, possibly empty.</param>
    private static void ThrowApiException(HttpResponseMessage response, string content)
    {
        // JsonElement.GetString() throws for anything other than a string or null element,
        // so guard on the kind and treat every non-string value as "not provided".
        static string? StringOrNull(JsonElement element) =>
            element.ValueKind == JsonValueKind.String ? element.GetString() : null;

        var requestId = response.Headers.TryGetValues(Constants.RequestIdHeader, out var values)
            ? values.FirstOrDefault()
            : null;

        string? errorMessage = null;
        string? errorCode = null;
        object? rawBody = content;

        if (!string.IsNullOrEmpty(content))
        {
            try
            {
                var parsed = JsonSerializer.Deserialize<Dictionary<string, JsonElement>>(content, JsonOptions);
                if (parsed != null)
                {
                    rawBody = parsed;

                    if (parsed.TryGetValue("message", out var msg))
                        errorMessage = StringOrNull(msg);

                    if (parsed.TryGetValue("error", out var err) && err.ValueKind == JsonValueKind.Object)
                    {
                        if (err.TryGetProperty("message", out var errMsg))
                            errorMessage ??= StringOrNull(errMsg);
                        if (err.TryGetProperty("code", out var errCode))
                            errorCode = StringOrNull(errCode);
                    }

                    if (parsed.TryGetValue("code", out var code))
                        errorCode ??= StringOrNull(code);
                }
            }
            catch
            {
                // Deliberately best-effort: a body that is not a JSON object must never mask
                // the HTTP failure itself, so fall through with the raw text as the body.
            }
        }

        var message = errorMessage ?? $"Request failed with status code {(int)response.StatusCode}";
        var sandboxErrorCode = errorCode ?? SandboxErrorCodes.UnexpectedResponse;

        throw new SandboxApiException(
            message: message,
            statusCode: (int)response.StatusCode,
            requestId: requestId,
            rawBody: rawBody,
            error: new SandboxError(sandboxErrorCode, errorMessage ?? message));
    }
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Models/Execd.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System.Text.Json.Serialization;

namespace OpenSandbox.Models;

/// <summary>
/// A server-sent event from command execution.
/// </summary>
/// <remarks>
/// Only <see cref="Type"/> is required; every other member is nullable. The known values
/// of <see cref="Type"/> are listed in <see cref="ServerStreamEventTypes"/>.
/// </remarks>
public class ServerStreamEvent
{
    /// <summary>
    /// Gets or sets the event type (JSON property <c>type</c>).
    /// </summary>
    [JsonPropertyName("type")]
    public required string Type { get; set; }

    /// <summary>
    /// Gets or sets the timestamp in milliseconds (JSON property <c>timestamp</c>).
    /// </summary>
    // NOTE(review): the epoch/reference point of this value is not visible here — confirm against the execd API.
    [JsonPropertyName("timestamp")]
    public long? Timestamp { get; set; }

    /// <summary>
    /// Gets or sets the text content (JSON property <c>text</c>).
    /// </summary>
    [JsonPropertyName("text")]
    public string? Text { get; set; }

    /// <summary>
    /// Gets or sets the results map (JSON property <c>results</c>).
    /// </summary>
    // NOTE(review): values are typed as object; with System.Text.Json defaults they presumably
    // arrive as JsonElement instances — verify against the serializer options in use.
    [JsonPropertyName("results")]
    public Dictionary<string, object>? Results { get; set; }

    /// <summary>
    /// Gets or sets the error information as a loosely typed map (JSON property <c>error</c>).
    /// </summary>
    [JsonPropertyName("error")]
    public Dictionary<string, object>? Error { get; set; }

    /// <summary>
    /// Gets or sets the execution count (JSON property <c>execution_count</c>).
    /// </summary>
    [JsonPropertyName("execution_count")]
    public int? ExecutionCount { get; set; }

    /// <summary>
    /// Gets or sets the execution time in milliseconds (JSON property <c>execution_time</c>).
    /// </summary>
    [JsonPropertyName("execution_time")]
    public long? ExecutionTime { get; set; }
}

/// <summary>
/// Known event types for server stream events, as string constants.
/// </summary>
public static class ServerStreamEventTypes
{
    /// <summary>
    /// Initialization event (<c>"init"</c>).
    /// </summary>
    public const string Init = "init";

    /// <summary>
    /// Standard output event (<c>"stdout"</c>).
    /// </summary>
    public const string Stdout = "stdout";

    /// <summary>
    /// Standard error event (<c>"stderr"</c>).
    /// </summary>
    public const string Stderr = "stderr";

    /// <summary>
    /// Result event (<c>"result"</c>).
    /// </summary>
    public const string Result = "result";

    /// <summary>
    /// Execution count event (<c>"execution_count"</c>).
    /// </summary>
    public const string ExecutionCount = "execution_count";

    /// <summary>
    /// Execution complete event (<c>"execution_complete"</c>).
    /// </summary>
    public const string ExecutionComplete = "execution_complete";

    /// <summary>
    /// Error event (<c>"error"</c>).
    /// </summary>
    public const string Error = "error";
}

/// <summary>
/// Request to run a command.
/// </summary>
/// <remarks>
/// Every member carries an explicit JSON property name. Only <see cref="Command"/> is
/// required; the remaining members are nullable.
/// </remarks>
public class RunCommandRequest
{
    /// <summary>
    /// Gets or sets the command to run (JSON property <c>command</c>).
    /// </summary>
    [JsonPropertyName("command")]
    public required string Command { get; set; }

    /// <summary>
    /// Gets or sets the working directory (JSON property <c>cwd</c>).
    /// </summary>
    [JsonPropertyName("cwd")]
    public string? Cwd { get; set; }

    /// <summary>
    /// Gets or sets whether to run in background (JSON property <c>background</c>).
    /// </summary>
    [JsonPropertyName("background")]
    public bool? Background { get; set; }

    /// <summary>
    /// Gets or sets the maximum execution time in milliseconds (JSON property <c>timeout</c>).
    /// Note that <see cref="RunCommandOptions.TimeoutSeconds"/> is expressed in seconds.
    /// </summary>
    [JsonPropertyName("timeout")]
    public long? Timeout { get; set; }

    /// <summary>
    /// Gets or sets the Unix user ID used to run the command process (JSON property <c>uid</c>).
    /// </summary>
    [JsonPropertyName("uid")]
    public int? Uid { get; set; }

    /// <summary>
    /// Gets or sets the Unix group ID used to run the command process (JSON property <c>gid</c>).
    /// Requires <see cref="Uid"/> to be set.
    /// </summary>
    // This class performs no validation itself, so the Uid requirement is not enforced here.
    [JsonPropertyName("gid")]
    public int? Gid { get; set; }

    /// <summary>
    /// Gets or sets environment variables injected into the command process (JSON property <c>envs</c>).
    /// </summary>
    [JsonPropertyName("envs")]
    public Dictionary<string, string>? Envs { get; set; }
}

/// <summary>
/// Options for running a command.
/// </summary>
/// <remarks>
/// This type declares no JSON property names. Its members parallel those of
/// <see cref="RunCommandRequest"/>, with two differences visible here: the timeout is
/// expressed in seconds rather than milliseconds, and <see cref="Background"/> is a
/// non-nullable flag.
/// </remarks>
public class RunCommandOptions
{
    /// <summary>
    /// Gets or sets the working directory for command execution.
    /// </summary>
    public string? WorkingDirectory { get; set; }

    /// <summary>
    /// Gets or sets whether to run the command in detached mode.
    /// Defaults to <c>false</c> because no initializer is declared.
    /// </summary>
    public bool Background { get; set; }

    /// <summary>
    /// Gets or sets the maximum execution time in seconds.
    /// The server terminates the command when this duration is reached.
    /// </summary>
    public int? TimeoutSeconds { get; set; }

    /// <summary>
    /// Gets or sets the Unix user ID used to run the command process.
    /// </summary>
    public int? Uid { get; set; }

    /// <summary>
    /// Gets or sets the Unix group ID used to run the command process.
    /// Requires <see cref="Uid"/> to be set.
    /// </summary>
    // This class performs no validation itself, so the Uid requirement is not enforced here.
    public int? Gid { get; set; }

    /// <summary>
    /// Gets or sets environment variables injected into the command process.
    /// </summary>
    public Dictionary<string, string>? Envs { get; set; }
}

/// <summary>
/// Status information for a foreground or background command.
/// </summary>
/// <remarks>
/// All members are nullable; none is marked as required.
/// </remarks>
public class CommandStatus
{
    /// <summary>
    /// Gets or sets the command ID (JSON property <c>id</c>).
    /// </summary>
    [JsonPropertyName("id")]
    public string? Id { get; set; }

    /// <summary>
    /// Gets or sets the original command text (JSON property <c>content</c>).
    /// </summary>
    [JsonPropertyName("content")]
    public string? Content { get; set; }

    /// <summary>
    /// Gets or sets whether the command is still running (JSON property <c>running</c>).
    /// </summary>
    [JsonPropertyName("running")]
    public bool? Running { get; set; }

    /// <summary>
    /// Gets or sets the exit code when the command has finished (JSON property <c>exit_code</c>).
    /// </summary>
    [JsonPropertyName("exit_code")]
    public int? ExitCode { get; set; }

    /// <summary>
    /// Gets or sets the error message if the command failed (JSON property <c>error</c>).
    /// </summary>
    [JsonPropertyName("error")]
    public string? Error { get; set; }

    /// <summary>
    /// Gets or sets the command start time (JSON property <c>started_at</c>, RFC3339 format on the wire).
    /// </summary>
    [JsonPropertyName("started_at")]
    public DateTime? StartedAt { get; set; }

    /// <summary>
    /// Gets or sets the command finish time (JSON property <c>finished_at</c>, RFC3339 format on the wire).
    /// </summary>
    [JsonPropertyName("finished_at")]
    public DateTime? FinishedAt { get; set; }
}

/// <summary>
/// Background command logs and incremental cursor.
/// </summary>
/// <remarks>
/// This type declares no JSON property names.
/// </remarks>
public class CommandLogs
{
    /// <summary>
    /// Gets or sets raw stdout/stderr content. Required.
    /// </summary>
    public required string Content { get; set; }

    /// <summary>
    /// Gets or sets the latest cursor for incremental log polling; may be null.
    /// </summary>
    public long? Cursor { get; set; }
}

/// <summary>
/// Supported programming languages for code execution, as string constants.
/// </summary>
public static class SupportedLanguages
{
    /// <summary>
    /// Python language (<c>"python"</c>).
    /// </summary>
    public const string Python = "python";

    /// <summary>
    /// Go language (<c>"go"</c>).
    /// </summary>
    public const string Go = "go";

    /// <summary>
    /// JavaScript language (<c>"javascript"</c>).
    /// </summary>
    public const string JavaScript = "javascript";

    /// <summary>
    /// TypeScript language (<c>"typescript"</c>).
    /// </summary>
    public const string TypeScript = "typescript";

    /// <summary>
    /// Bash shell (<c>"bash"</c>).
    /// </summary>
    public const string Bash = "bash";

    /// <summary>
    /// Java language (<c>"java"</c>).
    /// </summary>
    public const string Java = "java";
}

/// <summary>
/// Raw metrics from the execd service.
/// </summary>
/// <remarks>
/// Every member is nullable and mapped to a snake_case JSON property.
/// <see cref="SandboxMetrics"/> declares the same quantities with non-nullable types.
/// </remarks>
public class Metrics
{
    /// <summary>
    /// Gets or sets the CPU count (JSON property <c>cpu_count</c>).
    /// </summary>
    [JsonPropertyName("cpu_count")]
    public int? CpuCount { get; set; }

    /// <summary>
    /// Gets or sets the CPU usage percentage (JSON property <c>cpu_used_pct</c>).
    /// </summary>
    [JsonPropertyName("cpu_used_pct")]
    public double? CpuUsedPct { get; set; }

    /// <summary>
    /// Gets or sets the total memory in MiB (JSON property <c>mem_total_mib</c>).
    /// </summary>
    [JsonPropertyName("mem_total_mib")]
    public double? MemTotalMib { get; set; }

    /// <summary>
    /// Gets or sets the used memory in MiB (JSON property <c>mem_used_mib</c>).
    /// </summary>
    [JsonPropertyName("mem_used_mib")]
    public double? MemUsedMib { get; set; }

    /// <summary>
    /// Gets or sets the timestamp (JSON property <c>timestamp</c>).
    /// </summary>
    // NOTE(review): the unit of this timestamp is not stated in this file — confirm against the execd API.
    [JsonPropertyName("timestamp")]
    public long? Timestamp { get; set; }
}

/// <summary>
/// Normalized sandbox metrics.
/// </summary>
/// <remarks>
/// Declares the same quantities as <see cref="Metrics"/>, but with non-nullable members
/// and no JSON property names. Members default to zero when not assigned.
/// </remarks>
public class SandboxMetrics
{
    /// <summary>
    /// Gets or sets the CPU count.
    /// </summary>
    public int CpuCount { get; set; }

    /// <summary>
    /// Gets or sets the CPU usage percentage.
    /// </summary>
    public double CpuUsedPercentage { get; set; }

    /// <summary>
    /// Gets or sets the total memory in MiB.
    /// </summary>
    public double MemoryTotalMiB { get; set; }

    /// <summary>
    /// Gets or sets the used memory in MiB.
    /// </summary>
    public double MemoryUsedMiB { get; set; }

    /// <summary>
    /// Gets or sets the timestamp.
    /// </summary>
    // NOTE(review): the unit of this timestamp is not stated in this file — confirm.
    public long Timestamp { get; set; }
}

/// <summary>
/// Response from ping endpoint. Intentionally declares no members.
/// </summary>
public class PingResponse
{
    // Empty response - ping just returns 200 OK
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Models/Execution.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System.Text.Json.Serialization;

namespace OpenSandbox.Models;

/// <summary>
/// An output message from command execution.
/// </summary>
/// <remarks>
/// This type declares no JSON property names.
/// </remarks>
public class OutputMessage
{
    /// <summary>
    /// Gets or sets the text content. Required.
    /// </summary>
    public required string Text { get; set; }

    /// <summary>
    /// Gets or sets the timestamp in milliseconds. Required.
    /// </summary>
    public required long Timestamp { get; set; }

    /// <summary>
    /// Gets or sets whether this is an error message.
    /// Defaults to <c>false</c> because no initializer is declared.
    /// </summary>
    public bool IsError { get; set; }
}

/// <summary>
/// A result from command execution.
/// </summary>
/// <remarks>
/// This type declares no JSON property names.
/// </remarks>
public class ExecutionResult
{
    /// <summary>
    /// Gets or sets the text content; may be null.
    /// </summary>
    public string? Text { get; set; }

    /// <summary>
    /// Gets or sets the timestamp in milliseconds. Required.
    /// </summary>
    public required long Timestamp { get; set; }

    /// <summary>
    /// Gets or sets the raw mime map from execd event; exposed as a read-only dictionary and may be null.
    /// </summary>
    public IReadOnlyDictionary<string, object>? Raw { get; set; }
}

/// <summary>
/// An error from command execution.
/// </summary>
/// <remarks>
/// All four members are required. This type declares no JSON property names.
/// </remarks>
public class ExecutionError
{
    /// <summary>
    /// Gets or sets the error name.
    /// </summary>
    public required string Name { get; set; }

    /// <summary>
    /// Gets or sets the error value.
    /// </summary>
    public required string Value { get; set; }

    /// <summary>
    /// Gets or sets the timestamp in milliseconds.
    /// </summary>
    public required long Timestamp { get; set; }

    /// <summary>
    /// Gets or sets the traceback lines, exposed as a read-only list.
    /// </summary>
    public required IReadOnlyList<string> Traceback { get; set; }
}

/// <summary>
/// Completion information for command execution.
/// </summary>
/// <remarks>
/// Both members are required. This type declares no JSON property names.
/// </remarks>
public class ExecutionComplete
{
    /// <summary>
    /// Gets or sets the timestamp in milliseconds.
    /// </summary>
    public required long Timestamp { get; set; }

    /// <summary>
    /// Gets or sets the execution time in milliseconds.
    /// </summary>
    public required long ExecutionTimeMs { get; set; }
}

/// <summary>
/// Initialization information for command execution.
/// </summary>
/// <remarks>
/// Both members are required. This type declares no JSON property names.
/// </remarks>
public class ExecutionInit
{
    /// <summary>
    /// Gets or sets the execution ID.
    /// </summary>
    public required string Id { get; set; }

    /// <summary>
    /// Gets or sets the timestamp in milliseconds.
    /// </summary>
    public required long Timestamp { get; set; }
}

/// <summary>
/// Logs from command execution.
/// </summary>
/// <remarks>
/// Both lists are created empty with the instance and are exposed through getter-only
/// properties: the list instances cannot be replaced, but their contents can be modified.
/// </remarks>
public class ExecutionLogs
{
    /// <summary>
    /// Gets the stdout messages.
    /// </summary>
    public List<OutputMessage> Stdout { get; } = new();

    /// <summary>
    /// Gets the stderr messages.
    /// </summary>
    public List<OutputMessage> Stderr { get; } = new();
}

/// <summary>
/// Result of a command execution.
/// </summary>
/// <remarks>
/// <see cref="Logs"/> and <see cref="Results"/> are created empty with the instance and
/// are getter-only; the remaining members are nullable and settable.
/// </remarks>
public class Execution
{
    /// <summary>
    /// Gets or sets the execution ID; may be null.
    /// </summary>
    public string? Id { get; set; }

    /// <summary>
    /// Gets or sets the execution count; may be null.
    /// </summary>
    public int? ExecutionCount { get; set; }

    /// <summary>
    /// Gets the execution logs (stdout and stderr message lists).
    /// </summary>
    public ExecutionLogs Logs { get; } = new();

    /// <summary>
    /// Gets the execution results.
    /// </summary>
    public List<ExecutionResult> Results { get; } = new();

    /// <summary>
    /// Gets or sets the execution error; may be null.
    /// </summary>
    public ExecutionError? Error { get; set; }

    /// <summary>
    /// Gets or sets the completion information; may be null.
    /// </summary>
    public ExecutionComplete? Complete { get; set; }
}

/// <summary>
/// Handlers for execution events.
/// </summary>
/// <remarks>
/// Each handler is an optional (nullable) delegate that receives the event payload and
/// returns a <c>Task</c>.
/// </remarks>
// NOTE(review): whether handlers are awaited, and in what order, is decided by the code that
// invokes them, which is not part of this class — confirm there before relying on ordering.
public class ExecutionHandlers
{
    /// <summary>
    /// Gets or sets the handler for stdout messages.
    /// </summary>
    public Func<OutputMessage, Task>? OnStdout { get; set; }

    /// <summary>
    /// Gets or sets the handler for stderr messages.
    /// </summary>
    public Func<OutputMessage, Task>? OnStderr { get; set; }

    /// <summary>
    /// Gets or sets the handler for execution results.
    /// </summary>
    public Func<ExecutionResult, Task>? OnResult { get; set; }

    /// <summary>
    /// Gets or sets the handler for execution completion.
    /// </summary>
    public Func<ExecutionComplete, Task>? OnExecutionComplete { get; set; }

    /// <summary>
    /// Gets or sets the handler for execution errors.
    /// </summary>
    public Func<ExecutionError, Task>? OnError { get; set; }

    /// <summary>
    /// Gets or sets the handler for execution initialization.
    /// </summary>
    public Func<ExecutionInit, Task>? OnInit { get; set; }
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Models/Filesystem.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System.Text.Json.Serialization;

namespace OpenSandbox.Models;

/// <summary>
/// Information about a file in the sandbox.
/// </summary>
/// <remarks>
/// Only <see cref="Path"/> is required; every other member is nullable.
/// </remarks>
public class SandboxFileInfo
{
    /// <summary>
    /// Gets or sets the file path (JSON property <c>path</c>).
    /// </summary>
    [JsonPropertyName("path")]
    public required string Path { get; set; }

    /// <summary>
    /// Gets or sets the file size in bytes (JSON property <c>size</c>).
    /// </summary>
    [JsonPropertyName("size")]
    public long? Size { get; set; }

    /// <summary>
    /// Gets or sets the last modification time (JSON property <c>modified_at</c>).
    /// </summary>
    [JsonPropertyName("modified_at")]
    public DateTime? ModifiedAt { get; set; }

    /// <summary>
    /// Gets or sets the creation time (JSON property <c>created_at</c>).
    /// </summary>
    [JsonPropertyName("created_at")]
    public DateTime? CreatedAt { get; set; }

    /// <summary>
    /// Gets or sets the file mode (permissions) (JSON property <c>mode</c>).
    /// </summary>
    // NOTE(review): the numeric convention of the mode (e.g. decimal digits such as 644 versus
    // the integer value of an octal literal) is not visible in this file — confirm against the API.
    [JsonPropertyName("mode")]
    public int? Mode { get; set; }

    /// <summary>
    /// Gets or sets the file owner (JSON property <c>owner</c>).
    /// </summary>
    [JsonPropertyName("owner")]
    public string? Owner { get; set; }

    /// <summary>
    /// Gets or sets the file group (JSON property <c>group</c>).
    /// </summary>
    [JsonPropertyName("group")]
    public string? Group { get; set; }
}

/// <summary>
/// File permission settings.
/// </summary>
public class Permission
{
    /// <summary>
    /// Gets or sets the file mode (permissions) (JSON property <c>mode</c>).
    /// Non-nullable: the value is <c>0</c> when not assigned.
    /// </summary>
    // NOTE(review): the numeric convention of the mode is not visible in this file — confirm against the API.
    [JsonPropertyName("mode")]
    public int Mode { get; set; }

    /// <summary>
    /// Gets or sets the file owner (JSON property <c>owner</c>); may be null.
    /// </summary>
    [JsonPropertyName("owner")]
    public string? Owner { get; set; }

    /// <summary>
    /// Gets or sets the file group (JSON property <c>group</c>); may be null.
    /// </summary>
    [JsonPropertyName("group")]
    public string? Group { get; set; }
}

/// <summary>
/// File metadata for upload operations.
/// </summary>
/// <remarks>
/// Only <see cref="Path"/> is required; mode, owner and group are nullable.
/// </remarks>
public class FileMetadata
{
    /// <summary>
    /// Gets or sets the file path (JSON property <c>path</c>).
    /// </summary>
    [JsonPropertyName("path")]
    public required string Path { get; set; }

    /// <summary>
    /// Gets or sets the file mode (permissions) (JSON property <c>mode</c>).
    /// </summary>
    // NOTE(review): the numeric convention of the mode is not visible in this file — confirm against the API.
    [JsonPropertyName("mode")]
    public int? Mode { get; set; }

    /// <summary>
    /// Gets or sets the file owner (JSON property <c>owner</c>).
    /// </summary>
    [JsonPropertyName("owner")]
    public string? Owner { get; set; }

    /// <summary>
    /// Gets or sets the file group (JSON property <c>group</c>).
    /// </summary>
    [JsonPropertyName("group")]
    public string? Group { get; set; }
}

/// <summary>
/// Entry for writing a file.
/// </summary>
/// <remarks>
/// Only <see cref="Path"/> is required. This type declares no JSON property names.
/// </remarks>
public class WriteEntry
{
    /// <summary>
    /// Gets or sets the file path.
    /// </summary>
    public required string Path { get; set; }

    /// <summary>
    /// Gets or sets the file data.
    /// Supports: string, byte[], Stream.
    /// </summary>
    // The property is typed as object, so the supported types listed above are not enforced
    // by the compiler; any checking has to happen where the entry is consumed.
    public object? Data { get; set; }

    /// <summary>
    /// Gets or sets the file mode (permissions).
    /// </summary>
    public int? Mode { get; set; }

    /// <summary>
    /// Gets or sets the file owner.
    /// </summary>
    public string? Owner { get; set; }

    /// <summary>
    /// Gets or sets the file group.
    /// </summary>
    public string? Group { get; set; }
}

/// <summary>
/// Entry for creating a directory.
/// </summary>
/// <remarks>
/// Only <see cref="Path"/> is required. This type declares no JSON property names.
/// </remarks>
public class CreateDirectoryEntry
{
    /// <summary>
    /// Gets or sets the directory path.
    /// </summary>
    public required string Path { get; set; }

    /// <summary>
    /// Gets or sets the directory mode (permissions); may be null.
    /// </summary>
    public int? Mode { get; set; }

    /// <summary>
    /// Gets or sets the directory owner; may be null.
    /// </summary>
    public string? Owner { get; set; }

    /// <summary>
    /// Gets or sets the directory group; may be null.
    /// </summary>
    public string? Group { get; set; }
}

/// <summary>
/// Entry for searching files.
/// </summary>
/// <remarks>
/// This type declares no JSON property names.
/// </remarks>
public class SearchEntry
{
    /// <summary>
    /// Gets or sets the search path. Required.
    /// </summary>
    public required string Path { get; set; }

    /// <summary>
    /// Gets or sets the search pattern (e.g., "*.txt"); may be null.
    /// </summary>
    public string? Pattern { get; set; }
}

/// <summary>
/// Entry for moving/renaming a file.
/// </summary>
/// <remarks>
/// Both members are required. This type declares no JSON property names;
/// <see cref="RenameFileItem"/> carries the same two members with explicit JSON names.
/// </remarks>
public class MoveEntry
{
    /// <summary>
    /// Gets or sets the source path.
    /// </summary>
    public required string Src { get; set; }

    /// <summary>
    /// Gets or sets the destination path.
    /// </summary>
    public required string Dest { get; set; }
}

/// <summary>
/// Entry for replacing content in a file.
/// </summary>
/// <remarks>
/// All three members are required. This type declares no JSON property names;
/// <see cref="ReplaceFileContentItem"/> is the model that carries the old/new pair with
/// explicit JSON names.
/// </remarks>
public class ContentReplaceEntry
{
    /// <summary>
    /// Gets or sets the file path.
    /// </summary>
    public required string Path { get; set; }

    /// <summary>
    /// Gets or sets the old content to replace.
    /// </summary>
    public required string OldContent { get; set; }

    /// <summary>
    /// Gets or sets the new content.
    /// </summary>
    public required string NewContent { get; set; }
}

/// <summary>
/// Entry for setting file permissions.
/// </summary>
/// <remarks>
/// <see cref="Path"/> and <see cref="Mode"/> are required; owner and group are nullable.
/// This type declares no JSON property names.
/// </remarks>
public class SetPermissionEntry
{
    /// <summary>
    /// Gets or sets the file path.
    /// </summary>
    public required string Path { get; set; }

    /// <summary>
    /// Gets or sets the file mode (permissions).
    /// </summary>
    // NOTE(review): the numeric convention of the mode is not visible in this file — confirm against the API.
    public required int Mode { get; set; }

    /// <summary>
    /// Gets or sets the file owner.
    /// </summary>
    public string? Owner { get; set; }

    /// <summary>
    /// Gets or sets the file group.
    /// </summary>
    public string? Group { get; set; }
}

/// <summary>
/// Options for reading a file as text.
/// </summary>
/// <remarks>
/// Both members are nullable and have no initializer, so an unset option is <c>null</c>.
/// </remarks>
public class ReadFileOptions
{
    /// <summary>
    /// Gets or sets the text encoding (default: utf-8).
    /// </summary>
    // The utf-8 default is not applied by this class (the property starts as null); it is
    // presumably applied where the option is consumed — verify against the caller.
    public string? Encoding { get; set; }

    /// <summary>
    /// Gets or sets the byte range to read (e.g., "bytes=0-1023").
    /// </summary>
    public string? Range { get; set; }
}

/// <summary>
/// Options for reading a file as bytes.
/// </summary>
public class ReadBytesOptions
{
    /// <summary>
    /// Gets or sets the byte range to read (e.g., "bytes=0-1023"); <c>null</c> when unset.
    /// </summary>
    public string? Range { get; set; }
}

/// <summary>
/// API request model for renaming files.
/// </summary>
/// <remarks>
/// Both members are required and carry explicit JSON property names.
/// </remarks>
public class RenameFileItem
{
    /// <summary>
    /// Gets or sets the source path (JSON property <c>src</c>).
    /// </summary>
    [JsonPropertyName("src")]
    public required string Src { get; set; }

    /// <summary>
    /// Gets or sets the destination path (JSON property <c>dest</c>).
    /// </summary>
    [JsonPropertyName("dest")]
    public required string Dest { get; set; }
}

/// <summary>
/// API request model for replacing file content.
/// </summary>
/// <remarks>
/// Both members are required and carry explicit JSON property names. The model holds no
/// file path of its own.
/// </remarks>
public class ReplaceFileContentItem
{
    /// <summary>
    /// Gets or sets the old content to replace (JSON property <c>old</c>).
    /// </summary>
    [JsonPropertyName("old")]
    public required string Old { get; set; }

    /// <summary>
    /// Gets or sets the new content (JSON property <c>new</c>).
    /// </summary>
    [JsonPropertyName("new")]
    public required string New { get; set; }
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Models/Sandboxes.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System.Text.Json.Serialization;

namespace OpenSandbox.Models;

/// <summary>
/// Authentication credentials for pulling container images.
/// </summary>
/// <remarks>
/// All three members are nullable; this class does not enforce any particular combination.
/// </remarks>
public class ImageAuth
{
    /// <summary>
    /// Gets or sets the username for authentication (JSON property <c>username</c>).
    /// </summary>
    [JsonPropertyName("username")]
    public string? Username { get; set; }

    /// <summary>
    /// Gets or sets the password for authentication (JSON property <c>password</c>).
    /// </summary>
    [JsonPropertyName("password")]
    public string? Password { get; set; }

    /// <summary>
    /// Gets or sets the token for authentication (JSON property <c>token</c>).
    /// </summary>
    [JsonPropertyName("token")]
    public string? Token { get; set; }
}

/// <summary>
/// Specification for a container image.
/// </summary>
public class ImageSpec
{
    /// <summary>
    /// Gets or sets the image URI (e.g., "python:3.11") (JSON property <c>uri</c>). Required.
    /// </summary>
    [JsonPropertyName("uri")]
    public required string Uri { get; set; }

    /// <summary>
    /// Gets or sets the optional authentication credentials (JSON property <c>auth</c>); may be null.
    /// </summary>
    [JsonPropertyName("auth")]
    public ImageAuth? Auth { get; set; }
}

/// <summary>
/// Action for a network rule. Converted to and from JSON as a string through the
/// <c>JsonStringEnumConverter</c> declared on the type.
/// </summary>
// NOTE(review): JsonStringEnumConverter derives the emitted text from the member identifier
// and its naming policy; it is worth confirming that the [JsonPropertyName] attributes on the
// members below actually take effect, and that values are written as "allow"/"deny" rather
// than "Allow"/"Deny" with the serializer options this SDK uses.
[JsonConverter(typeof(JsonStringEnumConverter))]
public enum NetworkRuleAction
{
    /// <summary>
    /// Allow the network traffic.
    /// </summary>
    [JsonPropertyName("allow")]
    Allow,

    /// <summary>
    /// Deny the network traffic.
    /// </summary>
    [JsonPropertyName("deny")]
    Deny
}

/// <summary>
/// A network rule for egress traffic.
/// </summary>
/// <remarks>
/// Both members are required.
/// </remarks>
public class NetworkRule
{
    /// <summary>
    /// Gets or sets whether to allow or deny matching targets (JSON property <c>action</c>).
    /// </summary>
    [JsonPropertyName("action")]
    public required NetworkRuleAction Action { get; set; }

    /// <summary>
    /// Gets or sets the FQDN or wildcard domain (e.g., "example.com", "*.example.com")
    /// (JSON property <c>target</c>).
    /// </summary>
    [JsonPropertyName("target")]
    public required string Target { get; set; }
}

/// <summary>
/// Network policy for sandbox egress traffic.
/// </summary>
/// <remarks>
/// Both members are nullable and have no initializer.
/// </remarks>
public class NetworkPolicy
{
    /// <summary>
    /// Gets or sets the default action when no egress rule matches. Defaults to "deny"
    /// (JSON property <c>defaultAction</c>).
    /// </summary>
    // The "deny" default is not applied by this class (the property starts as null); it is
    // presumably applied by the service when the field is absent — verify against the API.
    [JsonPropertyName("defaultAction")]
    public NetworkRuleAction? DefaultAction { get; set; }

    /// <summary>
    /// Gets or sets the list of egress rules evaluated in order (JSON property <c>egress</c>).
    /// </summary>
    [JsonPropertyName("egress")]
    public List<NetworkRule>? Egress { get; set; }
}

/// <summary>
/// Host path bind mount backend for a volume.
/// </summary>
public class Host
{
    /// <summary>
    /// Gets or sets the absolute host path (JSON property <c>path</c>). Required.
    /// </summary>
    // This class does not itself check that the value is absolute.
    [JsonPropertyName("path")]
    public required string Path { get; set; }
}

/// <summary>
/// Platform-managed named volume backend (PVC in k8s, named volume in Docker).
/// Used as the <c>Pvc</c> member of <c>Volume</c>.
/// </summary>
public class PVC
{
    /// <summary>
    /// Gets or sets the target claim/volume name.
    /// Required; serialized as the JSON property <c>claimName</c>.
    /// </summary>
    [JsonPropertyName("claimName")]
    public required string ClaimName { get; set; }
}

/// <summary>
/// Storage mount definition for sandbox creation.
/// Exactly one backend (Host or PVC) should be provided per volume.
/// </summary>
/// <remarks>
/// This type does not itself enforce the "exactly one backend" rule; both
/// <c>Host</c> and <c>Pvc</c> are independently nullable properties.
/// </remarks>
public class Volume
{
    /// <summary>
    /// Gets or sets the unique volume name within this sandbox request.
    /// Required; serialized as the JSON property <c>name</c>.
    /// </summary>
    [JsonPropertyName("name")]
    public required string Name { get; set; }

    /// <summary>
    /// Gets or sets the host-path backend configuration.
    /// Serialized as the JSON property <c>host</c>; null when this backend is not used.
    /// </summary>
    [JsonPropertyName("host")]
    public Host? Host { get; set; }

    /// <summary>
    /// Gets or sets the PVC/named-volume backend configuration.
    /// Serialized as the JSON property <c>pvc</c>; null when this backend is not used.
    /// </summary>
    [JsonPropertyName("pvc")]
    public PVC? Pvc { get; set; }

    /// <summary>
    /// Gets or sets the absolute mount path inside the container.
    /// Required; serialized as the JSON property <c>mountPath</c>.
    /// </summary>
    [JsonPropertyName("mountPath")]
    public required string MountPath { get; set; }

    /// <summary>
    /// Gets or sets whether this volume is mounted read-only.
    /// Serialized as the JSON property <c>readOnly</c>; null leaves the choice unspecified.
    /// </summary>
    [JsonPropertyName("readOnly")]
    public bool? ReadOnly { get; set; }

    /// <summary>
    /// Gets or sets the optional relative subpath under the volume backend.
    /// Serialized as the JSON property <c>subPath</c>.
    /// </summary>
    [JsonPropertyName("subPath")]
    public string? SubPath { get; set; }
}

/// <summary>
/// Status of a sandbox: a state name with an optional reason and message.
/// </summary>
public class SandboxStatus
{
    /// <summary>
    /// Gets or sets the current state of the sandbox.
    /// Required; a plain string serialized as the JSON property <c>state</c>.
    /// The <c>SandboxStates</c> class lists the known state names as constants.
    /// </summary>
    [JsonPropertyName("state")]
    public required string State { get; set; }

    /// <summary>
    /// Gets or sets the reason for the current state.
    /// Serialized as the JSON property <c>reason</c>; may be null.
    /// </summary>
    [JsonPropertyName("reason")]
    public string? Reason { get; set; }

    /// <summary>
    /// Gets or sets additional message about the current state.
    /// Serialized as the JSON property <c>message</c>; may be null.
    /// </summary>
    [JsonPropertyName("message")]
    public string? Message { get; set; }
}

/// <summary>
/// Information about a sandbox.
/// Also used as the item type of <c>ListSandboxesResponse.Items</c>.
/// </summary>
public class SandboxInfo
{
    /// <summary>
    /// Gets or sets the sandbox ID.
    /// Required; serialized as the JSON property <c>id</c>.
    /// </summary>
    [JsonPropertyName("id")]
    public required string Id { get; set; }

    /// <summary>
    /// Gets or sets the container image specification.
    /// Required; serialized as the JSON property <c>image</c>.
    /// </summary>
    [JsonPropertyName("image")]
    public required ImageSpec Image { get; set; }

    /// <summary>
    /// Gets or sets the entrypoint command.
    /// Required; serialized as the JSON property <c>entrypoint</c>.
    /// </summary>
    [JsonPropertyName("entrypoint")]
    public required IReadOnlyList<string> Entrypoint { get; set; }

    /// <summary>
    /// Gets or sets the custom metadata tags.
    /// Serialized as the JSON property <c>metadata</c>; may be null.
    /// </summary>
    [JsonPropertyName("metadata")]
    public IReadOnlyDictionary<string, string>? Metadata { get; set; }

    /// <summary>
    /// Gets or sets the sandbox status.
    /// Required; serialized as the JSON property <c>status</c>.
    /// </summary>
    [JsonPropertyName("status")]
    public required SandboxStatus Status { get; set; }

    /// <summary>
    /// Gets or sets the sandbox creation time.
    /// Required; serialized as the JSON property <c>createdAt</c>.
    /// </summary>
    [JsonPropertyName("createdAt")]
    public required DateTime CreatedAt { get; set; }

    /// <summary>
    /// Gets or sets the sandbox expiration time.
    /// Serialized as the JSON property <c>expiresAt</c>; may be null.
    /// </summary>
    [JsonPropertyName("expiresAt")]
    public DateTime? ExpiresAt { get; set; }
}

/// <summary>
/// Request to create a new sandbox.
/// </summary>
/// <remarks>
/// <c>Sandbox.CreateAsync</c> builds this request from <c>SandboxCreateOptions</c>,
/// filling in defaults for entrypoint, timeout and resource limits.
/// </remarks>
public class CreateSandboxRequest
{
    /// <summary>
    /// Gets or sets the container image specification.
    /// Required; serialized as the JSON property <c>image</c>.
    /// </summary>
    [JsonPropertyName("image")]
    public required ImageSpec Image { get; set; }

    /// <summary>
    /// Gets or sets the entrypoint command.
    /// Required; serialized as the JSON property <c>entrypoint</c>.
    /// </summary>
    [JsonPropertyName("entrypoint")]
    public required IReadOnlyList<string> Entrypoint { get; set; }

    /// <summary>
    /// Gets or sets the timeout in seconds.
    /// Serialized as the JSON property <c>timeout</c>. <c>Sandbox.CreateAsync</c> sets this
    /// to null when manual cleanup is requested.
    /// </summary>
    [JsonPropertyName("timeout")]
    public int? Timeout { get; set; }

    /// <summary>
    /// Gets or sets the resource limits.
    /// Required; serialized as the JSON property <c>resourceLimits</c>.
    /// </summary>
    [JsonPropertyName("resourceLimits")]
    public required IReadOnlyDictionary<string, string> ResourceLimits { get; set; }

    /// <summary>
    /// Gets or sets the environment variables.
    /// Serialized as the JSON property <c>env</c>; may be null.
    /// </summary>
    [JsonPropertyName("env")]
    public IReadOnlyDictionary<string, string>? Env { get; set; }

    /// <summary>
    /// Gets or sets the custom metadata tags.
    /// Serialized as the JSON property <c>metadata</c>; may be null.
    /// </summary>
    [JsonPropertyName("metadata")]
    public IReadOnlyDictionary<string, string>? Metadata { get; set; }

    /// <summary>
    /// Gets or sets the network policy.
    /// Serialized as the JSON property <c>networkPolicy</c>; may be null.
    /// </summary>
    [JsonPropertyName("networkPolicy")]
    public NetworkPolicy? NetworkPolicy { get; set; }

    /// <summary>
    /// Gets or sets storage volumes to mount into the sandbox.
    /// Serialized as the JSON property <c>volumes</c>; may be null.
    /// </summary>
    [JsonPropertyName("volumes")]
    public IReadOnlyList<Volume>? Volumes { get; set; }

    /// <summary>
    /// Gets or sets the extension parameters.
    /// Serialized as the JSON property <c>extensions</c>; values are typed as <c>object</c>.
    /// </summary>
    [JsonPropertyName("extensions")]
    public IReadOnlyDictionary<string, object>? Extensions { get; set; }
}

/// <summary>
/// Response from creating a sandbox.
/// </summary>
/// <remarks>
/// <c>Sandbox.CreateAsync</c> reads <c>Id</c> from this response to look up the sandbox endpoints.
/// </remarks>
public class CreateSandboxResponse
{
    /// <summary>
    /// Gets or sets the sandbox ID.
    /// Required; serialized as the JSON property <c>id</c>.
    /// </summary>
    [JsonPropertyName("id")]
    public required string Id { get; set; }

    /// <summary>
    /// Gets or sets the sandbox status.
    /// Required; serialized as the JSON property <c>status</c>.
    /// </summary>
    [JsonPropertyName("status")]
    public required SandboxStatus Status { get; set; }

    /// <summary>
    /// Gets or sets the custom metadata tags.
    /// Serialized as the JSON property <c>metadata</c>; may be null.
    /// </summary>
    [JsonPropertyName("metadata")]
    public IReadOnlyDictionary<string, string>? Metadata { get; set; }

    /// <summary>
    /// Gets or sets the sandbox expiration time.
    /// Serialized as the JSON property <c>expiresAt</c>; may be null.
    /// </summary>
    [JsonPropertyName("expiresAt")]
    public DateTime? ExpiresAt { get; set; }

    /// <summary>
    /// Gets or sets the sandbox creation time.
    /// Required; serialized as the JSON property <c>createdAt</c>.
    /// </summary>
    [JsonPropertyName("createdAt")]
    public required DateTime CreatedAt { get; set; }

    /// <summary>
    /// Gets or sets the entrypoint command.
    /// Required; serialized as the JSON property <c>entrypoint</c>.
    /// </summary>
    [JsonPropertyName("entrypoint")]
    public required IReadOnlyList<string> Entrypoint { get; set; }
}

/// <summary>
/// Pagination information for list responses.
/// </summary>
/// <remarks>
/// All members are non-nullable value types, so a field missing from the JSON payload
/// is left at its default (<c>0</c> or <c>false</c>).
/// </remarks>
public class PaginationInfo
{
    /// <summary>
    /// Gets or sets the current page number.
    /// Serialized as the JSON property <c>page</c>.
    /// </summary>
    [JsonPropertyName("page")]
    public int Page { get; set; }

    /// <summary>
    /// Gets or sets the page size.
    /// Serialized as the JSON property <c>pageSize</c>.
    /// </summary>
    [JsonPropertyName("pageSize")]
    public int PageSize { get; set; }

    /// <summary>
    /// Gets or sets the total number of items.
    /// Serialized as the JSON property <c>totalItems</c>.
    /// </summary>
    [JsonPropertyName("totalItems")]
    public int TotalItems { get; set; }

    /// <summary>
    /// Gets or sets the total number of pages.
    /// Serialized as the JSON property <c>totalPages</c>.
    /// </summary>
    [JsonPropertyName("totalPages")]
    public int TotalPages { get; set; }

    /// <summary>
    /// Gets or sets whether there is a next page.
    /// Serialized as the JSON property <c>hasNextPage</c>.
    /// </summary>
    [JsonPropertyName("hasNextPage")]
    public bool HasNextPage { get; set; }
}

/// <summary>
/// Response from listing sandboxes: one page of items plus optional pagination data.
/// </summary>
public class ListSandboxesResponse
{
    /// <summary>
    /// Gets or sets the list of sandboxes.
    /// Required; serialized as the JSON property <c>items</c>.
    /// </summary>
    [JsonPropertyName("items")]
    public required IReadOnlyList<SandboxInfo> Items { get; set; }

    /// <summary>
    /// Gets or sets the pagination information.
    /// Serialized as the JSON property <c>pagination</c>; may be null.
    /// </summary>
    [JsonPropertyName("pagination")]
    public PaginationInfo? Pagination { get; set; }
}

/// <summary>
/// Parameters for listing sandboxes.
/// </summary>
/// <remarks>
/// Unlike the request/response models, these properties carry no <c>JsonPropertyName</c>
/// attributes. NOTE(review): presumably they are sent as query parameters rather than a
/// JSON body; confirm against the lifecycle adapter.
/// </remarks>
public class ListSandboxesParams
{
    /// <summary>
    /// Gets or sets the states to filter by. Null applies no state filter from this object.
    /// </summary>
    public IReadOnlyList<string>? States { get; set; }

    /// <summary>
    /// Gets or sets the metadata to filter by. Null applies no metadata filter from this object.
    /// </summary>
    public IReadOnlyDictionary<string, string>? Metadata { get; set; }

    /// <summary>
    /// Gets or sets the page number. Null leaves the page unspecified.
    /// </summary>
    public int? Page { get; set; }

    /// <summary>
    /// Gets or sets the page size. Null leaves the page size unspecified.
    /// </summary>
    public int? PageSize { get; set; }
}

/// <summary>
/// Request to renew sandbox expiration.
/// </summary>
public class RenewSandboxExpirationRequest
{
    /// <summary>
    /// Gets or sets the new expiration time as ISO 8601 string.
    /// Required; serialized as the JSON property <c>expiresAt</c>. This is a pre-formatted
    /// string, whereas the matching response exposes a <c>DateTime</c>.
    /// </summary>
    [JsonPropertyName("expiresAt")]
    public required string ExpiresAt { get; set; }
}

/// <summary>
/// Response from renewing sandbox expiration.
/// </summary>
public class RenewSandboxExpirationResponse
{
    /// <summary>
    /// Gets or sets the updated expiration time.
    /// Serialized as the JSON property <c>expiresAt</c>; may be null.
    /// </summary>
    [JsonPropertyName("expiresAt")]
    public DateTime? ExpiresAt { get; set; }
}

/// <summary>
/// Endpoint information for a sandbox port.
/// </summary>
/// <remarks>
/// The address carries no URL scheme: <c>Sandbox.CreateAsync</c> prefixes it with
/// <c>http://</c> or <c>https://</c> according to the connection protocol.
/// </remarks>
public class Endpoint
{
    /// <summary>
    /// Gets or sets the endpoint address (host:port or path).
    /// Required; serialized as the JSON property <c>endpoint</c>.
    /// </summary>
    [JsonPropertyName("endpoint")]
    public required string EndpointAddress { get; set; }

    /// <summary>
    /// Gets or sets headers that must be included when calling this endpoint.
    /// Serialized as the JSON property <c>headers</c>; initialized to an empty dictionary.
    /// </summary>
    [JsonPropertyName("headers")]
    public IReadOnlyDictionary<string, string> Headers { get; set; } = new Dictionary<string, string>();
}

/// <summary>
/// Known sandbox states.
/// </summary>
/// <remarks>
/// These are string constants rather than an enum; <c>SandboxStatus.State</c> is a plain
/// string that can be compared against them.
/// </remarks>
public static class SandboxStates
{
    /// <summary>
    /// Sandbox is being created. Value: <c>"Creating"</c>.
    /// </summary>
    public const string Creating = "Creating";

    /// <summary>
    /// Sandbox is running. Value: <c>"Running"</c>.
    /// </summary>
    public const string Running = "Running";

    /// <summary>
    /// Sandbox is being paused. Value: <c>"Pausing"</c>.
    /// </summary>
    public const string Pausing = "Pausing";

    /// <summary>
    /// Sandbox is paused. Value: <c>"Paused"</c>.
    /// </summary>
    public const string Paused = "Paused";

    /// <summary>
    /// Sandbox is being resumed. Value: <c>"Resuming"</c>.
    /// </summary>
    public const string Resuming = "Resuming";

    /// <summary>
    /// Sandbox is being deleted. Value: <c>"Deleting"</c>.
    /// </summary>
    public const string Deleting = "Deleting";

    /// <summary>
    /// Sandbox has been deleted. Value: <c>"Deleted"</c>.
    /// </summary>
    public const string Deleted = "Deleted";

    /// <summary>
    /// Sandbox is in an error state. Value: <c>"Error"</c>.
    /// </summary>
    public const string Error = "Error";
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/OpenSandbox.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <!-- Multi-targeted build: one assembly is produced per framework listed here. -->
    <TargetFrameworks>netstandard2.0;netstandard2.1;net6.0;net7.0;net8.0;net9.0;net10.0</TargetFrameworks>
    <LangVersion>latest</LangVersion>
    <Nullable>enable</Nullable>
    <ImplicitUsings>enable</ImplicitUsings>
    <RootNamespace>OpenSandbox</RootNamespace>
    <AssemblyName>OpenSandbox</AssemblyName>

    <!-- Package Information -->
    <PackageId>Alibaba.OpenSandbox</PackageId>
    <!-- OpenSandboxPackageVersion is not defined in this file; it must be supplied by the build. -->
    <Version>$(OpenSandboxPackageVersion)</Version>
    <Authors>Alibaba Group</Authors>
    <Company>Alibaba Group Holding Ltd.</Company>
    <Product>OpenSandbox SDK</Product>
    <Description>A C# SDK for low-level interaction with OpenSandbox. It provides the ability to create, manage, and interact with secure sandbox environments, including executing shell commands, managing files, and reading resource metrics.</Description>
    <Copyright>Copyright 2026 Alibaba Group Holding Ltd.</Copyright>
    <PackageLicenseExpression>Apache-2.0</PackageLicenseExpression>
    <PackageProjectUrl>https://open-sandbox.ai</PackageProjectUrl>
    <RepositoryUrl>https://github.com/alibaba/OpenSandbox.git</RepositoryUrl>
    <RepositoryType>git</RepositoryType>
    <PackageTags>sandbox;container;docker;execution;opensandbox;alibaba</PackageTags>
    <PackageReadmeFile>README.md</PackageReadmeFile>

    <!-- Build Settings -->
    <GenerateDocumentationFile>true</GenerateDocumentationFile>
    <!-- CS1591 = missing XML doc comment on a public member; suppressed so undocumented members do not warn. -->
    <NoWarn>$(NoWarn);CS1591</NoWarn>
    <!-- Warnings fail the build in Release only. -->
    <TreatWarningsAsErrors Condition="'$(Configuration)' == 'Release'">true</TreatWarningsAsErrors>
  </PropertyGroup>

  <!-- Expose internals to test project and code interpreter SDK -->
  <ItemGroup>
    <InternalsVisibleTo Include="OpenSandbox.Tests" />
    <InternalsVisibleTo Include="OpenSandbox.CodeInterpreter" />
    <InternalsVisibleTo Include="OpenSandbox.CodeInterpreter.Tests" />
  </ItemGroup>

  <!-- Common Dependencies -->
  <ItemGroup>
    <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="8.0.2" />
    <PackageReference Include="System.Text.Json" Version="8.0.5" />
    <!-- PrivateAssets=all keeps PolySharp out of the dependency list of the published package. -->
    <PackageReference Include="PolySharp" Version="1.14.1">
      <PrivateAssets>all</PrivateAssets>
      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
    </PackageReference>
  </ItemGroup>

  <!-- .NET Standard 2.0 specific dependencies -->
  <ItemGroup Condition="'$(TargetFramework)' == 'netstandard2.0'">
    <PackageReference Include="Microsoft.Bcl.AsyncInterfaces" Version="8.0.0" />
  </ItemGroup>


  <!-- Package files -->
  <!-- The README two directories up is packed at the package root, matching PackageReadmeFile above. -->
  <ItemGroup>
    <None Include="..\..\README.md" Pack="true" PackagePath="\" />
  </ItemGroup>

</Project>


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Options.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.Config;
using OpenSandbox.Factory;
using OpenSandbox.Models;

namespace OpenSandbox;

/// <summary>
/// Options for creating a new sandbox.
/// </summary>
/// <remarks>
/// Consumed by <c>Sandbox.CreateAsync</c>, which substitutes defaults for every
/// nullable option left unset.
/// </remarks>
public class SandboxCreateOptions
{
    /// <summary>
    /// Gets or sets the connection configuration.
    /// When null, a default-constructed <c>ConnectionConfig</c> is used.
    /// </summary>
    public ConnectionConfig? ConnectionConfig { get; set; }

    /// <summary>
    /// Gets or sets diagnostics options such as logging.
    /// When null (or when its logger factory is null), a no-op logger factory is used.
    /// </summary>
    public SdkDiagnosticsOptions? Diagnostics { get; set; }

    /// <summary>
    /// Gets or sets the adapter factory for advanced customization.
    /// When null, <c>DefaultAdapterFactory.Create()</c> is used.
    /// </summary>
    public IAdapterFactory? AdapterFactory { get; set; }

    /// <summary>
    /// Gets or sets the container image URI (e.g., "python:3.11").
    /// This is always a plain URI string; registry credentials are supplied separately
    /// through <c>ImageAuth</c>, and the two are combined into an <c>ImageSpec</c> on creation.
    /// </summary>
    public required string Image { get; set; }

    /// <summary>
    /// Gets or sets the image authentication credentials.
    /// Passed through as the <c>Auth</c> member of the request's image specification.
    /// </summary>
    public ImageAuth? ImageAuth { get; set; }

    /// <summary>
    /// Gets or sets the entrypoint command for the sandbox.
    /// Defaults to ["tail", "-f", "/dev/null"].
    /// When null, <c>Constants.DefaultEntrypoint</c> is sent.
    /// </summary>
    public IReadOnlyList<string>? Entrypoint { get; set; }

    /// <summary>
    /// Gets or sets the environment variables to inject into the sandbox.
    /// </summary>
    public IReadOnlyDictionary<string, string>? Env { get; set; }

    /// <summary>
    /// Gets or sets the custom metadata tags.
    /// </summary>
    public IReadOnlyDictionary<string, string>? Metadata { get; set; }

    /// <summary>
    /// Gets or sets the network policy for the sandbox.
    /// When set without a default action, the request is sent with <c>NetworkRuleAction.Deny</c>.
    /// </summary>
    public NetworkPolicy? NetworkPolicy { get; set; }

    /// <summary>
    /// Gets or sets storage volumes mounted into the sandbox.
    /// </summary>
    public IReadOnlyList<Volume>? Volumes { get; set; }

    /// <summary>
    /// Gets or sets the extension parameters.
    /// Values are strings here and are copied into the request's object-valued dictionary.
    /// </summary>
    public IReadOnlyDictionary<string, string>? Extensions { get; set; }

    /// <summary>
    /// Gets or sets the resource limits.
    /// When null, <c>Constants.DefaultResourceLimits</c> is sent.
    /// </summary>
    public IReadOnlyDictionary<string, string>? Resource { get; set; }

    /// <summary>
    /// Gets or sets the sandbox timeout in seconds.
    /// When null, <c>Constants.DefaultTimeoutSeconds</c> is sent. Ignored when
    /// <c>ManualCleanup</c> is true.
    /// </summary>
    public int? TimeoutSeconds { get; set; }

    /// <summary>
    /// Gets or sets whether the sandbox should disable automatic expiration and require explicit cleanup.
    /// When true, the request is sent with a null timeout regardless of <c>TimeoutSeconds</c>.
    /// </summary>
    public bool ManualCleanup { get; set; }

    /// <summary>
    /// Gets or sets whether to skip health checks during creation.
    /// When true, creation returns without waiting for readiness.
    /// </summary>
    public bool SkipHealthCheck { get; set; }

    /// <summary>
    /// Gets or sets a custom health check function.
    /// Forwarded to the readiness wait; has no effect when <c>SkipHealthCheck</c> is true.
    /// </summary>
    public Func<Sandbox, Task<bool>>? HealthCheck { get; set; }

    /// <summary>
    /// Gets or sets the timeout for waiting until ready in seconds.
    /// When null, <c>Constants.DefaultReadyTimeoutSeconds</c> is used.
    /// </summary>
    public int? ReadyTimeoutSeconds { get; set; }

    /// <summary>
    /// Gets or sets the health check polling interval in milliseconds.
    /// When null, <c>Constants.DefaultHealthCheckPollingIntervalMillis</c> is used.
    /// </summary>
    public int? HealthCheckPollingInterval { get; set; }
}

/// <summary>
/// Options for connecting to an existing sandbox.
/// </summary>
/// <remarks>
/// Consumed by <c>Sandbox.ConnectAsync</c>.
/// </remarks>
public class SandboxConnectOptions
{
    /// <summary>
    /// Gets or sets the connection configuration.
    /// When null, a default-constructed <c>ConnectionConfig</c> is used.
    /// </summary>
    public ConnectionConfig? ConnectionConfig { get; set; }

    /// <summary>
    /// Gets or sets diagnostics options such as logging.
    /// When null (or when its logger factory is null), a no-op logger factory is used.
    /// </summary>
    public SdkDiagnosticsOptions? Diagnostics { get; set; }

    /// <summary>
    /// Gets or sets the adapter factory for advanced customization.
    /// When null, <c>DefaultAdapterFactory.Create()</c> is used.
    /// </summary>
    public IAdapterFactory? AdapterFactory { get; set; }

    /// <summary>
    /// Gets or sets the ID of the sandbox to connect to.
    /// Required; used to look up the sandbox's endpoints.
    /// </summary>
    public required string SandboxId { get; set; }

    /// <summary>
    /// Gets or sets whether to skip health checks after connecting.
    /// </summary>
    public bool SkipHealthCheck { get; set; }

    /// <summary>
    /// Gets or sets a custom health check function.
    /// Receives the sandbox and returns a task producing a boolean result.
    /// </summary>
    public Func<Sandbox, Task<bool>>? HealthCheck { get; set; }

    /// <summary>
    /// Gets or sets the timeout for waiting until ready in seconds.
    /// </summary>
    public int? ReadyTimeoutSeconds { get; set; }

    /// <summary>
    /// Gets or sets the health check polling interval in milliseconds.
    /// </summary>
    public int? HealthCheckPollingInterval { get; set; }
}

/// <summary>
/// Options for resuming a sandbox.
/// </summary>
/// <remarks>
/// Unlike the create and connect options, this type has no custom health check delegate.
/// </remarks>
public class SandboxResumeOptions
{
    /// <summary>
    /// Gets or sets whether to skip health checks after resuming.
    /// </summary>
    public bool SkipHealthCheck { get; set; }

    /// <summary>
    /// Gets or sets the timeout for waiting until ready in seconds.
    /// Null leaves the value unspecified.
    /// </summary>
    public int? ReadyTimeoutSeconds { get; set; }

    /// <summary>
    /// Gets or sets the health check polling interval in milliseconds.
    /// Null leaves the value unspecified.
    /// </summary>
    public int? HealthCheckPollingInterval { get; set; }
}

/// <summary>
/// Options for waiting until a sandbox is ready.
/// </summary>
/// <remarks>
/// The numeric members are non-nullable, so an instance created without setting them
/// carries <c>0</c> for both. <c>Sandbox.CreateAsync</c> always sets both explicitly.
/// </remarks>
public class WaitUntilReadyOptions
{
    /// <summary>
    /// Gets or sets the timeout in seconds.
    /// </summary>
    public int ReadyTimeoutSeconds { get; set; }

    /// <summary>
    /// Gets or sets the polling interval in milliseconds.
    /// </summary>
    public int PollingIntervalMillis { get; set; }

    /// <summary>
    /// Gets or sets a custom health check function.
    /// Receives the sandbox and returns a task producing a boolean result; may be null.
    /// </summary>
    public Func<Sandbox, Task<bool>>? HealthCheck { get; set; }
}

/// <summary>
/// Options for creating a sandbox manager.
/// </summary>
/// <remarks>
/// Carries the same three infrastructure settings as the sandbox create and connect options.
/// </remarks>
public class SandboxManagerOptions
{
    /// <summary>
    /// Gets or sets the connection configuration. May be null.
    /// </summary>
    public ConnectionConfig? ConnectionConfig { get; set; }

    /// <summary>
    /// Gets or sets diagnostics options such as logging. May be null.
    /// </summary>
    public SdkDiagnosticsOptions? Diagnostics { get; set; }

    /// <summary>
    /// Gets or sets the adapter factory for advanced customization. May be null.
    /// </summary>
    public IAdapterFactory? AdapterFactory { get; set; }
}

/// <summary>
/// Filter options for listing sandboxes.
/// </summary>
/// <remarks>
/// Has the same four members as <c>ListSandboxesParams</c> in the models namespace.
/// </remarks>
public class SandboxFilter
{
    /// <summary>
    /// Gets or sets the states to filter by. Null applies no state filter from this object.
    /// </summary>
    public IReadOnlyList<string>? States { get; set; }

    /// <summary>
    /// Gets or sets the metadata to filter by. Null applies no metadata filter from this object.
    /// </summary>
    public IReadOnlyDictionary<string, string>? Metadata { get; set; }

    /// <summary>
    /// Gets or sets the page number (1-indexed). Null leaves the page unspecified.
    /// </summary>
    public int? Page { get; set; }

    /// <summary>
    /// Gets or sets the page size. Null leaves the page size unspecified.
    /// </summary>
    public int? PageSize { get; set; }
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Sandbox.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.Config;
using OpenSandbox.Core;
using OpenSandbox.Factory;
using OpenSandbox.Models;
using OpenSandbox.Services;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;

namespace OpenSandbox;

/// <summary>
/// Main entry point for interacting with a sandbox.
/// </summary>
/// <remarks>
/// <see cref="DisposeAsync"/> releases local SDK resources (HTTP clients and adapters) only.
/// To terminate the remote sandbox instance, call <see cref="KillAsync"/>.
/// </remarks>
public sealed class Sandbox : IAsyncDisposable
{
    /// <summary>
    /// Gets the sandbox ID.
    /// </summary>
    public string Id { get; }

    /// <summary>
    /// Gets the connection configuration.
    /// </summary>
    public ConnectionConfig ConnectionConfig { get; }

    /// <summary>
    /// Gets the command execution service.
    /// </summary>
    public IExecdCommands Commands { get; }

    /// <summary>
    /// Gets the filesystem service.
    /// </summary>
    public ISandboxFiles Files { get; }

    /// <summary>
    /// Gets the health check service.
    /// </summary>
    public IExecdHealth Health { get; }

    /// <summary>
    /// Gets the metrics service.
    /// </summary>
    public IExecdMetrics Metrics { get; }

    // Egress service taken from the egress adapter stack; kept private, not exposed as a property.
    private readonly IEgress _egress;

    // Lifecycle API client taken from the lifecycle adapter stack.
    private readonly ISandboxes _sandboxes;
    // Factory that built the adapter stacks for this instance.
    private readonly IAdapterFactory _adapterFactory;
    // Base URL of the lifecycle API, as returned by ConnectionConfig.GetBaseUrl().
    private readonly string _lifecycleBaseUrl;
    // Base URL of the execd endpoint: "<protocol>://<endpoint address>".
    private readonly string _execdBaseUrl;
    // HTTP client provider passed to every adapter stack created for this sandbox.
    private readonly HttpClientProvider _httpClientProvider;
    // Logger factory; the constructor substitutes NullLoggerFactory.Instance for null.
    private readonly ILoggerFactory _loggerFactory;
    // Logger created under the category "OpenSandbox.Sandbox".
    private readonly ILogger _logger;
    // Disposal flag; it is not assigned in the constructor, so it starts as false.
    private bool _disposed;

    // Internal accessors exposing the HTTP client provider and logger factory within the assembly
    // (and to assemblies granted InternalsVisibleTo).
    internal HttpClientProvider SharedHttpClientProvider => _httpClientProvider;
    internal ILoggerFactory SharedLoggerFactory => _loggerFactory;

    /// <summary>
    /// Builds a sandbox handle from already-resolved collaborators.
    /// Private: instances are obtained through the static factory methods of this class.
    /// </summary>
    private Sandbox(
        string id,
        ConnectionConfig connectionConfig,
        IAdapterFactory adapterFactory,
        string lifecycleBaseUrl,
        string execdBaseUrl,
        ILoggerFactory loggerFactory,
        HttpClientProvider httpClientProvider,
        ISandboxes sandboxes,
        IExecdCommands commands,
        ISandboxFiles files,
        IExecdHealth health,
        IExecdMetrics metrics,
        IEgress egress)
    {
        // Logging first: fall back to the no-op factory when none was supplied.
        var effectiveLoggerFactory = loggerFactory ?? NullLoggerFactory.Instance;
        _loggerFactory = effectiveLoggerFactory;
        _logger = effectiveLoggerFactory.CreateLogger("OpenSandbox.Sandbox");

        // Identity and connection details.
        Id = id;
        ConnectionConfig = connectionConfig;
        _lifecycleBaseUrl = lifecycleBaseUrl;
        _execdBaseUrl = execdBaseUrl;

        // Shared infrastructure.
        _adapterFactory = adapterFactory;
        _httpClientProvider = httpClientProvider;

        // Service adapters.
        _sandboxes = sandboxes;
        _egress = egress;
        Commands = commands;
        Files = files;
        Health = health;
        Metrics = metrics;
    }

    /// <summary>
    /// Creates a new sandbox.
    /// </summary>
    /// <remarks>
    /// The flow is: build the lifecycle adapters, send the create request, resolve the execd
    /// and egress endpoints, build their adapter stacks, and (unless
    /// <c>SkipHealthCheck</c> is set) wait for readiness. If any step after the create request
    /// fails, a best-effort delete of the remote sandbox is attempted and the HTTP client
    /// provider is disposed before the original exception is rethrown.
    /// </remarks>
    /// <param name="options">The creation options.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The created sandbox.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="options"/> is null.</exception>
    /// <exception cref="InvalidArgumentException">Thrown when request options are invalid.</exception>
    /// <exception cref="SandboxApiException">Thrown when the sandbox API returns an error.</exception>
    /// <exception cref="SandboxReadyTimeoutException">Thrown when readiness checks exceed timeout.</exception>
    /// <exception cref="SandboxException">Thrown when sandbox creation fails.</exception>
    public static async Task<Sandbox> CreateAsync(
        SandboxCreateOptions options,
        CancellationToken cancellationToken = default)
    {
        // Fail with a descriptive exception instead of a NullReferenceException below.
        if (options is null)
        {
            throw new ArgumentNullException(nameof(options));
        }

        var connectionConfig = options.ConnectionConfig ?? new ConnectionConfig();
        var loggerFactory = options.Diagnostics?.LoggerFactory ?? NullLoggerFactory.Instance;
        var logger = loggerFactory.CreateLogger("OpenSandbox.Sandbox");
        var lifecycleBaseUrl = connectionConfig.GetBaseUrl();
        var adapterFactory = options.AdapterFactory ?? DefaultAdapterFactory.Create();
        var httpClientProvider = new HttpClientProvider(connectionConfig, loggerFactory);

        ISandboxes sandboxes;
        logger.LogInformation("Creating sandbox (image={Image}, useServerProxy={UseServerProxy})", options.Image, connectionConfig.UseServerProxy);
        try
        {
            var lifecycleStack = adapterFactory.CreateLifecycleStack(new CreateLifecycleStackOptions
            {
                ConnectionConfig = connectionConfig,
                LifecycleBaseUrl = lifecycleBaseUrl,
                HttpClientProvider = httpClientProvider,
                LoggerFactory = loggerFactory
            });
            sandboxes = lifecycleStack.Sandboxes;
        }
        catch (Exception ex)
        {
            // Include the exception in the log entry, matching the equivalent path in ConnectAsync.
            logger.LogError(ex, "Failed to initialize lifecycle adapters while creating sandbox");
            httpClientProvider.Dispose();
            throw;
        }

        var request = new CreateSandboxRequest
        {
            Image = new ImageSpec
            {
                Uri = options.Image,
                Auth = options.ImageAuth
            },
            Entrypoint = options.Entrypoint ?? Constants.DefaultEntrypoint,
            // Manual cleanup means "no automatic expiration": send no timeout at all.
            Timeout = options.ManualCleanup ? null : options.TimeoutSeconds ?? Constants.DefaultTimeoutSeconds,
            ResourceLimits = options.Resource ?? Constants.DefaultResourceLimits,
            Env = options.Env,
            Metadata = options.Metadata,
            // Copy the policy so the default action can be filled in without mutating the caller's object.
            NetworkPolicy = options.NetworkPolicy != null
                ? new NetworkPolicy
                {
                    DefaultAction = options.NetworkPolicy.DefaultAction ?? NetworkRuleAction.Deny,
                    Egress = options.NetworkPolicy.Egress
                }
                : null,
            Volumes = options.Volumes,
            Extensions = options.Extensions?.ToDictionary(kv => kv.Key, kv => (object)kv.Value)
        };

        // Stays null until the remote sandbox exists; drives the cleanup decision below.
        string? sandboxId = null;
        try
        {
            var created = await sandboxes.CreateSandboxAsync(request, cancellationToken).ConfigureAwait(false);
            sandboxId = created.Id;
            logger.LogInformation("Sandbox created: {SandboxId}", sandboxId);

            var endpoint = await sandboxes.GetSandboxEndpointAsync(
                sandboxId,
                Constants.DefaultExecdPort,
                connectionConfig.UseServerProxy,
                cancellationToken).ConfigureAwait(false);
            var protocol = connectionConfig.Protocol == ConnectionProtocol.Https ? "https" : "http";
            var execdBaseUrl = $"{protocol}://{endpoint.EndpointAddress}";
            var execdHeaders = MergeHeaders(connectionConfig.Headers, endpoint.Headers);
            var egressEndpoint = await sandboxes.GetSandboxEndpointAsync(
                sandboxId,
                Constants.DefaultEgressPort,
                connectionConfig.UseServerProxy,
                cancellationToken).ConfigureAwait(false);
            var egressBaseUrl = $"{protocol}://{egressEndpoint.EndpointAddress}";
            var egressHeaders = MergeHeaders(connectionConfig.Headers, egressEndpoint.Headers);

            var execdStack = adapterFactory.CreateExecdStack(new CreateExecdStackOptions
            {
                ConnectionConfig = connectionConfig,
                ExecdBaseUrl = execdBaseUrl,
                ExecdHeaders = execdHeaders,
                HttpClientProvider = httpClientProvider,
                LoggerFactory = loggerFactory
            });
            var egressStack = adapterFactory.CreateEgressStack(new CreateEgressStackOptions
            {
                ConnectionConfig = connectionConfig,
                EgressBaseUrl = egressBaseUrl,
                EgressHeaders = egressHeaders,
                HttpClientProvider = httpClientProvider,
                LoggerFactory = loggerFactory
            });

            var sandbox = new Sandbox(
                sandboxId,
                connectionConfig,
                adapterFactory,
                lifecycleBaseUrl,
                execdBaseUrl,
                loggerFactory,
                httpClientProvider,
                sandboxes,
                execdStack.Commands,
                execdStack.Files,
                execdStack.Health,
                execdStack.Metrics,
                egressStack.Egress);

            if (!options.SkipHealthCheck)
            {
                logger.LogDebug("Waiting for sandbox readiness: {SandboxId}", sandboxId);
                await sandbox.WaitUntilReadyAsync(new WaitUntilReadyOptions
                {
                    ReadyTimeoutSeconds = options.ReadyTimeoutSeconds ?? Constants.DefaultReadyTimeoutSeconds,
                    PollingIntervalMillis = options.HealthCheckPollingInterval ?? Constants.DefaultHealthCheckPollingIntervalMillis,
                    HealthCheck = options.HealthCheck
                }, cancellationToken).ConfigureAwait(false);
            }

            return sandbox;
        }
        catch (Exception ex)
        {
            if (sandboxId != null)
            {
                try
                {
                    // CancellationToken.None: cleanup must still run when the caller's token
                    // is what aborted the flow.
                    await sandboxes.DeleteSandboxAsync(sandboxId, CancellationToken.None).ConfigureAwait(false);
                }
                catch (Exception cleanupEx)
                {
                    // Still surface the original error, but leave a trace: the remote sandbox
                    // may have been left running.
                    logger.LogWarning(cleanupEx, "Failed to delete sandbox {SandboxId} after create flow failure", sandboxId);
                }
            }

            httpClientProvider.Dispose();
            logger.LogError(ex, "Sandbox create flow failed");
            throw;
        }
    }

    /// <summary>
    /// Connects to an existing sandbox.
    /// </summary>
    /// <param name="options">The connection options.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The connected sandbox.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when request options are invalid.</exception>
    /// <exception cref="SandboxApiException">Thrown when the sandbox API returns an error.</exception>
    /// <exception cref="SandboxReadyTimeoutException">Thrown when readiness checks exceed timeout.</exception>
    /// <exception cref="SandboxException">Thrown when sandbox connection fails.</exception>
    public static async Task<Sandbox> ConnectAsync(
        SandboxConnectOptions options,
        CancellationToken cancellationToken = default)
    {
        // Resolve effective settings, substituting SDK defaults for anything the caller left unset.
        var config = options.ConnectionConfig ?? new ConnectionConfig();
        var logFactory = options.Diagnostics?.LoggerFactory ?? NullLoggerFactory.Instance;
        var log = logFactory.CreateLogger("OpenSandbox.Sandbox");
        var lifecycleUrl = config.GetBaseUrl();
        var factory = options.AdapterFactory ?? DefaultAdapterFactory.Create();

        // The provider is owned by this method until it is passed to the returned Sandbox;
        // both failure paths below dispose it before rethrowing.
        var provider = new HttpClientProvider(config, logFactory);
        log.LogInformation("Connecting to sandbox: {SandboxId}", options.SandboxId);

        // Stage 1: build the lifecycle adapter used to look up sandbox endpoints.
        ISandboxes lifecycle;
        try
        {
            var lifecycleOptions = new CreateLifecycleStackOptions
            {
                ConnectionConfig = config,
                LifecycleBaseUrl = lifecycleUrl,
                HttpClientProvider = provider,
                LoggerFactory = logFactory
            };
            lifecycle = factory.CreateLifecycleStack(lifecycleOptions).Sandboxes;
        }
        catch (Exception initError)
        {
            log.LogError(initError, "Failed to initialize lifecycle adapters while connecting sandbox");
            provider.Dispose();
            throw;
        }

        // Stage 2: resolve the execd and egress endpoints, build their adapters, and wrap them in a Sandbox.
        try
        {
            var execdEndpoint = await lifecycle
                .GetSandboxEndpointAsync(
                    options.SandboxId,
                    Constants.DefaultExecdPort,
                    config.UseServerProxy,
                    cancellationToken)
                .ConfigureAwait(false);

            string scheme;
            if (config.Protocol == ConnectionProtocol.Https)
            {
                scheme = "https";
            }
            else
            {
                scheme = "http";
            }

            var execdUrl = $"{scheme}://{execdEndpoint.EndpointAddress}";
            // Endpoint-specific headers take precedence over connection-level headers.
            var execdHeaderSet = MergeHeaders(config.Headers, execdEndpoint.Headers);

            var egressEndpoint = await lifecycle
                .GetSandboxEndpointAsync(
                    options.SandboxId,
                    Constants.DefaultEgressPort,
                    config.UseServerProxy,
                    cancellationToken)
                .ConfigureAwait(false);
            var egressUrl = $"{scheme}://{egressEndpoint.EndpointAddress}";
            var egressHeaderSet = MergeHeaders(config.Headers, egressEndpoint.Headers);

            var execdOptions = new CreateExecdStackOptions
            {
                ConnectionConfig = config,
                ExecdBaseUrl = execdUrl,
                ExecdHeaders = execdHeaderSet,
                HttpClientProvider = provider,
                LoggerFactory = logFactory
            };
            var execd = factory.CreateExecdStack(execdOptions);

            var egressOptions = new CreateEgressStackOptions
            {
                ConnectionConfig = config,
                EgressBaseUrl = egressUrl,
                EgressHeaders = egressHeaderSet,
                HttpClientProvider = provider,
                LoggerFactory = logFactory
            };
            var egress = factory.CreateEgressStack(egressOptions);

            var connected = new Sandbox(
                options.SandboxId,
                config,
                factory,
                lifecycleUrl,
                execdUrl,
                logFactory,
                provider,
                lifecycle,
                execd.Commands,
                execd.Files,
                execd.Health,
                execd.Metrics,
                egress.Egress);

            if (options.SkipHealthCheck)
            {
                // Caller opted out of the readiness probe; return the instance as-is.
                return connected;
            }

            var readiness = new WaitUntilReadyOptions
            {
                ReadyTimeoutSeconds = options.ReadyTimeoutSeconds ?? Constants.DefaultReadyTimeoutSeconds,
                PollingIntervalMillis = options.HealthCheckPollingInterval ?? Constants.DefaultHealthCheckPollingIntervalMillis,
                HealthCheck = options.HealthCheck
            };
            await connected.WaitUntilReadyAsync(readiness, cancellationToken).ConfigureAwait(false);

            return connected;
        }
        catch (Exception connectError)
        {
            log.LogError(connectError, "Sandbox connect flow failed: {SandboxId}", options.SandboxId);
            provider.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Resumes a paused sandbox by ID.
    /// </summary>
    /// <param name="options">The connection options.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The resumed sandbox.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when request options are invalid.</exception>
    /// <exception cref="SandboxApiException">Thrown when the sandbox API returns an error.</exception>
    /// <exception cref="SandboxReadyTimeoutException">Thrown when readiness checks exceed timeout.</exception>
    /// <exception cref="SandboxException">Thrown when sandbox resume fails.</exception>
    public static async Task<Sandbox> ResumeAsync(
        SandboxConnectOptions options,
        CancellationToken cancellationToken = default)
    {
        var config = options.ConnectionConfig ?? new ConnectionConfig();
        var logFactory = options.Diagnostics?.LoggerFactory ?? NullLoggerFactory.Instance;
        var log = logFactory.CreateLogger("OpenSandbox.Sandbox");
        var lifecycleUrl = config.GetBaseUrl();
        var factory = options.AdapterFactory ?? DefaultAdapterFactory.Create();

        // Short-lived provider used only for the resume request; ConnectAsync creates its own
        // provider for the sandbox it returns, so this one is always released in the finally block.
        var resumeProvider = new HttpClientProvider(config, logFactory);
        log.LogInformation("Resuming sandbox: {SandboxId}", options.SandboxId);

        try
        {
            var stackOptions = new CreateLifecycleStackOptions
            {
                ConnectionConfig = config,
                LifecycleBaseUrl = lifecycleUrl,
                HttpClientProvider = resumeProvider,
                LoggerFactory = logFactory
            };
            var lifecycle = factory.CreateLifecycleStack(stackOptions).Sandboxes;

            // Ask the server to resume first, then establish a regular connection.
            await lifecycle.ResumeSandboxAsync(options.SandboxId, cancellationToken).ConfigureAwait(false);

            var resumed = await ConnectAsync(options, cancellationToken).ConfigureAwait(false);
            return resumed;
        }
        finally
        {
            resumeProvider.Dispose();
        }
    }

    /// <summary>
    /// Gets information about this sandbox.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The sandbox information.</returns>
    /// <exception cref="SandboxApiException">Thrown when the sandbox API returns an error.</exception>
    public Task<SandboxInfo> GetInfoAsync(CancellationToken cancellationToken = default)
        // Pure pass-through to the lifecycle service for this instance's Id.
        => _sandboxes.GetSandboxAsync(Id, cancellationToken);

    /// <summary>
    /// Checks if the sandbox is healthy.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>True if healthy, false otherwise.</returns>
    public async Task<bool> IsHealthyAsync(CancellationToken cancellationToken = default)
    {
        bool healthy;
        try
        {
            healthy = await Health.PingAsync(cancellationToken).ConfigureAwait(false);
        }
        catch (Exception probeError)
        {
            // Any failure of the ping is reported as "not healthy" rather than surfaced to the caller.
            _logger.LogDebug(probeError, "Health check failed for sandbox {SandboxId}", Id);
            healthy = false;
        }

        return healthy;
    }

    /// <summary>
    /// Gets the current resource metrics.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The sandbox metrics.</returns>
    /// <exception cref="SandboxApiException">Thrown when the sandbox API returns an error.</exception>
    public Task<SandboxMetrics> GetMetricsAsync(CancellationToken cancellationToken = default)
        // Pure pass-through to the execd metrics service.
        => Metrics.GetMetricsAsync(cancellationToken);

    /// <summary>
    /// Pauses the sandbox.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <exception cref="SandboxApiException">Thrown when the sandbox API returns an error.</exception>
    public Task PauseAsync(CancellationToken cancellationToken = default)
        // Pure pass-through to the lifecycle service for this instance's Id.
        => _sandboxes.PauseSandboxAsync(Id, cancellationToken);

    /// <summary>
    /// Resumes this paused sandbox and returns a fresh, connected instance.
    /// </summary>
    /// <param name="options">Optional resume options.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>A new sandbox instance with refreshed connections.</returns>
    /// <exception cref="SandboxApiException">Thrown when the sandbox API returns an error.</exception>
    /// <exception cref="SandboxReadyTimeoutException">Thrown when readiness checks exceed timeout.</exception>
    /// <exception cref="SandboxException">Thrown when sandbox resume fails.</exception>
    public async Task<Sandbox> ResumeAsync(
        SandboxResumeOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        // Ask the server to resume before building any new connection.
        await _sandboxes.ResumeSandboxAsync(Id, cancellationToken).ConfigureAwait(false);

        // Reuse this instance's configuration, logger factory and adapter factory so the
        // fresh instance is wired the same way; readiness settings come from the caller.
        var diagnostics = new SdkDiagnosticsOptions
        {
            LoggerFactory = _loggerFactory
        };
        var reconnect = new SandboxConnectOptions
        {
            SandboxId = Id,
            ConnectionConfig = ConnectionConfig,
            Diagnostics = diagnostics,
            AdapterFactory = _adapterFactory,
            SkipHealthCheck = options?.SkipHealthCheck ?? false,
            ReadyTimeoutSeconds = options?.ReadyTimeoutSeconds,
            HealthCheckPollingInterval = options?.HealthCheckPollingInterval
        };

        var fresh = await ConnectAsync(reconnect, cancellationToken).ConfigureAwait(false);
        return fresh;
    }

    /// <summary>
    /// Terminates the sandbox.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <exception cref="SandboxApiException">Thrown when the sandbox API returns an error.</exception>
    public Task KillAsync(CancellationToken cancellationToken = default)
        // Termination is implemented as a delete on the lifecycle service.
        => _sandboxes.DeleteSandboxAsync(Id, cancellationToken);

    /// <summary>
    /// Renews the sandbox expiration time.
    /// </summary>
    /// <param name="timeoutSeconds">The new timeout in seconds from now.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The renewal response.</returns>
    /// <exception cref="SandboxApiException">Thrown when the sandbox API returns an error.</exception>
    public Task<RenewSandboxExpirationResponse> RenewAsync(
        int timeoutSeconds,
        CancellationToken cancellationToken = default)
    {
        // The API takes an absolute expiry; compute it from "now" (UTC) and send it
        // in round-trip ("O") format.
        var newExpiry = DateTime.UtcNow.AddSeconds(timeoutSeconds);
        var renewal = new RenewSandboxExpirationRequest
        {
            ExpiresAt = newExpiry.ToString("O")
        };

        return _sandboxes.RenewSandboxExpirationAsync(Id, renewal, cancellationToken);
    }

    /// <summary>
    /// Gets current egress policy for this sandbox.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The current egress policy.</returns>
    public async Task<NetworkPolicy> GetEgressPolicyAsync(CancellationToken cancellationToken = default)
    {
        // Delegates to the egress service created for this sandbox.
        var currentPolicy = await _egress.GetPolicyAsync(cancellationToken).ConfigureAwait(false);
        return currentPolicy;
    }

    /// <summary>
    /// Patches egress rules for this sandbox using sidecar merge semantics.
    ///
    /// Incoming rules take priority over existing rules with the same target.
    /// Existing rules for other targets remain unchanged. Within one patch payload,
    /// the first rule for a target wins. The current defaultAction is preserved.
    /// </summary>
    /// <param name="rules">Patch egress rules payload.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    public async Task PatchEgressRulesAsync(
        IReadOnlyList<NetworkRule> rules,
        CancellationToken cancellationToken = default)
    {
        // The rules are forwarded unchanged; this method does no merging itself.
        var pendingPatch = _egress.PatchRulesAsync(rules, cancellationToken);
        await pendingPatch.ConfigureAwait(false);
    }

    /// <summary>
    /// Gets the endpoint for a port.
    /// </summary>
    /// <param name="port">The port number.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The endpoint information.</returns>
    /// <exception cref="SandboxApiException">Thrown when the sandbox API returns an error.</exception>
    public Task<Endpoint> GetEndpointAsync(int port, CancellationToken cancellationToken = default)
        // Uses the same UseServerProxy setting this sandbox was connected with.
        => _sandboxes.GetSandboxEndpointAsync(
            Id,
            port,
            ConnectionConfig.UseServerProxy,
            cancellationToken);

    /// <summary>
    /// Gets the endpoint URL for a port.
    /// </summary>
    /// <param name="port">The port number.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The endpoint URL.</returns>
    /// <exception cref="SandboxApiException">Thrown when the sandbox API returns an error.</exception>
    public async Task<string> GetEndpointUrlAsync(int port, CancellationToken cancellationToken = default)
    {
        var resolved = await GetEndpointAsync(port, cancellationToken).ConfigureAwait(false);

        // The scheme follows the connection configuration, not the endpoint itself.
        string scheme;
        if (ConnectionConfig.Protocol == ConnectionProtocol.Https)
        {
            scheme = "https";
        }
        else
        {
            scheme = "http";
        }

        return $"{scheme}://{resolved.EndpointAddress}";
    }

    /// <summary>
    /// Waits until the sandbox is ready.
    /// </summary>
    /// <param name="options">The wait options.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <exception cref="SandboxReadyTimeoutException">Thrown when readiness checks exceed timeout.</exception>
    /// <exception cref="OperationCanceledException">Thrown when <paramref name="cancellationToken"/> is canceled.</exception>
    public async Task WaitUntilReadyAsync(
        WaitUntilReadyOptions options,
        CancellationToken cancellationToken = default)
    {
        _logger.LogDebug("Start readiness check for sandbox {SandboxId} (timeoutSeconds={TimeoutSeconds})", Id, options.ReadyTimeoutSeconds);

        // Track elapsed time with a monotonic clock. A wall-clock deadline (DateTime.UtcNow)
        // can fire early or late when the system clock is adjusted while we are polling.
        var timeout = TimeSpan.FromSeconds((double)options.ReadyTimeoutSeconds);
        var stopwatch = global::System.Diagnostics.Stopwatch.StartNew();
        var attempt = 0;
        var errorDetail = "Health check returned false continuously.";

        while (true)
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (stopwatch.Elapsed > timeout)
            {
                // Include connection context and a hint, since the usual cause is an
                // endpoint that is not reachable from the client.
                var context = $"domain={ConnectionConfig.Domain}, useServerProxy={ConnectionConfig.UseServerProxy}";
                var suggestion = "If this sandbox runs in Docker bridge or remote-network mode, consider enabling useServerProxy=true.";
                if (!ConnectionConfig.UseServerProxy)
                {
                    suggestion += " You can also configure server-side [docker].host_ip for direct endpoint access.";
                }
                throw new SandboxReadyTimeoutException(
                    $"Sandbox health check timed out after {options.ReadyTimeoutSeconds}s ({attempt} attempts). {errorDetail} Connection context: {context}. {suggestion}");
            }
            attempt++;

            try
            {
                bool isReady;
                if (options.HealthCheck != null)
                {
                    // Caller-supplied probe replaces the default execd ping.
                    isReady = await options.HealthCheck(this).ConfigureAwait(false);
                }
                else
                {
                    isReady = await Health.PingAsync(cancellationToken).ConfigureAwait(false);
                }

                if (isReady)
                {
                    _logger.LogInformation("Sandbox is ready: {SandboxId}", Id);
                    return;
                }

                errorDetail = "Health check returned false continuously.";
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                // The caller cancelled: propagate immediately instead of recording the
                // cancellation as a failed probe and waiting for the delay below to throw.
                throw;
            }
            catch (Exception ex)
            {
                // A failing probe is expected while the sandbox starts; remember the last
                // error for the timeout message and keep polling.
                _logger.LogDebug(ex, "Readiness probe failed for sandbox {SandboxId}", Id);
                errorDetail = $"Last health check error: {ex.Message}";
            }

            await Task.Delay(options.PollingIntervalMillis, cancellationToken).ConfigureAwait(false);
        }
    }

    /// <summary>
    /// Releases resources used by this sandbox instance.
    /// </summary>
    public ValueTask DisposeAsync()
    {
        // Only the first call releases resources; later calls are no-ops.
        if (!_disposed)
        {
            _disposed = true;
            _logger.LogDebug("Disposing sandbox resources: {SandboxId}", Id);
            _httpClientProvider.Dispose();
        }

        // Disposal is synchronous, so a completed ValueTask is returned.
        return default;
    }

    // Merges two header sets into a new dictionary. Entries from overrideHeaders replace
    // entries from baseHeaders that have the same name. Names are compared case-insensitively
    // because HTTP field names are case-insensitive: without this, "Authorization" and
    // "authorization" would both survive the merge and the override would not take effect.
    // Either argument may be null, in which case it contributes nothing. The inputs are not modified.
    internal static IReadOnlyDictionary<string, string> MergeHeaders(
        IReadOnlyDictionary<string, string> baseHeaders,
        IReadOnlyDictionary<string, string>? overrideHeaders)
    {
        var merged = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

        if (baseHeaders != null)
        {
            foreach (var header in baseHeaders)
            {
                // Indexer assignment (not Add) so base entries that differ only by case
                // collapse instead of throwing on a duplicate key.
                merged[header.Key] = header.Value;
            }
        }

        if (overrideHeaders != null)
        {
            foreach (var header in overrideHeaders)
            {
                merged[header.Key] = header.Value;
            }
        }

        return merged;
    }
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/SandboxManager.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.Config;
using OpenSandbox.Factory;
using OpenSandbox.Models;
using OpenSandbox.Services;
using OpenSandbox.Core;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;

namespace OpenSandbox;

/// <summary>
/// Administrative interface for managing sandboxes.
/// </summary>
/// <remarks>
/// This type is intended for administrative lifecycle operations (list, inspect, pause, resume, kill, renew).
/// Dispose the manager when finished to release local SDK resources.
/// </remarks>
public sealed class SandboxManager : IAsyncDisposable
{
    // Logger category shared by the factory method and the instance logger.
    private const string LoggerCategory = "OpenSandbox.SandboxManager";

    private readonly ISandboxes _sandboxes;
    private readonly ConnectionConfig _connectionConfig;
    private readonly HttpClientProvider _httpClientProvider;
    private readonly ILogger _logger;
    private bool _disposed;

    // Instances are built through Create, which owns wiring and failure cleanup.
    private SandboxManager(
        ISandboxes sandboxes,
        ConnectionConfig connectionConfig,
        HttpClientProvider httpClientProvider,
        ILoggerFactory loggerFactory)
    {
        _sandboxes = sandboxes;
        _connectionConfig = connectionConfig;
        _httpClientProvider = httpClientProvider;

        var effectiveFactory = loggerFactory ?? NullLoggerFactory.Instance;
        _logger = effectiveFactory.CreateLogger(LoggerCategory);
    }

    /// <summary>
    /// Builds a sandbox manager from the supplied options, using SDK defaults for anything left unset.
    /// </summary>
    /// <param name="options">Optional configuration options.</param>
    /// <returns>A new sandbox manager instance.</returns>
    /// <exception cref="SandboxException">Thrown when manager initialization fails.</exception>
    public static SandboxManager Create(SandboxManagerOptions? options = null)
    {
        var config = options?.ConnectionConfig ?? new ConnectionConfig();
        var baseUrl = config.GetBaseUrl();
        var factory = options?.AdapterFactory ?? DefaultAdapterFactory.Create();
        var logFactory = options?.Diagnostics?.LoggerFactory ?? NullLoggerFactory.Instance;
        var provider = new HttpClientProvider(config, logFactory);
        var log = logFactory.CreateLogger(LoggerCategory);
        log.LogInformation("Creating sandbox manager");

        try
        {
            var stackOptions = new CreateLifecycleStackOptions
            {
                ConnectionConfig = config,
                LifecycleBaseUrl = baseUrl,
                HttpClientProvider = provider,
                LoggerFactory = logFactory
            };
            var lifecycle = factory.CreateLifecycleStack(stackOptions);

            return new SandboxManager(lifecycle.Sandboxes, config, provider, logFactory);
        }
        catch (Exception createError)
        {
            // The manager never took ownership of the provider, so release it here.
            log.LogError(createError, "Failed to create sandbox manager");
            provider.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Lists sandboxes, optionally narrowed by a filter.
    /// </summary>
    /// <param name="filter">Optional filter criteria.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The list of sandboxes.</returns>
    /// <exception cref="SandboxApiException">Thrown when the sandbox API returns an error.</exception>
    public Task<ListSandboxesResponse> ListSandboxInfosAsync(
        SandboxFilter? filter = null,
        CancellationToken cancellationToken = default)
    {
        // A null filter yields a query whose fields are all null.
        var query = new ListSandboxesParams
        {
            States = filter?.States,
            Metadata = filter?.Metadata,
            Page = filter?.Page,
            PageSize = filter?.PageSize
        };

        return _sandboxes.ListSandboxesAsync(query, cancellationToken);
    }

    /// <summary>
    /// Fetches information about one sandbox.
    /// </summary>
    /// <param name="sandboxId">The sandbox ID.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The sandbox information.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when <paramref name="sandboxId"/> is null or empty.</exception>
    /// <exception cref="SandboxApiException">Thrown when the sandbox API returns an error.</exception>
    public Task<SandboxInfo> GetSandboxInfoAsync(
        string sandboxId,
        CancellationToken cancellationToken = default)
    {
        _logger.LogDebug("Fetching sandbox info: {SandboxId}", sandboxId);

        var lookup = _sandboxes.GetSandboxAsync(sandboxId, cancellationToken);
        return lookup;
    }

    /// <summary>
    /// Terminates a sandbox.
    /// </summary>
    /// <param name="sandboxId">The sandbox ID.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <exception cref="InvalidArgumentException">Thrown when <paramref name="sandboxId"/> is null or empty.</exception>
    /// <exception cref="SandboxApiException">Thrown when the sandbox API returns an error.</exception>
    public Task KillSandboxAsync(
        string sandboxId,
        CancellationToken cancellationToken = default)
    {
        _logger.LogInformation("Killing sandbox: {SandboxId}", sandboxId);

        // Termination is implemented as a delete on the lifecycle service.
        var deletion = _sandboxes.DeleteSandboxAsync(sandboxId, cancellationToken);
        return deletion;
    }

    /// <summary>
    /// Pauses a sandbox.
    /// </summary>
    /// <param name="sandboxId">The sandbox ID.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <exception cref="InvalidArgumentException">Thrown when <paramref name="sandboxId"/> is null or empty.</exception>
    /// <exception cref="SandboxApiException">Thrown when the sandbox API returns an error.</exception>
    public Task PauseSandboxAsync(
        string sandboxId,
        CancellationToken cancellationToken = default)
    {
        _logger.LogInformation("Pausing sandbox: {SandboxId}", sandboxId);

        var pause = _sandboxes.PauseSandboxAsync(sandboxId, cancellationToken);
        return pause;
    }

    /// <summary>
    /// Resumes a paused sandbox.
    /// </summary>
    /// <param name="sandboxId">The sandbox ID.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <exception cref="InvalidArgumentException">Thrown when <paramref name="sandboxId"/> is null or empty.</exception>
    /// <exception cref="SandboxApiException">Thrown when the sandbox API returns an error.</exception>
    public Task ResumeSandboxAsync(
        string sandboxId,
        CancellationToken cancellationToken = default)
    {
        _logger.LogInformation("Resuming sandbox: {SandboxId}", sandboxId);

        var resume = _sandboxes.ResumeSandboxAsync(sandboxId, cancellationToken);
        return resume;
    }

    /// <summary>
    /// Moves a sandbox's expiration to <paramref name="timeoutSeconds"/> seconds from now.
    /// </summary>
    /// <param name="sandboxId">The sandbox ID.</param>
    /// <param name="timeoutSeconds">The new timeout in seconds from now.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <exception cref="InvalidArgumentException">Thrown when arguments are invalid.</exception>
    /// <exception cref="SandboxApiException">Thrown when the sandbox API returns an error.</exception>
    public async Task RenewSandboxAsync(
        string sandboxId,
        int timeoutSeconds,
        CancellationToken cancellationToken = default)
    {
        _logger.LogInformation("Renewing sandbox expiration: {SandboxId} (timeoutSeconds={TimeoutSeconds})", sandboxId, timeoutSeconds);

        // The API takes an absolute expiry; compute it from "now" (UTC) in round-trip ("O") format.
        var newExpiry = DateTime.UtcNow.AddSeconds(timeoutSeconds);
        var renewal = new RenewSandboxExpirationRequest
        {
            ExpiresAt = newExpiry.ToString("O")
        };

        // The service response is awaited but not returned to the caller.
        await _sandboxes.RenewSandboxExpirationAsync(sandboxId, renewal, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Releases resources used by this manager. Calls after the first are no-ops.
    /// </summary>
    public ValueTask DisposeAsync()
    {
        if (!_disposed)
        {
            _disposed = true;
            _logger.LogDebug("Disposing sandbox manager resources");
            _httpClientProvider.Dispose();
        }

        // Disposal is synchronous, so a completed ValueTask is returned.
        return default;
    }
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Services/IEgress.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.Models;

namespace OpenSandbox.Services;

/// <summary>
/// Service interface for direct egress sidecar operations.
/// </summary>
public interface IEgress
{
    /// <summary>
    /// Gets the current egress network policy.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The current egress policy.</returns>
    Task<NetworkPolicy> GetPolicyAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Patches the egress rules with the supplied rule list.
    /// </summary>
    /// <remarks>
    /// The sandbox-level wrapper that forwards to this method describes the operation as
    /// using sidecar merge semantics: incoming rules take priority over existing rules with
    /// the same target, rules for other targets are left unchanged, the first rule for a
    /// target wins within one payload, and the current default action is preserved.
    /// NOTE(review): that behavior is implemented by the service behind this interface, not
    /// by the interface itself; confirm against the implementation.
    /// </remarks>
    /// <param name="rules">Patch egress rules payload.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task PatchRulesAsync(
        IReadOnlyList<NetworkRule> rules,
        CancellationToken cancellationToken = default);
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Services/IExecdCommands.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.Models;
using OpenSandbox.Core;

namespace OpenSandbox.Services;

/// <summary>
/// Service interface for executing commands in a sandbox.
/// </summary>
public interface IExecdCommands
{
    /// <summary>
    /// Runs a command and streams server events (SSE).
    /// This is the lowest-level API for command execution; use
    /// <see cref="RunAsync"/> when a single aggregated result is preferred.
    /// </summary>
    /// <param name="command">The command to run.</param>
    /// <param name="options">Optional command options.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>An async enumerable of server stream events.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when request values are invalid.</exception>
    /// <exception cref="SandboxException">Thrown when the execd service request fails.</exception>
    IAsyncEnumerable<ServerStreamEvent> RunStreamAsync(
        string command,
        RunCommandOptions? options = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Runs a command, consumes the event stream, and builds a structured execution result.
    /// </summary>
    /// <param name="command">The command to run.</param>
    /// <param name="options">Optional command options.</param>
    /// <param name="handlers">Optional event handlers for real-time processing.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The command execution result.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when request values are invalid.</exception>
    /// <exception cref="SandboxException">Thrown when the execd service request fails.</exception>
    Task<Execution> RunAsync(
        string command,
        RunCommandOptions? options = null,
        ExecutionHandlers? handlers = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Interrupts the current execution in the given session.
    /// </summary>
    /// <param name="sessionId">The session ID to interrupt.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <exception cref="InvalidArgumentException">Thrown when <paramref name="sessionId"/> is null or empty.</exception>
    /// <exception cref="SandboxException">Thrown when the execd service request fails.</exception>
    Task InterruptAsync(
        string sessionId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets the current running status of a command.
    /// </summary>
    /// <param name="executionId">The command execution ID.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The command status.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when <paramref name="executionId"/> is null or empty.</exception>
    /// <exception cref="SandboxException">Thrown when the execd service request fails.</exception>
    Task<CommandStatus> GetCommandStatusAsync(
        string executionId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets logs for a background command.
    /// </summary>
    /// <remarks>
    /// NOTE(review): presumably the cursor returned in one response is passed back as
    /// <paramref name="cursor"/> to read only newer lines; confirm against the implementation.
    /// </remarks>
    /// <param name="executionId">The command execution ID.</param>
    /// <param name="cursor">Optional line cursor for incremental reads.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Background command logs and latest cursor.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when <paramref name="executionId"/> is null or empty.</exception>
    /// <exception cref="SandboxException">Thrown when the execd service request fails.</exception>
    Task<CommandLogs> GetBackgroundCommandLogsAsync(
        string executionId,
        long? cursor = null,
        CancellationToken cancellationToken = default);
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Services/IExecdHealth.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.Core;

namespace OpenSandbox.Services;

/// <summary>
/// Service interface for health checks on the execd service.
/// </summary>
public interface IExecdHealth
{
    /// <summary>
    /// Pings the execd service to check whether it is healthy.
    /// </summary>
    /// <param name="cancellationToken">Token that can be used to cancel the request.</param>
    /// <returns>
    /// A task that completes with <see langword="true"/> if the service is healthy,
    /// <see langword="false"/> otherwise.
    /// </returns>
    /// <exception cref="SandboxException">Thrown when the execd service request fails.</exception>
    /// <remarks>
    /// NOTE(review): the contract lists both a <see langword="false"/> result and a
    /// <see cref="SandboxException"/>; which failures map to which outcome is decided by the
    /// implementation and is not visible from this interface - confirm before relying on either.
    /// </remarks>
    Task<bool> PingAsync(CancellationToken cancellationToken = default);
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Services/IExecdMetrics.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.Models;
using OpenSandbox.Core;

namespace OpenSandbox.Services;

/// <summary>
/// Service interface for retrieving metrics from the execd service.
/// </summary>
public interface IExecdMetrics
{
    /// <summary>
    /// Gets the current resource metrics from the sandbox.
    /// </summary>
    /// <param name="cancellationToken">Token that can be used to cancel the request.</param>
    /// <returns>A task that completes with the sandbox metrics.</returns>
    /// <exception cref="SandboxException">Thrown when the execd service request fails.</exception>
    Task<SandboxMetrics> GetMetricsAsync(CancellationToken cancellationToken = default);
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Services/ISandboxFiles.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.Models;
using OpenSandbox.Core;

namespace OpenSandbox.Services;

/// <summary>
/// Service interface for filesystem operations in a sandbox.
/// </summary>
/// <remarks>
/// Every member accepts an optional <see cref="CancellationToken"/> and documents
/// <see cref="InvalidArgumentException"/> for invalid request values and
/// <see cref="SandboxException"/> for failed execd service requests.
/// </remarks>
public interface ISandboxFiles
{
    /// <summary>
    /// Gets information about files at the specified paths.
    /// </summary>
    /// <param name="paths">The file paths to query.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the request.</param>
    /// <returns>A task that completes with a read-only dictionary mapping paths to file information.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when request values are invalid.</exception>
    /// <exception cref="SandboxException">Thrown when the execd service request fails.</exception>
    Task<IReadOnlyDictionary<string, SandboxFileInfo>> GetFileInfoAsync(
        IEnumerable<string> paths,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Searches for files matching the specified criteria.
    /// </summary>
    /// <param name="entry">The search entry with path and pattern.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the request.</param>
    /// <returns>A task that completes with a read-only list of matching files.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when request values are invalid.</exception>
    /// <exception cref="SandboxException">Thrown when the execd service request fails.</exception>
    Task<IReadOnlyList<SandboxFileInfo>> SearchAsync(
        SearchEntry entry,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Creates directories at the specified paths.
    /// </summary>
    /// <param name="entries">The directory entries to create.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the request.</param>
    /// <returns>A task that completes when the request has finished.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when request values are invalid.</exception>
    /// <exception cref="SandboxException">Thrown when the execd service request fails.</exception>
    Task CreateDirectoriesAsync(
        IEnumerable<CreateDirectoryEntry> entries,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Deletes directories at the specified paths.
    /// </summary>
    /// <param name="paths">The directory paths to delete.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the request.</param>
    /// <returns>A task that completes when the request has finished.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when request values are invalid.</exception>
    /// <exception cref="SandboxException">Thrown when the execd service request fails.</exception>
    Task DeleteDirectoriesAsync(
        IEnumerable<string> paths,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Writes files to the sandbox.
    /// </summary>
    /// <param name="entries">The file entries to write.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the request.</param>
    /// <returns>A task that completes when the request has finished.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when request values are invalid.</exception>
    /// <exception cref="SandboxException">Thrown when the execd service request fails.</exception>
    Task WriteFilesAsync(
        IEnumerable<WriteEntry> entries,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Reads a file as text.
    /// </summary>
    /// <param name="path">The file path.</param>
    /// <param name="options">Optional read options; defaults to <c>null</c>.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the request.</param>
    /// <returns>A task that completes with the file content as a string.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when request values are invalid.</exception>
    /// <exception cref="SandboxException">Thrown when the execd service request fails.</exception>
    Task<string> ReadFileAsync(
        string path,
        ReadFileOptions? options = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Reads a file as bytes.
    /// </summary>
    /// <param name="path">The file path.</param>
    /// <param name="options">Optional read options; defaults to <c>null</c>.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the request.</param>
    /// <returns>A task that completes with the file content as a byte array.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when request values are invalid.</exception>
    /// <exception cref="SandboxException">Thrown when the execd service request fails.</exception>
    Task<byte[]> ReadBytesAsync(
        string path,
        ReadBytesOptions? options = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Reads a file as a stream of byte chunks.
    /// </summary>
    /// <param name="path">The file path.</param>
    /// <param name="options">Optional read options; defaults to <c>null</c>.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the read.</param>
    /// <returns>An async enumerable of byte chunks.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when request values are invalid.</exception>
    /// <exception cref="SandboxException">Thrown when the execd service request fails.</exception>
    /// <remarks>
    /// NOTE(review): because the result is an <see cref="IAsyncEnumerable{T}"/>, the documented
    /// exceptions may only surface once enumeration starts rather than at call time - this depends
    /// on the implementation; confirm before relying on eager validation.
    /// </remarks>
    IAsyncEnumerable<byte[]> ReadBytesStreamAsync(
        string path,
        ReadBytesOptions? options = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Deletes files at the specified paths.
    /// </summary>
    /// <param name="paths">The file paths to delete.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the request.</param>
    /// <returns>A task that completes when the request has finished.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when request values are invalid.</exception>
    /// <exception cref="SandboxException">Thrown when the execd service request fails.</exception>
    Task DeleteFilesAsync(
        IEnumerable<string> paths,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Moves/renames files.
    /// </summary>
    /// <param name="entries">The move entries with source and destination paths.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the request.</param>
    /// <returns>A task that completes when the request has finished.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when request values are invalid.</exception>
    /// <exception cref="SandboxException">Thrown when the execd service request fails.</exception>
    Task MoveFilesAsync(
        IEnumerable<MoveEntry> entries,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Replaces content in files.
    /// </summary>
    /// <param name="entries">The content replace entries.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the request.</param>
    /// <returns>A task that completes when the request has finished.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when request values are invalid.</exception>
    /// <exception cref="SandboxException">Thrown when the execd service request fails.</exception>
    Task ReplaceContentsAsync(
        IEnumerable<ContentReplaceEntry> entries,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Sets permissions on files.
    /// </summary>
    /// <param name="entries">The permission entries.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the request.</param>
    /// <returns>A task that completes when the request has finished.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when request values are invalid.</exception>
    /// <exception cref="SandboxException">Thrown when the execd service request fails.</exception>
    Task SetPermissionsAsync(
        IEnumerable<SetPermissionEntry> entries,
        CancellationToken cancellationToken = default);
}


================================================
FILE: sdks/sandbox/csharp/src/OpenSandbox/Services/ISandboxes.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.Models;
using OpenSandbox.Core;

namespace OpenSandbox.Services;

/// <summary>
/// Service interface for sandbox lifecycle management.
/// </summary>
/// <remarks>
/// Covers creating, querying, listing, deleting, pausing, resuming and renewing sandboxes,
/// plus resolving the endpoint for a sandbox port.
/// </remarks>
public interface ISandboxes
{
    /// <summary>
    /// Creates a new sandbox.
    /// </summary>
    /// <param name="request">The create sandbox request.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the request.</param>
    /// <returns>A task that completes with the created sandbox response.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when request values are invalid.</exception>
    /// <exception cref="SandboxException">Thrown when the sandbox service request fails.</exception>
    Task<CreateSandboxResponse> CreateSandboxAsync(
        CreateSandboxRequest request,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets information about a sandbox.
    /// </summary>
    /// <param name="sandboxId">The sandbox ID.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the request.</param>
    /// <returns>A task that completes with the sandbox information.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when <paramref name="sandboxId"/> is null or empty.</exception>
    /// <exception cref="SandboxException">Thrown when the sandbox service request fails.</exception>
    Task<SandboxInfo> GetSandboxAsync(
        string sandboxId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Lists sandboxes with optional filtering.
    /// </summary>
    /// <param name="params">Optional filter parameters; defaults to <c>null</c>.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the request.</param>
    /// <returns>A task that completes with the list of sandboxes.</returns>
    /// <exception cref="SandboxException">Thrown when the sandbox service request fails.</exception>
    Task<ListSandboxesResponse> ListSandboxesAsync(
        ListSandboxesParams? @params = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Deletes a sandbox.
    /// </summary>
    /// <param name="sandboxId">The sandbox ID.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the request.</param>
    /// <returns>A task that completes when the request has finished.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when <paramref name="sandboxId"/> is null or empty.</exception>
    /// <exception cref="SandboxException">Thrown when the sandbox service request fails.</exception>
    Task DeleteSandboxAsync(
        string sandboxId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Pauses a sandbox.
    /// </summary>
    /// <param name="sandboxId">The sandbox ID.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the request.</param>
    /// <returns>A task that completes when the request has finished.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when <paramref name="sandboxId"/> is null or empty.</exception>
    /// <exception cref="SandboxException">Thrown when the sandbox service request fails.</exception>
    Task PauseSandboxAsync(
        string sandboxId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Resumes a paused sandbox.
    /// </summary>
    /// <param name="sandboxId">The sandbox ID.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the request.</param>
    /// <returns>A task that completes when the request has finished.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when <paramref name="sandboxId"/> is null or empty.</exception>
    /// <exception cref="SandboxException">Thrown when the sandbox service request fails.</exception>
    Task ResumeSandboxAsync(
        string sandboxId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Renews the expiration time of a sandbox.
    /// </summary>
    /// <param name="sandboxId">The sandbox ID.</param>
    /// <param name="request">The renewal request.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the request.</param>
    /// <returns>A task that completes with the renewal response.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when arguments are invalid.</exception>
    /// <exception cref="SandboxException">Thrown when the sandbox service request fails.</exception>
    Task<RenewSandboxExpirationResponse> RenewSandboxExpirationAsync(
        string sandboxId,
        RenewSandboxExpirationRequest request,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets the endpoint for a sandbox port.
    /// </summary>
    /// <param name="sandboxId">The sandbox ID.</param>
    /// <param name="port">The port number.</param>
    /// <param name="useServerProxy">Whether to return a server-proxied URL; defaults to <see langword="false"/>.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the request.</param>
    /// <returns>A task that completes with the endpoint information.</returns>
    /// <exception cref="InvalidArgumentException">Thrown when arguments are invalid.</exception>
    /// <exception cref="SandboxException">Thrown when the sandbox service request fails.</exception>
    Task<Endpoint> GetSandboxEndpointAsync(
        string sandboxId,
        int port,
        bool useServerProxy = false,
        CancellationToken cancellationToken = default);
}


================================================
FILE: sdks/sandbox/csharp/tests/OpenSandbox.Tests/CommandsAdapterTests.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System.Net;
using System.Text;
using System.Text.Json;
using FluentAssertions;
using OpenSandbox.Adapters;
using OpenSandbox.Core;
using OpenSandbox.Internal;
using OpenSandbox.Models;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using Xunit;

namespace OpenSandbox.Tests;

/// <summary>
/// Tests for <see cref="CommandsAdapter"/> driven through a stubbed
/// <see cref="HttpMessageHandler"/>, so no real execd service is contacted.
/// </summary>
public class CommandsAdapterTests
{
    /// <summary>Single SSE "init" event returned by the stubbed run endpoint.</summary>
    private const string InitEventStream = "data: {\"type\":\"init\",\"text\":\"cmd-1\"}\n\n";

    /// <summary>
    /// The JSON status payload is mapped onto the returned status object and the request
    /// targets the status path for the given execution ID.
    /// </summary>
    [Fact]
    public async Task GetCommandStatusAsync_ShouldParseStatusResponse()
    {
        var httpHandler = new StubHttpMessageHandler((_, _) =>
        {
            var body = "{\"id\":\"cmd-1\",\"content\":\"sleep 1\",\"running\":true,\"exit_code\":null}";
            return Task.FromResult(new HttpResponseMessage(HttpStatusCode.OK)
            {
                Content = new StringContent(body, Encoding.UTF8, "application/json")
            });
        });
        var adapter = CreateAdapter(httpHandler);

        var status = await adapter.GetCommandStatusAsync("cmd-1");

        status.Id.Should().Be("cmd-1");
        status.Content.Should().Be("sleep 1");
        status.Running.Should().BeTrue();
        status.ExitCode.Should().BeNull();
        httpHandler.RequestUris.Should().Contain(uri => uri.EndsWith("/command/status/cmd-1", StringComparison.Ordinal));
    }

    /// <summary>
    /// The tail-cursor response header is parsed into the returned cursor and the supplied
    /// cursor is sent as a query parameter.
    /// </summary>
    [Fact]
    public async Task GetBackgroundCommandLogsAsync_ShouldParseCursorHeader()
    {
        var httpHandler = new StubHttpMessageHandler((_, _) =>
        {
            var response = new HttpResponseMessage(HttpStatusCode.OK)
            {
                Content = new StringContent("line1\nline2\n", Encoding.UTF8, "text/plain")
            };
            response.Headers.Add("EXECD-COMMANDS-TAIL-CURSOR", "42");
            return Task.FromResult(response);
        });
        var adapter = CreateAdapter(httpHandler);

        var logs = await adapter.GetBackgroundCommandLogsAsync("cmd-2", cursor: 10);

        logs.Content.Should().Contain("line1");
        logs.Cursor.Should().Be(42);
        httpHandler.RequestUris.Should().Contain(uri => uri.Contains("/command/cmd-2/logs?cursor=10", StringComparison.Ordinal));
    }

    /// <summary>
    /// When the response carries no cursor header, the returned cursor is null.
    /// </summary>
    [Fact]
    public async Task GetBackgroundCommandLogsAsync_ShouldReturnNullCursorWhenHeaderMissing()
    {
        var httpHandler = new StubHttpMessageHandler((_, _) =>
        {
            return Task.FromResult(new HttpResponseMessage(HttpStatusCode.OK)
            {
                Content = new StringContent("only-content", Encoding.UTF8, "text/plain")
            });
        });
        var adapter = CreateAdapter(httpHandler);

        var logs = await adapter.GetBackgroundCommandLogsAsync("cmd-3");

        logs.Content.Should().Be("only-content");
        logs.Cursor.Should().BeNull();
    }

    /// <summary>
    /// A timeout of 2 seconds in the options is serialized as 2000 in the request body.
    /// </summary>
    [Fact]
    public async Task RunStreamAsync_ShouldSendTimeoutInMilliseconds()
    {
        // The handler only captures the body; assertions run on the test thread afterwards so
        // that (a) a request that is never sent fails the test instead of passing vacuously and
        // (b) assertion failures are not routed through the adapter's HTTP error handling.
        string? requestBody = null;
        var handler = new StubHttpMessageHandler(async (request, _) =>
        {
            if (request.Content is not null)
            {
                requestBody = await request.Content.ReadAsStringAsync().ConfigureAwait(false);
            }

            return CreateInitEventResponse();
        });
        var adapter = CreateAdapter(handler);

        var options = new RunCommandOptions
        {
            TimeoutSeconds = 2
        };

        await foreach (var _ in adapter.RunStreamAsync("sleep 1", options))
        {
            // Drain events.
        }

        requestBody.Should().NotBeNull("the run request must be sent with a body");
        using var doc = JsonDocument.Parse(requestBody!);
        doc.RootElement.GetProperty("timeout").GetInt64().Should().Be(2000);
    }

    /// <summary>
    /// Uid, gid and environment variables from the options are serialized into the request body.
    /// </summary>
    [Fact]
    public async Task RunStreamAsync_ShouldSendUidGidAndEnvs()
    {
        // Capture in the handler, assert after the stream is drained (see the timeout test).
        string? requestBody = null;
        var handler = new StubHttpMessageHandler(async (request, _) =>
        {
            if (request.Content is not null)
            {
                requestBody = await request.Content.ReadAsStringAsync().ConfigureAwait(false);
            }

            return CreateInitEventResponse();
        });
        var adapter = CreateAdapter(handler);

        var options = new RunCommandOptions
        {
            Uid = 1000,
            Gid = 1000,
            Envs = new Dictionary<string, string>
            {
                ["APP_ENV"] = "test",
                ["LOG_LEVEL"] = "debug"
            }
        };

        await foreach (var _ in adapter.RunStreamAsync("id", options))
        {
            // Drain events.
        }

        requestBody.Should().NotBeNull("the run request must be sent with a body");
        using var doc = JsonDocument.Parse(requestBody!);
        doc.RootElement.GetProperty("uid").GetInt32().Should().Be(1000);
        doc.RootElement.GetProperty("gid").GetInt32().Should().Be(1000);
        var envs = doc.RootElement.GetProperty("envs");
        envs.GetProperty("APP_ENV").GetString().Should().Be("test");
        envs.GetProperty("LOG_LEVEL").GetString().Should().Be("debug");
    }

    /// <summary>
    /// Supplying a gid without a uid is rejected before any HTTP request is issued.
    /// </summary>
    [Fact]
    public async Task RunStreamAsync_ShouldRejectGidWithoutUid()
    {
        var handler = new StubHttpMessageHandler((_, _) =>
        {
            throw new InvalidOperationException("HTTP should not be called when options are invalid.");
        });
        var adapter = CreateAdapter(handler);

        var options = new RunCommandOptions
        {
            Gid = 1000
        };

        var act = async () =>
        {
            await foreach (var _ in adapter.RunStreamAsync("id", options))
            {
                // Drain events.
            }
        };

        await act.Should().ThrowAsync<InvalidArgumentException>()
            .WithMessage("*uid is required when gid is provided*");

        // Explicitly pin "no request was attempted" rather than relying on the stub's throw.
        handler.RequestUris.Should().BeEmpty();
    }

    /// <summary>
    /// Builds a <see cref="CommandsAdapter"/> whose regular and SSE HTTP clients both route
    /// through <paramref name="httpHandler"/>.
    /// </summary>
    private static CommandsAdapter CreateAdapter(HttpMessageHandler httpHandler)
    {
        var baseUrl = "http://execd.local";
        var headers = new Dictionary<string, string> { ["X-Test"] = "true" };
        var client = new HttpClientWrapper(new HttpClient(httpHandler), baseUrl, headers);
        var sseHttpClient = new HttpClient(httpHandler);
        var logger = NullLoggerFactory.Instance.CreateLogger("CommandsAdapterTests");
        return new CommandsAdapter(client, sseHttpClient, baseUrl, headers, logger);
    }

    /// <summary>
    /// Creates a 200 response whose body is a single SSE "init" event.
    /// </summary>
    private static HttpResponseMessage CreateInitEventResponse()
    {
        return new HttpResponseMessage(HttpStatusCode.OK)
        {
            Content = new StringContent(InitEventStream, Encoding.UTF8, "text/event-stream")
        };
    }

    /// <summary>
    /// Test double that records every request URI and delegates response creation to a callback.
    /// </summary>
    private sealed class StubHttpMessageHandler : HttpMessageHandler
    {
        private readonly Func<HttpRequestMessage, CancellationToken, Task<HttpResponseMessage>> _handler;

        public StubHttpMessageHandler(Func<HttpRequestMessage, CancellationToken, Task<HttpResponseMessage>> handler)
        {
            _handler = handler;
        }

        /// <summary>URIs of all requests seen so far, in arrival order (empty string when a request has no URI).</summary>
        public List<string> RequestUris { get; } = new();

        protected override async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
        {
            // Record before delegating so the URI is captured even when the callback throws.
            RequestUris.Add(request.RequestUri?.ToString() ?? string.Empty);
            return await _handler(request, cancellationToken).ConfigureAwait(false);
        }
    }
}


================================================
FILE: sdks/sandbox/csharp/tests/OpenSandbox.Tests/ConnectionConfigTests.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using FluentAssertions;
using OpenSandbox.Config;
using OpenSandbox.Core;
using Xunit;

namespace OpenSandbox.Tests;

/// <summary>
/// Tests for <see cref="ConnectionConfig"/>: option defaults and overrides, API-key header
/// injection, base-URL normalization, and HTTP client creation.
/// </summary>
public class ConnectionConfigTests
{
    /// <summary>
    /// A parameterless config exposes the documented defaults.
    /// </summary>
    /// <remarks>
    /// NOTE(review): if <see cref="ConnectionConfig"/> falls back to environment variables
    /// (see <c>Constants.EnvDomain</c> / <c>Constants.EnvApiKey</c>), this test depends on
    /// them being unset in the test environment - confirm.
    /// </remarks>
    [Fact]
    public void Constructor_WithDefaultOptions_ShouldUseDefaults()
    {
        // Arrange & Act
        var config = new ConnectionConfig();

        // Assert
        config.Protocol.Should().Be(ConnectionProtocol.Http);
        config.Domain.Should().Be("localhost:8080");
        config.ApiKey.Should().BeNull();
        config.RequestTimeoutSeconds.Should().Be(Constants.DefaultRequestTimeoutSeconds);
        config.UseServerProxy.Should().BeFalse();
        config.Headers.Should().BeEmpty();
    }

    /// <summary>
    /// Every explicitly supplied option is reflected on the resulting config.
    /// </summary>
    [Fact]
    public void Constructor_WithCustomOptions_ShouldApplyOptions()
    {
        // Arrange
        var options = new ConnectionConfigOptions
        {
            Domain = "api.example.com",
            Protocol = ConnectionProtocol.Https,
            ApiKey = "test-api-key",
            RequestTimeoutSeconds = 60,
            UseServerProxy = true,
            Headers = new Dictionary<string, string>
            {
                ["X-Custom-Header"] = "custom-value"
            }
        };

        // Act
        var config = new ConnectionConfig(options);

        // Assert
        config.Protocol.Should().Be(ConnectionProtocol.Https);
        config.Domain.Should().Be("api.example.com");
        config.ApiKey.Should().Be("test-api-key");
        config.RequestTimeoutSeconds.Should().Be(60);
        config.UseServerProxy.Should().BeTrue();
        config.Headers.Should().ContainKey("X-Custom-Header");
        config.Headers["X-Custom-Header"].Should().Be("custom-value");
    }

    /// <summary>
    /// Supplying an API key adds it to the headers under <c>Constants.ApiKeyHeader</c>.
    /// </summary>
    [Fact]
    public void Constructor_WithApiKey_ShouldAddApiKeyHeader()
    {
        // Arrange
        var options = new ConnectionConfigOptions
        {
            ApiKey = "my-secret-key"
        };

        // Act
        var config = new ConnectionConfig(options);

        // Assert
        config.Headers.Should().ContainKey(Constants.ApiKeyHeader);
        config.Headers[Constants.ApiKeyHeader].Should().Be("my-secret-key");
    }

    /// <summary>
    /// The HTTP protocol yields an <c>http://</c> base URL with the <c>/v1</c> suffix.
    /// </summary>
    [Fact]
    public void GetBaseUrl_WithHttpProtocol_ShouldReturnHttpUrl()
    {
        // Arrange
        var config = new ConnectionConfig(new ConnectionConfigOptions
        {
            Domain = "localhost:8080",
            Protocol = ConnectionProtocol.Http
        });

        // Act
        var baseUrl = config.GetBaseUrl();

        // Assert
        baseUrl.Should().Be("http://localhost:8080/v1");
    }

    /// <summary>
    /// The HTTPS protocol yields an <c>https://</c> base URL with the <c>/v1</c> suffix.
    /// </summary>
    [Fact]
    public void GetBaseUrl_WithHttpsProtocol_ShouldReturnHttpsUrl()
    {
        // Arrange
        var config = new ConnectionConfig(new ConnectionConfigOptions
        {
            Domain = "api.example.com",
            Protocol = ConnectionProtocol.Https
        });

        // Act
        var baseUrl = config.GetBaseUrl();

        // Assert
        baseUrl.Should().Be("https://api.example.com/v1");
    }

    /// <summary>
    /// A domain that already includes a scheme keeps that scheme in the base URL.
    /// </summary>
    [Fact]
    public void GetBaseUrl_WithFullUrl_ShouldPreserveScheme()
    {
        // Arrange
        var config = new ConnectionConfig(new ConnectionConfigOptions
        {
            Domain = "https://api.example.com"
        });

        // Act
        var baseUrl = config.GetBaseUrl();

        // Assert
        baseUrl.Should().Be("https://api.example.com/v1");
    }

    /// <summary>
    /// A domain that already ends in <c>/v1</c> does not get the suffix appended twice.
    /// </summary>
    [Fact]
    public void GetBaseUrl_WithV1Suffix_ShouldNotDuplicate()
    {
        // Arrange
        var config = new ConnectionConfig(new ConnectionConfigOptions
        {
            Domain = "https://api.example.com/v1"
        });

        // Act
        var baseUrl = config.GetBaseUrl();

        // Assert
        baseUrl.Should().Be("https://api.example.com/v1");
    }

    /// <summary>
    /// A trailing slash on the domain is normalized away before the suffix is appended.
    /// </summary>
    [Fact]
    public void GetBaseUrl_WithTrailingSlash_ShouldNormalize()
    {
        // Arrange
        var config = new ConnectionConfig(new ConnectionConfigOptions
        {
            Domain = "api.example.com/"
        });

        // Act
        var baseUrl = config.GetBaseUrl();

        // Assert
        baseUrl.Should().Be("http://api.example.com/v1");
    }

    /// <summary>
    /// A newly created client uses the configured request timeout.
    /// </summary>
    [Fact]
    public void CreateHttpClient_ShouldReturnConfiguredClient()
    {
        // Arrange
        var config = new ConnectionConfig(new ConnectionConfigOptions
        {
            RequestTimeoutSeconds = 45
        });

        // Act
        // The test owns the freshly created client, so dispose it when the test ends.
        using var client = config.CreateHttpClient();

        // Assert
        client.Should().NotBeNull();
        client.Timeout.Should().Be(TimeSpan.FromSeconds(45));
    }

    /// <summary>
    /// Repeated calls to <c>GetHttpClient</c> return the same instance.
    /// </summary>
    [Fact]
    public void GetHttpClient_ShouldReturnSameInstance()
    {
        // Arrange
        var config = new ConnectionConfig();

        // Act
        // Not disposed here: the instance is shared and handed out by the config itself.
        var client1 = config.GetHttpClient();
        var client2 = config.GetHttpClient();

        // Assert
        client1.Should().BeSameAs(client2);
    }

    /// <summary>
    /// The SSE client is created with an infinite timeout.
    /// </summary>
    [Fact]
    public void CreateSseHttpClient_ShouldHaveInfiniteTimeout()
    {
        // Arrange
        var config = new ConnectionConfig();

        // Act
        // The test owns the freshly created client, so dispose it when the test ends.
        using var client = config.CreateSseHttpClient();

        // Assert
        client.Should().NotBeNull();
        client.Timeout.Should().Be(Timeout.InfiniteTimeSpan);
    }
}


================================================
FILE: sdks/sandbox/csharp/tests/OpenSandbox.Tests/ConstantsTests.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using FluentAssertions;
using OpenSandbox.Core;
using Xunit;

namespace OpenSandbox.Tests;

/// <summary>
/// Pins the values exposed by <c>Constants</c> so that an accidental change to an SDK default,
/// environment variable name, or header name fails a test.
/// </summary>
public class ConstantsTests
{
    /// <summary>The default execd port is 44772.</summary>
    [Fact]
    public void DefaultExecdPort_ShouldBe44772()
    {
        Constants.DefaultExecdPort.Should().Be(44772);
    }

    /// <summary>The default entrypoint is exactly <c>tail -f /dev/null</c>, in that order.</summary>
    [Fact]
    public void DefaultEntrypoint_ShouldBeTailCommand()
    {
        // Equal() compares element by element in order. BeEquivalentTo() ignores ordering for
        // collections by default, which would let a reordered argv (e.g. "-f tail /dev/null") pass.
        Constants.DefaultEntrypoint.Should().Equal("tail", "-f", "/dev/null");
    }

    /// <summary>The default resource limits carry a <c>cpu</c> of "1" and a <c>memory</c> of "2Gi".</summary>
    [Fact]
    public void DefaultResourceLimits_ShouldContainCpuAndMemory()
    {
        Constants.DefaultResourceLimits.Should().ContainKey("cpu");
        Constants.DefaultResourceLimits.Should().ContainKey("memory");
        Constants.DefaultResourceLimits["cpu"].Should().Be("1");
        Constants.DefaultResourceLimits["memory"].Should().Be("2Gi");
    }

    /// <summary>The default sandbox timeout is 600 seconds.</summary>
    [Fact]
    public void DefaultTimeoutSeconds_ShouldBe600()
    {
        Constants.DefaultTimeoutSeconds.Should().Be(600);
    }

    /// <summary>The default ready timeout is 30 seconds.</summary>
    [Fact]
    public void DefaultReadyTimeoutSeconds_ShouldBe30()
    {
        Constants.DefaultReadyTimeoutSeconds.Should().Be(30);
    }

    /// <summary>The default health-check polling interval is 200 milliseconds.</summary>
    [Fact]
    public void DefaultHealthCheckPollingIntervalMillis_ShouldBe200()
    {
        Constants.DefaultHealthCheckPollingIntervalMillis.Should().Be(200);
    }

    /// <summary>The default request timeout is 30 seconds.</summary>
    [Fact]
    public void DefaultRequestTimeoutSeconds_ShouldBe30()
    {
        Constants.DefaultRequestTimeoutSeconds.Should().Be(30);
    }

    /// <summary>The domain environment variable name is <c>OPEN_SANDBOX_DOMAIN</c>.</summary>
    [Fact]
    public void EnvDomain_ShouldBeCorrect()
    {
        Constants.EnvDomain.Should().Be("OPEN_SANDBOX_DOMAIN");
    }

    /// <summary>The API key environment variable name is <c>OPEN_SANDBOX_API_KEY</c>.</summary>
    [Fact]
    public void EnvApiKey_ShouldBeCorrect()
    {
        Constants.EnvApiKey.Should().Be("OPEN_SANDBOX_API_KEY");
    }

    /// <summary>The API key header name is <c>OPEN-SANDBOX-API-KEY</c>.</summary>
    [Fact]
    public void ApiKeyHeader_ShouldBeCorrect()
    {
        Constants.ApiKeyHeader.Should().Be("OPEN-SANDBOX-API-KEY");
    }

    /// <summary>The request id header name is <c>x-request-id</c>.</summary>
    [Fact]
    public void RequestIdHeader_ShouldBeCorrect()
    {
        Constants.RequestIdHeader.Should().Be("x-request-id");
    }
}


================================================
FILE: sdks/sandbox/csharp/tests/OpenSandbox.Tests/ExceptionTests.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using FluentAssertions;
using OpenSandbox.Core;
using Xunit;

namespace OpenSandbox.Tests;

/// <summary>
/// Unit tests for the SDK error model: <c>SandboxError</c> and the exception types that carry it.
/// </summary>
public class ExceptionTests
{
    /// <summary>The code and message passed to the constructor are exposed unchanged.</summary>
    [Fact]
    public void SandboxError_ShouldStoreCodeAndMessage()
    {
        // Arrange & Act
        SandboxError sandboxError = new("TEST_CODE", "Test message");

        // Assert
        sandboxError.Code.Should().Be("TEST_CODE");
        sandboxError.Message.Should().Be("Test message");
    }

    /// <summary>With a message present, <c>ToString</c> yields "[code] message".</summary>
    [Fact]
    public void SandboxError_ToString_WithMessage_ShouldFormatCorrectly()
    {
        // Arrange
        SandboxError sandboxError = new("TEST_CODE", "Test message");

        // Act
        string formatted = sandboxError.ToString();

        // Assert
        formatted.Should().Be("[TEST_CODE] Test message");
    }

    /// <summary>Without a message, <c>ToString</c> yields only the bracketed code.</summary>
    [Fact]
    public void SandboxError_ToString_WithoutMessage_ShouldFormatCorrectly()
    {
        // Arrange
        SandboxError sandboxError = new("TEST_CODE");

        // Act
        string formatted = sandboxError.ToString();

        // Assert
        formatted.Should().Be("[TEST_CODE]");
    }

    /// <summary>An error supplied at construction is surfaced through <c>Error</c>.</summary>
    [Fact]
    public void SandboxException_ShouldContainError()
    {
        // Arrange
        var suppliedError = new SandboxError("TEST_CODE", "Test message");

        // Act
        var thrown = new SandboxException("Exception message", error: suppliedError);

        // Assert
        thrown.Message.Should().Be("Exception message");
        thrown.Error.Should().Be(suppliedError);
    }

    /// <summary>A request id supplied at construction is surfaced through <c>RequestId</c>.</summary>
    [Fact]
    public void SandboxException_ShouldContainRequestId()
    {
        // Arrange & Act
        var thrown = new SandboxException("Exception message", requestId: "req-base-123");

        // Assert
        thrown.RequestId.Should().Be("req-base-123");
    }

    /// <summary>
    /// The (string, Exception, SandboxError) constructor must remain declared so that
    /// binaries compiled against it keep working.
    /// </summary>
    [Fact]
    public void SandboxException_ShouldDeclareLegacyConstructor_ForBinaryCompatibility()
    {
        Type[] legacySignature =
        {
            typeof(string),
            typeof(Exception),
            typeof(SandboxError)
        };

        var legacyConstructor = typeof(SandboxException).GetConstructor(legacySignature);

        legacyConstructor.Should().NotBeNull();
    }

    /// <summary>When no error is supplied, a default one with the internal-unknown code is created.</summary>
    [Fact]
    public void SandboxException_WithoutError_ShouldCreateDefaultError()
    {
        // Arrange & Act
        var thrown = new SandboxException("Exception message");

        // Assert
        thrown.Error.Should().NotBeNull();
        thrown.Error.Code.Should().Be(SandboxErrorCodes.InternalUnknownError);
    }

    /// <summary>
    /// Status code, request id and raw body are stored; with no explicit error the code
    /// is the unexpected-response one.
    /// </summary>
    [Fact]
    public void SandboxApiException_ShouldContainStatusCodeAndRequestId()
    {
        // Arrange & Act
        var apiException = new SandboxApiException(
            message: "API error",
            statusCode: 404,
            requestId: "req-123",
            rawBody: "Not found");

        // Assert
        apiException.Message.Should().Be("API error");
        apiException.StatusCode.Should().Be(404);
        apiException.RequestId.Should().Be("req-123");
        apiException.RawBody.Should().Be("Not found");
        apiException.Error.Code.Should().Be(SandboxErrorCodes.UnexpectedResponse);
    }

    /// <summary>
    /// <c>RequestId</c> must be declared directly on <c>SandboxApiException</c> (not merely
    /// inherited) so that binaries compiled against it keep working.
    /// </summary>
    [Fact]
    public void SandboxApiException_ShouldDeclareRequestIdProperty_ForBinaryCompatibility()
    {
        // DeclaredOnly restricts the lookup to members declared on the type itself.
        const System.Reflection.BindingFlags declaredPublicInstance =
            System.Reflection.BindingFlags.Public |
            System.Reflection.BindingFlags.Instance |
            System.Reflection.BindingFlags.DeclaredOnly;

        var declaredProperty = typeof(SandboxApiException).GetProperty("RequestId", declaredPublicInstance);

        declaredProperty.Should().NotBeNull();
    }

    /// <summary>An explicitly supplied error takes the place of the default one.</summary>
    [Fact]
    public void SandboxApiException_WithCustomError_ShouldUseProvidedError()
    {
        // Arrange
        var customError = new SandboxError("CUSTOM_CODE", "Custom message");

        // Act
        var apiException = new SandboxApiException(
            message: "API error",
            statusCode: 500,
            error: customError);

        // Assert
        apiException.Error.Should().Be(customError);
        apiException.Error.Code.Should().Be("CUSTOM_CODE");
    }

    /// <summary>The ready-timeout exception carries the ready-timeout code and its message.</summary>
    [Fact]
    public void SandboxReadyTimeoutException_ShouldHaveCorrectErrorCode()
    {
        // Arrange & Act
        var timeout = new SandboxReadyTimeoutException("Timeout waiting for sandbox");

        // Assert
        timeout.Error.Code.Should().Be(SandboxErrorCodes.ReadyTimeout);
        timeout.Message.Should().Be("Timeout waiting for sandbox");
    }

    /// <summary>The unhealthy exception carries the unhealthy code and its message.</summary>
    [Fact]
    public void SandboxUnhealthyException_ShouldHaveCorrectErrorCode()
    {
        // Arrange & Act
        var unhealthy = new SandboxUnhealthyException("Sandbox is unhealthy");

        // Assert
        unhealthy.Error.Code.Should().Be(SandboxErrorCodes.Unhealthy);
        unhealthy.Message.Should().Be("Sandbox is unhealthy");
    }

    /// <summary>The invalid-argument exception carries the invalid-argument code and its message.</summary>
    [Fact]
    public void InvalidArgumentException_ShouldHaveCorrectErrorCode()
    {
        // Arrange & Act
        var invalidArgument = new InvalidArgumentException("Invalid argument provided");

        // Assert
        invalidArgument.Error.Code.Should().Be(SandboxErrorCodes.InvalidArgument);
        invalidArgument.Message.Should().Be("Invalid argument provided");
    }

    /// <summary>The internal exception carries the internal-unknown code and its message.</summary>
    [Fact]
    public void SandboxInternalException_ShouldHaveCorrectErrorCode()
    {
        // Arrange & Act
        var internalFailure = new SandboxInternalException("Internal error occurred");

        // Assert
        internalFailure.Error.Code.Should().Be(SandboxErrorCodes.InternalUnknownError);
        internalFailure.Message.Should().Be("Internal error occurred");
    }

    /// <summary>An inner exception passed to the constructor is exposed via <c>InnerException</c>.</summary>
    [Fact]
    public void SandboxException_WithInnerException_ShouldPreserveInnerException()
    {
        // Arrange
        var cause = new InvalidOperationException("Inner error");

        // Act
        var wrapper = new SandboxException("Outer error", cause);

        // Assert
        wrapper.InnerException.Should().Be(cause);
    }
}


================================================
FILE: sdks/sandbox/csharp/tests/OpenSandbox.Tests/ModelsTests.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using FluentAssertions;
using OpenSandbox.Models;
using Xunit;

namespace OpenSandbox.Tests;

/// <summary>
/// Property round-trip tests for the SDK model types: every value assigned in an
/// initializer is expected to be readable back unchanged.
/// </summary>
public class ModelsTests
{
    /// <summary>A fresh <c>Execution</c> has null scalars and empty, non-null collections.</summary>
    [Fact]
    public void Execution_ShouldInitializeWithEmptyCollections()
    {
        // Arrange & Act
        var execution = new Execution();

        // Assert
        execution.Id.Should().BeNull();
        execution.ExecutionCount.Should().BeNull();
        execution.Logs.Should().NotBeNull();
        execution.Logs.Stdout.Should().BeEmpty();
        execution.Logs.Stderr.Should().BeEmpty();
        execution.Results.Should().BeEmpty();
        execution.Error.Should().BeNull();
        execution.Complete.Should().BeNull();
    }

    /// <summary>Messages can be appended to both the stdout and stderr lists.</summary>
    [Fact]
    public void ExecutionLogs_ShouldAllowAddingMessages()
    {
        // Arrange
        var logs = new ExecutionLogs();
        var stdoutMsg = new OutputMessage { Text = "stdout", Timestamp = 1000, IsError = false };
        var stderrMsg = new OutputMessage { Text = "stderr", Timestamp = 2000, IsError = true };

        // Act
        logs.Stdout.Add(stdoutMsg);
        logs.Stderr.Add(stderrMsg);

        // Assert
        logs.Stdout.Should().HaveCount(1);
        logs.Stdout[0].Text.Should().Be("stdout");
        logs.Stderr.Should().HaveCount(1);
        logs.Stderr[0].Text.Should().Be("stderr");
        logs.Stderr[0].IsError.Should().BeTrue();
    }

    /// <summary><c>OutputMessage</c> keeps its text, timestamp and error flag.</summary>
    [Fact]
    public void OutputMessage_ShouldStoreProperties()
    {
        // Arrange & Act
        var msg = new OutputMessage
        {
            Text = "Hello World",
            Timestamp = 1234567890,
            IsError = false
        };

        // Assert
        msg.Text.Should().Be("Hello World");
        msg.Timestamp.Should().Be(1234567890);
        msg.IsError.Should().BeFalse();
    }

    /// <summary><c>ExecutionResult</c> keeps its text, timestamp and raw payload.</summary>
    [Fact]
    public void ExecutionResult_ShouldStoreProperties()
    {
        // Arrange & Act
        var result = new ExecutionResult
        {
            Text = "Result text",
            Timestamp = 1234567890,
            Raw = new Dictionary<string, object> { ["text/plain"] = "Result text" }
        };

        // Assert
        result.Text.Should().Be("Result text");
        result.Timestamp.Should().Be(1234567890);
        result.Raw.Should().ContainKey("text/plain");
    }

    /// <summary><c>ExecutionError</c> keeps its name, value, timestamp and traceback lines.</summary>
    [Fact]
    public void ExecutionError_ShouldStoreProperties()
    {
        // Arrange & Act
        var error = new ExecutionError
        {
            Name = "ValueError",
            Value = "Invalid value",
            Timestamp = 1234567890,
            Traceback = new List<string> { "line 1", "line 2" }
        };

        // Assert
        error.Name.Should().Be("ValueError");
        error.Value.Should().Be("Invalid value");
        error.Timestamp.Should().Be(1234567890);
        error.Traceback.Should().HaveCount(2);
    }

    /// <summary><c>ExecutionComplete</c> keeps its timestamp and execution time.</summary>
    [Fact]
    public void ExecutionComplete_ShouldStoreProperties()
    {
        // Arrange & Act
        var complete = new ExecutionComplete
        {
            Timestamp = 1234567890,
            ExecutionTimeMs = 500
        };

        // Assert
        complete.Timestamp.Should().Be(1234567890);
        complete.ExecutionTimeMs.Should().Be(500);
    }

    /// <summary><c>SandboxInfo</c> keeps every assigned property, including both timestamps.</summary>
    [Fact]
    public void SandboxInfo_ShouldStoreProperties()
    {
        // Arrange
        // Capture the timestamps in locals so they can be verified afterwards.
        var createdAt = DateTime.UtcNow;
        var expiresAt = createdAt.AddMinutes(10);

        // Act
        var info = new SandboxInfo
        {
            Id = "sandbox-123",
            Image = new ImageSpec { Uri = "ubuntu:latest" },
            Entrypoint = new List<string> { "tail", "-f", "/dev/null" },
            Status = new SandboxStatus { State = "Running" },
            CreatedAt = createdAt,
            ExpiresAt = expiresAt,
            Metadata = new Dictionary<string, string> { ["key"] = "value" }
        };

        // Assert
        info.Id.Should().Be("sandbox-123");
        info.Image.Uri.Should().Be("ubuntu:latest");
        info.Entrypoint.Should().HaveCount(3);
        info.Status.State.Should().Be("Running");
        info.CreatedAt.Should().Be(createdAt);
        info.ExpiresAt.Should().Be(expiresAt);
        info.Metadata.Should().ContainKey("key");
    }

    /// <summary><c>SandboxStatus</c> keeps its state, reason and message.</summary>
    [Fact]
    public void SandboxStatus_ShouldStoreProperties()
    {
        // Arrange & Act
        var status = new SandboxStatus
        {
            State = "Error",
            Reason = "ImagePullFailed",
            Message = "Failed to pull image"
        };

        // Assert
        status.State.Should().Be("Error");
        status.Reason.Should().Be("ImagePullFailed");
        status.Message.Should().Be("Failed to pull image");
    }

    /// <summary><c>ImageSpec</c> keeps its URI together with the attached credentials.</summary>
    [Fact]
    public void ImageSpec_WithAuth_ShouldStoreCredentials()
    {
        // Arrange & Act
        var image = new ImageSpec
        {
            Uri = "private-registry.com/image:tag",
            Auth = new ImageAuth
            {
                Username = "user",
                Password = "pass"
            }
        };

        // Assert
        image.Uri.Should().Be("private-registry.com/image:tag");
        image.Auth.Should().NotBeNull();
        image.Auth!.Username.Should().Be("user");
        image.Auth.Password.Should().Be("pass");
    }

    /// <summary><c>NetworkPolicy</c> keeps its default action and each egress rule in order.</summary>
    [Fact]
    public void NetworkPolicy_ShouldStoreRules()
    {
        // Arrange & Act
        var policy = new NetworkPolicy
        {
            DefaultAction = NetworkRuleAction.Deny,
            Egress = new List<NetworkRule>
            {
                new() { Action = NetworkRuleAction.Allow, Target = "example.com" },
                new() { Action = NetworkRuleAction.Allow, Target = "*.trusted.com" }
            }
        };

        // Assert
        policy.DefaultAction.Should().Be(NetworkRuleAction.Deny);
        policy.Egress.Should().HaveCount(2);
        policy.Egress![0].Action.Should().Be(NetworkRuleAction.Allow);
        policy.Egress[0].Target.Should().Be("example.com");
        // Check the second rule as well so a dropped or swapped entry is caught.
        policy.Egress[1].Action.Should().Be(NetworkRuleAction.Allow);
        policy.Egress[1].Target.Should().Be("*.trusted.com");
    }

    /// <summary><c>SandboxMetrics</c> keeps its CPU, memory and timestamp values.</summary>
    [Fact]
    public void SandboxMetrics_ShouldStoreProperties()
    {
        // Arrange & Act
        var metrics = new SandboxMetrics
        {
            CpuCount = 4,
            CpuUsedPercentage = 25.5,
            MemoryTotalMiB = 8192,
            MemoryUsedMiB = 2048,
            Timestamp = 1234567890
        };

        // Assert
        metrics.CpuCount.Should().Be(4);
        metrics.CpuUsedPercentage.Should().Be(25.5);
        metrics.MemoryTotalMiB.Should().Be(8192);
        metrics.MemoryUsedMiB.Should().Be(2048);
        metrics.Timestamp.Should().Be(1234567890);
    }

    /// <summary><c>SandboxFileInfo</c> keeps path, size, mode, ownership and both timestamps.</summary>
    [Fact]
    public void SandboxFileInfo_ShouldStoreProperties()
    {
        // Arrange
        // Capture the timestamps in locals so they can be verified afterwards.
        var createdAt = DateTime.UtcNow;
        var modifiedAt = DateTime.UtcNow;

        // Act
        var fileInfo = new SandboxFileInfo
        {
            Path = "/tmp/test.txt",
            Size = 1024,
            Mode = 644,
            Owner = "root",
            Group = "root",
            CreatedAt = createdAt,
            ModifiedAt = modifiedAt
        };

        // Assert
        fileInfo.Path.Should().Be("/tmp/test.txt");
        fileInfo.Size.Should().Be(1024);
        fileInfo.Mode.Should().Be(644);
        fileInfo.Owner.Should().Be("root");
        fileInfo.Group.Should().Be("root");
        fileInfo.CreatedAt.Should().Be(createdAt);
        fileInfo.ModifiedAt.Should().Be(modifiedAt);
    }

    /// <summary><c>WriteEntry</c> keeps path, data, mode and ownership.</summary>
    [Fact]
    public void WriteEntry_ShouldStoreProperties()
    {
        // Arrange & Act
        var entry = new WriteEntry
        {
            Path = "/tmp/file.txt",
            Data = "Hello World",
            Mode = 644,
            Owner = "user",
            Group = "group"
        };

        // Assert
        entry.Path.Should().Be("/tmp/file.txt");
        entry.Data.Should().Be("Hello World");
        entry.Mode.Should().Be(644);
        entry.Owner.Should().Be("user");
        entry.Group.Should().Be("group");
    }

    /// <summary><c>SearchEntry</c> keeps its path and pattern.</summary>
    [Fact]
    public void SearchEntry_ShouldStoreProperties()
    {
        // Arrange & Act
        var entry = new SearchEntry
        {
            Path = "/tmp",
            Pattern = "*.txt"
        };

        // Assert
        entry.Path.Should().Be("/tmp");
        entry.Pattern.Should().Be("*.txt");
    }

    /// <summary><c>MoveEntry</c> keeps its source and destination paths.</summary>
    [Fact]
    public void MoveEntry_ShouldStoreProperties()
    {
        // Arrange & Act
        var entry = new MoveEntry
        {
            Src = "/tmp/old.txt",
            Dest = "/tmp/new.txt"
        };

        // Assert
        entry.Src.Should().Be("/tmp/old.txt");
        entry.Dest.Should().Be("/tmp/new.txt");
    }

    /// <summary><c>RunCommandOptions</c> keeps working directory, flags, ids and environment.</summary>
    [Fact]
    public void RunCommandOptions_ShouldStoreProperties()
    {
        // Arrange & Act
        var options = new RunCommandOptions
        {
            WorkingDirectory = "/home/user",
            Background = true,
            TimeoutSeconds = 30,
            Uid = 1000,
            Gid = 1000,
            Envs = new Dictionary<string, string>
            {
                ["APP_ENV"] = "test"
            }
        };

        // Assert
        options.WorkingDirectory.Should().Be("/home/user");
        options.Background.Should().BeTrue();
        options.TimeoutSeconds.Should().Be(30);
        options.Uid.Should().Be(1000);
        options.Gid.Should().Be(1000);
        options.Envs.Should().ContainKey("APP_ENV");
    }

    /// <summary><c>ServerStreamEvent</c> keeps its type, text, timestamp and execution counters.</summary>
    [Fact]
    public void ServerStreamEvent_ShouldStoreProperties()
    {
        // Arrange & Act
        var ev = new ServerStreamEvent
        {
            Type = "stdout",
            Text = "output text",
            Timestamp = 1234567890,
            ExecutionCount = 1,
            ExecutionTime = 100
        };

        // Assert
        ev.Type.Should().Be("stdout");
        ev.Text.Should().Be("output text");
        ev.Timestamp.Should().Be(1234567890);
        ev.ExecutionCount.Should().Be(1);
        ev.ExecutionTime.Should().Be(100);
    }

    /// <summary><c>CommandStatus</c> keeps id, content, run state, exit code, error and timestamps.</summary>
    [Fact]
    public void CommandStatus_ShouldStoreProperties()
    {
        // Arrange
        var startedAt = DateTime.UtcNow.AddSeconds(-5);
        var finishedAt = DateTime.UtcNow;

        // Act
        var status = new CommandStatus
        {
            Id = "cmd-1",
            Content = "echo hello",
            Running = false,
            ExitCode = 0,
            Error = null,
            StartedAt = startedAt,
            FinishedAt = finishedAt
        };

        // Assert
        status.Id.Should().Be("cmd-1");
        status.Content.Should().Be("echo hello");
        status.Running.Should().BeFalse();
        status.ExitCode.Should().Be(0);
        status.Error.Should().BeNull();
        status.StartedAt.Should().Be(startedAt);
        status.FinishedAt.Should().Be(finishedAt);
    }

    /// <summary><c>CommandLogs</c> keeps its content and cursor.</summary>
    [Fact]
    public void CommandLogs_ShouldStoreProperties()
    {
        // Arrange & Act
        var logs = new CommandLogs
        {
            Content = "line1\nline2\n",
            Cursor = 12
        };

        // Assert
        logs.Content.Should().Contain("line1");
        logs.Cursor.Should().Be(12);
    }

    /// <summary><c>PaginationInfo</c> keeps page, size, totals and the next-page flag.</summary>
    [Fact]
    public void PaginationInfo_ShouldStoreProperties()
    {
        // Arrange & Act
        var pagination = new PaginationInfo
        {
            Page = 1,
            PageSize = 10,
            TotalItems = 100,
            TotalPages = 10,
            HasNextPage = true
        };

        // Assert
        pagination.Page.Should().Be(1);
        pagination.PageSize.Should().Be(10);
        pagination.TotalItems.Should().Be(100);
        pagination.TotalPages.Should().Be(10);
        pagination.HasNextPage.Should().BeTrue();
    }
}


================================================
FILE: sdks/sandbox/csharp/tests/OpenSandbox.Tests/OpenSandbox.Tests.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Test project for the OpenSandbox C# SDK: marked as a test project and excluded from packing. -->
  <PropertyGroup>
    <TargetFramework>net10.0</TargetFramework>
    <LangVersion>12.0</LangVersion>
    <Nullable>enable</Nullable>
    <ImplicitUsings>enable</ImplicitUsings>
    <IsPackable>false</IsPackable>
    <IsTestProject>true</IsTestProject>
  </PropertyGroup>

  <!-- Test framework (xUnit), runner, coverage collector, mocking (Moq) and assertion (FluentAssertions) packages. -->
  <ItemGroup>
    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.11.1" />
    <PackageReference Include="xunit" Version="2.9.2" />
    <!-- PrivateAssets=all: the runner is consumed by this project only and is not exposed to referencing projects. -->
    <PackageReference Include="xunit.runner.visualstudio" Version="2.8.2">
      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
      <PrivateAssets>all</PrivateAssets>
    </PackageReference>
    <!-- PrivateAssets=all: the coverage collector is consumed by this project only. -->
    <PackageReference Include="coverlet.collector" Version="6.0.2">
      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
      <PrivateAssets>all</PrivateAssets>
    </PackageReference>
    <PackageReference Include="Moq" Version="4.20.72" />
    <PackageReference Include="FluentAssertions" Version="6.12.2" />
  </ItemGroup>

  <!-- The SDK project under test. -->
  <ItemGroup>
    <ProjectReference Include="..\..\src\OpenSandbox\OpenSandbox.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: sdks/sandbox/csharp/tests/OpenSandbox.Tests/OptionsTests.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using FluentAssertions;
using OpenSandbox.Config;
using OpenSandbox.Core;
using OpenSandbox.Models;
using Xunit;

namespace OpenSandbox.Tests;

/// <summary>
/// Property round-trip tests for the SDK option types (create, connect, resume,
/// wait-until-ready, manager and list-filter options).
/// </summary>
public class OptionsTests
{
    /// <summary>Every property assigned on <c>SandboxCreateOptions</c> can be read back.</summary>
    [Fact]
    public void SandboxCreateOptions_ShouldStoreProperties()
    {
        // Arrange & Act
        var createOptions = new SandboxCreateOptions
        {
            Image = "ubuntu:latest",
            TimeoutSeconds = 600,
            Entrypoint = new[] { "bash" },
            Env = new Dictionary<string, string> { { "KEY", "VALUE" } },
            Metadata = new Dictionary<string, string> { { "tag", "test" } },
            Resource = new Dictionary<string, string> { { "cpu", "2" } },
            SkipHealthCheck = true,
            ReadyTimeoutSeconds = 60,
            HealthCheckPollingInterval = 500
        };

        // Assert
        createOptions.Image.Should().Be("ubuntu:latest");
        createOptions.TimeoutSeconds.Should().Be(600);
        createOptions.Entrypoint.Should().Contain("bash");
        createOptions.Env.Should().ContainKey("KEY");
        createOptions.Metadata.Should().ContainKey("tag");
        createOptions.Resource.Should().ContainKey("cpu");
        createOptions.SkipHealthCheck.Should().BeTrue();
        createOptions.ReadyTimeoutSeconds.Should().Be(60);
        createOptions.HealthCheckPollingInterval.Should().Be(500);
    }

    /// <summary>A network policy attached to the create options is kept with its rules.</summary>
    [Fact]
    public void SandboxCreateOptions_WithNetworkPolicy_ShouldStorePolicy()
    {
        // Arrange
        var denyAllExceptPypi = new NetworkPolicy
        {
            DefaultAction = NetworkRuleAction.Deny,
            Egress = new List<NetworkRule>
            {
                new NetworkRule { Action = NetworkRuleAction.Allow, Target = "pypi.org" }
            }
        };

        // Act
        var createOptions = new SandboxCreateOptions
        {
            Image = "python:3.11",
            NetworkPolicy = denyAllExceptPypi
        };

        // Assert
        createOptions.NetworkPolicy.Should().NotBeNull();
        createOptions.NetworkPolicy!.DefaultAction.Should().Be(NetworkRuleAction.Deny);
        createOptions.NetworkPolicy.Egress.Should().HaveCount(1);
    }

    /// <summary>Registry credentials attached to the create options are kept.</summary>
    [Fact]
    public void SandboxCreateOptions_WithImageAuth_ShouldStoreAuth()
    {
        // Arrange
        var credentials = new ImageAuth
        {
            Username = "user",
            Password = "pass"
        };

        // Act
        var createOptions = new SandboxCreateOptions
        {
            Image = "private-registry.com/image:tag",
            ImageAuth = credentials
        };

        // Assert
        createOptions.ImageAuth.Should().NotBeNull();
        createOptions.ImageAuth!.Username.Should().Be("user");
        createOptions.ImageAuth.Password.Should().Be("pass");
    }

    /// <summary>Every property assigned on <c>SandboxConnectOptions</c> can be read back.</summary>
    [Fact]
    public void SandboxConnectOptions_ShouldStoreProperties()
    {
        // Arrange & Act
        var connectOptions = new SandboxConnectOptions
        {
            SandboxId = "sandbox-123",
            SkipHealthCheck = false,
            ReadyTimeoutSeconds = 30,
            HealthCheckPollingInterval = 200
        };

        // Assert
        connectOptions.SandboxId.Should().Be("sandbox-123");
        connectOptions.SkipHealthCheck.Should().BeFalse();
        connectOptions.ReadyTimeoutSeconds.Should().Be(30);
        connectOptions.HealthCheckPollingInterval.Should().Be(200);
    }

    /// <summary>Every property assigned on <c>SandboxResumeOptions</c> can be read back.</summary>
    [Fact]
    public void SandboxResumeOptions_ShouldStoreProperties()
    {
        // Arrange & Act
        var resumeOptions = new SandboxResumeOptions
        {
            SkipHealthCheck = true,
            ReadyTimeoutSeconds = 45,
            HealthCheckPollingInterval = 300
        };

        // Assert
        resumeOptions.SkipHealthCheck.Should().BeTrue();
        resumeOptions.ReadyTimeoutSeconds.Should().Be(45);
        resumeOptions.HealthCheckPollingInterval.Should().Be(300);
    }

    /// <summary>Timeout and polling interval on <c>WaitUntilReadyOptions</c> can be read back.</summary>
    [Fact]
    public void WaitUntilReadyOptions_ShouldStoreProperties()
    {
        // Arrange & Act
        var waitOptions = new WaitUntilReadyOptions
        {
            ReadyTimeoutSeconds = 60,
            PollingIntervalMillis = 500
        };

        // Assert
        waitOptions.ReadyTimeoutSeconds.Should().Be(60);
        waitOptions.PollingIntervalMillis.Should().Be(500);
    }

    /// <summary>A custom health-check delegate is stored by reference.</summary>
    [Fact]
    public void WaitUntilReadyOptions_WithCustomHealthCheck_ShouldStoreFunction()
    {
        // Arrange: the delegate is only stored by this test, never invoked.
        Func<Sandbox, Task<bool>> probe = async _ =>
        {
            await Task.Delay(1);
            return true;
        };

        // Act
        var waitOptions = new WaitUntilReadyOptions
        {
            ReadyTimeoutSeconds = 30,
            PollingIntervalMillis = 200,
            HealthCheck = probe
        };

        // Assert
        waitOptions.HealthCheck.Should().NotBeNull();
        waitOptions.HealthCheck.Should().BeSameAs(probe);
    }

    /// <summary>The connection config given to <c>SandboxManagerOptions</c> is stored by reference.</summary>
    [Fact]
    public void SandboxManagerOptions_ShouldStoreProperties()
    {
        // Arrange
        var connectionOptions = new ConnectionConfigOptions
        {
            Domain = "api.example.com"
        };
        var connectionConfig = new ConnectionConfig(connectionOptions);

        // Act
        var managerOptions = new SandboxManagerOptions
        {
            ConnectionConfig = connectionConfig
        };

        // Assert
        managerOptions.ConnectionConfig.Should().BeSameAs(connectionConfig);
    }

    /// <summary>States, metadata and paging values assigned on <c>SandboxFilter</c> can be read back.</summary>
    [Fact]
    public void SandboxFilter_ShouldStoreProperties()
    {
        // Arrange & Act
        var listFilter = new SandboxFilter
        {
            States = new[] { "Running", "Paused" },
            Metadata = new Dictionary<string, string> { { "env", "test" } },
            Page = 1,
            PageSize = 20
        };

        // Assert
        listFilter.States.Should().HaveCount(2);
        listFilter.States.Should().Contain("Running");
        listFilter.States.Should().Contain("Paused");
        listFilter.Metadata.Should().ContainKey("env");
        listFilter.Page.Should().Be(1);
        listFilter.PageSize.Should().Be(20);
    }

    /// <summary>A default-constructed <c>SandboxFilter</c> leaves every property null.</summary>
    [Fact]
    public void SandboxFilter_WithNullValues_ShouldAllowNulls()
    {
        // Arrange & Act
        var emptyFilter = new SandboxFilter();

        // Assert
        emptyFilter.States.Should().BeNull();
        emptyFilter.Metadata.Should().BeNull();
        emptyFilter.Page.Should().BeNull();
        emptyFilter.PageSize.Should().BeNull();
    }
}


================================================
FILE: sdks/sandbox/csharp/tests/OpenSandbox.Tests/SandboxEgressLifecycleTests.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using FluentAssertions;
using OpenSandbox.Config;
using OpenSandbox.Core;
using OpenSandbox.Factory;
using OpenSandbox.Models;
using OpenSandbox.Services;
using Microsoft.Extensions.Logging.Abstractions;
using Xunit;

namespace OpenSandbox.Tests;

// Exercises Sandbox.CreateAsync with hand-written stubs and checks how often the
// egress stack is built and which endpoints are resolved along the way.
public class SandboxEgressLifecycleTests
{
    [Fact]
    public async Task CreateAsync_ShouldBuildEgressStackOnce_AndReuseItForOperations()
    {
        // Arrange
        var sandboxes = new StubSandboxes();
        var egress = new StubEgress();
        var adapterFactory = new StubAdapterFactory(sandboxes, egress);

        var sandbox = await Sandbox.CreateAsync(new SandboxCreateOptions
        {
            Image = "python:3.12",
            ConnectionConfig = new ConnectionConfig(new ConnectionConfigOptions
            {
                Domain = "127.0.0.1:8080",
                Protocol = ConnectionProtocol.Http
            }),
            AdapterFactory = adapterFactory,
            SkipHealthCheck = true,
            Diagnostics = new SdkDiagnosticsOptions
            {
                LoggerFactory = NullLoggerFactory.Instance
            }
        });

        try
        {
            // Act: two egress operations issued against the same sandbox instance.
            await sandbox.GetEgressPolicyAsync();
            await sandbox.PatchEgressRulesAsync([new NetworkRule
            {
                Action = NetworkRuleAction.Allow,
                Target = "www.github.com"
            }]);

            // Assert: one endpoint lookup per port, one egress stack, one call per operation.
            sandboxes.EndpointCalls.Should().Equal(Constants.DefaultExecdPort, Constants.DefaultEgressPort);
            adapterFactory.EgressStackCallCount.Should().Be(1);
            adapterFactory.LastEgressBaseUrl.Should().Be($"http://127.0.0.1:{Constants.DefaultEgressPort}");
            egress.GetPolicyCallCount.Should().Be(1);
            egress.PatchRulesCallCount.Should().Be(1);
        }
        finally
        {
            // Dispose the sandbox even when an assertion fails, matching the other
            // sandbox tests in this project, so nothing it holds leaks across tests.
            await sandbox.DisposeAsync();
        }
    }

    // Adapter factory that hands out the supplied stubs and records how the egress
    // stack was requested (call count and the last base URL passed in).
    private sealed class StubAdapterFactory : IAdapterFactory
    {
        private readonly ISandboxes _sandboxes;
        private readonly IEgress _egress;

        public StubAdapterFactory(ISandboxes sandboxes, IEgress egress)
        {
            _sandboxes = sandboxes;
            _egress = egress;
        }

        // Number of times CreateEgressStack has been invoked.
        public int EgressStackCallCount { get; private set; }

        // EgressBaseUrl from the most recent CreateEgressStack call; null before the first call.
        public string? LastEgressBaseUrl { get; private set; }

        public LifecycleStack CreateLifecycleStack(CreateLifecycleStackOptions options)
        {
            return new LifecycleStack
            {
                Sandboxes = _sandboxes
            };
        }

        public ExecdStack CreateExecdStack(CreateExecdStackOptions options)
        {
            return new ExecdStack
            {
                Commands = new StubCommands(),
                Files = new StubFiles(),
                Health = new StubHealth(),
                Metrics = new StubMetrics()
            };
        }

        public EgressStack CreateEgressStack(CreateEgressStackOptions options)
        {
            EgressStackCallCount++;
            LastEgressBaseUrl = options.EgressBaseUrl;
            return new EgressStack
            {
                Egress = _egress
            };
        }
    }

    // ISandboxes stub: creation returns a fixed "Running" sandbox, endpoint lookups are
    // recorded and answered with 127.0.0.1:{port}, deletion is a no-op, and the
    // remaining members throw NotImplementedException.
    private sealed class StubSandboxes : ISandboxes
    {
        // Ports passed to GetSandboxEndpointAsync, in call order.
        public List<int> EndpointCalls { get; } = new();

        public Task<CreateSandboxResponse> CreateSandboxAsync(CreateSandboxRequest request, CancellationToken cancellationToken = default)
        {
            return Task.FromResult(new CreateSandboxResponse
            {
                Id = "sandbox-test-id",
                Status = new SandboxStatus
                {
                    State = "Running"
                },
                CreatedAt = DateTime.UtcNow,
                Entrypoint = ["/bin/sh"]
            });
        }

        public Task<SandboxInfo> GetSandboxAsync(string sandboxId, CancellationToken cancellationToken = default) =>
            throw new NotImplementedException();

        public Task<ListSandboxesResponse> ListSandboxesAsync(ListSandboxesParams? @params = null, CancellationToken cancellationToken = default) =>
            throw new NotImplementedException();

        public Task DeleteSandboxAsync(string sandboxId, CancellationToken cancellationToken = default) =>
            Task.CompletedTask;

        public Task PauseSandboxAsync(string sandboxId, CancellationToken cancellationToken = default) =>
            throw new NotImplementedException();

        public Task ResumeSandboxAsync(string sandboxId, CancellationToken cancellationToken = default) =>
            throw new NotImplementedException();

        public Task<RenewSandboxExpirationResponse> RenewSandboxExpirationAsync(string sandboxId, RenewSandboxExpirationRequest request, CancellationToken cancellationToken = default) =>
            throw new NotImplementedException();

        public Task<Endpoint> GetSandboxEndpointAsync(string sandboxId, int port, bool useServerProxy = false, CancellationToken cancellationToken = default)
        {
            EndpointCalls.Add(port);
            return Task.FromResult(new Endpoint
            {
                EndpointAddress = $"127.0.0.1:{port}",
                Headers = new Dictionary<string, string>
                {
                    ["X-Port"] = port.ToString()
                }
            });
        }
    }

    // IEgress stub that counts calls and returns a fixed deny-by-default policy.
    private sealed class StubEgress : IEgress
    {
        // Number of GetPolicyAsync invocations.
        public int GetPolicyCallCount { get; private set; }

        // Number of PatchRulesAsync invocations.
        public int PatchRulesCallCount { get; private set; }

        public Task<NetworkPolicy> GetPolicyAsync(CancellationToken cancellationToken = default)
        {
            GetPolicyCallCount++;
            return Task.FromResult(new NetworkPolicy
            {
                DefaultAction = NetworkRuleAction.Deny,
                Egress = [new NetworkRule
                {
                    Action = NetworkRuleAction.Allow,
                    Target = "pypi.org"
                }]
            });
        }

        public Task PatchRulesAsync(IReadOnlyList<NetworkRule> rules, CancellationToken cancellationToken = default)
        {
            PatchRulesCallCount++;
            return Task.CompletedTask;
        }
    }

    // IExecdCommands stub: streaming yields nothing, every other member throws.
    private sealed class StubCommands : IExecdCommands
    {
        public IAsyncEnumerable<ServerStreamEvent> RunStreamAsync(string command, RunCommandOptions? options = null, CancellationToken cancellationToken = default) =>
            AsyncEnumerable.Empty<ServerStreamEvent>();

        public Task<Execution> RunAsync(string command, RunCommandOptions? options = null, ExecutionHandlers? handlers = null, CancellationToken cancellationToken = default) =>
            throw new NotImplementedException();

        public Task<CommandStatus> GetCommandStatusAsync(string executionId, CancellationToken cancellationToken = default) =>
            throw new NotImplementedException();

        public Task<CommandLogs> GetBackgroundCommandLogsAsync(string executionId, long? cursor = null, CancellationToken cancellationToken = default) =>
            throw new NotImplementedException();

        public Task InterruptAsync(string executionId, CancellationToken cancellationToken = default) =>
            throw new NotImplementedException();
    }

    // ISandboxFiles stub: the byte stream yields nothing, every other member throws.
    private sealed class StubFiles : ISandboxFiles
    {
        public Task<IReadOnlyDictionary<string, SandboxFileInfo>> GetFileInfoAsync(IEnumerable<string> paths, CancellationToken cancellationToken = default) =>
            throw new NotImplementedException();

        public Task<IReadOnlyList<SandboxFileInfo>> SearchAsync(SearchEntry entry, CancellationToken cancellationToken = default) =>
            throw new NotImplementedException();

        public Task CreateDirectoriesAsync(IEnumerable<CreateDirectoryEntry> entries, CancellationToken cancellationToken = default) =>
            throw new NotImplementedException();

        public Task DeleteDirectoriesAsync(IEnumerable<string> paths, CancellationToken cancellationToken = default) =>
            throw new NotImplementedException();

        public Task WriteFilesAsync(IEnumerable<WriteEntry> entries, CancellationToken cancellationToken = default) =>
            throw new NotImplementedException();

        public Task<string> ReadFileAsync(string path, ReadFileOptions? options = null, CancellationToken cancellationToken = default) =>
            throw new NotImplementedException();

        public Task<byte[]> ReadBytesAsync(string path, ReadBytesOptions? options = null, CancellationToken cancellationToken = default) =>
            throw new NotImplementedException();

        public IAsyncEnumerable<byte[]> ReadBytesStreamAsync(string path, ReadBytesOptions? options = null, CancellationToken cancellationToken = default) =>
            AsyncEnumerable.Empty<byte[]>();

        public Task DeleteFilesAsync(IEnumerable<string> paths, CancellationToken cancellationToken = default) =>
            throw new NotImplementedException();

        public Task MoveFilesAsync(IEnumerable<MoveEntry> entries, CancellationToken cancellationToken = default) =>
            throw new NotImplementedException();

        public Task ReplaceContentsAsync(IEnumerable<ContentReplaceEntry> entries, CancellationToken cancellationToken = default) =>
            throw new NotImplementedException();

        public Task SetPermissionsAsync(IEnumerable<SetPermissionEntry> entries, CancellationToken cancellationToken = default) =>
            throw new NotImplementedException();
    }

    // IExecdHealth stub whose ping always succeeds.
    private sealed class StubHealth : IExecdHealth
    {
        public Task<bool> PingAsync(CancellationToken cancellationToken = default) => Task.FromResult(true);
    }

    // IExecdMetrics stub whose only member throws.
    private sealed class StubMetrics : IExecdMetrics
    {
        public Task<SandboxMetrics> GetMetricsAsync(CancellationToken cancellationToken = default) =>
            throw new NotImplementedException();
    }
}


================================================
FILE: sdks/sandbox/csharp/tests/OpenSandbox.Tests/SandboxReadinessDiagnosticsTests.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using FluentAssertions;
using Moq;
using OpenSandbox.Config;
using OpenSandbox.Core;
using OpenSandbox.Factory;
using OpenSandbox.Models;
using OpenSandbox.Services;
using Xunit;

namespace OpenSandbox.Tests;

// Checks the diagnostic text carried by SandboxReadyTimeoutException when
// Sandbox.WaitUntilReadyAsync gives up, using Moq doubles for every adapter.
public class SandboxReadinessDiagnosticsTests
{
    [Fact]
    public async Task WaitUntilReadyAsync_WhenHealthCheckThrows_IncludesLastErrorAndConnectionContext()
    {
        // Arrange: every ping fails with a connection error.
        var failingHealth = new Mock<IExecdHealth>();
        failingHealth
            .Setup(health => health.PingAsync(It.IsAny<CancellationToken>()))
            .ThrowsAsync(new Exception("connect ECONNREFUSED 127.0.0.1:8080"));

        var sandbox = await CreateSandboxForReadinessTestAsync(failingHealth, useServerProxy: false);

        // Act
        Func<Task> waitForReady = async () =>
            await sandbox.WaitUntilReadyAsync(BuildShortWaitOptions());

        // Assert
        try
        {
            var thrown = await waitForReady.Should().ThrowAsync<SandboxReadyTimeoutException>();
            var message = thrown.Which.Message;

            message.Should().Contain("Sandbox health check timed out")
                .And.Contain("Last health check error")
                .And.Contain("domain=localhost:8080")
                .And.Contain("useServerProxy=False")
                .And.Contain("useServerProxy=true")
                .And.Contain("[docker].host_ip");
        }
        finally
        {
            await sandbox.DisposeAsync();
        }
    }

    [Fact]
    public async Task WaitUntilReadyAsync_WhenHealthCheckReturnsFalse_UsesFalseContinuouslyHint()
    {
        // Arrange: every ping completes but reports "not healthy".
        var unhealthy = new Mock<IExecdHealth>();
        unhealthy
            .Setup(health => health.PingAsync(It.IsAny<CancellationToken>()))
            .ReturnsAsync(false);

        var sandbox = await CreateSandboxForReadinessTestAsync(unhealthy, useServerProxy: true);

        // Act
        Func<Task> waitForReady = async () =>
            await sandbox.WaitUntilReadyAsync(BuildShortWaitOptions());

        // Assert
        try
        {
            var thrown = await waitForReady.Should().ThrowAsync<SandboxReadyTimeoutException>();
            var message = thrown.Which.Message;

            message.Should().Contain("Health check returned false continuously.")
                .And.Contain("useServerProxy=True")
                .And.NotContain("[docker].host_ip");
        }
        finally
        {
            await sandbox.DisposeAsync();
        }
    }

    // Wait settings shared by both tests: one-second timeout, one-millisecond polling interval.
    private static WaitUntilReadyOptions BuildShortWaitOptions()
    {
        return new WaitUntilReadyOptions
        {
            ReadyTimeoutSeconds = 1,
            PollingIntervalMillis = 1
        };
    }

    // Connects a Sandbox whose adapter factory returns mocks only; the supplied
    // health mock is the one the execd stack exposes. The initial health check is skipped.
    private static async Task<Sandbox> CreateSandboxForReadinessTestAsync(
        Mock<IExecdHealth> healthMock,
        bool useServerProxy)
    {
        var resolvedEndpoint = new Endpoint
        {
            EndpointAddress = "127.0.0.1:44772",
            Headers = new Dictionary<string, string>()
        };

        // Endpoint lookups are only matched for the proxy mode under test.
        var sandboxesMock = new Mock<ISandboxes>();
        sandboxesMock
            .Setup(s => s.GetSandboxEndpointAsync(
                It.IsAny<string>(),
                It.IsAny<int>(),
                useServerProxy,
                It.IsAny<CancellationToken>()))
            .ReturnsAsync(resolvedEndpoint);

        var lifecycleStack = new LifecycleStack
        {
            Sandboxes = sandboxesMock.Object
        };
        var execdStack = new ExecdStack
        {
            Commands = Mock.Of<IExecdCommands>(),
            Files = Mock.Of<ISandboxFiles>(),
            Health = healthMock.Object,
            Metrics = Mock.Of<IExecdMetrics>()
        };
        var egressStack = new EgressStack
        {
            Egress = Mock.Of<IEgress>()
        };

        var factoryMock = new Mock<IAdapterFactory>();
        factoryMock
            .Setup(f => f.CreateLifecycleStack(It.IsAny<CreateLifecycleStackOptions>()))
            .Returns(lifecycleStack);
        factoryMock
            .Setup(f => f.CreateExecdStack(It.IsAny<CreateExecdStackOptions>()))
            .Returns(execdStack);
        factoryMock
            .Setup(f => f.CreateEgressStack(It.IsAny<CreateEgressStackOptions>()))
            .Returns(egressStack);

        var connectOptions = new SandboxConnectOptions
        {
            SandboxId = "sbx-ready-diagnostics",
            ConnectionConfig = new ConnectionConfig(new ConnectionConfigOptions
            {
                Domain = "localhost:8080",
                UseServerProxy = useServerProxy
            }),
            AdapterFactory = factoryMock.Object,
            SkipHealthCheck = true
        };

        return await Sandbox.ConnectAsync(connectOptions);
    }
}


================================================
FILE: sdks/sandbox/csharp/tests/OpenSandbox.Tests/SandboxesAdapterTests.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System.Net;
using System.Text;
using FluentAssertions;
using OpenSandbox.Adapters;
using OpenSandbox.Internal;
using OpenSandbox.Models;
using Xunit;

namespace OpenSandbox.Tests;

// Drives SandboxesAdapter through HttpClientWrapper with in-memory message handlers,
// so requests can be inspected and responses canned without any network access.
public class SandboxesAdapterTests
{
    // Base URL handed to every HttpClientWrapper created by these tests.
    private const string BaseUrl = "http://localhost:8080/v1";

    [Fact]
    public async Task GetSandboxEndpointAsync_ShouldIncludeUseServerProxyQueryParam()
    {
        // Arrange
        var capture = new CaptureHandler();
        var adapter = BuildAdapter(capture);

        // Act
        await adapter.GetSandboxEndpointAsync("sbx-1", 44772, useServerProxy: true);

        // Assert
        var requestUri = capture.LastRequestUri;
        requestUri.Should().NotBeNull();
        requestUri!.PathAndQuery.Should().Contain("/sandboxes/sbx-1/endpoints/44772");
        requestUri.Query.Should().Contain("use_server_proxy=true");
    }

    [Fact]
    public async Task GetSandboxEndpointAsync_ShouldDefaultUseServerProxyToFalse()
    {
        // Arrange
        var capture = new CaptureHandler();
        var adapter = BuildAdapter(capture);

        // Act
        await adapter.GetSandboxEndpointAsync("sbx-2", 44772);

        // Assert
        var requestUri = capture.LastRequestUri;
        requestUri.Should().NotBeNull();
        requestUri!.Query.Should().Contain("use_server_proxy=false");
    }

    [Fact]
    public async Task GetSandboxAsync_ShouldTreatMissingExpiresAtAsNull()
    {
        // Arrange: the response body carries no expiresAt member.
        var payload = """
        {
          "id": "sbx-1",
          "image": { "uri": "python:3.11" },
          "entrypoint": ["python"],
          "status": { "state": "Running" },
          "createdAt": "2026-03-14T12:00:00Z"
        }
        """;
        var adapter = CreateAdapterWithJsonResponse(payload);

        // Act
        SandboxInfo info = await adapter.GetSandboxAsync("sbx-1");

        // Assert
        info.ExpiresAt.Should().BeNull();
    }

    [Fact]
    public async Task CreateSandboxAsync_ShouldTreatMissingExpiresAtAsNull()
    {
        // Arrange: the response body carries no expiresAt member.
        var payload = """
        {
          "id": "sbx-2",
          "status": { "state": "Pending" },
          "createdAt": "2026-03-14T12:00:00Z",
          "entrypoint": ["python"]
        }
        """;
        var adapter = CreateAdapterWithJsonResponse(payload);
        var request = new CreateSandboxRequest
        {
            Image = new ImageSpec { Uri = "python:3.11" },
            ResourceLimits = new Dictionary<string, string>(),
            Entrypoint = new List<string> { "python" }
        };

        // Act
        CreateSandboxResponse created = await adapter.CreateSandboxAsync(request);

        // Assert
        created.ExpiresAt.Should().BeNull();
    }

    // Builds an adapter whose every request is answered with the given JSON body.
    private static SandboxesAdapter CreateAdapterWithJsonResponse(string payload) =>
        BuildAdapter(new StaticJsonHandler(payload));

    // Wires handler -> HttpClient -> HttpClientWrapper -> SandboxesAdapter.
    private static SandboxesAdapter BuildAdapter(HttpMessageHandler handler)
    {
        var httpClient = new HttpClient(handler);
        return new SandboxesAdapter(new HttpClientWrapper(httpClient, BaseUrl));
    }

    // Remembers the URI of the last request and replies 200 with a fixed endpoint payload.
    private sealed class CaptureHandler : HttpMessageHandler
    {
        private const string EndpointJson = "{\"endpoint\":\"example.internal:44772\",\"headers\":{}}";

        public Uri? LastRequestUri { get; private set; }

        protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
        {
            LastRequestUri = request.RequestUri;

            var reply = new HttpResponseMessage(HttpStatusCode.OK);
            reply.Content = new StringContent(EndpointJson, Encoding.UTF8, "application/json");
            return Task.FromResult(reply);
        }
    }

    // Replies 200 with the JSON body supplied at construction, whatever the request.
    private sealed class StaticJsonHandler : HttpMessageHandler
    {
        private readonly string _body;

        public StaticJsonHandler(string payload)
        {
            _body = payload;
        }

        protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
        {
            var reply = new HttpResponseMessage(HttpStatusCode.OK);
            reply.Content = new StringContent(_body, Encoding.UTF8, "application/json");
            return Task.FromResult(reply);
        }
    }
}


================================================
FILE: sdks/sandbox/csharp/tests/OpenSandbox.Tests/SseParserTests.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System.Net;
using System.Text;
using FluentAssertions;
using OpenSandbox.Adapters;
using OpenSandbox.Core;
using OpenSandbox.Models;
using Xunit;

namespace OpenSandbox.Tests;

// Tests for SseParser.ParseJsonEventStreamAsync covering SSE and NDJSON bodies,
// lines that must be skipped, error responses, and cancellation.
public class SseParserTests
{
    [Fact]
    public async Task ParseJsonEventStreamAsync_WithSseFormat_ShouldParseEvents()
    {
        // Arrange
        var sseContent = @"data: {""type"":""init"",""text"":""session-123""}

data: {""type"":""stdout"",""text"":""Hello World""}

data: {""type"":""execution_complete"",""execution_time"":100}

";
        using var response = CreateMockResponse(HttpStatusCode.OK, sseContent);

        // Act
        var events = await CollectEventsAsync(response);

        // Assert
        events.Should().HaveCount(3);
        events[0].Type.Should().Be("init");
        events[0].Text.Should().Be("session-123");
        events[1].Type.Should().Be("stdout");
        events[1].Text.Should().Be("Hello World");
        events[2].Type.Should().Be("execution_complete");
        events[2].ExecutionTime.Should().Be(100);
    }

    [Fact]
    public async Task ParseJsonEventStreamAsync_WithNdjsonFormat_ShouldParseEvents()
    {
        // Arrange
        var ndjsonContent = @"{""type"":""init"",""text"":""session-456""}
{""type"":""stderr"",""text"":""Error message""}
{""type"":""execution_complete"",""execution_time"":50}
";
        using var response = CreateMockResponse(HttpStatusCode.OK, ndjsonContent);

        // Act
        var events = await CollectEventsAsync(response);

        // Assert
        events.Should().HaveCount(3);
        events[0].Type.Should().Be("init");
        events[1].Type.Should().Be("stderr");
        events[1].Text.Should().Be("Error message");
        events[2].Type.Should().Be("execution_complete");
    }

    [Fact]
    public async Task ParseJsonEventStreamAsync_WithSseComments_ShouldSkipComments()
    {
        // Arrange
        var sseContent = @": this is a comment
data: {""type"":""stdout"",""text"":""output""}
: another comment
";
        using var response = CreateMockResponse(HttpStatusCode.OK, sseContent);

        // Act
        var events = await CollectEventsAsync(response);

        // Assert
        events.Should().HaveCount(1);
        events[0].Type.Should().Be("stdout");
    }

    [Fact]
    public async Task ParseJsonEventStreamAsync_WithSseMetadata_ShouldSkipMetadata()
    {
        // Arrange
        var sseContent = @"event: message
id: 123
retry: 5000
data: {""type"":""stdout"",""text"":""output""}
";
        using var response = CreateMockResponse(HttpStatusCode.OK, sseContent);

        // Act
        var events = await CollectEventsAsync(response);

        // Assert
        events.Should().HaveCount(1);
        events[0].Type.Should().Be("stdout");
    }

    [Fact]
    public async Task ParseJsonEventStreamAsync_WithEmptyLines_ShouldSkipEmptyLines()
    {
        // Arrange
        var sseContent = @"

data: {""type"":""stdout"",""text"":""output""}


";
        using var response = CreateMockResponse(HttpStatusCode.OK, sseContent);

        // Act
        var events = await CollectEventsAsync(response);

        // Assert
        events.Should().HaveCount(1);
    }

    [Fact]
    public async Task ParseJsonEventStreamAsync_WithInvalidJson_ShouldSkipInvalidLines()
    {
        // Arrange
        var sseContent = @"data: {""type"":""stdout"",""text"":""valid""}
data: not valid json
data: {""type"":""stderr"",""text"":""also valid""}
";
        using var response = CreateMockResponse(HttpStatusCode.OK, sseContent);

        // Act
        var events = await CollectEventsAsync(response);

        // Assert
        events.Should().HaveCount(2);
        events[0].Type.Should().Be("stdout");
        events[1].Type.Should().Be("stderr");
    }

    [Fact]
    public async Task ParseJsonEventStreamAsync_WithErrorResponse_ShouldThrowSandboxApiException()
    {
        // Arrange
        var errorContent = @"{""message"":""Not found"",""code"":""NOT_FOUND""}";
        using var response = CreateMockResponse(HttpStatusCode.NotFound, errorContent);

        // Act & Assert
        var exception = await Assert.ThrowsAsync<SandboxApiException>(async () =>
        {
            await foreach (var _ in SseParser.ParseJsonEventStreamAsync<ServerStreamEvent>(response))
            {
                // Should not reach here
            }
        });

        exception.StatusCode.Should().Be(404);
        exception.Message.Should().Be("Not found");
        exception.Error.Code.Should().Be("NOT_FOUND");
    }

    [Fact]
    public async Task ParseJsonEventStreamAsync_WithErrorResponseNoJson_ShouldUseFallbackMessage()
    {
        // Arrange
        using var response = CreateMockResponse(HttpStatusCode.InternalServerError, "Internal Server Error");

        // Act & Assert
        var exception = await Assert.ThrowsAsync<SandboxApiException>(async () =>
        {
            await foreach (var _ in SseParser.ParseJsonEventStreamAsync<ServerStreamEvent>(response, "Custom fallback"))
            {
                // Should not reach here
            }
        });

        exception.StatusCode.Should().Be(500);
        exception.Message.Should().Be("Custom fallback");
    }

    [Fact]
    public async Task ParseJsonEventStreamAsync_WithCancellation_ShouldStopParsing()
    {
        // Arrange
        var sseContent = @"data: {""type"":""stdout"",""text"":""line1""}
data: {""type"":""stdout"",""text"":""line2""}
data: {""type"":""stdout"",""text"":""line3""}
";
        using var response = CreateMockResponse(HttpStatusCode.OK, sseContent);
        using var cts = new CancellationTokenSource();

        // Act
        var events = new List<ServerStreamEvent>();

        // ThrowsAnyAsync also accepts exceptions derived from OperationCanceledException
        // (for example TaskCanceledException), which cancelled async I/O may raise.
        await Assert.ThrowsAnyAsync<OperationCanceledException>(async () =>
        {
            await foreach (var ev in SseParser.ParseJsonEventStreamAsync<ServerStreamEvent>(response, cancellationToken: cts.Token))
            {
                events.Add(ev);
                if (events.Count == 1)
                {
                    // Cancel after the first event; the remaining lines must not be yielded.
                    cts.Cancel();
                }
            }
        });

        // Assert
        events.Should().HaveCount(1);
    }

    // Drains the parser over the given response (default fallback message, no
    // cancellation token) and returns the events in the order they were yielded.
    private static async Task<List<ServerStreamEvent>> CollectEventsAsync(HttpResponseMessage response)
    {
        var events = new List<ServerStreamEvent>();
        await foreach (var ev in SseParser.ParseJsonEventStreamAsync<ServerStreamEvent>(response))
        {
            events.Add(ev);
        }

        return events;
    }

    // Builds an in-memory response with the given status and a UTF-8 text/event-stream body.
    private static HttpResponseMessage CreateMockResponse(HttpStatusCode statusCode, string content)
    {
        return new HttpResponseMessage(statusCode)
        {
            Content = new StringContent(content, Encoding.UTF8, "text/event-stream")
        };
    }
}


================================================
FILE: sdks/sandbox/javascript/.nvmrc
================================================
20


================================================
FILE: sdks/sandbox/javascript/README.md
================================================
# Alibaba Sandbox SDK for JavaScript/TypeScript

English | [中文](README_zh.md)

A TypeScript/JavaScript SDK for low-level interaction with OpenSandbox. It provides the ability to create, manage, and interact with secure sandbox environments, including executing shell commands, managing files, and reading resource metrics.

## Installation

### npm

```bash
npm install @alibaba-group/opensandbox
```

### pnpm

```bash
pnpm add @alibaba-group/opensandbox
```

### yarn

```bash
yarn add @alibaba-group/opensandbox
```

## Quick Start

The following example shows how to create a sandbox and execute a shell command.

> **Note**: Before running this example, ensure the OpenSandbox service is running. See the root [README.md](../../../README.md) for startup instructions.

```ts
import { ConnectionConfig, Sandbox, SandboxException } from "@alibaba-group/opensandbox";

const config = new ConnectionConfig({
  domain: "api.opensandbox.io",
  apiKey: "your-api-key",
  // protocol: "https",
  // requestTimeoutSeconds: 60,
});

try {
  const sandbox = await Sandbox.create({
    connectionConfig: config,
    image: "ubuntu",
    timeoutSeconds: 10 * 60,
  });

  const execution = await sandbox.commands.run("echo 'Hello Sandbox!'");
  console.log(execution.logs.stdout[0]?.text);

  // Optional but recommended: terminate the remote instance when you are done.
  await sandbox.kill();
  await sandbox.close();
} catch (err) {
  if (err instanceof SandboxException) {
    console.error(
      `Sandbox Error: [${err.error.code}] ${err.error.message ?? ""}`,
    );
    console.error(`Request ID: ${err.requestId ?? "N/A"}`);
  } else {
    console.error(err);
  }
}
```

## Usage Examples

### 1. Lifecycle Management

Manage the sandbox lifecycle, including renewal, pausing, and resuming.

```ts
const info = await sandbox.getInfo();
console.log("State:", info.status.state);
console.log("Created:", info.createdAt);
console.log("Expires:", info.expiresAt); // null when manual cleanup mode is used

await sandbox.pause();

// Resume returns a fresh, connected Sandbox instance.
const resumed = await sandbox.resume();

// Renew: expiresAt = now + timeoutSeconds
await resumed.renew(30 * 60);
```

Create a non-expiring sandbox by passing `timeoutSeconds: null`:

```ts
const manual = await Sandbox.create({
  connectionConfig: config,
  image: "ubuntu",
  timeoutSeconds: null,
});
```

### 2. Custom Health Check

Define custom logic to decide when the sandbox counts as ready and healthy. When provided, it replaces the default ping check that the SDK performs during readiness checks.

```ts
const sandbox = await Sandbox.create({
  connectionConfig: config,
  image: "nginx:latest",
  healthCheck: async (sbx) => {
    // Example: consider the sandbox healthy when port 80 endpoint becomes available
    const ep = await sbx.getEndpoint(80);
    return !!ep.endpoint;
  },
});
```

### 3. Command Execution & Streaming

Execute commands and handle output streams in real-time.

```ts
import type { ExecutionHandlers } from "@alibaba-group/opensandbox";

const handlers: ExecutionHandlers = {
  onStdout: (m) => console.log("STDOUT:", m.text),
  onStderr: (m) => console.error("STDERR:", m.text),
  onExecutionComplete: (c) =>
    console.log("Finished in", c.executionTimeMs, "ms"),
};

await sandbox.commands.run(
  'for i in 1 2 3; do echo "Count $i"; sleep 0.2; done',
  undefined,
  handlers,
);
```

### 4. Comprehensive File Operations

Manage files and directories, including read, write, list/search, and delete.

```ts
await sandbox.files.createDirectories([{ path: "/tmp/demo", mode: 755 }]);

await sandbox.files.writeFiles([
  { path: "/tmp/demo/hello.txt", data: "Hello World", mode: 644 },
]);

const content = await sandbox.files.readFile("/tmp/demo/hello.txt");
console.log("Content:", content);

const files = await sandbox.files.search({
  path: "/tmp/demo",
  pattern: "*.txt",
});
console.log(files.map((f) => f.path));

await sandbox.files.deleteDirectories(["/tmp/demo"]);
```

### 5. Endpoints

`getEndpoint()` returns an endpoint **without a scheme** (for example `"localhost:44772"`). Use `getEndpointUrl()` if you want a ready-to-use absolute URL (for example `"http://localhost:44772"`).

```ts
const { endpoint } = await sandbox.getEndpoint(44772);
const url = await sandbox.getEndpointUrl(44772);
```

### 6. Sandbox Management (Admin)

Use `SandboxManager` for administrative tasks and finding existing sandboxes.

```ts
import { SandboxManager } from "@alibaba-group/opensandbox";

const manager = SandboxManager.create({ connectionConfig: config });
const list = await manager.listSandboxInfos({
  states: ["Running"],
  pageSize: 10,
});
console.log(list.items.map((s) => s.id));
await manager.close();
```

## Configuration

### 1. Connection Configuration

The `ConnectionConfig` class manages API server connection settings.

Runtime notes:

- In browsers, the SDK uses the global `fetch` implementation.
- In Node.js, every `Sandbox` and `SandboxManager` clones the base `ConnectionConfig` via `withTransportIfMissing()`, so each instance gets an isolated `undici` keep-alive pool. Call `sandbox.close()` or `manager.close()` when you are done so the SDK can release the associated agent.

| Parameter               | Description                                                                                                  | Default          | Environment Variable   |
| ----------------------- | ------------------------------------------------------------------------------------------------------------ | ---------------- | ---------------------- |
| `apiKey`                | API key for authentication                                                                                   | Optional         | `OPEN_SANDBOX_API_KEY` |
| `domain`                | Sandbox service domain (`host[:port]`)                                                                       | `localhost:8080` | `OPEN_SANDBOX_DOMAIN`  |
| `protocol`              | HTTP protocol (`http`/`https`)                                                                               | `http`           | -                      |
| `requestTimeoutSeconds` | Request timeout applied to SDK HTTP calls                                                                    | `30`             | -                      |
| `debug`                 | Enable basic HTTP debug logging                                                                              | `false`          | -                      |
| `headers`               | Extra headers applied to every request                                                                       | `{}`             | -                      |
| `useServerProxy`        | Use sandbox server as proxy for execd/endpoint requests (e.g. when client cannot reach the sandbox directly) | `false`          | -                      |

```ts
import { ConnectionConfig } from "@alibaba-group/opensandbox";

// 1. Basic configuration
const config = new ConnectionConfig({
  domain: "api.opensandbox.io",
  apiKey: "your-key",
  requestTimeoutSeconds: 60,
});

// 2. Advanced: custom headers
const config2 = new ConnectionConfig({
  domain: "api.opensandbox.io",
  apiKey: "your-key",
  headers: { "X-Custom-Header": "value" },
});
```

### 2. Sandbox Creation Configuration

`Sandbox.create()` allows configuring the sandbox environment.

| Parameter                    | Description                                      | Default                      |
| ---------------------------- | ------------------------------------------------ | ---------------------------- |
| `image`                      | Docker image to use                              | Required                     |
| `timeoutSeconds`             | Automatic termination timeout (server-side TTL)  | 10 minutes                   |
| `entrypoint`                 | Container entrypoint command                     | `["tail","-f","/dev/null"]`  |
| `resource`                   | CPU and memory limits (string map)               | `{"cpu":"1","memory":"2Gi"}` |
| `env`                        | Environment variables                            | `{}`                         |
| `metadata`                   | Custom metadata tags                             | `{}`                         |
| `networkPolicy`              | Optional outbound network policy (egress)        | -                            |
| `extensions`                 | Extra server-defined fields                      | `{}`                         |
| `skipHealthCheck`            | Skip readiness checks (`Running` + health check) | `false`                      |
| `healthCheck`                | Custom readiness check                           | -                            |
| `readyTimeoutSeconds`        | Max time to wait for readiness                   | 30 seconds                   |
| `healthCheckPollingInterval` | Poll interval while waiting (milliseconds)       | 200 ms                       |

Note: metadata keys under `opensandbox.io/` are reserved for system-managed
labels and will be rejected by the server.

```ts
const sandbox = await Sandbox.create({
  connectionConfig: config,
  image: "python:3.11",
  networkPolicy: {
    defaultAction: "deny",
    egress: [{ action: "allow", target: "pypi.org" }],
  },
});
```

### 3. Runtime Egress Policy Updates

Runtime egress reads and patches go directly to the sandbox egress sidecar.
The SDK first resolves the sandbox endpoint on port `18080`, then calls the sidecar `/policy` API.

Patch uses merge semantics:
- Incoming rules take priority over existing rules with the same `target`.
- Existing rules for other targets remain unchanged.
- Within a single patch payload, the first rule for a `target` wins.
- The current `defaultAction` is preserved.

```ts
const policy = await sandbox.getEgressPolicy();

await sandbox.patchEgressRules([
  { action: "allow", target: "www.github.com" },
  { action: "deny", target: "pypi.org" },
]);
```

### 4. Resource cleanup

Both `Sandbox` and `SandboxManager` own a scoped HTTP agent when running on Node.js
so you can safely reuse the same `ConnectionConfig`. Once you are finished interacting
with the sandbox or administration APIs, call `sandbox.close()` / `manager.close()` to
release the underlying agent.

## Browser Notes

- The SDK can run in browsers, but **streaming file uploads are Node-only**.
- If you pass `ReadableStream` or `AsyncIterable` for `writeFiles`, the browser will fall back to **buffering in memory** before upload.
- Reason: browsers do not support streaming `multipart/form-data` bodies with custom boundaries (required by the execd upload API).


================================================
FILE: sdks/sandbox/javascript/README_zh.md
================================================
# Alibaba Sandbox JavaScript/TypeScript SDK

中文 | [English](README.md)

用于与 OpenSandbox 进行底层交互的 TypeScript/JavaScript SDK。它提供了创建、管理和与安全沙箱环境交互的能力，包括执行 Shell 命令、管理文件以及读取资源指标等。

## 安装指南

### npm

```bash
npm install @alibaba-group/opensandbox
```

### pnpm

```bash
pnpm add @alibaba-group/opensandbox
```

### yarn

```bash
yarn add @alibaba-group/opensandbox
```

## 快速开始

以下示例展示了如何创建一个沙箱并执行 Shell 命令。

> **注意**: 在运行此示例之前，请确保 OpenSandbox 服务已启动。服务启动请参考根目录的 [README_zh.md](../../../docs/README_zh.md)。

```ts
import { ConnectionConfig, Sandbox, SandboxException } from "@alibaba-group/opensandbox";

const config = new ConnectionConfig({
  domain: "api.opensandbox.io",
  apiKey: "your-api-key",
  // protocol: "https",
  // requestTimeoutSeconds: 60,
});

try {
  const sandbox = await Sandbox.create({
    connectionConfig: config,
    image: "ubuntu",
    timeoutSeconds: 10 * 60,
  });

  const execution = await sandbox.commands.run("echo 'Hello Sandbox!'");
  console.log(execution.logs.stdout[0]?.text);

  await sandbox.kill();
  await sandbox.close();
} catch (err) {
  if (err instanceof SandboxException) {
    console.error(`沙箱错误: [${err.error.code}] ${err.error.message ?? ""}`);
    console.error(`Request ID: ${err.requestId ?? "N/A"}`);
  } else {
    console.error(err);
  }
}
```

## 核心功能示例

### 1. 生命周期管理

管理沙箱的生命周期，包括续期、暂停、恢复和状态查询。

```ts
const info = await sandbox.getInfo();
console.log("状态:", info.status.state);
console.log("创建时间:", info.createdAt);
console.log("过期时间:", info.expiresAt);

await sandbox.pause();

// resume 会返回新的、已连接的 Sandbox 实例
const resumed = await sandbox.resume();

// renew：expiresAt = now + timeoutSeconds
await resumed.renew(30 * 60);

// 获取最新状态（使用新的变量名，避免与上面的 `info` 重复声明）
const latest = await resumed.getInfo();
console.log("状态:", latest.status.state);
console.log("过期时间:", latest.expiresAt); // 使用手动清理模式时为 null
```

通过传入 `timeoutSeconds: null` 创建一个不会自动过期的沙箱：

```ts
const manual = await Sandbox.create({
  connectionConfig: config,
  image: "ubuntu",
  timeoutSeconds: null,
});
```

### 2. 自定义健康检查

定义自定义逻辑来判断沙箱是否就绪/健康。这会覆盖“就绪检测”默认使用的 ping 检查逻辑。

```ts
const sandbox = await Sandbox.create({
  connectionConfig: config,
  image: "nginx:latest",
  healthCheck: async (sbx) => {
    // 示例：当 80 端口 endpoint 可获取时认为沙箱可用
    const ep = await sbx.getEndpoint(80);
    return !!ep.endpoint;
  },
});
```

### 3. 命令执行与流式响应

执行命令并实时处理输出流。

```ts
import type { ExecutionHandlers } from "@alibaba-group/opensandbox";

const handlers: ExecutionHandlers = {
  onStdout: (m) => console.log("STDOUT:", m.text),
  onStderr: (m) => console.error("STDERR:", m.text),
  onExecutionComplete: (c) => console.log("耗时(ms):", c.executionTimeMs),
};

await sandbox.commands.run(
  'for i in 1 2 3; do echo "Count $i"; sleep 0.2; done',
  undefined,
  handlers,
);
```

### 4. 全面的文件操作

管理文件和目录，包括读写、列表/搜索与删除。

```ts
await sandbox.files.createDirectories([{ path: "/tmp/demo", mode: 755 }]);

await sandbox.files.writeFiles([
  { path: "/tmp/demo/hello.txt", data: "Hello World", mode: 644 },
]);

const content = await sandbox.files.readFile("/tmp/demo/hello.txt");
console.log("文件内容:", content);

const files = await sandbox.files.search({
  path: "/tmp/demo",
  pattern: "*.txt",
});
console.log(files.map((f) => f.path));

await sandbox.files.deleteDirectories(["/tmp/demo"]);
```

### 5. Endpoint

`getEndpoint()` 返回 **不带 scheme** 的 endpoint（例如 `"localhost:44772"`）。如果你希望直接得到可用的绝对 URL（例如 `"http://localhost:44772"`），请使用 `getEndpointUrl()`。

```ts
const { endpoint } = await sandbox.getEndpoint(44772);
const url = await sandbox.getEndpointUrl(44772);
```

### 6. 沙箱管理（Admin）

使用 `SandboxManager` 进行管理操作，如查询现有沙箱列表。

```ts
import { SandboxManager } from "@alibaba-group/opensandbox";

const manager = SandboxManager.create({ connectionConfig: config });
const list = await manager.listSandboxInfos({
  states: ["Running"],
  pageSize: 10,
});
console.log(list.items.map((s) => s.id));
await manager.close();
```

## 配置说明

### 1. 连接配置 (Connection Configuration)

`ConnectionConfig` 类管理与 API 服务器的连接设置。

运行环境说明：

- 浏览器环境下，SDK 使用全局 `fetch`。
- Node.js 环境下，每个 `Sandbox` 和 `SandboxManager` 都会通过 `ConnectionConfig.withTransportIfMissing()` 创建独立的 keep-alive 池（基于 `undici`）。完成交互后请调用 `sandbox.close()` 或 `manager.close()` 来释放对应的 agent，以避免遗留连接，这与 Python SDK 的 transport 生命周期一致。

| 参数                    | 描述                                                                      | 默认值           | 环境变量               |
| ----------------------- | ------------------------------------------------------------------------- | ---------------- | ---------------------- |
| `apiKey`                | 用于认证的 API Key                                                        | 可选             | `OPEN_SANDBOX_API_KEY` |
| `domain`                | 沙箱服务域名（`host[:port]`）                                             | `localhost:8080` | `OPEN_SANDBOX_DOMAIN`  |
| `protocol`              | HTTP 协议（`http`/`https`）                                               | `http`           | -                      |
| `requestTimeoutSeconds` | SDK HTTP 请求超时（秒）                                                   | `30`             | -                      |
| `debug`                 | 是否开启基础 HTTP 调试日志                                                | `false`          | -                      |
| `headers`               | 每次请求附加的 Header                                                     | `{}`             | -                      |
| `useServerProxy`        | 是否通过沙箱服务代理访问 execd/endpoint（适用于客户端无法直连沙箱的场景） | `false`          | -                      |

```ts
import { ConnectionConfig } from "@alibaba-group/opensandbox";

// 1. 基础配置
const config = new ConnectionConfig({
  domain: "api.opensandbox.io",
  apiKey: "your-key",
  requestTimeoutSeconds: 60,
});

// 2. 进阶配置：自定义 headers
const config2 = new ConnectionConfig({
  domain: "api.opensandbox.io",
  apiKey: "your-key",
  headers: { "X-Custom-Header": "value" },
});
```

### 2. 沙箱创建配置 (Sandbox Creation Configuration)

`Sandbox.create()` 用于配置沙箱环境。

| 参数                         | 描述                                 | 默认值                       |
| ---------------------------- | ------------------------------------ | ---------------------------- |
| `image`                      | 使用的 Docker 镜像                   | 必填                         |
| `timeoutSeconds`             | 自动终止超时时间（服务端 TTL）       | 10 分钟                      |
| `entrypoint`                 | 容器启动入口命令                     | `["tail","-f","/dev/null"]`  |
| `resource`                   | CPU/内存限制（字符串 map）           | `{"cpu":"1","memory":"2Gi"}` |
| `env`                        | 环境变量                             | `{}`                         |
| `metadata`                   | 自定义元数据标签                     | `{}`                         |
| `networkPolicy`              | 可选的出站网络策略（egress）         | -                            |
| `extensions`                 | 额外的服务端扩展字段                 | `{}`                         |
| `skipHealthCheck`            | 跳过就绪检测（`Running` + 健康检查） | `false`                      |
| `healthCheck`                | 自定义就绪检查                       | -                            |
| `readyTimeoutSeconds`        | 等待就绪最大时间                     | 30 秒                        |
| `healthCheckPollingInterval` | 就绪轮询间隔（毫秒）                 | 200 ms                       |

注意：`opensandbox.io/` 前缀下的 metadata key 属于系统保留标签，服务端会拒绝用户传入。

```ts
const sandbox = await Sandbox.create({
  connectionConfig: config,
  image: "python:3.11",
  networkPolicy: {
    defaultAction: "deny",
    egress: [{ action: "allow", target: "pypi.org" }],
  },
});
```

### 3. 运行时 Egress 策略更新

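运行时的 egress 查询和 patch 会直接访问沙箱内的 egress sidecar。
SDK 会先解析 `18080` 端口对应的 sandbox endpoint，再调用 sidecar 的 `/policy` API。

Patch 采用合并（merge）语义：
- 对于相同 `target` 的规则，新传入的规则优先于已有规则。
- 其他 `target` 的已有规则保持不变。
- 同一次 patch 请求中，同一 `target` 以第一条规则为准。
- 当前的 `defaultAction` 保持不变。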

```ts
const policy = await sandbox.getEgressPolicy();

await sandbox.patchEgressRules([
  { action: "allow", target: "www.github.com" },
  { action: "deny", target: "pypi.org" },
]);
```

### 4. 资源清理

在 Node.js 环境下，`Sandbox` 和 `SandboxManager` 会拥有各自的 HTTP agent，因此即使多个实例共享同一个 `ConnectionConfig` 也不会互相影响。SDK 会借助 `ConnectionConfig.withTransportIfMissing()` 为每个实例克隆一份独立的 transport。使用完毕后，请调用 `sandbox.close()` / `manager.close()` 来释放底层连接池。

## 浏览器注意事项

- SDK 可在浏览器运行，但**流式文件上传仅支持 Node**。
- 如果 `writeFiles` 传入 `ReadableStream` 或 `AsyncIterable`，浏览器会回退为**先缓存在内存，再上传**。
- 原因：浏览器不支持以自定义 boundary 的 `multipart/form-data` 流式请求体（execd 上传接口需要此能力）。


================================================
FILE: sdks/sandbox/javascript/eslint.config.mjs
================================================
import path from "node:path";
import { fileURLToPath } from "node:url";
import { createBaseConfig } from "../../eslint.base.mjs";

// ESM has no built-in __dirname, so derive this config file's directory from its URL.
const packageDir = path.dirname(fileURLToPath(import.meta.url));

// Paths excluded from linting: the src/api output of scripts/generate-api.mjs,
// declaration files, and plain .js files under src/.
const ignoredGlobs = ["src/api/**", "src/**/*.d.ts", "src/**/*.js"];

export default createBaseConfig({
  tsconfigRootDir: packageDir,
  tsconfigPath: "./tsconfig.json",
  extraIgnores: ignoredGlobs,
  includeScripts: true,
});

================================================
FILE: sdks/sandbox/javascript/package.json
================================================
{
  "name": "@alibaba-group/opensandbox",
  "version": "0.1.5",
  "description": "OpenSandbox TypeScript/JavaScript SDK (sandbox lifecycle + execd APIs)",
  "license": "Apache-2.0",
  "type": "module",
  "main": "./dist/index.js",
  "types": "./dist/index.d.ts",
  "exports": {
    ".": {
      "types": "./dist/index.d.ts",
      "import": "./dist/index.js",
      "require": "./dist/cjs/index.cjs",
      "default": "./dist/index.js"
    },
    "./internal": {
      "types": "./dist/internal.d.ts",
      "import": "./dist/internal.js",
      "require": "./dist/cjs/internal.cjs",
      "default": "./dist/internal.js"
    }
  },
  "browser": "./dist/index.js",
  "sideEffects": false,
  "repository": {
    "type": "git",
    "url": "https://github.com/alibaba/OpenSandbox.git"
  },
  "bugs": {
    "url": "https://github.com/alibaba/OpenSandbox/issues"
  },
  "homepage": "https://open-sandbox.ai",
  "files": [
    "dist",
    "src"
  ],
  "engines": {
    "node": ">=20"
  },
  "packageManager": "pnpm@9.15.0",
  "scripts": {
    "gen:api": "node ./scripts/generate-api.mjs",
    "build": "pnpm run gen:api && tsup",
    "test": "pnpm run build && node --test tests/*.test.mjs",
    "lint": "eslint src scripts --max-warnings 0",
    "clean": "rm -rf dist"
  },
  "dependencies": {
    "openapi-fetch": "^0.14.1",
    "undici": "^7.18.2"
  },
  "devDependencies": {
    "@eslint/js": "^9.39.2",
    "eslint": "^9.39.2",
    "globals": "^17.0.0",
    "openapi-typescript": "^7.9.1",
    "tsup": "^8.5.0",
    "typescript": "^5.7.2",
    "typescript-eslint": "^8.52.0"
  }
}


================================================
FILE: sdks/sandbox/javascript/scripts/generate-api.mjs
================================================
#!/usr/bin/env node

// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { spawnSync } from "node:child_process";
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
import path from "node:path";
import process from "node:process";
import { fileURLToPath } from "node:url";

// Copyright holder exactly as it appears in license headers. Note that the
// name itself already ends with a period.
const LICENSE_OWNER = "Alibaba Group Holding Ltd.";
// The owner is embedded in a RegExp, so escape regex metacharacters; otherwise
// the trailing "." would match any character instead of a literal period.
const LICENSE_OWNER_PATTERN = LICENSE_OWNER.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
// Matches the first line of an existing license header, for any 4-digit year.
const LICENSE_MARKER_REGEX = new RegExp(`Copyright [0-9]{4} ${LICENSE_OWNER_PATTERN}`);

/**
 * Build the plain-text Apache-2.0 license notice for the current year.
 *
 * The first line is `Copyright <year> <LICENSE_OWNER>`. No extra period is
 * appended because LICENSE_OWNER already ends with one (appending another
 * produced "Ltd.." in generated headers).
 *
 * @returns {string} Multi-line license text without comment markers.
 */
function buildLicenseText() {
  const year = new Date().getFullYear();
  return `Copyright ${year} ${LICENSE_OWNER}

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.`;
}

/**
 * Turn plain text into a block of `//` line comments.
 *
 * Every input line, including empty ones, is prefixed with "// ", so a blank
 * line becomes "// " (with the trailing space).
 *
 * @param {string} text Newline-separated text.
 * @returns {string} The same lines, each prefixed with "// ".
 */
function asLineCommentHeader(text) {
  const commented = [];
  for (const line of text.split("\n")) {
    commented.push(`// ${line}`);
  }
  return commented.join("\n");
}

/**
 * Prepend the license header to a file unless it already has one.
 *
 * Only the first 40 lines are checked for the copyright marker. When the
 * marker is absent, the file is rewritten as: header, blank line, original
 * content.
 *
 * @param {string} filePath Path of the file to check and possibly rewrite.
 */
function ensureLicenseHeader(filePath) {
  const original = readFileSync(filePath, "utf8");
  // split() with a limit returns at most the first 40 lines.
  const firstLines = original.split("\n", 40).join("\n");
  const alreadyLicensed = LICENSE_MARKER_REGEX.test(firstLines);
  if (!alreadyLicensed) {
    const header = asLineCommentHeader(buildLicenseText());
    writeFileSync(filePath, [header, "", original].join("\n"), "utf8");
  }
}

/**
 * Print an error message to stderr and terminate the process with exit code 1.
 *
 * @param {string} message Human-readable failure description.
 */
function fail(message) {
  console.error(`❌ ${message}`);
  process.exit(1);
}

/**
 * Run a command synchronously with inherited stdio and abort the script on failure.
 *
 * @param {string} cmd Executable to run.
 * @param {string[]} args Arguments passed to the executable.
 * @param {string} cwd Working directory for the child process.
 */
function run(cmd, args, cwd) {
  const pretty = [cmd, ...args].join(" ");
  console.log(`\n▶ ${pretty}`);
  const res = spawnSync(cmd, args, { cwd, stdio: "inherit" });
  // spawnSync reports spawn failures (e.g. executable not found) through
  // `error`; in that case `status` is null and would print a useless "exit=null".
  if (res.error) {
    fail(`Command could not be run: ${pretty} (${res.error.message})`);
  }
  if (res.status !== 0) {
    // A child killed by a signal has a null status; report the signal instead.
    const reason = res.signal ? `signal=${res.signal}` : `exit=${res.status}`;
    fail(`Command failed (${reason}): ${pretty}`);
  }
}

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// This script lives in <package>/scripts: the package root is one level up and
// the repo root (OpenSandbox/) is four levels up.
const packageRoot = path.resolve(__dirname, "..");
const repoRoot = path.resolve(__dirname, "../../../../");

// OpenAPI specs keyed by API name; the same name is used for the generated file.
const specsDir = path.join(repoRoot, "specs");
const specs = {
  execd: path.join(specsDir, "execd-api.yaml"),
  egress: path.join(specsDir, "egress-api.yaml"),
  lifecycle: path.join(specsDir, "sandbox-lifecycle.yml"),
};
const apiNames = Object.keys(specs);

// Abort before generating anything if a spec file is missing.
apiNames.forEach((name) => {
  if (!existsSync(specs[name])) {
    fail(`OpenAPI spec not found for '${name}': ${specs[name]}`);
  }
});

const outDir = path.join(packageRoot, "src", "api");
mkdirSync(outDir, { recursive: true });

// One generated TypeScript file per spec: src/api/<name>.ts
const outFiles = Object.fromEntries(
  apiNames.map((name) => [name, path.join(outDir, `${name}.ts`)]),
);

console.log("🚀 OpenSandbox TypeScript SDK API Generator");
console.log(`- repoRoot: ${repoRoot}`);
console.log(`- outDir:   ${outDir}`);

// Use pnpm as requested by the project rules.
for (const name of apiNames) {
  run("pnpm", ["exec", "openapi-typescript", specs[name], "-o", outFiles[name]], packageRoot);
}

// The generator may overwrite outputs; re-apply unified license headers after generation.
for (const name of apiNames) {
  ensureLicenseHeader(outFiles[name]);
}

console.log("\n✅ API type generation completed:");
for (const name of apiNames) {
  console.log(`- ${path.relative(packageRoot, outFiles[name])}`);
}


================================================
FILE: sdks/sandbox/javascript/src/adapters/commandsAdapter.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import type { ExecdClient } from "../openapi/execdClient.js";
import { throwOnOpenApiFetchError } from "./openapiError.js";
import { parseJsonEventStream } from "./sse.js";
import type { paths as ExecdPaths } from "../api/execd.js";
import type {
  CommandExecution,
  CommandLogs,
  CommandStatus,
  RunCommandOpts,
  ServerStreamEvent,
} from "../models/execd.js";
import type { ExecdCommands } from "../services/execdCommands.js";
import type { ExecutionHandlers } from "../models/execution.js";
import { ExecutionEventDispatcher } from "../models/executionEventDispatcher.js";

/**
 * Join a base URL and a path with exactly one "/" between them.
 *
 * At most one trailing "/" is removed from `baseUrl`, and a leading "/" is
 * added to `pathname` when it is missing.
 */
function joinUrl(baseUrl: string, pathname: string): string {
  let prefix = baseUrl;
  if (prefix.endsWith("/")) {
    prefix = prefix.slice(0, -1);
  }
  const suffix = pathname.startsWith("/") ? pathname : "/" + pathname;
  return prefix + suffix;
}

/** Request body for POST /command (from generated spec; includes uid, gid, envs). */
type ApiRunCommandRequest =
  ExecdPaths["/command"]["post"]["requestBody"]["content"]["application/json"];
/** JSON body of the 200 response of GET /command/status/{id} (from generated spec). */
type ApiCommandStatusOk =
  ExecdPaths["/command/status/{id}"]["get"]["responses"][200]["content"]["application/json"];
/** text/plain body of the 200 response of GET /command/{id}/logs (from generated spec). */
type ApiCommandLogsOk =
  ExecdPaths["/command/{id}/logs"]["get"]["responses"][200]["content"]["text/plain"];

/**
 * Translate SDK-level run options into the POST /command request body.
 *
 * `timeoutSeconds` is converted to whole milliseconds. `uid`, `gid` and `envs`
 * are only set on the body when they were provided.
 *
 * @throws Error when `gid` is given without `uid`.
 */
function toRunCommandRequest(command: string, opts?: RunCommandOpts): ApiRunCommandRequest {
  const gid = opts?.gid;
  const uid = opts?.uid;
  if (gid != null && uid == null) {
    throw new Error("uid is required when gid is provided");
  }

  const request: ApiRunCommandRequest = {
    command,
    cwd: opts?.workingDirectory,
    background: Boolean(opts?.background),
  };

  const timeoutSeconds = opts?.timeoutSeconds;
  if (timeoutSeconds != null) {
    // The SDK option is in seconds; the request field is set in milliseconds.
    request.timeout = Math.round(timeoutSeconds * 1000);
  }
  if (uid != null) {
    request.uid = uid;
  }
  if (gid != null) {
    request.gid = gid;
  }
  if (opts?.envs != null) {
    request.envs = opts.envs;
  }
  return request;
}

/**
 * Convert an optional timestamp value into a `Date`.
 *
 * - `null` / `undefined` yield `undefined`.
 * - A `Date` instance is returned as-is, without a validity check.
 * - A string is parsed with `new Date(...)`.
 *
 * @param value Raw value to convert.
 * @param field Field name used in error messages.
 * @throws Error when the value is neither a string nor a Date, or when the
 *   string does not parse to a valid date.
 */
function parseOptionalDate(value: unknown, field: string): Date | undefined {
  if (value === null || value === undefined) {
    return undefined;
  }
  if (value instanceof Date) {
    return value;
  }
  if (typeof value === "string") {
    const date = new Date(value);
    if (Number.isNaN(date.getTime())) {
      throw new Error(`Invalid ${field}: ${value}`);
    }
    return date;
  }
  throw new Error(`Invalid ${field}: expected ISO string, got ${typeof value}`);
}

/** Options for the direct (non-OpenAPI-client) requests made by CommandsAdapter. */
export interface CommandsAdapterOptions {
  /**
   * Must match the baseUrl used by the ExecdClient.
   */
  baseUrl: string;
  /**
   * Fetch implementation used for the streaming POST /command request.
   * The adapter falls back to the global `fetch` when this is omitted.
   */
  fetch?: typeof fetch;
  /**
   * Extra headers for the streaming POST /command request. They are spread
   * after the default `accept` / `content-type` headers, so they take
   * precedence on key collisions.
   */
  headers?: Record<string, string>;
}

/**
 * ExecdCommands implementation backed by the execd HTTP API.
 *
 * Request/response calls go through the typed `ExecdClient`. The streaming
 * run endpoint is called with `fetch` directly and parsed as a JSON event
 * stream.
 */
export class CommandsAdapter implements ExecdCommands {
  private readonly fetch: typeof fetch;

  /**
   * @param client Typed OpenAPI client for execd.
   * @param opts Base URL, optional fetch implementation and extra headers
   *   used for the streaming run request.
   */
  constructor(
    private readonly client: ExecdClient,
    private readonly opts: CommandsAdapterOptions,
  ) {
    this.fetch = opts.fetch ?? fetch;
  }

  /**
   * Interrupt a command via DELETE /command.
   *
   * @param sessionId Sent as the `id` query parameter.
   */
  async interrupt(sessionId: string): Promise<void> {
    const { error, response } = await this.client.DELETE("/command", {
      params: { query: { id: sessionId } },
    });
    throwOnOpenApiFetchError({ error, response }, "Interrupt command failed");
  }

  /**
   * Fetch the status of a command via GET /command/status/{id} and map the
   * snake_case API fields to the SDK's `CommandStatus` shape.
   *
   * @throws Error when the response body is not an object or a timestamp
   *   field cannot be parsed.
   */
  async getCommandStatus(commandId: string): Promise<CommandStatus> {
    const { data, error, response } = await this.client.GET("/command/status/{id}", {
      params: { path: { id: commandId } },
    });
    throwOnOpenApiFetchError({ error, response }, "Get command status failed");
    const ok = data as ApiCommandStatusOk | undefined;
    if (!ok || typeof ok !== "object") {
      throw new Error("Get command status failed: unexpected response shape");
    }
    return {
      id: ok.id,
      content: ok.content,
      running: ok.running,
      // Missing exit code / finish time are normalized to null.
      exitCode: ok.exit_code ?? null,
      error: ok.error,
      startedAt: parseOptionalDate(ok.started_at, "startedAt"),
      finishedAt: parseOptionalDate(ok.finished_at, "finishedAt") ?? null,
    };
  }

  /**
   * Fetch logs of a command via GET /command/{id}/logs as plain text.
   *
   * @param commandId Command identifier.
   * @param cursor Optional cursor sent as a query parameter; omitted from the
   *   query when null/undefined.
   * @returns The log text plus the cursor read from the
   *   `EXECD-COMMANDS-TAIL-CURSOR` response header (`undefined` when the
   *   header is missing, empty, or not a finite number).
   */
  async getBackgroundCommandLogs(commandId: string, cursor?: number): Promise<CommandLogs> {
    const { data, error, response } = await this.client.GET("/command/{id}/logs", {
      params: { path: { id: commandId }, query: cursor == null ? {} : { cursor } },
      parseAs: "text",
    });
    throwOnOpenApiFetchError({ error, response }, "Get command logs failed");
    const ok = data as ApiCommandLogsOk | undefined;
    if (typeof ok !== "string") {
      throw new Error("Get command logs failed: unexpected response shape");
    }
    const cursorHeader = response.headers.get("EXECD-COMMANDS-TAIL-CURSOR");
    const parsedCursor = (cursorHeader != null && cursorHeader !== "") ? Number(cursorHeader) : undefined;
    return {
      content: ok,
      cursor: Number.isFinite(parsedCursor ?? NaN) ? parsedCursor : undefined,
    };
  }

  /**
   * Start a command via POST /command and yield the server's stream events
   * as they are parsed from the response.
   *
   * @param command Command line to execute.
   * @param opts Optional run options (see `toRunCommandRequest`).
   * @param signal Optional abort signal forwarded to `fetch`.
   */
  async *runStream(
    command: string,
    opts?: RunCommandOpts,
    signal?: AbortSignal,
  ): AsyncIterable<ServerStreamEvent> {
    const url = joinUrl(this.opts.baseUrl, "/command");
    const body = JSON.stringify(toRunCommandRequest(command, opts));

    // Call fetch through a local binding rather than as `this.fetch(...)`:
    // the native browser `fetch` throws "Illegal invocation" when invoked
    // with an arbitrary object as its receiver.
    const doFetch = this.fetch;
    const res = await doFetch(url, {
      method: "POST",
      headers: {
        "accept": "text/event-stream",
        "content-type": "application/json",
        ...(this.opts.headers ?? {}),
      },
      body,
      signal,
    });

    for await (const ev of parseJsonEventStream<ServerStreamEvent>(res, { fallbackErrorMessage: "Run command failed" })) {
      yield ev;
    }
  }

  /**
   * Run a command to completion, feeding every stream event to an
   * `ExecutionEventDispatcher` built from `execution` and `handlers`.
   *
   * @returns The `CommandExecution` object passed to the dispatcher.
   */
  async run(
    command: string,
    opts?: RunCommandOpts,
    handlers?: ExecutionHandlers,
    signal?: AbortSignal,
  ): Promise<CommandExecution> {
    const execution: CommandExecution = {
      logs: { stdout: [], stderr: [] },
      result: [],
    };
    const dispatcher = new ExecutionEventDispatcher(execution, handlers);
    for await (const ev of this.runStream(command, opts, signal)) {
      // Keep legacy behavior: if server sends "init" with empty id, preserve previous id.
      if (ev.type === "init" && (ev.text ?? "") === "" && execution.id) {
        (ev as any).text = execution.id;
      }
      await dispatcher.dispatch(ev as any);
    }

    return execution;
  }
}

================================================
FILE: sdks/sandbox/javascript/src/adapters/egressAdapter.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import type { EgressClient } from "../openapi/egressClient.js";
import { throwOnOpenApiFetchError } from "./openapiError.js";
import type { paths as EgressPaths } from "../api/egress.js";
import type { NetworkPolicy, NetworkRule } from "../models/sandboxes.js";
import type { Egress } from "../services/egress.js";

/** JSON body of the 200 response of GET /policy (from generated spec). */
type ApiGetPolicyOk =
  EgressPaths["/policy"]["get"]["responses"][200]["content"]["application/json"];
/** JSON request body of PATCH /policy (from generated spec). */
type ApiPatchRulesRequest =
  EgressPaths["/policy"]["patch"]["requestBody"]["content"]["application/json"];

/**
 * Egress service implementation backed by the typed egress OpenAPI client.
 */
export class EgressAdapter implements Egress {
  constructor(private readonly client: EgressClient) {}

  /**
   * Read the current egress policy via GET /policy.
   *
   * @throws Error when the response has no object-valued `policy` field.
   */
  async getPolicy(): Promise<NetworkPolicy> {
    const { data: payload, error, response } = await this.client.GET("/policy");
    throwOnOpenApiFetchError({ error, response }, "Get sandbox egress policy failed");

    const body = payload as ApiGetPolicyOk | undefined;
    const policy = body && typeof body === "object" ? body.policy : undefined;
    if (!policy || typeof policy !== "object") {
      throw new Error("Get sandbox egress policy failed: unexpected response shape");
    }
    return policy as NetworkPolicy;
  }

  /**
   * Send the given rules as the body of PATCH /policy.
   */
  async patchRules(rules: NetworkRule[]): Promise<void> {
    const { error, response } = await this.client.PATCH("/policy", {
      body: rules as unknown as ApiPatchRulesRequest,
    });
    throwOnOpenApiFetchError({ error, response }, "Patch sandbox egress rules failed");
  }
}


================================================
FILE: sdks/sandbox/javascript/src/adapters/filesystemAdapter.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import type { ExecdClient } from "../openapi/execdClient.js";
import { throwOnOpenApiFetchError } from "./openapiError.js";
import type { SandboxFiles } from "../services/filesystem.js";
import type { paths as ExecdPaths } from "../api/execd.js";
import type {
  ContentReplaceEntry,
  FileInfo,
  FileMetadata,
  FilesInfoResponse,
  MoveEntry,
  Permission,
  RenameFileItem,
  ReplaceFileContentItem,
  SearchEntry,
  SearchFilesResponse,
  SetPermissionEntry,
  WriteEntry,
} from "../models/filesystem.js";
import { SandboxApiException, SandboxError } from "../core/exceptions.js";

/**
 * Joins a base URL and a path.
 *
 * At most one trailing "/" is removed from `baseUrl`, and a leading "/" is
 * prepended to `pathname` when it does not already start with one.
 */
function joinUrl(baseUrl: string, pathname: string): string {
  let prefix = baseUrl;
  if (prefix.endsWith("/")) {
    prefix = prefix.slice(0, -1);
  }

  let suffix = pathname;
  if (!suffix.startsWith("/")) {
    suffix = "/" + suffix;
  }

  return prefix + suffix;
}

/**
 * Wraps in-memory upload data in a `Blob`.
 *
 * A `Blob` is returned as-is; strings and `ArrayBuffer`s are wrapped directly;
 * a `Uint8Array` is first copied so that only the bytes visible through the
 * view end up in the blob.
 */
function toUploadBlob(data: Blob | Uint8Array | ArrayBuffer | string): Blob {
  if (data instanceof Blob) {
    return data;
  }
  if (typeof data === "string" || data instanceof ArrayBuffer) {
    return new Blob([data]);
  }
  // Copy into a fresh Uint8Array backed by its own ArrayBuffer (not SharedArrayBuffer).
  const bytes = Uint8Array.from(data);
  return new Blob([bytes.buffer]);
}

/** Duck-type check: true for any truthy value exposing a `getReader` function. */
function isReadableStream(v: unknown): v is ReadableStream<Uint8Array> {
  if (!v) {
    return false;
  }
  const candidate = v as { getReader?: unknown };
  return typeof candidate.getReader === "function";
}

/** Duck-type check: true for any truthy value whose `Symbol.asyncIterator` member is a function. */
function isAsyncIterable(v: unknown): v is AsyncIterable<Uint8Array> {
  if (!v) {
    return false;
  }
  const factory = (v as any)[Symbol.asyncIterator];
  return typeof factory === "function";
}

/** Reports whether `globalThis.process.versions.node` is present and truthy. */
function isNodeRuntime(): boolean {
  const proc = (globalThis as any)?.process;
  const nodeVersion = proc?.versions?.node;
  return Boolean(nodeVersion);
}

/**
 * Drains a byte source completely and returns all of its chunks concatenated
 * into a single `Uint8Array`.
 *
 * @param source A `ReadableStream` (read through a reader whose lock is
 *   released afterwards) or an async iterable of chunks.
 */
async function collectBytes(
  source: ReadableStream<Uint8Array> | AsyncIterable<Uint8Array>
): Promise<Uint8Array> {
  const parts: Uint8Array[] = [];
  let size = 0;

  if (!isReadableStream(source)) {
    for await (const part of source) {
      parts.push(part);
      size += part.length;
    }
  } else {
    const reader = source.getReader();
    try {
      for (;;) {
        const step = await reader.read();
        if (step.done) break;
        // Skip reads that deliver no chunk.
        if (!step.value) continue;
        parts.push(step.value);
        size += step.value.length;
      }
    } finally {
      reader.releaseLock();
    }
  }

  // Copy every chunk into one contiguous buffer, in arrival order.
  const merged = new Uint8Array(size);
  let cursor = 0;
  parts.forEach((part) => {
    merged.set(part, cursor);
    cursor += part.length;
  });
  return merged;
}

/**
 * Adapts an async iterable of byte chunks to a `ReadableStream`.
 *
 * Uses `ReadableStream.from` when the runtime provides it; otherwise builds a
 * pull-based stream that advances the iterator one step per pull and forwards
 * cancellation to the iterator's `return()`.
 */
function toReadableStream(
  it: AsyncIterable<Uint8Array>
): ReadableStream<Uint8Array> {
  const StreamCtor: any = ReadableStream as any;
  if (typeof StreamCtor?.from === "function") {
    return StreamCtor.from(it);
  }

  const source = it[Symbol.asyncIterator]();
  return new ReadableStream<Uint8Array>({
    pull: async (controller) => {
      const step = await source.next();
      if (!step.done) {
        controller.enqueue(step.value);
        return;
      }
      controller.close();
    },
    cancel: async () => {
      await source.return?.();
    },
  });
}

/**
 * Returns the last non-empty "/"-separated segment of `p`, or the literal
 * "file" when the path has no non-empty segment.
 */
function basename(p: string): string {
  const segments = p.split("/").filter((segment) => segment.length > 0);
  const last = segments.pop();
  return last === undefined ? "file" : last;
}

/** Encodes a string as UTF-8 bytes using `TextEncoder`. */
function encodeUtf8(s: string): Uint8Array {
  const encoder = new TextEncoder();
  const bytes = encoder.encode(s);
  return bytes;
}

/**
 * Lazily produces the bytes of a two-part `multipart/form-data` body:
 * a `metadata` part (JSON) followed by a `file` part whose content is streamed
 * from `opts.file` chunk by chunk, then the closing boundary.
 *
 * @param opts.boundary Boundary token; must match the one announced in the
 *   request's `Content-Type` header.
 * @param opts.metadataJson Already-serialized JSON for the `metadata` part.
 * @param opts.fileName File name advertised in the `file` part. Double quotes,
 *   CR and LF are percent-escaped so the name cannot terminate the quoted
 *   parameter or inject extra header lines.
 * @param opts.fileContentType Value of the `file` part's `Content-Type` header.
 * @param opts.file Source of the file bytes.
 */
async function* multipartUploadBody(opts: {
  boundary: string;
  metadataJson: string;
  fileName: string;
  fileContentType: string;
  file: ReadableStream<Uint8Array> | AsyncIterable<Uint8Array>;
}): AsyncIterable<Uint8Array> {
  const b = opts.boundary;

  // Escape the characters that would break out of the quoted `filename="..."`
  // parameter, using the same replacements browsers apply to form file names.
  const safeFileName = opts.fileName.replace(/[\r\n"]/g, (ch) => {
    if (ch === "\r") return "%0D";
    if (ch === "\n") return "%0A";
    return "%22";
  });

  // Part 1: metadata (application/json)
  yield encodeUtf8(`--${b}\r\n`);
  yield encodeUtf8(
    `Content-Disposition: form-data; name="metadata"; filename="metadata"\r\n`
  );
  yield encodeUtf8(`Content-Type: application/json\r\n\r\n`);
  yield encodeUtf8(opts.metadataJson);
  yield encodeUtf8(`\r\n`);

  // Part 2: file
  yield encodeUtf8(`--${b}\r\n`);
  yield encodeUtf8(
    `Content-Disposition: form-data; name="file"; filename="${safeFileName}"\r\n`
  );
  yield encodeUtf8(`Content-Type: ${opts.fileContentType}\r\n\r\n`);

  if (isReadableStream(opts.file)) {
    const reader = opts.file.getReader();
    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        if (value) yield value;
      }
    } finally {
      // Runs on normal completion and when the consumer stops early.
      reader.releaseLock();
    }
  } else {
    for await (const chunk of opts.file) {
      yield chunk;
    }
  }

  // Closing delimiter.
  yield encodeUtf8(`\r\n--${b}--\r\n`);
}

/**
 * Options for the requests the filesystem adapter issues directly with `fetch`
 * (upload/download), as opposed to those sent through the `ExecdClient`.
 */
export interface FilesystemAdapterOptions {
  /**
   * Must match the baseUrl used by the ExecdClient, used for binary endpoints
   * like download/upload where we bypass JSON parsing.
   */
  baseUrl: string;
  /** Fetch implementation to use; the adapter falls back to the global `fetch` when omitted. */
  fetch?: typeof fetch;
  /** Extra headers spread into every direct upload/download request. */
  headers?: Record<string, string>;
}

/**
 * Builds a `Permission` payload from an entry, substituting the literal `755`
 * when the entry carries no `mode`. `owner` and `group` are passed through.
 */
function toPermission(e: {
  mode?: number;
  owner?: string;
  group?: string;
}): Permission {
  const { owner, group } = e;
  const mode = e.mode ?? 755;
  const permission = { mode, owner, group };
  return permission as Permission;
}

/**
 * Filesystem adapter that exposes user-facing file APIs (`sandbox.files`).
 *
 * This adapter owns all request/response conversions:
 * - Maps friendly method shapes to API payloads
 * - Parses timestamps into `Date`
 * - Implements streaming upload/download helpers
 *
 * JSON endpoints go through the typed `ExecdClient`; the binary upload and
 * download endpoints are called with `fetch` directly against `opts.baseUrl`.
 */
export class FilesystemAdapter implements SandboxFiles {
  /** Fetch implementation used for the direct upload/download requests. */
  private readonly fetch: typeof fetch;

  private static readonly Api = {
    // This is intentionally derived from OpenAPI schema types so API changes surface quickly.
    // The values are never read at runtime; the members exist only so `typeof` can name the types.
    SearchFilesOk:
      null as unknown as ExecdPaths["/files/search"]["get"]["responses"][200]["content"]["application/json"],
    FilesInfoOk:
      null as unknown as ExecdPaths["/files/info"]["get"]["responses"][200]["content"]["application/json"],
    MakeDirsRequest:
      null as unknown as ExecdPaths["/directories"]["post"]["requestBody"]["content"]["application/json"],
    SetPermissionsRequest:
      null as unknown as ExecdPaths["/files/permissions"]["post"]["requestBody"]["content"]["application/json"],
    MoveFilesRequest:
      null as unknown as ExecdPaths["/files/mv"]["post"]["requestBody"]["content"]["application/json"],
    ReplaceContentsRequest:
      null as unknown as ExecdPaths["/files/replace"]["post"]["requestBody"]["content"]["application/json"],
  };

  /**
   * @param client Typed OpenAPI client used for the JSON endpoints.
   * @param opts Base URL, optional fetch implementation and extra headers for
   *   the direct upload/download requests.
   */
  constructor(
    private readonly client: ExecdClient,
    private readonly opts: FilesystemAdapterOptions
  ) {
    const impl = opts.fetch ?? fetch;
    // Invoke through a wrapper instead of storing the function itself: calling
    // the platform `fetch` as `this.fetch(...)` would pass this adapter as the
    // receiver, which browsers reject with "Illegal invocation".
    this.fetch = ((input: any, init?: any) => impl(input, init)) as typeof fetch;
  }

  /**
   * Parses a non-empty date string into a `Date`.
   *
   * @param field Field name used in the error message.
   * @param v Raw value from the API.
   * @throws When `v` is not a non-empty string or does not parse to a valid date.
   */
  private parseIsoDate(field: string, v: unknown): Date {
    if (typeof v !== "string" || !v) {
      throw new Error(`Invalid ${field}: expected ISO string, got ${typeof v}`);
    }
    const d = new Date(v);
    if (Number.isNaN(d.getTime())) {
      throw new Error(`Invalid ${field}: ${v}`);
    }
    return d;
  }

  // Type carrier only (never read at runtime): element type of the search response.
  private static readonly _ApiFileInfo =
    null as unknown as (typeof FilesystemAdapter.Api.SearchFilesOk)[number];

  /**
   * Converts one API file-info record to the SDK `FileInfo` shape, renaming
   * `created_at`/`modified_at` to `createdAt`/`modifiedAt` and parsing them
   * into `Date`s when present. Any other fields are passed through.
   */
  private mapApiFileInfo(raw: typeof FilesystemAdapter._ApiFileInfo): FileInfo {
    const { path, size, created_at, modified_at, mode, owner, group, ...rest } =
      raw;

    return {
      ...rest,
      path,
      size,
      mode,
      owner,
      group,
      createdAt: created_at
        ? this.parseIsoDate("createdAt", created_at)
        : undefined,
      modifiedAt: modified_at
        ? this.parseIsoDate("modifiedAt", modified_at)
        : undefined,
    };
  }

  /**
   * Converts a non-OK response of a direct `fetch` call into a
   * `SandboxApiException` carrying the status, request id and raw body.
   *
   * @param res The failed response.
   * @param message Message of the thrown exception.
   * @param detail Message of the nested `SandboxError`.
   */
  private async throwResponseError(
    res: Response,
    message: string,
    detail: string
  ): Promise<never> {
    const requestId = res.headers.get("x-request-id") ?? undefined;
    // Best effort: an unreadable body must not mask the HTTP failure itself.
    const rawBody = await res.text().catch(() => undefined);
    throw new SandboxApiException({
      message,
      statusCode: res.status,
      requestId,
      error: new SandboxError(SandboxError.UNEXPECTED_RESPONSE, detail),
      rawBody,
    });
  }

  /**
   * Looks up file info for the given paths via `GET /files/info`.
   *
   * @returns The response entries mapped to `FileInfo`, under the same keys;
   *   an empty object when the response carries no data.
   * @throws When the request reports an error or the response is malformed.
   */
  async getFileInfo(paths: string[]): Promise<Record<string, FileInfo>> {
    const { data, error, response } = await this.client.GET("/files/info", {
      params: { query: { path: paths } },
    });
    throwOnOpenApiFetchError({ error, response }, "Get file info failed");
    const raw = data as typeof FilesystemAdapter.Api.FilesInfoOk | undefined;
    if (!raw) return {} as FilesInfoResponse;
    if (typeof raw !== "object") {
      throw new Error(
        `Get file info failed: unexpected response shape (got ${typeof raw})`
      );
    }
    const out: Record<string, FileInfo> = {};
    for (const [k, v] of Object.entries(raw as Record<string, unknown>)) {
      if (!v || typeof v !== "object") {
        throw new Error(
          `Get file info failed: invalid file info for path=${k}`
        );
      }
      out[k] = this.mapApiFileInfo(v as typeof FilesystemAdapter._ApiFileInfo);
    }
    return out as FilesInfoResponse;
  }

  /** Deletes the given files via `DELETE /files`. */
  async deleteFiles(paths: string[]): Promise<void> {
    const { error, response } = await this.client.DELETE("/files", {
      params: { query: { path: paths } },
    });
    throwOnOpenApiFetchError({ error, response }, "Delete files failed");
  }

  /**
   * Creates directories via `POST /directories`. The body maps each entry's
   * path to its permission payload (see `toPermission` for the mode default).
   */
  async createDirectories(
    entries: Pick<WriteEntry, "path" | "mode" | "owner" | "group">[]
  ): Promise<void> {
    const map: Record<string, Permission> = {};
    for (const e of entries) {
      map[e.path] = toPermission(e);
    }
    const body = map as unknown as typeof FilesystemAdapter.Api.MakeDirsRequest;
    const { error, response } = await this.client.POST("/directories", {
      body,
    });
    throwOnOpenApiFetchError({ error, response }, "Create directories failed");
  }

  /** Deletes the given directories via `DELETE /directories`. */
  async deleteDirectories(paths: string[]): Promise<void> {
    const { error, response } = await this.client.DELETE("/directories", {
      params: { query: { path: paths } },
    });
    throwOnOpenApiFetchError({ error, response }, "Delete directories failed");
  }

  /**
   * Applies permissions via `POST /files/permissions`. The body maps each
   * entry's path to its permission payload.
   */
  async setPermissions(entries: SetPermissionEntry[]): Promise<void> {
    const req: Record<string, Permission> = {};
    for (const e of entries) {
      req[e.path] = toPermission(e);
    }
    const body =
      req as unknown as typeof FilesystemAdapter.Api.SetPermissionsRequest;
    const { error, response } = await this.client.POST("/files/permissions", {
      body,
    });
    throwOnOpenApiFetchError({ error, response }, "Set permissions failed");
  }

  /** Moves/renames files via `POST /files/mv`, one `{ src, dest }` item per entry. */
  async moveFiles(entries: MoveEntry[]): Promise<void> {
    const req: RenameFileItem[] = entries.map((e) => ({
      src: e.src,
      dest: e.dest,
    }));
    const body =
      req as unknown as typeof FilesystemAdapter.Api.MoveFilesRequest;
    const { error, response } = await this.client.POST("/files/mv", {
      body,
    });
    throwOnOpenApiFetchError({ error, response }, "Move files failed");
  }

  /**
   * Replaces file contents via `POST /files/replace`. The body maps each
   * entry's path to `{ old, new }`.
   */
  async replaceContents(entries: ContentReplaceEntry[]): Promise<void> {
    const req: Record<string, ReplaceFileContentItem> = {};
    for (const e of entries) {
      req[e.path] = { old: e.oldContent, new: e.newContent };
    }
    const body =
      req as unknown as typeof FilesystemAdapter.Api.ReplaceContentsRequest;
    const { error, response } = await this.client.POST("/files/replace", {
      body,
    });
    throwOnOpenApiFetchError({ error, response }, "Replace contents failed");
  }

  /**
   * Searches files via `GET /files/search` using the entry's `path` and `pattern`.
   *
   * @returns The matches mapped to `FileInfo`; an empty array when the
   *   response carries no data.
   * @throws When the request reports an error or the response is not an array.
   */
  async search(entry: SearchEntry): Promise<SearchFilesResponse> {
    const { data, error, response } = await this.client.GET("/files/search", {
      params: { query: { path: entry.path, pattern: entry.pattern } },
    });
    throwOnOpenApiFetchError({ error, response }, "Search files failed");

    // Make the OpenAPI contract explicit (and fail loudly on unexpected shapes).
    const ok = data as typeof FilesystemAdapter.Api.SearchFilesOk | undefined;
    if (!ok) return [];
    if (!Array.isArray(ok)) {
      throw new Error(
        `Search files failed: unexpected response shape (expected array, got ${typeof ok})`
      );
    }
    return ok.map((x) => this.mapApiFileInfo(x));
  }

  /**
   * Uploads one file to `POST /files/upload` as `multipart/form-data` with a
   * `metadata` part (JSON of `meta`) and a `file` part.
   *
   * Stream-like data is sent as a streaming request body on Node; elsewhere it
   * is buffered first. Everything else is sent through `FormData`.
   *
   * @throws SandboxApiException when the server answers with a non-OK status.
   */
  private async uploadFile(
    meta: FileMetadata,
    data:
      | Blob
      | Uint8Array
      | ArrayBuffer
      | string
      | AsyncIterable<Uint8Array>
      | ReadableStream<Uint8Array>
  ): Promise<void> {
    const url = joinUrl(this.opts.baseUrl, "/files/upload");
    const fileName = basename(meta.path);
    const metadataJson = JSON.stringify(meta);

    // Streaming path (large files): build multipart body manually to avoid buffering.
    if (isReadableStream(data) || isAsyncIterable(data)) {
      // Browsers do not allow streaming multipart requests with custom boundaries.
      // Fall back to in-memory uploads when streaming is unavailable.
      if (!isNodeRuntime()) {
        const bytes = await collectBytes(data);
        return await this.uploadFile(meta, bytes);
      }
      const boundary = `opensandbox_${Math.random()
        .toString(16)
        .slice(2)}_${Date.now()}`;
      const bodyIt = multipartUploadBody({
        boundary,
        metadataJson,
        fileName,
        fileContentType: "application/octet-stream",
        file: data,
      });
      const stream = toReadableStream(bodyIt);

      const res = await this.fetch(url, {
        method: "POST",
        headers: {
          "content-type": `multipart/form-data; boundary=${boundary}`,
          ...(this.opts.headers ?? {}),
        },
        body: stream as any,
        // Node fetch (undici) requires duplex for streaming request bodies.
        duplex: "half" as any,
      } as any);

      if (!res.ok) {
        await this.throwResponseError(
          res,
          `Upload failed (status=${res.status})`,
          "Upload failed"
        );
      }
      return;
    }

    // In-memory path (small files): use FormData.
    const form = new FormData();
    form.append(
      "metadata",
      new Blob([metadataJson], { type: "application/json" }),
      "metadata"
    );

    if (typeof data === "string") {
      const textBlob = new Blob([data], { type: "text/plain; charset=utf-8" });
      form.append("file", textBlob, fileName);
    } else {
      const blob = toUploadBlob(data);
      // Keep a caller-supplied blob type; otherwise label the part as binary.
      const fileBlob = blob.type
        ? blob
        : new Blob([blob], { type: "application/octet-stream" });
      form.append("file", fileBlob, fileName);
    }

    const res = await this.fetch(url, {
      method: "POST",
      headers: {
        ...(this.opts.headers ?? {}),
      },
      body: form,
    });

    if (!res.ok) {
      await this.throwResponseError(
        res,
        `Upload failed (status=${res.status})`,
        "Upload failed"
      );
    }
  }

  /**
   * Downloads a file fully into memory via `GET /files/download`.
   *
   * @param path File path, sent as the `path` query parameter.
   * @param opts.range Optional value for the `Range` request header.
   * @throws SandboxApiException when the server answers with a non-OK status.
   */
  async readBytes(
    path: string,
    opts?: { range?: string }
  ): Promise<Uint8Array> {
    const url =
      joinUrl(this.opts.baseUrl, "/files/download") +
      `?path=${encodeURIComponent(path)}`;
    const res = await this.fetch(url, {
      method: "GET",
      headers: {
        ...(this.opts.headers ?? {}),
        ...(opts?.range ? { Range: opts.range } : {}),
      },
    });
    if (!res.ok) {
      await this.throwResponseError(res, "Download failed", "Download failed");
    }
    const ab = await res.arrayBuffer();
    return new Uint8Array(ab);
  }

  /**
   * Streams a file's bytes via `GET /files/download`.
   *
   * The request is issued when iteration starts, so request failures surface
   * from the first `next()` rather than from this call.
   */
  readBytesStream(
    path: string,
    opts?: { range?: string }
  ): AsyncIterable<Uint8Array> {
    return this.downloadStream(path, opts);
  }

  /**
   * Async generator behind `readBytesStream`: yields the response body chunk
   * by chunk.
   *
   * @throws SandboxApiException when the server answers with a non-OK status.
   */
  private async *downloadStream(
    path: string,
    opts?: { range?: string }
  ): AsyncIterable<Uint8Array> {
    const url =
      joinUrl(this.opts.baseUrl, "/files/download") +
      `?path=${encodeURIComponent(path)}`;
    const res = await this.fetch(url, {
      method: "GET",
      headers: {
        ...(this.opts.headers ?? {}),
        ...(opts?.range ? { Range: opts.range } : {}),
      },
    });
    if (!res.ok) {
      await this.throwResponseError(
        res,
        "Download stream failed",
        "Download stream failed"
      );
    }

    const body = res.body as ReadableStream<Uint8Array> | null;
    if (!body) return;
    const reader = body.getReader();
    let drained = false;
    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) {
          drained = true;
          return;
        }
        if (value) yield value;
      }
    } finally {
      // Reached with `drained === false` when the consumer stops iterating
      // early or a read fails: cancel so the underlying response body is not
      // left open. Cancel errors are ignored because the stream is being
      // abandoned anyway.
      if (!drained) {
        await reader.cancel().catch(() => undefined);
      }
      reader.releaseLock();
    }
  }

  /**
   * Downloads a file and decodes it to text.
   *
   * @param opts.encoding Label passed to `TextDecoder`; defaults to "utf-8".
   * @param opts.range Optional value for the `Range` request header.
   */
  async readFile(
    path: string,
    opts?: { encoding?: string; range?: string }
  ): Promise<string> {
    const bytes = await this.readBytes(path, { range: opts?.range });
    const encoding = opts?.encoding ?? "utf-8";
    return new TextDecoder(encoding).decode(bytes);
  }

  /**
   * Uploads the entries one after another, in order. An entry without `data`
   * is uploaded as an empty string. The first failing upload aborts the rest.
   */
  async writeFiles(entries: WriteEntry[]): Promise<void> {
    for (const e of entries) {
      const meta: FileMetadata = {
        path: e.path,
        owner: e.owner,
        group: e.group,
        mode: e.mode,
      };
      await this.uploadFile(meta, e.data ?? "");
    }
  }
}

================================================
FILE: sdks/sandbox/javascript/src/adapters/healthAdapter.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import type { ExecdClient } from "../openapi/execdClient.js";
import { throwOnOpenApiFetchError } from "./openapiError.js";
import type { ExecdHealth } from "../services/execdHealth.js";

/** Adapter exposing the execd liveness check. */
export class HealthAdapter implements ExecdHealth {
  constructor(private readonly client: ExecdClient) {}

  /**
   * Issues `GET /ping`.
   *
   * @returns `true` whenever the call does not raise.
   * @throws When the request reports an error.
   */
  async ping(): Promise<boolean> {
    const outcome = await this.client.GET("/ping");
    throwOnOpenApiFetchError(
      { error: outcome.error, response: outcome.response },
      "Execd ping failed"
    );
    return true;
  }
}

================================================
FILE: sdks/sandbox/javascript/src/adapters/metricsAdapter.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import type { ExecdClient } from "../openapi/execdClient.js";
import { throwOnOpenApiFetchError } from "./openapiError.js";
import type { paths as ExecdPaths } from "../api/execd.js";
import type { SandboxMetrics } from "../models/execd.js";
import type { ExecdMetrics } from "../services/execdMetrics.js";

// JSON body of a 200 response to `GET /metrics`, taken from the execd `paths` type.
type ApiMetricsOk =
  ExecdPaths["/metrics"]["get"]["responses"][200]["content"]["application/json"];

/**
 * Maps the snake_case metrics payload to the camelCase `SandboxMetrics` shape.
 * Missing (null/undefined) fields become 0 and every value is coerced with `Number`.
 */
function normalizeMetrics(m: ApiMetricsOk): SandboxMetrics {
  return {
    cpuCount: Number(m.cpu_count ?? 0),
    cpuUsedPercentage: Number(m.cpu_used_pct ?? 0),
    memoryTotalMiB: Number(m.mem_total_mib ?? 0),
    memoryUsedMiB: Number(m.mem_used_mib ?? 0),
    timestamp: Number(m.timestamp ?? 0),
  };
}

/** Adapter exposing execd resource metrics. */
export class MetricsAdapter implements ExecdMetrics {
  constructor(private readonly client: ExecdClient) {}

  /**
   * Fetches metrics via `GET /metrics` and normalizes them.
   *
   * @throws When the request reports an error or the payload is missing or
   *   not an object.
   */
  async getMetrics(): Promise<SandboxMetrics> {
    const { data: payload, error, response } = await this.client.GET("/metrics");
    throwOnOpenApiFetchError({ error, response }, "Get execd metrics failed");

    const metrics = payload as ApiMetricsOk | undefined;
    const usable = Boolean(metrics) && typeof metrics === "object";
    if (!usable) {
      throw new Error("Get execd metrics failed: unexpected response shape");
    }
    return normalizeMetrics(metrics as ApiMetricsOk);
  }
}

================================================
FILE: sdks/sandbox/javascript/src/adapters/openapiError.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { SandboxApiException, SandboxError } from "../core/exceptions.js";

/**
 * Throws a `SandboxApiException` when an OpenAPI client call failed; returns
 * normally otherwise.
 *
 * A call counts as failed when it produced a truthy `error`, or when the
 * response explicitly reports `ok === false`. The second condition matters
 * for non-2xx responses with an empty body, whose parsed `error` is falsy and
 * would otherwise be mistaken for success.
 *
 * @param result The `error` and `response` members returned by the client call.
 * @param fallbackMessage Message used when the error body carries none.
 * @throws SandboxApiException carrying the status code, the `x-request-id`
 *   header (when present), the error code/message found on the error body
 *   (top level or under `error`), and the raw error body.
 */
export function throwOnOpenApiFetchError(
  result: { error?: unknown; response: Response },
  fallbackMessage: string,
): void {
  const response = result.response as Response | undefined;
  // Only an explicit `ok === false` counts, so response stand-ins that lack
  // the property keep the previous error-only behavior.
  const httpFailed = response != null && (response as any).ok === false;
  if (!result.error && !httpFailed) return;

  const requestId = response?.headers?.get("x-request-id") ?? undefined;
  const status = (response as any)?.status ?? 0;

  const err = result.error as any;
  const message =
    err?.message ??
    err?.error?.message ??
    fallbackMessage;
  const code = err?.code ?? err?.error?.code;

  throw new SandboxApiException({
    message,
    statusCode: status,
    requestId,
    error: new SandboxError(
      code ? String(code) : SandboxError.UNEXPECTED_RESPONSE,
      String(message ?? ""),
    ),
    rawBody: result.error,
  });
}

================================================
FILE: sdks/sandbox/javascript/src/adapters/sandboxesAdapter.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import type { LifecycleClient } from "../openapi/lifecycleClient.js";
import { throwOnOpenApiFetchError } from "./openapiError.js";
import type { paths as LifecyclePaths } from "../api/lifecycle.js";
import type {
  Sandboxes,
} from "../services/sandboxes.js";
import type {
  CreateSandboxRequest,
  CreateSandboxResponse,
  Endpoint,
  ListSandboxesParams,
  ListSandboxesResponse,
  RenewSandboxExpirationRequest,
  RenewSandboxExpirationResponse,
  SandboxId,
  SandboxInfo,
} from "../models/sandboxes.js";

// Request/response body types for the lifecycle endpoints used below, taken
// from the lifecycle `paths` type so that schema changes show up as type errors here.
type ApiCreateSandboxRequest =
  LifecyclePaths["/sandboxes"]["post"]["requestBody"]["content"]["application/json"];
// Note: sandbox creation is typed against the 202 response.
type ApiCreateSandboxOk =
  LifecyclePaths["/sandboxes"]["post"]["responses"][202]["content"]["application/json"];
type ApiGetSandboxOk =
  LifecyclePaths["/sandboxes/{sandboxId}"]["get"]["responses"][200]["content"]["application/json"];
type ApiListSandboxesOk =
  LifecyclePaths["/sandboxes"]["get"]["responses"][200]["content"]["application/json"];
type ApiRenewSandboxExpirationRequest =
  LifecyclePaths["/sandboxes/{sandboxId}/renew-expiration"]["post"]["requestBody"]["content"]["application/json"];
type ApiRenewSandboxExpirationOk =
  LifecyclePaths["/sandboxes/{sandboxId}/renew-expiration"]["post"]["responses"][200]["content"]["application/json"];
type ApiEndpointOk =
  LifecyclePaths["/sandboxes/{sandboxId}/endpoints/{port}"]["get"]["responses"][200]["content"]["application/json"];

/**
 * Flattens a metadata map into the `k=v&k2=v2` form carried by the single
 * `metadata` query parameter of the Lifecycle API.
 *
 * Keys and values are joined verbatim; URL-encoding (e.g. `=` -> %3D and
 * `&` -> %26) is left to the query serializer.
 */
function encodeMetadataFilter(metadata: Record<string, string>): string {
  return Object.entries(metadata)
    .map(([key, value]) => `${key}=${value}`)
    .join("&");
}

/**
 * Adapter over the lifecycle OpenAPI client: issues the sandbox lifecycle
 * requests and converts timestamp strings in the responses into `Date`s.
 */
export class SandboxesAdapter implements Sandboxes {
  constructor(private readonly client: LifecycleClient) {}

  /**
   * Parses a non-empty date string into a `Date`.
   *
   * @param field Field name used in the error message.
   * @throws When `v` is not a non-empty string or does not parse to a valid date.
   */
  private parseIsoDate(field: string, v: unknown): Date {
    const isNonEmptyString = typeof v === "string" && v.length > 0;
    if (!isNonEmptyString) {
      throw new Error(`Invalid ${field}: expected ISO string, got ${typeof v}`);
    }
    const parsed = new Date(v as string);
    if (Number.isNaN(parsed.getTime())) {
      throw new Error(`Invalid ${field}: ${v}`);
    }
    return parsed;
  }

  /** Like `parseIsoDate`, but maps `null`/`undefined` to `null`. */
  private parseOptionalIsoDate(field: string, v: unknown): Date | null {
    return v == null ? null : this.parseIsoDate(field, v);
  }

  /** Copies a sandbox record, replacing `createdAt`/`expiresAt` with parsed dates. */
  private mapSandboxInfo(raw: ApiGetSandboxOk): SandboxInfo {
    const createdAt = this.parseIsoDate("createdAt", raw?.createdAt);
    const expiresAt = this.parseOptionalIsoDate("expiresAt", raw?.expiresAt);
    return { ...(raw ?? {}), createdAt, expiresAt } as SandboxInfo;
  }

  /**
   * Creates a sandbox via `POST /sandboxes`.
   *
   * @throws When the request reports an error, the payload is not an object,
   *   or its timestamps are invalid.
   */
  async createSandbox(req: CreateSandboxRequest): Promise<CreateSandboxResponse> {
    // Typed against the OpenAPI request body so backend schema changes surface quickly.
    const payload: ApiCreateSandboxRequest = req as unknown as ApiCreateSandboxRequest;
    const { data, error, response } = await this.client.POST("/sandboxes", { body: payload });
    throwOnOpenApiFetchError({ error, response }, "Create sandbox failed");

    const created = data as ApiCreateSandboxOk | undefined;
    if (!created || typeof created !== "object") {
      throw new Error("Create sandbox failed: unexpected response shape");
    }
    const createdAt = this.parseIsoDate("createdAt", created?.createdAt);
    const expiresAt = this.parseOptionalIsoDate("expiresAt", created?.expiresAt);
    return { ...(created ?? {}), createdAt, expiresAt } as CreateSandboxResponse;
  }

  /**
   * Fetches one sandbox via `GET /sandboxes/{sandboxId}`.
   *
   * @throws When the request reports an error or the payload is not an object.
   */
  async getSandbox(sandboxId: SandboxId): Promise<SandboxInfo> {
    const { data, error, response } = await this.client.GET("/sandboxes/{sandboxId}", {
      params: { path: { sandboxId } },
    });
    throwOnOpenApiFetchError({ error, response }, "Get sandbox failed");

    const info = data as ApiGetSandboxOk | undefined;
    if (!info || typeof info !== "object") {
      throw new Error("Get sandbox failed: unexpected response shape");
    }
    return this.mapSandboxInfo(info);
  }

  /**
   * Lists sandboxes via `GET /sandboxes`.
   *
   * Only filters that are actually set are sent: non-empty `states` as `state`,
   * non-empty `metadata` encoded by `encodeMetadataFilter`, and non-null
   * `page` / `pageSize`.
   *
   * @throws When the request reports an error, the payload is not an object,
   *   or `items` is not an array.
   */
  async listSandboxes(params: ListSandboxesParams = {}): Promise<ListSandboxesResponse> {
    const { states, metadata, page, pageSize } = params;
    const query: Record<string, string | number | boolean | undefined | null | (string | number)[]> = {};
    if (states?.length) {
      query.state = states;
    }
    if (metadata && Object.keys(metadata).length) {
      query.metadata = encodeMetadataFilter(metadata);
    }
    if (page != null) {
      query.page = page;
    }
    if (pageSize != null) {
      query.pageSize = pageSize;
    }

    const { data, error, response } = await this.client.GET("/sandboxes", {
      params: { query },
    });
    throwOnOpenApiFetchError({ error, response }, "List sandboxes failed");

    const listing = data as ApiListSandboxesOk | undefined;
    if (!listing || typeof listing !== "object") {
      throw new Error("List sandboxes failed: unexpected response shape");
    }
    const rawItems = listing.items;
    if (!Array.isArray(rawItems)) {
      throw new Error("List sandboxes failed: unexpected items shape");
    }
    const items = rawItems.map((item) => this.mapSandboxInfo(item));
    return { ...(listing ?? {}), items } as ListSandboxesResponse;
  }

  /** Deletes a sandbox via `DELETE /sandboxes/{sandboxId}`. */
  async deleteSandbox(sandboxId: SandboxId): Promise<void> {
    const outcome = await this.client.DELETE("/sandboxes/{sandboxId}", {
      params: { path: { sandboxId } },
    });
    throwOnOpenApiFetchError(
      { error: outcome.error, response: outcome.response },
      "Delete sandbox failed"
    );
  }

  /** Pauses a sandbox via `POST /sandboxes/{sandboxId}/pause`. */
  async pauseSandbox(sandboxId: SandboxId): Promise<void> {
    const outcome = await this.client.POST("/sandboxes/{sandboxId}/pause", {
      params: { path: { sandboxId } },
    });
    throwOnOpenApiFetchError(
      { error: outcome.error, response: outcome.response },
      "Pause sandbox failed"
    );
  }

  /** Resumes a sandbox via `POST /sandboxes/{sandboxId}/resume`. */
  async resumeSandbox(sandboxId: SandboxId): Promise<void> {
    const outcome = await this.client.POST("/sandboxes/{sandboxId}/resume", {
      params: { path: { sandboxId } },
    });
    throwOnOpenApiFetchError(
      { error: outcome.error, response: outcome.response },
      "Resume sandbox failed"
    );
  }

  /**
   * Renews a sandbox's expiration via
   * `POST /sandboxes/{sandboxId}/renew-expiration`.
   *
   * @returns The response with `expiresAt` parsed into a `Date` when it is
   *   present and truthy, `undefined` otherwise.
   * @throws When the request reports an error or the payload is not an object.
   */
  async renewSandboxExpiration(
    sandboxId: SandboxId,
    req: RenewSandboxExpirationRequest,
  ): Promise<RenewSandboxExpirationResponse> {
    const payload: ApiRenewSandboxExpirationRequest = req as unknown as ApiRenewSandboxExpirationRequest;
    const { data, error, response } = await this.client.POST("/sandboxes/{sandboxId}/renew-expiration", {
      params: { path: { sandboxId } },
      body: payload,
    });
    throwOnOpenApiFetchError({ error, response }, "Renew sandbox expiration failed");

    const renewed = data as ApiRenewSandboxExpirationOk | undefined;
    if (!renewed || typeof renewed !== "object") {
      throw new Error("Renew sandbox expiration failed: unexpected response shape");
    }
    let expiresAt: Date | undefined = undefined;
    if (renewed?.expiresAt) {
      expiresAt = this.parseIsoDate("expiresAt", renewed.expiresAt);
    }
    return { ...(renewed ?? {}), expiresAt } as RenewSandboxExpirationResponse;
  }

  /**
   * Resolves the endpoint for a sandbox port via
   * `GET /sandboxes/{sandboxId}/endpoints/{port}`.
   *
   * @param useServerProxy Sent as the `use_server_proxy` query parameter;
   *   defaults to `false`.
   * @throws When the request reports an error or the payload is not an object.
   */
  async getSandboxEndpoint(
    sandboxId: SandboxId,
    port: number,
    useServerProxy = false
  ): Promise<Endpoint> {
    const { data, error, response } = await this.client.GET("/sandboxes/{sandboxId}/endpoints/{port}", {
      params: {
        path: { sandboxId, port },
        query: { use_server_proxy: useServerProxy },
      },
    });
    throwOnOpenApiFetchError({ error, response }, "Get sandbox endpoint failed");

    const endpoint = data as ApiEndpointOk | undefined;
    if (!endpoint || typeof endpoint !== "object") {
      throw new Error("Get sandbox endpoint failed: unexpected response shape");
    }
    return endpoint as unknown as Endpoint;
  }
}


================================================
FILE: sdks/sandbox/javascript/src/adapters/sse.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { SandboxApiException, SandboxError } from "../core/exceptions.js";

/** Parses `line` as JSON, returning `undefined` instead of throwing on invalid input. */
function tryParseJson(line: string): unknown | undefined {
  let parsed: unknown;
  try {
    parsed = JSON.parse(line);
  } catch {
    parsed = undefined;
  }
  return parsed;
}

/**
 * Extracts the JSON payload carried by one line of the stream.
 *
 * Blank lines, SSE comments (`:` prefix) and the SSE `event:` / `id:` / `retry:`
 * fields carry no payload and are ignored. An optional `data:` prefix is stripped
 * before decoding.
 *
 * @returns The decoded value, or `undefined` when the line has no payload, is not
 *   valid JSON, or decodes to a falsy value (falsy values are never emitted).
 */
function parseStreamLine(rawLine: string): unknown | undefined {
  const line = rawLine.trim();
  if (!line) return undefined;

  if (line.startsWith(":")) return undefined;
  if (line.startsWith("event:") || line.startsWith("id:") || line.startsWith("retry:")) return undefined;

  const jsonLine = line.startsWith("data:") ? line.slice("data:".length).trim() : line;
  if (!jsonLine) return undefined;

  const parsed = tryParseJson(jsonLine);
  return parsed ? parsed : undefined;
}

/**
 * Parses an SSE-like stream that may be either:
 * - standard SSE frames (`data: {...}\n\n`)
 * - newline-delimited JSON (one JSON object per line)
 *
 * Lines that are not valid JSON are skipped silently.
 *
 * The response body reader is always released when iteration ends. If the
 * consumer stops early (e.g. `break` inside `for await`) or reading fails, the
 * underlying stream is cancelled so the connection is not left open.
 *
 * @param res Fetch response whose body carries the event stream.
 * @param opts.fallbackErrorMessage Message used for a non-2xx response when the
 *   error body does not provide one.
 * @returns An async iterable yielding each decoded event, in arrival order.
 * @throws SandboxApiException when `res.ok` is false.
 */
export async function* parseJsonEventStream<T>(
  res: Response,
  opts?: { fallbackErrorMessage?: string },
): AsyncIterable<T> {
  if (!res.ok) {
    // Reading the body is best-effort: an unreadable body becomes an empty string.
    const text = await res.text().catch(() => "");
    const parsed = tryParseJson(text);
    const err = parsed && typeof parsed === "object" ? (parsed as any) : undefined;
    const requestId = res.headers.get("x-request-id") ?? undefined;
    const message = err?.message ?? opts?.fallbackErrorMessage ?? `Stream request failed (status=${res.status})`;
    const code = err?.code ? String(err.code) : SandboxError.UNEXPECTED_RESPONSE;
    throw new SandboxApiException({
      message,
      statusCode: res.status,
      requestId,
      error: new SandboxError(code, err?.message ? String(err.message) : message),
      rawBody: parsed ?? text,
    });
  }

  if (!res.body) {
    return;
  }

  const reader = res.body.getReader();
  const decoder = new TextDecoder("utf-8");
  let buf = "";
  // True once the producer signalled end-of-stream; used to decide whether a cancel is needed.
  let streamEnded = false;

  try {
    while (true) {
      const { value, done } = await reader.read();
      if (done) {
        streamEnded = true;
        break;
      }

      // `stream: true` keeps multi-byte sequences split across chunks intact.
      buf += decoder.decode(value, { stream: true });
      let idx: number;

      while ((idx = buf.indexOf("\n")) >= 0) {
        const rawLine = buf.slice(0, idx);
        buf = buf.slice(idx + 1);

        const event = parseStreamLine(rawLine);
        if (event !== undefined) yield event as T;
      }
    }

    // Flush any buffered UTF-8 bytes from the decoder.
    buf += decoder.decode();

    // Flush the last line if the stream did not end with a newline.
    const trailing = parseStreamLine(buf);
    if (trailing !== undefined) yield trailing as T;
  } finally {
    if (!streamEnded) {
      // Early exit or read failure: tell the producer to stop. A rejection here
      // (e.g. the stream is already errored) must not mask the original outcome.
      await reader.cancel().catch(() => undefined);
    }
    reader.releaseLock();
  }
}

================================================
FILE: sdks/sandbox/javascript/src/api/egress.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * This file was auto-generated by openapi-typescript.
 * Do not make direct changes to the file.
 */

/**
 * Route table for the egress sidecar HTTP API, keyed by URL path.
 *
 * Only `/policy` is declared. It supports:
 * - `GET`: returns the current policy as a `PolicyStatusResponse`.
 * - `PATCH`: takes an array of `NetworkRule` and returns a `PolicyStatusResponse`.
 *
 * Every other HTTP method on the path is typed `never`, i.e. not available.
 */
export interface paths {
    "/policy": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        /**
         * Get current egress policy
         * @description Returns the currently enforced egress policy and the sidecar's derived
         *     runtime mode metadata.
         */
        get: {
            parameters: {
                query?: never;
                header?: never;
                path?: never;
                cookie?: never;
            };
            requestBody?: never;
            responses: {
                /** @description Current policy returned successfully. */
                200: {
                    headers: {
                        [name: string]: unknown;
                    };
                    content: {
                        "application/json": components["schemas"]["PolicyStatusResponse"];
                    };
                };
                401: components["responses"]["Unauthorized"];
                500: components["responses"]["InternalServerError"];
            };
        };
        put?: never;
        post?: never;
        delete?: never;
        options?: never;
        head?: never;
        /**
         * Patch egress rules
         * @description Merge incoming egress rules with the currently enforced policy.
         *
         *     This endpoint uses merge semantics:
         *     - Existing rules remain unless overridden by incoming rules.
         *     - Incoming rules are applied with higher priority than existing rules.
         *     - If multiple incoming rules refer to the same `target`, the first one wins.
         */
        patch: {
            parameters: {
                query?: never;
                header?: never;
                path?: never;
                cookie?: never;
            };
            requestBody: {
                content: {
                    "application/json": components["schemas"]["NetworkRule"][];
                };
            };
            responses: {
                /** @description Patch applied successfully. */
                200: {
                    headers: {
                        [name: string]: unknown;
                    };
                    content: {
                        "application/json": components["schemas"]["PolicyStatusResponse"];
                    };
                };
                400: components["responses"]["BadRequest"];
                401: components["responses"]["Unauthorized"];
                500: components["responses"]["InternalServerError"];
            };
        };
        trace?: never;
    };
}
/** Webhook map for the egress API; `Record<string, never>` means none are declared. */
export type webhooks = Record<string, never>;
/**
 * Reusable definitions referenced from `paths` for the egress sidecar API.
 *
 * - `schemas`: JSON body shapes (`PolicyStatusResponse`, `NetworkPolicy`, `NetworkRule`).
 * - `responses`: shared error responses; each declares a `text/plain` string body.
 * - `parameters`, `requestBodies`, `headers`, `pathItems`: typed `never` (none declared).
 */
export interface components {
    schemas: {
        PolicyStatusResponse: {
            /**
             * @description Operation status reported by the sidecar.
             * @example ok
             */
            status?: string;
            /**
             * @description Derived runtime mode for the current policy.
             * @example deny_all
             */
            mode?: string;
            /**
             * @description Egress sidecar enforcement backend mode.
             * @example dns
             */
            enforcementMode?: string;
            /** @description Optional human-readable reason when the sidecar returns extra context. */
            reason?: string;
            policy?: components["schemas"]["NetworkPolicy"];
        };
        /**
         * @description Egress network policy matching the sidecar `/policy` request body.
         *     If `defaultAction` is omitted, the sidecar defaults to "deny"; passing an empty
         *     object or null results in allow-all behavior at startup.
         */
        NetworkPolicy: {
            /**
             * @description Default action when no egress rule matches. Defaults to "deny".
             * @enum {string}
             */
            defaultAction?: "allow" | "deny";
            /** @description List of egress rules evaluated in order. */
            egress?: components["schemas"]["NetworkRule"][];
        };
        NetworkRule: {
            /**
             * @description Whether to allow or deny matching targets.
             * @enum {string}
             */
            action: "allow" | "deny";
            /**
             * @description FQDN or wildcard domain (e.g., "example.com", "*.example.com").
             *     IP/CIDR not yet supported in the egress MVP.
             */
            target: string;
        };
    };
    responses: {
        /** @description The request was invalid or malformed. */
        BadRequest: {
            headers: {
                [name: string]: unknown;
            };
            content: {
                "text/plain": string;
            };
        };
        /** @description Authentication failed for the egress sidecar. */
        Unauthorized: {
            headers: {
                [name: string]: unknown;
            };
            content: {
                "text/plain": string;
            };
        };
        /** @description The sidecar failed to apply or fetch policy state. */
        InternalServerError: {
            headers: {
                [name: string]: unknown;
            };
            content: {
                "text/plain": string;
            };
        };
    };
    parameters: never;
    requestBodies: never;
    headers: never;
    pathItems: never;
}
/** Shared `$defs` map; `Record<string, never>` means none are declared. */
export type $defs = Record<string, never>;
/** Named-operation map; empty here because the egress `paths` entries declare their operations inline. */
export type operations = Record<string, never>;


================================================
FILE: sdks/sandbox/javascript/src/api/execd.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * This file was auto-generated by openapi-typescript.
 * Do not make direct changes to the file.
 */

/**
 * Route table for the execd HTTP API, keyed by URL path.
 *
 * Each entry maps the HTTP methods available on that path to a named entry of
 * `operations` (referenced here as `operations["..."]`); methods typed `never`
 * are not available on the path.
 *
 * Paths declared below, by area:
 * - health: `/ping`
 * - code contexts and execution: `/code/contexts`, `/code/contexts/{context_id}`,
 *   `/code/context`, `/code`
 * - shell commands: `/command`, `/command/status/{id}`, `/command/{id}/logs`
 * - files: `/files`, `/files/info`, `/files/permissions`, `/files/mv`,
 *   `/files/search`, `/files/replace`, `/files/upload`, `/files/download`
 * - directories: `/directories`
 * - metrics: `/metrics`, `/metrics/watch`
 */
export interface paths {
    "/ping": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        /**
         * Health check endpoint
         * @description Performs a simple health check to verify that the server is running and responsive.
         *     Returns HTTP 200 OK status if the server is healthy. This endpoint is typically used
         *     by load balancers, monitoring systems, and orchestration platforms (like Kubernetes)
         *     to check service availability.
         */
        get: operations["ping"];
        put?: never;
        post?: never;
        delete?: never;
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/code/contexts": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        /**
         * List active code execution contexts
         * @description Lists all active/available code execution contexts.
         *     If `language` is provided, only contexts under that language/runtime are returned.
         */
        get: operations["listContexts"];
        put?: never;
        post?: never;
        /**
         * Delete all contexts under a language
         * @description Deletes all existing code execution contexts under the specified `language`/runtime.
         *     This is a bulk operation intended for code-interpreter context cleanup.
         */
        delete: operations["deleteContextsByLanguage"];
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/code/contexts/{context_id}": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        /**
         * Get a code execution context by id
         * @description Retrieves the details of an existing code execution context (session) by id.
         *     Returns the context ID, language, and any associated metadata.
         */
        get: operations["getContext"];
        put?: never;
        post?: never;
        /**
         * Delete a code execution context by id
         * @description Deletes an existing code execution context (session) by id.
         *     This should terminate the underlying context thread/process and release resources.
         */
        delete: operations["deleteContext"];
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/code/context": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        get?: never;
        put?: never;
        /**
         * Create code execution context
         * @description Creates a new code execution environment and returns a session ID that can be used
         *     for subsequent code execution requests. The context maintains state across multiple
         *     code executions within the same session.
         */
        post: operations["createCodeContext"];
        delete?: never;
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/code": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        get?: never;
        put?: never;
        /**
         * Execute code in context
         * @description Executes code using Jupyter kernel in a specified execution context and streams
         *     the output in real-time using SSE (Server-Sent Events). Supports multiple programming
         *     languages (Python, JavaScript, etc.) and maintains execution state within the session.
         *     Returns execution results, output streams, execution count, and any errors.
         */
        post: operations["runCode"];
        /**
         * Interrupt code execution
         * @description Interrupts the currently running code execution in the specified context.
         *     This sends a signal to terminate the execution process and releases associated resources.
         */
        delete: operations["interruptCode"];
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/command": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        get?: never;
        put?: never;
        /**
         * Execute shell command
         * @description Executes a shell command and streams the output in real-time using SSE (Server-Sent Events).
         *     The command can run in foreground or background mode. The response includes stdout, stderr,
         *     execution status, and completion events.
         *     Optionally specify `timeout` (milliseconds) to enforce a maximum runtime; the server will
         *     terminate the process when the timeout is reached. You can also pass `uid`/`gid` to run
         *     with specific user/group IDs, and `envs` to inject environment variables.
         */
        post: operations["runCommand"];
        /**
         * Interrupt command execution
         * @description Interrupts the currently running command execution in the specified context.
         *     This sends a signal to terminate the execution process and releases associated resources.
         */
        delete: operations["interruptCommand"];
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/command/status/{id}": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        /**
         * Get command running status
         * @description Returns the current status of a command (foreground or background) by command ID.
         *     Includes running flag, exit code, error (if any), and start/finish timestamps.
         */
        get: operations["getCommandStatus"];
        put?: never;
        post?: never;
        delete?: never;
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/command/{id}/logs": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        /**
         * Get background command stdout/stderr (non-streamed)
         * @description Returns stdout and stderr for a background (detached) command by command ID.
         *     Foreground commands should be consumed via SSE; this endpoint is intended for
         *     polling logs of background commands. Supports incremental reads similar to a file seek:
         *     pass a starting line via query to fetch output after that line and receive the latest
         *     tail cursor for the next poll. When no starting line is provided, the full logs are returned.
         *     Response body is plain text so it can be rendered directly in browsers; the latest line index
         *     is provided via response header `EXECD-COMMANDS-TAIL-CURSOR` for subsequent incremental requests.
         */
        get: operations["getBackgroundCommandLogs"];
        put?: never;
        post?: never;
        delete?: never;
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/files/info": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        /**
         * Get file metadata
         * @description Retrieves detailed metadata for one or multiple files including permissions, owner,
         *     group, size, and modification time. Returns a map of file paths to their corresponding
         *     FileInfo objects.
         */
        get: operations["getFilesInfo"];
        put?: never;
        post?: never;
        delete?: never;
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/files": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        get?: never;
        put?: never;
        post?: never;
        /**
         * Delete files
         * @description Deletes one or multiple files from the sandbox. Only removes files, not directories.
         *     Use RemoveDirs for directory removal.
         */
        delete: operations["removeFiles"];
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/files/permissions": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        get?: never;
        put?: never;
        /**
         * Change file permissions
         * @description Changes permissions (mode), owner, and group for one or multiple files.
         *     Accepts a map of file paths to permission settings including octal mode,
         *     owner username, and group name.
         */
        post: operations["chmodFiles"];
        delete?: never;
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/files/mv": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        get?: never;
        put?: never;
        /**
         * Rename or move files
         * @description Renames or moves one or multiple files to new paths. Can be used for both
         *     renaming within the same directory and moving to different directories.
         *     Target directory must exist.
         */
        post: operations["renameFiles"];
        delete?: never;
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/files/search": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        /**
         * Search for files
         * @description Searches for files matching a glob pattern within a specified directory and
         *     its subdirectories. Returns file metadata including path, permissions, owner,
         *     and group. Supports glob patterns like **, *.txt, etc. Default pattern is ** (all files).
         */
        get: operations["searchFiles"];
        put?: never;
        post?: never;
        delete?: never;
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/files/replace": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        get?: never;
        put?: never;
        /**
         * Replace file content
         * @description Performs text replacement in one or multiple files. Replaces all occurrences
         *     of the old string with the new string (similar to strings.ReplaceAll).
         *     Preserves file permissions. Useful for batch text substitution across files.
         */
        post: operations["replaceContent"];
        delete?: never;
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/files/upload": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        get?: never;
        put?: never;
        /**
         * Upload files to sandbox
         * @description Uploads one or multiple files to specified paths within the sandbox.
         *     Reads metadata and file content from multipart form parts in sequence.
         *     Each file upload consists of two parts: a metadata part (JSON) followed
         *     by the actual file part.
         */
        post: operations["uploadFile"];
        delete?: never;
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/files/download": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        /**
         * Download file from sandbox
         * @description Downloads a file from the specified path within the sandbox. Supports HTTP
         *     range requests for resumable downloads and partial content retrieval.
         *     Returns file as octet-stream with appropriate headers.
         */
        get: operations["downloadFile"];
        put?: never;
        post?: never;
        delete?: never;
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/directories": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        get?: never;
        put?: never;
        /**
         * Create directories
         * @description Creates one or multiple directories with specified permissions. Creates parent
         *     directories as needed (similar to mkdir -p). Accepts a map of directory paths
         *     to permission objects.
         */
        post: operations["makeDirs"];
        /**
         * Delete directories
         * @description Recursively deletes one or multiple directories and all their contents.
         *     Similar to rm -rf. Use with caution as this operation cannot be undone.
         */
        delete: operations["removeDirs"];
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/metrics": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        /**
         * Get system metrics
         * @description Retrieves current system resource metrics including CPU usage percentage,
         *     CPU core count, total memory, used memory, and timestamp. Provides a snapshot
         *     of system resource utilization at the time of request.
         */
        get: operations["getMetrics"];
        put?: never;
        post?: never;
        delete?: never;
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/metrics/watch": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        /**
         * Watch system metrics in real-time
         * @description Streams system resource metrics in real-time using Server-Sent Events (SSE).
         *     Updates are sent every second, providing continuous monitoring of CPU usage,
         *     memory usage, and other system metrics. The connection remains open until
         *     the client disconnects.
         */
        get: operations["watchMetrics"];
        put?: never;
        post?: never;
        delete?: never;
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
}
/** Webhook map for the execd API; `Record<string, never>` means none are declared. */
export type webhooks = Record<string, never>;
export interface components {
    schemas: {
        /** @description Request to create a code execution context */
        CodeContextRequest: {
            /**
             * @description Execution runtime (python, bash, java, etc.)
             * @example python
             */
            language?: string;
        };
        /** @description Code execution context with session identifier */
        CodeContext: {
            /**
             * @description Unique session identifier returned by CreateContext
             * @example session-abc123
             */
            id?: string;
            /**
             * @description Execution runtime
             * @example python
             */
            language: string;
        };
        /** @description Request to execute code in a context */
        RunCodeRequest: {
            context?: components["schemas"]["CodeContext"];
            /**
             * @description Source code to execute
             * @example import numpy as np
             *     result = np.array([1, 2, 3])
             *     print(result)
             */
            code: string;
        };
        /** @description Request to execute a shell command */
        RunCommandRequest: {
            /**
             * @description Shell command to execute
             * @example ls -la /workspace
             */
            command: string;
            /**
             * @description Working directory for command execution
             * @example /workspace
             */
            cwd?: string;
            /**
             * @description Whether to run command in detached mode
             * @default false
             * @example false
             */
            background: boolean;
            /**
             * Format: int64
             * @description Maximum allowed execution time in milliseconds before the command is forcefully terminated by the server. If omitted, the server will not enforce any timeout.
             * @example 60000
             */
            timeout?: number;
            /**
             * Format: int32
             * @description Unix user ID used to run the command. If `gid` is provided, `uid` is required.
             * @example 1000
             */
            uid?: number;
            /**
             * Format: int32
             * @description Unix group ID used to run the command. Requires `uid` to be provided.
             * @example 1000
             */
            gid?: number;
            /**
             * @description Environment variables injected into the command process.
             * @example {
             *       "PATH": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
             *       "PYTHONUNBUFFERED": "1"
             *     }
             */
            envs?: {
                [key: string]: string;
            };
        };
        /** @description Command execution status (foreground or background) */
        CommandStatusResponse: {
            /**
             * @description Command ID returned by RunCommand
             * @example cmd-abc123
             */
            id?: string;
            /**
             * @description Original command content
             * @example ls -la
             */
            content?: string;
            /**
             * @description Whether the command is still running
             * @example false
             */
            running?: boolean;
            /**
             * Format: int32
             * @description Exit code if the command has finished
             * @example 0
             */
            exit_code?: number | null;
            /**
             * @description Error message if the command failed
             * @example permission denied
             */
            error?: string;
            /**
             * Format: date-time
             * @description Start time in RFC3339 format
             * @example 2025-12-22T09:08:05Z
             */
            started_at?: string;
            /**
             * Format: date-time
             * @description Finish time in RFC3339 format (null if still running)
             * @example 2025-12-22T09:08:09Z
             */
            finished_at?: string | null;
        };
        /** @description Server-sent event for streaming execution output */
        ServerStreamEvent: {
            /**
             * @description Event type for client-side handling
             * @example stdout
             * @enum {string}
             */
            type?: "init" | "status" | "error" | "stdout" | "stderr" | "result" | "execution_complete" | "execution_count" | "ping";
            /**
             * @description Textual data for status, init, and stream events
             * @example Hello, World!
             */
            text?: string;
            /**
             * @description Cell execution number in the session
             * @example 1
             */
            execution_count?: number;
            /**
             * Format: int64
             * @description Execution duration in milliseconds
             * @example 150
             */
            execution_time?: number;
            /**
             * Format: int64
             * @description When the event was generated (Unix milliseconds)
             * @example 1700000000000
             */
            timestamp?: number;
            /**
             * @description Execution output in various MIME types (e.g., "text/plain", "text/html")
             * @example {
             *       "text/plain": "4"
             *     }
             */
            results?: {
                [key: string]: unknown;
            };
            /** @description Execution error details if an error occurred */
            error?: {
                /**
                 * @description Error name/type
                 * @example NameError
                 */
                ename?: string;
                /**
                 * @description Error value/message
                 * @example name 'undefined_var' is not defined
                 */
                evalue?: string;
                /**
                 * @description Stack trace lines
                 * @example [
                 *       "Traceback (most recent call last):",
                 *       "  File \"<stdin>\", line 1, in <module>",
                 *       "NameError: name 'undefined_var' is not defined"
                 *     ]
                 */
                traceback?: string[];
            };
        };
        /** @description File metadata including path and permissions */
        FileInfo: {
            /**
             * @description Absolute file path
             * @example /workspace/file.txt
             */
            path: string;
            /**
             * Format: int64
             * @description File size in bytes
             * @example 2048
             */
            size: number;
            /**
             * Format: date-time
             * @description Last modification time
             * @example 2025-11-16T14:30:45Z
             */
            modified_at: string;
            /**
             * Format: date-time
             * @description File creation time
             * @example 2025-11-16T14:30:45Z
             */
            created_at: string;
            /**
             * @description File owner username
             * @example admin
             */
            owner: string;
            /**
             * @description File group name
             * @example admin
             */
            group: string;
            /**
             * @description File permissions in octal format
             * @example 755
             */
            mode: number;
        };
        /** @description File ownership and mode settings */
        Permission: {
            /**
             * @description Owner username
             * @example root
             */
            owner?: string;
            /**
             * @description Group name
             * @example root
             */
            group?: string;
            /**
             * @description Permission mode in octal format (e.g., 644, 755)
             * @default 755
             * @example 755
             */
            mode: number;
        };
        /** @description File metadata for upload operations */
        FileMetadata: {
            /**
             * @description Target file path
             * @example /workspace/upload.txt
             */
            path?: string;
            /**
             * @description File owner
             * @example admin
             */
            owner?: string;
            /**
             * @description File group
             * @example admin
             */
            group?: string;
            /**
             * @description File permissions in octal
             * @example 755
             */
            mode?: number;
        };
        /** @description File rename/move operation */
        RenameFileItem: {
            /**
             * @description Source file path
             * @example /workspace/old.txt
             */
            src: string;
            /**
             * @description Destination file path
             * @example /workspace/new.txt
             */
            dest: string;
        };
        /** @description Content replacement operation */
        ReplaceFileContentItem: {
            /**
             * @description String to be replaced
             * @example localhost
             */
            old: string;
            /**
             * @description Replacement string
             * @example 0.0.0.0
             */
            new: string;
        };
        /** @description System resource usage metrics */
        Metrics: {
            /**
             * Format: float
             * @description Number of CPU cores
             * @example 4
             */
            cpu_count: number;
            /**
             * Format: float
             * @description CPU usage percentage
             * @example 45.5
             */
            cpu_used_pct: number;
            /**
             * Format: float
             * @description Total memory in MiB
             * @example 8192
             */
            mem_total_mib: number;
            /**
             * Format: float
             * @description Used memory in MiB
             * @example 4096
             */
            mem_used_mib: number;
            /**
             * Format: int64
             * @description Timestamp when metrics were collected (Unix milliseconds)
             * @example 1700000000000
             */
            timestamp: number;
        };
        /** @description Standard error response format */
        ErrorResponse: {
            /**
             * @description Error code for programmatic handling
             * @example INVALID_REQUEST_BODY
             */
            code: string;
            /**
             * @description Human-readable error message
             * @example error parsing request, MAYBE invalid body format
             */
            message: string;
        };
    };
    responses: {
        /** @description Invalid request body format or missing required fields */
        BadRequest: {
            headers: {
                [name: string]: unknown;
            };
            content: {
                /**
                 * @example {
                 *       "code": "INVALID_REQUEST_BODY",
                 *       "message": "error parsing request, MAYBE invalid body format"
                 *     }
                 */
                "application/json": components["schemas"]["ErrorResponse"];
            };
        };
        /** @description File or resource not found */
        NotFound: {
            headers: {
                [name: string]: unknown;
            };
            content: {
                /**
                 * @example {
                 *       "code": "FILE_NOT_FOUND",
                 *       "message": "file not found"
                 *     }
                 */
                "application/json": components["schemas"]["ErrorResponse"];
            };
        };
        /** @description Runtime server error during operation */
        InternalServerError: {
            headers: {
                [name: string]: unknown;
            };
            content: {
                /**
                 * @example {
                 *       "code": "RUNTIME_ERROR",
                 *       "message": "error running code execution"
                 *     }
                 */
                "application/json": components["schemas"]["ErrorResponse"];
            };
        };
    };
    parameters: never;
    requestBodies: never;
    headers: never;
    pathItems: never;
}
/** String-keyed map whose entries are all typed `never`, so only an empty object is assignable. */
export type $defs = { [key: string]: never };
/**
 * Type-level contract for each API operation, keyed by operation id.
 *
 * Every entry describes the accepted `parameters` (query/header/path/cookie),
 * the `requestBody` (or `never` when no body is accepted) and the possible
 * `responses` keyed by HTTP status code.
 */
export interface operations {
    /** Operation taking no parameters and no body. */
    ping: {
        parameters: { query?: never; header?: never; path?: never; cookie?: never };
        requestBody?: never;
        responses: {
            /** @description The server is alive and healthy */
            200: { headers: Record<string, unknown>; content?: never };
        };
    };
    /** Lists contexts for one language; `language` is a required query parameter. */
    listContexts: {
        parameters: {
            query: {
                /**
                 * @description Execution runtime used to filter contexts (python, bash, java, etc.)
                 * @example python
                 */
                language: string;
            };
            header?: never;
            path?: never;
            cookie?: never;
        };
        requestBody?: never;
        responses: {
            /** @description Array of the active contexts */
            200: {
                headers: Record<string, unknown>;
                content: { "application/json": components["schemas"]["CodeContext"][] };
            };
            400: components["responses"]["BadRequest"];
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Deletes contexts selected by the required `language` query parameter. */
    deleteContextsByLanguage: {
        parameters: {
            query: {
                /**
                 * @description Execution runtime whose contexts should be deleted
                 * @example python
                 */
                language: string;
            };
            header?: never;
            path?: never;
            cookie?: never;
        };
        requestBody?: never;
        responses: {
            /** @description The contexts were deleted successfully */
            200: { headers: Record<string, unknown>; content?: never };
            400: components["responses"]["BadRequest"];
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Fetches a single context addressed by the `context_id` path parameter. */
    getContext: {
        parameters: {
            query?: never;
            header?: never;
            path: {
                /**
                 * @description Id of the session/context to get
                 * @example session-abc123
                 */
                context_id: string;
            };
            cookie?: never;
        };
        requestBody?: never;
        responses: {
            /** @description The context details were retrieved successfully */
            200: {
                headers: Record<string, unknown>;
                content: { "application/json": components["schemas"]["CodeContext"] };
            };
            404: components["responses"]["NotFound"];
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Deletes a single context addressed by the `context_id` path parameter. */
    deleteContext: {
        parameters: {
            query?: never;
            header?: never;
            path: {
                /**
                 * @description Id of the session/context to delete
                 * @example session-abc123
                 */
                context_id: string;
            };
            cookie?: never;
        };
        requestBody?: never;
        responses: {
            /** @description The context was deleted successfully */
            200: { headers: Record<string, unknown>; content?: never };
            400: components["responses"]["BadRequest"];
            404: components["responses"]["NotFound"];
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Creates a context from a JSON `CodeContextRequest` body. */
    createCodeContext: {
        parameters: { query?: never; header?: never; path?: never; cookie?: never };
        requestBody: {
            content: { "application/json": components["schemas"]["CodeContextRequest"] };
        };
        responses: {
            /** @description The context was created; the response carries its session ID */
            200: {
                headers: Record<string, unknown>;
                content: { "application/json": components["schemas"]["CodeContext"] };
            };
            400: components["responses"]["BadRequest"];
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Accepts a JSON `RunCodeRequest` body and answers with an event stream. */
    runCode: {
        parameters: { query?: never; header?: never; path?: never; cookie?: never };
        requestBody: {
            content: { "application/json": components["schemas"]["RunCodeRequest"] };
        };
        responses: {
            /** @description Stream of events produced by the code execution */
            200: {
                headers: Record<string, unknown>;
                content: { "text/event-stream": components["schemas"]["ServerStreamEvent"] };
            };
            400: components["responses"]["BadRequest"];
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Interrupts code execution for the session given by the required `id` query parameter. */
    interruptCode: {
        parameters: {
            query: {
                /**
                 * @description Session ID identifying the execution context to interrupt
                 * @example session-123
                 */
                id: string;
            };
            header?: never;
            path?: never;
            cookie?: never;
        };
        requestBody?: never;
        responses: {
            /** @description The code execution was interrupted successfully */
            200: { headers: Record<string, unknown>; content?: never };
            400: components["responses"]["BadRequest"];
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Accepts a JSON `RunCommandRequest` body and answers with an event stream. */
    runCommand: {
        parameters: { query?: never; header?: never; path?: never; cookie?: never };
        requestBody: {
            content: { "application/json": components["schemas"]["RunCommandRequest"] };
        };
        responses: {
            /** @description Stream of events produced by the command execution */
            200: {
                headers: Record<string, unknown>;
                content: { "text/event-stream": components["schemas"]["ServerStreamEvent"] };
            };
            400: components["responses"]["BadRequest"];
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Interrupts command execution for the session given by the required `id` query parameter. */
    interruptCommand: {
        parameters: {
            query: {
                /**
                 * @description Session ID identifying the execution context to interrupt
                 * @example session-456
                 */
                id: string;
            };
            header?: never;
            path?: never;
            cookie?: never;
        };
        requestBody?: never;
        responses: {
            /** @description The command execution was interrupted successfully */
            200: { headers: Record<string, unknown>; content?: never };
            400: components["responses"]["BadRequest"];
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Returns the status of the command addressed by the `id` path parameter. */
    getCommandStatus: {
        parameters: {
            query?: never;
            header?: never;
            path: {
                /**
                 * @description Command ID as returned by RunCommand
                 * @example cmd-abc123
                 */
                id: string;
            };
            cookie?: never;
        };
        requestBody?: never;
        responses: {
            /** @description Status of the command */
            200: {
                headers: Record<string, unknown>;
                content: { "application/json": components["schemas"]["CommandStatusResponse"] };
            };
            400: components["responses"]["BadRequest"];
            404: components["responses"]["NotFound"];
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Returns plain-text logs for the command addressed by `id`, optionally from a line cursor. */
    getBackgroundCommandLogs: {
        parameters: {
            query?: {
                /**
                 * @description Optional 0-based line cursor that works like a file seek. When it is
                 *     supplied, only the stdout/stderr lines after that line are returned. The
                 *     response reports the latest line index (`cursor`), which the client can send
                 *     on later calls to read output incrementally. When it is omitted, the full
                 *     log is returned.
                 * @example 120
                 */
                cursor?: number;
            };
            header?: never;
            path: {
                /**
                 * @description Command ID as returned by RunCommand
                 * @example cmd-abc123
                 */
                id: string;
            };
            cookie?: never;
        };
        requestBody?: never;
        responses: {
            /** @description Command output as plain text, with status metadata carried in headers */
            200: {
                headers: {
                    /** @description Highest 0-based line index available once the request cursor has been applied; pass it as the next cursor for incremental reads */
                    "EXECD-COMMANDS-TAIL-CURSOR"?: number;
                    [name: string]: unknown;
                };
                content: {
                    /**
                     * @example line1
                     *     line2
                     *     warn: something on stderr
                     */
                    "text/plain": string;
                };
            };
            400: components["responses"]["BadRequest"];
            404: components["responses"]["NotFound"];
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Returns `FileInfo` entries keyed by path for the requested `path` values. */
    getFilesInfo: {
        parameters: {
            query: {
                /** @description One or more file paths to describe (the parameter may be repeated) */
                path: string[];
            };
            header?: never;
            path?: never;
            cookie?: never;
        };
        requestBody?: never;
        responses: {
            /** @description Map from file path to its FileInfo object */
            200: {
                headers: Record<string, unknown>;
                content: {
                    "application/json": Record<string, components["schemas"]["FileInfo"]>;
                };
            };
            404: components["responses"]["NotFound"];
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Deletes the files named by the repeated `path` query parameter. */
    removeFiles: {
        parameters: {
            query: {
                /**
                 * @description One or more file paths to delete (the parameter may be repeated)
                 * @example [
                 *       "/workspace/temp.txt"
                 *     ]
                 */
                path: string[];
            };
            header?: never;
            path?: never;
            cookie?: never;
        };
        requestBody?: never;
        responses: {
            /** @description The files were deleted successfully */
            200: { headers: Record<string, unknown>; content?: never };
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Accepts a JSON object mapping string keys to `Permission` values. */
    chmodFiles: {
        parameters: { query?: never; header?: never; path?: never; cookie?: never };
        requestBody: {
            content: {
                /**
                 * @example {
                 *       "/workspace/script.sh": {
                 *         "owner": "admin",
                 *         "group": "admin",
                 *         "mode": 755
                 *       },
                 *       "/workspace/config.json": {
                 *         "owner": "admin",
                 *         "group": "admin",
                 *         "mode": 755
                 *       }
                 *     }
                 */
                "application/json": Record<string, components["schemas"]["Permission"]>;
            };
        };
        responses: {
            /** @description The permissions were changed successfully */
            200: { headers: Record<string, unknown>; content?: never };
            400: components["responses"]["BadRequest"];
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Accepts a JSON array of `RenameFileItem` (source/destination pairs). */
    renameFiles: {
        parameters: { query?: never; header?: never; path?: never; cookie?: never };
        requestBody: {
            content: {
                /**
                 * @example [
                 *       {
                 *         "src": "/workspace/old_name.txt",
                 *         "dest": "/workspace/new_name.txt"
                 *       },
                 *       {
                 *         "src": "/workspace/file.py",
                 *         "dest": "/archive/file.py"
                 *       }
                 *     ]
                 */
                "application/json": components["schemas"]["RenameFileItem"][];
            };
        };
        responses: {
            /** @description The files were renamed/moved successfully */
            200: { headers: Record<string, unknown>; content?: never };
            400: components["responses"]["BadRequest"];
            404: components["responses"]["NotFound"];
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Searches under a required root `path`, optionally narrowed by a glob `pattern`. */
    searchFiles: {
        parameters: {
            query: {
                /** @description Path of the root directory to search in */
                path: string;
                /** @description Glob pattern that files must match (default is **) */
                pattern?: string;
            };
            header?: never;
            path?: never;
            cookie?: never;
        };
        requestBody?: never;
        responses: {
            /** @description Array of the matching files together with their metadata */
            200: {
                headers: Record<string, unknown>;
                content: { "application/json": components["schemas"]["FileInfo"][] };
            };
            400: components["responses"]["BadRequest"];
            404: components["responses"]["NotFound"];
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Accepts a JSON object mapping string keys to `ReplaceFileContentItem` values. */
    replaceContent: {
        parameters: { query?: never; header?: never; path?: never; cookie?: never };
        requestBody: {
            content: {
                /**
                 * @example {
                 *       "/workspace/config.yaml": {
                 *         "old": "localhost:8080",
                 *         "new": "0.0.0.0:9090"
                 *       },
                 *       "/workspace/app.py": {
                 *         "old": "DEBUG = True",
                 *         "new": "DEBUG = False"
                 *       }
                 *     }
                 */
                "application/json": Record<string, components["schemas"]["ReplaceFileContentItem"]>;
            };
        };
        responses: {
            /** @description The content was replaced successfully */
            200: { headers: Record<string, unknown>; content?: never };
            400: components["responses"]["BadRequest"];
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Accepts a multipart form with optional `metadata` and `file` parts. */
    uploadFile: {
        parameters: { query?: never; header?: never; path?: never; cookie?: never };
        requestBody: {
            content: {
                "multipart/form-data": {
                    /**
                     * @description File metadata (a FileMetadata object) encoded as JSON
                     * @example {"path":"/workspace/file.txt","owner":"admin","group":"admin","mode":755}
                     */
                    metadata?: string;
                    /**
                     * Format: binary
                     * @description The file to upload
                     */
                    file?: string;
                };
            };
        };
        responses: {
            /** @description The files were uploaded successfully */
            200: { headers: Record<string, unknown>; content?: never };
            400: components["responses"]["BadRequest"];
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Downloads the file at `path`; an optional `Range` header requests partial content. */
    downloadFile: {
        parameters: {
            query: {
                /**
                 * @description Path (absolute or relative) of the file to download
                 * @example /workspace/data.csv
                 */
                path: string;
            };
            header?: {
                /**
                 * @description HTTP Range header used for partial content requests
                 * @example bytes=0-1023
                 */
                Range?: string;
            };
            path?: never;
            cookie?: never;
        };
        requestBody?: never;
        responses: {
            /** @description The file content */
            200: {
                headers: {
                    /** @description Attachment header carrying the filename */
                    "Content-Disposition"?: string;
                    /** @description Size of the file in bytes */
                    "Content-Length"?: number;
                    [name: string]: unknown;
                };
                content: { "application/octet-stream": string };
            };
            /** @description Partial file content, returned when a Range header is provided */
            206: {
                headers: {
                    /** @description The range of bytes being returned */
                    "Content-Range"?: string;
                    /** @description Length of the range being returned */
                    "Content-Length"?: number;
                    [name: string]: unknown;
                };
                content: { "application/octet-stream": string };
            };
            400: components["responses"]["BadRequest"];
            404: components["responses"]["NotFound"];
            /** @description The requested range is not satisfiable */
            416: {
                headers: Record<string, unknown>;
                content: { "application/json": components["schemas"]["ErrorResponse"] };
            };
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Accepts a JSON object mapping string keys to `Permission` values. */
    makeDirs: {
        parameters: { query?: never; header?: never; path?: never; cookie?: never };
        requestBody: {
            content: {
                /**
                 * @example {
                 *       "/workspace/project": {
                 *         "owner": "admin",
                 *         "group": "admin",
                 *         "mode": 755
                 *       },
                 *       "/workspace/logs": {
                 *         "owner": "admin",
                 *         "group": "admin",
                 *         "mode": 755
                 *       }
                 *     }
                 */
                "application/json": Record<string, components["schemas"]["Permission"]>;
            };
        };
        responses: {
            /** @description The directories were created successfully */
            200: { headers: Record<string, unknown>; content?: never };
            400: components["responses"]["BadRequest"];
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Deletes the directories named by the repeated `path` query parameter. */
    removeDirs: {
        parameters: {
            query: {
                /**
                 * @description One or more directory paths to delete (the parameter may be repeated)
                 * @example [
                 *       "/workspace/temp"
                 *     ]
                 */
                path: string[];
            };
            header?: never;
            path?: never;
            cookie?: never;
        };
        requestBody?: never;
        responses: {
            /** @description The directories were deleted successfully */
            200: { headers: Record<string, unknown>; content?: never };
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Returns a single JSON `Metrics` snapshot; takes no parameters or body. */
    getMetrics: {
        parameters: { query?: never; header?: never; path?: never; cookie?: never };
        requestBody?: never;
        responses: {
            /** @description Current system metrics, including CPU and memory usage */
            200: {
                headers: Record<string, unknown>;
                content: { "application/json": components["schemas"]["Metrics"] };
            };
            500: components["responses"]["InternalServerError"];
        };
    };
    /** Returns `Metrics` as an event stream; takes no parameters or body. */
    watchMetrics: {
        parameters: { query?: never; header?: never; path?: never; cookie?: never };
        requestBody?: never;
        responses: {
            /** @description Stream of system metrics, updated every second */
            200: {
                headers: Record<string, unknown>;
                content: { "text/event-stream": components["schemas"]["Metrics"] };
            };
            500: components["responses"]["InternalServerError"];
        };
    };
}


================================================
FILE: sdks/sandbox/javascript/src/api/lifecycle.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd..
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * This file was auto-generated by openapi-typescript.
 * Do not make direct changes to the file.
 */

/**
 * Route table for the sandbox lifecycle API.
 *
 * Each key is a URL template; each value describes the HTTP methods available
 * on that route. Methods that are not supported are typed as `?: never`.
 * Supported methods declare their `parameters`, `requestBody` and the
 * `responses` keyed by HTTP status code.
 *
 * Routes described here:
 * - `/sandboxes`: `get` (list) and `post` (create)
 * - `/sandboxes/{sandboxId}`: `get` (fetch) and `delete`
 * - `/sandboxes/{sandboxId}/pause`: `post`
 * - `/sandboxes/{sandboxId}/resume`: `post`
 * - `/sandboxes/{sandboxId}/renew-expiration`: `post`
 * - `/sandboxes/{sandboxId}/endpoints/{port}`: `get`
 *
 * Note on path parameters: `/sandboxes/{sandboxId}` declares `sandboxId` at
 * both the route level and the operation level, whereas the pause, resume,
 * renew-expiration and endpoints routes declare their path parameters only on
 * the operation (the route-level `path` is `?: never`).
 */
export interface paths {
    "/sandboxes": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        /**
         * List sandboxes
         * @description List all sandboxes with optional filtering and pagination using query parameters.
         *     All filter conditions use AND logic. Multiple `state` parameters use OR logic within states.
         */
        get: {
            parameters: {
                query?: {
                    /**
                     * @description Filter by lifecycle state. Pass multiple times for OR logic.
                     *     Example: `?state=Running&state=Paused`
                     */
                    state?: string[];
                    /**
                     * @description Arbitrary metadata key-value pairs for filtering; keys and values must be URL-encoded.
                     *     Example: To filter by `project=Apollo` and `note=Demo Test`: `?metadata=project%3DApollo%26note%3DDemo%252520Test`
                     *
                     *     NOTE(review): `%252520` in the example looks over-encoded (one more `%25` layer than
                     *     encoding the value and then the whole parameter would produce) - confirm against the API spec.
                     */
                    metadata?: string;
                    /** @description Page number for pagination */
                    page?: number;
                    /** @description Number of items per page */
                    pageSize?: number;
                };
                header?: never;
                path?: never;
                cookie?: never;
            };
            requestBody?: never;
            responses: {
                /** @description Paginated collection of sandboxes */
                200: {
                    headers: {
                        "X-Request-ID": components["headers"]["XRequestId"];
                        [name: string]: unknown;
                    };
                    content: {
                        "application/json": components["schemas"]["ListSandboxesResponse"];
                    };
                };
                400: components["responses"]["BadRequest"];
                401: components["responses"]["Unauthorized"];
                500: components["responses"]["InternalServerError"];
            };
        };
        put?: never;
        /**
         * Create a sandbox from a container image
         * @description Creates a new sandbox from a container image with optional resource limits,
         *     environment variables, and metadata. Sandboxes are provisioned directly from
         *     the specified image without requiring a pre-created template.
         *
         *     ## Authentication
         *
         *     API Key authentication is required via:
         *     - `OPEN-SANDBOX-API-KEY: <api-key>` header
         */
        post: {
            parameters: {
                query?: never;
                header?: never;
                path?: never;
                cookie?: never;
            };
            requestBody: {
                content: {
                    "application/json": components["schemas"]["CreateSandboxRequest"];
                };
            };
            responses: {
                /**
                 * @description Sandbox created and accepted for provisioning.
                 *
                 *     The returned sandbox includes:
                 *     - `id`: Unique sandbox identifier
                 *     - `status.state: "Pending"` (auto-starting provisioning)
                 *     - `status.reason` and `status.message` indicating initialization stage
                 *     - `metadata`, `expiresAt`, `createdAt`: Core sandbox information
                 *
                 *     Note: `image` and `updatedAt` are not included in the create response.
                 *     Use GET /sandboxes/{sandboxId} to retrieve the complete sandbox information including image spec.
                 *
                 *     To track provisioning progress, poll GET /sandboxes/{sandboxId}.
                 *     The sandbox will automatically transition to `Running` state once provisioning completes.
                 */
                202: {
                    headers: {
                        "X-Request-ID": components["headers"]["XRequestId"];
                        Location: components["headers"]["Location"];
                        [name: string]: unknown;
                    };
                    content: {
                        "application/json": components["schemas"]["CreateSandboxResponse"];
                    };
                };
                400: components["responses"]["BadRequest"];
                401: components["responses"]["Unauthorized"];
                409: components["responses"]["Conflict"];
                500: components["responses"]["InternalServerError"];
            };
        };
        delete?: never;
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/sandboxes/{sandboxId}": {
        parameters: {
            query?: never;
            header?: never;
            path: {
                /** @description Unique sandbox identifier */
                sandboxId: components["parameters"]["SandboxId"];
            };
            cookie?: never;
        };
        /**
         * Fetch a sandbox by id
         * @description Returns the complete sandbox information including:
         *     - `id`, `status`, `metadata`, `expiresAt`, `createdAt`: Core information
         *     - `image`: Container image specification (not included in create response)
         *     - `entrypoint`: Entry process specification
         *
         *     This is the complete representation of the sandbox resource.
         */
        get: {
            parameters: {
                query?: never;
                header?: never;
                path: {
                    /** @description Unique sandbox identifier */
                    sandboxId: components["parameters"]["SandboxId"];
                };
                cookie?: never;
            };
            requestBody?: never;
            responses: {
                /** @description Sandbox current state and metadata */
                200: {
                    headers: {
                        "X-Request-ID": components["headers"]["XRequestId"];
                        [name: string]: unknown;
                    };
                    content: {
                        "application/json": components["schemas"]["Sandbox"];
                    };
                };
                401: components["responses"]["Unauthorized"];
                403: components["responses"]["Forbidden"];
                404: components["responses"]["NotFound"];
                500: components["responses"]["InternalServerError"];
            };
        };
        put?: never;
        post?: never;
        /**
         * Delete a sandbox
         * @description Delete a sandbox, terminating its execution. The sandbox will transition through Stopping state to Terminated.
         */
        delete: {
            parameters: {
                query?: never;
                header?: never;
                path: {
                    /** @description Unique sandbox identifier */
                    sandboxId: components["parameters"]["SandboxId"];
                };
                cookie?: never;
            };
            requestBody?: never;
            responses: {
                /**
                 * @description Sandbox successfully deleted.
                 *
                 *     Sandbox has been scheduled for termination and will transition to Stopping state, then Terminated.
                 */
                204: {
                    headers: {
                        "X-Request-ID": components["headers"]["XRequestId"];
                        [name: string]: unknown;
                    };
                    content?: never;
                };
                401: components["responses"]["Unauthorized"];
                403: components["responses"]["Forbidden"];
                404: components["responses"]["NotFound"];
                409: components["responses"]["Conflict"];
                500: components["responses"]["InternalServerError"];
            };
        };
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/sandboxes/{sandboxId}/pause": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        get?: never;
        put?: never;
        /**
         * Pause execution while retaining state
         * @description Pause a running sandbox while preserving its state. Poll GET /sandboxes/{sandboxId} to track state transition to Paused.
         */
        post: {
            parameters: {
                query?: never;
                header?: never;
                path: {
                    /** @description Unique sandbox identifier */
                    sandboxId: components["parameters"]["SandboxId"];
                };
                cookie?: never;
            };
            requestBody?: never;
            responses: {
                /**
                 * @description Pause operation accepted.
                 *
                 *     Sandbox will transition to Pausing state.
                 *     Poll GET /sandboxes/{sandboxId} to track progress.
                 */
                202: {
                    headers: {
                        "X-Request-ID": components["headers"]["XRequestId"];
                        [name: string]: unknown;
                    };
                    content?: never;
                };
                401: components["responses"]["Unauthorized"];
                403: components["responses"]["Forbidden"];
                404: components["responses"]["NotFound"];
                409: components["responses"]["Conflict"];
                500: components["responses"]["InternalServerError"];
            };
        };
        delete?: never;
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/sandboxes/{sandboxId}/resume": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        get?: never;
        put?: never;
        /**
         * Resume a paused sandbox
         * @description Resume execution of a paused sandbox. Poll GET /sandboxes/{sandboxId} to track state transition to Running.
         */
        post: {
            parameters: {
                query?: never;
                header?: never;
                path: {
                    /** @description Unique sandbox identifier */
                    sandboxId: components["parameters"]["SandboxId"];
                };
                cookie?: never;
            };
            requestBody?: never;
            responses: {
                /**
                 * @description Resume operation accepted.
                 *
                 *     Sandbox will transition from Paused → Running.
                 *     Poll GET /sandboxes/{sandboxId} to track progress.
                 */
                202: {
                    headers: {
                        "X-Request-ID": components["headers"]["XRequestId"];
                        [name: string]: unknown;
                    };
                    content?: never;
                };
                401: components["responses"]["Unauthorized"];
                403: components["responses"]["Forbidden"];
                404: components["responses"]["NotFound"];
                409: components["responses"]["Conflict"];
                500: components["responses"]["InternalServerError"];
            };
        };
        delete?: never;
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/sandboxes/{sandboxId}/renew-expiration": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        get?: never;
        put?: never;
        /**
         * Renew sandbox expiration
         * @description Renew the absolute expiration time of a sandbox.
         */
        post: {
            parameters: {
                query?: never;
                header?: never;
                path: {
                    /** @description Unique sandbox identifier */
                    sandboxId: components["parameters"]["SandboxId"];
                };
                cookie?: never;
            };
            requestBody: {
                content: {
                    "application/json": components["schemas"]["RenewSandboxExpirationRequest"];
                };
            };
            responses: {
                /**
                 * @description Sandbox expiration updated successfully.
                 *
                 *     Returns only the updated expiresAt field.
                 */
                200: {
                    headers: {
                        "X-Request-ID": components["headers"]["XRequestId"];
                        [name: string]: unknown;
                    };
                    content: {
                        "application/json": components["schemas"]["RenewSandboxExpirationResponse"];
                    };
                };
                400: components["responses"]["BadRequest"];
                401: components["responses"]["Unauthorized"];
                403: components["responses"]["Forbidden"];
                404: components["responses"]["NotFound"];
                409: components["responses"]["Conflict"];
                500: components["responses"]["InternalServerError"];
            };
        };
        delete?: never;
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
    "/sandboxes/{sandboxId}/endpoints/{port}": {
        parameters: {
            query?: never;
            header?: never;
            path?: never;
            cookie?: never;
        };
        /**
         * Get sandbox access endpoint
         * @description Get the public access endpoint URL for accessing a service running on a specific port
         *     within the sandbox. The service must be listening on the specified port inside
         *     the sandbox for the endpoint to be available.
         */
        get: {
            parameters: {
                query?: {
                    /** @description Whether to return a server-proxied URL */
                    use_server_proxy?: boolean;
                };
                header?: never;
                path: {
                    /** @description Unique sandbox identifier */
                    sandboxId: components["parameters"]["SandboxId"];
                    /** @description Port number where the service is listening inside the sandbox */
                    port: number;
                };
                cookie?: never;
            };
            requestBody?: never;
            responses: {
                /**
                 * @description Endpoint retrieved successfully.
                 *
                 *     Returns the public URL for accessing the service on the specified port.
                 */
                200: {
                    headers: {
                        "X-Request-ID": components["headers"]["XRequestId"];
                        [name: string]: unknown;
                    };
                    content: {
                        "application/json": components["schemas"]["Endpoint"];
                    };
                };
                401: components["responses"]["Unauthorized"];
                403: components["responses"]["Forbidden"];
                404: components["responses"]["NotFound"];
                500: components["responses"]["InternalServerError"];
            };
        };
        put?: never;
        post?: never;
        delete?: never;
        options?: never;
        head?: never;
        patch?: never;
        trace?: never;
    };
}
/**
 * Webhook table for this API description.
 *
 * Every string key maps to `never`, so the only value that satisfies this
 * type is an object with no entries.
 */
export type webhooks = {
    [name: string]: never;
};
export interface components {
    schemas: {
        ListSandboxesResponse: {
            items: components["schemas"]["Sandbox"][];
            pagination: components["schemas"]["PaginationInfo"];
        };
        /** @description Pagination metadata for list responses */
        PaginationInfo: {
            /** @description Current page number */
            page: number;
            /** @description Number of items per page */
            pageSize: number;
            /** @description Total number of items matching the filter */
            totalItems: number;
            /** @description Total number of pages */
            totalPages: number;
            /** @description Whether there are more pages after the current one */
            hasNextPage: boolean;
        };
        /** @description Response from creating a new sandbox. Contains essential information without image and updatedAt. */
        CreateSandboxResponse: {
            /** @description Unique sandbox identifier */
            id: string;
            /** @description Current lifecycle status and detailed state information */
            status: components["schemas"]["SandboxStatus"];
            /** @description Custom metadata from creation request */
            metadata?: {
                [key: string]: string;
            };
            /** @description Timestamp when sandbox will auto-terminate. Null when manual cleanup is enabled. */
            expiresAt?: string | null;
            /**
             * Format: date-time
             * @description Sandbox creation timestamp
             */
            createdAt: string;
            /** @description Entry process specification from creation request */
            entrypoint: string[];
        };
        /** @description Runtime execution environment provisioned from a container image */
        Sandbox: {
            /** @description Unique sandbox identifier */
            id: string;
            /**
             * @description Container image specification used to provision this sandbox.
             *     Only present in responses for GET/LIST operations. Not returned in createSandbox response.
             */
            image: components["schemas"]["ImageSpec"];
            /** @description Current lifecycle status and detailed state information */
            status: components["schemas"]["SandboxStatus"];
            /** @description Custom metadata from creation request */
            metadata?: {
                [key: string]: string;
            };
            /**
             * @description The command to execute as the sandbox's entry process.
             *     Always present in responses since entrypoint is required in creation requests.
             */
            entrypoint: string[];
            /** @description Timestamp when sandbox will auto-terminate. Null when manual cleanup is enabled. */
            expiresAt?: string | null;
            /**
             * Format: date-time
             * @description Sandbox creation timestamp
             */
            createdAt: string;
        };
        /**
         * @description High-level lifecycle state of the sandbox.
         *
         *     Common state values:
         *     - Pending: Sandbox is being provisioned
         *     - Running: Sandbox is running and ready to accept requests
         *     - Pausing: Sandbox is in the process of pausing
         *     - Paused: Sandbox has been paused while retaining its state
         *     - Stopping: Sandbox is being terminated
         *     - Terminated: Sandbox has been successfully terminated
         *     - Failed: Sandbox encountered a critical error
         *
         *     State transitions:
         *     - Pending → Running (after creation completes)
         *     - Running → Pausing (when pause is requested)
         *     - Pausing → Paused (pause operation completes)
         *     - Paused → Running (when resume is requested)
         *     - Running/Paused → Stopping (when kill is requested or TTL expires)
         *     - Stopping → Terminated (kill/timeout operation completes)
         *     - Pending/Running/Paused → Failed (on error)
         *
         *     Note: New state values may be added in future versions.
         *     Clients should handle unknown state values gracefully.
         */
        SandboxState: string;
        /** @description Detailed status information with lifecycle state and transition details */
        SandboxStatus: {
            /** @description Current lifecycle state of the sandbox */
            state: components["schemas"]["SandboxState"];
            /**
             * @description Short machine-readable reason code for the current state.
             *     Examples: "user_delete", "ttl_expiry", "provision_timeout", "runtime_error"
             */
            reason?: string;
            /** @description Human-readable message describing the current state or reason for state transition */
            message?: string;
            /**
             * Format: date-time
             * @description Timestamp of the last state transition
             */
            lastTransitionAt?: string;
        };
        /**
         * @description Container image specification for sandbox provisioning.
         *
         *     Supports public registry images and private registry images with authentication.
         */
        ImageSpec: {
            /**
             * @description Container image URI in standard format.
             *
             *     Examples:
             *       - "python:3.11" (Docker Hub)
             *       - "ubuntu:22.04"
             *       - "gcr.io/my-project/model-server:v1.0"
             *       - "private-registry.company.com:5000/app:latest"
             */
            uri: string;
            /** @description Registry authentication credentials (required for private registries) */
            auth?: {
                /** @description Registry username or service account */
                username?: string;
                /** @description Registry password or authentication token */
                password?: string;
            };
        };
        /**
         * @description Request to create a new sandbox from a container image.
         *
         *     **Note**: API Key authentication is required via the `OPEN-SANDBOX-API-KEY` header.
         */
        CreateSandboxRequest: {
            /** @description Container image specification for the sandbox */
            image: components["schemas"]["ImageSpec"];
            /**
             * @description Sandbox timeout in seconds. The sandbox will automatically terminate after this duration.
             *     The maximum is controlled by the server configuration (`server.max_sandbox_timeout_seconds`).
             *     Omit or set null to disable automatic expiration and require explicit cleanup.
             *     Note: manual cleanup support is runtime-dependent; Kubernetes providers may reject
             *     null timeout when the underlying workload provider does not support non-expiring sandboxes.
             */
            timeout?: number | null;
            /**
             * @description Runtime resource constraints for the sandbox instance.
             *     SDK clients should provide sensible defaults (e.g., cpu: "500m", memory: "512Mi").
             */
            resourceLimits: components["schemas"]["ResourceLimits"];
            /**
             * @description Environment variables to inject into the sandbox runtime.
             * @example {
             *       "API_KEY": "secret-key",
             *       "DEBUG": "true",
             *       "LOG_LEVEL": "info"
             *     }
             */
            env?: {
                [key: string]: string;
            };
            /**
             * @description Custom key-value metadata for management, filtering, and tagging.
             *     Use "name" key for a human-readable identifier.
             * @example {
             *       "name": "Data Processing Sandbox",
             *       "project": "data-processing",
             *       "team": "ml",
             *       "environment": "staging"
             *     }
             */
            metadata?: {
                [key: string]: string;
            };
            /**
             * @description The command to execute as the sandbox's entry process (required).
             *
             *     Explicitly specifies the user's expected main process, allowing the sandbox management
             *     service to reliably inject control processes before executing this command.
             *
             *     Format: [executable, arg1, arg2, ...]
             *
             *     Examples:
             *     - ["python", "/app/main.py"]
             *     - ["/bin/bash"]
             *     - ["java", "-jar", "/app/app.jar"]
             *     - ["node", "server.js"]
             * @example [
             *       "python",
             *       "/app/main.py"
             *     ]
             */
            entrypoint: string[];
            /**
             * @description Optional outbound network policy for the sandbox.
             *     Shape matches the sidecar `/policy` endpoint. If omitted or empty,
             *     the sidecar starts in allow-all mode until updated.
             */
            networkPolicy?: components["schemas"]["NetworkPolicy"];
            /**
             * @description Storage mounts for the sandbox. Each volume entry specifies a named backend-specific
             *     storage source and common mount settings. Exactly one backend type must be specified
             *     per volume entry.
             */
            volumes?: components["schemas"]["Volume"][];
            /**
             * @description Opaque container for provider-specific or transient parameters not supported by the core API.
             *
             *     **Note**: This field is reserved for internal features, experimental flags, or temporary behaviors. Standard parameters should be proposed as core API fields.
             *
             *     **Best Practices**:
             *     - **Namespacing**: Use prefixed keys (e.g., `storage.id`) to prevent collisions.
             *     - **Pass-through**: SDKs and middleware must treat this object as opaque and pass it through transparently.
             */
            extensions?: {
                [key: string]: string;
            };
        };
        /**
         * @description Runtime resource constraints as key-value pairs. Similar to Kubernetes resource specifications,
         *     allows flexible definition of resource limits. Common resource types include:
         *     - `cpu`: CPU allocation in millicores (e.g., "250m" for 0.25 CPU cores)
         *     - `memory`: Memory allocation in bytes or human-readable format (e.g., "512Mi", "1Gi")
         *     - `gpu`: Number of GPU devices (e.g., "1")
         *
         *     New resource types can be added without API changes.
         * @example {
         *       "cpu": "500m",
         *       "memory": "512Mi",
         *       "gpu": "1"
         *     }
         */
        ResourceLimits: {
            [key: string]: string;
        };
        RenewSandboxExpirationRequest: {
            /**
             * Format: date-time
             * @description New absolute expiration time in UTC (RFC 3339 format).
             *     Must be in the future and after the current expiresAt time.
             *
             *     Example: "2025-11-16T14:30:45Z"
             */
            expiresAt: string;
        };
        RenewSandboxExpirationResponse: {
            /**
             * Format: date-time
             * @description The new absolute expiration time in UTC (RFC 3339 format).
             *
             *     Example: "2025-11-16T14:30:45Z"
             */
            expiresAt: string;
        };
        /**
         * @description Standard error response for all non-2xx HTTP responses.
         *     HTTP status code indicates the error category; code and message provide details.
         */
        ErrorResponse: {
            /**
             * @description Machine-readable error code (e.g., INVALID_REQUEST, NOT_FOUND, INTERNAL_ERROR).
             *     Use this for programmatic error handling.
             */
            code: string;
            /** @description Human-readable error message describing what went wrong and how to fix it. */
            message: string;
        };
        /**
         * @description Endpoint for accessing a service running in the sandbox.
         *     The service must be listening on the specified port inside the sandbox for the endpoint to be available.
         */
        Endpoint: {
            /**
             * @description Public URL to access the service from outside the sandbox.
             *     Format: {endpoint-host}/sandboxes/{sandboxId}/port/{port}
             *     Example: endpoint.opensandbox.io/sandboxes/abc123/port/8080
             */
            endpoint: string;
            /** @description Requests targeting the sandbox must include the corresponding header(s). */
            headers?: {
                [key: string]: string;
            };
        };
        /**
         * @description Egress network policy matching the sidecar `/policy` request body.
         *     If `defaultAction` is omitted, the sidecar defaults to "deny"; passing an empty
         *     object or null results in allow-all behavior at startup.
         */
        NetworkPolicy: {
            /**
             * @description Default action when no egress rule matches. Defaults to "deny".
             * @enum {string}
             */
            defaultAction?: "allow" | "deny";
            /** @description List of egress rules evaluated in order. */
            egress?: components["schemas"]["NetworkRule"][];
        };
        NetworkRule: {
            /**
             * @description Whether to allow or deny matching targets.
             * @enum {string}
             */
            action: "allow" | "deny";
            /**
             * @description FQDN or wildcard domain (e.g., "example.com", "*.example.com").
             *     IP/CIDR not yet supported in the egress MVP.
             */
            target: string;
        };
        /**
         * @description Storage mount definition for a sandbox. Each volume entry contains:
         *     - A unique name identifier
         *     - Exactly one backend struct (host, pvc, ossfs, etc.) with backend-specific fields
         *     - Common mount settings (mountPath, readOnly, subPath)
         */
        Volume: {
            /**
             * @description Unique identifier for the volume within the sandbox.
             *     Must be a valid DNS label (lowercase alphanumeric, hyphens allowed, max 63 chars).
             */
            name: string;
            host?: components["schemas"]["Host"];
            pvc?: components["schemas"]["PVC"];
            ossfs?: components["schemas"]["OSSFS"];
            /**
             * @description Absolute path inside the container where the volume is mounted.
             *     Must start with '/'.
             */
            mountPath: string;
            /**
             * @description If true, the volume is mounted as read-only. Defaults to false (read-write).
             * @default false
             */
            readOnly: boolean;
            /**
             * @description Optional subdirectory under the backend path to mount.
             *     For `ossfs` backend, this field is used as the bucket prefix.
             *     Must be a relative path without '..' components.
             */
            subPath?: string;
        };
        /**
         * @description Host path bind mount backend. Maps a directory on the host filesystem
         *     into the container. Only available when the runtime supports host mounts.
         *
         *     Security note: Host paths are restricted by server-side allowlist.
         *     Users must specify paths under permitted prefixes.
         */
        Host: {
            /**
             * @description Absolute path on the host filesystem to mount.
             *     Must start with '/' and be under an allowed prefix.
             */
            path: string;
        };
        /**
         * @description Platform-managed named volume backend. A runtime-neutral abstraction
         *     for referencing a pre-existing, platform-managed named volume.
         *
         *     - Kubernetes: maps to a PersistentVolumeClaim in the same namespace.
         *     - Docker: maps to a Docker named volume (created via `docker volume create`).
         *
         *     The volume must already exist on the target platform before sandbox
         *     creation.
         */
        PVC: {
            /**
             * @description Name of the volume on the target platform.
             *     In Kubernetes this is the PVC name; in Docker this is the named
             *     volume name. Must be a valid DNS label.
             */
            claimName: string;
        };
        /**
         * @description Alibaba Cloud OSS mount backend via ossfs.
         *
         *     The runtime mounts a host-side OSS path under `storage.ossfs_mount_root`
         *     and bind-mounts the resolved path into the sandbox container.
         *     Prefix selection is expressed via `Volume.subPath`.
         *     In Docker runtime, OSSFS backend requires OpenSandbox Server to run on a Linux host with FUSE support.
         */
        OSSFS: {
            /** @description OSS bucket name. */
            bucket: string;
            /** @description OSS endpoint (e.g., `oss-cn-hangzhou.aliyuncs.com`). */
            endpoint: string;
            /**
             * @description ossfs major version used by runtime mount integration.
             * @default 2.0
             * @enum {string}
             */
            version: "1.0" | "2.0";
            /**
             * @description Additional ossfs mount options.
             *     Runtime encodes options by `version`:
             *     - `1.0`: mounts with `ossfs ... -o <option>`
             *     - `2.0`: mounts with `ossfs2 mount ... -c <config-file>` and encodes options as `--<option>` lines in the config file
             *     Option values must be provided as raw payloads without leading `-`.
             */
            options?: string[];
            /** @description OSS access key ID for inline credentials mode. */
            accessKeyId: string;
            /** @description OSS access key secret for inline credentials mode. */
            accessKeySecret: string;
        };
    };
    responses: {
        /** @description Error response envelope */
        Error: {
            headers: {
                [name: string]: unknown;
            };
            content: {
                "application/json": components["schemas"]["ErrorResponse"];
            };
        };
        /** @description The request was invalid or malformed */
        BadRequest: {
            headers: {
                "X-Request-ID": components["headers"]["XRequestId"];
                [name: string]: unknown;
            };
            content: {
                "application/json": components["schemas"]["ErrorResponse"];
            };
        };
        /** @description Authentication credentials are missing or invalid */
        Unauthorized: {
            headers: {
                "X-Request-ID": components["headers"]["XRequestId"];
                [name: string]: unknown;
            };
            content: {
                "application/json": components["schemas"]["ErrorResponse"];
            };
        };
        /** @description The authenticated user lacks permission for this operation */
        Forbidden: {
            headers: {
                "X-Request-ID": components["headers"]["XRequestId"];
                [name: string]: unknown;
            };
            content: {
                "application/json": components["schemas"]["ErrorResponse"];
            };
        };
        /** @description The requested resource does not exist */
        NotFound: {
            headers: {
                "X-Request-ID": components["headers"]["XRequestId"];
                [name: string]: unknown;
            };
            content: {
                "application/json": components["schemas"]["ErrorResponse"];
            };
        };
        /** @description The operation conflicts with the current state */
        Conflict: {
            headers: {
                "X-Request-ID": components["headers"]["XRequestId"];
                [name: string]: unknown;
            };
            content: {
                "application/json": components["schemas"]["ErrorResponse"];
            };
        };
        /** @description An unexpected server error occurred */
        InternalServerError: {
            headers: {
                "X-Request-ID": components["headers"]["XRequestId"];
                [name: string]: unknown;
            };
            content: {
                "application/json": components["schemas"]["ErrorResponse"];
            };
        };
    };
    parameters: {
        /** @description Unique sandbox identifier */
        SandboxId: string;
    };
    requestBodies: never;
    headers: {
        /** @description Unique request identifier for tracing */
        XRequestId: string;
        /** @description URI of the newly created or related resource */
        Location: string;
        /** @description Suggested delay in seconds before retrying */
        RetryAfter: number;
    };
    pathItems: never;
}
/** No shared `$defs` are declared; the record type admits no entries. */
export type $defs = Record<string, never>;
/** No named operations are declared; the record type admits no entries. */
export type operations = Record<string, never>;


================================================
FILE: sdks/sandbox/javascript/src/config/connection.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import {DEFAULT_USER_AGENT} from "../core/constants.js";

/** URL scheme used when composing the API base URL (see `ConnectionConfig.getBaseUrl`). */
export type ConnectionProtocol = "http" | "https";

/**
 * Options for {@link ConnectionConfig}.
 *
 * Most users only need `domain`, `protocol`, and `apiKey`.
 */
export interface ConnectionConfigOptions {
  /**
   * API server domain (host[:port]) without scheme.
   * Examples:
   * - "localhost:8080"
   * - "api.opensandbox.io"
   *
   * You may also pass a full URL (e.g. "http://localhost:8080" or "https://api.example.com").
   * If the URL includes a path, it will be preserved and `/v1` will be appended automatically.
   *
   * When omitted, the `OPEN_SANDBOX_DOMAIN` environment variable is used, and
   * "localhost:8080" if that is not set either.
   */
  domain?: string;
  /**
   * URL scheme for the API server. Defaults to "http".
   * Ignored when `domain` is a full URL: the scheme of that URL takes precedence.
   */
  protocol?: ConnectionProtocol;
  /**
   * API key. When omitted, the `OPEN_SANDBOX_API_KEY` environment variable is used.
   * The key is added to `headers` as `OPEN-SANDBOX-API-KEY` unless that header
   * was already supplied.
   */
  apiKey?: string;
  /**
   * Extra headers. They are copied into the configuration's `headers` record,
   * which the adapter factory hands to the HTTP clients.
   */
  headers?: Record<string, string>;

  /**
   * Request timeout applied to all SDK HTTP calls (best-effort; wraps fetch).
   * Defaults to 30 seconds.
   */
  requestTimeoutSeconds?: number;
  /**
   * Enable basic debug logging for HTTP requests (best-effort).
   */
  debug?: boolean;
  /**
   * Use sandbox server as proxy for process execd requests.
   * Useful when the client SDK cannot access the created sandbox directly.
   */
  useServerProxy?: boolean;
}

/**
 * Reports whether the current runtime looks like Node.js.
 *
 * The check is based solely on `globalThis.process.versions.node` being truthy.
 */
function isNodeRuntime(): boolean {
  const nodeVersion = (globalThis as any)?.process?.versions?.node;
  return Boolean(nodeVersion);
}

/**
 * Return a copy of `headers` that is safe to print in debug logs.
 *
 * Values of credential-bearing headers are replaced with "***". Besides the
 * SDK's own `OPEN-SANDBOX-API-KEY`, the standard credential headers
 * (`Authorization`, `Proxy-Authorization`, `Cookie`, `Set-Cookie`) are masked
 * too, because callers can supply arbitrary extra headers. Header names are
 * matched case-insensitively; the original key casing is kept in the output.
 *
 * @param headers Header record to sanitize. It is not mutated.
 * @returns A new record with the same keys and sensitive values masked.
 */
function redactHeaders(
  headers: Record<string, string>
): Record<string, string> {
  const sensitiveNames = new Set([
    "open-sandbox-api-key",
    "authorization",
    "proxy-authorization",
    "cookie",
    "set-cookie",
  ]);
  const out: Record<string, string> = {};
  for (const [name, value] of Object.entries(headers)) {
    out[name] = sensitiveNames.has(name.toLowerCase()) ? "***" : value;
  }
  return out;
}

/**
 * Look up an environment variable through `globalThis.process.env`.
 *
 * @param name Variable name.
 * @returns The value when it is a non-empty string; otherwise `undefined`
 *          (including when no `process.env` object is available).
 */
function readEnv(name: string): string | undefined {
  const value = (globalThis as any)?.process?.env?.[name];
  if (typeof value !== "string" || value.length === 0) {
    return undefined;
  }
  return value;
}

/**
 * Remove every "/" at the end of `s`.
 *
 * @param s Input string.
 * @returns `s` without its trailing run of slashes (possibly the empty string).
 */
function stripTrailingSlashes(s: string): string {
  const SLASH = 47; // char code of "/"
  let end = s.length;
  // Walk back from the end while the preceding character is a slash.
  while (end > 0 && s.charCodeAt(end - 1) === SLASH) {
    end -= 1;
  }
  return s.slice(0, end);
}

/**
 * Normalize a base string by dropping trailing slashes and then a single
 * trailing "/v1" segment, if present.
 *
 * @param s Base string, e.g. "host:8080/v1/".
 * @returns The string without trailing slashes and without the final "/v1".
 */
function stripV1Suffix(s: string): string {
  const suffix = "/v1";
  const withoutSlashes = stripTrailingSlashes(s);
  if (!withoutSlashes.endsWith(suffix)) {
    return withoutSlashes;
  }
  return withoutSlashes.slice(0, withoutSlashes.length - suffix.length);
}

/**
 * Keep-alive timeout in milliseconds. Passed as both `keepAliveTimeout` and
 * `keepAliveMaxTimeout` when the undici `Agent` is created in `createNodeFetch`.
 */
const DEFAULT_KEEPALIVE_TIMEOUT_MS = 30_000;

/**
 * Normalize a user-supplied domain string into a base without a trailing "/v1".
 *
 * Two input shapes are accepted:
 * - a full URL ("http://..." or "https://...", scheme matched
 *   case-insensitively): the origin and path are kept, query and hash are
 *   dropped, and the scheme is reported back through `protocol`;
 * - a bare "host[:port]" or "host[:port]/prefix": returned as-is apart from
 *   the suffix normalization, with no `protocol`.
 *
 * Surrounding whitespace (common in values read from environment variables)
 * is ignored.
 *
 * @param input Raw domain or URL.
 * @returns The normalized base and, for full URLs, the detected protocol.
 * @throws TypeError from the `URL` constructor if a full URL cannot be parsed.
 */
function normalizeDomainBase(input: string): {
  protocol?: ConnectionProtocol;
  domainBase: string;
} {
  const raw = input.trim();

  // URL schemes are case-insensitive, so "HTTPS://host" must also count as a full URL.
  if (/^https?:\/\//i.test(raw)) {
    const u = new URL(raw);
    const proto = u.protocol === "https:" ? "https" : "http";
    // Keep origin + pathname, drop query/hash.
    const base = `${u.origin}${u.pathname}`;
    return { protocol: proto, domainBase: stripV1Suffix(base) };
  }

  // No scheme: treat as "host[:port]" or "host[:port]/prefix" and normalize trailing "/v1" or "/".
  return { domainBase: stripV1Suffix(raw) };
}

/**
 * Build the base `fetch` used by the SDK together with a matching `close` hook.
 *
 * - Outside Node.js the global `fetch` is returned unchanged and `close` is a no-op.
 * - On Node.js the returned `fetch` lazily imports `undici` on its first call and
 *   creates one `Agent`, which is then passed as `dispatcher` on every request.
 *   If the import fails or exposes no `Agent`, requests go through the global
 *   `fetch` without a dispatcher.
 *
 * `close` waits for any in-flight dispatcher creation and then calls the
 * dispatcher's `close()` if it has one; errors from closing are swallowed.
 */
function createNodeFetch(): {
  fetch: typeof fetch;
  close: () => Promise<void>;
} {
  if (!isNodeRuntime()) {
    const noopClose = async (): Promise<void> => {
      // Browser fetch has no pooled dispatcher to close.
    };
    return { fetch, close: noopClose };
  }

  const baseFetch = fetch;
  let dispatcher: unknown | undefined;
  let dispatcherPromise: Promise<unknown> | null = null;

  // Creates the undici Agent; resolves to undefined when undici is unusable.
  const loadDispatcher = async (): Promise<unknown> => {
    try {
      const mod = await import("undici");
      const Agent = (mod as { Agent?: new (...args: any[]) => unknown }).Agent;
      if (!Agent) {
        return undefined;
      }
      dispatcher = new Agent({
        keepAliveTimeout: DEFAULT_KEEPALIVE_TIMEOUT_MS,
        keepAliveMaxTimeout: DEFAULT_KEEPALIVE_TIMEOUT_MS,
      });
      return dispatcher;
    } catch {
      return undefined;
    }
  };

  const nodeFetch: typeof fetch = async (input: RequestInfo | URL, init?: RequestInit) => {
    // Start the (single) dispatcher creation on first use; later calls share the promise.
    if (dispatcherPromise == null) {
      dispatcherPromise = loadDispatcher();
    }
    await dispatcherPromise;

    if (!dispatcher) {
      return baseFetch(input, init);
    }

    const mergedInit = { ...(init ?? {}), dispatcher } as RequestInit & {
      dispatcher?: unknown;
    };
    return baseFetch(input, mergedInit as RequestInit);
  };

  const close = async (): Promise<void> => {
    if (dispatcherPromise) {
      await dispatcherPromise.catch(() => undefined);
    }
    const closable = dispatcher as any;
    const canClose =
      !!closable &&
      typeof closable === "object" &&
      typeof closable.close === "function";
    if (!canClose) {
      return;
    }
    try {
      await closable.close();
    } catch {
      // swallow close errors
    }
  };

  return { fetch: nodeFetch, close };
}

/**
 * Wrap a `fetch` implementation with a request timeout, caller-abort
 * forwarding and optional debug logging.
 *
 * Behavior of the returned function:
 * - An internal `AbortController` drives the request. When `timeoutSeconds`
 *   is a finite positive number, the controller is aborted after that delay
 *   with a descriptive `Error`; otherwise no timer is armed.
 * - The caller's abort signal is forwarded to the internal controller. The
 *   signal is taken from `init.signal` when given, otherwise from `input`
 *   when `input` is a `Request`. Because the wrapper passes its own `signal`
 *   in `init`, a `Request`'s signal would be ignored if it were not forwarded.
 * - The timer and the abort listener are removed once `baseFetch` settles,
 *   so the timeout covers the time until the response (headers) is available.
 * - With `debug` enabled, the method, URL and redacted headers are logged
 *   before the request and the status after it. Method, URL and headers are
 *   also read from a `Request` input.
 *
 * @param opts.baseFetch      Underlying fetch implementation.
 * @param opts.timeoutSeconds Timeout in seconds; non-finite or non-positive disables it.
 * @param opts.debug          Whether to log requests and responses.
 * @param opts.defaultHeaders Headers included in the debug output only.
 * @param opts.label          Tag used in log lines and the timeout message.
 * @returns A function with the same signature as `fetch`.
 */
function createTimedFetch(opts: {
  baseFetch: typeof fetch;
  timeoutSeconds: number;
  debug: boolean;
  defaultHeaders?: Record<string, string>;
  label: string;
}): typeof fetch {
  const baseFetch = opts.baseFetch;
  const timeoutSeconds = opts.timeoutSeconds;
  const debug = opts.debug;
  const defaultHeaders = opts.defaultHeaders ?? {};
  const label = opts.label;

  // The timeout is fixed per wrapper, so compute it once.
  const timeoutMs = Math.floor(timeoutSeconds * 1000);
  const timeoutEnabled = Number.isFinite(timeoutMs) && timeoutMs > 0;

  // Flatten any header container (Headers, tuple array, plain record) for logging.
  const toHeaderRecord = (h: unknown): Record<string, string> => {
    const out: Record<string, string> = {};
    if (!h) return out;
    if (typeof Headers !== "undefined" && h instanceof Headers) {
      h.forEach((value, key) => {
        out[key] = value;
      });
      return out;
    }
    if (Array.isArray(h)) {
      for (const pair of h) out[String(pair[0])] = String(pair[1]);
      return out;
    }
    return { ...(h as Record<string, string>) };
  };

  return async (input: RequestInfo | URL, init?: RequestInit) => {
    const request =
      typeof Request !== "undefined" && input instanceof Request
        ? input
        : undefined;

    const method = init?.method ?? request?.method ?? "GET";
    const url =
      typeof input === "string"
        ? input
        : request?.url ?? (input as any)?.toString?.() ?? String(input);

    const ac = new AbortController();
    const t = timeoutEnabled
      ? setTimeout(
          () =>
            ac.abort(
              new Error(
                `[${label}] Request timed out (timeoutSeconds=${timeoutSeconds})`
              )
            ),
          timeoutMs
        )
      : undefined;

    // `init.signal` wins when present (an explicit `null` means "no signal");
    // otherwise fall back to the Request's own signal so caller aborts still apply.
    const upstreamSignal: AbortSignal | null | undefined =
      init?.signal !== undefined ? init.signal : request?.signal;

    const onAbort = () =>
      ac.abort((upstreamSignal as any)?.reason ?? new Error("Aborted"));
    if (upstreamSignal) {
      if (upstreamSignal.aborted) onAbort();
      else
        upstreamSignal.addEventListener("abort", onAbort, {
          once: true,
        } as any);
    }

    const mergedInit: RequestInit = {
      ...init,
      signal: ac.signal,
    };

    if (debug) {
      const mergedHeaders = {
        ...defaultHeaders,
        ...toHeaderRecord(request?.headers),
        ...toHeaderRecord(init?.headers),
      };
      // eslint-disable-next-line no-console
      console.log(
        `[opensandbox:${label}] ->`,
        method,
        url,
        redactHeaders(mergedHeaders)
      );
    }

    try {
      const res = await baseFetch(input, mergedInit);
      if (debug) {
        // eslint-disable-next-line no-console
        console.log(`[opensandbox:${label}] <-`, method, url, res.status);
      }
      return res;
    } finally {
      if (t) clearTimeout(t);
      if (upstreamSignal)
        upstreamSignal.removeEventListener("abort", onAbort as any);
    }
  };
}

/**
 * Connection settings for the SDK: where the API server lives, which headers
 * to send, and (once initialized) the `fetch` wrappers used for HTTP and SSE.
 *
 * A freshly constructed instance has no transport; `fetch` / `sseFetch` then
 * fall back to the global `fetch`. Use {@link withTransportIfMissing} to get
 * an instance whose transport is initialized, and {@link closeTransport} to
 * release it.
 */
export class ConnectionConfig {
  /** Scheme used by {@link getBaseUrl} when `domain` carries no scheme. */
  readonly protocol: ConnectionProtocol;
  /**
   * Normalized domain: trailing slashes and a trailing `/v1` are removed.
   * Contains the scheme (and any path prefix) when a full URL was supplied.
   */
  readonly domain: string;
  /** API key from the options or the `OPEN_SANDBOX_API_KEY` environment variable. */
  readonly apiKey?: string;
  /** Headers to send, including the API key and user-agent added by the constructor. */
  readonly headers: Record<string, string>;
  private _fetch: typeof fetch | null;
  private _sseFetch: typeof fetch | null;
  /** Timeout for regular HTTP calls, in seconds (30 when not supplied as a number). */
  readonly requestTimeoutSeconds: number;
  /** Whether request/response debug logging is enabled. */
  readonly debug: boolean;
  /** User-agent string added to `headers` on Node.js when none was provided. */
  readonly userAgent: string = DEFAULT_USER_AGENT;
  /**
   * Use sandbox server as proxy for endpoint requests (default false).
   */
  readonly useServerProxy: boolean;
  private _closeTransport: () => Promise<void>;
  private _closePromise: Promise<void> | null = null;
  private _transportInitialized = false;

  /**
   * Create a connection configuration.
   *
   * Environment variables (optional):
   * - `OPEN_SANDBOX_DOMAIN` (default: `localhost:8080`)
   * - `OPEN_SANDBOX_API_KEY`
   *
   * HTTP header names are case-insensitive, so a user-supplied API-key or
   * user-agent header is recognized regardless of its casing and is never
   * duplicated under a second spelling.
   */
  constructor(opts: ConnectionConfigOptions = {}) {
    const envDomain = readEnv("OPEN_SANDBOX_DOMAIN");
    const envApiKey = readEnv("OPEN_SANDBOX_API_KEY");

    const rawDomain = opts.domain ?? envDomain ?? "localhost:8080";
    const normalized = normalizeDomainBase(rawDomain);

    // If the domain includes a scheme, it overrides `protocol`.
    this.protocol = normalized.protocol ?? opts.protocol ?? "http";
    this.domain = normalized.domainBase;
    this.apiKey = opts.apiKey ?? envApiKey;
    this.requestTimeoutSeconds =
      typeof opts.requestTimeoutSeconds === "number"
        ? opts.requestTimeoutSeconds
        : 30;
    this.debug = !!opts.debug;
    this.useServerProxy = !!opts.useServerProxy;

    const headers: Record<string, string> = { ...(opts.headers ?? {}) };

    // True when `headers` holds a non-empty value for `name` under any casing.
    const hasHeaderValue = (name: string): boolean => {
      const wanted = name.toLowerCase();
      return Object.keys(headers).some(
        (key) => key.toLowerCase() === wanted && !!headers[key]
      );
    };

    // Attach API key via header unless the user already provided one.
    if (this.apiKey && !hasHeaderValue("OPEN-SANDBOX-API-KEY")) {
      headers["OPEN-SANDBOX-API-KEY"] = this.apiKey;
    }
    // Best-effort user-agent (Node only).
    if (isNodeRuntime() && this.userAgent && !hasHeaderValue("user-agent")) {
      headers["user-agent"] = this.userAgent;
    }
    this.headers = headers;
    this._fetch = null;
    this._sseFetch = null;
    this._closeTransport = async () => {
      // Init with empty close call
    };
    this._transportInitialized = false;
  }

  /** Timed fetch for regular HTTP calls; the global `fetch` until the transport is initialized. */
  get fetch(): typeof fetch {
    return this._fetch ?? fetch;
  }

  /** Fetch for SSE streams (no timeout); the global `fetch` until the transport is initialized. */
  get sseFetch(): typeof fetch {
    return this._sseFetch ?? fetch;
  }

  /**
   * Build the API base URL, always ending in `/v1`.
   *
   * @returns `<domain>/v1` when `domain` already contains a scheme, otherwise
   *          `<protocol>://<domain>/v1`.
   */
  getBaseUrl(): string {
    // If `domain` already contains a scheme, treat it as a full base URL prefix.
    if (
      this.domain.startsWith("http://") ||
      this.domain.startsWith("https://")
    ) {
      return `${stripV1Suffix(this.domain)}/v1`;
    }
    return `${this.protocol}://${stripV1Suffix(this.domain)}/v1`;
  }

  /**
   * Allocate the fetch wrappers for this instance (idempotent).
   *
   * Both wrappers share one base fetch; the HTTP wrapper uses
   * `requestTimeoutSeconds`, the SSE wrapper uses a timeout of 0 (disabled).
   */
  private initializeTransport(): void {
    if (this._transportInitialized) return;

    const { fetch: baseFetch, close } = createNodeFetch();
    this._fetch = createTimedFetch({
      baseFetch,
      timeoutSeconds: this.requestTimeoutSeconds,
      debug: this.debug,
      defaultHeaders: this.headers,
      label: "http",
    });
    this._sseFetch = createTimedFetch({
      baseFetch,
      timeoutSeconds: 0,
      debug: this.debug,
      defaultHeaders: this.headers,
      label: "sse",
    });
    this._closeTransport = close;
    this._transportInitialized = true;
  }
  /**
   * Ensure this configuration has transport helpers (fetch/SSE) allocated.
   *
   * On Node.js this creates a dedicated `undici` dispatcher; on browsers it
   * simply reuses the global fetch. Returns either `this` or a cloned config
   * with the transport initialized.
   */
  withTransportIfMissing(): ConnectionConfig {
    if (this._transportInitialized) {
      return this;
    }

    const clone = new ConnectionConfig({
      domain: this.domain,
      protocol: this.protocol,
      apiKey: this.apiKey,
      headers: { ...this.headers },
      requestTimeoutSeconds: this.requestTimeoutSeconds,
      debug: this.debug,
      useServerProxy: this.useServerProxy,
    });
    clone.initializeTransport();
    return clone;
  }

  /**
   * Close the Node.js agent owned by this configuration.
   *
   * Does nothing when the transport was never initialized. Repeated calls
   * share the same close operation.
   */
  async closeTransport(): Promise<void> {
    if (!this._transportInitialized) return;
    this._closePromise ??= this._closeTransport();
    await this._closePromise;
  }
}


================================================
FILE: sdks/sandbox/javascript/src/core/constants.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/** Default execd port; re-exported from the package root. */
export const DEFAULT_EXECD_PORT = 44772;
/** Default egress port; re-exported from the package root. */
export const DEFAULT_EGRESS_PORT = 18080;

/**
 * Default entrypoint command (`tail -f /dev/null`).
 * NOTE(review): presumably used when no entrypoint is given so the sandbox
 * process stays alive — confirm against the sandbox creation code.
 */
export const DEFAULT_ENTRYPOINT: string[] = ["tail", "-f", "/dev/null"];

/**
 * Default resource limits, keyed by resource name with string quantities.
 * NOTE(review): the quantity format ("1", "2Gi") looks Kubernetes-style — confirm.
 */
export const DEFAULT_RESOURCE_LIMITS: Record<string, string> = {
  cpu: "1",
  memory: "2Gi",
};

/** Default timeout, in seconds (what it bounds is decided by the consumers of this constant). */
export const DEFAULT_TIMEOUT_SECONDS = 600;
/** Default readiness timeout, in seconds. */
export const DEFAULT_READY_TIMEOUT_SECONDS = 30;
/** Default health-check polling interval, in milliseconds. */
export const DEFAULT_HEALTH_CHECK_POLLING_INTERVAL_MILLIS = 200;

/**
 * Default HTTP request timeout, in seconds.
 * NOTE(review): `ConnectionConfig` hard-codes the same value (30) as its
 * fallback instead of importing this constant — keep the two in sync.
 */
export const DEFAULT_REQUEST_TIMEOUT_SECONDS = 30;
/**
 * User-agent string used by `ConnectionConfig.userAgent`.
 * NOTE(review): the version is hard-coded here; presumably it must track the
 * package version — confirm against the release process.
 */
export const DEFAULT_USER_AGENT = "OpenSandbox-JS-SDK/0.1.5";


================================================
FILE: sdks/sandbox/javascript/src/core/exceptions.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * Error code carried by {@link SandboxError}.
 *
 * The string literals are the codes defined by the SDK itself (mirrored as
 * static members on `SandboxError`). The final `(string & {})` member keeps
 * the type open to any other string without collapsing the literal members
 * into plain `string`.
 */
export type SandboxErrorCode =
  | "INTERNAL_UNKNOWN_ERROR"
  | "READY_TIMEOUT"
  | "UNHEALTHY"
  | "INVALID_ARGUMENT"
  | "UNEXPECTED_RESPONSE"
  // Allow server-defined codes as well.
  | (string & {});

/**
 * Structured error payload attached to every {@link SandboxException}.
 *
 * - `code`: stable identifier meant for programmatic checks
 * - `message`: optional text meant for humans
 *
 * The static members expose the SDK-defined codes as named constants.
 */
export class SandboxError {
  static readonly INTERNAL_UNKNOWN_ERROR: SandboxErrorCode = "INTERNAL_UNKNOWN_ERROR";
  static readonly READY_TIMEOUT: SandboxErrorCode = "READY_TIMEOUT";
  static readonly UNHEALTHY: SandboxErrorCode = "UNHEALTHY";
  static readonly INVALID_ARGUMENT: SandboxErrorCode = "INVALID_ARGUMENT";
  static readonly UNEXPECTED_RESPONSE: SandboxErrorCode = "UNEXPECTED_RESPONSE";

  /** Programmatic error code. */
  readonly code: SandboxErrorCode;
  /** Optional human-readable description. */
  readonly message?: string;

  /**
   * @param code    Error code (SDK-defined or server-defined).
   * @param message Optional human-readable description.
   */
  constructor(code: SandboxErrorCode, message?: string) {
    this.code = code;
    this.message = message;
  }
}

/** Constructor options accepted by {@link SandboxException}. */
interface SandboxExceptionOpts {
  /** Message passed to the underlying `Error`. */
  message?: string;
  /** Underlying error or value that triggered this exception. */
  cause?: unknown;
  /** Structured payload; `SandboxException` falls back to `INTERNAL_UNKNOWN_ERROR` when omitted. */
  error?: SandboxError;
  /** Request identifier stored on the exception, if known. */
  requestId?: string;
}

/**
 * Root of the SDK's exception hierarchy.
 *
 * Every error thrown by this SDK is a {@link SandboxException} or one of its
 * subclasses. Each instance carries a structured {@link SandboxError}; when
 * none is supplied, one with code `INTERNAL_UNKNOWN_ERROR` (and no message)
 * is created.
 */
export class SandboxException extends Error {
  readonly name: string = "SandboxException";
  readonly error: SandboxError;
  readonly cause?: unknown;
  readonly requestId?: string;

  /**
   * @param opts Optional message, cause, structured error and request id.
   */
  constructor(opts: SandboxExceptionOpts = {}) {
    super(opts.message);
    const { cause, error, requestId } = opts;
    this.cause = cause;
    this.error = error ?? new SandboxError(SandboxError.INTERNAL_UNKNOWN_ERROR);
    this.requestId = requestId;
  }
}

/**
 * Exception describing a failed API interaction.
 *
 * In addition to the base fields it records the HTTP status code and the raw
 * response body when provided. If no structured error is supplied, one with
 * code `UNEXPECTED_RESPONSE` and the given message is used.
 */
export class SandboxApiException extends SandboxException {
  readonly name: string = "SandboxApiException";
  readonly statusCode?: number;
  readonly rawBody?: unknown;

  /**
   * @param opts Base exception options plus optional `statusCode` and `rawBody`.
   */
  constructor(opts: SandboxExceptionOpts & {
    statusCode?: number;
    rawBody?: unknown;
  }) {
    super({
      requestId: opts.requestId,
      error: opts.error ?? new SandboxError(SandboxError.UNEXPECTED_RESPONSE, opts.message),
      cause: opts.cause,
      message: opts.message,
    });
    const { statusCode, rawBody } = opts;
    this.statusCode = statusCode;
    this.rawBody = rawBody;
  }
}

/**
 * Exception whose structured error always has code `INTERNAL_UNKNOWN_ERROR`
 * and repeats the exception message.
 */
export class SandboxInternalException extends SandboxException {
  readonly name: string = "SandboxInternalException";

  /**
   * @param opts Optional message and cause.
   */
  constructor(opts: { message?: string; cause?: unknown }) {
    super({
      error: new SandboxError(SandboxError.INTERNAL_UNKNOWN_ERROR, opts.message),
      cause: opts.cause,
      message: opts.message,
    });
  }
}

/**
 * Exception whose structured error always has code `UNHEALTHY` and repeats
 * the exception message.
 */
export class SandboxUnhealthyException extends SandboxException {
  readonly name: string = "SandboxUnhealthyException";

  /**
   * @param opts Optional message and cause.
   */
  constructor(opts: { message?: string; cause?: unknown }) {
    super({
      error: new SandboxError(SandboxError.UNHEALTHY, opts.message),
      cause: opts.cause,
      message: opts.message,
    });
  }
}

/**
 * Exception whose structured error always has code `READY_TIMEOUT` and
 * repeats the exception message.
 */
export class SandboxReadyTimeoutException extends SandboxException {
  readonly name: string = "SandboxReadyTimeoutException";

  /**
   * @param opts Optional message and cause.
   */
  constructor(opts: { message?: string; cause?: unknown }) {
    super({
      error: new SandboxError(SandboxError.READY_TIMEOUT, opts.message),
      cause: opts.cause,
      message: opts.message,
    });
  }
}

/**
 * Exception whose structured error always has code `INVALID_ARGUMENT` and
 * repeats the exception message.
 */
export class InvalidArgumentException extends SandboxException {
  readonly name: string = "InvalidArgumentException";

  /**
   * @param opts Optional message and cause.
   */
  constructor(opts: { message?: string; cause?: unknown }) {
    super({
      error: new SandboxError(SandboxError.INVALID_ARGUMENT, opts.message),
      cause: opts.cause,
      message: opts.message,
    });
  }
}


================================================
FILE: sdks/sandbox/javascript/src/factory/adapterFactory.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import type { ConnectionConfig } from "../config/connection.js";
import type { SandboxFiles } from "../services/filesystem.js";
import type { Egress } from "../services/egress.js";
import type { ExecdCommands } from "../services/execdCommands.js";
import type { ExecdHealth } from "../services/execdHealth.js";
import type { ExecdMetrics } from "../services/execdMetrics.js";
import type { Sandboxes } from "../services/sandboxes.js";

/** Inputs for {@link AdapterFactory.createLifecycleStack}. */
export interface CreateLifecycleStackOptions {
  /** Connection settings; the default factory reads `apiKey`, `headers` and `fetch` from it. */
  connectionConfig: ConnectionConfig;
  /** Base URL handed to the lifecycle client. */
  lifecycleBaseUrl: string;
}

/** Services produced by {@link AdapterFactory.createLifecycleStack}. */
export interface LifecycleStack {
  /** Sandbox lifecycle service. */
  sandboxes: Sandboxes;
}

/** Inputs for {@link AdapterFactory.createExecdStack}. */
export interface CreateExecdStackOptions {
  /** Connection settings; the default factory reads `headers`, `fetch` and `sseFetch` from it. */
  connectionConfig: ConnectionConfig;
  /** Base URL handed to the execd client and adapters. */
  execdBaseUrl: string;
  /**
   * Extra headers for this endpoint. The default factory merges them over
   * `connectionConfig.headers`, so endpoint values win on key collisions.
   */
  endpointHeaders?: Record<string, string>;
}

/**
 * Services produced by {@link AdapterFactory.createExecdStack}.
 *
 * In the default factory, `commands` is built with the connection's
 * `sseFetch`; the other services use its regular `fetch`.
 */
export interface ExecdStack {
  /** Command execution service. */
  commands: ExecdCommands;
  /** Filesystem service. */
  files: SandboxFiles;
  /** Health service. */
  health: ExecdHealth;
  /** Metrics service. */
  metrics: ExecdMetrics;
}

/** Inputs for {@link AdapterFactory.createEgressStack}. */
export interface CreateEgressStackOptions {
  /** Connection settings; the default factory reads `headers` and `fetch` from it. */
  connectionConfig: ConnectionConfig;
  /** Base URL handed to the egress client. */
  egressBaseUrl: string;
  /**
   * Extra headers for this endpoint. The default factory merges them over
   * `connectionConfig.headers`, so endpoint values win on key collisions.
   */
  endpointHeaders?: Record<string, string>;
}

/** Services produced by {@link AdapterFactory.createEgressStack}. */
export interface EgressStack {
  /** Egress service. */
  egress: Egress;
}

/**
 * Factory abstraction to keep `Sandbox` and `SandboxManager` decoupled from concrete adapter implementations.
 *
 * This is primarily useful for advanced integrations (custom transports, dependency injection, testing).
 */
export interface AdapterFactory {
  /** Build the lifecycle service stack for the given base URL and connection settings. */
  createLifecycleStack(opts: CreateLifecycleStackOptions): LifecycleStack;
  /** Build the execd service stack (commands, files, health, metrics) for one execd endpoint. */
  createExecdStack(opts: CreateExecdStackOptions): ExecdStack;
  /** Build the egress service stack for one egress endpoint. */
  createEgressStack(opts: CreateEgressStackOptions): EgressStack;
}


================================================
FILE: sdks/sandbox/javascript/src/factory/defaultAdapterFactory.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { createExecdClient } from "../openapi/execdClient.js";
import { createEgressClient } from "../openapi/egressClient.js";
import { createLifecycleClient } from "../openapi/lifecycleClient.js";

import { CommandsAdapter } from "../adapters/commandsAdapter.js";
import { EgressAdapter } from "../adapters/egressAdapter.js";
import { FilesystemAdapter } from "../adapters/filesystemAdapter.js";
import { HealthAdapter } from "../adapters/healthAdapter.js";
import { MetricsAdapter } from "../adapters/metricsAdapter.js";
import { SandboxesAdapter } from "../adapters/sandboxesAdapter.js";

import type {
  AdapterFactory,
  CreateEgressStackOptions,
  CreateExecdStackOptions,
  CreateLifecycleStackOptions,
  EgressStack,
  ExecdStack,
  LifecycleStack,
} from "./adapterFactory.js";

/**
 * Default {@link AdapterFactory}: wires the generated OpenAPI clients to the
 * SDK's adapter classes.
 */
export class DefaultAdapterFactory implements AdapterFactory {
  /**
   * Create the lifecycle client from the connection's API key, headers and
   * fetch, and wrap it in a `SandboxesAdapter`.
   */
  createLifecycleStack(opts: CreateLifecycleStackOptions): LifecycleStack {
    const { connectionConfig, lifecycleBaseUrl } = opts;
    const client = createLifecycleClient({
      baseUrl: lifecycleBaseUrl,
      apiKey: connectionConfig.apiKey,
      headers: connectionConfig.headers,
      fetch: connectionConfig.fetch,
    });
    return { sandboxes: new SandboxesAdapter(client) };
  }

  /**
   * Create the execd client and its four adapters.
   *
   * Endpoint headers are layered over the connection headers. The commands
   * adapter receives the SSE fetch; everything else uses the regular fetch.
   */
  createExecdStack(opts: CreateExecdStackOptions): ExecdStack {
    const { connectionConfig, execdBaseUrl: baseUrl } = opts;
    const headers: Record<string, string> = {
      ...(connectionConfig.headers ?? {}),
      ...(opts.endpointHeaders ?? {}),
    };
    const client = createExecdClient({
      baseUrl,
      headers,
      fetch: connectionConfig.fetch,
    });

    const health = new HealthAdapter(client);
    const metrics = new MetricsAdapter(client);
    const files = new FilesystemAdapter(client, {
      baseUrl,
      fetch: connectionConfig.fetch,
      headers,
    });
    const commands = new CommandsAdapter(client, {
      baseUrl,
      fetch: connectionConfig.sseFetch,
      headers,
    });

    return { commands, files, health, metrics };
  }

  /**
   * Create the egress client (endpoint headers layered over the connection
   * headers) and wrap it in an `EgressAdapter`.
   */
  createEgressStack(opts: CreateEgressStackOptions): EgressStack {
    const { connectionConfig, egressBaseUrl } = opts;
    const headers: Record<string, string> = {
      ...(connectionConfig.headers ?? {}),
      ...(opts.endpointHeaders ?? {}),
    };
    const client = createEgressClient({
      baseUrl: egressBaseUrl,
      headers,
      fetch: connectionConfig.fetch,
    });
    const egress = new EgressAdapter(client);
    return { egress };
  }
}

/**
 * Convenience constructor for the default factory.
 *
 * @returns A new {@link DefaultAdapterFactory}, typed as {@link AdapterFactory}.
 */
export function createDefaultAdapterFactory(): AdapterFactory {
  const factory: AdapterFactory = new DefaultAdapterFactory();
  return factory;
}


================================================
FILE: sdks/sandbox/javascript/src/index.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

export {
  InvalidArgumentException,
  SandboxApiException,
  SandboxError,
  SandboxException,
  SandboxInternalException,
  SandboxReadyTimeoutException,
  SandboxUnhealthyException,
} from "./core/exceptions.js";

// Factory pattern (stable public interface; does NOT expose OpenAPI generated models).
export type { AdapterFactory } from "./factory/adapterFactory.js";
export { DefaultAdapterFactory, createDefaultAdapterFactory } from "./factory/defaultAdapterFactory.js";

export { ConnectionConfig } from "./config/connection.js";
export type { ConnectionConfigOptions, ConnectionProtocol } from "./config/connection.js";

export type {
  CreateSandboxRequest,
  CreateSandboxResponse,
  Endpoint,
  Host,
  ListSandboxesParams,
  ListSandboxesResponse,
  NetworkPolicy,
  NetworkRule,
  NetworkRuleAction,
  PVC,
  RenewSandboxExpirationRequest,
  RenewSandboxExpirationResponse,
  SandboxId,
  SandboxInfo,
  Volume,
} from "./models/sandboxes.js";

export type { Sandboxes } from "./services/sandboxes.js";

export { SandboxManager } from "./manager.js";
export type { SandboxFilter, SandboxManagerOptions } from "./manager.js";

export type { ExecdHealth } from "./services/execdHealth.js";
export type { ExecdMetrics } from "./services/execdMetrics.js";
export type {
  FileInfo,
  FileMetadata,
  Permission,
  RenameFileItem,
  ReplaceFileContentItem,
  SearchFilesResponse,
  FilesInfoResponse,
} from "./models/filesystem.js";

export type {
  CommandExecution,
  CommandLogs,
  CommandStatus,
  RunCommandOpts,
  ServerStreamEvent,
  CodeContextRequest,
  SupportedLanguage,
  Metrics,
  SandboxMetrics,
  PingResponse,
} from "./models/execd.js";
export type { ExecdCommands } from "./services/execdCommands.js";

export type {
  Execution,
  ExecutionComplete,
  ExecutionError,
  ExecutionHandlers,
  ExecutionInit,
  ExecutionResult,
  OutputMessage,
} from "./models/execution.js";
export { ExecutionEventDispatcher } from "./models/executionEventDispatcher.js";

export {
  DEFAULT_ENTRYPOINT,
  DEFAULT_EGRESS_PORT,
  DEFAULT_EXECD_PORT,
  DEFAULT_RESOURCE_LIMITS,
  DEFAULT_TIMEOUT_SECONDS,
  DEFAULT_READY_TIMEOUT_SECONDS,
  DEFAULT_HEALTH_CHECK_POLLING_INTERVAL_MILLIS,
  DEFAULT_REQUEST_TIMEOUT_SECONDS,
} from "./core/constants.js";

export type {
  SandboxConnectOptions,
  SandboxCreateOptions,
} from "./sandbox.js";
export { Sandbox } from "./sandbox.js";

export type {
  ContentReplaceEntry,
  MoveEntry,
  SearchEntry,
  SetPermissionEntry,
  WriteEntry,
} from "./models/filesystem.js";
export type { SandboxFiles } from "./services/filesystem.js";


================================================
FILE: sdks/sandbox/javascript/src/internal.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * INTERNAL / ADVANCED ENTRYPOINT
 *
 * This subpath exposes low-level OpenAPI clients and adapters for advanced integrations.
 * It is intentionally NOT exported from the root entrypoint (`@alibaba-group/opensandbox`),
 * because generated OpenAPI types are not considered stable public API.
 *
 * Import path:
 * - `@alibaba-group/opensandbox/internal`
 */

export { createLifecycleClient } from "./openapi/lifecycleClient.js";
export type { LifecycleClient } from "./openapi/lifecycleClient.js";
export { createExecdClient } from "./openapi/execdClient.js";
export type { ExecdClient } from "./openapi/execdClient.js";

// OpenAPI schema types (NOT stable public API; internal-only).
export type { paths as LifecyclePaths } from "./api/lifecycle.js";
export type { paths as ExecdPaths } from "./api/execd.js";

export { SandboxesAdapter } from "./adapters/sandboxesAdapter.js";
export { HealthAdapter } from "./adapters/healthAdapter.js";
export { MetricsAdapter } from "./adapters/metricsAdapter.js";
export { FilesystemAdapter } from "./adapters/filesystemAdapter.js";
export { CommandsAdapter } from "./adapters/commandsAdapter.js";

================================================
FILE: sdks/sandbox/javascript/src/manager.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { ConnectionConfig, type ConnectionConfigOptions } from "./config/connection.js";
import { createDefaultAdapterFactory } from "./factory/defaultAdapterFactory.js";
import type { AdapterFactory } from "./factory/adapterFactory.js";

import type { ListSandboxesResponse, SandboxId, SandboxInfo } from "./models/sandboxes.js";
import type { Sandboxes } from "./services/sandboxes.js";

/**
 * Options accepted by `SandboxManager.create`.
 *
 * Both fields are optional; `create` builds a `ConnectionConfig` and a default
 * adapter factory when they are omitted.
 */
export interface SandboxManagerOptions {
  /**
   * Connection configuration for calling the OpenSandbox Lifecycle API.
   *
   * Either a ready `ConnectionConfig` instance or the plain options used to construct one.
   */
  connectionConfig?: ConnectionConfig | ConnectionConfigOptions;
  /**
   * Advanced override: inject a custom adapter factory (custom transports, dependency injection).
   */
  adapterFactory?: AdapterFactory;
}

/**
 * Filter and pagination options accepted by `SandboxManager.listSandboxInfos`.
 *
 * Every field is optional and is forwarded unchanged to `Sandboxes.listSandboxes`.
 */
export interface SandboxFilter {
  /**
   * Filter by sandbox lifecycle states.
   */
  states?: string[];
  /**
   * Filter by metadata key-value pairs.
   */
  metadata?: Record<string, string>;
  /**
   * Pagination page number (1-indexed).
   */
  page?: number;
  /**
   * Number of items per page.
   */
  pageSize?: number;
}

/**
 * Administrative interface for managing sandboxes (list/get/pause/resume/kill/renew).
 *
 * For interacting *inside* a sandbox, use {@link Sandbox}.
 *
 * Instances are obtained through {@link SandboxManager.create}; call
 * {@link SandboxManager.close} when finished so the transport held by the manager's
 * connection configuration is released.
 */
export class SandboxManager {
  private readonly sandboxes: Sandboxes;
  private readonly connectionConfig: ConnectionConfig;

  private constructor(opts: { sandboxes: Sandboxes; connectionConfig: ConnectionConfig }) {
    this.sandboxes = opts.sandboxes;
    this.connectionConfig = opts.connectionConfig;
  }

  /**
   * Build a manager from optional connection settings and an optional adapter factory.
   *
   * @param opts Connection configuration (instance or plain options) and adapter factory override.
   * @returns A manager bound to the lifecycle service produced by the adapter factory.
   * @throws Re-throws any error raised while resolving the base URL, creating the default
   *   adapter factory, or building the lifecycle stack. The transport is closed (best-effort)
   *   before the error propagates.
   */
  static create(opts: SandboxManagerOptions = {}): SandboxManager {
    const baseConnectionConfig = opts.connectionConfig instanceof ConnectionConfig
      ? opts.connectionConfig
      : new ConnectionConfig(opts.connectionConfig);
    const connectionConfig = baseConnectionConfig.withTransportIfMissing();
    let sandboxes: Sandboxes;
    try {
      // Everything after the transport has been attached lives inside the try block, so a
      // failure in any later step goes through the same cleanup path.
      const lifecycleBaseUrl = connectionConfig.getBaseUrl();
      const adapterFactory = opts.adapterFactory ?? createDefaultAdapterFactory();
      sandboxes = adapterFactory.createLifecycleStack({
        connectionConfig,
        lifecycleBaseUrl,
      }).sandboxes;
    } catch (err) {
      // Best-effort cleanup: a failure while closing must not mask the original error.
      void connectionConfig.closeTransport().catch(() => undefined);
      throw err;
    }
    return new SandboxManager({ sandboxes, connectionConfig });
  }

  /**
   * List sandboxes, optionally filtered by state/metadata and paginated.
   *
   * @param filter Filter and pagination settings; an empty filter is used when omitted.
   */
  listSandboxInfos(filter: SandboxFilter = {}): Promise<ListSandboxesResponse> {
    return this.sandboxes.listSandboxes({
      states: filter.states,
      metadata: filter.metadata,
      page: filter.page,
      pageSize: filter.pageSize,
    });
  }

  /** Fetch the info record of a single sandbox. */
  getSandboxInfo(sandboxId: SandboxId): Promise<SandboxInfo> {
    return this.sandboxes.getSandbox(sandboxId);
  }

  /** Kill a sandbox; delegates to the lifecycle service's `deleteSandbox`. */
  killSandbox(sandboxId: SandboxId): Promise<void> {
    return this.sandboxes.deleteSandbox(sandboxId);
  }

  /** Pause a sandbox. */
  pauseSandbox(sandboxId: SandboxId): Promise<void> {
    return this.sandboxes.pauseSandbox(sandboxId);
  }

  /** Resume a sandbox. */
  resumeSandbox(sandboxId: SandboxId): Promise<void> {
    return this.sandboxes.resumeSandbox(sandboxId);
  }

  /**
   * Renew expiration by setting expiresAt to now + timeoutSeconds.
   *
   * The new expiration is computed from the local clock and sent as an ISO-8601 string.
   * The service response is discarded.
   */
  async renewSandbox(sandboxId: SandboxId, timeoutSeconds: number): Promise<void> {
    const expiresAt = new Date(Date.now() + timeoutSeconds * 1000).toISOString();
    await this.sandboxes.renewSandboxExpiration(sandboxId, { expiresAt });
  }

  /**
   * Release the HTTP agent resources allocated for this manager instance.
   *
   * Each manager instance holds the `ConnectionConfig` returned by `withTransportIfMissing()`
   * at creation time and closes that configuration's transport here.
   *
   * This mirrors the Python SDK's default transport lifecycle.
   */
  async close(): Promise<void> {
    await this.connectionConfig.closeTransport();
  }
}

================================================
FILE: sdks/sandbox/javascript/src/models/execd.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import type { Execution } from "./execution.js";

/**
 * Domain models for execd interactions.
 *
 * IMPORTANT:
 * - These are NOT OpenAPI-generated types.
 * - They are intentionally stable and JS-friendly.
 */

/**
 * One event received from an execd execution stream.
 *
 * The index signature allows extra keys beyond the ones declared here;
 * `ExecutionEventDispatcher` reads `execution_count` and `execution_time` that way.
 */
export interface ServerStreamEvent extends Record<string, unknown> {
  /**
   * Event kind. The listed literals are the kinds `ExecutionEventDispatcher` handles;
   * any other string is accepted by the type and ignored by the dispatcher.
   */
  type:
    | "init"
    | "stdout"
    | "stderr"
    | "result"
    | "execution_count"
    | "execution_complete"
    | "error"
    | string;
  /** Event time; the dispatcher substitutes `Date.now()` when it is absent. */
  timestamp?: number;
  /** Text payload: the execution id for `init`, the output chunk for `stdout`/`stderr`. */
  text?: string;
  /** Result payload for `result` events (the dispatcher looks for a `text/plain` entry). */
  results?: Record<string, unknown>;
  /** Error payload for `error` events (the dispatcher reads `ename`/`evalue`/`traceback`). */
  error?: Record<string, unknown>;
}

/**
 * Request body naming the language of a code context.
 *
 * `language` is typed as a plain string, so values outside `SupportedLanguage` are accepted
 * by the type; extra keys are allowed through the index signature.
 */
export interface CodeContextRequest extends Record<string, unknown> {
  language: string;
}

/**
 * Language identifiers known to the SDK.
 */
export type SupportedLanguage =
  | "python"
  | "go"
  | "javascript"
  | "typescript"
  | "bash"
  | "java";

/**
 * Optional settings for running a command. Every field may be omitted.
 */
export interface RunCommandOpts {
  /**
   * Working directory for command execution (maps to API `cwd`).
   */
  workingDirectory?: string;
  /**
   * Run command in detached mode.
   */
  background?: boolean;
  /**
   * Maximum execution time in seconds; server will terminate the command when reached.
   * If omitted, the server will not enforce any timeout.
   */
  timeoutSeconds?: number;
  /**
   * Unix user ID used to run the command process.
   */
  uid?: number;
  /**
   * Unix group ID used to run the command process. Requires `uid`.
   */
  gid?: number;
  /**
   * Environment variables injected into the command process.
   */
  envs?: Record<string, string>;
}

/**
 * Status snapshot of a command. Every field is optional.
 */
export interface CommandStatus {
  /** Command identifier. */
  id?: string;
  // NOTE(review): presumably the command text that was submitted; confirm against the execd API.
  content?: string;
  /** Whether the command is still running. */
  running?: boolean;
  /** Exit code; the type also allows `null` (presumably while no exit code exists yet; confirm). */
  exitCode?: number | null;
  /** Error description, when one is reported. */
  error?: string;
  /** Start time. */
  startedAt?: Date;
  /** Finish time; the type also allows `null` (presumably while still running; confirm). */
  finishedAt?: Date | null;
}

/**
 * Log output of a command.
 */
export interface CommandLogs {
  /** Log text. */
  content: string;
  // NOTE(review): presumably a position marker for fetching logs incrementally; confirm.
  cursor?: number;
}

/**
 * Result of running a command; an alias of `Execution`.
 */
export type CommandExecution = Execution;

/**
 * Metrics payload with snake_case keys; every declared field is optional and extra keys are
 * allowed through the index signature.
 *
 * NOTE(review): presumably the raw execd shape that gets normalized into `SandboxMetrics`;
 * confirm against the metrics adapter.
 */
export interface Metrics extends Record<string, unknown> {
  cpu_count?: number;
  cpu_used_pct?: number;
  mem_total_mib?: number;
  mem_used_mib?: number;
  timestamp?: number;
}

/**
 * Normalized, JS-friendly metrics.
 *
 * Unlike `Metrics`, every field is required and camelCase.
 */
export interface SandboxMetrics {
  cpuCount: number;
  cpuUsedPercentage: number;
  memoryTotalMiB: number;
  memoryUsedMiB: number;
  timestamp: number;
}

/**
 * Ping response payload; no fields are declared, so callers get an untyped key/value map.
 */
export type PingResponse = Record<string, unknown>;

================================================
FILE: sdks/sandbox/javascript/src/models/execution.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * One chunk of stdout or stderr output.
 *
 * `ExecutionEventDispatcher` sets `isError` to `false` for stdout and `true` for stderr.
 */
export interface OutputMessage {
  /** Output text (the dispatcher uses an empty string when the event carries no text). */
  text: string;
  /** Event time (the dispatcher falls back to `Date.now()` when the event has none). */
  timestamp: number;
  /** `true` for stderr output, `false` for stdout. */
  isError?: boolean;
}

/**
 * One result produced by an execution.
 */
export interface ExecutionResult {
  /** Plain-text rendering of the result, when one could be extracted from `raw`. */
  text?: string;
  /** Event time. */
  timestamp: number;
  /**
   * Raw mime map from execd event (e.g. "text/plain", "text/html", ...)
   */
  raw?: Record<string, unknown>;
}

/**
 * Error reported by an execution.
 *
 * `ExecutionEventDispatcher` fills `name` from the event's `ename` (or `name`) and `value`
 * from `evalue` (or `value`); missing values become empty strings.
 */
export interface ExecutionError {
  name: string;
  value: string;
  timestamp: number;
  /** Traceback lines; empty when the event carried no array. */
  traceback: string[];
}

/**
 * Completion marker of an execution.
 */
export interface ExecutionComplete {
  timestamp: number;
  /** Taken from the event's `execution_time`; `0` when that field is not a number. */
  executionTimeMs: number;
}

/**
 * Initialization marker of an execution.
 */
export interface ExecutionInit {
  /** Execution id taken from the event text; an empty string when the event carried none. */
  id: string;
  timestamp: number;
}

/**
 * Accumulated state of one execution.
 *
 * `ExecutionEventDispatcher` mutates an object of this shape as events arrive: it appends to
 * `logs` and `result` and sets `id`, `executionCount`, `error` and `complete`.
 */
export interface Execution {
  /** Execution id, set from a non-empty `init` event. */
  id?: string;
  /** Set from an `execution_count` event carrying a numeric count. */
  executionCount?: number;
  /** Output chunks in arrival order, split by stream. */
  logs: {
    stdout: OutputMessage[];
    stderr: OutputMessage[];
  };
  /** Results in arrival order. */
  result: ExecutionResult[];
  /** Most recent error reported by an `error` event. */
  error?: ExecutionError;
  /** Set once an `execution_complete` event has been dispatched. */
  complete?: ExecutionComplete;
}

/**
 * Optional callbacks invoked by `ExecutionEventDispatcher` while events are dispatched.
 *
 * Each callback may be synchronous or return a promise; the dispatcher awaits it before
 * returning from `dispatch`.
 */
export interface ExecutionHandlers {
  /**
   * Optional low-level hook for every server-sent event (SSE) received.
   * Kept as `unknown` to avoid coupling to a specific OpenAPI schema module.
   */
  onEvent?: (ev: unknown) => void | Promise<void>;
  /** Called for each stdout chunk, after it was appended to the execution logs. */
  onStdout?: (msg: OutputMessage) => void | Promise<void>;
  /** Called for each stderr chunk, after it was appended to the execution logs. */
  onStderr?: (msg: OutputMessage) => void | Promise<void>;
  /** Called for each result, after it was appended to the execution results. */
  onResult?: (res: ExecutionResult) => void | Promise<void>;
  /** Called when the execution reports completion. */
  onExecutionComplete?: (c: ExecutionComplete) => void | Promise<void>;
  /** Called when an `error` event carries an error payload. */
  onError?: (err: ExecutionError) => void | Promise<void>;
  /** Called for `init` events, even when the event carried no id. */
  onInit?: (init: ExecutionInit) => void | Promise<void>;
}

================================================
FILE: sdks/sandbox/javascript/src/models/executionEventDispatcher.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import type { Execution, ExecutionComplete, ExecutionError, ExecutionHandlers, ExecutionInit, ExecutionResult, OutputMessage } from "./execution.js";
import type { ServerStreamEvent } from "./execd.js";

/**
 * Pick a plain-text rendering out of a result payload.
 *
 * Looks at the keys `text/plain`, `text` and `textPlain` in that order and returns the first
 * value that is neither `null` nor `undefined`, converted with `String`. Returns `undefined`
 * when the payload is missing, is not an object, or none of the keys holds a value.
 */
function extractText(results: ServerStreamEvent["results"] | undefined): string | undefined {
  const usable = Boolean(results) && typeof results === "object";
  if (!usable) return undefined;

  const bag = results as any;
  for (const key of ["text/plain", "text", "textPlain"]) {
    const candidate = bag[key];
    if (candidate != null) return String(candidate);
  }
  return undefined;
}

/**
 * Routes streamed execution events into an {@link Execution} record and user callbacks.
 *
 * The `execution` object passed to the constructor is mutated in place (logs and results are
 * appended; `id`, `executionCount`, `complete` and `error` are assigned), and the matching
 * optional callback in {@link ExecutionHandlers} is awaited for each event.
 */
export class ExecutionEventDispatcher {
  constructor(
    private readonly execution: Execution,
    private readonly handlers?: ExecutionHandlers,
  ) {}

  /**
   * Apply a single event.
   *
   * `onEvent` is awaited first for every event. Afterwards the event is applied to the
   * execution record and the type-specific callback is awaited. Unknown event types are
   * ignored.
   */
  async dispatch(ev: ServerStreamEvent): Promise<void> {
    const { execution, handlers } = this;

    // The raw-event hook always runs first, whatever the event type is.
    await handlers?.onEvent?.(ev);

    // The fallback timestamp is taken after the raw-event hook has completed.
    const timestamp = ev.timestamp ?? Date.now();
    const kind = ev.type;

    if (kind === "init") {
      const id = ev.text ?? "";
      // Only a non-empty id is stored, but the callback still fires with the empty one.
      if (id) execution.id = id;
      const init: ExecutionInit = { id, timestamp };
      await handlers?.onInit?.(init);
      return;
    }

    if (kind === "stdout" || kind === "stderr") {
      const isError = kind === "stderr";
      const message: OutputMessage = { text: ev.text ?? "", timestamp, isError };
      if (isError) {
        execution.logs.stderr.push(message);
        await handlers?.onStderr?.(message);
      } else {
        execution.logs.stdout.push(message);
        await handlers?.onStdout?.(message);
      }
      return;
    }

    if (kind === "result") {
      const outcome: ExecutionResult = {
        text: extractText(ev.results),
        timestamp,
        raw: ev.results as any,
      };
      execution.result.push(outcome);
      await handlers?.onResult?.(outcome);
      return;
    }

    if (kind === "execution_count") {
      // No callback exists for this event; it only updates the record.
      const count = (ev as any).execution_count;
      if (typeof count === "number") execution.executionCount = count;
      return;
    }

    if (kind === "execution_complete") {
      const elapsed = (ev as any).execution_time;
      const complete: ExecutionComplete = {
        timestamp,
        executionTimeMs: typeof elapsed === "number" ? elapsed : 0,
      };
      execution.complete = complete;
      await handlers?.onExecutionComplete?.(complete);
      return;
    }

    if (kind === "error") {
      const payload = ev.error as any;
      // An error event without a payload leaves the record untouched.
      if (!payload) return;
      const failure: ExecutionError = {
        name: String(payload.ename ?? payload.name ?? ""),
        value: String(payload.evalue ?? payload.value ?? ""),
        timestamp,
        traceback: Array.isArray(payload.traceback) ? payload.traceback.map(String) : [],
      };
      execution.error = failure;
      await handlers?.onError?.(failure);
    }
  }
}

================================================
FILE: sdks/sandbox/javascript/src/models/filesystem.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * Domain models for filesystem.
 *
 * IMPORTANT:
 * - These are NOT OpenAPI-generated types.
 * - They are intentionally stable and JS-friendly.
 */

/**
 * Information about one filesystem entry. Only `path` is required; extra keys are allowed
 * through the index signature.
 */
export interface FileInfo extends Record<string, unknown> {
  /** Path of the entry. */
  path: string;
  /** Size of the entry (NOTE(review): presumably in bytes; confirm against the execd API). */
  size?: number;
  /**
   * Last modification time.
   */
  modifiedAt?: Date;
  /**
   * Creation time.
   */
  createdAt?: Date;
  /** Permission mode as a number. */
  mode?: number;
  /** Owner name. */
  owner?: string;
  /** Group name. */
  group?: string;
}

/**
 * Permission settings: a required `mode` plus optional owner and group.
 */
export interface Permission extends Record<string, unknown> {
  mode: number;
  owner?: string;
  group?: string;
}

/**
 * Path together with optional permission attributes.
 */
export interface FileMetadata extends Record<string, unknown> {
  path: string;
  mode?: number;
  owner?: string;
  group?: string;
}

/**
 * One rename instruction: source path and destination path.
 */
export interface RenameFileItem extends Record<string, unknown> {
  src: string;
  dest: string;
}

/**
 * One content replacement: the text to look for (`old`) and its replacement (`new`).
 */
export interface ReplaceFileContentItem extends Record<string, unknown> {
  old: string;
  new: string;
}

/**
 * Map from a string key to the matching `FileInfo`.
 *
 * NOTE(review): the key is presumably the requested path; confirm against the filesystem adapter.
 */
export type FilesInfoResponse = Record<string, FileInfo>;

/**
 * Search result: a flat list of `FileInfo` entries.
 */
export type SearchFilesResponse = FileInfo[];

// High-level filesystem facade models used by `sandbox.files`.

/**
 * One file to write: target path, optional data and optional permission attributes.
 */
export interface WriteEntry {
  /** Target path of the file. */
  path: string;
  /**
   * File data to upload.
   *
   * Supports:
   * - string / bytes / Blob (in-memory)
   * - AsyncIterable<Uint8Array> or ReadableStream<Uint8Array> (streaming upload for large files)
   */
  data?: string | Uint8Array | ArrayBuffer | Blob | AsyncIterable<Uint8Array> | ReadableStream<Uint8Array>;
  /** Permission mode as a number. */
  mode?: number;
  /** Owner name. */
  owner?: string;
  /** Group name. */
  group?: string;
}

/**
 * One search request: where to search and an optional pattern.
 */
export interface SearchEntry {
  path: string;
  // NOTE(review): pattern syntax (glob vs. regex) is not visible here; confirm against the execd API.
  pattern?: string;
}

/**
 * One move instruction: source path and destination path.
 */
export interface MoveEntry {
  src: string;
  dest: string;
}

/**
 * One content replacement inside the file at `path`: `oldContent` is replaced by `newContent`.
 */
export interface ContentReplaceEntry {
  path: string;
  oldContent: string;
  newContent: string;
}

/**
 * Permission change for the entry at `path`: required `mode`, optional owner and group.
 */
export interface SetPermissionEntry {
  path: string;
  mode: number;
  owner?: string;
  group?: string;
}

================================================
FILE: sdks/sandbox/javascript/src/models/sandboxes.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * Domain models for sandbox lifecycle.
 *
 * IMPORTANT:
 * - These are NOT OpenAPI-generated types.
 * - They are intentionally stable and JS-friendly.
 *
 * The internal OpenAPI schemas may change frequently; adapters map responses into these models.
 */

/**
 * Identifier of a sandbox (a plain string).
 */
export type SandboxId = string;

/**
 * Credentials attached to an image reference (see `ImageSpec.auth`). Every field is optional.
 */
export interface ImageAuth extends Record<string, unknown> {
  username?: string;
  password?: string;
  token?: string;
}

/**
 * Container image reference with optional credentials.
 */
export interface ImageSpec {
  /** Image uri, e.g. `python:3.11`. */
  uri: string;
  auth?: ImageAuth;
}

/**
 * Resource limits as a string-to-string map (sent as `resourceLimits` in `CreateSandboxRequest`).
 */
export type ResourceLimits = Record<string, string>;

/**
 * Action of a network rule or of the policy default.
 */
export type NetworkRuleAction = "allow" | "deny";

/**
 * One egress rule: an action applied to a target domain.
 */
export interface NetworkRule extends Record<string, unknown> {
  /**
   * Whether to allow or deny matching targets.
   */
  action: NetworkRuleAction;
  /**
   * FQDN or wildcard domain (e.g., "example.com", "*.example.com").
   * IP/CIDR is not supported in the egress MVP.
   */
  target: string;
}

/**
 * Outbound network policy: an optional default action plus an ordered list of egress rules.
 */
export interface NetworkPolicy extends Record<string, unknown> {
  /**
   * Default action when no egress rule matches. Defaults to "deny".
   */
  defaultAction?: NetworkRuleAction;
  /**
   * List of egress rules evaluated in order.
   */
  egress?: NetworkRule[];
}

// ============================================================================
// Volume Models
// ============================================================================

/**
 * Host path bind mount backend.
 *
 * Maps a directory on the host filesystem into the container.
 * Only available when the runtime supports host mounts.
 */
export interface Host extends Record<string, unknown> {
  /**
   * Absolute path on the host filesystem to mount.
   */
  path: string;
}

/**
 * Kubernetes PersistentVolumeClaim mount backend.
 *
 * References an existing PVC in the same namespace as the sandbox pod.
 * Only available in Kubernetes runtime.
 */
export interface PVC extends Record<string, unknown> {
  /**
   * Name of the PersistentVolumeClaim in the same namespace.
   */
  claimName: string;
}

/**
 * Storage mount definition for a sandbox.
 *
 * Each volume entry contains:
 * - A unique name identifier
 * - Exactly one backend (host, pvc) with backend-specific fields
 * - Common mount settings (mountPath, readOnly, subPath)
 *
 * Note: the "exactly one backend" rule is not enforced by this type; `host` and `pvc` are
 * both optional, so a value with neither or both still type-checks.
 */
export interface Volume extends Record<string, unknown> {
  /**
   * Unique identifier for the volume within the sandbox.
   */
  name: string;
  /**
   * Host path bind mount backend (mutually exclusive with pvc).
   */
  host?: Host;
  /**
   * Kubernetes PVC mount backend (mutually exclusive with host).
   */
  pvc?: PVC;
  /**
   * Absolute path inside the container where the volume is mounted.
   */
  mountPath: string;
  /**
   * If true, the volume is mounted as read-only. Defaults to false (read-write).
   */
  readOnly?: boolean;
  /**
   * Optional subdirectory under the backend path to mount.
   */
  subPath?: string;
}

/**
 * Lifecycle state of a sandbox.
 *
 * The listed literals are the known states; the trailing `string` member means any other
 * string is accepted by the type as well.
 */
export type SandboxState =
  | "Creating"
  | "Running"
  | "Pausing"
  | "Paused"
  | "Resuming"
  | "Deleting"
  | "Deleted"
  | "Error"
  | string;

/**
 * Status of a sandbox: its state plus an optional reason and message.
 */
export interface SandboxStatus extends Record<string, unknown> {
  state: SandboxState;
  reason?: string;
  message?: string;
}

/**
 * Description of one sandbox.
 */
export interface SandboxInfo extends Record<string, unknown> {
  /** Sandbox identifier. */
  id: SandboxId;
  /** Image the sandbox was created from. */
  image: ImageSpec;
  /** Entrypoint command of the sandbox. */
  entrypoint: string[];
  /** Custom metadata key-value pairs. */
  metadata?: Record<string, string>;
  /** Current lifecycle status. */
  status: SandboxStatus;
  /**
   * Sandbox creation time.
   */
  createdAt: Date;
  /**
   * Sandbox expiration time (server-side TTL).
   */
  expiresAt: Date | null;
}

/**
 * Request for creating a sandbox. `image`, `entrypoint` and `resourceLimits` are required.
 */
export interface CreateSandboxRequest extends Record<string, unknown> {
  /** Image to run. */
  image: ImageSpec;
  /** Entrypoint command. */
  entrypoint: string[];
  /**
   * Timeout in seconds (server semantics).
   */
  timeout?: number | null;
  /** Resource limits for the sandbox. */
  resourceLimits: ResourceLimits;
  /** Environment variables. */
  env?: Record<string, string>;
  /** Custom metadata key-value pairs. */
  metadata?: Record<string, string>;
  /**
   * Optional outbound network policy for the sandbox.
   */
  networkPolicy?: NetworkPolicy;
  /**
   * Optional list of volume mounts for persistent storage.
   */
  volumes?: Volume[];
  /** Extension parameters; values are untyped. */
  extensions?: Record<string, unknown>;
}

/**
 * Response returned after creating a sandbox.
 */
export interface CreateSandboxResponse extends Record<string, unknown> {
  /** Identifier of the created sandbox. */
  id: SandboxId;
  /** Lifecycle status at creation time. */
  status: SandboxStatus;
  /** Custom metadata key-value pairs. */
  metadata?: Record<string, string>;
  /**
   * Sandbox expiration time after creation.
   */
  expiresAt: Date | null;
  /**
   * Sandbox creation time.
   */
  createdAt: Date;
  /** Entrypoint command of the sandbox. */
  entrypoint: string[];
}

/**
 * Pagination details of a list response.
 */
export interface PaginationInfo extends Record<string, unknown> {
  page: number;
  pageSize: number;
  totalItems: number;
  totalPages: number;
  hasNextPage: boolean;
}

/**
 * Result of listing sandboxes: the items plus optional pagination details.
 */
export interface ListSandboxesResponse extends Record<string, unknown> {
  items: SandboxInfo[];
  pagination?: PaginationInfo;
}

/**
 * Request for renewing a sandbox's expiration.
 */
export interface RenewSandboxExpirationRequest {
  /** New expiration time as a string (`SandboxManager.renewSandbox` sends `Date#toISOString()` output). */
  expiresAt: string;
}

/**
 * Response of a renew-expiration call.
 */
export interface RenewSandboxExpirationResponse extends Record<string, unknown> {
  /**
   * Updated expiration time (if the server returns it).
   */
  expiresAt?: Date;
}

/**
 * Network endpoint of a sandbox together with any headers required to reach it.
 */
export interface Endpoint extends Record<string, unknown> {
  /** Endpoint address. */
  endpoint: string;
  /**
   * Headers that must be included on every request targeting this endpoint
   * (e.g. when the server requires them for routing or auth). Omit or empty if not required.
   */
  headers?: Record<string, string>;
}

/**
 * Query parameters for listing sandboxes. Every field is optional.
 */
export interface ListSandboxesParams {
  /**
   * Filter by lifecycle state (the API supports multiple `state` query params).
   * Example: `{ states: ["Running", "Paused"] }`
   */
  states?: string[];
  /**
   * Filter by metadata key-value pairs.
   * NOTE: This will be encoded to a single `metadata` query parameter as described in the spec.
   */
  metadata?: Record<string, string>;
  /** Page number to fetch. */
  page?: number;
  /** Number of items per page. */
  pageSize?: number;
};


================================================
FILE: sdks/sandbox/javascript/src/openapi/egressClient.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import createClient from "openapi-fetch";
import type { Client } from "openapi-fetch";

import type { paths as EgressPaths } from "../api/egress.js";

/**
 * openapi-fetch client typed with the egress API paths.
 */
export type EgressClient = Client<EgressPaths>;

/**
 * Options accepted by `createEgressClient`.
 */
export interface CreateEgressClientOptions {
  /**
   * Base URL to the sandbox egress sidecar API.
   */
  baseUrl: string;
  /**
   * Extra headers applied to every request.
   */
  headers?: Record<string, string>;
  /**
   * Custom fetch implementation.
   */
  fetch?: typeof fetch;
}

/**
 * Create an openapi-fetch client typed with the egress API paths.
 *
 * @param opts Base URL, optional extra headers and optional custom `fetch`; all three are
 *   passed through to the client factory unchanged.
 * @returns The typed client.
 */
export function createEgressClient(opts: CreateEgressClientOptions): EgressClient {
  // Prefer `createClient.default` when it exists (presumably a CJS/ESM interop wrapper),
  // otherwise use the imported function itself.
  const interop = createClient as unknown as { default?: typeof createClient };
  const factory = interop.default ?? createClient;

  const { baseUrl, headers, fetch: fetchImpl } = opts;
  return factory<EgressPaths>({ baseUrl, headers, fetch: fetchImpl });
}


================================================
FILE: sdks/sandbox/javascript/src/openapi/execdClient.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import createClient from "openapi-fetch";
import type { Client } from "openapi-fetch";

import type { paths as ExecdPaths } from "../api/execd.js";

/**
 * openapi-fetch client typed with the execd API paths.
 */
export type ExecdClient = Client<ExecdPaths>;

/**
 * Options accepted by `createExecdClient`.
 */
export interface CreateExecdClientOptions {
  /**
   * Base URL to the Execd API (no `/v1` prefix).
   * Examples:
   * - `http://localhost:44772`
   * - `http://api.opensandbox.io/sandboxes/<id>/port/44772`
   */
  baseUrl: string;
  /**
   * Extra headers applied to every request.
   */
  headers?: Record<string, string>;
  /**
   * Custom fetch implementation.
   *
   * Useful for proxies, custom TLS, request tracing, retries, or running in environments
   * where a global `fetch` is not available.
   */
  fetch?: typeof fetch;
}

/**
 * Create an openapi-fetch client typed with the execd API paths.
 *
 * @param opts Base URL, optional extra headers and optional custom `fetch`; all three are
 *   passed through to the client factory unchanged.
 * @returns The typed client.
 */
export function createExecdClient(opts: CreateExecdClientOptions): ExecdClient {
  // Prefer `createClient.default` when it exists (presumably a CJS/ESM interop wrapper),
  // otherwise use the imported function itself.
  const interop = createClient as unknown as { default?: typeof createClient };
  const factory = interop.default ?? createClient;

  const { baseUrl, headers, fetch: fetchImpl } = opts;
  return factory<ExecdPaths>({ baseUrl, headers, fetch: fetchImpl });
}

================================================
FILE: sdks/sandbox/javascript/src/openapi/lifecycleClient.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import createClient from "openapi-fetch";
import type { Client } from "openapi-fetch";

import type { paths as LifecyclePaths } from "../api/lifecycle.js";

/**
 * openapi-fetch client typed with the lifecycle API paths.
 */
export type LifecycleClient = Client<LifecyclePaths>;

/**
 * Options accepted by `createLifecycleClient`. Every field is optional.
 */
export interface CreateLifecycleClientOptions {
  /**
   * Base URL to OpenSandbox Lifecycle API, including the `/v1` prefix.
   * Example: `http://localhost:8080/v1`
   *
   * `createLifecycleClient` falls back to `http://localhost:8080/v1` when omitted.
   */
  baseUrl?: string;
  /**
   * API key for `OPEN-SANDBOX-API-KEY` header.
   * If omitted, reads from `process.env.OPEN_SANDBOX_API_KEY` when available.
   */
  apiKey?: string;
  /**
   * Extra headers applied to every request.
   */
  headers?: Record<string, string>;
  /**
   * Custom fetch implementation.
   *
   * Useful for proxies, custom TLS, request tracing, retries, or running in environments
   * where a global `fetch` is not available.
   */
  fetch?: typeof fetch;
}

/**
 * Read the API key from the `OPEN_SANDBOX_API_KEY` environment variable.
 *
 * `process` is reached through `globalThis` so the module neither needs `@types/node` nor
 * fails where `process` does not exist (e.g. browsers).
 *
 * @returns The variable's value when it is a non-empty string, otherwise `undefined`.
 */
function readEnvApiKey(): string | undefined {
  const candidate = (globalThis as any)?.process?.env?.OPEN_SANDBOX_API_KEY;
  if (typeof candidate !== "string" || candidate.length === 0) return undefined;
  return candidate;
}

/**
 * Create an openapi-fetch client typed with the lifecycle API paths.
 *
 * The API key is taken from `opts.apiKey`, falling back to the `OPEN_SANDBOX_API_KEY`
 * environment variable. It is sent as the `OPEN-SANDBOX-API-KEY` header unless `opts.headers`
 * already carries a non-empty value for that header under any letter case.
 *
 * @param opts Base URL (defaults to `http://localhost:8080/v1`), API key, extra headers and
 *   custom `fetch`.
 * @returns The typed client.
 */
export function createLifecycleClient(opts: CreateLifecycleClientOptions = {}): LifecycleClient {
  const apiKey = opts.apiKey ?? readEnvApiKey();

  // Copy so the caller's header object is never mutated.
  const headers: Record<string, string> = {
    ...(opts.headers ?? {}),
  };

  // HTTP header names are case-insensitive, so an explicit `open-sandbox-api-key` entry must
  // count as "already set". A case-sensitive lookup would add a second spelling of the same
  // header and leave the outcome to however the HTTP layer merges the two.
  const apiKeyHeader = "OPEN-SANDBOX-API-KEY";
  const spellings = Object.keys(headers).filter(
    (name) => name.toLowerCase() === apiKeyHeader.toLowerCase(),
  );
  const alreadySet = spellings.some((name) => Boolean(headers[name]));

  if (apiKey && !alreadySet) {
    // Drop empty placeholders under other spellings so exactly one entry is sent.
    for (const name of spellings) delete headers[name];
    headers[apiKeyHeader] = apiKey;
  }

  // Prefer `createClient.default` when it exists (presumably a CJS/ESM interop wrapper),
  // otherwise use the imported function itself.
  const createClientFn =
    (createClient as unknown as { default?: typeof createClient }).default ?? createClient;
  return createClientFn<LifecyclePaths>({
    baseUrl: opts.baseUrl ?? "http://localhost:8080/v1",
    headers,
    fetch: opts.fetch,
  });
}

================================================
FILE: sdks/sandbox/javascript/src/sandbox.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import {
  DEFAULT_ENTRYPOINT,
  DEFAULT_EGRESS_PORT,
  DEFAULT_EXECD_PORT,
  DEFAULT_HEALTH_CHECK_POLLING_INTERVAL_MILLIS,
  DEFAULT_READY_TIMEOUT_SECONDS,
  DEFAULT_RESOURCE_LIMITS,
  DEFAULT_TIMEOUT_SECONDS,
} from "./core/constants.js";
import { ConnectionConfig, type ConnectionConfigOptions } from "./config/connection.js";
import type { SandboxFiles } from "./services/filesystem.js";
import type { Egress } from "./services/egress.js";
import { createDefaultAdapterFactory } from "./factory/defaultAdapterFactory.js";
import type { AdapterFactory } from "./factory/adapterFactory.js";

import type { Sandboxes } from "./services/sandboxes.js";
import type { ExecdCommands } from "./services/execdCommands.js";
import type { ExecdHealth } from "./services/execdHealth.js";
import type { ExecdMetrics } from "./services/execdMetrics.js";
import type {
  CreateSandboxRequest,
  Endpoint,
  NetworkPolicy,
  NetworkRule,
  RenewSandboxExpirationResponse,
  SandboxId,
  SandboxInfo,
  Volume,
} from "./models/sandboxes.js";
import { SandboxReadyTimeoutException } from "./core/exceptions.js";

/**
 * Options accepted by {@link Sandbox.create}.
 *
 * Only `image` is required; every other field falls back to an SDK default
 * (see the per-field notes).
 */
export interface SandboxCreateOptions {
  /**
   * Connection configuration for calling the OpenSandbox Lifecycle API and the sandbox's execd API.
   *
   * Either a ready `ConnectionConfig` instance or plain options from which one is constructed.
   */
  connectionConfig?: ConnectionConfig | ConnectionConfigOptions;
  /**
   * Advanced override: inject a custom adapter factory (custom transports, dependency injection).
   *
   * When omitted, `createDefaultAdapterFactory()` is used.
   */
  adapterFactory?: AdapterFactory;

  /**
   * Container image uri, e.g. `python:3.11`
   *
   * A bare string is sent as `{ uri }`; the object form additionally carries optional
   * registry credentials in `auth`.
   */
  image:
    | string
    | { uri: string; auth?: { username: string; password: string } };

  /**
   * Entrypoint command for the sandbox (defaults to tail -f /dev/null).
   */
  entrypoint?: string[];
  /**
   * Environment variables to inject into the sandbox runtime.
   * Defaults to an empty object.
   */
  env?: Record<string, string>;
  /**
   * Custom metadata tags (used for filtering/management).
   * Defaults to an empty object.
   */
  metadata?: Record<string, string>;
  /**
   * Optional outbound network policy for the sandbox.
   * If provided without defaultAction, defaults to "deny".
   */
  networkPolicy?: NetworkPolicy;
  /**
   * Optional list of volume mounts for persistent storage.
   * Each volume specifies a backend (host path or PVC) and mount configuration.
   *
   * `Sandbox.create` throws if a volume sets neither or both of `host` and `pvc`.
   */
  volumes?: Volume[];
  /**
   * Opaque extension parameters passed through to the server as-is.
   * Defaults to an empty object.
   */
  extensions?: Record<string, string>;

  /**
   * Resource limits applied to the sandbox container.
   *
   * This is forwarded to the Lifecycle API as `resourceLimits`.
   * Defaults to `DEFAULT_RESOURCE_LIMITS` when omitted.
   */
  resource?: Record<string, string>;
  /**
   * Sandbox timeout in seconds. Set to `null` to require explicit cleanup.
   *
   * - `undefined`: `DEFAULT_TIMEOUT_SECONDS` is used.
   * - `null`: no `timeout` field is sent in the create request.
   * - number: floored to an integer; `NaN` and `±Infinity` make `Sandbox.create` throw.
   */
  timeoutSeconds?: number | null;

  /**
   * Skip readiness checks during create/connect.
   *
   * When true, the SDK will not wait for lifecycle state `Running` or perform the health check.
   * The returned sandbox instance may not be ready yet.
   */
  skipHealthCheck?: boolean;
  /**
   * Optional custom readiness check used by {@link Sandbox.waitUntilReady}.
   *
   * If provided, the SDK will call this function during readiness checks instead of
   * using the default `execd` ping check.
   */
  healthCheck?: (sbx: Sandbox) => boolean | Promise<boolean>;
  /**
   * Max time to wait for readiness, in seconds.
   * Defaults to `DEFAULT_READY_TIMEOUT_SECONDS`. Ignored when `skipHealthCheck` is true.
   */
  readyTimeoutSeconds?: number;
  /**
   * Polling interval for readiness checks, in milliseconds.
   * Defaults to `DEFAULT_HEALTH_CHECK_POLLING_INTERVAL_MILLIS`. Ignored when `skipHealthCheck` is true.
   */
  healthCheckPollingInterval?: number;
}

/**
 * Options accepted by {@link Sandbox.connect} and the static {@link Sandbox.resume}.
 *
 * Only `sandboxId` is required.
 */
export interface SandboxConnectOptions {
  /**
   * Connection configuration for calling the OpenSandbox APIs.
   *
   * Either a ready `ConnectionConfig` instance or plain options from which one is constructed.
   */
  connectionConfig?: ConnectionConfig | ConnectionConfigOptions;
  /**
   * Advanced override: inject a custom adapter factory (custom transports, dependency injection).
   *
   * When omitted, `createDefaultAdapterFactory()` is used.
   */
  adapterFactory?: AdapterFactory;
  /**
   * ID of the existing sandbox to connect to.
   */
  sandboxId: SandboxId;

  /**
   * Skip readiness checks after connecting.
   *
   * When true, the returned sandbox instance may not be ready yet.
   */
  skipHealthCheck?: boolean;
  /**
   * Optional custom readiness check used by {@link Sandbox.waitUntilReady}.
   *
   * When omitted, the default `execd` ping check is used.
   */
  healthCheck?: (sbx: Sandbox) => boolean | Promise<boolean>;
  /**
   * Max time to wait for readiness, in seconds.
   * Defaults to `DEFAULT_READY_TIMEOUT_SECONDS`.
   */
  readyTimeoutSeconds?: number;
  /**
   * Polling interval for readiness checks (milliseconds).
   * Defaults to `DEFAULT_HEALTH_CHECK_POLLING_INTERVAL_MILLIS`.
   */
  healthCheckPollingInterval?: number;
}

/**
 * Return a promise that resolves after roughly `ms` milliseconds.
 *
 * The delay is implemented with a single `setTimeout`; the promise never rejects.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}

/**
 * Normalize the user-facing `image` option into the request's image shape.
 *
 * A plain string becomes `{ uri }`. The object form is copied field by field so
 * that only `uri` and `auth` are forwarded (any extra properties are dropped).
 */
function toImageSpec(
  image: SandboxCreateOptions["image"]
): CreateSandboxRequest["image"] {
  if (typeof image !== "string") {
    const { uri, auth } = image;
    return { uri, auth };
  }
  return { uri: image };
}

/**
 * Client-side handle for a single sandbox.
 *
 * Instances are obtained through {@link Sandbox.create}, {@link Sandbox.connect} or
 * the static {@link Sandbox.resume}; the constructor is private. A handle bundles the
 * lifecycle service with the execd services (commands, files, health, metrics) that
 * were built for the sandbox's execd endpoint, plus an egress service that is kept
 * internal and exposed through {@link Sandbox.getEgressPolicy} and
 * {@link Sandbox.patchEgressRules}.
 */
export class Sandbox {
  /** Identifier of the sandbox this handle is bound to. */
  readonly id: SandboxId;
  /** Connection configuration (with transport) used by all services of this handle. */
  readonly connectionConfig: ConnectionConfig;

  /**
   * Lifecycle (sandbox management) service.
   */
  readonly sandboxes: Sandboxes;

  /**
   * Execd services.
   */
  readonly commands: ExecdCommands;
  /**
   * High-level filesystem facade (JS-friendly).
   */
  readonly files: SandboxFiles;
  /** Execd health service; its `ping()` is the default readiness check. */
  readonly health: ExecdHealth;
  /** Execd metrics service backing {@link Sandbox.getMetrics}. */
  readonly metrics: ExecdMetrics;

  /**
   * Internal state kept out of the public instance shape.
   *
   * This avoids nominal typing issues when multiple copies of the SDK exist in a dependency graph.
   */
  private static readonly _priv = new WeakMap<
    Sandbox,
    {
      adapterFactory: AdapterFactory;
      lifecycleBaseUrl: string;
      execdBaseUrl: string;
      egress: Egress;
    }
  >();

  /**
   * Wire up an already-resolved set of services. Performs no I/O.
   */
  private constructor(opts: {
    id: SandboxId;
    connectionConfig: ConnectionConfig;
    adapterFactory: AdapterFactory;
    lifecycleBaseUrl: string;
    execdBaseUrl: string;
    sandboxes: Sandboxes;
    commands: ExecdCommands;
    files: SandboxFiles;
    health: ExecdHealth;
    metrics: ExecdMetrics;
    egress: Egress;
  }) {
    this.id = opts.id;
    this.connectionConfig = opts.connectionConfig;
    Sandbox._priv.set(this, {
      adapterFactory: opts.adapterFactory,
      lifecycleBaseUrl: opts.lifecycleBaseUrl,
      execdBaseUrl: opts.execdBaseUrl,
      egress: opts.egress,
    });

    this.sandboxes = opts.sandboxes;
    this.commands = opts.commands;
    this.files = opts.files;
    this.health = opts.health;
    this.metrics = opts.metrics;
  }

  /**
   * Create a new sandbox and return a handle connected to it.
   *
   * Steps: validate options, create the sandbox through the lifecycle API, resolve
   * the execd and egress endpoints, build the service stacks and, unless
   * `skipHealthCheck` is set, wait until the sandbox passes its readiness check.
   *
   * If anything fails after the sandbox was created, a best-effort delete is issued
   * and the transport is closed before the original error is rethrown.
   *
   * @param opts Creation options; see {@link SandboxCreateOptions}.
   * @returns A handle bound to the newly created sandbox.
   * @throws Error when a volume does not specify exactly one backend, or when
   *   `timeoutSeconds` is not finite.
   * @throws SandboxReadyTimeoutException when the readiness check does not pass in time.
   */
  static async create(opts: SandboxCreateOptions): Promise<Sandbox> {
    // Validate caller input first, before any transport is acquired, so that a
    // rejected option cannot leave an open transport behind.

    // Validate volumes: exactly one backend must be specified per volume
    if (opts.volumes) {
      for (const vol of opts.volumes) {
        const backendsSpecified = [vol.host, vol.pvc].filter((b) => b !== undefined).length;
        if (backendsSpecified === 0) {
          throw new Error(
            `Volume '${vol.name}' must specify exactly one backend (host, pvc), but none was provided.`
          );
        }
        if (backendsSpecified > 1) {
          throw new Error(
            `Volume '${vol.name}' must specify exactly one backend (host, pvc), but multiple were provided.`
          );
        }
      }
    }

    // `null` means "no timeout"; `undefined` falls back to the SDK default.
    const rawTimeout = opts.timeoutSeconds ?? DEFAULT_TIMEOUT_SECONDS;
    const timeoutSeconds =
      opts.timeoutSeconds === null
        ? null
        : Math.floor(rawTimeout);
    if (timeoutSeconds !== null && !Number.isFinite(timeoutSeconds)) {
      throw new Error(
        `timeoutSeconds must be a finite number, got ${opts.timeoutSeconds}`
      );
    }

    const baseConnectionConfig =
      opts.connectionConfig instanceof ConnectionConfig
        ? opts.connectionConfig
        : new ConnectionConfig(opts.connectionConfig);
    const connectionConfig = baseConnectionConfig.withTransportIfMissing();
    const lifecycleBaseUrl = connectionConfig.getBaseUrl();
    const adapterFactory = opts.adapterFactory ?? createDefaultAdapterFactory();

    let sandboxes: Sandboxes;
    try {
      sandboxes = adapterFactory.createLifecycleStack({
        connectionConfig,
        lifecycleBaseUrl,
      }).sandboxes;
    } catch (err) {
      await connectionConfig.closeTransport();
      throw err;
    }

    const req: CreateSandboxRequest = {
      image: toImageSpec(opts.image),
      entrypoint: opts.entrypoint ?? DEFAULT_ENTRYPOINT,
      resourceLimits: opts.resource ?? DEFAULT_RESOURCE_LIMITS,
      env: opts.env ?? {},
      metadata: opts.metadata ?? {},
      networkPolicy: opts.networkPolicy
        ? {
            ...opts.networkPolicy,
            defaultAction: opts.networkPolicy.defaultAction ?? "deny",
          }
        : undefined,
      volumes: opts.volumes,
      extensions: opts.extensions ?? {},
    };
    // Omit the field entirely (rather than sending null) when no timeout is wanted.
    if (timeoutSeconds !== null) {
      req.timeout = timeoutSeconds;
    }

    let sandboxId: SandboxId | undefined;
    try {
      const created = await sandboxes.createSandbox(req);
      sandboxId = created.id as SandboxId;

      const endpoint = await sandboxes.getSandboxEndpoint(
        sandboxId,
        DEFAULT_EXECD_PORT,
        connectionConfig.useServerProxy
      );
      const egressEndpoint = await sandboxes.getSandboxEndpoint(
        sandboxId,
        DEFAULT_EGRESS_PORT,
        connectionConfig.useServerProxy
      );
      // Endpoints come back without a scheme; prepend the configured protocol.
      const execdBaseUrl = `${connectionConfig.protocol}://${endpoint.endpoint}`;
      const egressBaseUrl = `${connectionConfig.protocol}://${egressEndpoint.endpoint}`;

      const { commands, files, health, metrics } =
        adapterFactory.createExecdStack({
          connectionConfig,
          execdBaseUrl,
          endpointHeaders: endpoint.headers,
        });
      const { egress } = adapterFactory.createEgressStack({
        connectionConfig,
        egressBaseUrl,
        endpointHeaders: egressEndpoint.headers,
      });

      const sbx = new Sandbox({
        id: sandboxId,
        connectionConfig,
        adapterFactory,
        lifecycleBaseUrl,
        execdBaseUrl,
        sandboxes,
        commands,
        files,
        health,
        metrics,
        egress,
      });

      if (!(opts.skipHealthCheck ?? false)) {
        await sbx.waitUntilReady({
          readyTimeoutSeconds:
            opts.readyTimeoutSeconds ?? DEFAULT_READY_TIMEOUT_SECONDS,
          pollingIntervalMillis:
            opts.healthCheckPollingInterval ??
            DEFAULT_HEALTH_CHECK_POLLING_INTERVAL_MILLIS,
          healthCheck: opts.healthCheck,
        });
      }

      return sbx;
    } catch (err) {
      // Only attempt server-side cleanup if the sandbox was actually created.
      if (sandboxId) {
        try {
          await sandboxes.deleteSandbox(sandboxId);
        } catch {
          // Ignore cleanup failure; surface original error.
        }
      }
      await connectionConfig.closeTransport();
      throw err;
    }
  }

  /**
   * Connect to an existing sandbox by id.
   *
   * Resolves the execd and egress endpoints, builds the service stacks and, unless
   * `skipHealthCheck` is set, waits until the sandbox passes its readiness check.
   * On any failure the transport is closed and the error is rethrown; the sandbox
   * itself is left untouched.
   *
   * @param opts Connection options; see {@link SandboxConnectOptions}.
   * @returns A handle bound to `opts.sandboxId`.
   * @throws SandboxReadyTimeoutException when the readiness check does not pass in time.
   */
  static async connect(opts: SandboxConnectOptions): Promise<Sandbox> {
    const baseConnectionConfig =
      opts.connectionConfig instanceof ConnectionConfig
        ? opts.connectionConfig
        : new ConnectionConfig(opts.connectionConfig);
    const connectionConfig = baseConnectionConfig.withTransportIfMissing();
    const adapterFactory = opts.adapterFactory ?? createDefaultAdapterFactory();
    const lifecycleBaseUrl = connectionConfig.getBaseUrl();

    let sandboxes: Sandboxes;
    try {
      sandboxes = adapterFactory.createLifecycleStack({
        connectionConfig,
        lifecycleBaseUrl,
      }).sandboxes;
    } catch (err) {
      await connectionConfig.closeTransport();
      throw err;
    }

    try {
      const endpoint = await sandboxes.getSandboxEndpoint(
        opts.sandboxId,
        DEFAULT_EXECD_PORT,
        connectionConfig.useServerProxy
      );
      const egressEndpoint = await sandboxes.getSandboxEndpoint(
        opts.sandboxId,
        DEFAULT_EGRESS_PORT,
        connectionConfig.useServerProxy
      );
      // Endpoints come back without a scheme; prepend the configured protocol.
      const execdBaseUrl = `${connectionConfig.protocol}://${endpoint.endpoint}`;
      const egressBaseUrl = `${connectionConfig.protocol}://${egressEndpoint.endpoint}`;
      const { commands, files, health, metrics } =
        adapterFactory.createExecdStack({
          connectionConfig,
          execdBaseUrl,
          endpointHeaders: endpoint.headers,
        });
      const { egress } = adapterFactory.createEgressStack({
        connectionConfig,
        egressBaseUrl,
        endpointHeaders: egressEndpoint.headers,
      });

      const sbx = new Sandbox({
        id: opts.sandboxId,
        connectionConfig,
        adapterFactory,
        lifecycleBaseUrl,
        execdBaseUrl,
        sandboxes,
        commands,
        files,
        health,
        metrics,
        egress,
      });

      if (!(opts.skipHealthCheck ?? false)) {
        await sbx.waitUntilReady({
          readyTimeoutSeconds:
            opts.readyTimeoutSeconds ?? DEFAULT_READY_TIMEOUT_SECONDS,
          pollingIntervalMillis:
            opts.healthCheckPollingInterval ??
            DEFAULT_HEALTH_CHECK_POLLING_INTERVAL_MILLIS,
          healthCheck: opts.healthCheck,
        });
      }

      return sbx;
    } catch (err) {
      await connectionConfig.closeTransport();
      throw err;
    }
  }

  /**
   * Fetch this sandbox's info from the lifecycle API.
   */
  async getInfo(): Promise<SandboxInfo> {
    return await this.sandboxes.getSandbox(this.id);
  }

  /**
   * Ping execd once.
   *
   * @returns The ping result, or `false` if the ping throws. Never rejects.
   */
  async isHealthy(): Promise<boolean> {
    try {
      return await this.health.ping();
    } catch {
      return false;
    }
  }

  /**
   * Fetch metrics from the execd metrics service.
   */
  async getMetrics() {
    return await this.metrics.getMetrics();
  }

  /**
   * Ask the lifecycle API to pause this sandbox.
   */
  async pause(): Promise<void> {
    await this.sandboxes.pauseSandbox(this.id);
  }

  /**
   * Resume a paused sandbox and return a fresh, connected Sandbox instance.
   *
   * After resume, the execd endpoint may change, so this method returns a new
   * {@link Sandbox} instance with a refreshed execd base URL.
   *
   * The new instance reuses this instance's connection config and adapter factory.
   *
   * @param opts Readiness options forwarded to {@link Sandbox.connect}.
   */
  async resume(
    opts: {
      skipHealthCheck?: boolean;
      readyTimeoutSeconds?: number;
      healthCheckPollingInterval?: number;
    } = {}
  ): Promise<Sandbox> {
    await this.sandboxes.resumeSandbox(this.id);
    return await Sandbox.connect({
      sandboxId: this.id,
      connectionConfig: this.connectionConfig,
      adapterFactory: Sandbox._priv.get(this)!.adapterFactory,
      skipHealthCheck: opts.skipHealthCheck ?? false,
      readyTimeoutSeconds: opts.readyTimeoutSeconds,
      healthCheckPollingInterval: opts.healthCheckPollingInterval,
    });
  }

  /**
   * Resume a paused sandbox by id, then connect to its execd endpoint.
   *
   * A lifecycle stack is built only to issue the resume call; its transport is
   * closed afterwards, and {@link Sandbox.connect} is then invoked with the base
   * connection config so it sets up its own transport.
   *
   * NOTE(review): if `withTransportIfMissing()` returns the same instance when the
   * caller already supplied a transport, the `closeTransport()` below would act on
   * the caller's transport before `connect()` uses it — confirm against
   * `ConnectionConfig`.
   */
  static async resume(opts: SandboxConnectOptions): Promise<Sandbox> {
    const baseConnectionConfig =
      opts.connectionConfig instanceof ConnectionConfig
        ? opts.connectionConfig
        : new ConnectionConfig(opts.connectionConfig);
    const adapterFactory = opts.adapterFactory ?? createDefaultAdapterFactory();
    const resumeConnectionConfig = baseConnectionConfig.withTransportIfMissing();
    const lifecycleBaseUrl = resumeConnectionConfig.getBaseUrl();

    let sandboxes: Sandboxes;
    try {
      sandboxes = adapterFactory.createLifecycleStack({
        connectionConfig: resumeConnectionConfig,
        lifecycleBaseUrl,
      }).sandboxes;
      await sandboxes.resumeSandbox(opts.sandboxId);
    } catch (err) {
      await resumeConnectionConfig.closeTransport();
      throw err;
    }

    await resumeConnectionConfig.closeTransport();
    return await Sandbox.connect({ ...opts, connectionConfig: baseConnectionConfig, adapterFactory });
  }

  /**
   * Delete this sandbox through the lifecycle API.
   *
   * Does not close the client-side transport; call {@link Sandbox.close} for that.
   */
  async kill(): Promise<void> {
    await this.sandboxes.deleteSandbox(this.id);
  }

  /**
   * Release any client-side resources (e.g. Node.js HTTP agents) owned by this Sandbox instance.
   */
  async close(): Promise<void> {
    await this.connectionConfig.closeTransport();
  }

  /**
   * Renew expiration by setting expiresAt to now + timeoutSeconds.
   *
   * @param timeoutSeconds Seconds from now (client clock) until the new expiration.
   */
  async renew(timeoutSeconds: number): Promise<RenewSandboxExpirationResponse> {
    const expiresAt = new Date(
      Date.now() + timeoutSeconds * 1000
    ).toISOString();
    return await this.sandboxes.renewSandboxExpiration(this.id, { expiresAt });
  }

  /**
   * Fetch the egress network policy through the egress service built for this sandbox.
   */
  async getEgressPolicy(): Promise<NetworkPolicy> {
    return await Sandbox._priv.get(this)!.egress.getPolicy();
  }

  /**
   * Patch egress rules through the egress service built for this sandbox.
   *
   * @param rules Rules to send; merge semantics are defined by the egress service.
   */
  async patchEgressRules(rules: NetworkRule[]): Promise<void> {
    await Sandbox._priv.get(this)!.egress.patchRules(rules);
  }

  /**
   * Get sandbox endpoint for a port (STRICT: no scheme), e.g. "localhost:44772" or "domain/route/.../44772".
   */
  async getEndpoint(port: number): Promise<Endpoint> {
    return await this.sandboxes.getSandboxEndpoint(
      this.id,
      port,
      this.connectionConfig.useServerProxy
    );
  }

  /**
   * Get absolute endpoint URL with scheme (convenience for HTTP clients).
   */
  async getEndpointUrl(port: number): Promise<string> {
    const ep = await this.getEndpoint(port);
    return `${this.connectionConfig.protocol}://${ep.endpoint}`;
  }

  /**
   * Poll the readiness check until it passes or the deadline expires.
   *
   * Each attempt calls `opts.healthCheck(this)` when provided, otherwise
   * `this.health.ping()`. A thrown error counts as a failed attempt and its message
   * is remembered for the timeout report. The deadline is only evaluated at the
   * start of each iteration, so the total wait can exceed `readyTimeoutSeconds` by
   * up to one attempt plus one polling interval.
   *
   * @param opts.readyTimeoutSeconds Max time to wait, in seconds.
   * @param opts.pollingIntervalMillis Pause between attempts, in milliseconds.
   * @param opts.healthCheck Optional custom readiness check.
   * @throws SandboxReadyTimeoutException when the deadline passes without a successful check.
   */
  async waitUntilReady(opts: {
    readyTimeoutSeconds: number;
    pollingIntervalMillis: number;
    healthCheck?: (sbx: Sandbox) => boolean | Promise<boolean>;
  }): Promise<void> {
    const deadline = Date.now() + opts.readyTimeoutSeconds * 1000;
    let attempt = 0;
    let errorDetail = "Health check returned false continuously.";

    // Built lazily so it reflects the final attempt count and last error.
    const buildTimeoutMessage = () => {
      const context = `domain=${this.connectionConfig.domain}, useServerProxy=${this.connectionConfig.useServerProxy}`;
      let suggestion =
        "If this sandbox runs in Docker bridge or remote-network mode, consider enabling useServerProxy=true.";
      if (!this.connectionConfig.useServerProxy) {
        suggestion += " You can also configure server-side [docker].host_ip for direct endpoint access.";
      }
      return `Sandbox health check timed out after ${opts.readyTimeoutSeconds}s (${attempt} attempts). ${errorDetail} Connection context: ${context}. ${suggestion}`;
    };

    // Wait until execd becomes reachable and passes health check.
    while (true) {
      if (Date.now() > deadline) {
        throw new SandboxReadyTimeoutException({
          message: buildTimeoutMessage(),
        });
      }
      attempt++;
      try {
        const ok = opts.healthCheck
          ? await opts.healthCheck(this)
          : await this.health.ping();
        if (ok) {
          return;
        }
        errorDetail = "Health check returned false continuously.";
      } catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        errorDetail = `Last health check error: ${message}`;
      }
      await sleep(opts.pollingIntervalMillis);
    }
  }
}


================================================
FILE: sdks/sandbox/javascript/src/services/egress.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import type { NetworkPolicy, NetworkRule } from "../models/sandboxes.js";

/**
 * Service contract for reading and updating a sandbox's egress (outbound network) rules.
 *
 * `Sandbox` obtains an implementation from `AdapterFactory.createEgressStack` and
 * exposes it via `getEgressPolicy()` / `patchEgressRules()`.
 */
export interface Egress {
  /**
   * Fetch the egress policy.
   *
   * @returns A promise resolving to a `NetworkPolicy`.
   */
  getPolicy(): Promise<NetworkPolicy>;
  /**
   * Patch egress rules with sidecar merge semantics.
   *
   * Incoming rules take priority over existing rules with the same target.
   * Existing rules for other targets remain unchanged. Within one patch payload,
   * the first rule for a target wins. The current defaultAction is preserved.
   *
   * @param rules Rules to merge into the existing policy.
   */
  patchRules(rules: NetworkRule[]): Promise<void>;
}


================================================
FILE: sdks/sandbox/javascript/src/services/execdCommands.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import type { ExecutionHandlers } from "../models/execution.js";
import type {
  CommandExecution,
  CommandLogs,
  CommandStatus,
  RunCommandOpts,
  ServerStreamEvent,
} from "../models/execd.js";

/**
 * Service contract for running and inspecting commands through execd.
 */
export interface ExecdCommands {
  /**
   * Run a command and stream server events (SSE). This is the lowest-level API.
   *
   * @param command Command line to run.
   * @param opts Optional run options.
   * @param signal Optional `AbortSignal` (presumably cancels the stream — confirm against the adapter).
   * @returns An async iterable of `ServerStreamEvent`.
   */
  runStream(command: string, opts?: RunCommandOpts, signal?: AbortSignal): AsyncIterable<ServerStreamEvent>;

  /**
   * Convenience: run a command, consume the stream, and build a structured execution result.
   *
   * @param command Command line to run.
   * @param opts Optional run options.
   * @param handlers Optional `ExecutionHandlers` callbacks.
   * @param signal Optional `AbortSignal` (presumably cancels the run — confirm against the adapter).
   * @returns A promise resolving to the `CommandExecution` result.
   */
  run(command: string, opts?: RunCommandOpts, handlers?: ExecutionHandlers, signal?: AbortSignal): Promise<CommandExecution>;

  /**
   * Interrupt the current execution in the given context/session.
   *
   * Note: Execd spec uses `DELETE /command?id=<sessionId>`.
   *
   * @param sessionId Id of the context/session whose execution should be interrupted.
   */
  interrupt(sessionId: string): Promise<void>;

  /**
   * Get the current running status for a command id.
   *
   * @param commandId Id of the command to query.
   */
  getCommandStatus(commandId: string): Promise<CommandStatus>;

  /**
   * Get background command logs (non-streamed).
   *
   * @param commandId Id of the background command.
   * @param cursor Optional numeric cursor (presumably a position to continue from — confirm against the execd spec).
   */
  getBackgroundCommandLogs(commandId: string, cursor?: number): Promise<CommandLogs>;
}

================================================
FILE: sdks/sandbox/javascript/src/services/execdHealth.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * Service contract for the execd health check.
 */
export interface ExecdHealth {
  /**
   * Probe execd once.
   *
   * `Sandbox.waitUntilReady` treats a truthy result as "ready", and both it and
   * `Sandbox.isHealthy` treat a rejection as a failed check.
   *
   * @returns A promise resolving to the boolean health result.
   */
  ping(): Promise<boolean>;
}

================================================
FILE: sdks/sandbox/javascript/src/services/execdMetrics.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import type { SandboxMetrics } from "../models/execd.js";

/**
 * Service contract for reading sandbox metrics from execd.
 */
export interface ExecdMetrics {
  /**
   * Fetch metrics.
   *
   * @returns A promise resolving to a `SandboxMetrics` value.
   */
  getMetrics(): Promise<SandboxMetrics>;
}

================================================
FILE: sdks/sandbox/javascript/src/services/filesystem.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import type { SearchFilesResponse } from "../models/filesystem.js";
import type {
  ContentReplaceEntry,
  FileInfo,
  MoveEntry,
  SearchEntry,
  SetPermissionEntry,
  WriteEntry,
} from "../models/filesystem.js";

/**
 * High-level filesystem facade (JS-friendly).
 *
 * This interface provides a convenience layer over the underlying execd filesystem API:
 * it offers common operations (read/write/search/move/delete) and supports streaming I/O for large files.
 *
 * All methods are asynchronous; batch operations take arrays of paths or entries.
 */
export interface SandboxFiles {
  /** Look up file info for the given paths; resolves to a record of `FileInfo` values with string keys (presumably the paths — confirm against the adapter). */
  getFileInfo(paths: string[]): Promise<Record<string, FileInfo>>;
  /** Search for files described by `entry`. */
  search(entry: SearchEntry): Promise<SearchFilesResponse>;

  /** Create directories; each entry carries only the `path`, `mode`, `owner` and `group` fields of a `WriteEntry`. */
  createDirectories(entries: Pick<WriteEntry, "path" | "mode" | "owner" | "group">[]): Promise<void>;
  /** Delete the directories at the given paths. */
  deleteDirectories(paths: string[]): Promise<void>;

  /** Write one or more files described by `entries`. */
  writeFiles(entries: WriteEntry[]): Promise<void>;
  /** Read a file as a string; `encoding` and `range` are optional (their accepted formats are defined by the adapter). */
  readFile(path: string, opts?: { encoding?: string; range?: string }): Promise<string>;
  /** Read a file as a single `Uint8Array`; `range` is optional. */
  readBytes(path: string, opts?: { range?: string }): Promise<Uint8Array>;
  /** Read a file as an async iterable of `Uint8Array` chunks; `range` is optional. */
  readBytesStream(path: string, opts?: { range?: string }): AsyncIterable<Uint8Array>;

  /** Delete the files at the given paths. */
  deleteFiles(paths: string[]): Promise<void>;
  /** Move files as described by `entries`. */
  moveFiles(entries: MoveEntry[]): Promise<void>;
  /** Replace content inside files as described by `entries`. */
  replaceContents(entries: ContentReplaceEntry[]): Promise<void>;
  /** Apply permission changes described by `entries`. */
  setPermissions(entries: SetPermissionEntry[]): Promise<void>;
}

================================================
FILE: sdks/sandbox/javascript/src/services/sandboxes.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import type {
  CreateSandboxRequest,
  CreateSandboxResponse,
  Endpoint,
  ListSandboxesParams,
  ListSandboxesResponse,
  RenewSandboxExpirationRequest,
  RenewSandboxExpirationResponse,
  SandboxId,
  SandboxInfo,
} from "../models/sandboxes.js";

/**
 * Service contract for the sandbox lifecycle (management) API.
 *
 * `Sandbox` obtains an implementation from `AdapterFactory.createLifecycleStack`.
 */
export interface Sandboxes {
  /** Create a sandbox from `req`; the response carries the new sandbox's `id`. */
  createSandbox(req: CreateSandboxRequest): Promise<CreateSandboxResponse>;
  /** Fetch info for one sandbox. */
  getSandbox(sandboxId: SandboxId): Promise<SandboxInfo>;
  /** List sandboxes, optionally narrowed by `params`. */
  listSandboxes(params?: ListSandboxesParams): Promise<ListSandboxesResponse>;
  /** Delete a sandbox. */
  deleteSandbox(sandboxId: SandboxId): Promise<void>;

  /** Pause a sandbox. */
  pauseSandbox(sandboxId: SandboxId): Promise<void>;
  /** Resume a paused sandbox. */
  resumeSandbox(sandboxId: SandboxId): Promise<void>;

  /** Renew a sandbox's expiration; `Sandbox.renew` sends `{ expiresAt }` as an ISO-8601 string. */
  renewSandboxExpiration(
    sandboxId: SandboxId,
    req: RenewSandboxExpirationRequest,
  ): Promise<RenewSandboxExpirationResponse>;

  /**
   * Resolve the endpoint through which `port` of the sandbox can be reached.
   *
   * `Sandbox` prepends `<protocol>://` to the returned `endpoint` and passes the
   * returned `headers` on when building the execd and egress service stacks.
   *
   * @param sandboxId Target sandbox.
   * @param port Port inside the sandbox.
   * @param useServerProxy Optional flag; `Sandbox` passes `connectionConfig.useServerProxy`.
   */
  getSandboxEndpoint(
    sandboxId: SandboxId,
    port: number,
    useServerProxy?: boolean
  ): Promise<Endpoint>;
}


================================================
FILE: sdks/sandbox/javascript/tests/sandbox.create.test.mjs
================================================
import assert from "node:assert/strict";
import test from "node:test";

import {
  DEFAULT_EGRESS_PORT,
  DEFAULT_EXECD_PORT,
  DEFAULT_TIMEOUT_SECONDS,
  Sandbox,
} from "../dist/index.js";

/**
 * Build an in-memory adapter factory for Sandbox tests.
 *
 * The returned arrays are recorders the fake services append to:
 * - `recordedRequests`: every request passed to `createSandbox`
 * - `endpointCalls`: every port passed to `getSandboxEndpoint`
 * - `egressStackCalls`: every options object passed to `createEgressStack`
 */
function createAdapterFactory() {
  const recordedRequests = [];
  const endpointCalls = [];
  const egressStackCalls = [];

  // Shared stub for lifecycle operations these tests never exercise.
  const notImplemented = async () => {
    throw new Error("not implemented");
  };
  const noop = async () => {};

  const egressService = {
    getPolicy: async () => ({
      defaultAction: "deny",
      egress: [{ action: "allow", target: "pypi.org" }],
    }),
    patchRules: noop,
  };

  const sandboxes = {
    createSandbox: async (req) => {
      recordedRequests.push(req);
      return { id: "sandbox-test-id", expiresAt: null };
    },
    getSandbox: notImplemented,
    listSandboxes: notImplemented,
    deleteSandbox: noop,
    pauseSandbox: noop,
    resumeSandbox: noop,
    renewSandboxExpiration: notImplemented,
    getSandboxEndpoint: async (_sandboxId, port) => {
      endpointCalls.push(port);
      return { endpoint: `127.0.0.1:${port}`, headers: { "x-port": String(port) } };
    },
  };

  const adapterFactory = {
    createLifecycleStack: () => ({ sandboxes }),
    // Execd services are empty placeholders; tests always skip the health check.
    createExecdStack: () => ({
      commands: {},
      files: {},
      health: {},
      metrics: {},
    }),
    createEgressStack: (opts) => {
      egressStackCalls.push(opts);
      return { egress: egressService };
    },
  };

  return { adapterFactory, recordedRequests, endpointCalls, egressStackCalls };
}

/**
 * Build the Sandbox.create options shared by every test in this file.
 * A fresh object is returned on each call; `overrides` are applied last.
 */
function sandboxOptions(adapterFactory, overrides = {}) {
  return {
    adapterFactory,
    connectionConfig: { domain: "http://127.0.0.1:8080" },
    image: "python:3.12",
    skipHealthCheck: true,
    ...overrides,
  };
}

test("Sandbox.create omits timeout when timeoutSeconds is null", async () => {
  const fake = createAdapterFactory();

  await Sandbox.create(sandboxOptions(fake.adapterFactory, { timeoutSeconds: null }));

  // The request must not carry a `timeout` key at all.
  const [request] = fake.recordedRequests;
  assert.equal(fake.recordedRequests.length, 1);
  assert.ok(!Object.hasOwn(request, "timeout"));
});

test("Sandbox.create floors finite timeoutSeconds", async () => {
  const fake = createAdapterFactory();

  await Sandbox.create(sandboxOptions(fake.adapterFactory, { timeoutSeconds: 61.9 }));

  assert.equal(fake.recordedRequests.length, 1);
  assert.equal(fake.recordedRequests[0].timeout, 61);
});

test("Sandbox.create uses the default timeout when timeoutSeconds is undefined", async () => {
  const fake = createAdapterFactory();

  await Sandbox.create(sandboxOptions(fake.adapterFactory));

  assert.equal(fake.recordedRequests.length, 1);
  assert.equal(fake.recordedRequests[0].timeout, DEFAULT_TIMEOUT_SECONDS);
});

test("Sandbox.create rejects non-finite timeoutSeconds", async () => {
  const nonFiniteValues = [Number.NaN, Number.POSITIVE_INFINITY, Number.NEGATIVE_INFINITY];
  for (const timeoutSeconds of nonFiniteValues) {
    // Use a fresh fake per value so the cases stay independent.
    const fake = createAdapterFactory();
    await assert.rejects(
      Sandbox.create(sandboxOptions(fake.adapterFactory, { timeoutSeconds })),
      /timeoutSeconds must be a finite number/
    );
  }
});

test("Sandbox creates and reuses egress service during sandbox lifecycle", async () => {
  const fake = createAdapterFactory();

  const sandbox = await Sandbox.create(sandboxOptions(fake.adapterFactory));

  await sandbox.getEgressPolicy();
  await sandbox.patchEgressRules([{ action: "allow", target: "www.github.com" }]);

  // Endpoints are resolved once each (execd first, then egress) and the egress
  // stack is built a single time, no matter how many egress calls follow.
  assert.deepEqual(fake.endpointCalls, [DEFAULT_EXECD_PORT, DEFAULT_EGRESS_PORT]);
  assert.equal(fake.egressStackCalls.length, 1);

  const [egressCall] = fake.egressStackCalls;
  assert.equal(egressCall.egressBaseUrl, `http://127.0.0.1:${DEFAULT_EGRESS_PORT}`);
  assert.deepEqual(egressCall.endpointHeaders, { "x-port": String(DEFAULT_EGRESS_PORT) });
});


================================================
FILE: sdks/sandbox/javascript/tsconfig.json
================================================
{
  "extends": "../../tsconfig.base.json",
  "compilerOptions": {
    "outDir": "dist",
    "rootDir": "src"
  },
  "include": ["src"],
  "exclude": ["node_modules", "dist", "**/*.test.ts"]
}

================================================
FILE: sdks/sandbox/javascript/tsup.config.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { defineConfig } from "tsup";

// Entry points bundled by both builds below.
const entries = ["src/index.ts", "src/internal.ts"];

// Two builds from the same entries: ESM into `dist`, CommonJS into `dist/cjs`.
export default defineConfig([
  {
    // ESM build: the only one that emits type declarations and cleans its output dir.
    entry: entries,
    format: ["esm"],
    dts: true,
    outDir: "dist",
    clean: true,
    sourcemap: true,
    target: "es2022",
  },
  {
    // CommonJS build: written to a subdirectory with a `.cjs` extension, no cleaning.
    entry: entries,
    format: ["cjs"],
    outDir: "dist/cjs",
    clean: false,
    sourcemap: true,
    target: "es2022",
    outExtension: () => ({ js: ".cjs" }),
  },
]);


================================================
FILE: sdks/sandbox/kotlin/LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1.  Definitions.

    "License" shall mean the terms and conditions for use, reproduction,
    and distribution as defined by Sections 1 through 9 of this document.

    "Licensor" shall mean the copyright owner or entity authorized by
    the copyright owner that is granting the License.

    "Legal Entity" shall mean the union of the acting entity and all
    other entities that control, are controlled by, or are under common
    control with that entity. For the purposes of this definition,
    "control" means (i) the power, direct or indirect, to cause the
    direction or management of such entity, whether by contract or
    otherwise, or (ii) ownership of fifty percent (50%) or more of the
    outstanding shares, or (iii) beneficial ownership of such entity.

    "You" (or "Your") shall mean an individual or Legal Entity
    exercising permissions granted by this License.

    "Source" form shall mean the preferred form for making modifications,
    including but not limited to software source code, documentation
    source, and configuration files.

    "Object" form shall mean any form resulting from mechanical
    transformation or translation of a Source form, including but
    not limited to compiled object code, generated documentation,
    and conversions to other media types.

    "Work" shall mean the work of authorship, whether in Source or
    Object form, made available under the License, as indicated by a
    copyright notice that is included in or attached to the work
    (an example is provided in the Appendix below).

    "Derivative Works" shall mean any work, whether in Source or Object
    form, that is based on (or derived from) the Work and for which the
    editorial revisions, annotations, elaborations, or other modifications
    represent, as a whole, an original work of authorship. For the purposes
    of this License, Derivative Works shall not include works that remain
    separable from, or merely link (or bind by name) to the interfaces of,
    the Work and Derivative Works thereof.

    "Contribution" shall mean any work of authorship, including
    the original version of the Work and any modifications or additions
    to that Work or Derivative Works thereof, that is intentionally
    submitted to Licensor for inclusion in the Work by the copyright owner
    or by an individual or Legal Entity authorized to submit on behalf of
    the copyright owner. For the purposes of this definition, "submitted"
    means any form of electronic, verbal, or written communication sent
    to the Licensor or its representatives, including but not limited to
    communication on electronic mailing lists, source code control systems,
    and issue tracking systems that are managed by, or on behalf of, the
    Licensor for the purpose of discussing and improving the Work, but
    excluding communication that is conspicuously marked or otherwise
    designated in writing by the copyright owner as "Not a Contribution."

    "Contributor" shall mean Licensor and any individual or Legal Entity
    on behalf of whom a Contribution has been received by Licensor and
    subsequently incorporated within the Work.

2.  Grant of Copyright License. Subject to the terms and conditions of
    this License, each Contributor hereby grants to You a perpetual,
    worldwide, non-exclusive, no-charge, royalty-free, irrevocable
    copyright license to reproduce, prepare Derivative Works of,
    publicly display, publicly perform, sublicense, and distribute the
    Work and such Derivative Works in Source or Object form.

3.  Grant of Patent License. Subject to the terms and conditions of
    this License, each Contributor hereby grants to You a perpetual,
    worldwide, non-exclusive, no-charge, royalty-free, irrevocable
    (except as stated in this section) patent license to make, have made,
    use, offer to sell, sell, import, and otherwise transfer the Work,
    where such license applies only to those patent claims licensable
    by such Contributor that are necessarily infringed by their
    Contribution(s) alone or by combination of their Contribution(s)
    with the Work to which such Contribution(s) was submitted. If You
    institute patent litigation against any entity (including a
    cross-claim or counterclaim in a lawsuit) alleging that the Work
    or a Contribution incorporated within the Work constitutes direct
    or contributory patent infringement, then any patent licenses
    granted to You under this License for that Work shall terminate
    as of the date such litigation is filed.

4.  Redistribution. You may reproduce and distribute copies of the
    Work or Derivative Works thereof in any medium, with or without
    modifications, and in Source or Object form, provided that You
    meet the following conditions:

    (a) You must give any other recipients of the Work or
    Derivative Works a copy of this License; and

    (b) You must cause any modified files to carry prominent notices
    stating that You changed the files; and

    (c) You must retain, in the Source form of any Derivative Works
    that You distribute, all copyright, patent, trademark, and
    attribution notices from the Source form of the Work,
    excluding those notices that do not pertain to any part of
    the Derivative Works; and

    (d) If the Work includes a "NOTICE" text file as part of its
    distribution, then any Derivative Works that You distribute must
    include a readable copy of the attribution notices contained
    within such NOTICE file, excluding those notices that do not
    pertain to any part of the Derivative Works, in at least one
    of the following places: within a NOTICE text file distributed
    as part of the Derivative Works; within the Source form or
    documentation, if provided along with the Derivative Works; or,
    within a display generated by the Derivative Works, if and
    wherever such third-party notices normally appear. The contents
    of the NOTICE file are for informational purposes only and
    do not modify the License. You may add Your own attribution
    notices within Derivative Works that You distribute, alongside
    or as an addendum to the NOTICE text from the Work, provided
    that such additional attribution notices cannot be construed
    as modifying the License.

    You may add Your own copyright statement to Your modifications and
    may provide additional or different license terms and conditions
    for use, reproduction, or distribution of Your modifications, or
    for any such Derivative Works as a whole, provided Your use,
    reproduction, and distribution of the Work otherwise complies with
    the conditions stated in this License.

5.  Submission of Contributions. Unless You explicitly state otherwise,
    any Contribution intentionally submitted for inclusion in the Work
    by You to the Licensor shall be under the terms and conditions of
    this License, without any additional terms or conditions.
    Notwithstanding the above, nothing herein shall supersede or modify
    the terms of any separate license agreement you may have executed
    with Licensor regarding such Contributions.

6.  Trademarks. This License does not grant permission to use the trade
    names, trademarks, service marks, or product names of the Licensor,
    except as required for reasonable and customary use in describing the
    origin of the Work and reproducing the content of the NOTICE file.

7.  Disclaimer of Warranty. Unless required by applicable law or
    agreed to in writing, Licensor provides the Work (and each
    Contributor provides its Contributions) on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
    implied, including, without limitation, any warranties or conditions
    of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
    PARTICULAR PURPOSE. You are solely responsible for determining the
    appropriateness of using or redistributing the Work and assume any
    risks associated with Your exercise of permissions under this License.

8.  Limitation of Liability. In no event and under no legal theory,
    whether in tort (including negligence), contract, or otherwise,
    unless required by applicable law (such as deliberate and grossly
    negligent acts) or agreed to in writing, shall any Contributor be
    liable to You for damages, including any direct, indirect, special,
    incidental, or consequential damages of any character arising as a
    result of this License or out of the use or inability to use the
    Work (including but not limited to damages for loss of goodwill,
    work stoppage, computer failure or malfunction, or any and all
    other commercial damages or losses), even if such Contributor
    has been advised of the possibility of such damages.

9.  Accepting Warranty or Additional Liability. While redistributing
    the Work or Derivative Works thereof, You may choose to offer,
    and charge a fee for, acceptance of support, warranty, indemnity,
    or other liability obligations and/or rights consistent with this
    License. However, in accepting such obligations, You may act only
    on Your own behalf and on Your sole responsibility, not on behalf
    of any other Contributor, and only if You agree to indemnify,
    defend, and hold each Contributor harmless for any liability
    incurred by, or claims asserted against, such Contributor by reason
    of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


================================================
FILE: sdks/sandbox/kotlin/README.md
================================================
# Alibaba Sandbox SDK for Kotlin

English | [中文](README_zh.md)

A Kotlin SDK for low-level interaction with OpenSandbox. It provides capabilities to create, manage, and interact with secure sandbox environments, including executing shell commands, managing files, and monitoring resources.

## Installation

### Gradle (Kotlin DSL)

```kotlin
dependencies {
    implementation("com.alibaba.opensandbox:sandbox:{latest_version}")
}
```

### Maven

```xml
<dependency>
    <groupId>com.alibaba.opensandbox</groupId>
    <artifactId>sandbox</artifactId>
    <version>{latest_version}</version>
</dependency>
```

## Quick Start

The following example shows how to create a sandbox and execute a shell command.

> **Note**: Before running this example, ensure the OpenSandbox service is running. See the root [README.md](../../../README.md) for startup instructions.

```java
import com.alibaba.opensandbox.sandbox.Sandbox;
import com.alibaba.opensandbox.sandbox.config.ConnectionConfig;
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxException;
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.Execution;

public class QuickStart {
    public static void main(String[] args) {
        // 1. Configure connection
        ConnectionConfig config = ConnectionConfig.builder()
            .domain("api.opensandbox.io")
            .apiKey("your-api-key")
            .build();

        // 2. Create a Sandbox using try-with-resources
        try (Sandbox sandbox = Sandbox.builder()
                .connectionConfig(config)
                .image("ubuntu")
                .build()) {

            // 3. Execute a shell command
            Execution execution = sandbox
                    .commands()
                    .run("echo 'Hello Sandbox!'");

            // 4. Print output
            System.out.println(execution.getLogs().getStdout().get(0).getText());

            // 5. Cleanup (sandbox.close() called automatically)
            // Note: kill() must be called explicitly if you want to terminate the remote sandbox instance immediately
            sandbox.kill();
        } catch (SandboxException e) {
            // Handle Sandbox specific exceptions
            System.err.println("Sandbox Error: [" + e.getError().getCode() + "] " + e.getError().getMessage());
            System.err.println("Request ID: " + e.getRequestId());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
```

## Usage Examples

### 1. Lifecycle Management

Manage the sandbox lifecycle, including renewal, pausing, and resuming.

```java
// Renew the sandbox
// This resets the expiration time to (current time + duration)
sandbox.renew(Duration.ofMinutes(30));

// Pause execution (suspends all processes)
sandbox.pause();

// Resume execution
sandbox.resume();

// Get current status
SandboxInfo info = sandbox.getInfo();
System.out.println("State: " + info.getStatus().getState());
System.out.println("Expires: " + info.getExpiresAt()); // null when manual cleanup mode is used
```

Create a non-expiring sandbox by passing `timeout(null)`:

```java
Sandbox manual = Sandbox.builder()
    .connectionConfig(config)
    .image("ubuntu")
    .timeout(null)
    .build();
```

### 2. Custom Health Check

Define custom logic to determine if the sandbox is healthy. This overrides the default ping check.

```java
Sandbox sandbox = Sandbox.builder()
    .connectionConfig(config)
    .image("nginx:latest")
    // Custom check: Wait for port 80 to be accessible
    .healthCheck(sbx -> {
        try {
            // 1. Get the external mapped address for port 80
            SandboxEndpoint endpoint = sbx.getEndpoint(80);

            // 2. Perform your connection check (e.g. HTTP request, Socket connect)
            // return checkConnection(endpoint.getEndpoint());
            return true;
        } catch (Exception e) {
            return false;
        }
    })
    .build();
```

### 3. Command Execution & Streaming

Execute commands and handle output streams in real-time.

```java
// Create handlers for streaming output
ExecutionHandlers handlers = ExecutionHandlers.builder()
    .onStdout(msg -> System.out.println("STDOUT: " + msg.getText()))
    .onStderr(msg -> System.err.println("STDERR: " + msg.getText()))
    .onExecutionComplete(complete ->
        System.out.println("Command finished in " + complete.getExecutionTimeInMillis() + "ms")
    )
    .build();

// Execute command with handlers
RunCommandRequest request = RunCommandRequest.builder()
    .command("for i in {1..5}; do echo \"Count $i\"; sleep 0.5; done")
    .handlers(handlers)
    .build();

sandbox.commands().run(request);
```

### 4. Comprehensive File Operations

Manage files and directories, including read, write, list, delete, and search.

```java
// 1. Write file
sandbox.files().write(List.of(
    WriteEntry.builder()
        .path("/tmp/hello.txt")
        .data("Hello World")
        .mode(644)
        .build()
));

// 2. Read file
String content = sandbox.files().readFile("/tmp/hello.txt", "UTF-8", null);
System.out.println("Content: " + content);

// 3. List/Search files
List<EntryInfo> files = sandbox.files().search(
    SearchEntry.builder()
        .path("/tmp")
        .pattern("*.txt")
        .build()
);
files.forEach(f -> System.out.println("Found: " + f.getPath()));

// 4. Delete file
sandbox.files().deleteFiles(List.of("/tmp/hello.txt"));
```

### 5. Sandbox Management (Admin)

Use `SandboxManager` for administrative tasks and finding existing sandboxes.

```java
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxState;

// ...

SandboxManager manager = SandboxManager.builder()
    .connectionConfig(config)
    .build();

// List running sandboxes
PagedSandboxInfos sandboxes = manager.listSandboxInfos(
    SandboxFilter.builder()
        .states(SandboxState.RUNNING)
        .pageSize(10)
        .page(1)
        .build()
);

sandboxes.getSandboxInfos().forEach(info -> {
    System.out.println("Found sandbox: " + info.getId());
    // Perform admin actions
    manager.killSandbox(info.getId());
});

// When the manager is created in a try-with-resources block, manager.close() is called
// automatically; otherwise call it explicitly once you are done:
// manager.close();
```

## Configuration

### 1. Connection Configuration

The `ConnectionConfig` class manages API server connection settings.

| Parameter        | Description                                | Default                      | Environment Variable   |
| ---------------- | ------------------------------------------ | ---------------------------- | ---------------------- |
| `apiKey`         | API Key for authentication                 | Required                     | `OPEN_SANDBOX_API_KEY` |
| `domain`         | The endpoint domain of the sandbox service | Required (or localhost:8080) | `OPEN_SANDBOX_DOMAIN`  |
| `protocol`       | HTTP protocol (http/https)                 | `http`                       | -                      |
| `requestTimeout` | Timeout for API requests                   | 30 seconds                   | -                      |
| `debug`          | Enable debug logging for HTTP requests     | `false`                      | -                      |
| `headers`        | Custom HTTP headers                        | Empty                        | -                      |
| `connectionPool` | Shared OkHttp ConnectionPool               | SDK-created per instance     | -                      |
| `useServerProxy` | Use sandbox server as proxy for execd/endpoint requests (e.g. when client cannot reach the sandbox directly) | `false` | -                      |

```java
// 1. Basic configuration
ConnectionConfig config = ConnectionConfig.builder()
    .apiKey("your-key")
    .domain("api.opensandbox.io")
    .requestTimeout(Duration.ofSeconds(60))
    .build();

// 2. Advanced: Shared Connection Pool
// If you create many Sandbox instances, sharing a connection pool is recommended to save resources.
// SDK default keep-alive is 30 seconds for its own pools.
ConnectionPool sharedPool = new ConnectionPool(50, 30, TimeUnit.SECONDS);

ConnectionConfig sharedConfig = ConnectionConfig.builder()
    .apiKey("your-key")
    .domain("api.opensandbox.io")
    .headers(Map.of(
        "X-Custom-Header", "value",
        "X-Request-ID", "trace-123"
    ))
    .connectionPool(sharedPool) // Inject shared pool
    .build();
```

### 2. Sandbox Creation Configuration

The `Sandbox.builder()` allows configuring the sandbox environment.

| Parameter      | Description                              | Default                         |
| -------------- | ---------------------------------------- | ------------------------------- |
| `image`        | Docker image to use                      | Required                        |
| `timeout`      | Automatic termination timeout            | 10 minutes                      |
| `entrypoint`   | Container entrypoint command             | `["tail", "-f", "/dev/null"]`   |
| `resource`     | CPU and memory limits                    | `{"cpu": "1", "memory": "2Gi"}` |
| `env`          | Environment variables                    | Empty                           |
| `metadata`     | Custom metadata tags                     | Empty                           |
| `networkPolicy` | Optional outbound network policy (egress) | -                             |
| `readyTimeout` | Max time to wait for sandbox to be ready | 30 seconds                      |

Note: metadata keys under `opensandbox.io/` are reserved for system-managed
labels and will be rejected by the server.

```java
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.NetworkPolicy;
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.NetworkRule;

Sandbox sandbox = Sandbox.builder()
    .connectionConfig(config)
    .image("python:3.11")
    .timeout(Duration.ofMinutes(30))
    .resource(map -> {
        map.put("cpu", "2");
        map.put("memory", "4Gi");
    })
    .env("PYTHONPATH", "/app")
    .metadata("project", "demo")
    .networkPolicy(
        NetworkPolicy.builder()
            .defaultAction(NetworkPolicy.DefaultAction.DENY)
            .addEgress(
                NetworkRule.builder()
                    .action(NetworkRule.Action.ALLOW)
                    .target("pypi.org")
                    .build()
            )
            .build()
    )
    .build();
```

### 3. Runtime Egress Policy Updates

Runtime egress reads and patches go directly to the sandbox egress sidecar.
The SDK first resolves the sandbox endpoint on port `18080`, then calls the sidecar `/policy` API.

Patch uses merge semantics:
- Incoming rules take priority over existing rules with the same `target`.
- Existing rules for other targets remain unchanged.
- Within a single patch payload, the first rule for a `target` wins.
- The current `defaultAction` is preserved.

```java
NetworkPolicy policy = sandbox.getEgressPolicy();

sandbox.patchEgressRules(
    List.of(
        NetworkRule.builder().action(NetworkRule.Action.ALLOW).target("www.github.com").build(),
        NetworkRule.builder().action(NetworkRule.Action.DENY).target("pypi.org").build()
    )
);
```


================================================
FILE: sdks/sandbox/kotlin/README_zh.md
================================================
# Alibaba Sandbox SDK for Kotlin

中文 | [English](README.md)

用于与 OpenSandbox 进行底层交互的 Kotlin SDK。它提供了创建、管理和与安全沙箱环境交互的能力，包括执行 Shell 命令、管理文件和监控资源。

## 安装指南

### Gradle (Kotlin DSL)

```kotlin
dependencies {
    implementation("com.alibaba.opensandbox:sandbox:{latest_version}")
}
```

### Maven

```xml
<dependency>
    <groupId>com.alibaba.opensandbox</groupId>
    <artifactId>sandbox</artifactId>
    <version>{latest_version}</version>
</dependency>
```

## 快速开始

以下示例展示了如何创建一个沙箱并执行 Shell 命令。

> **注意**: 在运行此示例之前，请确保 OpenSandbox 服务已启动。服务启动请参考根目录的 [README_zh.md](../../../docs/README_zh.md)。

```java
import com.alibaba.opensandbox.sandbox.Sandbox;
import com.alibaba.opensandbox.sandbox.config.ConnectionConfig;
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxException;
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.Execution;
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.RunCommandRequest;

public class QuickStart {
    public static void main(String[] args) {
        // 1. 配置连接信息
        ConnectionConfig config = ConnectionConfig.builder()
            .domain("api.opensandbox.io")
            .apiKey("your-api-key")
            .build();

        // 2. 使用 try-with-resources 创建 Sandbox
        try (Sandbox sandbox = Sandbox.builder()
                .connectionConfig(config)
                .image("ubuntu")
                .build()) {

            // 3. 执行 Shell 命令
            Execution execution = sandbox
                    .commands()
                    .run("echo 'Hello Sandbox!'");

            // 4. 打印输出
            System.out.println(execution.getLogs().getStdout().get(0).getText());

            // 5. 清理资源 (自动调用 sandbox.close())
            // 注意: 如果希望立即终止远程沙箱实例，仍需显式调用 kill()
            sandbox.kill();
        } catch (SandboxException e) {
            // 处理 Sandbox 特定异常
            System.err.println("沙箱错误: [" + e.getError().getCode() + "] " + e.getError().getMessage());
            System.err.println("Request ID: " + e.getRequestId());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
```

## 核心功能示例

### 1. 生命周期管理

管理沙箱的生命周期，包括续期、暂停、恢复和状态查询。

```java
// 续期沙箱
// 此操作将沙箱的过期时间重置为 (当前时间 + duration)
sandbox.renew(Duration.ofMinutes(30));

// 暂停执行 (挂起所有进程)
sandbox.pause();

// 恢复执行
sandbox.resume();

// 获取当前状态
SandboxInfo info = sandbox.getInfo();
System.out.println("当前状态: " + info.getStatus().getState());
System.out.println("过期时间: " + info.getExpiresAt()); // 使用手动清理模式时为 null
```

通过传入 `timeout(null)` 创建一个不会自动过期的沙箱：

```java
Sandbox manual = Sandbox.builder()
    .connectionConfig(config)
    .image("ubuntu")
    .timeout(null)
    .build();
```

### 2. 自定义健康检查

定义自定义逻辑来判断沙箱是否健康。这可以覆盖默认的 Ping 检查。

```java
Sandbox sandbox = Sandbox.builder()
    .connectionConfig(config)
    .image("nginx:latest")
    // 自定义检查：等待 80 端口可访问
    .healthCheck(sb -> {
        try {
            // 1. 获取沙箱 80 端口映射的外部访问地址
            SandboxEndpoint endpoint = sb.getEndpoint(80);

            // 2. 执行你的连接检查逻辑 (例如 HTTP 请求, Socket 连接等)
            // return checkConnection(endpoint.getEndpoint());
            return true;
        } catch (Exception e) {
            return false;
        }
    })
    .build();
```

### 3. 命令执行与流式响应

执行命令并实时处理输出流。

```java
// 创建流式输出处理器
ExecutionHandlers handlers = ExecutionHandlers.builder()
    .onStdout(msg -> System.out.println("STDOUT: " + msg.getText()))
    .onStderr(msg -> System.err.println("STDERR: " + msg.getText()))
    .onExecutionComplete(complete ->
        System.out.println("命令执行耗时: " + complete.getExecutionTimeInMillis() + "ms")
    )
    .build();

// 带处理器的命令执行
RunCommandRequest request = RunCommandRequest.builder()
    .command("for i in {1..5}; do echo \"Count $i\"; sleep 0.5; done")
    .handlers(handlers)
    .build();

sandbox.commands().run(request);
```

### 4. 全面的文件操作

管理文件和目录，包括读写、列表、删除和搜索。

```java
// 1. 写入文件
sandbox.files().write(List.of(
    WriteEntry.builder()
        .path("/tmp/hello.txt")
        .data("Hello World")
        .mode(644)
        .build()
));

// 2. 读取文件
String content = sandbox.files().readFile("/tmp/hello.txt", "UTF-8", null);
System.out.println("文件内容: " + content);

// 3. 搜索/列表文件
List<EntryInfo> files = sandbox.files().search(
    SearchEntry.builder()
        .path("/tmp")
        .pattern("*.txt")
        .build()
);
files.forEach(f -> System.out.println("找到文件: " + f.getPath()));

// 4. 删除文件
sandbox.files().deleteFiles(List.of("/tmp/hello.txt"));
```

### 5. 沙箱管理 (Sandbox Manager)

使用 `SandboxManager` 进行管理操作，如查询现有沙箱列表。

```java
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxState;

// ...

SandboxManager manager = SandboxManager.builder()
    .connectionConfig(config)
    .build();

// 列出运行中的沙箱
PagedSandboxInfos sandboxes = manager.listSandboxInfos(
    SandboxFilter.builder()
        .states(SandboxState.RUNNING)
        .pageSize(10)
        .page(1)
        .build()
);

sandboxes.getSandboxInfos().forEach(info -> {
    System.out.println("Found sandbox: " + info.getId());
    // 执行管理操作
    manager.killSandbox(info.getId());
});

// 如果在 try-with-resources 中创建 manager，会自动调用 manager.close()；
// 否则请在使用完毕后显式调用：
// manager.close();
```

## 配置说明

### 1. 连接配置 (Connection Configuration)

`ConnectionConfig` 类管理与 API 服务器的连接设置。

| 参数             | 描述                         | 默认值                   | 环境变量               |
| ---------------- | ---------------------------- | ------------------------ | ---------------------- |
| `apiKey`         | 用于认证的 API Key           | 必填                     | `OPEN_SANDBOX_API_KEY` |
| `domain`         | 沙箱服务的端点域名           | 必填 (或 localhost:8080) | `OPEN_SANDBOX_DOMAIN`  |
| `protocol`       | HTTP 协议 (http/https)       | `http`                   | -                      |
| `requestTimeout` | API 请求超时时间             | 30 秒                    | -                      |
| `debug`          | 是否开启 HTTP 请求的调试日志 | `false`                  | -                      |
| `headers`        | 自定义 HTTP 请求头           | 空                       | -                      |
| `connectionPool` | 共享 OkHttp 连接池           | SDK 每实例创建            | -                      |
| `useServerProxy` | 是否通过沙箱服务代理访问 execd/endpoint（适用于客户端无法直连沙箱的场景） | `false` | -                      |

```java
// 1. 基础配置
ConnectionConfig config = ConnectionConfig.builder()
    .apiKey("your-key")
    .domain("api.opensandbox.io")
    .requestTimeout(Duration.ofSeconds(60))
    .build();

// 2. 进阶配置：共享连接池 (Shared Connection Pool)
// 如果你需要创建大量 Sandbox 实例，建议共享连接池以节省资源。
// SDK 默认连接保活时间为 30 秒。
ConnectionPool sharedPool = new ConnectionPool(50, 30, TimeUnit.SECONDS);

ConnectionConfig sharedConfig = ConnectionConfig.builder()
    .apiKey("your-key")
    .domain("api.opensandbox.io")
    .headers(Map.of(
        "X-Custom-Header", "value",
        "X-Request-ID", "trace-123"
    ))
    .connectionPool(sharedPool) // 注入共享连接池
    .build();
```

### 2. 沙箱创建配置 (Sandbox Creation Configuration)

`Sandbox.builder()` 用于配置沙箱环境。

| 参数           | 描述                   | 默认值                          |
| -------------- | ---------------------- | ------------------------------- |
| `image`        | 使用的 Docker 镜像     | 必填                            |
| `timeout`      | 自动终止的超时时间     | 10 分钟                         |
| `entrypoint`   | 容器启动入口命令       | `["tail", "-f", "/dev/null"]`   |
| `resource`     | CPU 和内存限制         | `{"cpu": "1", "memory": "2Gi"}` |
| `env`          | 环境变量               | 空                              |
| `metadata`     | 自定义元数据标签       | 空                              |
| `networkPolicy` | 可选的出站网络策略（egress） | -                         |
| `readyTimeout` | 等待沙箱就绪的最大时间 | 30 秒                           |

注意：`opensandbox.io/` 前缀下的 metadata key 属于系统保留标签，服务端会拒绝用户传入。

```java
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.NetworkPolicy;
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.NetworkRule;

Sandbox sandbox = Sandbox.builder()
    .connectionConfig(config)
    .image("python:3.11")
    .timeout(Duration.ofMinutes(30))
    .resource(map -> {
        map.put("cpu", "2");
        map.put("memory", "4Gi");
    })
    .env("PYTHONPATH", "/app")
    .metadata("project", "demo")
    .networkPolicy(
        NetworkPolicy.builder()
            .defaultAction(NetworkPolicy.DefaultAction.DENY)
            .addEgress(
                NetworkRule.builder()
                    .action(NetworkRule.Action.ALLOW)
                    .target("pypi.org")
                    .build()
            )
            .build()
    )
    .build();
```

### 3. 运行时 Egress 策略更新

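运行时的 egress 查询和 patch 会直接访问沙箱内的 egress sidecar。
SDK 会先解析 `18080` 端口对应的 sandbox endpoint，再调用 sidecar 的 `/policy` API。

Patch 采用合并（merge）语义：
- 对于相同 `target`，新传入的规则优先于已有规则。
- 其他 `target` 的已有规则保持不变。
- 在同一次 patch 请求中，同一 `target` 以第一条规则为准。
- 当前的 `defaultAction` 保持不变。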

```java
NetworkPolicy policy = sandbox.getEgressPolicy();

sandbox.patchEgressRules(
    List.of(
        NetworkRule.builder().action(NetworkRule.Action.ALLOW).target("www.github.com").build(),
        NetworkRule.builder().action(NetworkRule.Action.DENY).target("pypi.org").build()
    )
);
```


================================================
FILE: sdks/sandbox/kotlin/build.gradle.kts
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

@file:Suppress("UnstableApiUsage")

import org.gradle.api.GradleException
import org.jetbrains.kotlin.gradle.dsl.KotlinJvmProjectExtension

/**
 * Extracts a version string from the current Git ref, if it is a tag with the expected prefix.
 *
 * The ref is read from the `GITHUB_REF_NAME` environment variable, falling back to
 * `GITHUB_REF` with a leading `refs/tags/` stripped.
 *
 * @param expectedTagPrefix prefix the ref must start with (for example `java/sandbox/v`)
 * @return the trimmed remainder after the prefix, or `null` when no ref is available,
 *   the ref does not start with the prefix, or nothing follows the prefix
 */
fun Project.resolveVersionFromTag(expectedTagPrefix: String): String? {
    val rawRef = System.getenv("GITHUB_REF_NAME") ?: System.getenv("GITHUB_REF")?.removePrefix("refs/tags/")
    val ref = rawRef?.trim() ?: return null
    if (!ref.startsWith(expectedTagPrefix)) {
        return null
    }
    val version = ref.removePrefix(expectedTagPrefix).trim()
    return if (version.isEmpty()) null else version
}

// Classpath for the build script itself (not for the published artifacts).
buildscript {
    repositories {
        mavenCentral()
        gradlePluginPortal()
    }

    dependencies {
        // Jackson bundle from the version catalog (declared there as build-time only).
        classpath(libs.bundles.jackson.build)
    }
}

// Plugins are put on the classpath here; all except Spotless are `apply false`
// and are applied to subprojects by id further down in this script.
plugins {
    alias(libs.plugins.kotlin.jvm) apply false
    alias(libs.plugins.kotlin.serialization) apply false
    alias(libs.plugins.dokka) apply false
    alias(libs.plugins.spotless)
    alias(libs.plugins.mavenPublish) apply false
}

// Version declared through the `project.version` Gradle property.
// Fail with an explicit message when the property is missing instead of an opaque cast error.
val manualProjectVersion: String =
    project.findProperty("project.version")?.toString()
        ?: throw GradleException(
            "Missing required Gradle property 'project.version'. Define it in gradle.properties or pass -Pproject.version=<version>.",
        )

// Version encoded in the current Git ref, or null when the ref is not a `java/sandbox/v*` tag.
val tagVersion =
    project.resolveVersionFromTag(
        expectedTagPrefix = "java/sandbox/v",
    )

// When building from a release tag, the tag and the declared version must agree.
if (tagVersion != null && tagVersion != manualProjectVersion) {
    throw GradleException(
        "Ref/tag version mismatch: expected version '$manualProjectVersion' from gradle.properties, " +
            "but got '$tagVersion' from tag 'java/sandbox/v...'. Please align the tag and project.version.",
    )
}

// Expose the validated version as an extra property on the root project.
extra["project.version"] = manualProjectVersion

// Shared coordinates and repositories for the root project and every subproject.
allprojects {
    // Group comes from the `project.group` Gradle property; version is the value validated above.
    group = project.findProperty("project.group") as String
    version = manualProjectVersion

    repositories {
        mavenCentral()
    }
}

// Formatting: ktlint for Kotlin sources and for Gradle Kotlin DSL scripts.
configure<com.diffplug.gradle.spotless.SpotlessExtension> {
    kotlin {
        target("**/*.kt")
        // Skip build outputs and generated sources.
        targetExclude("**/build/**/*.kt", "**/bin/**/*.kt", "**/generated/**/*.kt")
        ktlint()
    }
    kotlinGradle {
        target("**/*.gradle.kts")
        ktlint()
    }
}

// Plugin ids resolved once from the version catalog so subprojects can apply them by id.
val kotlinJvmId = libs.plugins.kotlin.jvm.get().pluginId
val kotlinSerializationId = libs.plugins.kotlin.serialization.get().pluginId
val dokkaId = libs.plugins.dokka.get().pluginId
val mavenPublishId = libs.plugins.mavenPublish.get().pluginId

// Common build and publishing setup for every subproject.
subprojects {
    apply(plugin = mavenPublishId)

    // "sandbox-bom" gets only the publishing plugin; all other subprojects are Kotlin/JVM modules.
    if (name != "sandbox-bom") {
        apply(plugin = kotlinJvmId)
        apply(plugin = kotlinSerializationId)
        apply(plugin = dokkaId)

        configure<KotlinJvmProjectExtension> {
            // Build with a Java 8 toolchain.
            jvmToolchain(8)
            compilerOptions {
                javaParameters.set(true)
                freeCompilerArgs.add("-Xjvm-default=all")
            }
        }
    }

    // Include license file in published artifacts (jars/sources jars) for compliance and clarity.
    tasks.withType<Jar>().configureEach {
        from(rootProject.file("LICENSE")) {
            into("META-INF")
        }
    }

    configure<com.vanniktech.maven.publish.MavenPublishBaseExtension> {
        coordinates(project.group.toString(), project.name, project.version.toString())
        publishToMavenCentral()
        // Signing is skipped when any requested task name contains "publishToMavenLocal".
        if (!gradle.startParameter.taskNames.any { it.contains("publishToMavenLocal") }) {
            signAllPublications()
        }
        // POM metadata shared by all published modules.
        pom {
            name.set(project.name)
            description.set("Alibaba Open Sandbox SDK")
            inceptionYear.set("2025")
            url.set("https://github.com/alibaba/OpenSandbox")
            licenses {
                license {
                    name.set("The Apache License, Version 2.0")
                    url.set("https://www.apache.org/licenses/LICENSE-2.0.txt")
                    distribution.set("repo")
                }
            }
            developers {
                developer {
                    id.set("alibaba")
                    name.set("Alibaba Group")
                    url.set("https://github.com/alibaba")
                    email.set("ninan.nn@alibaba-inc.com")
                }
            }
            scm {
                url.set("https://github.com/alibaba/OpenSandbox")
                connection.set("scm:git:https://github.com/alibaba/OpenSandbox.git")
                developerConnection.set("scm:git:ssh://git@github.com/alibaba/OpenSandbox.git")
            }
        }
    }
}


================================================
FILE: sdks/sandbox/kotlin/gradle/libs.versions.toml
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Central version table. Entries below reference these keys through `version.ref`.
[versions]
kotlin = "2.2.21"
kotlinx-serialization = "1.9.0"
okhttp = "4.12.0"
slf4j = "2.0.9"
junit = "5.10.1"
mockk = "1.13.8"
spotless = "6.23.3"
maven-publish = "0.35.0"
dokka = "1.9.10"
openapi-generator = "7.17.0"
jackson = "2.18.2"
junit-platform = "1.13.4"


# Library coordinates. Versions are taken from the [versions] table via `version.ref`.
[libraries]
# Kotlin
kotlin-stdlib = { module = "org.jetbrains.kotlin:kotlin-stdlib", version.ref = "kotlin" }

# HTTP
okhttp = { module = "com.squareup.okhttp3:okhttp", version.ref = "okhttp" }
okhttp-logging = { module = "com.squareup.okhttp3:logging-interceptor", version.ref = "okhttp" }
okhttp-mockwebserver = { module = "com.squareup.okhttp3:mockwebserver", version.ref = "okhttp" }

# Serialization
kotlinx-serialization-json = { module = "org.jetbrains.kotlinx:kotlinx-serialization-json", version.ref = "kotlinx-serialization" }

# Logging
slf4j-api = { module = "org.slf4j:slf4j-api", version.ref = "slf4j" }

# Testing
junit-jupiter = { module = "org.junit.jupiter:junit-jupiter", version.ref = "junit" }
mockk = { module = "io.mockk:mockk", version.ref = "mockk" }
# Must be `version.ref`: a plain `version = "junit-platform"` is read as the literal
# version string "junit-platform" rather than a lookup in the [versions] table.
junit-platform-launcher = { module = "org.junit.platform:junit-platform-launcher", version.ref = "junit-platform" }


# Jackson (build-time)
jackson-core = { module = "com.fasterxml.jackson.core:jackson-core", version.ref = "jackson" }
jackson-databind = { module = "com.fasterxml.jackson.core:jackson-databind", version.ref = "jackson" }
jackson-yaml = { module = "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml", version.ref = "jackson" }
jackson-kotlin = { module = "com.fasterxml.jackson.module:jackson-module-kotlin", version.ref = "jackson" }

# Gradle plugins, referenced from build scripts as `libs.plugins.<alias>`.
[plugins]
# Both Kotlin plugins share the single `kotlin` version.
kotlin-jvm = { id = "org.jetbrains.kotlin.jvm", version.ref = "kotlin" }
kotlin-serialization = { id = "org.jetbrains.kotlin.plugin.serialization", version.ref = "kotlin" }
spotless = { id = "com.diffplug.spotless", version.ref = "spotless" }
mavenPublish = { id = "com.vanniktech.maven.publish", version.ref = "maven-publish" }
dokka = { id = "org.jetbrains.dokka", version.ref = "dokka" }
openapi-generator = { id = "org.openapi.generator", version.ref = "openapi-generator" }

# Named groups of the libraries above, referenced as `libs.bundles.<name>`.
[bundles]
serialization = ["kotlinx-serialization-json"]
testing = ["junit-jupiter", "mockk", "okhttp-mockwebserver"]
# Used on the root build script classpath.
jackson-build = ["jackson-core", "jackson-databind", "jackson-yaml", "jackson-kotlin"]


================================================
FILE: sdks/sandbox/kotlin/gradle/wrapper/gradle-wrapper.properties
================================================
# Gradle Wrapper configuration: pins the Gradle distribution (9.2.1, "all" variant)
# used by the wrapper scripts in this project.
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-9.2.1-all.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists


================================================
FILE: sdks/sandbox/kotlin/gradle.properties
================================================
# Build optimization
org.gradle.jvmargs=-Xmx4g -XX:MaxMetaspaceSize=512m -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8
org.gradle.caching=true
org.gradle.parallel=true

# Project metadata
# project.group and project.version are read by the root build.gradle.kts.
# When building from a `java/sandbox/v<version>` tag, project.version must equal <version>
# or the build fails.
project.group=com.alibaba.opensandbox
project.version=1.0.5
project.description=A Kotlin SDK for Open Sandbox API


================================================
FILE: sdks/sandbox/kotlin/gradlew
================================================
#!/bin/sh

#
# Copyright © 2015 the original authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
#

##############################################################################
#
#   Gradle start up script for POSIX generated by Gradle.
#
#   Important for running:
#
#   (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
#       noncompliant, but you have some other compliant shell such as ksh or
#       bash, then to run this script, type that shell name before the whole
#       command line, like:
#
#           ksh Gradle
#
#       Busybox and similar reduced shells will NOT work, because this script
#       requires all of these POSIX shell features:
#         * functions;
#         * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
#           «${var#prefix}», «${var%suffix}», and «$( cmd )»;
#         * compound commands having a testable exit status, especially «case»;
#         * various built-in commands including «command», «set», and «ulimit».
#
#   Important for patching:
#
#   (2) This script targets any POSIX shell, so it avoids extensions provided
#       by Bash, Ksh, etc; in particular arrays are avoided.
#
#       The "traditional" practice of packing multiple parameters into a
#       space-separated string is a well documented source of bugs and security
#       problems, so this is (mostly) avoided, by progressively accumulating
#       options in "$@", and eventually passing that to Java.
#
#       Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
#       and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
#       see the in-line comments for details.
#
#       There are tweaks for specific operating systems such as AIX, CygWin,
#       Darwin, MinGW, and NonStop.
#
#   (3) This script is generated from the Groovy template
#       https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
#       within the Gradle project.
#
#       You can find Gradle at https://github.com/gradle/gradle/.
#
##############################################################################

# Attempt to set APP_HOME

# Resolve links: $0 may be a link
app_path=$0

# Need this for daisy-chained symlinks.
# Each iteration recomputes APP_HOME as the directory part of app_path, then, while
# app_path is still a symlink, replaces app_path with the link target.
while
    APP_HOME=${app_path%"${app_path##*/}"}  # leaves a trailing /; empty if no leading path
    [ -h "$app_path" ]
do
    # Take the link target from the text after ' -> ' in the `ls -ld` output.
    ls=$( ls -ld "$app_path" )
    link=${ls#*' -> '}
    # Absolute targets are used as-is; relative targets are resolved against APP_HOME.
    case $link in             #(
      /*)   app_path=$link ;; #(
      *)    app_path=$APP_HOME$link ;;
    esac
done

# Base name of the invoked script; passed to the JVM below as -Dorg.gradle.appname.
# shellcheck disable=SC2034
APP_BASE_NAME=${0##*/}
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
# Convert APP_HOME to a physical absolute path; exit if the directory cannot be entered.
APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s\n' "$PWD" ) || exit

# Use the maximum available, or set MAX_FD != -1 to use that value.
# A value starting with "max" is replaced by the hard limit (`ulimit -H -n`) further down;
# an empty value or "soft" leaves the current limit untouched.
MAX_FD=maximum

# Print all arguments as one line on stderr.
warn () {
    echo "$*"
} >&2

# Print all arguments on stderr, surrounded by blank lines, then exit with status 1.
die () {
    echo
    echo "$*"
    echo
    exit 1
} >&2

# OS specific support (must be 'true' or 'false').
# These flags are later executed as commands (`if "$cygwin" ...`), so they must hold
# exactly the words true/false.
cygwin=false
msys=false
darwin=false
nonstop=false
# Set the matching flag from the `uname` output; unmatched systems leave all flags false.
case "$( uname )" in                #(
  CYGWIN* )         cygwin=true  ;; #(
  Darwin* )         darwin=true  ;; #(
  MSYS* | MINGW* )  msys=true    ;; #(
  NONSTOP* )        nonstop=true ;;
esac


# Determine the Java command to use to start the JVM.
# With JAVA_HOME set, the executable is taken from there and must exist and be executable;
# otherwise `java` must be found on PATH. Either failure aborts through die().
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD=$JAVA_HOME/jre/sh/java
    else
        JAVACMD=$JAVA_HOME/bin/java
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD=java
    if ! command -v java >/dev/null 2>&1
    then
        die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
fi

# Increase the maximum file descriptors if we can.
# Skipped on Cygwin, Darwin and NonStop. Failures only produce a warning.
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
    # Step 1: a MAX_FD starting with "max" is replaced by the hard limit.
    case $MAX_FD in #(
      max*)
        # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
        # shellcheck disable=SC2039,SC3045
        MAX_FD=$( ulimit -H -n ) ||
            warn "Could not query maximum file descriptor limit"
    esac
    # Step 2: apply MAX_FD unless it is empty or "soft".
    case $MAX_FD in  #(
      '' | soft) :;; #(
      *)
        # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
        # shellcheck disable=SC2039,SC3045
        ulimit -n "$MAX_FD" ||
            warn "Could not set maximum file descriptor limit to $MAX_FD"
    esac
fi

# Collect all arguments for the java command, stacking in reverse order:
#   * args from the command line
#   * the main class name
#   * -classpath
#   * -D...appname settings
#   * --module-path (only if needed)
#   * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.

# For Cygwin or MSYS, switch paths to Windows format before running java
if "$cygwin" || "$msys" ; then
    APP_HOME=$( cygpath --path --mixed "$APP_HOME" )

    JAVACMD=$( cygpath --unix "$JAVACMD" )

    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    # An argument is converted only when it does not start with "-", starts with "/",
    # and its first path component exists on disk.
    for arg do
        if
            case $arg in                                #(
              -*)   false ;;                            # don't mess with options #(
              /?*)  t=${arg#/} t=/${t%%/*}              # looks like a POSIX filepath
                    [ -e "$t" ] ;;                      #(
              *)    false ;;
            esac
        then
            arg=$( cygpath --path --ignore --mixed "$arg" )
        fi
        # Roll the args list around exactly as many times as the number of
        # args, so each arg winds up back in the position where it started, but
        # possibly modified.
        #
        # NB: a `for` loop captures its iteration list before it begins, so
        # changing the positional parameters here affects neither the number of
        # iterations, nor the values presented in `arg`.
        shift                   # remove old arg
        set -- "$@" "$arg"      # push replacement arg
    done
fi


# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Collect all arguments for the java command:
#   * DEFAULT_JVM_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
#     and any embedded shellness will be escaped.
#   * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
#     treated as '${Hostname}' itself on the command line.

# Prepend the app-name system property and the wrapper jar to the user's arguments.
set -- \
        "-Dorg.gradle.appname=$APP_BASE_NAME" \
        -jar "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" \
        "$@"

# Stop when "xargs" is not available.
# It is needed below to split the JVM option variables into separate arguments.
if ! command -v xargs >/dev/null 2>&1
then
    die "xargs is not available"
fi

# Use "xargs" to parse quoted args.
#
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
#
# In Bash we could simply go:
#
#   readarray ARGS < <( xargs -n1 <<<"$var" ) &&
#   set -- "${ARGS[@]}" "$@"
#
# but POSIX shell has neither arrays nor process substitution, so instead we
# post-process each arg (as a line of input to sed) to backslash-escape any
# character that might be a shell metacharacter, then use eval to reverse
# that process (while maintaining the separation between arguments), and wrap
# the whole thing up as a single "set" statement.
#
# This will of course break if any of these variables contains a newline or
# an unmatched quote.
#

# Resulting order: DEFAULT_JVM_OPTS, JAVA_OPTS, GRADLE_OPTS, then the arguments set above.
eval "set -- $(
        printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
        xargs -n1 |
        sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
        tr '\n' ' '
    )" '"$@"'

# Replace this shell process with the JVM.
exec "$JAVACMD" "$@"


================================================
FILE: sdks/sandbox/kotlin/sandbox/Module.md
================================================
# Module sandbox
The Open Sandbox SDK provides a comprehensive interface for creating and managing secure, isolated execution environments. Built with Kotlin and designed for both Kotlin and Java applications, it offers high-level abstractions for container-based sandboxing with advanced features like file system operations, command execution, and lifecycle management.

## Features

- **🔒 Secure Isolation**: Complete Linux OS access in isolated containers
- **📁 File System Operations**: Create, read, update, delete files and directories
- **⚡ Multi-language Execution**: Support for Python, Java, Bash, and other languages
- **🎛️ Real-time Command Execution**: Streaming output with timeout handling
- **📊 Resource Management**: CPU, memory, and storage constraints
- **🔄 Lifecycle Management**: Create, pause, resume, terminate operations
- **💚 Health Monitoring**: Automatic readiness detection and status tracking
- **🏗️ Fluent API**: Type-safe builder pattern with DSL support

## Quick Start

### Basic Usage

```kotlin
// Create a simple Python sandbox
val sandbox = Sandbox.builder()
    .image("python:3.11")
    .build()

// Write and execute code
sandbox.filesystem.writeFile("hello.py", "print('Hello, World!')")
val result = sandbox.commands.execute("python hello.py")
println(result.stdout) // Output: Hello, World!

// Clean up
sandbox.terminate()
```

### Advanced Configuration

```kotlin
val sandbox = Sandbox.builder()
    .image("myregistry.com/app:latest")
    .imageAuth("username", "password")
    .resource {
        put("cpu", "1000m")      // 1 CPU core
        put("memory", "2Gi")     // 2 GB RAM
        put("gpu", "1")          // 1 GPU device
    }
    .environment {
        put("DEBUG", "true")
        put("LOG_LEVEL", "info")
    }
    .metadata {
        put("project", "my-project")
        put("team", "backend")
    }
    .timeout(Duration.ofMinutes(30))
    .readyTimeout(Duration.ofSeconds(120))
    .build()
```

### File System Operations

```kotlin
// File operations
sandbox.filesystem.writeFile("config.json", """{"debug": true}""")
val content = sandbox.filesystem.readFile("config.json")
val exists = sandbox.filesystem.exists("config.json")

// Directory operations
sandbox.filesystem.createDirectory("workspace")
val files = sandbox.filesystem.listDirectory("workspace")

// Advanced operations
sandbox.filesystem.copy("source.txt", "backup.txt")
sandbox.filesystem.move("old.txt", "new.txt")
sandbox.filesystem.setPermissions("script.sh", "755")
```

### Command Execution

```kotlin
// Synchronous execution
val result = sandbox.commands.execute("ls -la")
println("Exit code: ${result.exitCode}")
println("Output: ${result.stdout}")

// With environment and working directory
val installResult = sandbox.commands.execute(
    command = "npm install",
    workingDirectory = "/app",
    environment = mapOf("NODE_ENV" to "production"),
    timeout = Duration.ofMinutes(5)
)

// Streaming execution
sandbox.commands.executeStreaming("long-running-task").collect { event ->
    when (event) {
        is StreamEvent.Stdout -> print(event.data)
        is StreamEvent.Stderr -> System.err.print(event.data)
        is StreamEvent.Completed -> println("Exit code: ${event.exitCode}")
        is StreamEvent.Error -> println("Error: ${event.message}")
    }
}
```

## Key Components

### Sandbox
The primary interface for interacting with sandbox environments. Provides methods for:
- Creating new sandbox instances with fluent configuration
- Connecting to existing sandboxes by ID
- Managing sandbox lifecycle (pause, resume, terminate)
- Accessing file system and command execution capabilities
- Health monitoring and status checking

### SandboxBuilder
A fluent builder for configuring sandbox creation with:
- Container image specification with authentication
- Resource limits (CPU, memory, GPU)
- Environment variables and metadata
- Timeout and readiness configuration
- API client configuration

### Operations Interfaces

#### FileSystemOperations
- **File Operations**: Read, write, copy, move, delete files
- **Directory Operations**: Create, list, navigate directories
- **Metadata Operations**: Get file info, set permissions, check existence
- **Batch Operations**: Replace multiple files atomically

#### CommandOperations
- **Synchronous Execution**: Run commands and wait for completion
- **Streaming Execution**: Real-time output streaming with Flow API
- **Background Execution**: Non-blocking command execution
- **Shell Scripts**: Execute multi-line shell scripts
- **Command Utilities**: Check command availability, get versions

### Domain Models
- **SandboxState**: Lifecycle states (PROVISIONING, RUNNING, PAUSED, etc.)
- **ExecutionResult**: Command execution output with exit code and timing
- **FileInfo**: File system entry information with permissions and metadata
- **Resource Maps**: Kubernetes-style resource specifications as key-value pairs
- **StreamEvent**: Real-time command output events

### Infrastructure Layer
- **ApiClientAdapter**: HTTP client management with authentication and retry logic
- **SandboxConfig**: Centralized configuration with environment variable support
- **ModelAdapter**: Translation between OpenAPI models and domain types
- **Exception Hierarchy**: Specific exceptions for different error scenarios

## Architecture

The SDK follows a clean architecture with clear separation of concerns:

```
┌─────────────────────────────────────────┐
│              Public API                 │
│         (Sandbox, SandboxBuilder)       │
├─────────────────────────────────────────┤
│            Operations Layer             │
│     (FileSystem, Command, Lifecycle)    │
├─────────────────────────────────────────┤
│           Infrastructure Layer          │
│      (API Clients, Configuration)       │
├─────────────────────────────────────────┤
│             Domain Layer                │
│        (Types, Exceptions, Models)      │
└─────────────────────────────────────────┘
```

## Java Interoperability

The SDK is fully compatible with Java applications:

```java
// Java usage example
Sandbox sandbox = Sandbox.builder()
    .image("openjdk:11")
    .resource(Map.of(
        "cpu", "1000m",
        "memory", "2Gi"
    ))
    .build();

ExecutionResult result = sandbox.getCommands().execute("java -version");
System.out.println("Java version: " + result.getStdout());

sandbox.terminate();
```

## Best Practices

### Resource Management
Always release sandboxes with Kotlin's `use` (try-with-resources in Java) or explicit cleanup:

```kotlin
// Using AutoCloseable
Sandbox.builder()
    .image("python:3.11")
    .build()
    .use { sandbox ->
        // Use sandbox - automatically terminated when exiting
        sandbox.filesystem.writeFile("script.py", "print('Hello')")
        sandbox.commands.execute("python script.py")
    }
```

### Error Handling
Handle specific exception types:

```kotlin
try {
    val sandbox = Sandbox.builder().image("python:3.11").build()
} catch (e: AuthenticationException) {
    // Handle auth errors
} catch (e: TimeoutException) {
    // Handle timeouts
} catch (e: SandboxException) {
    // Handle general sandbox errors
}
```

## Usage Examples

See the [samples](../../samples/) directory for comprehensive usage examples including:
- Basic sandbox creation and usage
- Advanced configuration scenarios
- File system operations
- Command execution patterns
- Error handling strategies


================================================
FILE: sdks/sandbox/kotlin/sandbox/build.gradle.kts
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

dependencies {
    // `api` dependencies are exposed to consumers of this module; `implementation` ones are not.
    implementation(project(":sandbox-api"))
    api(libs.kotlin.stdlib)
    api(libs.slf4j.api)

    implementation(libs.okhttp)
    implementation(libs.okhttp.logging)
    implementation(libs.bundles.serialization)

    // Test-only dependencies.
    testImplementation(libs.bundles.testing)
    testRuntimeOnly(libs.junit.platform.launcher)
}

// Configure test tasks to use JDK 17 and the JUnit Platform.
// `configureEach` configures tasks lazily instead of realizing every Test task at
// configuration time, matching the Dokka block in this script.
tasks.withType<Test>().configureEach {
    javaLauncher.set(
        javaToolchains.launcherFor {
            languageVersion.set(JavaLanguageVersion.of(17))
        },
    )
    useJUnitPlatform()
}

// Kotlin compilation: test compile tasks target JVM 17, and every Kotlin compile task
// keeps parameter names (`javaParameters`).
// `configureEach` applies this lazily rather than realizing every compile task up front.
tasks.withType<org.jetbrains.kotlin.gradle.tasks.KotlinCompile>().configureEach {
    // Test compile tasks are recognised by "test" in the task name.
    if (name.contains("test", ignoreCase = true)) {
        compilerOptions {
            jvmTarget.set(org.jetbrains.kotlin.gradle.dsl.JvmTarget.JVM_17)
        }
    }
    compilerOptions {
        javaParameters.set(true)
    }
}

// Java test compile tasks (those with "test" in the task name) use a JDK 17 compiler.
// `configureEach` applies this lazily rather than realizing every JavaCompile task up front.
tasks.withType<JavaCompile>().configureEach {
    if (name.contains("test", ignoreCase = true)) {
        javaCompiler.set(
            javaToolchains.compilerFor {
                languageVersion.set(JavaLanguageVersion.of(17))
            },
        )
    }
}

// API docs: name the module "Sandbox" and include Module.md in the generated documentation.
tasks.withType<org.jetbrains.dokka.gradle.DokkaTask>().configureEach {
    dokkaSourceSets {
        named("main") {
            moduleName.set("Sandbox")
            includes.from("Module.md")
        }
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/HttpClientProvider.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox

import com.alibaba.opensandbox.sandbox.config.ConnectionConfig
import okhttp3.ConnectionPool
import okhttp3.Interceptor
import okhttp3.OkHttpClient
import okhttp3.Response
import okhttp3.logging.HttpLoggingInterceptor
import org.slf4j.LoggerFactory
import java.util.concurrent.TimeUnit

/**
 * Creates and owns the OkHttp clients used for a given [ConnectionConfig].
 *
 * Three clients are exposed. Each is built on first access and all share one connection pool:
 * - [httpClient]: request-timeout based limits, no API key header.
 * - [authenticatedClient]: same limits, plus the `OPEN-SANDBOX-API-KEY` header.
 * - [sseClient]: no read or call timeout, plus SSE request headers.
 *
 * The pool comes from the config when one is supplied; otherwise a pool with
 * 32 idle connections and a 30 second keep-alive is created here.
 */
class HttpClientProvider(
    val config: ConnectionConfig,
) : AutoCloseable {
    private val logger = LoggerFactory.getLogger(HttpClientProvider::class.java)

    private val defaultMaxIdleConnections = 32
    private val defaultKeepAliveDurationSeconds = 30L

    private val connectionPool =
        config.connectionPool
            ?: ConnectionPool(defaultMaxIdleConnections, defaultKeepAliveDurationSeconds, TimeUnit.SECONDS)

    // True when the pool above was created here rather than supplied through the config.
    private val connectionPoolOwnedBySdk: Boolean = config.connectionPool == null

    /** Returns a fresh builder with the shared pool, User-Agent and configured extra headers. */
    private fun newBaseBuilder(): OkHttpClient.Builder {
        val builder = OkHttpClient.Builder()
        builder.connectionPool(connectionPool)
        builder.addInterceptor(UserAgentInterceptor(config.userAgent))
        builder.addInterceptor(ExtraHeadersInterceptor(config.headers))
        return builder
    }

    // The Lazy holders are kept as fields so close() can tell which clients were ever built.
    private val plainClientHolder: Lazy<OkHttpClient> =
        lazy {
            val builder = newBaseBuilder().applyStandardTimeouts()
            builder.addLoggingInterceptor().build()
        }

    val httpClient: OkHttpClient by plainClientHolder

    private val authenticatedClientHolder: Lazy<OkHttpClient> =
        lazy {
            val builder = newBaseBuilder().applyStandardTimeouts()
            // Auth is registered before logging.
            builder.addInterceptor(AuthenticationInterceptor(config.getApiKey()))
            builder.addLoggingInterceptor().build()
        }

    val authenticatedClient: OkHttpClient by authenticatedClientHolder

    private val sseClientHolder: Lazy<OkHttpClient> =
        lazy {
            val builder = newBaseBuilder()
            builder.connectTimeout(config.requestTimeout.toMillis(), TimeUnit.MILLISECONDS)
            // Zero disables the read and call timeouts for streaming responses.
            builder.readTimeout(0, TimeUnit.MILLISECONDS)
            builder.writeTimeout(config.requestTimeout.toMillis(), TimeUnit.MILLISECONDS)
            builder.callTimeout(0, TimeUnit.MILLISECONDS)
            builder.addInterceptor(ExtraHeadersInterceptor(getSseHeaders()))
            builder.addLoggingInterceptor().build()
        }

    val sseClient: OkHttpClient by sseClientHolder

    // --- Builder helpers ---

    /** Sets connect, read, write and call timeouts to the configured request timeout. */
    private fun OkHttpClient.Builder.applyStandardTimeouts(): OkHttpClient.Builder =
        apply {
            val millis = config.requestTimeout.toMillis()
            connectTimeout(millis, TimeUnit.MILLISECONDS)
            readTimeout(millis, TimeUnit.MILLISECONDS)
            writeTimeout(millis, TimeUnit.MILLISECONDS)
            callTimeout(millis, TimeUnit.MILLISECONDS)
        }

    /** Adds header-level HTTP logging when debug is enabled; otherwise leaves the builder unchanged. */
    private fun OkHttpClient.Builder.addLoggingInterceptor(): OkHttpClient.Builder {
        if (!config.debug) {
            return this
        }
        val httpLogging = HttpLoggingInterceptor { line -> logger.debug(line) }
        httpLogging.level = HttpLoggingInterceptor.Level.HEADERS
        // Keep credentials out of the logs.
        httpLogging.redactHeader("OPEN-SANDBOX-API-KEY")
        httpLogging.redactHeader("Authorization")
        addInterceptor(httpLogging)
        return this
    }

    /** Request headers added to every call made through the SSE client. */
    private fun getSseHeaders(): Map<String, String> =
        mapOf(
            "Accept" to "text/event-stream",
            "Cache-Control" to "no-cache",
        )

    // --- Interceptors ---

    /** Sets the `User-Agent` header, replacing any existing value. */
    private class UserAgentInterceptor(private val userAgent: String) : Interceptor {
        override fun intercept(chain: Interceptor.Chain): Response {
            val request =
                chain.request().newBuilder()
                    .header("User-Agent", userAgent)
                    .build()
            return chain.proceed(request)
        }
    }

    /** Sets the `OPEN-SANDBOX-API-KEY` header, replacing any existing value. */
    private class AuthenticationInterceptor(private val apiKey: String) : Interceptor {
        override fun intercept(chain: Interceptor.Chain): Response {
            val request =
                chain.request().newBuilder()
                    .header("OPEN-SANDBOX-API-KEY", apiKey)
                    .build()
            return chain.proceed(request)
        }
    }

    /** Appends the given headers to each request; existing values with the same name are kept. */
    private class ExtraHeadersInterceptor(private val headers: Map<String, String>) : Interceptor {
        override fun intercept(chain: Interceptor.Chain): Response {
            if (headers.isEmpty()) {
                return chain.proceed(chain.request())
            }
            val requestBuilder = chain.request().newBuilder()
            for ((name, value) in headers) {
                requestBuilder.addHeader(name, value)
            }
            return chain.proceed(requestBuilder.build())
        }
    }

    // --- Cleanup ---

    /**
     * Shuts down every client that was actually created and, when the connection pool was
     * created by this provider and is not flagged as user-managed, evicts its connections.
     */
    override fun close() {
        listOf(
            plainClientHolder to "http client",
            authenticatedClientHolder to "authenticated client",
            sseClientHolder to "sse client",
        ).forEach { (holder, label) -> shutdownClientQuietly(holder, label) }

        if (!connectionPoolOwnedBySdk || config.connectionPoolManagedByUser) {
            return
        }
        try {
            connectionPool.evictAll()
        } catch (e: Exception) {
            logger.warn("Error evicting connection pool", e)
        }
    }

    /** Cancels all calls and stops the dispatcher executor of an initialized client; failures are logged. */
    private fun shutdownClientQuietly(
        lazyClient: Lazy<OkHttpClient>,
        name: String,
    ) {
        // Never force creation of a client just to shut it down.
        if (!lazyClient.isInitialized()) {
            return
        }
        try {
            val dispatcher = lazyClient.value.dispatcher
            dispatcher.cancelAll()
            dispatcher.executorService.shutdownNow()
        } catch (e: Exception) {
            logger.warn("Error closing $name", e)
        }
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/Sandbox.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox

import com.alibaba.opensandbox.sandbox.config.ConnectionConfig
import com.alibaba.opensandbox.sandbox.domain.exceptions.InvalidArgumentException
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxException
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxInternalException
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxReadyTimeoutException
import com.alibaba.opensandbox.sandbox.domain.models.execd.DEFAULT_EGRESS_PORT
import com.alibaba.opensandbox.sandbox.domain.models.execd.DEFAULT_EXECD_PORT
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.NetworkPolicy
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.NetworkRule
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxEndpoint
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxImageSpec
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxInfo
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxMetrics
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxRenewResponse
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.Volume
import com.alibaba.opensandbox.sandbox.domain.services.Commands
import com.alibaba.opensandbox.sandbox.domain.services.Egress
import com.alibaba.opensandbox.sandbox.domain.services.Filesystem
import com.alibaba.opensandbox.sandbox.domain.services.Health
import com.alibaba.opensandbox.sandbox.domain.services.Metrics
import com.alibaba.opensandbox.sandbox.domain.services.Sandboxes
import com.alibaba.opensandbox.sandbox.infrastructure.factory.AdapterFactory
import org.slf4j.LoggerFactory
import java.time.Duration
import java.time.OffsetDateTime

/**
 * Main entrypoint for the Open Sandbox SDK providing secure, isolated execution environments.
 *
 * This class provides a comprehensive interface for interacting with containerized sandbox
 * environments, combining lifecycle management with high-level operations for file system
 * access, command execution, and real-time monitoring.
 *
 * ## Key Features
 *
 * - **Secure Isolation**: Complete Linux OS access in isolated containers
 * - **File System Operations**: Create, read, update, delete files and directories
 * - **Multi-language Execution**: Support for Python, Java, Bash, and other languages
 * - **Real-time Command Execution**: Streaming output with timeout handling
 * - **Resource Management**: CPU, memory, and storage constraints
 * - **Lifecycle Management**: Create, pause, resume, terminate operations
 * - **Health Monitoring**: Automatic readiness detection and status tracking
 *
 * ## Usage Example
 *
 * ```kotlin
 * // Create and configure a sandbox
 * val sandbox = Sandbox.builder()
 *     .image("python:3.11")
 *     .resource(mapOf("cpu" to "1", "memory" to "500Mi"))
 *     .timeout(Duration.ofMinutes(30))
 *     .build()
 *
 * // Use the sandbox
 * sandbox.writeFile("script.py", "print('Hello World')")
 * val result = sandbox.execute("python script.py")
 * println(result.stdout) // Output: Hello World
 *
 * // Always clean up resources
 * sandbox.terminate()
 * ```
 *
 */
class Sandbox internal constructor(
    val id: String,
    private val sandboxService: Sandboxes,
    private val fileSystemService: Filesystem,
    private val commandService: Commands,
    private val healthService: Health,
    private val metricsService: Metrics,
    private val egressService: Egress,
    private val customHealthCheck: ((sandbox: Sandbox) -> Boolean)? = null,
    private val httpClientProvider: HttpClientProvider,
) : AutoCloseable {
    private val logger = LoggerFactory.getLogger(Sandbox::class.java)

    /**
     * Exposes the filesystem service bound to this sandbox.
     *
     * Use it to write, read, list, and delete files and directories.
     *
     * @return Service for filesystem manipulation
     */
    fun files(): Filesystem = fileSystemService

    /**
     * Exposes the command execution service bound to this sandbox.
     *
     * Use it to run shell commands, capture their output, and manage processes.
     *
     * @return Service for command execution
     */
    fun commands(): Commands = commandService

    /**
     * Exposes the metrics service bound to this sandbox.
     *
     * Use it to retrieve resource usage statistics (CPU, memory) and other performance metrics.
     *
     * @return Service for metrics retrieval
     */
    fun metrics(): Metrics = metricsService

    /**
     * Exposes the HTTP client provider shared by this sandbox's services.
     *
     * The provider gives access to the underlying HTTP client resources that were initialized
     * from the connection config.
     *
     * @return The HTTP client provider backing this sandbox
     */
    fun httpClientProvider(): HttpClientProvider = httpClientProvider

    companion object {
        private val logger = LoggerFactory.getLogger(Sandbox::class.java)

        /**
         * Entry point for configuring and creating a new sandbox.
         *
         * @return A fresh [Builder] instance
         */
        @JvmStatic
        fun builder(): Builder {
            return Builder()
        }

        /**
         * Entry point for configuring a connection to an existing sandbox.
         *
         * @return A fresh [Connector] instance
         */
        @JvmStatic
        fun connector(): Connector {
            return Connector()
        }

        /**
         * Creates a new [Resumer].
         *
         * @return A fresh Resumer instance
         */
        @JvmStatic
        fun resumer(): Resumer {
            return Resumer()
        }

        /**
         * Outcome of an initialization action, tagging whether the sandbox was created by the
         * current operation or already existed.
         *
         * The distinction drives failure cleanup in [initializeSandbox]: only a [NewSandbox] is
         * killed when a later initialization step fails.
         */
        private sealed class InitializationResult {
            // ID of the sandbox the operation targets.
            abstract val id: String

            // The sandbox was created by the current operation.
            data class NewSandbox(override val id: String) : InitializationResult()

            // The sandbox already existed before the current operation (connect / resume).
            data class ExistingSandbox(override val id: String) : InitializationResult()
        }

        /**
         * Common initialization logic for create, connect, and resume operations.
         *
         * Builds an [HttpClientProvider] and [AdapterFactory] from [connectionConfig], runs
         * [initAction] to obtain the sandbox ID, resolves the execd and egress endpoints, wires the
         * service adapters into a [Sandbox], and (unless [skipHealthCheck] is set) waits for readiness.
         *
         * On failure the HTTP client provider is closed. If the failure happened after [initAction]
         * reported a newly created sandbox, that sandbox is killed as well so it does not linger.
         * If the failure was an [InterruptedException], the thread's interrupt status is restored
         * before the wrapped exception is thrown.
         *
         * @param operationName Operation name for logging
         * @param connectionConfig Connection configuration
         * @param healthCheck Custom health check function
         * @param timeout Timeout for readiness check
         * @param healthCheckPollingInterval Polling interval for health check
         * @param skipHealthCheck When true, the readiness check is skipped and the returned sandbox may not be ready yet
         * @param initAction Initialization action that returns the sandbox ID and type
         * @return Fully initialized Sandbox instance
         * @throws SandboxException if initialization fails
         */
        private fun initializeSandbox(
            operationName: String,
            connectionConfig: ConnectionConfig,
            healthCheck: ((Sandbox) -> Boolean)?,
            timeout: Duration,
            healthCheckPollingInterval: Duration,
            skipHealthCheck: Boolean,
            initAction: (Sandboxes) -> InitializationResult,
        ): Sandbox {
            logger.info("Starting {} operation", operationName)

            val httpClientProvider = HttpClientProvider(connectionConfig)
            val factory = AdapterFactory(httpClientProvider)
            // Tracked outside the try block so the catch block knows how far initialization got.
            var initResult: InitializationResult? = null
            var sandboxService: Sandboxes? = null

            try {
                sandboxService = factory.createSandboxes()
                initResult = initAction(sandboxService)

                val sandboxId = initResult.id

                val execdEndpoint =
                    sandboxService.getSandboxEndpoint(
                        sandboxId,
                        DEFAULT_EXECD_PORT,
                        connectionConfig.useServerProxy,
                    )
                val fileSystemService = factory.createFilesystem(execdEndpoint)
                val commandService = factory.createCommands(execdEndpoint)
                val metricsService = factory.createMetrics(execdEndpoint)
                val healthService = factory.createHealth(execdEndpoint)
                val egressEndpoint =
                    sandboxService.getSandboxEndpoint(
                        sandboxId,
                        DEFAULT_EGRESS_PORT,
                        connectionConfig.useServerProxy,
                    )
                val egressService = factory.createEgress(egressEndpoint)

                val sandbox =
                    Sandbox(
                        id = sandboxId,
                        sandboxService = sandboxService,
                        fileSystemService = fileSystemService,
                        commandService = commandService,
                        metricsService = metricsService,
                        healthService = healthService,
                        egressService = egressService,
                        customHealthCheck = healthCheck,
                        httpClientProvider = httpClientProvider,
                    )

                if (!skipHealthCheck) {
                    sandbox.checkReady(timeout, healthCheckPollingInterval)
                    logger.info("{} operation completed for sandbox {}", operationName, sandboxId)
                } else {
                    logger.info(
                        "{} operation completed for sandbox {} (skipHealthCheck=true, sandbox may not be ready yet)",
                        operationName,
                        sandboxId,
                    )
                }

                return sandbox
            } catch (e: Exception) {
                // Only a sandbox created by this very call is killed; existing ones are left alone.
                if (initResult is InitializationResult.NewSandbox && sandboxService != null) {
                    try {
                        logger.warn(
                            "Sandbox creation failed during initialization. Attempting to terminate zombie sandbox: {}",
                            initResult.id,
                        )
                        sandboxService.killSandbox(initResult.id)
                    } catch (cleanupEx: Exception) {
                        logger.error("Failed to clean up sandbox {} after creation failure", initResult.id, cleanupEx)
                        e.addSuppressed(cleanupEx)
                    }
                }

                httpClientProvider.close()

                if (e is InterruptedException) {
                    // The exception is about to be wrapped, which would otherwise hide the interruption
                    // from callers. Restored only after cleanup so the calls above run without a
                    // pending interrupt.
                    Thread.currentThread().interrupt()
                }

                when (e) {
                    is SandboxException -> throw e
                    else -> {
                        logger.error("Unexpected exception during {}", operationName, e)
                        throw SandboxInternalException(
                            message = "Failed to $operationName: ${e.message}",
                            cause = e,
                        )
                    }
                }
            }
        }

        /**
         * Creates a new sandbox and returns a [Sandbox] bound to it.
         *
         * The creation request is issued as the init action of [initializeSandbox], so the result is
         * tagged as a newly created sandbox.
         *
         * @param imageSpec Container image specification
         * @param entrypoint Sandbox entrypoint command
         * @param env Environment variables
         * @param metadata Metadata for the sandbox
         * @param timeout Sandbox timeout (automatic termination time); null is logged as "manual-cleanup"
         * @param readyTimeout Timeout for waiting for sandbox readiness
         * @param resource Resource limits
         * @param networkPolicy Optional outbound network policy (egress)
         * @param connectionConfig Connection configuration
         * @param healthCheck Custom health check function (optional)
         * @param healthCheckPollingInterval Polling interval for readiness/health check
         * @param extensions Extension parameters for server-side customized behaviors
         * @param skipHealthCheck When true, do not wait for readiness; the sandbox may not be ready yet
         * @param volumes Optional list of volume mounts for persistent storage
         * @return Sandbox instance (ready unless [skipHealthCheck] is true)
         * @throws SandboxException if sandbox creation or initialization fails
         */
        private fun create(
            imageSpec: SandboxImageSpec,
            entrypoint: List<String>,
            env: Map<String, String>,
            metadata: Map<String, String>,
            timeout: Duration?,
            readyTimeout: Duration,
            resource: Map<String, String>,
            networkPolicy: NetworkPolicy?,
            connectionConfig: ConnectionConfig,
            healthCheck: ((Sandbox) -> Boolean)? = null,
            healthCheckPollingInterval: Duration,
            extensions: Map<String, String>,
            skipHealthCheck: Boolean,
            volumes: List<Volume>?,
        ): Sandbox {
            val timeoutDescription = timeout?.let { "${it.seconds}s" } ?: "manual-cleanup"

            // Issues the server-side create request and tags the result as a new sandbox.
            val createAction: (Sandboxes) -> InitializationResult = { service ->
                val created =
                    service.createSandbox(
                        imageSpec,
                        entrypoint,
                        env,
                        metadata,
                        timeout,
                        resource,
                        networkPolicy,
                        extensions,
                        volumes,
                    )
                InitializationResult.NewSandbox(created.id)
            }

            return initializeSandbox(
                operationName = "create sandbox with image ${imageSpec.image} (timeout: $timeoutDescription)",
                connectionConfig = connectionConfig,
                healthCheck = healthCheck,
                timeout = readyTimeout,
                healthCheckPollingInterval = healthCheckPollingInterval,
                skipHealthCheck = skipHealthCheck,
                initAction = createAction,
            )
        }

        /**
         * Connects to an existing sandbox instance by ID.
         *
         * No server-side lifecycle call is made by the init action; the sandbox ID is simply handed
         * to [initializeSandbox] as an existing sandbox.
         *
         * @param sandboxId Unique identifier of the existing sandbox
         * @param connectionConfig Connection configuration
         * @param healthCheck Custom health check function (optional)
         * @param connectTimeout Max time to wait for the sandbox to become ready
         * @param healthCheckPollingInterval Polling interval for readiness/health check
         * @param skipHealthCheck When true, do not wait for readiness
         * @return Connected Sandbox instance
         * @throws SandboxException if connection fails
         */
        private fun connect(
            sandboxId: String,
            connectionConfig: ConnectionConfig,
            healthCheck: ((Sandbox) -> Boolean)? = null,
            connectTimeout: Duration,
            healthCheckPollingInterval: Duration,
            skipHealthCheck: Boolean,
        ): Sandbox =
            initializeSandbox(
                operationName = "connect to sandbox $sandboxId",
                connectionConfig = connectionConfig,
                healthCheck = healthCheck,
                timeout = connectTimeout,
                healthCheckPollingInterval = healthCheckPollingInterval,
                skipHealthCheck = skipHealthCheck,
                initAction = { InitializationResult.ExistingSandbox(sandboxId) },
            )

        /**
         * Resumes a paused sandbox and returns a [Sandbox] bound to it.
         *
         * The init action calls the server-side resume operation; [initializeSandbox] then resolves
         * the endpoints again, rebuilds the service adapters, and (unless [skipHealthCheck] is set)
         * polls for readiness until [resumeTimeout] elapses.
         *
         * @param sandboxId Sandbox ID to resume
         * @param connectionConfig Connection configuration
         * @param healthCheck Optional custom health check; falls back to [Sandbox.ping]
         * @param resumeTimeout Max time to wait for the sandbox to become ready after resuming
         * @param healthCheckPollingInterval Polling interval for readiness/health check
         * @param skipHealthCheck When true, do not wait for readiness
         * @return Resumed Sandbox instance
         * @throws SandboxException if resume or readiness check fails
         */
        private fun resume(
            sandboxId: String,
            connectionConfig: ConnectionConfig,
            healthCheck: ((Sandbox) -> Boolean)? = null,
            resumeTimeout: Duration,
            healthCheckPollingInterval: Duration,
            skipHealthCheck: Boolean,
        ): Sandbox =
            initializeSandbox(
                operationName = "resume sandbox $sandboxId",
                connectionConfig = connectionConfig,
                healthCheck = healthCheck,
                timeout = resumeTimeout,
                healthCheckPollingInterval = healthCheckPollingInterval,
                skipHealthCheck = skipHealthCheck,
                initAction = { service ->
                    service.resumeSandbox(sandboxId)
                    InitializationResult.ExistingSandbox(sandboxId)
                },
            )
    }

    /**
     * Fetches the current information for this sandbox from the sandbox service.
     *
     * @return Current sandbox info including state and metadata
     * @throws SandboxException if the info cannot be retrieved
     */
    fun getInfo(): SandboxInfo = sandboxService.getSandboxInfo(id)

    /**
     * Resolves the endpoint of this sandbox for the given port.
     *
     * The `useServerProxy` setting of the connection config is forwarded to the sandbox service.
     *
     * @param port Port inside the sandbox to resolve an endpoint for
     * @return Endpoint for the requested port
     * @throws SandboxException if the endpoint cannot be retrieved
     */
    fun getEndpoint(port: Int): SandboxEndpoint {
        val viaServerProxy = httpClientProvider.config.useServerProxy
        return sandboxService.getSandboxEndpoint(id, port, viaServerProxy)
    }

    /**
     * Retrieves the metrics for this sandbox from the metrics service.
     *
     * @return Metrics reported for this sandbox
     */
    fun getMetrics(): SandboxMetrics = metricsService.getMetrics(id)

    /**
     * Renew the sandbox expiration time to delay automatic termination.
     *
     * The new expiration time is the current time plus [timeout]. It is computed once, so the
     * value written to the log is exactly the value sent to the server.
     *
     * @param timeout Duration to add to the current time to set the new expiration
     * @return Response returned by the sandbox service for the renewal
     * @throws SandboxException if the operation fails
     */
    fun renew(timeout: Duration): SandboxRenewResponse {
        val newExpiration = OffsetDateTime.now().plus(timeout)
        logger.info("Renew sandbox {} timeout, estimated expiration to {}", id, newExpiration)
        return sandboxService.renewSandboxExpiration(id, newExpiration)
    }

    /**
     * Fetches the egress policy currently in effect for this sandbox from the egress service.
     *
     * @return Current egress network policy
     * @throws SandboxException if operation fails
     */
    fun getEgressPolicy(): NetworkPolicy = egressService.getPolicy()

    /**
     * Patches egress rules for this sandbox using sidecar merge semantics.
     *
     * Incoming rules take priority over existing rules with the same target.
     * Existing rules for other targets remain unchanged. Within one patch payload,
     * the first rule for a target wins. The current defaultAction is preserved.
     *
     * @param rules Rules to merge into the current egress policy; forwarded as-is to the egress service
     * @throws SandboxException if operation fails
     */
    fun patchEgressRules(rules: List<NetworkRule>) {
        egressService.patchRules(rules)
    }

    /**
     * Pauses the sandbox while preserving its state.
     *
     * The sandbox will transition to PAUSED state and can be resumed later.
     * All running processes will be suspended.
     *
     * The pause request is logged at info level before it is sent to the sandbox service.
     *
     * @throws SandboxException if pause operation fails
     */
    fun pause() {
        logger.info("Pausing sandbox: {}", id)
        sandboxService.pauseSandbox(id)
    }

    /**
     * Terminates the remote sandbox instance.
     *
     * This method sends a termination signal to the remote sandbox instance, causing it to stop immediately.
     * This is an irreversible operation.
     *
     * Note: This method does NOT close the local `Sandbox` object resources (like connection pools).
     * You should call `close()` or use a try-with-resources block to clean up local resources.
     *
     * @throws SandboxException if termination fails
     */
    fun kill() {
        sandboxService.killSandbox(id)
    }

    /**
     * Releases the local resources held by this sandbox handle.
     *
     * Closes the HTTP client provider associated with this instance. It does **NOT** terminate the
     * remote sandbox; call [kill] first if the remote sandbox should be terminated too.
     *
     * If this sandbox was created with a user-managed (shared) connection pool, the pool will NOT be closed.
     * If it was created with a default (dedicated) pool, the pool will be evicted.
     *
     * Any exception raised while closing is logged as a warning and not rethrown.
     */
    override fun close() {
        try {
            httpClientProvider.close()
        } catch (closeError: Exception) {
            logger.warn("Error closing resources", closeError)
        }
    }

    /**
     * Polls [isHealthy] until it succeeds or [timeout] elapses.
     *
     * Elapsed time is measured with the monotonic `System.nanoTime()` clock, so wall-clock
     * adjustments do not shorten or lengthen the wait. The pause between attempts is capped at the
     * time remaining, so the method does not overshoot [timeout] by a full polling interval.
     *
     * Exceptions thrown by the health check are treated as a failed attempt and remembered for the
     * final error message. If the calling thread is interrupted while sleeping between attempts,
     * the resulting [InterruptedException] propagates to the caller.
     *
     * @param timeout Maximum time to wait for health check to pass
     * @param pollingInterval Time between health check attempts
     * @throws SandboxReadyTimeoutException if health check doesn't pass within timeout
     * @throws SandboxException if health check fails
     */
    fun checkReady(
        timeout: Duration,
        pollingInterval: Duration,
    ) {
        logger.info("Waiting for sandbox {} to pass health check (timeout: {}s)", id, timeout.seconds)

        val timeoutMillis = timeout.toMillis()
        val pollingMillis = pollingInterval.toMillis()
        val startNanos = System.nanoTime()

        // Milliseconds elapsed since polling started, based on the monotonic clock.
        fun elapsedMillis(): Long = (System.nanoTime() - startNanos) / 1_000_000

        var attempt = 0
        var lastException: Throwable? = null

        while (elapsedMillis() < timeoutMillis) {
            attempt++
            logger.debug("Health check attempt #{} for sandbox {}", attempt, id)

            // Tracks whether THIS attempt threw, independently of earlier attempts.
            var attemptThrew = false
            val isHealthy =
                try {
                    isHealthy()
                } catch (e: Exception) {
                    attemptThrew = true
                    lastException = e
                    logger.debug("Health check attempt #{} failed with exception: {}", attempt, e.message)
                    false
                }

            if (isHealthy) {
                logger.info("Sandbox {} passed health check after {} attempts", id, attempt)
                return
            }

            if (!attemptThrew) {
                logger.debug("Health check attempt #{} returned false", attempt)
            }

            val remainingMillis = timeoutMillis - elapsedMillis()
            if (remainingMillis <= 0) {
                break
            }
            // Never sleep past the deadline.
            Thread.sleep(minOf(pollingMillis, remainingMillis))
        }

        val errorDetail =
            if (lastException != null) {
                "Last error: ${lastException.message}"
            } else {
                "Check returned false continuously"
            }

        val context = "domain=${httpClientProvider.config.getDomain()}, useServerProxy=${httpClientProvider.config.useServerProxy}"
        var suggestion =
            "If this sandbox runs in Docker bridge or remote-network mode, consider enabling useServerProxy=true."
        if (!httpClientProvider.config.useServerProxy) {
            suggestion += " You can also configure server-side [docker].host_ip for direct endpoint access."
        }

        val finalMessage =
            "Sandbox health check timed out after ${timeout.seconds}s ($attempt attempts). $errorDetail " +
                "Connection context: $context. $suggestion"

        logger.error(finalMessage, lastException)

        throw SandboxReadyTimeoutException(
            message = finalMessage,
        )
    }

    /**
     * Reports whether the sandbox is healthy and responsive.
     *
     * Uses the custom health check when one was supplied; otherwise falls back to [ping].
     *
     * @return true if sandbox is healthy, false otherwise
     */
    fun isHealthy(): Boolean = customHealthCheck?.invoke(this) ?: ping()

    /**
     * Pings execd through the health service.
     *
     * @return `true` if execd is reachable and healthy.
     */
    fun ping(): Boolean = healthService.ping(id)

    /**
     * Fluent connector for connecting to existing sandbox instances.
     *
     * This class provides a type-safe, fluent interface for configuring connection
     * parameters to connect to a running sandbox instance.
     *
     * ## Basic Usage
     *
     * ```kotlin
     * val sandbox = Sandbox.connector()
     *     .sandboxId("existing-sandbox-id")
     *     .connect()
     * ```
     *
     * ## Advanced Configuration
     *
     * ```kotlin
     * val sandbox = Sandbox.connector()
     *     .sandboxId("existing-sandbox-id")
     *     .connectionConfig(connectionConfig)
     *     .connectTimeout(Duration.ofSeconds(60))
     *     .healthCheckPollingInterval(Duration.ofMillis(500))
     *     .healthCheck { sandbox -> sandbox.ping() }
     *     .connect()
     * ```
     *
     * Note: a custom health check must not call [Sandbox.isHealthy], because `isHealthy()` itself
     * invokes the custom health check and the call would recurse.
     */
    class Connector internal constructor() {
        /**
         * Sandbox ID to connect to
         */
        private var sandboxId: String? = null

        /**
         * Connection config
         */
        private var connectionConfig: ConnectionConfig? = null

        /**
         * Health check logic
         */
        private var healthCheck: ((Sandbox) -> Boolean)? = null

        /**
         * Max time to wait for the sandbox to become ready after connecting
         */
        private var connectTimeout: Duration = Duration.ofSeconds(30)

        /**
         * Polling interval for readiness/health check while waiting after connecting
         */
        private var healthCheckPollingInterval: Duration = Duration.ofMillis(200)

        /**
         * When true, do NOT wait for sandbox readiness/health during [connect].
         *
         * Default is false (wait until ready).
         */
        private var skipHealthCheck: Boolean = false

        /**
         * Sets the sandbox ID to connect to.
         *
         * @param sandboxId ID of the existing sandbox
         * @return This connector for method chaining
         * @throws InvalidArgumentException if sandboxId is blank
         */
        fun sandboxId(sandboxId: String): Connector {
            // Enforce the documented contract up front instead of failing later on the server call.
            if (sandboxId.isBlank()) {
                throw InvalidArgumentException(
                    message = "Sandbox ID cannot be blank",
                )
            }
            this.sandboxId = sandboxId
            return this
        }

        /**
         * Sets a custom health check used while waiting for readiness.
         *
         * @param healthCheck Function returning true when the sandbox is considered healthy
         * @return This connector for method chaining
         */
        fun healthCheck(healthCheck: (Sandbox) -> Boolean): Connector {
            this.healthCheck = healthCheck
            return this
        }

        /**
         * Sets the connection configuration. When not set, a default-built
         * [ConnectionConfig] is used by [connect].
         *
         * @param connectionConfig Connection configuration
         * @return This connector for method chaining
         */
        fun connectionConfig(connectionConfig: ConnectionConfig): Connector {
            this.connectionConfig = connectionConfig
            return this
        }

        /**
         * Sets the max time to wait for readiness after the connect operation.
         */
        fun connectTimeout(timeout: Duration): Connector {
            this.connectTimeout = timeout
            return this
        }

        /**
         * Sets the polling interval used while waiting for readiness after connecting.
         */
        fun healthCheckPollingInterval(pollingInterval: Duration): Connector {
            this.healthCheckPollingInterval = pollingInterval
            return this
        }

        /**
         * Skip readiness/health check during [connect]. The returned sandbox may not be ready yet.
         */
        fun skipHealthCheck(skip: Boolean = true): Connector {
            this.skipHealthCheck = skip
            return this
        }

        /**
         * Connects to the existing sandbox with the configured parameters.
         *
         * This method performs the following steps:
         * 1. Validates all required configuration
         * 2. Delegates to Sandbox.connect() to connect to the sandbox
         * 3. Returns a connected Sandbox instance
         *
         * @return Connected Sandbox instance
         * @throws InvalidArgumentException if required configuration is missing or invalid
         * @throws SandboxException if sandbox connection fails
         */
        fun connect(): Sandbox {
            // Validate required configuration
            val id =
                sandboxId ?: throw InvalidArgumentException(
                    message = "Sandbox ID must be specified",
                )
            return connect(
                sandboxId = id,
                connectionConfig = connectionConfig ?: ConnectionConfig.builder().build(),
                healthCheck = healthCheck,
                connectTimeout = connectTimeout,
                healthCheckPollingInterval = healthCheckPollingInterval,
                skipHealthCheck = skipHealthCheck,
            )
        }
    }

    /**
     * Fluent builder for creating and configuring sandbox instances.
     *
     * This class provides a type-safe, fluent interface for configuring all aspects
     * of sandbox creation, from sandbox images and resource limits to environment
     * variables and lifecycle settings.
     *
     * ## Basic Usage
     *
     * ```kotlin
     * val sandbox = Sandbox.builder()
     *     .image("python:3.11")
     *     .build()
     * ```
     *
     * ## Advanced Configuration
     *
     * ```kotlin
     * val sandbox = Sandbox.builder()
     *     .image("myregistry.com/app:latest")
     *     .imageAuth("username", "password")
     *     .entrypoint("python", "-u", "app.py")
     *     .resource {
     *         put("cpu", "1000m")
     *         put("memory", "2Gi")
     *     }
     *     .env {
     *         put("LOG_LEVEL", "info")
     *     }
     *     .metadata {
     *         put("project", "my-project")
     *         put("team", "backend")
     *     }
     *     .timeout(Duration.ofMinutes(30))
     *     .readyTimeout(Duration.ofSeconds(120))
     *     .build()
     * ```
     */
    class Builder internal constructor() {
        /**
         * Image config
         */
        private var imageSpec: SandboxImageSpec? = null

        /**
         * Sandbox entrypoint
         */
        private var entrypoint: List<String> = listOf("tail", "-f", "/dev/null")

        /**
         * Resource limits config
         */
        private val resource = mutableMapOf("cpu" to "1", "memory" to "2Gi")

        /**
         * Env
         */
        private val env = mutableMapOf<String, String>()

        /**
         * Metadata
         */
        private val metadata = mutableMapOf<String, String>()

        /**
         * Optional extension parameters for server-side custom behaviors.
         *
         * This map is treated as opaque and is sent to the server as-is.
         * Prefer namespaced keys (e.g. `storage.id`) to avoid collisions.
         */
        private val extensions = mutableMapOf<String, String>()

        /**
         * Optional outbound network policy (egress).
         */
        private var networkPolicy: NetworkPolicy? = null

        /**
         * Optional list of volume mounts for persistent storage.
         */
        private val volumes = mutableListOf<Volume>()

        /**
         * Lifecycle config
         */
        private var timeout: Duration? = Duration.ofSeconds(600)
        private var readyTimeout: Duration = Duration.ofSeconds(30)
        private var healthCheckPollingInterval: Duration = Duration.ofMillis(200)
        private var healthCheck: ((Sandbox) -> Boolean)? = null

        /**
         * When true, do NOT wait for sandbox readiness/health during [build].
         *
         * Default is false (wait until ready).
         */
        private var skipHealthCheck: Boolean = false

        /**
         * Connection config
         */
        private var connectionConfig: ConnectionConfig? = null

        /**
         * Selects the sandbox image by reference, replacing any previously set image spec.
         *
         * @param image Sandbox image reference (e.g., "ubuntu:22.04", "python:3.11")
         * @return This builder for method chaining
         * @throws InvalidArgumentException if image is blank
         */
        fun image(image: String): Builder =
            apply {
                if (image.isBlank()) {
                    throw InvalidArgumentException(
                        message = "Image cannot be blank",
                    )
                }
                imageSpec = SandboxImageSpec.builder().image(image).build()
            }

        /**
         * Uses the given image specification as-is.
         *
         * @param imageSpec Complete image specification including image and optional auth
         * @return This builder for method chaining
         */
        fun imageSpec(imageSpec: SandboxImageSpec): Builder =
            apply {
                this.imageSpec = imageSpec
            }

        /**
         * Sets the entrypoint command for the sandbox.
         *
         * The list is copied, so later changes to the caller's list do not affect this builder
         * (matching the vararg overload, which also stores its own copy).
         *
         * @param entrypoint List of command and arguments to use as entrypoint
         * @return This builder for method chaining
         */
        fun entrypoint(entrypoint: List<String>): Builder {
            this.entrypoint = entrypoint.toList()
            return this
        }

        /**
         * Sets the entrypoint command for the sandbox.
         *
         * @param entrypoint Vararg command and arguments to use as entrypoint
         * @return This builder for method chaining
         */
        fun entrypoint(vararg entrypoint: String): Builder =
            // Copy the vararg array into a list so the builder owns its own entrypoint value.
            apply { this.entrypoint = entrypoint.toList() }

        /**
         * Sets resource limits for the sandbox using a fluent configuration block.
         *
         * @param configure Configuration block for resource limits
         * @return This builder for method chaining
         */
        fun resource(configure: MutableMap<String, String>.() -> Unit): Builder {
            // Run the caller's block with the builder's own resource map as receiver,
            // so entries are added to (not replacing) what is already configured.
            this.resource.apply(configure)
            return this
        }

        /**
         * Sets resource limits for the sandbox.
         *
         * @param resource Resource limits map
         * @return This builder for method chaining
         */
        fun resource(resource: Map<String, String>): Builder {
            // Replace semantics: drop everything configured so far, then copy the new limits in.
            val target = this.resource
            target.clear()
            target.putAll(resource)
            return this
        }

        /**
         * Adds a single environment variable.
         *
         * @param key Environment variable name
         * @param value Environment variable value
         * @return This builder for method chaining
         */
        fun env(
            key: String,
            value: String,
        ): Builder {
            // A usable key is stored (overwriting any previous value for the same name).
            if (key.isNotBlank()) {
                env[key] = value
                return this
            }
            throw InvalidArgumentException(
                message = "Environment variable key cannot be blank",
            )
        }

        /**
         * Adds multiple environment variables.
         *
         * @param env Map of environment variables to add
         * @return This builder for method chaining
         */
        fun env(env: Map<String, String>): Builder {
            // Apply the same rule as the single-entry overload: blank names are rejected.
            // All keys are checked before any insert, so a rejected call leaves the builder untouched.
            if (env.keys.any { it.isBlank() }) {
                throw InvalidArgumentException(
                    message = "Environment variable key cannot be blank",
                )
            }
            this.env.putAll(env)
            return this
        }

        /**
         * Configures environment variables using a fluent configuration block.
         *
         * @param configure Configuration block that receives a mutable map
         * @return This builder for method chaining
         */
        fun env(configure: MutableMap<String, String>.() -> Unit): Builder {
            // The block runs directly against the builder's environment map.
            this.env.apply(configure)
            return this
        }

        /**
         * Adds a single metadata entry.
         *
         * @param key Metadata key
         * @param value Metadata value
         * @return This builder for method chaining
         */
        fun metadata(
            key: String,
            value: String,
        ): Builder {
            // A usable key is stored (overwriting any previous value for the same key).
            if (key.isNotBlank()) {
                metadata[key] = value
                return this
            }
            throw InvalidArgumentException(
                message = "Metadata key cannot be blank",
            )
        }

        /**
         * Adds multiple metadata entries.
         *
         * @param metadata Map of metadata to add
         * @return This builder for method chaining
         */
        fun metadata(metadata: Map<String, String>): Builder {
            // Apply the same rule as the single-entry overload: blank keys are rejected.
            // All keys are checked before any insert, so a rejected call leaves the builder untouched.
            if (metadata.keys.any { it.isBlank() }) {
                throw InvalidArgumentException(
                    message = "Metadata key cannot be blank",
                )
            }
            this.metadata.putAll(metadata)
            return this
        }

        /**
         * Configures metadata using a fluent configuration block.
         *
         * @param configure Configuration block that receives a mutable map
         * @return This builder for method chaining
         */
        fun metadata(configure: MutableMap<String, String>.() -> Unit): Builder {
            // The block runs directly against the builder's metadata map.
            this.metadata.apply(configure)
            return this
        }

        /**
         * Sets a sandbox outbound network policy (egress).
         */
        fun networkPolicy(networkPolicy: NetworkPolicy): Builder =
            // Replaces any previously configured policy with the given one.
            apply { this.networkPolicy = networkPolicy }

        /**
         * Configures a sandbox outbound network policy (egress).
         */
        fun networkPolicy(configure: NetworkPolicy.Builder.() -> Unit): Builder {
            // Build a policy from a fresh NetworkPolicy builder configured by the caller's block.
            this.networkPolicy = NetworkPolicy.builder().apply(configure).build()
            return this
        }

        /**
         * Adds a single volume mount.
         *
         * @param volume Volume configuration
         * @return This builder for method chaining
         */
        fun volume(volume: Volume): Builder =
            // Appends to the volumes collected so far; nothing is replaced.
            apply { this.volumes.add(volume) }

        /**
         * Adds multiple volume mounts.
         *
         * @param volumes List of volume configurations to add
         * @return This builder for method chaining
         */
        fun volumes(volumes: List<Volume>): Builder =
            // Appends every given volume, in order, to those collected so far.
            apply { this.volumes.addAll(volumes) }

        /**
         * Configures a volume mount using a fluent configuration block.
         *
         * @param configure Configuration block for Volume.Builder
         * @return This builder for method chaining
         */
        fun volume(configure: Volume.Builder.() -> Unit): Builder {
            // Build one volume from a fresh Volume builder configured by the caller's block, then append it.
            val configured = Volume.builder().apply(configure).build()
            this.volumes.add(configured)
            return this
        }

        /**
         * Adds a single extension parameter.
         *
         * Extensions are opaque client-side and are passed through to the server.
         * Prefer stable, namespaced keys (e.g. `storage.id`).
         *
         * @throws InvalidArgumentException if [key] is blank
         */
        fun extension(
            key: String,
            value: String,
        ): Builder {
            // A usable key is stored (overwriting any previous value for the same key).
            if (key.isNotBlank()) {
                extensions[key] = value
                return this
            }
            throw InvalidArgumentException(
                message = "Extension key cannot be blank",
            )
        }

        /**
         * Adds multiple extension parameters.
         *
         * Extensions are opaque client-side and are passed through to the server.
         */
        fun extensions(extensions: Map<String, String>): Builder {
            // Apply the same rule as the single-entry [extension] method: blank keys are rejected.
            // All keys are checked before any insert, so a rejected call leaves the builder untouched.
            if (extensions.keys.any { it.isBlank() }) {
                throw InvalidArgumentException(
                    message = "Extension key cannot be blank",
                )
            }
            this.extensions.putAll(extensions)
            return this
        }

        /**
         * Configures extension parameters using a fluent configuration block.
         *
         * Extensions are opaque client-side and are passed through to the server.
         */
        fun extensions(configure: MutableMap<String, String>.() -> Unit): Builder {
            // The block runs directly against the builder's extensions map.
            this.extensions.apply(configure)
            return this
        }

        /**
         * Sets the sandbox timeout (automatic termination time).
         *
         * @param timeout Maximum sandbox lifetime. Pass null to require explicit cleanup.
         * @return This builder for method chaining
         * @throws InvalidArgumentException if timeout is negative or zero
         */
        fun timeout(timeout: Duration?): Builder {
            // null is allowed (explicit cleanup required); only concrete durations are range-checked.
            val nonPositive = timeout?.let { it.isNegative || it.isZero } ?: false
            if (nonPositive) {
                throw InvalidArgumentException(
                    message = "Timeout must be positive, got: $timeout",
                )
            }
            return apply { this.timeout = timeout }
        }

        /**
         * Disables automatic expiration and requires explicit cleanup.
         *
         * This provides a stable Java interop entrypoint for non-expiring sandboxes.
         */
        fun manualCleanup(): Builder =
            // Clearing the timeout is how "no automatic expiration" is represented in this builder.
            apply { this.timeout = null }

        /**
         * Sets the timeout for waiting for sandbox readiness.
         *
         * @param readyTimeout Maximum time to wait for sandbox to become ready
         * @return This builder for method chaining
         * @throws InvalidArgumentException if timeout is negative or zero
         */
        fun readyTimeout(readyTimeout: Duration): Builder {
            // Strictly positive means neither negative nor zero.
            val isPositive = !readyTimeout.isNegative && !readyTimeout.isZero
            if (!isPositive) {
                throw InvalidArgumentException(
                    message = "Ready timeout must be positive, got: $readyTimeout",
                )
            }
            return apply { this.readyTimeout = readyTimeout }
        }

        /**
         * Sets the interval between readiness polling attempts.
         *
         * @param pollingInterval Time between readiness checks
         * @return This builder for method chaining
         * @throws InvalidArgumentException if interval is negative or zero
         */
        fun healthCheckPollingInterval(pollingInterval: Duration): Builder {
            // Strictly positive means neither negative nor zero.
            val isPositive = !pollingInterval.isNegative && !pollingInterval.isZero
            if (!isPositive) {
                throw InvalidArgumentException(
                    message = "Ready polling interval must be positive, got: $pollingInterval",
                )
            }
            return apply { this.healthCheckPollingInterval = pollingInterval }
        }

        /**
         * Sets a custom health check that [build] forwards to sandbox creation.
         *
         * @param healthCheck Predicate evaluated against the sandbox
         * @return This builder for method chaining
         */
        fun healthCheck(healthCheck: (Sandbox) -> Boolean): Builder =
            apply { this.healthCheck = healthCheck }

        /**
         * Skip readiness/health check during [build]. The returned sandbox may not be ready yet.
         */
        fun skipHealthCheck(skip: Boolean = true): Builder =
            // Calling with no argument turns skipping on; pass false to restore the default wait.
            apply { this.skipHealthCheck = skip }

        /**
         * Sets the connection configuration forwarded to sandbox creation by [build].
         *
         * When never called, [build] uses `ConnectionConfig.builder().build()`.
         *
         * @return This builder for method chaining
         */
        fun connectionConfig(connectionConfig: ConnectionConfig): Builder =
            apply { this.connectionConfig = connectionConfig }

        /**
         * Creates and starts the sandbox with the configured parameters.
         *
         * This method performs the following steps:
         * 1. Validates all required configuration
         * 2. Delegates to Sandbox.create() to create the sandbox
         * 3. Returns a fully initialized Sandbox instance
         *
         * @return Fully configured and ready Sandbox instance
         * @throws InvalidArgumentException if required configuration is missing or invalid
         * @throws SandboxException if sandbox creation or initialization fails
         */
        fun build(): Sandbox {
            // An image spec is the only mandatory piece of configuration.
            val spec = imageSpec
            if (spec == null) {
                throw InvalidArgumentException(
                    message = "Sandbox image must be specified",
                )
            }

            // A spec supplied through imageSpec(...) bypasses the check in image(...), so re-check here.
            if (spec.image.isBlank()) {
                throw InvalidArgumentException("Sandbox image cannot be blank")
            }

            // Fall back to a default connection config when none was provided.
            val effectiveConnection = connectionConfig ?: ConnectionConfig.builder().build()
            // No volumes is passed as null rather than an empty list; otherwise pass a snapshot.
            val volumeSnapshot = volumes.takeIf { it.isNotEmpty() }?.toList()

            return create(
                imageSpec = spec,
                entrypoint = entrypoint,
                env = env,
                metadata = metadata,
                timeout = timeout,
                readyTimeout = readyTimeout,
                resource = resource,
                networkPolicy = networkPolicy,
                extensions = extensions,
                connectionConfig = effectiveConnection,
                healthCheckPollingInterval = healthCheckPollingInterval,
                healthCheck = healthCheck,
                skipHealthCheck = skipHealthCheck,
                volumes = volumeSnapshot,
            )
        }
    }

    /**
     * Fluent resumer for resuming paused sandbox instances.
     *
     * This class provides a type-safe, fluent interface for configuring connection parameters
     * and readiness behavior when resuming an existing sandbox.
     *
     * ## Basic Usage
     *
     * ```kotlin
     * val sandbox = Sandbox.resumer()
     *     .sandboxId(existingSandboxId)
     *     .resume()
     * ```
     *
     * ## Advanced Configuration
     *
     * ```kotlin
     * val sandbox = Sandbox.resumer()
     *     .sandboxId(existingSandboxId)
     *     .connectionConfig(ConnectionConfig.builder().apiKey("...").build())
     *     .resumeTimeout(Duration.ofSeconds(60))
     *     .healthCheckPollingInterval(Duration.ofMillis(200))
     *     .healthCheck { it.isHealthy() }
     *     .resume()
     * ```
     */
    class Resumer internal constructor() {
        /**
         * Sandbox ID to resume
         */
        private var sandboxId: String? = null

        /**
         * Connection config. When left null, [resume] falls back to `ConnectionConfig.builder().build()`.
         */
        private var connectionConfig: ConnectionConfig? = null

        /**
         * Health check logic
         */
        private var healthCheck: ((Sandbox) -> Boolean)? = null

        /**
         * Max time to wait for the sandbox to become ready after resuming
         */
        private var resumeTimeout: Duration = Duration.ofSeconds(30)

        /**
         * Polling interval for readiness/health check while waiting for resume
         */
        private var healthCheckPollingInterval: Duration = Duration.ofMillis(200)

        /**
         * When true, do NOT wait for sandbox readiness/health during [resume].
         *
         * Default is false (wait until ready).
         */
        private var skipHealthCheck: Boolean = false

        /**
         * Sets the sandbox ID to resume.
         *
         * @param sandboxId ID of the paused sandbox
         * @return This resumer for method chaining
         * @throws InvalidArgumentException if sandboxId is blank
         */
        fun sandboxId(sandboxId: String): Resumer {
            // Fail fast on an unusable ID instead of sending it to the server.
            if (sandboxId.isBlank()) {
                throw InvalidArgumentException(
                    message = "Sandbox ID cannot be blank",
                )
            }
            this.sandboxId = sandboxId
            return this
        }

        /**
         * Sets a custom health check used by [Sandbox.checkReady] after resuming.
         *
         * If not set, [Sandbox.ping] will be used.
         */
        fun healthCheck(healthCheck: (Sandbox) -> Boolean): Resumer {
            this.healthCheck = healthCheck
            return this
        }

        /**
         * Sets the connection configuration used to talk to the Open Sandbox API.
         */
        fun connectionConfig(connectionConfig: ConnectionConfig): Resumer {
            this.connectionConfig = connectionConfig
            return this
        }

        /**
         * Sets the max time to wait for readiness after the resume operation.
         *
         * @param timeout Maximum time to wait for the sandbox to become ready
         * @return This resumer for method chaining
         * @throws InvalidArgumentException if timeout is negative or zero
         */
        fun resumeTimeout(timeout: Duration): Resumer {
            // Same positivity rule as the sandbox builder's ready timeout.
            if (timeout.isNegative || timeout.isZero) {
                throw InvalidArgumentException(
                    message = "Resume timeout must be positive, got: $timeout",
                )
            }
            this.resumeTimeout = timeout
            return this
        }

        /**
         * Sets the polling interval used while waiting for readiness after resuming.
         *
         * @param pollingInterval Time between readiness checks
         * @return This resumer for method chaining
         * @throws InvalidArgumentException if interval is negative or zero
         */
        fun healthCheckPollingInterval(pollingInterval: Duration): Resumer {
            // Same positivity rule as the sandbox builder's polling interval.
            if (pollingInterval.isNegative || pollingInterval.isZero) {
                throw InvalidArgumentException(
                    message = "Ready polling interval must be positive, got: $pollingInterval",
                )
            }
            this.healthCheckPollingInterval = pollingInterval
            return this
        }

        /**
         * Skip readiness/health check during [resume]. The returned sandbox may not be ready yet.
         */
        fun skipHealthCheck(skip: Boolean = true): Resumer {
            this.skipHealthCheck = skip
            return this
        }

        /**
         * Resumes the sandbox with the configured parameters.
         *
         * This method validates required configuration, performs the server-side resume,
         * rebuilds service adapters, and waits for readiness.
         *
         * @return Resumed and ready Sandbox instance
         * @throws InvalidArgumentException if sandboxId is missing
         * @throws SandboxException if resume or readiness check fails
         */
        fun resume(): Sandbox {
            val id =
                sandboxId ?: throw InvalidArgumentException(
                    message = "Sandbox ID must be specified",
                )

            // Delegates to the parameterised resume(...) overload declared outside this class.
            return resume(
                sandboxId = id,
                connectionConfig = connectionConfig ?: ConnectionConfig.builder().build(),
                healthCheck = healthCheck,
                resumeTimeout = resumeTimeout,
                healthCheckPollingInterval = healthCheckPollingInterval,
                skipHealthCheck = skipHealthCheck,
            )
        }
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/SandboxManager.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox

import com.alibaba.opensandbox.sandbox.config.ConnectionConfig
import com.alibaba.opensandbox.sandbox.domain.exceptions.InvalidArgumentException
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxException
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.PagedSandboxInfos
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxFilter
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxInfo
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxRenewResponse
import com.alibaba.opensandbox.sandbox.domain.services.Sandboxes
import com.alibaba.opensandbox.sandbox.infrastructure.factory.AdapterFactory
import org.slf4j.LoggerFactory
import java.time.Duration
import java.time.OffsetDateTime

/**
 * Sandbox management interface for administrative operations and monitoring sandbox instances.
 *
 * This class provides a centralized interface for managing sandbox instances,
 * enabling administrative operations and sandbox discovery.
 * It focuses on high-level management operations rather than individual sandbox interactions.
 *
 * ## Key Features
 *
 * - **Sandbox Discovery**: List and filter sandbox instances by various criteria
 * - **Administrative Operations**: Individual sandbox management operations
 * - **Connection Pool Management**: Efficient HTTP client reuse for multiple operations
 *
 * ## Usage Example
 *
 * ```kotlin
 * val manager = SandboxManager.builder()
 *     .connectionConfig(connectionConfig)
 *     .build()
 *
 * // List all running sandboxes
 * val runningSandboxes = manager.listSandboxInfos(
 *     SandboxFilter.builder().state("RUNNING").build()
 * )
 *
 * // Individual operations
 * val sandboxId = "sandbox-id"
 * manager.getSandboxInfo(sandboxId)
 * manager.pauseSandbox(sandboxId)
 * manager.resumeSandbox(sandboxId)
 * manager.killSandbox(sandboxId)
 *
 * // Cleanup
 * manager.close()
 * ```
 *
 * **Note**: This class is designed for administrative operations.
 * For individual sandbox interactions, use the [Sandbox] class directly.
 */
class SandboxManager internal constructor(
    private val sandboxService: Sandboxes,
    private val httpClientProvider: HttpClientProvider,
) : AutoCloseable {
    private val logger = LoggerFactory.getLogger(SandboxManager::class.java)

    /**
     * Provides access to the shared HTTP client provider.
     *
     * Allows retrieving the underlying HTTP client resources initialized with the connection config.
     */
    fun httpClientProvider() = httpClientProvider

    companion object {
        /**
         * Creates a new [Builder] for configuring a [SandboxManager].
         */
        @JvmStatic
        fun builder(): Builder = Builder()

        /**
         * Wires a manager from a connection config: one [HttpClientProvider], one [AdapterFactory]
         * on top of it, and the sandbox service created by that factory.
         */
        internal fun create(connectionConfig: ConnectionConfig): SandboxManager {
            val httpClientProvider = HttpClientProvider(connectionConfig)
            val factory = AdapterFactory(httpClientProvider)
            val sandboxService = factory.createSandboxes()
            return SandboxManager(sandboxService, httpClientProvider)
        }
    }

    /**
     * Lists sandboxes matching the given filter.
     *
     * @param filter Criteria forwarded to the sandbox service
     * @return The paged result returned by the sandbox service
     */
    fun listSandboxInfos(filter: SandboxFilter): PagedSandboxInfos {
        return sandboxService.listSandboxes(filter)
    }

    /**
     * Gets information for a single sandbox by its ID.
     *
     * @param sandboxId Sandbox ID to retrieve information for
     * @return SandboxInfo for the specified sandbox
     * @throws SandboxException if the operation fails
     */
    fun getSandboxInfo(sandboxId: String): SandboxInfo {
        logger.debug("Getting info for sandbox: {}", sandboxId)
        return sandboxService.getSandboxInfo(sandboxId)
    }

    /**
     * Terminates a single sandbox.
     *
     * @param sandboxId Sandbox ID to terminate
     * @throws SandboxException if the operation fails
     */
    fun killSandbox(sandboxId: String) {
        logger.info("Terminating sandbox: {}", sandboxId)
        sandboxService.killSandbox(sandboxId)
        logger.info("Successfully terminated sandbox: {}", sandboxId)
    }

    /**
     * Renew expiration time for a single sandbox.
     *
     * The new expiration time will be set to the current time plus the provided duration.
     *
     * @param sandboxId Sandbox ID to renew
     * @param timeout Duration to add to the current time to set the new expiration
     * @return The renew response returned by the sandbox service
     * @throws SandboxException if the operation fails
     */
    fun renewSandbox(
        sandboxId: String,
        timeout: Duration,
    ): SandboxRenewResponse {
        // Read the clock once so the logged expiration is exactly the value sent to the service.
        val newExpiration = OffsetDateTime.now().plus(timeout)
        logger.info("Renew expiration for sandbox {} to {}", sandboxId, newExpiration)
        return sandboxService.renewSandboxExpiration(sandboxId, newExpiration)
    }

    /**
     * Pauses a single sandbox while preserving its state.
     *
     * @param sandboxId Sandbox ID to pause
     * @throws SandboxException if the operation fails
     */
    fun pauseSandbox(sandboxId: String) {
        logger.info("Pausing sandbox: {}", sandboxId)
        sandboxService.pauseSandbox(sandboxId)
    }

    /**
     * Resumes a previously paused sandbox.
     *
     * @param sandboxId Sandbox ID to resume
     * @throws SandboxException if the operation fails
     */
    fun resumeSandbox(sandboxId: String) {
        logger.info("Resuming sandbox: {}", sandboxId)
        sandboxService.resumeSandbox(sandboxId)
    }

    /**
     * Closes this resource, relinquishing any underlying resources.
     *
     * This method closes the local HTTP client resources associated with this sandbox manager instance.
     * Failures while closing are logged at warn level and not rethrown.
     */
    override fun close() {
        try {
            httpClientProvider.close()
        } catch (e: Exception) {
            logger.warn("Error closing resources", e)
        }
    }

    /**
     * Fluent builder for [SandboxManager].
     */
    class Builder internal constructor() {
        /**
         * Connection config. When left null, [build] falls back to `ConnectionConfig.builder().build()`.
         */
        private var connectionConfig: ConnectionConfig? = null

        /**
         * Sets the connection configuration used by the manager.
         *
         * @return This builder for method chaining
         */
        fun connectionConfig(connectionConfig: ConnectionConfig): Builder {
            this.connectionConfig = connectionConfig
            return this
        }

        /**
         * Creates the sandbox manager with the configured parameters.
         *
         * @return Fully configured SandboxManager instance
         * @throws InvalidArgumentException if required configuration is missing or invalid
         * @throws SandboxException if manager creation fails
         */
        fun build(): SandboxManager {
            return SandboxManager.create(
                connectionConfig = connectionConfig ?: ConnectionConfig.builder().build(),
            )
        }
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/config/ConnectionConfig.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.config

import okhttp3.ConnectionPool
import java.time.Duration

/**
 * Sandbox operations connection configuration.
 */
class ConnectionConfig private constructor(
    /** API key for authentication with sandbox service */
    private val apiKey: String?,
    /** Base URL for the sandbox management API */
    private val domain: String?,
    /** Protocol to use (http/https) */
    val protocol: String,
    /** Timeout for HTTP requests to the management API */
    val requestTimeout: Duration,
    /** Enable debug logging for HTTP requests */
    val debug: Boolean = false,
    /** user agent */
    val userAgent: String = DEFAULT_USER_AGENT,
    /** User defined headers */
    val headers: Map<String, String> = emptyMap(),
    /** Connection pool (optional) */
    val connectionPool: ConnectionPool?,
    /** Whether the connection pool is managed by the user */
    val connectionPoolManagedByUser: Boolean,
    /**
     * Use sandbox server as proxy for process execd requests.
     * Useful when the client SDK cannot access the created sandbox directly.
     */
    val useServerProxy: Boolean = false,
) {
    companion object {
        private const val DEFAULT_DOMAIN = "localhost:8080"
        private const val DEFAULT_PROTOCOL = "http"
        private const val ENV_API_KEY = "OPEN_SANDBOX_API_KEY"
        private const val ENV_DOMAIN = "OPEN_SANDBOX_DOMAIN"

        private const val DEFAULT_USER_AGENT = "OpenSandbox-Kotlin-SDK/1.0.5"
        private const val API_VERSION = "v1"

        @JvmStatic
        fun builder(): Builder = Builder()
    }

    /**
     * Returns the explicitly configured API key, else the `OPEN_SANDBOX_API_KEY`
     * environment variable, else an empty string.
     */
    fun getApiKey(): String {
        return this.apiKey ?: System.getenv(ENV_API_KEY) ?: ""
    }

    /**
     * Returns the explicitly configured domain, else the `OPEN_SANDBOX_DOMAIN`
     * environment variable, else `localhost:8080`.
     */
    fun getDomain(): String {
        return this.domain ?: System.getenv(ENV_DOMAIN) ?: DEFAULT_DOMAIN
    }

    /**
     * Builds the management API base URL, always ending in `/v1`.
     *
     * Python semantics:
     * - If the domain includes a scheme, treat it as a full base URL and append `/v1`.
     * - If the domain does not include a scheme, build `protocol://domain/v1`.
     *
     * Trailing slashes are stripped and an existing `/v1` suffix is not duplicated.
     * Scheme detection is case-insensitive, so `HTTPS://host` is not prefixed with [protocol] again.
     */
    fun getBaseUrl(): String {
        val currentDomain = getDomain()
        val hasScheme =
            currentDomain.startsWith("http://", ignoreCase = true) ||
                currentDomain.startsWith("https://", ignoreCase = true)
        // Strip every trailing slash, not just one, so "host//" cannot yield "host//v1".
        val trimmed = currentDomain.trimEnd('/')
        val root = if (hasScheme) trimmed else "$protocol://$trimmed"
        return if (root.endsWith("/$API_VERSION")) root else "$root/$API_VERSION"
    }

    /**
     * Builder for [ConnectionConfig].
     *
     * This builder is part of the public SDK surface and is intended to be used directly by end users.
     *
     * ### Defaults & environment variables
     * - If `apiKey` is not provided, the SDK will read it from environment variable `OPEN_SANDBOX_API_KEY`.
     * - If `domain` is not provided, the SDK will read it from environment variable `OPEN_SANDBOX_DOMAIN`,
     *   falling back to `localhost:8080`.
     *
     * ### Lifecycle / resource ownership
     * - If you do **not** provide a custom [ConnectionPool], the SDK creates and owns a default one
     *   per Sandbox/Manager instance. Calling `Sandbox.close()` / `SandboxManager.close()` will
     *   close SDK-owned HTTP clients and release the SDK-owned connection pool.
     * - If you **do** provide a [ConnectionPool] via [connectionPool], it is treated as user-owned
     *   and will **not** be evicted by the SDK on close.
     *
     * ### Notes
     * - `domain` may include a scheme (e.g. `https://example.com`); in that case the SDK will ignore [protocol]
     *   and append `/v1` automatically when constructing the base URL.
     */
    class Builder internal constructor() {
        private var apiKey: String? = null

        private var domain: String? = null

        private var protocol: String = DEFAULT_PROTOCOL

        private var requestTimeout: Duration = Duration.ofSeconds(30)

        private var debug: Boolean = false

        private var headers: Map<String, String> = emptyMap()

        private var connectionPool: ConnectionPool? = null

        private var connectionPoolManagedByUser: Boolean = false

        private var useServerProxy: Boolean = false

        /**
         * Use sandbox server as proxy for process execd requests.
         * Useful when the client SDK cannot access the created sandbox directly.
         */
        fun useServerProxy(useServerProxy: Boolean): Builder {
            this.useServerProxy = useServerProxy
            return this
        }

        /**
         * Set the API key used for authentication.
         *
         * If not set, the SDK falls back to environment variable `OPEN_SANDBOX_API_KEY`.
         *
         * @throws IllegalArgumentException if [apiKey] is blank
         */
        fun apiKey(apiKey: String): Builder {
            require(apiKey.isNotBlank()) { "API key cannot be blank" }
            this.apiKey = apiKey
            return this
        }

        /**
         * Set the API domain (host[:port]) or a full base URL.
         *
         * Examples:
         * - `pre-agent-sandbox.alibaba-inc.com`
         * - `localhost:8080`
         * - `https://pre-agent-sandbox.alibaba-inc.com` (scheme included; [protocol] will be ignored)
         *
         * If not set, the SDK falls back to environment variable `OPEN_SANDBOX_DOMAIN`
         * and then `localhost:8080`.
         *
         * @throws IllegalArgumentException if [domain] is blank
         */
        fun domain(domain: String): Builder {
            require(domain.isNotBlank()) { "Domain cannot be blank" }
            this.domain = domain
            return this
        }

        /**
         * Sets the protocol (stored lower-cased).
         * Defaults to "http".
         *
         * Note: if [domain] includes a scheme (starts with `http://` or `https://`),
         * the SDK will use that and ignore this value when building the base URL.
         */
        fun protocol(protocol: String): Builder {
            this.protocol = protocol.lowercase()
            return this
        }

        /**
         * Sets the request timeout used by the management API HTTP client.
         *
         * Must be a positive duration.
         *
         * @throws IllegalArgumentException if [requestTimeout] is zero or negative
         */
        fun requestTimeout(requestTimeout: Duration): Builder {
            require(!requestTimeout.isNegative && !requestTimeout.isZero) {
                "Request timeout must be positive, got: $requestTimeout"
            }
            this.requestTimeout = requestTimeout
            return this
        }

        /**
         * Provide a custom OkHttp [ConnectionPool].
         *
         * Ownership semantics:
         * - When you call this method, the pool is considered user-managed, and the SDK will not
         *   evict it on close.
         */
        fun connectionPool(connectionPool: ConnectionPool): Builder {
            this.connectionPool = connectionPool
            this.connectionPoolManagedByUser = true
            return this
        }

        /**
         * Enable or disable HTTP request logging (headers).
         *
         * This is intended for local debugging. Sensitive headers will be redacted.
         */
        fun debug(enable: Boolean = true): Builder {
            this.debug = enable
            return this
        }

        /**
         * Set extra headers that will be sent with every SDK request.
         *
         * This replaces any headers configured earlier. The map is copied, so later changes
         * to the caller's map do not affect the builder or configs built from it.
         *
         * Note: authentication header is managed by the SDK; you normally should not set
         * `OPEN-SANDBOX-API-KEY` manually here.
         */
        fun headers(headers: Map<String, String>): Builder {
            // Defensive copy: never alias a map the caller may keep mutating.
            this.headers = headers.toMap()
            return this
        }

        /**
         * Convenience DSL for setting extra headers.
         *
         * The block starts from an empty map, so this replaces any headers configured earlier.
         *
         * Example:
         * ```
         * ConnectionConfig.builder()
         *   .headers {
         *     put("X-Request-ID", "trace-123")
         *   }
         *   .build()
         * ```
         */
        fun headers(configure: MutableMap<String, String>.() -> Unit): Builder {
            val map = mutableMapOf<String, String>()
            map.configure()
            this.headers = map
            return this
        }

        /**
         * Add a single extra header.
         *
         * Existing headers are kept; the value for the same key is overwritten.
         *
         * @throws IllegalArgumentException if [key] is blank
         */
        fun addHeader(
            key: String,
            value: String,
        ): Builder {
            require(key.isNotBlank()) { "Header key cannot be blank" }
            // Copy-on-write: configs built earlier from this builder keep their own header map.
            val mutableHeaders = this.headers.toMutableMap()
            mutableHeaders[key] = value
            this.headers = mutableHeaders
            return this
        }

        /**
         * Build an immutable [ConnectionConfig].
         */
        fun build(): ConnectionConfig {
            return ConnectionConfig(
                apiKey = apiKey,
                domain = domain,
                protocol = protocol,
                requestTimeout = requestTimeout,
                debug = debug,
                userAgent = DEFAULT_USER_AGENT,
                // Snapshot the headers so the config does not share a map with the builder.
                headers = headers.toMap(),
                connectionPool = connectionPool,
                connectionPoolManagedByUser = connectionPoolManagedByUser,
                useServerProxy = useServerProxy,
            )
        }
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxException.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.domain.exceptions

/**
 * Base exception class for all sandbox-related errors.
 *
 * Inherits from [RuntimeException] (Unchecked Exception) to avoid forcing
 * Java callers to implement verbose try-catch blocks while still allowing
 * specific error handling when needed.
 */
open class SandboxException(
    message: String? = null,
    cause: Throwable? = null,
    /** Structured error (code plus optional message) describing the failure. */
    val error: SandboxError,
    /** Request identifier attached to the failure, or `null` when none was supplied. */
    val requestId: String? = null,
) : RuntimeException(message, cause) {
    /**
     * Three-argument overload kept so that clients compiled against the earlier
     * signature (without `requestId`) continue to link. It delegates to the
     * primary constructor with a `null` request id.
     */
    constructor(
        message: String?,
        cause: Throwable?,
        error: SandboxError,
    ) : this(message, cause, error, null)
}

/**
 * Thrown when the Sandbox API returns an error response (e.g., HTTP 4xx or 5xx) or when an unexpected error occurs while calling the API.
 */
class SandboxApiException(
    message: String? = null,
    cause: Throwable? = null,
    /** Status code associated with the failed call, or `null` when none is available. */
    val statusCode: Int? = null,
    // When the caller gives no error, fall back to the UNEXPECTED_RESPONSE code (no message).
    error: SandboxError = SandboxError(SandboxError.UNEXPECTED_RESPONSE),
    requestId: String? = null,
) : SandboxException(message = message, cause = cause, error = error, requestId = requestId) {
    /**
     * Four-argument overload kept so that clients compiled against the earlier
     * signature (without `requestId`) continue to link. It delegates to the
     * primary constructor with a `null` request id.
     */
    constructor(
        message: String?,
        cause: Throwable?,
        statusCode: Int?,
        error: SandboxError,
    ) : this(message, cause, statusCode, error, null)
}

/**
 * Thrown when an unexpected internal error occurs within the SDK
 */
class SandboxInternalException(
    message: String? = null,
    cause: Throwable? = null,
) : SandboxException(
        // The attached error carries only the INTERNAL_UNKNOWN_ERROR code;
        // the exception message is not copied into it.
        message,
        cause,
        SandboxError(SandboxError.INTERNAL_UNKNOWN_ERROR),
    )

/**
 * Thrown when the sandbox is found to be unhealthy.
 */
class SandboxUnhealthyException(
    message: String? = null,
    cause: Throwable? = null,
) : SandboxException(
        // The exception message is mirrored into the UNHEALTHY error payload.
        message,
        cause,
        SandboxError(SandboxError.UNHEALTHY, message),
    )

/**
 * Thrown when the operation times out waiting for the sandbox to become ready.
 */
class SandboxReadyTimeoutException(
    message: String? = null,
    cause: Throwable? = null,
) : SandboxException(
        // The exception message is mirrored into the READY_TIMEOUT error payload.
        message,
        cause,
        SandboxError(SandboxError.READY_TIMEOUT, message),
    )

/**
 * Thrown when an invalid argument is provided to an SDK method.
 * Similar to [IllegalArgumentException] but within the SDK's exception hierarchy.
 */
class InvalidArgumentException(
    message: String? = null,
    cause: Throwable? = null,
) : SandboxException(
        // The exception message is mirrored into the INVALID_ARGUMENT error payload.
        message,
        cause,
        SandboxError(SandboxError.INVALID_ARGUMENT, message),
    )

/**
 * Defines standardized common error codes and messages for the Sandbox SDK.
 */
data class SandboxError(
    /** Error code; the companion object lists the codes defined in this file. */
    val code: String,
    /** Optional human-readable detail; defaults to `null`. */
    val message: String? = null,
) {
    companion object {
        // Code attached by SandboxInternalException.
        const val INTERNAL_UNKNOWN_ERROR = "INTERNAL_UNKNOWN_ERROR"
        // Code attached by SandboxReadyTimeoutException.
        const val READY_TIMEOUT = "READY_TIMEOUT"
        // Code attached by SandboxUnhealthyException.
        const val UNHEALTHY = "UNHEALTHY"
        // Code attached by InvalidArgumentException.
        const val INVALID_ARGUMENT = "INVALID_ARGUMENT"
        // Default code of SandboxApiException when the caller supplies no error.
        const val UNEXPECTED_RESPONSE = "UNEXPECTED_RESPONSE"
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/models/execd/Constants.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.domain.models.execd

// Default port number for execd.
// NOTE(review): presumably the port the execd service listens on inside a sandbox — confirm.
const val DEFAULT_EXECD_PORT = 44772
// Default port number for egress.
// NOTE(review): presumably the port of the egress component — confirm.
const val DEFAULT_EGRESS_PORT = 18080


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/models/execd/executions/CommandModels.kt
================================================
/*
 * Copyright 2026 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.domain.models.execd.executions

import java.time.OffsetDateTime

/**
 * Command execution status (foreground or background).
 *
 * Plain read-only holder: every property is nullable, so consumers must be
 * prepared for any individual field to be absent.
 *
 * @property id Command ID returned by run command
 * @property content Original command content
 * @property running Whether the command is still running
 * @property exitCode Exit code if the command has finished
 * @property error Error message if the command failed
 * @property startedAt Start time (exposed as an [OffsetDateTime])
 * @property finishedAt Finish time (exposed as an [OffsetDateTime]; null if still running)
 */
class CommandStatus(
    val id: String?,
    val content: String?,
    val running: Boolean?,
    val exitCode: Int?,
    val error: String?,
    val startedAt: OffsetDateTime?,
    val finishedAt: OffsetDateTime?,
)

/**
 * Background command logs with tail cursor.
 *
 * @property content Raw stdout/stderr content (never null)
 * @property cursor Latest cursor for incremental reads; may be null
 */
class CommandLogs(
    val content: String,
    val cursor: Long?,
)


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/models/execd/executions/ExecutionModels.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.domain.models.execd.executions

/**
 * Represents a complete code execution session.
 *
 * This is the main model that tracks the entire lifecycle of code execution,
 * including results, errors, and output logs. It serves as the central container
 * for all execution-related data that is exposed to users.
 *
 * @property id Unique identifier for this execution session
 * @property executionCount Sequential execution counter for tracking execution order
 * @property result List of structured results produced by the code execution
 * @property error Error information if the execution failed
 * @property logs Container for stdout and stderr output messages
 */
class Execution(
    var id: String? = null,
    var executionCount: Long? = null,
    val result: MutableList<ExecutionResult> = mutableListOf(),
    var error: ExecutionError? = null,
    val logs: ExecutionLogs = ExecutionLogs(),
) {
    /**
     * Appends [result] to the end of this execution's [Execution.result] list.
     *
     * @param result The execution result to record
     */
    fun addResult(result: ExecutionResult) {
        // `+=` on a read-only MutableList property appends in place.
        this.result += result
    }
}

/**
 * Container for execution output logs.
 *
 * Separates standard output and error output streams for better organization
 * and allows users to process different types of output appropriately.
 *
 * @property stdout List of messages written to standard output
 * @property stderr List of messages written to standard error
 */
class ExecutionLogs(
    val stdout: MutableList<OutputMessage> = mutableListOf(),
    val stderr: MutableList<OutputMessage> = mutableListOf(),
) {
    /**
     * Appends a message to the end of the [stdout] list.
     *
     * @param outputMessage The message to record as standard output
     */
    fun addStdout(outputMessage: OutputMessage) {
        stdout += outputMessage
    }

    /**
     * Appends a message to the end of the [stderr] list.
     *
     * @param outputMessage The message to record as standard error
     */
    fun addStderr(outputMessage: OutputMessage) {
        stderr += outputMessage
    }
}

/**
 * Output message from code execution.
 *
 * Represents a single output message from either stdout or stderr streams
 * during code execution, including timing information.
 */
class OutputMessage(
    /**
     * The text content of the output message, i.e. what was written to the
     * output stream.
     */
    val text: String,
    /**
     * Timestamp when this message was generated, as a Unix timestamp in
     * milliseconds.
     */
    val timestamp: Long,
    /**
     * Flag indicating if this is an error message.
     * True if the message came from stderr, false if from stdout.
     * Defaults to false.
     */
    val isError: Boolean = false,
)

/**
 * Result of code execution.
 *
 * Represents a single output result from code execution, which may include
 * text content, formatting information, and timing data.
 */
class ExecutionResult(
    /**
     * The UTF-8 encoded text content of the execution result.
     * Contains the actual output data from the executed code.
     * Defaults to null.
     */
    val text: String? = null,
    /**
     * Timestamp when this result was generated, as a Unix timestamp in
     * milliseconds. This is the only required constructor argument, and it is
     * declared `var`, so it can be reassigned after construction.
     */
    var timestamp: Long,
    /**
     * Other result content in UTF-8 encoded format, keyed by string.
     * Defaults to an empty map.
     */
    val extraProperties: Map<String, String> = emptyMap(),
)

/**
 * Error information when code execution fails.
 *
 * Contains detailed error information following standard error reporting format,
 * including error type, message, timing, and stack trace for debugging purposes.
 *
 * @property name The error name/type (e.g., "SyntaxError", "RuntimeError", "TypeError")
 * @property value The error message or description explaining what went wrong
 * @property timestamp Unix timestamp in milliseconds when the error occurred
 * @property traceback List of traceback lines showing the complete error stack trace;
 * defaults to an empty list when not supplied
 */
class ExecutionError(
    val name: String,
    val value: String,
    val timestamp: Long,
    val traceback: List<String> = emptyList(),
)

/**
 * Execution complete event.
 *
 * Represents the completion of a code execution,
 * including timing information about when the execution finished.
 */
class ExecutionComplete(
    /**
     * Timestamp when the execution completed.
     * Unix timestamp in milliseconds indicating when the execution finished.
     */
    val timestamp: Long,
    /**
     * Execution time in milliseconds.
     */
    val executionTimeInMillis: Long,
)

/**
 * Execution init event.
 *
 * Represents the initialization of a code execution.
 */
class ExecutionInit(
    /**
     * Execution id. Declared `var`, so it can be reassigned after construction.
     */
    var id: String,
    /**
     * Timestamp when the execution started. Declared `var`, so it can be
     * reassigned after construction.
     * NOTE(review): presumably a Unix timestamp in milliseconds like the other
     * event models in this file — confirm.
     */
    var timestamp: Long,
)

/**
 * Single-method callback that receives one output item of type [T].
 *
 * Declared as a `fun interface`, so it can be supplied as a lambda.
 * [ExecutionHandlers] uses it for every one of its handler slots.
 */
fun interface OutputHandler<T> {
    /**
     * Processes a single output item.
     *
     * @param output The item to handle
     */
    fun handle(output: T)
}

/**
 * Handlers model for code execution output processing.
 */
class ExecutionHandlers private constructor(
    /** Callback for standard output messages; `null` when not registered. */
    val onStdout: OutputHandler<OutputMessage>? = null,
    /** Callback for standard error messages; `null` when not registered. */
    val onStderr: OutputHandler<OutputMessage>? = null,
    /** Callback for structured execution results; `null` when not registered. */
    val onResult: OutputHandler<ExecutionResult>? = null,
    /** Callback for execution completion events; `null` when not registered. */
    val onExecutionComplete: OutputHandler<ExecutionComplete>? = null,
    /** Callback for execution errors; `null` when not registered. */
    val onError: OutputHandler<ExecutionError>? = null,
    /** Callback for execution initialization events; `null` when not registered. */
    val onInit: OutputHandler<ExecutionInit>? = null,
) {
    companion object {
        /** Returns a fresh [Builder] with no handlers registered. */
        @JvmStatic
        fun builder(): Builder = Builder()
    }

    /**
     * Fluent builder for [ExecutionHandlers].
     *
     * Every setter stores the given handler, replacing any earlier one for the
     * same slot, and returns this builder for chaining.
     */
    class Builder {
        private var onStdout: OutputHandler<OutputMessage>? = null
        private var onStderr: OutputHandler<OutputMessage>? = null
        private var onResult: OutputHandler<ExecutionResult>? = null
        private var onExecutionComplete: OutputHandler<ExecutionComplete>? = null
        private var onError: OutputHandler<ExecutionError>? = null
        private var onInit: OutputHandler<ExecutionInit>? = null

        fun onStdout(handler: OutputHandler<OutputMessage>): Builder = apply { this.onStdout = handler }

        fun onStderr(handler: OutputHandler<OutputMessage>): Builder = apply { this.onStderr = handler }

        fun onResult(handler: OutputHandler<ExecutionResult>): Builder = apply { this.onResult = handler }

        fun onExecutionComplete(handler: OutputHandler<ExecutionComplete>): Builder =
            apply { this.onExecutionComplete = handler }

        fun onError(handler: OutputHandler<ExecutionError>): Builder = apply { this.onError = handler }

        fun onInit(handler: OutputHandler<ExecutionInit>): Builder = apply { this.onInit = handler }

        /** Creates an [ExecutionHandlers] from the handlers registered so far. */
        fun build(): ExecutionHandlers =
            ExecutionHandlers(
                onStdout = onStdout,
                onStderr = onStderr,
                onResult = onResult,
                onExecutionComplete = onExecutionComplete,
                onError = onError,
                onInit = onInit,
            )
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/models/execd/executions/RunCommandRequest.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.domain.models.execd.executions

import kotlin.time.Duration

/**
 * Parameters for command execution.
 *
 * @property command The command content to execute
 * @property background Whether to run in background (detached)
 * @property workingDirectory Directory to execute command in
 * @property timeout Maximum execution time; server will terminate when reached.  Null means the server will not enforce any timeout.
 * @property uid Unix user ID used to run the command process
 * @property gid Unix group ID used to run the command process. Requires uid.
 * @property envs Environment variables injected into the command process
 * @property handlers Optional execution handlers
 */
class RunCommandRequest private constructor(
    val command: String,
    val background: Boolean,
    val workingDirectory: String?,
    val timeout: Duration?,
    val uid: Int?,
    val gid: Int?,
    val envs: Map<String, String>,
    val handlers: ExecutionHandlers?,
) {
    companion object {
        /** Returns a fresh [Builder] with default settings. */
        @JvmStatic
        fun builder(): Builder = Builder()
    }

    /**
     * Fluent builder for [RunCommandRequest].
     *
     * Setters validate their argument eagerly and throw
     * [IllegalArgumentException] on invalid input. Checks that involve more than
     * one field run in [build].
     */
    class Builder {
        private var command: String? = null
        private var background: Boolean = false
        private var workingDirectory: String? = null
        private var timeout: Duration? = null
        private var uid: Int? = null
        private var gid: Int? = null
        private val envs: MutableMap<String, String> = mutableMapOf()
        private var handlers: ExecutionHandlers? = null

        /** Sets the command content; blank commands are rejected. */
        fun command(command: String): Builder =
            apply {
                require(command.isNotBlank()) { "Command cannot be blank" }
                this.command = command
            }

        /** Chooses whether the command runs in the background (default: false). */
        fun background(background: Boolean): Builder = apply { this.background = background }

        /** Sets the directory to execute the command in; `null` clears it. */
        fun workingDirectory(workingDirectory: String?): Builder = apply { this.workingDirectory = workingDirectory }

        /**
         * Maximum execution time; server will terminate the command when reached.
         * If omitted, the server will not enforce any timeout.
         */
        fun timeout(timeout: Duration?): Builder = apply { this.timeout = timeout }

        /** Sets the Unix user ID; must be `null` or non-negative. */
        fun uid(uid: Int?): Builder =
            apply {
                if (uid != null) {
                    require(uid >= 0) { "Uid must be >= 0" }
                }
                this.uid = uid
            }

        /** Sets the Unix group ID; must be `null` or non-negative. */
        fun gid(gid: Int?): Builder =
            apply {
                if (gid != null) {
                    require(gid >= 0) { "Gid must be >= 0" }
                }
                this.gid = gid
            }

        /** Adds or overwrites a single environment variable; the key must not be blank. */
        fun env(
            key: String,
            value: String,
        ): Builder =
            apply {
                require(key.isNotBlank()) { "Environment variable key cannot be blank" }
                this.envs.put(key, value)
            }

        /**
         * Merges all entries of [envs] into the variables collected so far.
         * All keys are validated before anything is merged, so a blank key leaves
         * the builder unchanged.
         */
        fun envs(envs: Map<String, String>): Builder =
            apply {
                for (name in envs.keys) {
                    require(name.isNotBlank()) { "Environment variable key cannot be blank" }
                }
                this.envs += envs
            }

        /** Sets the optional execution handlers; `null` clears them. */
        fun handlers(handlers: ExecutionHandlers?): Builder = apply { this.handlers = handlers }

        /**
         * Creates the request.
         *
         * @throws IllegalArgumentException if no command was set, or if a gid was
         * provided without a uid
         */
        fun build(): RunCommandRequest {
            val commandValue = requireNotNull(command) { "Command must be specified" }
            if (gid != null) {
                require(uid != null) { "Uid is required when gid is provided" }
            }
            return RunCommandRequest(
                command = commandValue,
                background = background,
                workingDirectory = workingDirectory,
                timeout = timeout,
                uid = uid,
                gid = gid,
                // Snapshot, so later builder changes do not affect the built request.
                envs = envs.toMap(),
                handlers = handlers,
            )
        }
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/models/execd/filesystem/FilesystemModels.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.domain.models.execd.filesystem

import java.time.OffsetDateTime

/**
 * Metadata information for a file or directory entry.
 *
 * Contains complete filesystem metadata including path, permissions, ownership,
 * size, and timestamp information for files and directories in the sandbox.
 * All properties are non-null.
 *
 * @property path Absolute path of the file or directory
 * @property mode Unix file mode/permissions as integer (e.g., 644 for rw-r--r--).
 * NOTE(review): the example suggests the permission digits are carried as a
 * decimal number rather than an octal value — confirm against the API.
 * @property owner Owner username of the file or directory
 * @property group Group name of the file or directory
 * @property size Size of the file in bytes (0 for directories)
 * @property modifiedAt Timestamp when the entry was last modified
 * @property createdAt Timestamp when the entry was created
 */
class EntryInfo(
    val path: String,
    val mode: Int,
    val owner: String,
    val group: String,
    val size: Long,
    val modifiedAt: OffsetDateTime,
    val createdAt: OffsetDateTime,
)

/**
 * Request to write content to a file.
 *
 * Creates or overwrites a file with the specified content, permissions, and ownership.
 * Supports both text and binary data through flexible data parameter.
 *
 * @property path Destination file path where content will be written
 * @property data Content to write - can be String or ByteArray
 * @property mode Unix file permissions as integer (default: 755)
 * @property owner Owner username to set (null to use default in sandbox)
 * @property group Group name to set (null to use default in sandbox)
 * @property encoding Character encoding for String data (default: UTF-8)
 */
class WriteEntry private constructor(
    val path: String,
    val data: Any?,
    val mode: Int,
    val owner: String?,
    val group: String?,
    val encoding: String,
) {
    companion object {
        /** Returns a fresh [Builder] with default mode (755) and encoding (UTF-8). */
        @JvmStatic
        fun builder(): Builder = Builder()
    }

    /**
     * Fluent builder for [WriteEntry].
     *
     * Only the path is mandatory. Data stays `null` unless set, and owner and
     * group default to `null`.
     */
    class Builder {
        private var path: String? = null
        private var data: Any? = null
        private var mode: Int = 755
        private var owner: String? = null
        private var group: String? = null
        private var encoding: String = "UTF-8"

        /** Sets the destination path; blank paths are rejected. */
        fun path(path: String): Builder =
            apply {
                require(path.isNotBlank()) { "Path cannot be blank" }
                this.path = path
            }

        /** Sets the content to write. The value is stored as given, without type checks. */
        fun data(data: Any): Builder = apply { this.data = data }

        /** Sets the file mode; negative values are rejected. */
        fun mode(mode: Int): Builder =
            apply {
                require(mode >= 0) { "Mode must be non-negative" }
                this.mode = mode
            }

        /** Sets the owner; `null` clears it. */
        fun owner(owner: String?): Builder = apply { this.owner = owner }

        /** Sets the group; `null` clears it. */
        fun group(group: String?): Builder = apply { this.group = group }

        /** Sets the character encoding name; blank names are rejected. */
        fun encoding(encoding: String): Builder =
            apply {
                require(encoding.isNotBlank()) { "Encoding cannot be blank" }
                this.encoding = encoding
            }

        /**
         * Creates the entry.
         *
         * @throws IllegalArgumentException if no path was set
         */
        fun build(): WriteEntry {
            val target = requireNotNull(path) { "Path must be specified" }
            return WriteEntry(target, data, mode, owner, group, encoding)
        }
    }
}

/**
 * Request to move/rename a file or directory.
 *
 * Moves a file or directory from one location to another within the sandbox filesystem.
 * Can be used for both renaming (same directory) and moving (different directory).
 *
 * @property src Source path of the file or directory to move
 * @property dest Destination path where the file or directory should be moved
 */
class MoveEntry private constructor(
    val src: String,
    val dest: String,
) {
    companion object {
        /** Returns a fresh [Builder] with neither path set. */
        @JvmStatic
        fun builder(): Builder = Builder()
    }

    /** Fluent builder for [MoveEntry]; both paths are mandatory. */
    class Builder {
        private var src: String? = null
        private var dest: String? = null

        /** Sets the source path; blank paths are rejected. */
        fun src(src: String): Builder =
            apply {
                require(src.isNotBlank()) { "Source path cannot be blank" }
                this.src = src
            }

        /** Sets the destination path; blank paths are rejected. */
        fun dest(dest: String): Builder =
            apply {
                require(dest.isNotBlank()) { "Destination path cannot be blank" }
                this.dest = dest
            }

        /**
         * Creates the entry. The source is checked before the destination.
         *
         * @throws IllegalArgumentException if either path was not set
         */
        fun build(): MoveEntry {
            val from = requireNotNull(src) { "Source path must be specified" }
            val to = requireNotNull(dest) { "Destination path must be specified" }
            return MoveEntry(from, to)
        }
    }
}

/**
 * Request to set permissions/ownership of a file or directory.
 *
 * Updates the permissions and/or ownership of an existing file or directory
 * without modifying its content. Only specified properties will be changed.
 *
 * @property path Target path of the file or directory to modify
 * @property owner New owner username (null to keep current owner)
 * @property group New group name (null to keep current group)
 * @property mode New Unix file permissions as integer (default: 755)
 */
class SetPermissionEntry private constructor(
    val path: String,
    val owner: String?,
    val group: String?,
    val mode: Int,
) {
    companion object {
        /** Returns a fresh [Builder] with default mode (755). */
        @JvmStatic
        fun builder(): Builder = Builder()
    }

    /**
     * Fluent builder for [SetPermissionEntry].
     *
     * Only the path is mandatory; owner and group default to `null`.
     */
    class Builder {
        private var path: String? = null
        private var owner: String? = null
        private var group: String? = null
        private var mode: Int = 755

        /** Sets the target path; blank paths are rejected. */
        fun path(path: String): Builder =
            apply {
                require(path.isNotBlank()) { "Path cannot be blank" }
                this.path = path
            }

        /** Sets the new owner; `null` clears it. */
        fun owner(owner: String?): Builder = apply { this.owner = owner }

        /** Sets the new group; `null` clears it. */
        fun group(group: String?): Builder = apply { this.group = group }

        /** Sets the new mode; negative values are rejected. */
        fun mode(mode: Int): Builder =
            apply {
                require(mode >= 0) { "Mode must be non-negative" }
                this.mode = mode
            }

        /**
         * Creates the entry.
         *
         * @throws IllegalArgumentException if no path was set
         */
        fun build(): SetPermissionEntry {
            val target = requireNotNull(path) { "Path must be specified" }
            return SetPermissionEntry(target, owner, group, mode)
        }
    }
}

/**
 * Request to replace content within a file.
 *
 * Performs string replacement within a file by finding exact matches of the old content
 * and replacing them with new content. Only affects string matches, preserving the rest.
 *
 * @property path Target file path containing content to replace
 * @property oldContent Exact string content to find and replace
 * @property newContent Replacement string content to substitute
 */
class ContentReplaceEntry private constructor(
    val path: String,
    val oldContent: String,
    val newContent: String,
) {
    companion object {
        /** Returns a fresh [Builder] with nothing set. */
        @JvmStatic
        fun builder(): Builder = Builder()
    }

    /** Fluent builder for [ContentReplaceEntry]; all three values are mandatory. */
    class Builder {
        private var path: String? = null
        private var oldContent: String? = null
        private var newContent: String? = null

        /** Sets the target file path; blank paths are rejected. */
        fun path(path: String): Builder =
            apply {
                require(path.isNotBlank()) { "Path cannot be blank" }
                this.path = path
            }

        /** Sets the text to search for. The value is not validated. */
        fun oldContent(oldContent: String): Builder = apply { this.oldContent = oldContent }

        /** Sets the replacement text. The value is not validated. */
        fun newContent(newContent: String): Builder = apply { this.newContent = newContent }

        /**
         * Creates the entry. Fields are checked in the order path, old content,
         * new content.
         *
         * @throws IllegalArgumentException if any of the three values was not set
         */
        fun build(): ContentReplaceEntry {
            val target = requireNotNull(path) { "Path must be specified" }
            val search = requireNotNull(oldContent) { "Old content must be specified" }
            val replacement = requireNotNull(newContent) { "New content must be specified" }
            return ContentReplaceEntry(target, search, replacement)
        }
    }
}

/**
 * Request to search for files matching a pattern.
 *
 * Searches the filesystem starting from the specified path to find files
 * that match the given pattern. Used for file discovery and filtering.
 *
 * @property path Starting directory path for the search
 * @property pattern Search pattern (supports glob patterns like *.kt, *.txt)
 */
class SearchEntry private constructor(
    val path: String,
    val pattern: String,
) {
    companion object {
        /** Returns a fresh [Builder] with nothing set. */
        @JvmStatic
        fun builder(): Builder = Builder()
    }

    /** Fluent builder for [SearchEntry]; both the path and the pattern are mandatory. */
    class Builder {
        private var path: String? = null
        private var pattern: String? = null

        /** Sets the starting directory; blank paths are rejected. */
        fun path(path: String): Builder =
            apply {
                require(path.isNotBlank()) { "Path cannot be blank" }
                this.path = path
            }

        /** Sets the search pattern; blank patterns are rejected. */
        fun pattern(pattern: String): Builder =
            apply {
                require(pattern.isNotBlank()) { "Pattern cannot be blank" }
                this.pattern = pattern
            }

        /**
         * Creates the entry. The path is checked before the pattern.
         *
         * @throws IllegalArgumentException if the path or the pattern was not set
         */
        fun build(): SearchEntry {
            val root = requireNotNull(path) { "Path must be specified" }
            val glob = requireNotNull(pattern) { "Pattern must be specified" }
            return SearchEntry(root, glob)
        }
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/models/sandboxes/SandboxModels.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.domain.models.sandboxes

import java.time.OffsetDateTime

/**
 * High-level lifecycle state of the sandbox.
 *
 * Common state values:
 * - Pending: Sandbox is being provisioned
 * - Running: Sandbox is running and ready to accept requests
 * - Pausing: Sandbox is in the process of pausing
 * - Paused: Sandbox has been paused while retaining its state
 * - Stopping: Sandbox is being terminated
 * - Terminated: Sandbox has been successfully terminated
 * - Failed: Sandbox encountered a critical error
 *
 * State transitions:
 * - Pending → Running (after creation completes)
 * - Running → Pausing (when pause is requested)
 * - Pausing → Paused (pause operation completes)
 * - Paused → Running (when resume is requested)
 * - Running/Paused → Stopping (when kill is requested or TTL expires)
 * - Stopping → Terminated (kill/timeout operation completes)
 * - Pending/Running/Paused → Failed (on error)
 *
 * Note: New state values may be added in future versions.
 * Clients should handle unknown state values gracefully.
 */
object SandboxState {
    // States are plain string constants rather than an enum, so state values
    // not listed here can still be carried as ordinary strings.
    const val PENDING = "Pending"
    const val RUNNING = "Running"
    const val PAUSING = "Pausing"
    const val PAUSED = "Paused"
    const val STOPPING = "Stopping"
    const val TERMINATED = "Terminated"
    const val FAILED = "Failed"
    // NOTE(review): presumably a fallback for a missing or unrecognized state — confirm where it is used.
    const val UNKNOWN = "Unknown"
}

/**
 * Filter criteria for listing sandboxes.
 *
 * @property states Filter by sandbox states (e.g., RUNNING, PAUSED)
 * @property metadata Filter by metadata key-value pairs
 * @property pageSize Number of items per page
 * @property page Page number (1-based; the builder rejects values below 1)
 */
class SandboxFilter private constructor(
    val states: List<String>?,
    val metadata: Map<String, String>?,
    val pageSize: Int?,
    val page: Int?,
) {
    companion object {
        @JvmStatic
        fun builder(): Builder = Builder()
    }

    class Builder {
        private var states: List<String>? = null
        private var metadata: Map<String, String>? = null
        private var pageSize: Int? = null
        private var page: Int? = null

        fun states(states: List<String>): Builder {
            this.states = states
            return this
        }

        fun states(vararg states: String): Builder {
            this.states = states.toList()
            return this
        }

        fun metadata(metadata: Map<String, String>): Builder {
            this.metadata = metadata
            return this
        }

        fun metadata(configure: MutableMap<String, String>.() -> Unit): Builder {
            val map = mutableMapOf<String, String>()
            map.configure()
            this.metadata = map
            return this
        }

        fun pageSize(pageSize: Int): Builder {
            require(pageSize > 0) { "Page size must be positive" }
            this.pageSize = pageSize
            return this
        }

        fun page(page: Int): Builder {
            require(page > 0) { "Page must be positive" }
            this.page = page
            return this
        }

        fun build(): SandboxFilter {
            return SandboxFilter(
                states = states,
                metadata = metadata,
                pageSize = pageSize,
                page = page,
            )
        }
    }
}

/**
 * Describes the container image a sandbox is started from.
 *
 * @property image Image reference (e.g., "ubuntu:22.04", "python:3.11")
 * @property auth Credentials for a private registry, or `null` when none were supplied
 */
class SandboxImageSpec private constructor(
    val image: String,
    val auth: SandboxImageAuth?,
) {
    /** Fluent builder for [SandboxImageSpec]; [image] is mandatory, [auth] is optional. */
    class Builder {
        private var image: String? = null
        private var auth: SandboxImageAuth? = null

        /**
         * Sets the image reference.
         *
         * @throws IllegalArgumentException if [image] is blank
         */
        fun image(image: String): Builder =
            apply {
                require(image.isNotBlank()) { "Image cannot be blank" }
                this.image = image
            }

        /** Sets pre-built registry credentials. */
        fun auth(auth: SandboxImageAuth): Builder =
            apply {
                this.auth = auth
            }

        /** Builds registry credentials from [username] and [password] and sets them. */
        fun auth(
            username: String,
            password: String,
        ): Builder {
            val credentials =
                SandboxImageAuth.builder()
                    .username(username)
                    .password(password)
                    .build()
            return auth(credentials)
        }

        /**
         * Builds the image specification.
         *
         * @throws IllegalArgumentException if no image was set
         */
        fun build(): SandboxImageSpec {
            val resolvedImage = requireNotNull(image) { "Image must be specified" }
            return SandboxImageSpec(image = resolvedImage, auth = auth)
        }
    }

    companion object {
        /** Creates an empty [Builder]. */
        @JvmStatic
        fun builder(): Builder = Builder()
    }
}

/**
 * Credentials used to authenticate against a container registry.
 *
 * @property username Registry username
 * @property password Registry password or access token
 */
class SandboxImageAuth private constructor(
    val username: String,
    val password: String,
) {
    /** Fluent builder for [SandboxImageAuth]; both fields are mandatory. */
    class Builder {
        private var username: String? = null
        private var password: String? = null

        /**
         * Sets the registry username.
         *
         * @throws IllegalArgumentException if [username] is blank
         */
        fun username(username: String): Builder =
            apply {
                require(username.isNotBlank()) { "Username cannot be blank" }
                this.username = username
            }

        /**
         * Sets the registry password or access token.
         *
         * @throws IllegalArgumentException if [password] is blank
         */
        fun password(password: String): Builder =
            apply {
                require(password.isNotBlank()) { "Password cannot be blank" }
                this.password = password
            }

        /**
         * Builds the credentials.
         *
         * @throws IllegalArgumentException if the username or the password was never set
         */
        fun build(): SandboxImageAuth {
            // Username is checked first so a builder missing both reports the username.
            val resolvedUsername = requireNotNull(username) { "Username must be specified" }
            val resolvedPassword = requireNotNull(password) { "Password must be specified" }
            return SandboxImageAuth(username = resolvedUsername, password = resolvedPassword)
        }
    }

    companion object {
        /** Creates an empty [Builder]. */
        @JvmStatic
        fun builder(): Builder = Builder()
    }
}

/**
 * A single egress rule: an action applied to a matching network target.
 *
 * @property action Whether matching targets are allowed or denied
 * @property target FQDN or wildcard domain (e.g., "example.com", "*.example.com")
 */
class NetworkRule private constructor(
    val action: Action,
    val target: String,
) {
    /** What to do with traffic whose target matches the rule. */
    enum class Action {
        ALLOW,
        DENY,
    }

    /** Fluent builder for [NetworkRule]; both fields are mandatory. */
    class Builder {
        private var action: Action? = null
        private var target: String? = null

        /** Sets the action applied to matching targets. */
        fun action(action: Action): Builder =
            apply {
                this.action = action
            }

        /**
         * Sets the target to match.
         *
         * @throws IllegalArgumentException if [target] is blank
         */
        fun target(target: String): Builder =
            apply {
                require(target.isNotBlank()) { "Target cannot be blank" }
                this.target = target
            }

        /**
         * Builds the rule.
         *
         * @throws IllegalArgumentException if the action or the target was never set
         */
        fun build(): NetworkRule {
            // Action is checked first so a builder missing both reports the action.
            val resolvedAction = requireNotNull(action) { "Action must be specified" }
            val resolvedTarget = requireNotNull(target) { "Target must be specified" }
            return NetworkRule(action = resolvedAction, target = resolvedTarget)
        }
    }

    companion object {
        /** Creates an empty [Builder]. */
        @JvmStatic
        fun builder(): Builder = Builder()
    }
}

/**
 * Egress network policy matching the sidecar `/policy` request body.
 *
 * @property defaultAction Action taken when no egress rule matches; the builder starts at DENY
 * @property egress Egress rules evaluated in order, or `null` when the builder held no rules
 */
class NetworkPolicy private constructor(
    val defaultAction: DefaultAction?,
    val egress: List<NetworkRule>?,
) {
    /** Fallback action for traffic that no rule matches. */
    enum class DefaultAction {
        ALLOW,
        DENY,
    }

    /** Fluent builder for [NetworkPolicy]. */
    class Builder {
        private var fallbackAction: DefaultAction = DefaultAction.DENY
        private val egressRules = mutableListOf<NetworkRule>()

        /** Overrides the default action. */
        fun defaultAction(action: DefaultAction): Builder =
            apply {
                fallbackAction = action
            }

        /** Appends one rule after the rules already present. */
        fun addEgress(rule: NetworkRule): Builder =
            apply {
                egressRules += rule
            }

        /** Replaces all rules collected so far with [rules]. */
        fun egress(rules: List<NetworkRule>): Builder =
            apply {
                egressRules.clear()
                egressRules.addAll(rules)
            }

        /** Builds the policy; an empty rule set is emitted as `null` rather than an empty list. */
        fun build(): NetworkPolicy {
            val snapshot = egressRules.takeIf { it.isNotEmpty() }?.toList()
            return NetworkPolicy(defaultAction = fallbackAction, egress = snapshot)
        }
    }

    companion object {
        /** Creates a [Builder] with default action DENY and no rules. */
        @JvmStatic
        fun builder(): Builder = Builder()
    }
}

// ============================================================================
// Volume Models
// ============================================================================

/**
 * Host path bind mount backend.
 *
 * Maps a directory on the host filesystem into the container.
 * Only available when the runtime supports host mounts.
 *
 * @property path Absolute path on the host filesystem to mount
 */
class Host private constructor(
    val path: String,
) {
    /** Fluent builder for [Host]; [path] is mandatory. */
    class Builder {
        private var path: String? = null

        /**
         * Sets the host path.
         *
         * @throws IllegalArgumentException if [path] does not start with '/'
         */
        fun path(path: String): Builder =
            apply {
                require(path.startsWith("/")) { "Host path must be an absolute path starting with '/'" }
                this.path = path
            }

        /**
         * Builds the backend.
         *
         * @throws IllegalArgumentException if no path was set
         */
        fun build(): Host {
            val resolvedPath = requireNotNull(path) { "Path must be specified" }
            return Host(path = resolvedPath)
        }
    }

    companion object {
        /** Creates an empty [Builder]. */
        @JvmStatic
        fun builder(): Builder = Builder()

        /** Shortcut for a builder with only [path] set. */
        @JvmStatic
        fun of(path: String): Host = Builder().path(path).build()
    }
}

/**
 * Kubernetes PersistentVolumeClaim mount backend.
 *
 * References an existing PVC in the same namespace as the sandbox pod.
 * Only available in Kubernetes runtime.
 *
 * @property claimName Name of the PersistentVolumeClaim in the same namespace
 */
class PVC private constructor(
    val claimName: String,
) {
    /** Fluent builder for [PVC]; [claimName] is mandatory. */
    class Builder {
        private var claimName: String? = null

        /**
         * Sets the claim name.
         *
         * @throws IllegalArgumentException if [claimName] is blank
         */
        fun claimName(claimName: String): Builder =
            apply {
                require(claimName.isNotBlank()) { "Claim name cannot be blank" }
                this.claimName = claimName
            }

        /**
         * Builds the backend.
         *
         * @throws IllegalArgumentException if no claim name was set
         */
        fun build(): PVC {
            val resolvedClaimName = requireNotNull(claimName) { "Claim name must be specified" }
            return PVC(claimName = resolvedClaimName)
        }
    }

    companion object {
        /** Creates an empty [Builder]. */
        @JvmStatic
        fun builder(): Builder = Builder()

        /** Shortcut for a builder with only [claimName] set. */
        @JvmStatic
        fun of(claimName: String): PVC = Builder().claimName(claimName).build()
    }
}

/**
 * Storage mount definition for a sandbox.
 *
 * A volume consists of:
 * - A unique name identifier
 * - Exactly one backend (host, pvc) carrying the backend-specific fields
 * - Mount settings shared by all backends (mountPath, readOnly, subPath)
 *
 * Example usage:
 * ```kotlin
 * // Host path mount (read-write by default)
 * val volume = Volume.builder()
 *     .name("workdir")
 *     .host(Host.of("/data/opensandbox"))
 *     .mountPath("/mnt/work")
 *     .build()
 *
 * // PVC mount (read-only)
 * val volume = Volume.builder()
 *     .name("models")
 *     .pvc(PVC.of("shared-models-pvc"))
 *     .mountPath("/mnt/models")
 *     .readOnly(true)
 *     .build()
 * ```
 *
 * @property name Unique identifier for the volume within the sandbox
 * @property host Host path bind mount backend (mutually exclusive with pvc)
 * @property pvc Kubernetes PVC mount backend (mutually exclusive with host)
 * @property mountPath Absolute path inside the container where the volume is mounted
 * @property readOnly If true, the volume is mounted as read-only. Defaults to false (read-write).
 * @property subPath Optional subdirectory under the backend path to mount
 */
class Volume private constructor(
    val name: String,
    val host: Host?,
    val pvc: PVC?,
    val mountPath: String,
    val readOnly: Boolean,
    val subPath: String?,
) {
    /** Fluent builder for [Volume]; name, mount path and exactly one backend are mandatory. */
    class Builder {
        private var name: String? = null
        private var host: Host? = null
        private var pvc: PVC? = null
        private var mountPath: String? = null
        private var readOnly: Boolean = false
        private var subPath: String? = null

        /**
         * Sets the volume name.
         *
         * @throws IllegalArgumentException if [name] is blank
         */
        fun name(name: String): Builder =
            apply {
                require(name.isNotBlank()) { "Volume name cannot be blank" }
                this.name = name
            }

        /** Selects the host path backend. */
        fun host(host: Host): Builder =
            apply {
                this.host = host
            }

        /** Selects the PVC backend. */
        fun pvc(pvc: PVC): Builder =
            apply {
                this.pvc = pvc
            }

        /**
         * Sets the mount location inside the container.
         *
         * @throws IllegalArgumentException if [mountPath] does not start with '/'
         */
        fun mountPath(mountPath: String): Builder =
            apply {
                require(mountPath.startsWith("/")) { "Mount path must be an absolute path starting with '/'" }
                this.mountPath = mountPath
            }

        /** Sets whether the volume is mounted read-only. */
        fun readOnly(readOnly: Boolean): Builder =
            apply {
                this.readOnly = readOnly
            }

        /** Sets the subdirectory under the backend path to mount. */
        fun subPath(subPath: String): Builder =
            apply {
                this.subPath = subPath
            }

        /**
         * Builds the volume.
         *
         * @throws IllegalArgumentException if the name or mount path is missing, or if the
         *   number of configured backends is not exactly one
         */
        fun build(): Volume {
            val resolvedName = requireNotNull(name) { "Name must be specified" }
            val resolvedMountPath = requireNotNull(mountPath) { "Mount path must be specified" }

            // A volume is backed by exactly one of host / pvc.
            when (listOf(host, pvc).count { it != null }) {
                0 -> throw IllegalArgumentException("Exactly one backend (host, pvc) must be specified, but none was provided")
                1 -> Unit
                else -> throw IllegalArgumentException("Exactly one backend (host, pvc) must be specified, but multiple were provided")
            }

            return Volume(
                name = resolvedName,
                host = host,
                pvc = pvc,
                mountPath = resolvedMountPath,
                readOnly = readOnly,
                subPath = subPath,
            )
        }
    }

    companion object {
        /** Creates an empty [Builder]. */
        @JvmStatic
        fun builder(): Builder = Builder()
    }
}

/**
 * Detailed information about a sandbox instance.
 *
 * Plain value holder: every property is supplied through the constructor and is read-only.
 *
 * @property id Unique identifier of the sandbox
 * @property status Current status of the sandbox
 * @property entrypoint Command line arguments used to start the sandbox
 * @property expiresAt Timestamp when the sandbox is scheduled for automatic termination. Null means manual cleanup mode.
 * @property createdAt Timestamp when the sandbox was created
 * @property image Image specification used to create this sandbox
 * @property metadata Custom metadata attached to the sandbox; defaults to `null` when not supplied
 */
class SandboxInfo(
    val id: String,
    val status: SandboxStatus,
    val entrypoint: List<String>,
    val expiresAt: OffsetDateTime?,
    val createdAt: OffsetDateTime,
    val image: SandboxImageSpec,
    val metadata: Map<String, String>? = null,
)

/**
 * Snapshot of a sandbox's status.
 *
 * @property state Current state (e.g., RUNNING, PENDING, PAUSED, TERMINATED)
 * @property reason Short reason code for the current state, if any
 * @property message Human-readable message explaining the status, if any
 * @property lastTransitionAt Timestamp of the last state transition, if known
 */
class SandboxStatus(
    val state: String,
    val reason: String?,
    val message: String?,
    val lastTransitionAt: OffsetDateTime?,
)

/**
 * Response returned when a sandbox is created.
 *
 * Carries only the identifier of the new sandbox.
 *
 * @property id Unique identifier of the newly created sandbox
 */
class SandboxCreateResponse(
    val id: String,
)

/**
 * Response returned when a sandbox is renewed.
 *
 * @property expiresAt New expiration time after the renewal
 */
class SandboxRenewResponse(
    val expiresAt: OffsetDateTime,
)

/**
 * Connection endpoint information for a sandbox.
 *
 * @property endpoint Sandbox endpoint
 * @property headers Headers that must be included on every request targeting this endpoint
 *   (e.g. when the server requires them for routing or auth). Defaults to an empty map when
 *   none are required.
 */
class SandboxEndpoint(
    val endpoint: String,
    val headers: Map<String, String> = emptyMap(),
)

/**
 * A paginated list of sandbox information.
 *
 * Pairs one page of results with the pagination metadata describing that page.
 *
 * @property sandboxInfos List of sandbox details for the current page
 * @property pagination Pagination metadata
 */
class PagedSandboxInfos(
    val sandboxInfos: List<SandboxInfo>,
    val pagination: PaginationInfo,
)

/**
 * Pagination metadata.
 *
 * NOTE(review): this page number was previously documented as 0-indexed, but
 * `SandboxFilter.Builder.page` rejects any value below 1, so a request can never ask
 * for page 0. The numbering here is presumably 1-indexed as well — confirm against the
 * server API.
 *
 * @property page Current page number
 * @property pageSize Number of items per page
 * @property totalItems Total number of items across all pages
 * @property totalPages Total number of pages
 * @property hasNextPage True if there is a next page available
 */
class PaginationInfo(
    val page: Int,
    val pageSize: Int,
    val totalItems: Int,
    val totalPages: Int,
    val hasNextPage: Boolean,
)

/**
 * Real-time resource usage metrics for a sandbox.
 *
 * All measurements except [timestamp] are carried as [Float].
 *
 * @property cpuCount Number of CPU cores available/allocated
 * @property cpuUsedPercentage Current CPU usage as a percentage (0.0 - 100.0)
 * @property memoryTotalInMiB Total memory available in Mebibytes
 * @property memoryUsedInMiB Memory currently used in Mebibytes
 * @property timestamp Timestamp of the metric collection (Unix epoch milliseconds)
 */
class SandboxMetrics(
    val cpuCount: Float,
    val cpuUsedPercentage: Float,
    val memoryTotalInMiB: Float,
    val memoryUsedInMiB: Float,
    val timestamp: Long,
)


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/services/Commands.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.domain.services

import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.CommandLogs
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.CommandStatus
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.Execution
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.RunCommandRequest

/**
 * Command execution operations for sandbox environments.
 *
 * Covers starting commands, interrupting them, and querying the status and logs
 * of an execution by its identifier.
 */
interface Commands {
    /**
     * Executes a shell command in the sandbox environment.
     *
     * Whether the command runs in the foreground (streaming) or in the background
     * is determined by the request configuration.
     *
     * @param request Configuration for the command execution including command text,
     *                working directory, and timeout settings
     * @return An [Execution] handle representing the running command instance
     */
    fun run(request: RunCommandRequest): Execution

    /**
     * Convenience overload that runs [command] with an otherwise default request.
     *
     * Equivalent to:
     * `run(RunCommandRequest.builder().command(command).build())`
     *
     * @param command Command text to execute
     * @return An [Execution] handle representing the running command instance
     */
    fun run(command: String): Execution {
        val request = RunCommandRequest.builder().command(command).build()
        return run(request)
    }

    /**
     * Interrupts and terminates a running command execution.
     *
     * This sends a termination signal (usually SIGTERM/SIGKILL) to the process
     * associated with the given execution ID.
     *
     * @param executionId Unique identifier of the execution to interrupt
     */
    fun interrupt(executionId: String)

    /**
     * Gets the current running status of a command.
     *
     * @param executionId Unique identifier of the execution to query
     * @return Command status information
     */
    fun getCommandStatus(executionId: String): CommandStatus

    /**
     * Gets the logs of a background command (non-streamed).
     *
     * @param executionId Unique identifier of the execution to query
     * @param cursor Optional line cursor for incremental reads; defaults to `null`
     * @return Command logs content and tail cursor
     */
    fun getBackgroundCommandLogs(
        executionId: String,
        cursor: Long? = null,
    ): CommandLogs
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/services/Egress.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.domain.services

import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.NetworkPolicy
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.NetworkRule

/**
 * Egress network policy operations.
 *
 * NOTE(review): which sandbox an implementation targets is not visible from this
 * interface (neither method takes a sandbox id) — confirm against the implementing adapter.
 */
interface Egress {
    /**
     * Retrieves an egress [NetworkPolicy].
     *
     * NOTE(review): presumably the policy currently in effect — confirm.
     *
     * @return The retrieved network policy
     */
    fun getPolicy(): NetworkPolicy

    /**
     * Patches the egress rules using [rules].
     *
     * NOTE(review): whether the given rules are merged with or replace the existing
     * ones is not visible here — confirm against the implementation.
     *
     * @param rules Egress rules to apply
     */
    fun patchRules(rules: List<NetworkRule>)
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/services/Filesystem.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.domain.services

import com.alibaba.opensandbox.sandbox.domain.models.execd.filesystem.ContentReplaceEntry
import com.alibaba.opensandbox.sandbox.domain.models.execd.filesystem.EntryInfo
import com.alibaba.opensandbox.sandbox.domain.models.execd.filesystem.MoveEntry
import com.alibaba.opensandbox.sandbox.domain.models.execd.filesystem.SearchEntry
import com.alibaba.opensandbox.sandbox.domain.models.execd.filesystem.SetPermissionEntry
import com.alibaba.opensandbox.sandbox.domain.models.execd.filesystem.WriteEntry
import java.io.InputStream
import java.util.Collections

/**
 * Filesystem operations for sandbox environments.
 *
 * Groups the file and directory operations available inside a sandbox: reading
 * (as text, bytes or stream), writing, directory creation, deletion, moving,
 * permission changes, in-file content replacement, searching and metadata lookup.
 */
interface Filesystem {
    /**
     * Reads a file and returns its content decoded as text.
     *
     * @param path The absolute or relative path to the file to read
     * @param encoding Character encoding for the file content (default: UTF-8)
     * @param range HTTP byte range to read (e.g., "bytes=0-1023"); defaults to `null`
     * @return The file content as a string
     * @throws SandboxException if the operation fails
     */
    fun readFile(
        path: String,
        encoding: String = "UTF-8",
        range: String? = null,
    ): String

    /**
     * Single-argument overload that reads a file as UTF-8 text.
     *
     * Equivalent to: `readFile(path, "UTF-8", null)`
     *
     * @param path The absolute or relative path to the file to read
     * @return The file content as a UTF-8 string
     */
    fun readFile(path: String): String = readFile(path, "UTF-8", null)

    /**
     * Reads a file and returns its raw bytes.
     *
     * @param path The absolute or relative path to the file to read
     * @param range HTTP byte range to read (e.g., "bytes=0-1023"); defaults to `null`
     * @return The file content as a byte array
     * @throws SandboxException if the operation fails
     */
    fun readByteArray(
        path: String,
        range: String? = null,
    ): ByteArray

    /**
     * Single-argument overload that reads a file as bytes without a range.
     *
     * Equivalent to: `readByteArray(path, null)`
     *
     * @param path The absolute or relative path to the file to read
     * @return The full file content as a byte array
     */
    fun readByteArray(path: String): ByteArray = readByteArray(path, null)

    /**
     * Opens a file for reading and returns it as an [InputStream].
     *
     * @param path The absolute or relative path to the file to read
     * @param range HTTP byte range to read (e.g., "bytes=0-1023"); defaults to `null`
     * @return An InputStream for reading the file content
     * @throws SandboxException if the operation fails
     */
    fun readStream(
        path: String,
        range: String? = null,
    ): InputStream

    /**
     * Single-argument overload that opens a file stream without a range.
     *
     * Equivalent to: `readStream(path, null)`
     *
     * @param path The absolute or relative path to the file to read
     * @return An InputStream for reading the file content
     */
    fun readStream(path: String): InputStream = readStream(path, null)

    /**
     * Writes content to files as described by the given write entries.
     *
     * @param entries List of WriteEntry objects specifying files to write and their content
     * @throws SandboxException if the operation fails
     */
    fun write(entries: List<WriteEntry>)

    /**
     * Writes a single file described by [entry].
     *
     * Delegates to [write] with a one-element list.
     *
     * @param entry The file to write and its content
     */
    fun writeFile(entry: WriteEntry) {
        val single = Collections.singletonList(entry)
        write(single)
    }

    /**
     * Writes a single file from a path and its data.
     *
     * Builds a [WriteEntry] with only [path] and [data] set and delegates to
     * the [WriteEntry]-based overload.
     *
     * @param path Path of the file to write
     * @param data Content to write
     */
    fun writeFile(
        path: String,
        data: Any,
    ) {
        val entry =
            WriteEntry
                .builder()
                .path(path)
                .data(data)
                .build()
        writeFile(entry)
    }

    /**
     * Creates directories as described by the given entries.
     *
     * @param entries List of WriteEntry objects specifying directories to create
     * @throws SandboxException if the operation fails
     */
    fun createDirectories(entries: List<WriteEntry>)

    /**
     * Deletes the given files.
     *
     * @param paths List of file paths to delete
     * @throws SandboxException if the operation fails
     */
    fun deleteFiles(paths: List<String>)

    /**
     * Deletes the given directories.
     *
     * @param paths List of directory paths to delete
     * @throws SandboxException if the operation fails
     */
    fun deleteDirectories(paths: List<String>)

    /**
     * Moves files from their source paths to their destination paths.
     *
     * @param entries List of MoveEntry objects specifying source and destination paths
     * @throws SandboxException if the operation fails
     */
    fun moveFiles(entries: List<MoveEntry>)

    /**
     * Sets file system permissions for the given entries.
     *
     * @param entries List of SetPermissionEntry objects specifying files and their new permissions
     * @throws SandboxException if the operation fails
     */
    fun setPermissions(entries: List<SetPermissionEntry>)

    /**
     * Replaces content inside files according to search and replace patterns.
     *
     * @param entries List of ContentReplaceEntry objects specifying replacement operations
     * @throws SandboxException if the operation fails
     */
    fun replaceContents(entries: List<ContentReplaceEntry>)

    /**
     * Searches for files and directories matching the given criteria.
     *
     * @param entry SearchEntry object containing search parameters and criteria
     * @return List of EntryInfo objects containing metadata for matching files/directories
     * @throws SandboxException if the operation fails
     */
    fun search(entry: SearchEntry): List<EntryInfo>

    /**
     * Retrieves file information for the given paths.
     *
     * @param paths List of file/directory paths to get information for
     * @return Map where keys are file paths and values are EntryInfo objects containing file metadata
     * @throws SandboxException if the operation fails
     */
    fun readFileInfo(paths: List<String>): Map<String, EntryInfo>
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/services/Health.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.domain.services

/**
 * Health monitoring operations for sandbox environments.
 *
 * Exposes a single liveness probe, [ping].
 */
interface Health {
    /**
     * Performs a basic health check on the specified sandbox.
     *
     * The outcome is reported through the return value.
     * NOTE(review): whether implementations may also throw (e.g. on transport errors)
     * is not visible from this interface — confirm.
     *
     * @param sandboxId Unique identifier of the target sandbox
     * @return true if sandbox is healthy and responsive, false otherwise
     */
    fun ping(sandboxId: String): Boolean
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/services/Metrics.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.domain.services

import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxMetrics

/**
 * Metrics collection and monitoring operations for sandbox environments.
 *
 * Exposes a single query, [getMetrics], returning a [SandboxMetrics] value.
 */
interface Metrics {
    /**
     * Retrieves current resource utilization metrics for a sandbox.
     *
     * @param sandboxId Unique identifier of the target sandbox
     * @return Current resource utilization snapshot
     */
    fun getMetrics(sandboxId: String): SandboxMetrics
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/services/Sandboxes.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.domain.services

import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.NetworkPolicy
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.PagedSandboxInfos
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxCreateResponse
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxEndpoint
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxFilter
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxImageSpec
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxInfo
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxRenewResponse
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.Volume
import java.time.Duration
import java.time.OffsetDateTime

/**
 * Core sandbox lifecycle management service.
 *
 * This service provides a clean abstraction over sandbox creation, management,
 * and termination operations, completely isolating business logic from API implementation details.
 * All operations address a sandbox by its string identifier, which is returned by [createSandbox].
 */
interface Sandboxes {
    /**
     * Creates a new sandbox with the specified configuration.
     *
     * @param spec Container image specification for provisioning the sandbox
     * @param entrypoint The command to run as the sandbox's main process (e.g. `["python", "/app/main.py"]`)
     * @param env Environment variables injected into the sandbox runtime
     * @param metadata User-defined metadata used for management and filtering
     * @param timeout Sandbox lifetime. Pass null to require explicit cleanup.
     * @param resource Runtime resource limits (e.g. cpu/memory). Exact semantics are server-defined
     * @param networkPolicy Optional outbound network policy (egress); null means no policy is sent
     * @param extensions Opaque extension parameters passed through to the server as-is. Prefer namespaced keys
     * @param volumes Optional list of volume mounts for persistent storage; null means no volumes are requested
     * @return Sandbox creation response containing the sandbox id
     */
    fun createSandbox(
        spec: SandboxImageSpec,
        entrypoint: List<String>,
        env: Map<String, String>,
        metadata: Map<String, String>,
        timeout: Duration?,
        resource: Map<String, String>,
        networkPolicy: NetworkPolicy?,
        extensions: Map<String, String>,
        volumes: List<Volume>?,
    ): SandboxCreateResponse

    /**
     * Retrieves information about an existing sandbox.
     *
     * @param sandboxId Unique identifier of the sandbox
     * @return Current sandbox information
     */
    fun getSandboxInfo(sandboxId: String): SandboxInfo

    /**
     * Lists sandboxes matching the given filter.
     *
     * @param filter Filter criteria used to select sandboxes
     * @return One page of sandbox information matching the filter, together with its pagination details
     */
    fun listSandboxes(filter: SandboxFilter): PagedSandboxInfos

    /**
     * Gets the endpoint for a port of a sandbox.
     *
     * This overload takes no proxy flag; see the three-argument overload to choose
     * explicitly whether the server proxy is used.
     *
     * @param sandboxId Unique identifier of the sandbox
     * @param port Endpoint port number
     * @return Target sandbox endpoint
     */
    fun getSandboxEndpoint(
        sandboxId: String,
        port: Int,
    ): SandboxEndpoint

    /**
     * Gets the endpoint for a port of a sandbox, choosing explicitly whether the
     * server proxy is used.
     *
     * @param sandboxId Unique identifier of the sandbox
     * @param port Endpoint port number
     * @param useServerProxy Whether to use the server proxy for the endpoint
     * @return Target sandbox endpoint
     */
    fun getSandboxEndpoint(
        sandboxId: String,
        port: Int,
        useServerProxy: Boolean,
    ): SandboxEndpoint

    /**
     * Pauses a running sandbox, preserving its state.
     *
     * @param sandboxId Unique identifier of the sandbox
     */
    fun pauseSandbox(sandboxId: String)

    /**
     * Resumes a paused sandbox.
     *
     * @param sandboxId Unique identifier of the sandbox
     */
    fun resumeSandbox(sandboxId: String)

    /**
     * Renews the expiration time of a sandbox.
     *
     * @param sandboxId Unique identifier of the sandbox
     * @param newExpirationTime New expiration timestamp
     *
     * @return Sandbox renew response carrying the updated expiration information
     */
    fun renewSandboxExpiration(
        sandboxId: String,
        newExpirationTime: OffsetDateTime,
    ): SandboxRenewResponse

    /**
     * Terminates a sandbox and releases all associated resources.
     *
     * @param sandboxId Unique identifier of the sandbox
     */
    fun killSandbox(sandboxId: String)
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/converter/ExceptionConverter.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter

import com.alibaba.opensandbox.sandbox.api.infrastructure.ClientError
import com.alibaba.opensandbox.sandbox.api.infrastructure.ClientException
import com.alibaba.opensandbox.sandbox.api.infrastructure.ServerError
import com.alibaba.opensandbox.sandbox.api.infrastructure.ServerException
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxApiException
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxError
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxError.Companion.UNEXPECTED_RESPONSE
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxException
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxInternalException
import kotlinx.serialization.Serializable
import kotlinx.serialization.json.JsonElement
import kotlinx.serialization.json.decodeFromJsonElement
import kotlinx.serialization.json.encodeToJsonElement
import java.io.IOException
import com.alibaba.opensandbox.sandbox.api.execd.infrastructure.ClientError as ExecdClientError
import com.alibaba.opensandbox.sandbox.api.execd.infrastructure.ClientException as ExecdClientException
import com.alibaba.opensandbox.sandbox.api.execd.infrastructure.ServerError as ExecdServerError
import com.alibaba.opensandbox.sandbox.api.execd.infrastructure.ServerException as ExecdServerException

/**
 * Normalizes any exception raised inside the SDK into a [SandboxException].
 *
 * - A [SandboxException] is returned unchanged.
 * - Exceptions thrown by the generated lifecycle/execd API clients are converted
 *   into an API exception that carries status code, error body and request id.
 * - Everything else is wrapped in a [SandboxInternalException] whose message
 *   prefix reflects the category of the failure; the original exception is
 *   always preserved as the cause.
 *
 * @return the equivalent [SandboxException]
 */
fun Exception.toSandboxException(): SandboxException {
    // Already in the SDK's own exception hierarchy: nothing to translate.
    if (this is SandboxException) return this

    val raisedByApiClient =
        this is ClientException || this is ServerException ||
            this is ExecdClientException || this is ExecdServerException
    if (raisedByApiClient) return this.toApiException()

    val description =
        when (this) {
            is IOException -> "Network connectivity error: ${this.message}"
            is IllegalStateException, is IllegalArgumentException -> "SDK internal usage error: ${this.message}"
            is UnsupportedOperationException -> "Operation not supported: ${this.message}"
            else -> "Unexpected SDK error occurred: ${this.message}"
        }

    return SandboxInternalException(
        message = description,
        cause = this,
    )
}

/**
 * Builds a [SandboxApiException] from an exception thrown by one of the generated
 * API clients (lifecycle or execd).
 *
 * The HTTP status code and raw response are read from the client exception; the
 * request id is taken from the response headers and the error body is parsed into
 * a [SandboxError]. When the body cannot be parsed, the error falls back to
 * [UNEXPECTED_RESPONSE], keeping the raw text as the message if the body is a string.
 * For any other exception type the status code is 0 and no response data is used.
 */
private fun Exception.toApiException(): SandboxApiException {
    val statusCode: Int
    val rawResponse: Any?
    when (this) {
        is ClientException -> {
            statusCode = this.statusCode
            rawResponse = this.response
        }
        is ServerException -> {
            statusCode = this.statusCode
            rawResponse = this.response
        }
        is ExecdClientException -> {
            statusCode = this.statusCode
            rawResponse = this.response
        }
        is ExecdServerException -> {
            statusCode = this.statusCode
            rawResponse = this.response
        }
        else -> {
            statusCode = 0
            rawResponse = null
        }
    }

    // Pull the request id and the error payload out of the response in one pass.
    val requestId: String?
    val errorBody: Any?
    when (rawResponse) {
        is ClientError<*> -> {
            requestId = rawResponse.headers.extractRequestId()
            errorBody = rawResponse.body
        }
        is ServerError<*> -> {
            requestId = rawResponse.headers.extractRequestId()
            errorBody = rawResponse.body
        }
        is ExecdClientError<*> -> {
            requestId = rawResponse.headers.extractRequestId()
            errorBody = rawResponse.body
        }
        is ExecdServerError<*> -> {
            requestId = rawResponse.headers.extractRequestId()
            errorBody = rawResponse.body
        }
        else -> {
            requestId = null
            errorBody = null
        }
    }

    val sandboxError =
        parseSandboxError(errorBody)
            ?: when (errorBody) {
                is String -> SandboxError(UNEXPECTED_RESPONSE, errorBody)
                else -> SandboxError(UNEXPECTED_RESPONSE)
            }

    return SandboxApiException(
        message = this.message,
        statusCode = statusCode,
        cause = this,
        error = sandboxError,
        requestId = requestId,
    )
}

/**
 * Reads the request id from a set of response headers.
 *
 * The first header whose name equals `X-Request-ID` (ignoring case) is used, and
 * only its first value is considered.
 *
 * @return the request id, or null when the header is missing, has no values, or
 * its first value is blank
 */
private fun Map<String, List<String>>.extractRequestId(): String? {
    for ((headerName, headerValues) in this) {
        if (headerName.equals("X-Request-ID", ignoreCase = true)) {
            // Only the first matching header is inspected, even if its value is unusable.
            val candidate = headerValues.firstOrNull()
            return candidate?.takeIf { it.isNotBlank() }
        }
    }
    return null
}

/**
 * Attempts to interpret an error response body as a [SandboxError].
 *
 * A string body is parsed as JSON text; any other body is first encoded to a JSON
 * element. The result is then decoded into a `code`/`message` pair.
 *
 * @param body raw error body of a failed API call, may be null
 * @return the parsed error, or null when [body] is null, when parsing/decoding
 * fails for any reason, or when the decoded `code` is null or blank
 */
fun parseSandboxError(body: Any?): SandboxError? {
    val payload = body ?: return null

    // Any failure while parsing or decoding is deliberately reduced to "no structured error".
    return runCatching {
        val tree: JsonElement =
            if (payload is String) {
                jsonParser.parseToJsonElement(payload)
            } else {
                jsonParser.encodeToJsonElement(payload)
            }

        val decoded = jsonParser.decodeFromJsonElement<GenericErrorBody>(tree)

        decoded.code
            ?.takeUnless { it.isBlank() }
            ?.let { errorCode -> SandboxError(code = errorCode, message = decoded.message) }
    }.getOrNull()
}

/**
 * Minimal shape of an error response body as read by [parseSandboxError].
 *
 * Both fields are optional so that a body lacking either key still decodes.
 *
 * @property code error code reported in the body, if any
 * @property message error message reported in the body, if any
 */
@Serializable
private data class GenericErrorBody(
    val code: String? = null,
    val message: String? = null,
)


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/converter/ExecutionConverter.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter

import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.CommandStatus
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.RunCommandRequest
import com.alibaba.opensandbox.sandbox.api.models.execd.CommandStatusResponse as ApiCommandStatusResponse
import com.alibaba.opensandbox.sandbox.api.models.execd.RunCommandRequest as ApiRunCommandRequest

/**
 * Converters between domain and API models for command execution.
 */
object ExecutionConverter {
    /**
     * Maps a domain [RunCommandRequest] onto the API request model.
     *
     * The domain `workingDirectory` is sent as `cwd`, and the optional timeout is
     * sent as a whole number of milliseconds.
     */
    fun RunCommandRequest.toApiRunCommandRequest(): ApiRunCommandRequest =
        ApiRunCommandRequest(
            command = this.command,
            background = this.background,
            cwd = this.workingDirectory,
            timeout = this.timeout?.inWholeMilliseconds,
            uid = this.uid,
            gid = this.gid,
            envs = this.envs,
        )

    /**
     * Maps an API command status response onto the domain [CommandStatus],
     * copying every field one-to-one.
     */
    fun ApiCommandStatusResponse.toCommandStatus(): CommandStatus =
        CommandStatus(
            id = this.id,
            content = this.content,
            running = this.running,
            exitCode = this.exitCode,
            error = this.error,
            startedAt = this.startedAt,
            finishedAt = this.finishedAt,
        )
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/converter/ExecutionEventDispatcher.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter

import com.alibaba.opensandbox.sandbox.api.models.execd.EventNode
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.Execution
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.ExecutionComplete
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.ExecutionError
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.ExecutionHandlers
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.ExecutionInit
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.ExecutionResult
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.OutputMessage

/**
 * Applies streamed execution events to an [Execution] and forwards them to the
 * optional user-supplied [ExecutionHandlers].
 *
 * @param execution aggregate that is updated as events arrive
 * @param handlers optional callbacks invoked after the aggregate has been updated
 */
class ExecutionEventDispatcher(
    private val execution: Execution,
    private val handlers: ExecutionHandlers? = null,
) {
    /**
     * Routes a single event according to its `type`. Events with an unrecognized
     * type are ignored.
     *
     * @param eventNode event to process
     */
    fun dispatch(eventNode: EventNode) {
        val emittedAt = eventNode.timestamp
        when (eventNode.type) {
            "init" -> handleInit(eventNode, emittedAt)
            "stdout" -> handleStdout(eventNode, emittedAt)
            "stderr" -> handleStderr(eventNode, emittedAt)
            "result" -> handleResult(eventNode, emittedAt)
            "error" -> handleError(eventNode, emittedAt)
            "execution_complete" -> handleExecutionComplete(eventNode, emittedAt)
            "execution_count" -> execution.executionCount = eventNode.executionCount
        }
    }

    /** Stores the execution id carried in the event text (empty when absent) and notifies `onInit`. */
    private fun handleInit(
        eventNode: EventNode,
        timestamp: Long,
    ) {
        val initEvent = ExecutionInit(id = eventNode.text.orEmpty(), timestamp = timestamp)
        execution.id = initEvent.id
        handlers?.onInit?.handle(initEvent)
    }

    /** Appends the event text to the stdout log and notifies `onStdout`. */
    private fun handleStdout(
        eventNode: EventNode,
        timestamp: Long,
    ) {
        OutputMessage(eventNode.text.orEmpty(), timestamp, false).also { message ->
            execution.logs.addStdout(message)
            handlers?.onStdout?.handle(message)
        }
    }

    /** Appends the event text to the stderr log and notifies `onStderr`. */
    private fun handleStderr(
        eventNode: EventNode,
        timestamp: Long,
    ) {
        OutputMessage(eventNode.text.orEmpty(), timestamp, true).also { message ->
            execution.logs.addStderr(message)
            handlers?.onStderr?.handle(message)
        }
    }

    /** Records the textual result of the event (empty when absent) and notifies `onResult`. */
    private fun handleResult(
        eventNode: EventNode,
        timestamp: Long,
    ) {
        val outcome = ExecutionResult(eventNode.results?.getText() ?: "", timestamp)
        outcome.timestamp = timestamp
        execution.addResult(outcome)
        handlers?.onResult?.handle(outcome)
    }

    /**
     * Records the error carried by the event and notifies `onError`.
     *
     * Note: the error payload is dereferenced with `!!`, so an "error" event
     * without a payload raises a NullPointerException.
     */
    private fun handleError(
        eventNode: EventNode,
        timestamp: Long,
    ) {
        val payload = eventNode.error!!
        val failure =
            ExecutionError(
                name = payload.name ?: "",
                value = payload.value ?: "",
                traceback = payload.traceback,
                timestamp = timestamp,
            )
        execution.error = failure
        handlers?.onError?.handle(failure)
    }

    /** Notifies `onExecutionComplete`, reporting 0 ms when the event has no execution time. */
    private fun handleExecutionComplete(
        eventNode: EventNode,
        timestamp: Long,
    ) {
        val elapsedMillis = eventNode.executionTimeInMillis ?: 0L
        val completion = ExecutionComplete(executionTimeInMillis = elapsedMillis, timestamp = timestamp)
        handlers?.onExecutionComplete?.handle(completion)
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/converter/FilesystemConverter.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter

import com.alibaba.opensandbox.sandbox.domain.models.execd.filesystem.ContentReplaceEntry
import com.alibaba.opensandbox.sandbox.domain.models.execd.filesystem.EntryInfo
import com.alibaba.opensandbox.sandbox.domain.models.execd.filesystem.MoveEntry
import com.alibaba.opensandbox.sandbox.domain.models.execd.filesystem.SetPermissionEntry
import com.alibaba.opensandbox.sandbox.api.models.execd.FileInfo as ApiFileInfo
import com.alibaba.opensandbox.sandbox.api.models.execd.Permission as ApiPermission
import com.alibaba.opensandbox.sandbox.api.models.execd.RenameFileItem as ApiRenameFileItem
import com.alibaba.opensandbox.sandbox.api.models.execd.ReplaceFileContentItem as ApiReplaceFileContentItem

/**
 * Converter between domain models and API models for filesystem operations.
 *
 * @author ninan
 * @since 2025/12/2
 */
object FilesystemConverter {
    /**
     * Maps an API [ApiFileInfo] onto the domain [EntryInfo]; the API's
     * `propertySize` field becomes the domain `size`.
     */
    fun ApiFileInfo.toEntryInfo(): EntryInfo =
        EntryInfo(
            path = path,
            mode = mode,
            owner = owner,
            group = group,
            createdAt = createdAt,
            modifiedAt = modifiedAt,
            size = propertySize,
        )

    /**
     * Maps a domain [SetPermissionEntry] onto the API [ApiPermission]
     * (owner, group and mode only; the path is not part of the API model).
     */
    fun SetPermissionEntry.toApiPermission(): ApiPermission =
        ApiPermission(
            owner = owner,
            group = group,
            mode = mode,
        )

    /**
     * Maps a domain [MoveEntry] onto the API [ApiRenameFileItem].
     */
    fun MoveEntry.toApiRenameFileItem(): ApiRenameFileItem =
        ApiRenameFileItem(
            src = src,
            dest = dest,
        )

    /**
     * Maps a domain [ContentReplaceEntry] onto the API [ApiReplaceFileContentItem].
     */
    fun ContentReplaceEntry.toApiReplaceFileContentItem(): ApiReplaceFileContentItem =
        ApiReplaceFileContentItem(
            old = oldContent,
            new = newContent,
        )

    /**
     * Maps each domain [MoveEntry] in the list onto an API [ApiRenameFileItem],
     * preserving order.
     */
    fun List<MoveEntry>.toApiRenameFileItems(): List<ApiRenameFileItem> =
        map { move -> move.toApiRenameFileItem() }

    /**
     * Builds a path-keyed map of API permissions from the list of entries.
     * When several entries share a path, the last one wins.
     */
    fun List<SetPermissionEntry>.toApiPermissionMap(): Map<String, ApiPermission> =
        associateBy(
            keySelector = { it.path },
            valueTransform = { it.toApiPermission() },
        )

    /**
     * Builds a path-keyed map of API content replacements from the list of entries.
     * When several entries share a path, the last one wins.
     */
    fun List<ContentReplaceEntry>.toApiReplaceFileContentMap(): Map<String, ApiReplaceFileContentItem> =
        associateBy(
            keySelector = { it.path },
            valueTransform = { it.toApiReplaceFileContentItem() },
        )

    /**
     * Converts every value of a path-keyed map of API file infos into a domain
     * [EntryInfo], leaving the keys untouched.
     */
    fun Map<String, ApiFileInfo>.toEntryInfoMap(): Map<String, EntryInfo> =
        mapValues { it.value.toEntryInfo() }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/converter/SandboxModelConverter.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter

// API Models
import com.alibaba.opensandbox.sandbox.api.models.CreateSandboxRequest
import com.alibaba.opensandbox.sandbox.api.models.CreateSandboxResponse
import com.alibaba.opensandbox.sandbox.api.models.Endpoint
import com.alibaba.opensandbox.sandbox.api.models.ImageSpec
import com.alibaba.opensandbox.sandbox.api.models.ImageSpecAuth
import com.alibaba.opensandbox.sandbox.api.models.ListSandboxesResponse
import com.alibaba.opensandbox.sandbox.api.models.RenewSandboxExpirationRequest
import com.alibaba.opensandbox.sandbox.api.models.RenewSandboxExpirationResponse
import com.alibaba.opensandbox.sandbox.api.models.execd.Metrics
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.Host
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.NetworkPolicy
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.NetworkRule
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.PVC
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.PagedSandboxInfos
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.PaginationInfo
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxCreateResponse
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxEndpoint
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxImageAuth
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxImageSpec
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxInfo
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxMetrics
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxRenewResponse
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.Volume
import java.time.Duration
import java.time.OffsetDateTime
import com.alibaba.opensandbox.sandbox.api.models.Host as ApiHost
import com.alibaba.opensandbox.sandbox.api.models.NetworkPolicy as ApiNetworkPolicy
import com.alibaba.opensandbox.sandbox.api.models.NetworkRule as ApiNetworkRule
import com.alibaba.opensandbox.sandbox.api.models.PVC as ApiPVC
import com.alibaba.opensandbox.sandbox.api.models.PaginationInfo as ApiPaginationInfo
import com.alibaba.opensandbox.sandbox.api.models.Sandbox as ApiSandbox
import com.alibaba.opensandbox.sandbox.api.models.SandboxStatus as ApiSandboxStatus
import com.alibaba.opensandbox.sandbox.api.models.Volume as ApiVolume
import com.alibaba.opensandbox.sandbox.api.models.egress.NetworkPolicy as ApiEgressNetworkPolicy
import com.alibaba.opensandbox.sandbox.api.models.egress.NetworkRule as ApiEgressNetworkRule
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxStatus as DomainSandboxStatus

/**
 * Converters between the SDK's domain models and the generated API models for
 * sandbox lifecycle, egress and metrics operations.
 */
internal object SandboxModelConverter {
    /**
     * Converts Domain ImageSpec -> API ImageSpec
     */
    fun SandboxImageSpec.toApiImageSpec(): ImageSpec {
        return ImageSpec(
            uri = this.image,
            auth =
                this.auth?.let {
                    ImageSpecAuth(
                        username = it.username,
                        password = it.password,
                    )
                },
        )
    }

    /**
     * Converts Time -> API renew Request
     */
    fun OffsetDateTime.toApiRenewRequest(): RenewSandboxExpirationRequest {
        return RenewSandboxExpirationRequest(
            expiresAt = this,
        )
    }

    /**
     * Converts Domain NetworkPolicy -> API NetworkPolicy
     *
     * A null default action or a null egress list is passed through as null.
     */
    fun NetworkPolicy.toApiNetworkPolicy(): ApiNetworkPolicy {
        val apiDefaultAction =
            defaultAction?.let { action ->
                when (action) {
                    NetworkPolicy.DefaultAction.ALLOW -> ApiNetworkPolicy.DefaultAction.allow
                    NetworkPolicy.DefaultAction.DENY -> ApiNetworkPolicy.DefaultAction.deny
                }
            }
        return ApiNetworkPolicy(
            defaultAction = apiDefaultAction,
            // Reuse the single-rule converter instead of duplicating its mapping here.
            egress = egress?.map { it.toApiNetworkRule() },
        )
    }

    /**
     * Converts Domain NetworkRule -> API NetworkRule (lifecycle API)
     */
    fun NetworkRule.toApiNetworkRule(): ApiNetworkRule {
        val action =
            when (this.action) {
                NetworkRule.Action.ALLOW -> ApiNetworkRule.Action.allow
                NetworkRule.Action.DENY -> ApiNetworkRule.Action.deny
            }
        return ApiNetworkRule(action = action, target = this.target)
    }

    /**
     * Converts Domain NetworkRule -> API NetworkRule (egress API)
     */
    fun NetworkRule.toApiEgressNetworkRule(): ApiEgressNetworkRule {
        val action =
            when (this.action) {
                NetworkRule.Action.ALLOW -> ApiEgressNetworkRule.Action.allow
                NetworkRule.Action.DENY -> ApiEgressNetworkRule.Action.deny
            }
        return ApiEgressNetworkRule(action = action, target = this.target)
    }

    /**
     * Converts API NetworkRule (lifecycle API) -> Domain NetworkRule
     */
    fun ApiNetworkRule.toDomainNetworkRule(): NetworkRule {
        val action =
            when (this.action) {
                ApiNetworkRule.Action.allow -> NetworkRule.Action.ALLOW
                ApiNetworkRule.Action.deny -> NetworkRule.Action.DENY
            }
        return NetworkRule
            .builder()
            .action(action)
            .target(this.target)
            .build()
    }

    /**
     * Converts API NetworkPolicy (lifecycle API) -> Domain NetworkPolicy
     *
     * A missing default action is mapped to DENY and a missing egress list to an empty list.
     */
    fun ApiNetworkPolicy.toDomainNetworkPolicy(): NetworkPolicy {
        val defaultAction =
            when (this.defaultAction) {
                ApiNetworkPolicy.DefaultAction.allow -> NetworkPolicy.DefaultAction.ALLOW
                ApiNetworkPolicy.DefaultAction.deny, null -> NetworkPolicy.DefaultAction.DENY
            }
        return NetworkPolicy
            .builder()
            .defaultAction(defaultAction)
            .egress(this.egress?.map { it.toDomainNetworkRule() } ?: emptyList())
            .build()
    }

    /**
     * Converts API NetworkRule (egress API) -> Domain NetworkRule
     */
    fun ApiEgressNetworkRule.toDomainEgressNetworkRule(): NetworkRule {
        val action =
            when (this.action) {
                ApiEgressNetworkRule.Action.allow -> NetworkRule.Action.ALLOW
                ApiEgressNetworkRule.Action.deny -> NetworkRule.Action.DENY
            }
        return NetworkRule
            .builder()
            .action(action)
            .target(this.target)
            .build()
    }

    /**
     * Converts API NetworkPolicy (egress API) -> Domain NetworkPolicy
     *
     * A missing default action is mapped to DENY and a missing egress list to an empty list.
     */
    fun ApiEgressNetworkPolicy.toDomainEgressNetworkPolicy(): NetworkPolicy {
        val defaultAction =
            when (this.defaultAction) {
                ApiEgressNetworkPolicy.DefaultAction.allow -> NetworkPolicy.DefaultAction.ALLOW
                ApiEgressNetworkPolicy.DefaultAction.deny, null -> NetworkPolicy.DefaultAction.DENY
            }
        return NetworkPolicy
            .builder()
            .defaultAction(defaultAction)
            .egress(this.egress?.map { it.toDomainEgressNetworkRule() } ?: emptyList())
            .build()
    }

    /**
     * Converts Domain Host -> API Host
     */
    fun Host.toApiHost(): ApiHost {
        return ApiHost(path = this.path)
    }

    /**
     * Converts Domain PVC -> API PVC
     */
    fun PVC.toApiPVC(): ApiPVC {
        return ApiPVC(claimName = this.claimName)
    }

    /**
     * Converts Domain Volume -> API Volume
     */
    fun Volume.toApiVolume(): ApiVolume {
        return ApiVolume(
            name = this.name,
            mountPath = this.mountPath,
            readOnly = this.readOnly,
            host = this.host?.toApiHost(),
            pvc = this.pvc?.toApiPVC(),
            subPath = this.subPath,
        )
    }

    /**
     * Converts a sandbox lifetime into the whole-second Int carried by the API request.
     *
     * `Duration.seconds` is a Long; a plain `toInt()` keeps only the low 32 bits, so a
     * lifetime beyond the Int range would silently wrap to an unrelated (possibly
     * negative) value. The value is clamped to the Int range instead.
     */
    private fun Duration.toApiTimeoutSeconds(): Int {
        return this.seconds.coerceIn(Int.MIN_VALUE.toLong(), Int.MAX_VALUE.toLong()).toInt()
    }

    /**
     * Assembles the API create-sandbox request from domain-level arguments.
     *
     * @param spec Container image specification
     * @param entrypoint Command run as the sandbox's main process
     * @param env Environment variables for the sandbox
     * @param metadata User-defined metadata
     * @param timeout Sandbox lifetime, sent as whole seconds (clamped to the Int range); null is sent as null
     * @param resource Resource limits, sent as `resourceLimits`
     * @param networkPolicy Optional egress network policy
     * @param extensions Extension parameters passed through unchanged
     * @param volumes Optional volume mounts
     * @return The API request model
     */
    fun toApiCreateSandboxRequest(
        spec: SandboxImageSpec,
        entrypoint: List<String>,
        env: Map<String, String>,
        metadata: Map<String, String>,
        timeout: Duration?,
        resource: Map<String, String>,
        networkPolicy: NetworkPolicy?,
        extensions: Map<String, String>,
        volumes: List<Volume>?,
    ): CreateSandboxRequest {
        return CreateSandboxRequest(
            image = spec.toApiImageSpec(),
            entrypoint = entrypoint,
            timeout = timeout?.toApiTimeoutSeconds(),
            env = env,
            metadata = metadata,
            resourceLimits = resource,
            networkPolicy = networkPolicy?.toApiNetworkPolicy(),
            extensions = extensions,
            volumes = volumes?.map { it.toApiVolume() },
        )
    }

    /**
     * API Sandbox -> Domain SandboxInfo
     */
    fun ApiSandbox.toSandboxInfo(): SandboxInfo {
        return SandboxInfo(
            id = this.id,
            entrypoint = this.entrypoint,
            expiresAt = this.expiresAt,
            createdAt = this.createdAt,
            image = this.image.toImageSpec(),
            status = this.status.toSandboxStatus(),
            metadata = metadata,
        )
    }

    /**
     * API ImageSpec -> Domain ImageSpec
     *
     * When auth is present, a missing username or password is replaced by an empty string.
     */
    fun ImageSpec.toImageSpec(): SandboxImageSpec {
        val builder =
            SandboxImageSpec.builder()
                .image(uri)

        auth?.let { authInfo ->
            val sandboxAuth =
                SandboxImageAuth.builder()
                    .username(authInfo.username.orEmpty())
                    .password(authInfo.password.orEmpty())
                    .build()
            builder.auth(sandboxAuth)
        }

        return builder.build()
    }

    /**
     * API Status -> Domain Status
     */
    fun ApiSandboxStatus.toSandboxStatus(): DomainSandboxStatus {
        return DomainSandboxStatus(
            state = this.state,
            reason = this.reason,
            message = this.message,
            lastTransitionAt = this.lastTransitionAt,
        )
    }

    /**
     * API Endpoint -> Domain Endpoint
     *
     * Missing headers are replaced by an empty map.
     */
    fun Endpoint.toSandboxEndpoint(): SandboxEndpoint {
        return SandboxEndpoint(this.endpoint, this.headers ?: emptyMap())
    }

    /**
     * API Create Response -> Domain Create Response
     */
    fun CreateSandboxResponse.toSandboxCreateResponse(): SandboxCreateResponse {
        return SandboxCreateResponse(
            id = this.id,
        )
    }

    /**
     * API Pagination Info -> Domain Pagination Info
     */
    fun ApiPaginationInfo.toPaginationInfo(): PaginationInfo {
        return PaginationInfo(
            page = this.page,
            pageSize = this.pageSize,
            totalItems = this.totalItems,
            totalPages = this.totalPages,
            hasNextPage = this.hasNextPage,
        )
    }

    /**
     * API List Response -> Domain Paged Infos
     */
    fun ListSandboxesResponse.toPagedSandboxInfos(): PagedSandboxInfos {
        return PagedSandboxInfos(
            items.map { it.toSandboxInfo() },
            pagination.toPaginationInfo(),
        )
    }

    /**
     * API (execd) Metrics -> Domain SandboxMetrics
     */
    fun Metrics.toSandboxMetrics(): SandboxMetrics {
        return SandboxMetrics(
            cpuCount = this.cpuCount,
            cpuUsedPercentage = cpuUsedPct,
            memoryTotalInMiB = memTotalMib,
            memoryUsedInMiB = memUsedMib,
            timestamp = this.timestamp,
        )
    }

    /**
     * API Renew Response -> Domain Renew Response
     */
    fun RenewSandboxExpirationResponse.toSandboxRenewResponse(): SandboxRenewResponse {
        return SandboxRenewResponse(
            expiresAt = this.expiresAt,
        )
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/converter/Serializer.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter

import kotlinx.serialization.KSerializer
import kotlinx.serialization.descriptors.elementNames
import kotlinx.serialization.encoding.Decoder
import kotlinx.serialization.encoding.Encoder
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.JsonDecoder
import kotlinx.serialization.json.JsonElement
import kotlinx.serialization.json.JsonEncoder
import kotlinx.serialization.json.JsonObject
import kotlinx.serialization.json.jsonObject

/**
 * Shared [Json] configuration for the SDK's (de)serialization.
 *
 * Decoding is deliberately tolerant, while encoding always writes default values.
 */
val jsonParser: Json =
    Json {
        // --- decoding ---
        // Accept relaxed JSON syntax.
        isLenient = true
        // Keys the target type does not declare are skipped instead of failing the decode.
        ignoreUnknownKeys = true
        // Let the library fall back to a property's default for input values it can coerce.
        coerceInputValues = true

        // --- encoding ---
        // Properties still holding their default value are written out as well.
        encodeDefaults = true
    }

/**
 * Base [KSerializer] that preserves JSON properties the [delegate] serializer does not declare.
 *
 * On decode, the incoming JSON object is split into the keys named by the delegate's descriptor
 * and everything else. The declared part is decoded by [delegate]; the remainder is handed to
 * [withUnknownProperties]. On encode, the delegate's output is merged with whatever
 * [getUnknownProperties] returns.
 *
 * Works only with the JSON format: both directions require a [JsonDecoder] / [JsonEncoder] and
 * throw [IllegalArgumentException] otherwise.
 *
 * @param T the type being serialized
 * @param delegate serializer that handles the declared properties of [T]
 */
abstract class AbstractUnknownPropertiesSerializer<T>(
    private val delegate: KSerializer<T>,
) : KSerializer<T> {
    override val descriptor = delegate.descriptor

    // The delegate's element names are fixed, so compute the set once rather than on every decode.
    private val knownKeys: Set<String> by lazy { delegate.descriptor.elementNames.toSet() }

    /** Returns a copy (or the same instance) of the receiver carrying the given undeclared properties. */
    abstract fun T.withUnknownProperties(unknown: Map<String, JsonElement>): T

    /** Returns the undeclared properties that should be written alongside the declared ones. */
    abstract fun T.getUnknownProperties(): Map<String, JsonElement>

    /**
     * Decodes a JSON object, routing declared keys through [delegate] and attaching the rest
     * via [withUnknownProperties].
     *
     * @throws IllegalArgumentException if [decoder] is not a [JsonDecoder] or the element is not a JSON object
     */
    override fun deserialize(decoder: Decoder): T {
        require(decoder is JsonDecoder) {
            "This serializer supports only the JSON format; expected JsonDecoder but got ${decoder::class}"
        }

        val jsonObject = decoder.decodeJsonElement().jsonObject

        val unknownProperties = jsonObject.filterKeys { it !in knownKeys }

        // Only declared keys are passed to the delegate.
        val cleanJsonObject = JsonObject(jsonObject.filterKeys { it in knownKeys })
        val standardObject = decoder.json.decodeFromJsonElement(delegate, cleanJsonObject)

        return standardObject.withUnknownProperties(unknownProperties)
    }

    /**
     * Encodes [value] through [delegate] and merges in its undeclared properties.
     *
     * @throws IllegalArgumentException if [encoder] is not a [JsonEncoder]
     */
    override fun serialize(
        encoder: Encoder,
        value: T,
    ) {
        require(encoder is JsonEncoder) {
            "This serializer supports only the JSON format; expected JsonEncoder but got ${encoder::class}"
        }

        val standardJsonElement = encoder.json.encodeToJsonElement(delegate, value)
        val standardJsonObject = standardJsonElement.jsonObject

        val unknownProperties = value.getUnknownProperties()

        // Undeclared properties are applied last, so on a key collision they replace the declared value.
        val mergedMap = standardJsonObject.toMutableMap()
        mergedMap.putAll(unknownProperties)

        encoder.encodeJsonElement(JsonObject(mergedMap))
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/CommandsAdapter.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.infrastructure.adapters.service

import com.alibaba.opensandbox.sandbox.HttpClientProvider
import com.alibaba.opensandbox.sandbox.api.execd.CommandApi
import com.alibaba.opensandbox.sandbox.api.execd.infrastructure.ClientError
import com.alibaba.opensandbox.sandbox.api.execd.infrastructure.ClientException
import com.alibaba.opensandbox.sandbox.api.execd.infrastructure.ResponseType
import com.alibaba.opensandbox.sandbox.api.execd.infrastructure.ServerError
import com.alibaba.opensandbox.sandbox.api.execd.infrastructure.ServerException
import com.alibaba.opensandbox.sandbox.api.execd.infrastructure.Success
import com.alibaba.opensandbox.sandbox.api.models.execd.EventNode
import com.alibaba.opensandbox.sandbox.domain.exceptions.InvalidArgumentException
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxApiException
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxError
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxError.Companion.UNEXPECTED_RESPONSE
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.CommandLogs
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.CommandStatus
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.Execution
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.RunCommandRequest
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxEndpoint
import com.alibaba.opensandbox.sandbox.domain.services.Commands
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.ExecutionConverter.toApiRunCommandRequest
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.ExecutionConverter.toCommandStatus
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.ExecutionEventDispatcher
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.jsonParser
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.parseSandboxError
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.toSandboxException
import okhttp3.Headers.Companion.toHeaders
import okhttp3.MediaType.Companion.toMediaType
import okhttp3.Request
import okhttp3.RequestBody.Companion.toRequestBody
import org.slf4j.LoggerFactory

/**
 * [Commands] implementation built on the OpenAPI-generated [CommandApi].
 *
 * [run] posts the command with a hand-built OkHttp request and consumes the response body line
 * by line, dispatching each decoded event; interrupt, status and background-log lookups are
 * delegated to the generated client. Failures are logged and rethrown via `toSandboxException()`.
 */
internal class CommandsAdapter(
    private val httpClientProvider: HttpClientProvider,
    private val execdEndpoint: SandboxEndpoint,
) : Commands {
    companion object {
        private const val RUN_COMMAND_PATH = "/command"
    }

    private val logger = LoggerFactory.getLogger(CommandsAdapter::class.java)

    // Generated client whose OkHttp instance stamps the endpoint's headers onto every request.
    private val api =
        CommandApi(
            "${httpClientProvider.config.protocol}://${execdEndpoint.endpoint}",
            httpClientProvider.httpClient.newBuilder()
                .addInterceptor { chain ->
                    val stamped = chain.request().newBuilder()
                    for ((name, value) in execdEndpoint.headers) {
                        stamped.header(name, value)
                    }
                    chain.proceed(stamped.build())
                }
                .build(),
        )

    /**
     * Runs a command and feeds every event of the response stream to the request's handlers.
     *
     * @param request command to execute together with its event handlers
     * @return the [Execution] populated by the dispatched events
     * @throws InvalidArgumentException if the command string is empty
     */
    override fun run(request: RunCommandRequest): Execution {
        if (request.command.isEmpty()) {
            throw InvalidArgumentException("Command cannot be empty")
        }
        try {
            val targetUrl = "${httpClientProvider.config.protocol}://${execdEndpoint.endpoint}$RUN_COMMAND_PATH"
            val requestBuilder = Request.Builder().url(targetUrl)
            val jsonBody =
                jsonParser.encodeToString(request.toApiRunCommandRequest())
                    .toRequestBody("application/json".toMediaType())
            val httpRequest =
                requestBuilder
                    .post(jsonBody)
                    .headers(execdEndpoint.headers.toHeaders())
                    .build()

            val execution = Execution()

            httpClientProvider.sseClient.newCall(httpRequest).execute().use { response ->
                if (!response.isSuccessful) {
                    val errorBodyString = response.body?.string()
                    val sandboxError = parseSandboxError(errorBodyString)
                    throw SandboxApiException(
                        message = "Failed to run commands. Status code: ${response.code}, Body: $errorBodyString",
                        statusCode = response.code,
                        error = sandboxError ?: SandboxError(UNEXPECTED_RESPONSE),
                        requestId = response.header("X-Request-ID"),
                    )
                }

                val eventReader = response.body?.byteStream()?.bufferedReader(Charsets.UTF_8)
                eventReader?.use { reader ->
                    val dispatcher = ExecutionEventDispatcher(execution, request.handlers)
                    for (line in reader.lineSequence()) {
                        if (line.isBlank()) continue
                        // A line that fails to decode or dispatch is logged and skipped; the stream continues.
                        try {
                            dispatcher.dispatch(jsonParser.decodeFromString<EventNode>(line))
                        } catch (e: Exception) {
                            logger.error("Failed to parse SSE line: {}", line, e)
                        }
                    }
                }
            }
            return execution
        } catch (e: Exception) {
            logger.error("Failed to run command (length: {})", request.command.length, e)
            throw e.toSandboxException()
        }
    }

    /** Asks the generated client to interrupt the execution identified by [executionId]. */
    override fun interrupt(executionId: String) {
        try {
            api.interruptCommand(executionId)
        } catch (e: Exception) {
            logger.error("Failed to interrupt command", e)
            throw e.toSandboxException()
        }
    }

    /** Fetches the status of [executionId] and converts it to the domain [CommandStatus]. */
    override fun getCommandStatus(executionId: String): CommandStatus {
        try {
            return api.getCommandStatus(executionId).toCommandStatus()
        } catch (e: Exception) {
            logger.error("Failed to get command status", e)
            throw e.toSandboxException()
        }
    }

    /**
     * Fetches the logs of a background command.
     *
     * @param executionId execution whose logs are requested
     * @param cursor value forwarded to the generated client's log call; may be null
     * @return the log content plus the cursor parsed from the `EXECD-COMMANDS-TAIL-CURSOR`
     * response header (null when the header is absent or not a valid long)
     */
    override fun getBackgroundCommandLogs(
        executionId: String,
        cursor: Long?,
    ): CommandLogs {
        try {
            val httpInfo = api.getBackgroundCommandLogsWithHttpInfo(executionId, cursor)
            val logText: String =
                when (httpInfo.responseType) {
                    ResponseType.Success -> (httpInfo as Success<*>).data as String
                    ResponseType.Informational ->
                        throw UnsupportedOperationException("Client does not support Informational responses.")
                    ResponseType.Redirection ->
                        throw UnsupportedOperationException("Client does not support Redirection responses.")
                    ResponseType.ClientError -> {
                        val clientError = httpInfo as ClientError<*>
                        throw ClientException(
                            "Client error : ${clientError.statusCode} ${clientError.message.orEmpty()}",
                            clientError.statusCode,
                            httpInfo,
                        )
                    }
                    ResponseType.ServerError -> {
                        val serverError = httpInfo as ServerError<*>
                        throw ServerException(
                            "Server error : ${serverError.statusCode} ${serverError.message.orEmpty()} ${serverError.body}",
                            serverError.statusCode,
                            httpInfo,
                        )
                    }
                }
            val nextCursor =
                httpInfo.headers["EXECD-COMMANDS-TAIL-CURSOR"]
                    ?.firstOrNull()
                    ?.toLongOrNull()
            return CommandLogs(content = logText, cursor = nextCursor)
        } catch (e: Exception) {
            logger.error("Failed to get command logs", e)
            throw e.toSandboxException()
        }
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/EgressAdapter.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.infrastructure.adapters.service

import com.alibaba.opensandbox.sandbox.HttpClientProvider
import com.alibaba.opensandbox.sandbox.api.egress.PolicyApi
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.NetworkPolicy
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.NetworkRule
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxEndpoint
import com.alibaba.opensandbox.sandbox.domain.services.Egress
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.SandboxModelConverter.toApiEgressNetworkRule
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.SandboxModelConverter.toDomainEgressNetworkPolicy
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.toSandboxException
import org.slf4j.LoggerFactory

/**
 * [Egress] implementation built on the OpenAPI-generated [PolicyApi].
 *
 * Failures are logged with the endpoint address and rethrown via `toSandboxException()`.
 */
internal class EgressAdapter(
    private val httpClientProvider: HttpClientProvider,
    private val egressEndpoint: SandboxEndpoint,
) : Egress {
    private val logger = LoggerFactory.getLogger(EgressAdapter::class.java)

    // Generated client whose OkHttp instance stamps the endpoint's headers onto every request.
    private val api =
        PolicyApi(
            "${httpClientProvider.config.protocol}://${egressEndpoint.endpoint}",
            httpClientProvider.httpClient.newBuilder()
                .addInterceptor { chain ->
                    val stamped = chain.request().newBuilder()
                    for ((name, value) in egressEndpoint.headers) {
                        stamped.header(name, value)
                    }
                    chain.proceed(stamped.build())
                }
                .build(),
        )

    /**
     * Fetches the current egress policy and converts it to the domain [NetworkPolicy].
     *
     * A response without a policy is treated as a failure.
     */
    override fun getPolicy(): NetworkPolicy {
        try {
            val apiPolicy = api.policyGet().policy
            if (apiPolicy == null) {
                throw IllegalStateException("Egress policy response did not contain policy")
            }
            return apiPolicy.toDomainEgressNetworkPolicy()
        } catch (e: Exception) {
            logger.error("Failed to fetch egress policy from endpoint {}", egressEndpoint.endpoint, e)
            throw e.toSandboxException()
        }
    }

    /** Converts [rules] to their API form and sends them as a policy patch. */
    override fun patchRules(rules: List<NetworkRule>) {
        try {
            val apiRules = rules.map { rule -> rule.toApiEgressNetworkRule() }
            api.policyPatch(apiRules)
        } catch (e: Exception) {
            logger.error("Failed to patch egress policy via endpoint {}", egressEndpoint.endpoint, e)
            throw e.toSandboxException()
        }
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/FilesystemAdapter.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.infrastructure.adapters.service

import com.alibaba.opensandbox.sandbox.HttpClientProvider
import com.alibaba.opensandbox.sandbox.api.execd.FilesystemApi
import com.alibaba.opensandbox.sandbox.domain.exceptions.InvalidArgumentException
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxApiException
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxError
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxError.Companion.UNEXPECTED_RESPONSE
import com.alibaba.opensandbox.sandbox.domain.models.execd.filesystem.ContentReplaceEntry
import com.alibaba.opensandbox.sandbox.domain.models.execd.filesystem.EntryInfo
import com.alibaba.opensandbox.sandbox.domain.models.execd.filesystem.MoveEntry
import com.alibaba.opensandbox.sandbox.domain.models.execd.filesystem.SearchEntry
import com.alibaba.opensandbox.sandbox.domain.models.execd.filesystem.SetPermissionEntry
import com.alibaba.opensandbox.sandbox.domain.models.execd.filesystem.WriteEntry
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxEndpoint
import com.alibaba.opensandbox.sandbox.domain.services.Filesystem
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.FilesystemConverter.toApiPermissionMap
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.FilesystemConverter.toApiRenameFileItems
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.FilesystemConverter.toApiReplaceFileContentMap
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.FilesystemConverter.toEntryInfo
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.FilesystemConverter.toEntryInfoMap
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.parseSandboxError
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.toSandboxException
import kotlinx.serialization.json.buildJsonObject
import kotlinx.serialization.json.put
import okhttp3.Headers.Companion.toHeaders
import okhttp3.HttpUrl.Companion.toHttpUrl
import okhttp3.MediaType.Companion.toMediaType
import okhttp3.MediaType.Companion.toMediaTypeOrNull
import okhttp3.MultipartBody
import okhttp3.Request
import okhttp3.RequestBody
import okhttp3.RequestBody.Companion.toRequestBody
import okio.BufferedSink
import okio.source
import org.slf4j.LoggerFactory
import java.io.InputStream
import java.nio.charset.Charset

/**
 * Implementation of [Filesystem] that adapts OpenAPI-generated [FilesystemApi].
 *
 * Uploads (multipart) and downloads (optionally ranged) are sent as hand-built OkHttp requests
 * against the execd endpoint; every other operation delegates to the generated [FilesystemApi].
 * Failures are logged and rethrown via `toSandboxException()`.
 */
internal class FilesystemAdapter(
    private val httpClientProvider: HttpClientProvider,
    private val execdEndpoint: SandboxEndpoint,
) : Filesystem {
    companion object {
        private const val FILESYSTEM_UPLOAD_PATH = "/files/upload"
        private const val FILESYSTEM_DOWNLOAD_PATH = "/files/download"
    }

    private val logger = LoggerFactory.getLogger(FilesystemAdapter::class.java)

    // Generated client whose OkHttp instance stamps the endpoint's headers onto every request.
    private val api =
        FilesystemApi(
            "${httpClientProvider.config.protocol}://${execdEndpoint.endpoint}",
            httpClientProvider.httpClient.newBuilder()
                .addInterceptor { chain ->
                    val requestBuilder = chain.request().newBuilder()
                    execdEndpoint.headers.forEach { (key, value) ->
                        requestBuilder.header(key, value)
                    }
                    chain.proceed(requestBuilder.build())
                }
                .build(),
        )

    /**
     * Downloads a file and decodes it as text.
     *
     * @param path file to read
     * @param encoding charset name used to decode the body
     * @param range optional value for the HTTP `Range` header
     * @return the decoded content, or an empty string when the response has no body
     */
    override fun readFile(
        path: String,
        encoding: String,
        range: String?,
    ): String {
        try {
            // Resolve the charset first so an invalid encoding fails before any network round-trip.
            val charset = getCharsetFromEncoding(encoding)
            val request = buildDownloadRequest(path, range)
            httpClientProvider.httpClient.newCall(request).execute().use { response ->
                if (!response.isSuccessful) {
                    throw toApiException(response, "Failed to read file")
                }
                return response.body?.source()?.readString(charset) ?: ""
            }
        } catch (e: Exception) {
            logger.error("Failed to read file with encoding $encoding: $path", e)
            throw e.toSandboxException()
        }
    }

    /**
     * Downloads a file as raw bytes.
     *
     * @param path file to read
     * @param range optional value for the HTTP `Range` header
     * @return the body bytes, or an empty array when the response has no body
     */
    override fun readByteArray(
        path: String,
        range: String?,
    ): ByteArray {
        try {
            val request = buildDownloadRequest(path, range)
            httpClientProvider.httpClient.newCall(request).execute().use { response ->
                if (!response.isSuccessful) {
                    throw toApiException(response, "Failed to read file")
                }
                return response.body?.bytes() ?: ByteArray(0)
            }
        } catch (e: Exception) {
            logger.error("Failed to read file as byte array: $path", e)
            throw e.toSandboxException()
        }
    }

    /**
     * Downloads a file as a stream.
     *
     * The HTTP response stays open on success: the returned stream is the response body, so the
     * caller must close it.
     *
     * @param path file to read
     * @param range optional value for the HTTP `Range` header
     */
    override fun readStream(
        path: String,
        range: String?,
    ): InputStream {
        try {
            val request = buildDownloadRequest(path, range)
            val response = httpClientProvider.httpClient.newCall(request).execute()

            if (!response.isSuccessful) {
                // The body is not handed to the caller on failure, so release the response here.
                throw response.use { toApiException(it, "Failed to read file") }
            }

            return response.body?.byteStream()
                ?: throw IllegalStateException("Response body is null")
        } catch (e: Exception) {
            logger.error("Failed to read file as stream: $path", e)
            throw e.toSandboxException()
        }
    }

    /**
     * Uploads all [entries] in one multipart request; each entry contributes a `metadata` part
     * (path, owner, group, mode as JSON) followed by a `file` part.
     *
     * Supported `data` types are [ByteArray], [String] (encoded with the entry's encoding) and
     * [InputStream]. An [InputStream] is consumed and closed while the request is written, so a
     * request containing one is marked one-shot and OkHttp will not transmit it a second time.
     * Does nothing when [entries] is empty.
     */
    override fun write(entries: List<WriteEntry>) {
        if (entries.isEmpty()) {
            return
        }

        try {
            val builder = MultipartBody.Builder().setType(MultipartBody.FORM)
            entries.forEach { entry ->
                val path = entry.path
                val data = entry.data
                requireNotNull(path) { "File path cannot be null" }
                requireNotNull(data) { "File data cannot be null" }
                val metadataJsonObject =
                    buildJsonObject {
                        put("path", path)
                        put("owner", entry.owner)
                        put("group", entry.group)
                        put("mode", entry.mode)
                    }

                val metadataJson = metadataJsonObject.toString()

                builder.addFormDataPart(
                    "metadata",
                    "metadata",
                    metadataJson.toRequestBody("application/json".toMediaType()),
                )

                val fileBody =
                    when (data) {
                        is ByteArray -> data.toRequestBody("application/octet-stream".toMediaType())
                        is String -> {
                            val charset = getCharsetFromEncoding(entry.encoding)
                            data.toRequestBody("text/plain; charset=${charset.name()}".toMediaType())
                        }
                        is InputStream ->
                            object : RequestBody() {
                                override fun contentType() = "application/octet-stream".toMediaTypeOrNull()

                                override fun contentLength() = -1L

                                // The stream is drained and closed by writeTo, so it cannot be replayed.
                                override fun isOneShot() = true

                                override fun writeTo(sink: BufferedSink) {
                                    data.source().use { source -> sink.writeAll(source) }
                                }
                            }
                        else -> throw IllegalArgumentException("Unsupported file data type: ${data::class.java}")
                    }

                builder.addFormDataPart("file", path, fileBody)
            }

            val multipartBody = builder.build()
            // The enclosing multipart body is not guaranteed to surface a part's one-shot flag on
            // every OkHttp version, so wrap it explicitly when any part is stream-backed.
            val requestBody: RequestBody =
                if (entries.any { it.data is InputStream }) asOneShot(multipartBody) else multipartBody

            val request =
                Request.Builder()
                    .url("${httpClientProvider.config.protocol}://${execdEndpoint.endpoint}$FILESYSTEM_UPLOAD_PATH")
                    .headers(execdEndpoint.headers.toHeaders())
                    .post(requestBody)
                    .build()

            httpClientProvider.httpClient.newCall(request).execute().use { response ->
                if (!response.isSuccessful) {
                    throw toApiException(response, "Failed to write files")
                }
            }
        } catch (e: Exception) {
            logger.error("Failed to write {} files", entries.size, e)
            throw e.toSandboxException()
        }
    }

    /** Creates one directory per entry, passing each entry's mode, group and owner to the API. */
    override fun createDirectories(entries: List<WriteEntry>) {
        return try {
            val permissionMap =
                entries.associate { entry ->
                    entry.path to
                        com.alibaba.opensandbox.sandbox.api.models.execd.Permission(
                            mode = entry.mode,
                            group = entry.group,
                            owner = entry.owner,
                        )
                }
            api.makeDirs(permissionMap)
        } catch (e: Exception) {
            logger.error("Failed to create directories", e)
            throw e.toSandboxException()
        }
    }

    /** Deletes the files at [paths] through the generated API. */
    override fun deleteFiles(paths: List<String>) {
        return try {
            api.removeFiles(paths)
        } catch (e: Exception) {
            logger.error("Failed to delete {} files", paths.size, e)
            throw e.toSandboxException()
        }
    }

    /** Deletes the directories at [paths] through the generated API. */
    override fun deleteDirectories(paths: List<String>) {
        return try {
            api.removeDirs(paths)
        } catch (e: Exception) {
            logger.error("Failed to delete {} directories", paths.size, e)
            throw e.toSandboxException()
        }
    }

    /** Converts [entries] to API rename items and submits them. */
    override fun moveFiles(entries: List<MoveEntry>) {
        return try {
            val renameItems = entries.toApiRenameFileItems()
            api.renameFiles(renameItems)
        } catch (e: Exception) {
            logger.error("Failed to move files", e)
            throw e.toSandboxException()
        }
    }

    /** Converts [entries] to an API permission map and applies it. */
    override fun setPermissions(entries: List<SetPermissionEntry>) {
        return try {
            val permissionMap = entries.toApiPermissionMap()
            api.chmodFiles(permissionMap)
        } catch (e: Exception) {
            logger.error("Failed to set permissions", e)
            throw e.toSandboxException()
        }
    }

    /** Converts [entries] to an API replace-content map and submits it. */
    override fun replaceContents(entries: List<ContentReplaceEntry>) {
        return try {
            val replaceMap = entries.toApiReplaceFileContentMap()
            api.replaceContent(replaceMap)
        } catch (e: Exception) {
            logger.error("Failed to replace contents", e)
            throw e.toSandboxException()
        }
    }

    /** Searches with the entry's path and pattern and converts each result to [EntryInfo]. */
    override fun search(entry: SearchEntry): List<EntryInfo> {
        return try {
            val response = api.searchFiles(entry.path, entry.pattern)
            response.map { it.toEntryInfo() }
        } catch (e: Exception) {
            logger.error("Failed to search files", e)
            throw e.toSandboxException()
        }
    }

    /** Fetches info for [paths] and converts the response to a map of [EntryInfo]. */
    override fun readFileInfo(paths: List<String>): Map<String, EntryInfo> {
        return try {
            val response = api.getFilesInfo(paths)
            response.toEntryInfoMap()
        } catch (e: Exception) {
            logger.error("Failed to get file info for {} paths", paths.size, e)
            throw e.toSandboxException()
        }
    }

    /**
     * Resolves a charset by name.
     *
     * @throws InvalidArgumentException if the name is not accepted by `charset(...)`
     */
    private fun getCharsetFromEncoding(encoding: String): Charset {
        try {
            return charset(encoding)
        } catch (e: IllegalArgumentException) {
            logger.error("Invalid encoding {}", encoding, e)
            throw InvalidArgumentException("Invalid encoding $encoding", e)
        }
    }

    /** Builds the GET request for the download endpoint, adding a `Range` header when [range] is set. */
    private fun buildDownloadRequest(
        path: String,
        range: String?,
    ): Request {
        val baseUrlString = "${httpClientProvider.config.protocol}://${execdEndpoint.endpoint}$FILESYSTEM_DOWNLOAD_PATH"
        val httpUrl =
            baseUrlString.toHttpUrl()
                .newBuilder()
                .addQueryParameter("path", path)
                .build()

        val requestBuilder =
            Request.Builder()
                .url(httpUrl)
                .headers(execdEndpoint.headers.toHeaders())
                .get()

        if (range != null) {
            requestBuilder.header("Range", range)
        }

        return requestBuilder.build()
    }

    /**
     * Builds the [SandboxApiException] for a non-successful [response].
     *
     * Reads the response body (consuming it) to extract a structured error when one is present.
     *
     * @param failurePrefix leading sentence of the exception message, without the trailing period
     */
    private fun toApiException(
        response: okhttp3.Response,
        failurePrefix: String,
    ): SandboxApiException {
        val errorBodyString = response.body?.string()
        val sandboxError = parseSandboxError(errorBodyString)
        return SandboxApiException(
            message = "$failurePrefix. Status code: ${response.code}, Body: $errorBodyString",
            statusCode = response.code,
            error = sandboxError ?: SandboxError(UNEXPECTED_RESPONSE),
            requestId = response.header("X-Request-ID"),
        )
    }

    /** Wraps [delegate] so OkHttp treats the request body as transmittable at most once. */
    private fun asOneShot(delegate: RequestBody): RequestBody =
        object : RequestBody() {
            override fun contentType() = delegate.contentType()

            override fun contentLength() = delegate.contentLength()

            override fun isOneShot() = true

            override fun writeTo(sink: BufferedSink) = delegate.writeTo(sink)
        }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/HealthAdapter.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.infrastructure.adapters.service

import com.alibaba.opensandbox.sandbox.HttpClientProvider
import com.alibaba.opensandbox.sandbox.api.execd.HealthApi
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxEndpoint
import com.alibaba.opensandbox.sandbox.domain.services.Health
import org.slf4j.LoggerFactory

/**
 * [Health] implementation backed by the OpenAPI-generated [HealthApi].
 *
 * The generated client targets `<protocol>://<endpoint>` of the supplied execd endpoint and
 * runs on a derivative of the provider's HTTP client whose interceptor sets the endpoint's
 * headers on every outgoing request.
 */
internal class HealthAdapter(
    private val httpClientProvider: HttpClientProvider,
    private val execdEndpoint: SandboxEndpoint,
) : Health {
    private val logger = LoggerFactory.getLogger(HealthAdapter::class.java)
    private val api: HealthApi

    init {
        val baseUrl = "${httpClientProvider.config.protocol}://${execdEndpoint.endpoint}"
        val endpointClient =
            httpClientProvider.httpClient.newBuilder()
                .addInterceptor { chain ->
                    // Headers are read from the endpoint on each call and override same-named ones.
                    val outgoing = chain.request().newBuilder()
                    for ((name, value) in execdEndpoint.headers) {
                        outgoing.header(name, value)
                    }
                    chain.proceed(outgoing.build())
                }
                .build()
        api = HealthApi(baseUrl, endpointClient)
    }

    /**
     * Pings the endpoint through the generated API.
     *
     * @param sandboxId identifier used only in log output
     * @return true when the ping call completes, false when any [Exception] is raised
     */
    override fun ping(sandboxId: String): Boolean {
        logger.debug("Checking health for sandbox: {}", sandboxId)

        try {
            api.ping()
            logger.debug("Health check successful for sandbox {}", sandboxId)
            return true
        } catch (e: Exception) {
            // A failed ping is reported as "not healthy" rather than propagated.
            logger.debug("Health check failed for sandbox: {}", sandboxId, e)
            return false
        }
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/MetricsAdapter.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.infrastructure.adapters.service

import com.alibaba.opensandbox.sandbox.HttpClientProvider
import com.alibaba.opensandbox.sandbox.api.execd.MetricApi
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxEndpoint
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxMetrics
import com.alibaba.opensandbox.sandbox.domain.services.Metrics
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.SandboxModelConverter.toSandboxMetrics
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.toSandboxException
import org.slf4j.LoggerFactory

/**
 * [Metrics] implementation backed by the OpenAPI-generated [MetricApi].
 *
 * The generated client targets `<protocol>://<endpoint>` of the supplied execd endpoint and
 * runs on a derivative of the provider's HTTP client whose interceptor sets the endpoint's
 * headers on every outgoing request.
 */
internal class MetricsAdapter(
    private val httpClientProvider: HttpClientProvider,
    private val execdEndpoint: SandboxEndpoint,
) : Metrics {
    private val logger = LoggerFactory.getLogger(MetricsAdapter::class.java)
    private val api: MetricApi

    init {
        val baseUrl = "${httpClientProvider.config.protocol}://${execdEndpoint.endpoint}"
        val endpointClient =
            httpClientProvider.httpClient.newBuilder()
                .addInterceptor { chain ->
                    // Headers are read from the endpoint on each call and override same-named ones.
                    val outgoing = chain.request().newBuilder()
                    for ((name, value) in execdEndpoint.headers) {
                        outgoing.header(name, value)
                    }
                    chain.proceed(outgoing.build())
                }
                .build()
        api = MetricApi(baseUrl, endpointClient)
    }

    /**
     * Fetches metrics through the generated API and converts them to the domain model.
     *
     * @param sandboxId identifier used only in log output
     * @return the converted [SandboxMetrics]
     * @throws Exception any failure, after conversion by [toSandboxException]
     */
    override fun getMetrics(sandboxId: String): SandboxMetrics {
        logger.debug("Retrieving sandbox metrics for {}", sandboxId)
        try {
            val rawMetrics = api.getMetrics()
            return rawMetrics.toSandboxMetrics()
        } catch (e: Exception) {
            throw e.toSandboxException()
        }
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/SandboxesAdapter.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.infrastructure.adapters.service

import com.alibaba.opensandbox.sandbox.HttpClientProvider
import com.alibaba.opensandbox.sandbox.api.SandboxesApi
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.NetworkPolicy
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.PagedSandboxInfos
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxCreateResponse
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxEndpoint
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxFilter
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxImageSpec
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxInfo
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxRenewResponse
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.Volume
import com.alibaba.opensandbox.sandbox.domain.services.Sandboxes
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.SandboxModelConverter
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.SandboxModelConverter.toApiRenewRequest
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.SandboxModelConverter.toPagedSandboxInfos
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.SandboxModelConverter.toSandboxCreateResponse
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.SandboxModelConverter.toSandboxEndpoint
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.SandboxModelConverter.toSandboxInfo
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.SandboxModelConverter.toSandboxRenewResponse
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.toSandboxException
import org.slf4j.LoggerFactory
import java.time.Duration
import java.time.OffsetDateTime

/**
 * Implementation of [Sandboxes] that adapts OpenAPI-generated [SandboxesApi].
 *
 * This adapter provides a clean abstraction layer between business logic and
 * the auto-generated API client, handling all model conversions and error mapping.
 * Every operation wraps the generated call in a try/catch and rethrows any
 * [Exception] after passing it through [toSandboxException].
 */
internal class SandboxesAdapter(
    private val provider: HttpClientProvider,
) : Sandboxes {
    private val logger = LoggerFactory.getLogger(SandboxesAdapter::class.java)

    private val api = SandboxesApi(provider.config.getBaseUrl(), provider.authenticatedClient)

    /**
     * Converts the arguments into an API create request, submits it, and converts the reply.
     *
     * @return the converted create response
     */
    override fun createSandbox(
        spec: SandboxImageSpec,
        entrypoint: List<String>,
        env: Map<String, String>,
        metadata: Map<String, String>,
        timeout: Duration?,
        resource: Map<String, String>,
        networkPolicy: NetworkPolicy?,
        extensions: Map<String, String>,
        volumes: List<Volume>?,
    ): SandboxCreateResponse {
        logger.info("Creating sandbox with image: {}", spec.image)

        return try {
            val createRequest =
                SandboxModelConverter.toApiCreateSandboxRequest(
                    spec = spec,
                    entrypoint = entrypoint,
                    env = env,
                    metadata = metadata,
                    timeout = timeout,
                    resource = resource,
                    networkPolicy = networkPolicy,
                    extensions = extensions,
                    volumes = volumes,
                )
            val apiResponse = api.sandboxesPost(createRequest)
            val response = apiResponse.toSandboxCreateResponse()

            logger.info("Successfully created sandbox: {}", response.id)

            response
        } catch (e: Exception) {
            throw e.toSandboxException()
        }
    }

    /**
     * Fetches a single sandbox by id and converts it to the domain model.
     */
    override fun getSandboxInfo(sandboxId: String): SandboxInfo {
        logger.debug("Retrieving sandbox information: {}", sandboxId)

        return try {
            api.sandboxesSandboxIdGet(sandboxId).toSandboxInfo()
        } catch (e: Exception) {
            throw e.toSandboxException()
        }
    }

    /**
     * Lists sandboxes matching [filter].
     *
     * Metadata entries are flattened into a single `key=value&key2=value2` string; a null
     * metadata map yields a null query value.
     */
    override fun listSandboxes(filter: SandboxFilter): PagedSandboxInfos {
        logger.debug("Listing sandboxes with filter: {}", filter)
        // NOTE(review): keys and values are joined verbatim, so a '&' or '=' inside either
        // would be indistinguishable from a separator — confirm the server-side parsing
        // contract before adding any escaping here.
        val metadataQuery: String? =
            filter.metadata?.entries?.joinToString("&") { "${it.key}=${it.value}" }
        return try {
            api.sandboxesGet(filter.states, metadataQuery, filter.page, filter.pageSize).toPagedSandboxInfos()
        } catch (e: Exception) {
            throw e.toSandboxException()
        }
    }

    /**
     * Resolves the endpoint for [port] with `useServerProxy` set to false.
     */
    override fun getSandboxEndpoint(
        sandboxId: String,
        port: Int,
    ): SandboxEndpoint {
        return getSandboxEndpoint(sandboxId, port, false)
    }

    /**
     * Resolves the endpoint for [port], forwarding [useServerProxy] to the API.
     */
    override fun getSandboxEndpoint(
        sandboxId: String,
        port: Int,
        useServerProxy: Boolean,
    ): SandboxEndpoint {
        logger.debug("Retrieving sandbox endpoint: {}, port {}", sandboxId, port)
        return try {
            api.sandboxesSandboxIdEndpointsPortGet(sandboxId, port, useServerProxy).toSandboxEndpoint()
        } catch (e: Exception) {
            logger.error("Failed to retrieve sandbox endpoint for sandbox {}", sandboxId, e)
            throw e.toSandboxException()
        }
    }

    /**
     * Issues the pause call for the sandbox.
     */
    override fun pauseSandbox(sandboxId: String) {
        logger.info("Pausing sandbox: {}", sandboxId)

        try {
            api.sandboxesSandboxIdPausePost(sandboxId)
            logger.info("Initiated pause for sandbox: {}", sandboxId)
        } catch (e: Exception) {
            logger.error("Failed to initiate pause sandbox: {}", sandboxId, e)
            throw e.toSandboxException()
        }
    }

    /**
     * Issues the resume call for the sandbox.
     */
    override fun resumeSandbox(sandboxId: String) {
        logger.info("Resuming sandbox: {}", sandboxId)

        try {
            api.sandboxesSandboxIdResumePost(sandboxId)
            logger.info("Initiated resume for sandbox: {}", sandboxId)
        } catch (e: Exception) {
            // Message wording aligned with the pause counterpart ("Failed to initiate ...").
            logger.error("Failed to initiate resume sandbox: {}", sandboxId, e)
            throw e.toSandboxException()
        }
    }

    /**
     * Requests a new expiration time for the sandbox and converts the reply.
     */
    override fun renewSandboxExpiration(
        sandboxId: String,
        newExpirationTime: OffsetDateTime,
    ): SandboxRenewResponse {
        logger.info("Renew sandbox {} expiration to {}", sandboxId, newExpirationTime)

        return try {
            val response =
                api.sandboxesSandboxIdRenewExpirationPost(
                    sandboxId,
                    newExpirationTime.toApiRenewRequest(),
                ).toSandboxRenewResponse()

            logger.info("Successfully renewed sandbox {} expiration", sandboxId)

            response
        } catch (e: Exception) {
            logger.error("Failed to renew sandbox {} expiration", sandboxId, e)
            throw e.toSandboxException()
        }
    }

    /**
     * Issues the delete call for the sandbox.
     */
    override fun killSandbox(sandboxId: String) {
        logger.info("Terminating sandbox: {}", sandboxId)

        try {
            api.sandboxesSandboxIdDelete(sandboxId)
            logger.info("Successfully terminated sandbox: {}", sandboxId)
        } catch (e: Exception) {
            logger.error("Failed to terminate sandbox: {}", sandboxId, e)
            throw e.toSandboxException()
        }
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/factory/AdapterFactory.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.infrastructure.factory

import com.alibaba.opensandbox.sandbox.HttpClientProvider
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxEndpoint
import com.alibaba.opensandbox.sandbox.domain.services.Commands
import com.alibaba.opensandbox.sandbox.domain.services.Egress
import com.alibaba.opensandbox.sandbox.domain.services.Filesystem
import com.alibaba.opensandbox.sandbox.domain.services.Health
import com.alibaba.opensandbox.sandbox.domain.services.Metrics
import com.alibaba.opensandbox.sandbox.domain.services.Sandboxes
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.service.CommandsAdapter
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.service.EgressAdapter
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.service.FilesystemAdapter
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.service.HealthAdapter
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.service.MetricsAdapter
import com.alibaba.opensandbox.sandbox.infrastructure.adapters.service.SandboxesAdapter

/**
 * Central place where service adapters are instantiated.
 *
 * Keeping construction here means the Sandbox domain object depends only on the service
 * interfaces, not on the concrete infrastructure adapters. Every adapter receives the same
 * [HttpClientProvider]; endpoint-bound adapters additionally receive their [SandboxEndpoint].
 */
internal class AdapterFactory(
    private val httpClientProvider: HttpClientProvider,
) {
    /** Creates the [Sandboxes] service backed by [SandboxesAdapter]. */
    fun createSandboxes(): Sandboxes = SandboxesAdapter(httpClientProvider)

    /** Creates the [Filesystem] service for [endpoint], backed by [FilesystemAdapter]. */
    fun createFilesystem(endpoint: SandboxEndpoint): Filesystem = FilesystemAdapter(httpClientProvider, endpoint)

    /** Creates the [Commands] service for [endpoint], backed by [CommandsAdapter]. */
    fun createCommands(endpoint: SandboxEndpoint): Commands = CommandsAdapter(httpClientProvider, endpoint)

    /** Creates the [Egress] service for [endpoint], backed by [EgressAdapter]. */
    fun createEgress(endpoint: SandboxEndpoint): Egress = EgressAdapter(httpClientProvider, endpoint)

    /** Creates the [Metrics] service for [endpoint], backed by [MetricsAdapter]. */
    fun createMetrics(endpoint: SandboxEndpoint): Metrics = MetricsAdapter(httpClientProvider, endpoint)

    /** Creates the [Health] service for [endpoint], backed by [HealthAdapter]. */
    fun createHealth(endpoint: SandboxEndpoint): Health = HealthAdapter(httpClientProvider, endpoint)
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/SandboxManagerTest.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox

import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.PagedSandboxInfos
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.PaginationInfo
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxFilter
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxImageSpec
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxInfo
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxRenewResponse
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxState
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxStatus
import com.alibaba.opensandbox.sandbox.domain.services.Sandboxes
import io.mockk.Runs
import io.mockk.every
import io.mockk.impl.annotations.MockK
import io.mockk.junit5.MockKExtension
import io.mockk.just
import io.mockk.mockk
import io.mockk.verify
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertSame
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.extension.ExtendWith
import java.time.Duration
import java.time.OffsetDateTime

/**
 * Unit tests for [SandboxManager]: each operation is expected to be forwarded to the mocked
 * [Sandboxes] service, and `close` to the mocked [HttpClientProvider].
 */
@ExtendWith(MockKExtension::class)
class SandboxManagerTest {
    @MockK
    lateinit var sandboxService: Sandboxes

    @MockK
    lateinit var httpClientProvider: HttpClientProvider

    private lateinit var sandboxManager: SandboxManager

    @BeforeEach
    fun setUp() {
        // A fresh manager per test, wired to the MockK-injected collaborators.
        sandboxManager = SandboxManager(sandboxService, httpClientProvider)
    }

    @Test
    fun `listSandboxInfos should return sandboxes from service`() {
        val runningOnly = SandboxFilter.builder().states("RUNNING").build()
        val singlePage =
            PaginationInfo(
                page = 1,
                pageSize = 10,
                totalItems = 2,
                totalPages = 1,
                hasNextPage = false,
            )
        val stubbedPage = PagedSandboxInfos(sandboxInfos = listOf(mockk(), mockk()), pagination = singlePage)
        every { sandboxService.listSandboxes(runningOnly) } returns stubbedPage

        val listed = sandboxManager.listSandboxInfos(runningOnly)

        assertEquals(stubbedPage, listed)
        verify { sandboxService.listSandboxes(runningOnly) }
    }

    @Test
    fun `getSandboxInfo should return info from service`() {
        val id = "sandbox-id"
        val runningStatus =
            SandboxStatus(
                state = SandboxState.RUNNING,
                reason = null,
                message = null,
                lastTransitionAt = OffsetDateTime.now(),
            )
        val ubuntuImage = SandboxImageSpec.builder().image("ubuntu").build()
        val stubbedInfo =
            SandboxInfo(
                id = id,
                status = runningStatus,
                entrypoint = listOf("/bin/bash"),
                createdAt = OffsetDateTime.now(),
                expiresAt = OffsetDateTime.now().plusHours(1),
                image = ubuntuImage,
                metadata = emptyMap(),
            )
        every { sandboxService.getSandboxInfo(id) } returns stubbedInfo

        val fetched = sandboxManager.getSandboxInfo(id)

        assertEquals(stubbedInfo, fetched)
        verify { sandboxService.getSandboxInfo(id) }
    }

    @Test
    fun `killSandbox should call service`() {
        val id = "sandbox-id"
        every { sandboxService.killSandbox(id) } just Runs

        sandboxManager.killSandbox(id)

        verify { sandboxService.killSandbox(id) }
    }

    @Test
    fun `renewSandbox should call service`() {
        val id = "sandbox-id"
        val stubbedRenewal = mockk<SandboxRenewResponse>()
        // The expiration timestamp is computed by the manager, so any value is accepted.
        every { sandboxService.renewSandboxExpiration(id, any()) } returns stubbedRenewal

        val renewal = sandboxManager.renewSandbox(id, Duration.ofMinutes(30))

        assertSame(stubbedRenewal, renewal)
    }

    @Test
    fun `pauseSandbox should call service`() {
        val id = "sandbox-id"
        every { sandboxService.pauseSandbox(id) } just Runs

        sandboxManager.pauseSandbox(id)

        verify { sandboxService.pauseSandbox(id) }
    }

    @Test
    fun `resumeSandbox should call service`() {
        val id = "sandbox-id"
        every { sandboxService.resumeSandbox(id) } just Runs

        sandboxManager.resumeSandbox(id)

        verify { sandboxService.resumeSandbox(id) }
    }

    @Test
    fun `close should close httpClientProvider`() {
        every { httpClientProvider.close() } just Runs

        sandboxManager.close()

        verify { httpClientProvider.close() }
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/SandboxTest.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox

import com.alibaba.opensandbox.sandbox.config.ConnectionConfig
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxReadyTimeoutException
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.NetworkPolicy
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.NetworkRule
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxEndpoint
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxInfo
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxMetrics
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxRenewResponse
import com.alibaba.opensandbox.sandbox.domain.services.Commands
import com.alibaba.opensandbox.sandbox.domain.services.Egress
import com.alibaba.opensandbox.sandbox.domain.services.Filesystem
import com.alibaba.opensandbox.sandbox.domain.services.Health
import com.alibaba.opensandbox.sandbox.domain.services.Metrics
import com.alibaba.opensandbox.sandbox.domain.services.Sandboxes
import io.mockk.Runs
import io.mockk.every
import io.mockk.impl.annotations.MockK
import io.mockk.junit5.MockKExtension
import io.mockk.just
import io.mockk.mockk
import io.mockk.verify
import org.junit.jupiter.api.Assertions.assertFalse
import org.junit.jupiter.api.Assertions.assertNull
import org.junit.jupiter.api.Assertions.assertSame
import org.junit.jupiter.api.Assertions.assertThrows
import org.junit.jupiter.api.Assertions.assertTrue
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.extension.ExtendWith
import java.time.Duration

/**
 * Unit tests for [Sandbox] built directly from mocked services.
 *
 * Covers accessor methods, delegation to the underlying services, the builder's
 * `manualCleanup` behaviour, and the readiness check including the content of its
 * timeout message.
 */
@ExtendWith(MockKExtension::class)
class SandboxTest {
    @MockK
    lateinit var sandboxService: Sandboxes

    @MockK
    lateinit var fileSystemService: Filesystem

    @MockK
    lateinit var commandService: Commands

    @MockK
    lateinit var healthService: Health

    @MockK
    lateinit var metricsService: Metrics

    @MockK
    lateinit var egressService: Egress

    @MockK
    lateinit var httpClientProvider: HttpClientProvider

    private lateinit var sandbox: Sandbox
    private val sandboxId = "sandbox-id"

    @BeforeEach
    fun setUp() {
        // Default connection settings; individual tests may re-stub `config`.
        val directConfig =
            ConnectionConfig.builder()
                .domain("localhost:8080")
                .useServerProxy(false)
                .build()
        every { httpClientProvider.config } returns directConfig

        sandbox =
            Sandbox(
                id = sandboxId,
                sandboxService = sandboxService,
                fileSystemService = fileSystemService,
                commandService = commandService,
                healthService = healthService,
                metricsService = metricsService,
                egressService = egressService,
                customHealthCheck = null,
                httpClientProvider = httpClientProvider,
            )
    }

    @Test
    fun `files should return filesystem service`() {
        val files = sandbox.files()

        assertSame(fileSystemService, files)
    }

    @Test
    fun `commands should return command service`() {
        val commands = sandbox.commands()

        assertSame(commandService, commands)
    }

    @Test
    fun `metrics should return metrics service`() {
        val metrics = sandbox.metrics()

        assertSame(metricsService, metrics)
    }

    @Test
    fun `httpClientProvider should return http client provider`() {
        val provider = sandbox.httpClientProvider()

        assertSame(httpClientProvider, provider)
    }

    @Test
    fun `getInfo should delegate to sandboxService`() {
        val stubbedInfo = mockk<SandboxInfo>()
        every { sandboxService.getSandboxInfo(sandboxId) } returns stubbedInfo

        val info = sandbox.getInfo()

        assertSame(stubbedInfo, info)
        verify { sandboxService.getSandboxInfo(sandboxId) }
    }

    @Test
    fun `getEndpoint should delegate to sandboxService`() {
        val requestedPort = 8080
        val stubbedEndpoint = mockk<SandboxEndpoint>()
        // Replace the setUp stub with a config built entirely from builder defaults.
        every { httpClientProvider.config } returns ConnectionConfig.builder().build()
        every { sandboxService.getSandboxEndpoint(sandboxId, requestedPort, false) } returns stubbedEndpoint

        val endpoint = sandbox.getEndpoint(requestedPort)

        assertSame(stubbedEndpoint, endpoint)
        verify { sandboxService.getSandboxEndpoint(sandboxId, requestedPort, false) }
    }

    @Test
    fun `getMetrics should delegate to metricsService`() {
        val stubbedMetrics = mockk<SandboxMetrics>()
        every { metricsService.getMetrics(sandboxId) } returns stubbedMetrics

        val metrics = sandbox.getMetrics()

        assertSame(stubbedMetrics, metrics)
        verify { metricsService.getMetrics(sandboxId) }
    }

    @Test
    fun `renew should delegate to sandboxService`() {
        val stubbedRenewal = mockk<SandboxRenewResponse>()
        // The expiration timestamp is computed by the sandbox, so any value is accepted.
        every { sandboxService.renewSandboxExpiration(sandboxId, any()) } returns stubbedRenewal

        val renewal = sandbox.renew(Duration.ofMinutes(10))

        assertSame(stubbedRenewal, renewal)
    }

    @Test
    fun `getEgressPolicy should delegate to egressService`() {
        val stubbedPolicy = mockk<NetworkPolicy>()
        every { egressService.getPolicy() } returns stubbedPolicy

        val policy = sandbox.getEgressPolicy()

        assertSame(stubbedPolicy, policy)
        verify { egressService.getPolicy() }
    }

    @Test
    fun `patchEgressRules should delegate to egressService`() {
        val patch = listOf(mockk<NetworkRule>())
        every { egressService.patchRules(patch) } just Runs

        sandbox.patchEgressRules(patch)

        verify { egressService.patchRules(patch) }
    }

    @Test
    fun `builder manualCleanup should clear timeout`() {
        val configured =
            Sandbox.builder()
                .image("python:3.12")
                .timeout(Duration.ofMinutes(5))
                .manualCleanup()

        // The builder's timeout is not exposed publicly, so it is read reflectively.
        val storedTimeout =
            configured.javaClass.getDeclaredField("timeout").let { field ->
                field.isAccessible = true
                field.get(configured)
            }

        assertNull(storedTimeout)
    }

    @Test
    fun `pause should delegate to sandboxService`() {
        every { sandboxService.pauseSandbox(sandboxId) } just Runs

        sandbox.pause()

        verify { sandboxService.pauseSandbox(sandboxId) }
    }

    @Test
    fun `kill should delegate to sandboxService`() {
        every { sandboxService.killSandbox(sandboxId) } just Runs

        sandbox.kill()

        verify { sandboxService.killSandbox(sandboxId) }
    }

    @Test
    fun `close should close httpClientProvider`() {
        every { httpClientProvider.close() } just Runs

        sandbox.close()

        verify { httpClientProvider.close() }
    }

    @Test
    fun `isHealthy should return true when healthService returns true`() {
        every { healthService.ping(sandboxId) } returns true

        val healthy = sandbox.isHealthy()

        assertTrue(healthy)
        verify { healthService.ping(sandboxId) }
    }

    @Test
    fun `isHealthy should return false when healthService returns false`() {
        every { healthService.ping(sandboxId) } returns false

        val healthy = sandbox.isHealthy()

        assertFalse(healthy)
        verify { healthService.ping(sandboxId) }
    }

    @Test
    fun `checkReady should return when healthy`() {
        every { healthService.ping(sandboxId) } returns true
        val timeout = Duration.ofSeconds(1)
        val pollInterval = Duration.ofMillis(10)

        sandbox.checkReady(timeout, pollInterval)

        verify { healthService.ping(sandboxId) }
    }

    @Test
    fun `checkReady should throw exception when timeout`() {
        every { healthService.ping(sandboxId) } returns false
        val timeout = Duration.ofMillis(100)
        val pollInterval = Duration.ofMillis(10)

        assertThrows(SandboxReadyTimeoutException::class.java) {
            sandbox.checkReady(timeout, pollInterval)
        }
    }

    @Test
    fun `checkReady timeout should include connection context and bridge hint`() {
        every { healthService.ping(sandboxId) } throws RuntimeException("connect ECONNREFUSED")

        val failure =
            assertThrows(SandboxReadyTimeoutException::class.java) {
                sandbox.checkReady(Duration.ofMillis(100), Duration.ofMillis(10))
            }

        val message = failure.message!!
        assertTrue(message.contains("Connection context: domain=localhost:8080, useServerProxy=false"))
        assertTrue(message.contains("useServerProxy=true"))
        assertTrue(message.contains("[docker].host_ip"))
        assertTrue(message.contains("Last error: connect ECONNREFUSED"))
    }

    @Test
    fun `checkReady timeout should omit host_ip hint when server proxy is enabled`() {
        // Override the setUp stub so the sandbox observes a proxy-enabled configuration.
        every { httpClientProvider.config } returns
            ConnectionConfig.builder()
                .domain("localhost:8080")
                .useServerProxy(true)
                .build()
        every { healthService.ping(sandboxId) } returns false

        val failure =
            assertThrows(SandboxReadyTimeoutException::class.java) {
                sandbox.checkReady(Duration.ofMillis(100), Duration.ofMillis(10))
            }

        val message = failure.message!!
        assertTrue(message.contains("useServerProxy=true"))
        assertFalse(message.contains("[docker].host_ip"))
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxExceptionCompatibilityTest.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.domain.exceptions

import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertNull
import org.junit.jupiter.api.Test

/**
 * Checks that the sandbox exception types can still be constructed positionally
 * without a request id, and that `requestId` is then reported as null.
 */
class SandboxExceptionCompatibilityTest {
    @Test
    fun `base exception should keep legacy constructor signature`() {
        val error = SandboxError("INTERNAL_UNKNOWN_ERROR")

        // Arguments are deliberately positional: message, cause, error.
        val exception = SandboxException("boom", null, error)

        assertEquals("boom", exception.message)
        assertNull(exception.requestId)
    }

    @Test
    fun `api exception should keep legacy constructor signature`() {
        val error = SandboxError("UNEXPECTED_RESPONSE")

        // Arguments are deliberately positional: message, cause, status code, error.
        val exception = SandboxApiException("boom", null, 500, error)

        assertEquals("boom", exception.message)
        assertEquals(500, exception.statusCode)
        assertNull(exception.requestId)
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/domain/models/VolumeModelsTest.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.domain.models

import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.Host
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.PVC
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.Volume
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertFalse
import org.junit.jupiter.api.Assertions.assertNotNull
import org.junit.jupiter.api.Assertions.assertNull
import org.junit.jupiter.api.Assertions.assertThrows
import org.junit.jupiter.api.Assertions.assertTrue
import org.junit.jupiter.api.Test

/**
 * Unit tests for the volume model types ([Host], [PVC] and [Volume]):
 * factory validation, builder defaults and backend exclusivity.
 */
class VolumeModelsTest {
    /** Asserts that running [action] fails with an [IllegalArgumentException]. */
    private fun assertRejected(action: () -> Unit) {
        assertThrows(IllegalArgumentException::class.java) { action() }
    }

    @Test
    fun `Host should require absolute path`() {
        val absolutePath = "/data/shared"

        val hostBackend = Host.of(absolutePath)

        assertEquals(absolutePath, hostBackend.path)
    }

    @Test
    fun `Host should reject relative path`() {
        assertRejected { Host.of("relative/path") }
    }

    @Test
    fun `PVC should accept valid claim name`() {
        val claimName = "my-pvc"

        val pvcBackend = PVC.of(claimName)

        assertEquals(claimName, pvcBackend.claimName)
    }

    @Test
    fun `PVC should reject blank claim name`() {
        assertRejected { PVC.of("   ") }
    }

    @Test
    fun `Volume with host backend should be created correctly`() {
        val hostVolume =
            Volume.builder()
                .name("data")
                .host(Host.of("/data/shared"))
                .mountPath("/mnt/data")
                .build()

        with(hostVolume) {
            assertEquals("data", name)
            assertNotNull(host)
            assertEquals("/data/shared", host?.path)
            assertNull(pvc)
            assertEquals("/mnt/data", mountPath)
            assertFalse(readOnly) // readOnly was never set, so the default (read-write) applies
            assertNull(subPath)
        }
    }

    @Test
    fun `Volume with PVC backend should be created correctly`() {
        val pvcVolume =
            Volume.builder()
                .name("models")
                .pvc(PVC.of("shared-models"))
                .mountPath("/mnt/models")
                .readOnly(true)
                .subPath("v1")
                .build()

        with(pvcVolume) {
            assertEquals("models", name)
            assertNull(host)
            assertNotNull(pvc)
            assertEquals("shared-models", pvc?.claimName)
            assertEquals("/mnt/models", mountPath)
            assertTrue(readOnly)
            assertEquals("v1", subPath)
        }
    }

    // In the rejection tests below the whole builder chain stays inside the lambda,
    // so the test passes regardless of which builder call performs the validation.

    @Test
    fun `Volume should reject blank name`() {
        assertRejected {
            Volume.builder()
                .name("   ")
                .host(Host.of("/data"))
                .mountPath("/mnt")
                .build()
        }
    }

    @Test
    fun `Volume should require absolute mount path`() {
        assertRejected {
            Volume.builder()
                .name("test")
                .host(Host.of("/data"))
                .mountPath("relative/path")
                .build()
        }
    }

    @Test
    fun `Volume should reject no backend specified`() {
        assertRejected {
            Volume.builder()
                .name("test")
                .mountPath("/mnt")
                .build()
        }
    }

    @Test
    fun `Volume should reject multiple backends specified`() {
        assertRejected {
            Volume.builder()
                .name("test")
                .host(Host.of("/data"))
                .pvc(PVC.of("my-pvc"))
                .mountPath("/mnt")
                .build()
        }
    }

    @Test
    fun `Volume should require name`() {
        assertRejected {
            Volume.builder()
                .host(Host.of("/data"))
                .mountPath("/mnt")
                .build()
        }
    }

    @Test
    fun `Volume should require mount path`() {
        assertRejected {
            Volume.builder()
                .name("test")
                .host(Host.of("/data"))
                .build()
        }
    }

    @Test
    fun `Volume readOnly defaults to false`() {
        val defaultVolume =
            Volume.builder()
                .name("test")
                .host(Host.of("/data"))
                .mountPath("/mnt")
                .build()

        assertFalse(defaultVolume.readOnly)
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/CommandsAdapterTest.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.infrastructure.adapters.service

import com.alibaba.opensandbox.sandbox.HttpClientProvider
import com.alibaba.opensandbox.sandbox.config.ConnectionConfig
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxApiException
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.ExecutionHandlers
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.RunCommandRequest
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxEndpoint
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.booleanOrNull
import kotlinx.serialization.json.intOrNull
import kotlinx.serialization.json.jsonObject
import kotlinx.serialization.json.jsonPrimitive
import okhttp3.mockwebserver.MockResponse
import okhttp3.mockwebserver.MockWebServer
import org.junit.jupiter.api.AfterEach
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertThrows
import org.junit.jupiter.api.Assertions.assertTrue
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.assertThrows
import java.util.concurrent.CountDownLatch
import java.util.concurrent.TimeUnit

/**
 * Unit tests for [CommandsAdapter], run against a local [MockWebServer] that plays the
 * role of the execd endpoint.
 */
class CommandsAdapterTest {
    private lateinit var mockWebServer: MockWebServer
    private lateinit var commandsAdapter: CommandsAdapter
    private lateinit var httpClientProvider: HttpClientProvider

    /** Starts a fresh mock server and points a new adapter at it. */
    @BeforeEach
    fun setUp() {
        mockWebServer = MockWebServer()
        mockWebServer.start()

        // We need to parse the port from MockWebServer to simulate the Execd endpoint
        val host = mockWebServer.hostName
        val port = mockWebServer.port
        val endpoint = SandboxEndpoint("$host:$port")

        val config =
            ConnectionConfig.builder()
                .domain("$host:$port")
                .protocol("http")
                .build()

        httpClientProvider = HttpClientProvider(config)
        commandsAdapter = CommandsAdapter(httpClientProvider, endpoint)
    }

    /**
     * Stops the mock server and closes the HTTP client provider.
     *
     * The provider is closed in a `finally` block so that a failing server shutdown cannot
     * leave it open, and both fields are checked for initialization because JUnit still
     * runs this method when [setUp] fails part-way through.
     */
    @AfterEach
    fun tearDown() {
        try {
            if (::mockWebServer.isInitialized) {
                mockWebServer.shutdown()
            }
        } finally {
            if (::httpClientProvider.isInitialized) {
                httpClientProvider.close()
            }
        }
    }

    @Test
    fun `run should stream events correctly`() {
        // SSE format: event nodes are JSON objects separated by newlines
        val event1 = """{"type":"stdout","text":"Hello","timestamp":1672531200000}"""
        val event2 = """{"type":"execution_complete","execution_time":100,"timestamp":1672531201000}"""

        val responseBody = "$event1\n$event2\n"

        mockWebServer.enqueue(
            MockResponse()
                .setResponseCode(200)
                .setBody(responseBody),
        )

        val receivedOutput = StringBuilder()
        // Released by the completion handler; awaiting it below bounds how long the test waits.
        val latch = CountDownLatch(1)
        var executionTime = -1L

        val handlers =
            ExecutionHandlers.builder()
                .onStdout { msg -> receivedOutput.append(msg.text) }
                .onExecutionComplete { complete ->
                    executionTime = complete.executionTimeInMillis
                    latch.countDown()
                }
                .build()

        val request =
            RunCommandRequest.builder()
                .command("echo Hello")
                .uid(1000)
                .gid(1000)
                .env("APP_ENV", "test")
                .env("LOG_LEVEL", "debug")
                .handlers(handlers)
                .build()

        commandsAdapter.run(request)

        assertTrue(latch.await(2, TimeUnit.SECONDS), "Timed out waiting for completion event")
        assertEquals("Hello", receivedOutput.toString())
        assertEquals(100L, executionTime)

        // Verify the HTTP request the adapter produced.
        val recordedRequest = mockWebServer.takeRequest()
        assertEquals("/command", recordedRequest.path)
        assertEquals("POST", recordedRequest.method)
        val requestBodyJson = Json.parseToJsonElement(recordedRequest.body.readUtf8()).jsonObject
        assertEquals("echo Hello", requestBodyJson["command"]?.jsonPrimitive?.content)
        assertEquals(1000, requestBodyJson["uid"]?.jsonPrimitive?.intOrNull)
        assertEquals(1000, requestBodyJson["gid"]?.jsonPrimitive?.intOrNull)
        val envs = requestBodyJson["envs"]?.jsonObject
        assertEquals("test", envs?.get("APP_ENV")?.jsonPrimitive?.content)
        assertEquals("debug", envs?.get("LOG_LEVEL")?.jsonPrimitive?.content)
        // Builder defaults background to false; request body always includes it
        assertEquals(false, requestBodyJson["background"]?.jsonPrimitive?.booleanOrNull)
    }

    @Test
    fun `run command builder should require uid when gid is provided`() {
        // Only gid is set; the whole builder chain is expected to fail with IllegalArgumentException.
        assertThrows<IllegalArgumentException> {
            RunCommandRequest.builder()
                .command("id")
                .gid(1000)
                .build()
        }
    }

    @Test
    fun `run should expose request id on api exception`() {
        mockWebServer.enqueue(
            MockResponse()
                .setResponseCode(500)
                .addHeader("X-Request-ID", "req-kotlin-123")
                .setBody("""{"code":"INTERNAL_ERROR","message":"boom"}"""),
        )

        val request = RunCommandRequest.builder().command("echo Hello").build()
        val ex = assertThrows(SandboxApiException::class.java) { commandsAdapter.run(request) }

        // The status code and the X-Request-ID response header must surface on the exception.
        assertEquals(500, ex.statusCode)
        assertEquals("req-kotlin-123", ex.requestId)
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/SandboxesAdapterTest.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.infrastructure.adapters.service

import com.alibaba.opensandbox.sandbox.HttpClientProvider
import com.alibaba.opensandbox.sandbox.config.ConnectionConfig
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.NetworkPolicy
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.NetworkRule
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxFilter
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxImageSpec
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxState
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.jsonArray
import kotlinx.serialization.json.jsonObject
import kotlinx.serialization.json.jsonPrimitive
import okhttp3.mockwebserver.MockResponse
import okhttp3.mockwebserver.MockWebServer
import org.junit.jupiter.api.AfterEach
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertNotNull
import org.junit.jupiter.api.Assertions.assertTrue
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.Test
import java.time.Duration

/**
 * Unit tests for [SandboxesAdapter], run against a local [MockWebServer] that plays the
 * role of the sandbox lifecycle API.
 */
class SandboxesAdapterTest {
    private lateinit var mockWebServer: MockWebServer
    private lateinit var sandboxesAdapter: SandboxesAdapter
    private lateinit var httpClientProvider: HttpClientProvider

    /** Starts a fresh mock server and points a new adapter at it. */
    @BeforeEach
    fun setUp() {
        mockWebServer = MockWebServer()
        mockWebServer.start()

        val host = mockWebServer.hostName
        val port = mockWebServer.port
        val config =
            ConnectionConfig.builder()
                .domain("$host:$port")
                .protocol("http")
                .build()

        httpClientProvider = HttpClientProvider(config)
        sandboxesAdapter = SandboxesAdapter(httpClientProvider)
    }

    /**
     * Stops the mock server and closes the HTTP client provider.
     *
     * The provider is closed in a `finally` block so that a failing server shutdown cannot
     * leave it open, and both fields are checked for initialization because JUnit still
     * runs this method when [setUp] fails part-way through.
     */
    @AfterEach
    fun tearDown() {
        try {
            if (::mockWebServer.isInitialized) {
                mockWebServer.shutdown()
            }
        } finally {
            if (::httpClientProvider.isInitialized) {
                httpClientProvider.close()
            }
        }
    }

    @Test
    fun `createSandbox should send correct request and parse response`() {
        // Mock response
        val responseBody =
            """
            {
                "id": "550e8400-e29b-41d4-a716-446655440000",
                "status": { "state": "Running" },
                "expiresAt": "2023-01-01T11:00:00Z",
                "createdAt": "2023-01-01T10:00:00Z",
                "entrypoint": ["bash"]
            }
            """.trimIndent()
        mockWebServer.enqueue(MockResponse().setBody(responseBody).setResponseCode(201))

        // Execute
        val spec = SandboxImageSpec.builder().image("ubuntu:latest").build()
        val extensions = mapOf("storage.id" to "abc123", "debug" to "true")
        val networkPolicy =
            NetworkPolicy.builder()
                .defaultAction(NetworkPolicy.DefaultAction.DENY)
                .addEgress(
                    NetworkRule.builder()
                        .action(NetworkRule.Action.ALLOW)
                        .target("pypi.org")
                        .build(),
                )
                .build()
        val result =
            sandboxesAdapter.createSandbox(
                spec = spec,
                entrypoint = listOf("bash"),
                env = mapOf("KEY" to "VALUE"),
                metadata = mapOf("meta" to "data"),
                timeout = Duration.ofSeconds(600),
                resource = mapOf("cpu" to "1"),
                networkPolicy = networkPolicy,
                extensions = extensions,
                volumes = null,
            )

        // Verify request
        val request = mockWebServer.takeRequest()
        assertEquals("POST", request.method)
        assertEquals("/v1/sandboxes", request.path)
        val requestBody = request.body.readUtf8()
        assertTrue(requestBody.isNotBlank(), "request body should not be blank")

        // Extensions must be forwarded verbatim; network policy enum values are expected in lower case.
        val payload = Json.parseToJsonElement(requestBody).jsonObject
        val gotExtensions = payload["extensions"]?.jsonObject
        assertNotNull(gotExtensions, "extensions should be present in createSandbox request")
        assertEquals("abc123", gotExtensions!!["storage.id"]!!.jsonPrimitive.content)
        assertEquals("true", gotExtensions["debug"]!!.jsonPrimitive.content)
        val gotNetworkPolicy = payload["networkPolicy"]?.jsonObject
        assertNotNull(gotNetworkPolicy, "networkPolicy should be present in createSandbox request")
        val gotDefaultAction = gotNetworkPolicy!!["defaultAction"]
        assertNotNull(gotDefaultAction, "defaultAction should be present in networkPolicy")
        assertEquals("deny", gotDefaultAction!!.jsonPrimitive.content)
        val egressArray = gotNetworkPolicy["egress"]!!.jsonArray
        assertEquals(1, egressArray.size)
        val rule = egressArray[0].jsonObject
        assertEquals("allow", rule["action"]!!.jsonPrimitive.content)
        assertEquals("pypi.org", rule["target"]!!.jsonPrimitive.content)

        // Verify response
        assertEquals("550e8400-e29b-41d4-a716-446655440000", result.id)
    }

    @Test
    fun `createSandbox should accept null expiresAt for manual cleanup response`() {
        // The response carries "expiresAt": null; parsing it must not fail.
        val responseBody =
            """
            {
                "id": "manual-sbx",
                "status": { "state": "Running" },
                "expiresAt": null,
                "createdAt": "2023-01-01T10:00:00Z",
                "entrypoint": ["bash"]
            }
            """.trimIndent()
        mockWebServer.enqueue(MockResponse().setBody(responseBody).setResponseCode(201))

        val spec = SandboxImageSpec.builder().image("ubuntu:latest").build()
        val result =
            sandboxesAdapter.createSandbox(
                spec = spec,
                entrypoint = listOf("bash"),
                env = emptyMap(),
                metadata = emptyMap(),
                timeout = null,
                resource = mapOf("cpu" to "1"),
                networkPolicy = null,
                extensions = emptyMap(),
                volumes = null,
            )

        assertEquals("manual-sbx", result.id)
    }

    @Test
    fun `getSandboxInfo should parse response correctly`() {
        val sandboxId = "sandbox-id"
        val responseBody =
            """
            {
                "id": "$sandboxId",
                "status": {
                    "state": "Running",
                    "reason": null,
                    "message": null,
                    "lastTransitionAt": "2023-01-01T10:00:00Z"
                },
                "entrypoint": ["/bin/bash"],
                "expiresAt": "2023-01-01T11:00:00Z",
                "createdAt": "2023-01-01T10:00:00Z",
                "image": {
                    "uri": "ubuntu:latest"
                },
                "metadata": {}
            }
            """.trimIndent()

        mockWebServer.enqueue(MockResponse().setBody(responseBody))

        val result = sandboxesAdapter.getSandboxInfo(sandboxId)

        assertEquals(sandboxId, result.id)
        assertEquals(SandboxState.RUNNING, result.status.state)
        assertEquals("ubuntu:latest", result.image.image)

        // The sandbox id must appear as the last path segment of the request.
        val request = mockWebServer.takeRequest()
        assertEquals("/v1/sandboxes/$sandboxId", request.path)
    }

    @Test
    fun `getSandboxInfo should parse null expiresAt for manual cleanup`() {
        val sandboxId = "manual-sandbox"
        val responseBody =
            """
            {
                "id": "$sandboxId",
                "status": {
                    "state": "Running",
                    "reason": null,
                    "message": null,
                    "lastTransitionAt": "2023-01-01T10:00:00Z"
                },
                "entrypoint": ["/bin/bash"],
                "expiresAt": null,
                "createdAt": "2023-01-01T10:00:00Z",
                "image": {
                    "uri": "ubuntu:latest"
                },
                "metadata": {}
            }
            """.trimIndent()

        mockWebServer.enqueue(MockResponse().setBody(responseBody))

        val result = sandboxesAdapter.getSandboxInfo(sandboxId)

        assertEquals(sandboxId, result.id)
        assertEquals(null, result.expiresAt)
    }

    @Test
    fun `listSandboxes should construct query params correctly`() {
        val responseBody =
            """
            {
                "items": [],
                "pagination": {
                    "page": 0,
                    "pageSize": 10,
                    "totalItems": 0,
                    "totalPages": 0,
                    "hasNextPage": false
                }
            }
            """.trimIndent()

        mockWebServer.enqueue(MockResponse().setBody(responseBody))

        val filter =
            SandboxFilter.builder()
                .states("RUNNING", "PENDING")
                .metadata(mapOf("key" to "value"))
                .page(1)
                .pageSize(20)
                .build()

        sandboxesAdapter.listSandboxes(filter)

        // Each state is expected as its own repeated "state" parameter, in filter order.
        val request = mockWebServer.takeRequest()
        val url = request.requestUrl
        assertNotNull(url)
        assertEquals("RUNNING", url!!.queryParameter("state"))
        assertEquals("PENDING", url.queryParameterValues("state")[1])
        assertEquals("key=value", url.queryParameter("metadata"))
        assertEquals("1", url.queryParameter("page"))
        assertEquals("20", url.queryParameter("pageSize"))
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox-api/build.gradle.kts
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.openapitools.generator.gradle.plugin.tasks.GenerateTask

// OpenAPI Generator Gradle plugin; it provides the GenerateTask type imported above.
plugins {
    alias(libs.plugins.openapi.generator)
}

repositories {
    mavenCentral()
}

// Libraries this module's sources compile against: OkHttp plus the serialization bundle
// from the version catalog.
dependencies {
    implementation(libs.okhttp)
    implementation(libs.bundles.serialization)
}

/**
 * Applies the generator settings shared by every OpenAPI generation task in this script:
 * the `kotlin` generator with the `jvm-okhttp4` library, kotlinx.serialization models,
 * and no generated API or model tests.
 */
fun GenerateTask.configureCommonOptions() {
    // Fully qualified type substituted for every untyped ("object"/"Any") schema.
    val jsonElementType = "kotlinx.serialization.json.JsonElement"
    // Fully qualified type substituted for binary and file payloads.
    val inputStreamType = "java.io.InputStream"

    generatorName.set("kotlin")
    library.set("jvm-okhttp4")

    val schemaTypeOverrides =
        mapOf(
            "object" to jsonElementType,
            "Object" to jsonElementType,
            "java.lang.Object" to jsonElementType,
            "Any" to jsonElementType,
            "kotlin.Any" to jsonElementType,
            "binary" to inputStreamType,
            "file" to inputStreamType,
        )
    typeMappings.set(schemaTypeOverrides)

    importMappings.set(mapOf("JsonElement" to jsonElementType))

    val generatorOptions =
        mapOf(
            "jvm8" to "true",
            "coroutine" to "false",
            "dateLibrary" to "java8",
            "serializationLibrary" to "kotlinx_serialization",
            "documentationProvider" to "kdoc",
            "useKtor" to "false",
            "omitGradleWrapper" to "true",
        )
    configOptions.set(generatorOptions)

    // Skip generation of API and model test sources.
    val testGenerationSwitches =
        mapOf(
            "apiTests" to "false",
            "modelTests" to "false",
        )
    globalProperties.set(testGenerationSwitches)
}

// Generates the sandbox lifecycle API client from specs/sandbox-lifecycle.yml.
val generateSandboxLifecycleApi =
    tasks.register<GenerateTask>("generateSandboxLifecycleApi") {
        configureCommonOptions()

        // The specs directory sits three levels above the Gradle root project directory.
        val specsParent = rootProject.projectDir.parentFile.parentFile.parentFile
        val generatedDir = layout.buildDirectory.dir("generated/api/lifecycle").get().asFile
        val basePackage = "com.alibaba.opensandbox.sandbox.api"

        inputSpec.set(specsParent.resolve("specs/sandbox-lifecycle.yml").absolutePath)
        outputDir.set(generatedDir.absolutePath)
        packageName.set(basePackage)
        apiPackage.set(basePackage)
        modelPackage.set("$basePackage.models")
    }

// Generates the execd API client from specs/execd-api.yaml.
val generateExecdApi =
    tasks.register<GenerateTask>("generateExecdApi") {
        configureCommonOptions()

        // The specs directory sits three levels above the Gradle root project directory.
        val specsParent = rootProject.projectDir.parentFile.parentFile.parentFile
        val generatedDir = layout.buildDirectory.dir("generated/api/execd").get().asFile
        val clientPackage = "com.alibaba.opensandbox.sandbox.api.execd"

        inputSpec.set(specsParent.resolve("specs/execd-api.yaml").absolutePath)
        outputDir.set(generatedDir.absolutePath)
        packageName.set(clientPackage)
        apiPackage.set(clientPackage)
        modelPackage.set("com.alibaba.opensandbox.sandbox.api.models.execd")
    }

// Generates the egress API client from specs/egress-api.yaml.
val generateEgressApi =
    tasks.register<GenerateTask>("generateEgressApi") {
        configureCommonOptions()

        // The specs directory sits three levels above the Gradle root project directory.
        val specsParent = rootProject.projectDir.parentFile.parentFile.parentFile
        val generatedDir = layout.buildDirectory.dir("generated/api/egress").get().asFile
        val clientPackage = "com.alibaba.opensandbox.sandbox.api.egress"

        inputSpec.set(specsParent.resolve("specs/egress-api.yaml").absolutePath)
        outputDir.set(generatedDir.absolutePath)
        packageName.set(clientPackage)
        apiPackage.set(clientPackage)
        modelPackage.set("com.alibaba.opensandbox.sandbox.api.models.execd".replace("execd", "egress"))
    }

// Directory holding the Kotlin sources emitted by each generation task, derived from the
// task's own outputDir so the paths stay in step with the task configuration above.
// NOTE(review): mapping the task providers presumably also makes compilation depend on the
// generation tasks; confirm against Gradle's provider/task-dependency rules.
val lifecycleSrc = generateSandboxLifecycleApi.map { file(it.outputDir).resolve("src/main/kotlin") }
val execdSrc = generateExecdApi.map { file(it.outputDir).resolve("src/main/kotlin") }
val egressSrc = generateEgressApi.map { file(it.outputDir).resolve("src/main/kotlin") }
// Register the three generated source directories on the main source set.
sourceSets {
    main {
        java.srcDirs(lifecycleSrc, execdSrc, egressSrc)
    }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox-api/src/main/kotlin/com/alibaba/opensandbox/sandbox/api/models/execd/ExecutionModels.kt
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.sandbox.api.models.execd

import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable
import kotlinx.serialization.json.JsonObject
import kotlinx.serialization.json.contentOrNull
import kotlinx.serialization.json.jsonPrimitive

/**
 * Serializable model of a single execution event.
 *
 * Only [type] and [timestamp] are mandatory; every other property defaults to `null`.
 *
 * @property type Event discriminator string.
 * @property timestamp Event timestamp. NOTE(review): presumably epoch milliseconds; confirm against the execd API spec.
 * @property text Optional text payload of the event.
 * @property results Optional result payload, wrapped as [ResultData].
 * @property executionTimeInMillis Optional execution duration, read from the `execution_time` field.
 * @property executionCount Optional counter, read from the `execution_count` field.
 * @property error Optional error details, see [ErrorData].
 */
@Serializable
data class EventNode(
    val type: String,
    val timestamp: Long,
    val text: String? = null,
    val results: ResultData? = null,
    @SerialName("execution_time")
    val executionTimeInMillis: Long? = null,
    @SerialName("execution_count")
    val executionCount: Long? = null,
    val error: ErrorData? = null,
)

/**
 * Thin wrapper around the raw JSON object carried in an event's `results` field.
 *
 * @property raw The undecoded JSON object.
 */
@Serializable
@JvmInline
value class ResultData(val raw: JsonObject) {
    /**
     * Returns the string content stored under the `"text"` key, or `null` when there is none.
     *
     * Equivalent to `getStringResult("text")`.
     */
    fun getText(): String? = getStringResult("text")

    /**
     * Returns the content of the primitive stored under [key].
     *
     * @param key Name of the entry to read from [raw].
     * @return The primitive's content as a string, or `null` when the key is absent, the
     *   value is JSON `null`, or the value is a JSON object or array.
     */
    fun getStringResult(key: String): String? =
        try {
            raw[key]?.jsonPrimitive?.contentOrNull
        } catch (e: IllegalArgumentException) {
            // `jsonPrimitive` throws when the element is a JSON object or array. A nullable
            // getter should report "no string value" for those instead of crashing the caller.
            null
        }
}

/**
 * Serializable error details attached to an [EventNode].
 *
 * All properties are optional in the decoded payload: [name] and [value] default to
 * `null`, [traceback] defaults to an empty list.
 *
 * @property name Value of the `ename` JSON field (presumably the error name — confirm).
 * @property value Value of the `evalue` JSON field (presumably the error message — confirm).
 * @property traceback Entries of the `traceback` JSON field.
 */
@Serializable
data class ErrorData(
    @SerialName("ename")
    val name: String? = null,
    @SerialName("evalue")
    val value: String? = null,
    @SerialName("traceback")
    val traceback: List<String> = emptyList(),
)


================================================
FILE: sdks/sandbox/kotlin/sandbox-api/src/main/kotlin/com/alibaba/opensandbox/sandbox/api/openapitools.json
================================================
{
  "$schema": "./node_modules/@openapitools/openapi-generator-cli/config.schema.json",
  "spaces": 2,
  "generator-cli": {
    "version": "7.17.0"
  }
}


================================================
FILE: sdks/sandbox/kotlin/sandbox-bom/build.gradle.kts
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Bill-of-materials module: the `java-platform` plugin publishes dependency constraints
// only and contains no sources of its own.
plugins {
    `java-platform`
}

dependencies {
    // Constraints recommend versions to consumers of this platform; they do not add
    // the dependencies themselves.
    constraints {
        // Modules of this build.
        api(project(":sandbox"))
        api(project(":sandbox-api"))

        // Third-party libraries, referenced through the `libs` accessors.
        api(libs.kotlin.stdlib)
        api(libs.okhttp)
        api(libs.okhttp.logging)
        api(libs.kotlinx.serialization.json)
        api(libs.slf4j.api)
    }
}


================================================
FILE: sdks/sandbox/kotlin/settings.gradle.kts
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Name of the root project of this multi-module build.
rootProject.name = "sandbox-parent"

// Settings plugin applied to the whole build.
plugins {
    id("org.gradle.toolchains.foojay-resolver-convention") version "1.0.0"
}

// Modules that make up the build.
include(
    ":sandbox",
    ":sandbox-api",
    ":sandbox-bom",
)


================================================
FILE: sdks/sandbox/python/LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: sdks/sandbox/python/Makefile
================================================
# Every target in this file names a command rather than a file it produces, so all of
# them are declared phony. Without this, a file or directory that happens to share a
# target's name would make the target look up to date and skip its recipe.
.PHONY: help install dev-install format lint type-check test test-cov clean docs build publish \
	check ci generate-api clean-api docker-build docker-test

# Default target
# Lists every target that carries a trailing "## description" comment.
help: ## Show this help message
	@echo "Available commands:"
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-15s\033[0m %s\n", $$1, $$2}'

install: ## Install package dependencies
	uv sync

# Runs the API client generation first, then installs with all extras.
dev-install: generate-api ## Install package with development dependencies
	uv sync --all-extras

format: ## Format code with black and isort
	uv run black .
	uv run isort .

lint: ## Run linting with ruff
	uv run ruff check .

type-check: ## Run type checking with pyright
	uv run pyright

test: ## Run tests
	uv run pytest

test-cov: ## Run tests with coverage
	uv run pytest --cov=src/opensandbox --cov-report=html --cov-report=term

clean: ## Clean build artifacts
	rm -rf build/
	rm -rf dist/
	rm -rf *.egg-info/
	rm -rf .pytest_cache/
	rm -rf .coverage
	rm -rf htmlcov/
	find . -type d -name __pycache__ -exec rm -rf {} +
	find . -name "*.pyc" -delete

docs: ## Generate documentation
	cd docs && uv run sphinx-build -b html . _build/html

build: generate-api ## Build package with API generation
	uv build

publish: ## Publish to PyPI (requires authentication)
	uv publish

# Development workflow targets
check: format lint type-check ## Run all code quality checks

ci: generate-api dev-install check test ## Run CI pipeline locally

generate-api: ## Generate API clients from OpenAPI specs (using openapi-python-client)
	uv run python scripts/generate_api.py

clean-api: ## Clean generated API client code
	rm -rf src/opensandbox/api/execd/
	rm -rf src/opensandbox/api/egress/
	rm -rf src/opensandbox/api/lifecycle/

# Docker targets
docker-build: ## Build Docker image for development
	docker build -t opensandbox-python-dev .

# CURDIR is set by make itself to the directory it is running in, so the mount is
# correct under `make -C <dir>` too; the quotes keep paths containing spaces intact.
docker-test: ## Run tests in Docker container
	docker run --rm -v "$(CURDIR)":/app opensandbox-python-dev make test


================================================
FILE: sdks/sandbox/python/README.md
================================================
# OpenSandbox SDK for Python

English | [中文](README_zh.md)

A Python SDK for low-level interaction with OpenSandbox. It provides capabilities to create, manage, and interact with secure sandbox environments, including executing shell commands, managing files, and monitoring resources.

## Installation

### pip

```bash
pip install opensandbox
```

### uv

```bash
uv add opensandbox
```

## Quick Start

The following example shows how to create a sandbox and execute a shell command.

> **Note**: Before running this example, ensure the OpenSandbox service is running. See the root [README.md](../../../README.md) for startup instructions.

```python
import asyncio
from opensandbox.sandbox import Sandbox
from opensandbox.config import ConnectionConfig
from opensandbox.exceptions import SandboxException

async def main():
    # 1. Configure connection
    config = ConnectionConfig(
        domain="api.opensandbox.io",
        api_key="your-api-key"
    )

    # 2. Create a Sandbox
    try:
        sandbox = await Sandbox.create(
            "ubuntu",
            connection_config=config
        )
        async with sandbox:

            # 3. Execute a shell command
            execution = await sandbox.commands.run("echo 'Hello Sandbox!'")

            # 4. Print output
            print(execution.logs.stdout[0].text)

            # 5. Cleanup (sandbox.close() called automatically)
            # Note: kill() must be called explicitly if you want to terminate the remote sandbox instance immediately
            await sandbox.kill()

    except SandboxException as e:
        # Handle Sandbox specific exceptions
        print(f"Sandbox Error: [{e.error.code}] {e.error.message}")
        # Server logs can be correlated by this request id (if available)
        print(f"Request ID: {e.request_id}")
    except Exception as e:
        print(f"Error: {e}")

if __name__ == "__main__":
    asyncio.run(main())
```

### Synchronous Quick Start

If you prefer a synchronous API, use `SandboxSync` / `SandboxManagerSync` and `ConnectionConfigSync`:

```python
from datetime import timedelta

import httpx
from opensandbox import SandboxSync
from opensandbox.config import ConnectionConfigSync

config = ConnectionConfigSync(
    domain="api.opensandbox.io",
    api_key="your-api-key",
    request_timeout=timedelta(seconds=30),
    transport=httpx.HTTPTransport(limits=httpx.Limits(max_connections=20)),
)

sandbox = SandboxSync.create("ubuntu", connection_config=config)
with sandbox:
    execution = sandbox.commands.run("echo 'Hello Sandbox!'")
    print(execution.logs.stdout[0].text)
    sandbox.kill()
```

## Usage Examples

### 1. Lifecycle Management

Manage the sandbox lifecycle, including renewal, pausing, and resuming.

```python
from datetime import timedelta

# Renew the sandbox
# This resets the expiration time to (current time + duration)
await sandbox.renew(timedelta(minutes=30))

# Pause execution (suspends all processes)
await sandbox.pause()

# Resume execution
sandbox = await Sandbox.resume(
    sandbox_id=sandbox.id,
    connection_config=config,
)

# Get current status
info = await sandbox.get_info()
print(f"State: {info.status.state}")
print(f"Expires: {info.expires_at}")  # None when manual cleanup mode is used
```

Create a non-expiring sandbox by passing `timeout=None`:

```python
manual = await Sandbox.create(
    "ubuntu",
    connection_config=config,
    timeout=None,
)
```

### 2. Custom Health Check

Define custom logic to determine if the sandbox is healthy. This overrides the default ping check.

```python
async def custom_health_check(sbx: Sandbox) -> bool:
    try:
        # 1. Get the external mapped address for port 80
        endpoint = await sbx.get_endpoint(80)

        # 2. Perform your connection check (e.g. HTTP request, Socket connect)
        # return await check_connection(endpoint.endpoint)
        return True
    except Exception:
        return False

sandbox = await Sandbox.create(
    "nginx:latest",
    connection_config=config,
    health_check=custom_health_check  # Custom check: Wait for port 80 to be accessible
)
```

### 3. Command Execution & Streaming

Execute commands and handle output streams in real-time.

```python
from opensandbox.models.execd import ExecutionHandlers, RunCommandOpts

# Define async handlers for streaming output
async def handle_stdout(msg):
    print(f"STDOUT: {msg.text}")

async def handle_stderr(msg):
    print(f"STDERR: {msg.text}")

async def handle_complete(complete):
    print(f"Command finished in {complete.execution_time_in_millis}ms")

# Create handlers (all handlers must be async)
handlers = ExecutionHandlers(
    on_stdout=handle_stdout,
    on_stderr=handle_stderr,
    on_execution_complete=handle_complete
)

# Execute command with handlers
result = await sandbox.commands.run(
    "for i in {1..5}; do echo \"Count $i\"; sleep 0.5; done",
    handlers=handlers
)
```

### 4. Comprehensive File Operations

Manage files and directories, including read, write, list, delete, and search.

```python
from opensandbox.models.filesystem import WriteEntry, SearchEntry

# 1. Write file
await sandbox.files.write_files([
    WriteEntry(
        path="/tmp/hello.txt",
        data="Hello World",
        mode=644
    )
])

# 2. Read file
content = await sandbox.files.read_file("/tmp/hello.txt")
print(f"Content: {content}")

# 3. List/Search files
files = await sandbox.files.search(
    SearchEntry(
        path="/tmp",
        pattern="*.txt"
    )
)
for f in files:
    print(f"Found: {f.path}")

# 4. Delete file
await sandbox.files.delete_files(["/tmp/hello.txt"])
```

### 5. Sandbox Management (Admin)

Use `SandboxManager` for administrative tasks and finding existing sandboxes.

```python
from opensandbox.manager import SandboxManager
from opensandbox.models.sandboxes import SandboxFilter

# Create manager using async context manager
async with await SandboxManager.create(connection_config=config) as manager:

    # List running sandboxes
    sandboxes = await manager.list_sandbox_infos(
        SandboxFilter(
            states=["RUNNING"],
            page_size=10
        )
    )

    for info in sandboxes.sandbox_infos:
        print(f"Found sandbox: {info.id}")
        # Perform admin actions
        await manager.kill_sandbox(info.id)
```

## Configuration

### 1. Connection Configuration

The `ConnectionConfig` class manages API server connection settings.

| Parameter         | Description                                | Default                      | Environment Variable   |
| ----------------- | ------------------------------------------ | ---------------------------- | ---------------------- |
| `api_key`         | API Key for authentication                 | Required                     | `OPEN_SANDBOX_API_KEY` |
| `domain`          | The endpoint domain of the sandbox service | Required (or localhost:8080) | `OPEN_SANDBOX_DOMAIN`  |
| `protocol`        | HTTP protocol (http/https)                 | `http`                       | -                      |
| `request_timeout` | Timeout for API requests                   | 30 seconds                   | -                      |
| `debug`           | Enable debug logging for HTTP requests     | `False`                      | -                      |
| `headers`         | Custom HTTP headers                        | Empty                        | -                      |
| `transport`       | Shared httpx transport (pool/proxy/retry)  | SDK-created per instance     | -                      |
| `use_server_proxy` | Use sandbox server as proxy for execd/endpoint requests (e.g. when client cannot reach the sandbox directly) | `False` | -                      |

```python
from datetime import timedelta

# 1. Basic configuration
config = ConnectionConfig(
    api_key="your-key",
    domain="api.opensandbox.io",
    request_timeout=timedelta(seconds=60)
)

# 2. Advanced: Custom headers and custom transport
# If you create many Sandbox instances, configuring a shared transport is recommended to optimize resource usage.
# SDK default keep-alive is 30 seconds for its own transports.
import httpx

config = ConnectionConfig(
    api_key="your-key",
    domain="api.opensandbox.io",
    headers={
        "X-Custom-Header": "value",
        "X-Request-ID": "trace-123",
    },
    transport=httpx.AsyncHTTPTransport(
        limits=httpx.Limits(
            max_connections=100,
            max_keepalive_connections=50,
            keepalive_expiry=30.0,
        )
    ),
)

# If you provide a custom transport, you are responsible for closing it:
# await config.transport.aclose()
```

### 2. Sandbox Creation Configuration

`Sandbox.create()` accepts the following parameters for configuring the sandbox environment.

| Parameter       | Description                              | Default                         |
| --------------- | ---------------------------------------- | ------------------------------- |
| `image`    | Docker image specification               | Required                        |
| `timeout`       | Automatic termination timeout            | 10 minutes                      |
| `entrypoint`    | Container entrypoint command             | `["tail", "-f", "/dev/null"]`   |
| `resource`      | CPU and memory limits                    | `{"cpu": "1", "memory": "2Gi"}` |
| `env`           | Environment variables                    | Empty                           |
| `metadata`      | Custom metadata tags                     | Empty                           |
| `network_policy` | Optional outbound network policy (egress) | -                             |
| `ready_timeout` | Max time to wait for sandbox to be ready | 30 seconds                      |

Note: metadata keys under `opensandbox.io/` are reserved for system-managed
labels and will be rejected by the server.

```python
from datetime import timedelta

from opensandbox.models.sandboxes import NetworkPolicy, NetworkRule

sandbox = await Sandbox.create(
    "python:3.11",
    connection_config=config,
    timeout=timedelta(minutes=30),
    resource={"cpu": "2", "memory": "4Gi"},
    env={"PYTHONPATH": "/app"},
    metadata={"project": "demo"},
    network_policy=NetworkPolicy(
        defaultAction="deny",
        egress=[NetworkRule(action="allow", target="pypi.org")],
    ),
)
```

### 3. Runtime Egress Policy Updates

Runtime egress policy reads and patches are sent directly to the sandbox egress sidecar.
The SDK first resolves the sandbox endpoint on port `18080`, then calls the sidecar `/policy` API.

Patch uses merge semantics:
- Incoming rules take priority over existing rules with the same `target`.
- Existing rules for other targets remain unchanged.
- Within a single patch payload, the first rule for a `target` wins.
- The current `defaultAction` is preserved.

```python
policy = await sandbox.get_egress_policy()

await sandbox.patch_egress_rules(
    [
        NetworkRule(action="allow", target="www.github.com"),
        NetworkRule(action="deny", target="pypi.org"),
    ]
)
```


================================================
FILE: sdks/sandbox/python/README_zh.md
================================================
# OpenSandbox SDK for Python

中文 | [English](README.md)

用于与 OpenSandbox 进行底层交互的 Python SDK。它提供了创建、管理和与安全沙箱环境交互的能力，包括执行 Shell 命令、管理文件和监控资源。

## 安装指南

### pip

```bash
pip install opensandbox
```

### uv

```bash
uv add opensandbox
```

## 快速开始

以下示例展示了如何创建一个沙箱并执行 Shell 命令。

> **注意**: 在运行此示例之前，请确保 OpenSandbox 服务已启动。服务启动请参考根目录的 [README_zh.md](../../../docs/README_zh.md)。

```python
import asyncio
from opensandbox.sandbox import Sandbox
from opensandbox.config import ConnectionConfig
from opensandbox.exceptions import SandboxException

async def main():
    # 1. 配置连接信息
    config = ConnectionConfig(
        domain="api.opensandbox.io",
        api_key="your-api-key"
    )

    # 2. 创建 Sandbox
    try:
        sandbox = await Sandbox.create(
            "ubuntu",
            connection_config=config
        )
        async with sandbox:

            # 3. 执行 Shell 命令
            execution = await sandbox.commands.run("echo 'Hello Sandbox!'")

            # 4. 打印输出
            print(execution.logs.stdout[0].text)

            # 5. 清理资源 (自动调用 sandbox.close())
            # 注意: 如果希望立即终止远程沙箱实例，仍需显式调用 kill()
            await sandbox.kill()

    except SandboxException as e:
        # 处理 Sandbox 特定异常
        print(f"沙箱错误: [{e.error.code}] {e.error.message}")
    except Exception as e:
        print(f"错误: {e}")

if __name__ == "__main__":
    asyncio.run(main())
```

### 同步版本快速开始

如果你更偏好同步 API，可以使用 `SandboxSync` / `SandboxManagerSync` 与 `ConnectionConfigSync`：

```python
from datetime import timedelta

import httpx
from opensandbox import SandboxSync
from opensandbox.config import ConnectionConfigSync

config = ConnectionConfigSync(
    domain="api.opensandbox.io",
    api_key="your-api-key",
    request_timeout=timedelta(seconds=30),
    transport=httpx.HTTPTransport(limits=httpx.Limits(max_connections=20)),
)

sandbox = SandboxSync.create("ubuntu", connection_config=config)
with sandbox:
    execution = sandbox.commands.run("echo 'Hello Sandbox!'")
    print(execution.logs.stdout[0].text)
    sandbox.kill()
```

## 核心功能示例

### 1. 生命周期管理

管理沙箱的生命周期，包括续期、暂停、恢复和状态查询。

```python
from datetime import timedelta

# 续期沙箱
# 此操作将沙箱的过期时间重置为 (当前时间 + duration)
await sandbox.renew(timedelta(minutes=30))

# 暂停执行 (挂起所有进程)
await sandbox.pause()

# 恢复执行
sandbox = await Sandbox.resume(
    sandbox_id=sandbox.id,
    connection_config=config,
)

# 获取当前状态
info = await sandbox.get_info()
print(f"当前状态: {info.status.state}")
print(f"过期时间: {info.expires_at}")  # 使用手动清理模式时为 None
```

通过传入 `timeout=None` 创建一个不会自动过期的沙箱：

```python
manual = await Sandbox.create(
    "ubuntu",
    connection_config=config,
    timeout=None,
)
```

### 2. 自定义健康检查

定义自定义逻辑来判断沙箱是否健康。这可以覆盖默认的 Ping 检查。

```python
async def custom_health_check(sbx: Sandbox) -> bool:
    try:
        # 1. 获取沙箱 80 端口映射的外部访问地址
        endpoint = await sbx.get_endpoint(80)

        # 2. 执行你的连接检查逻辑 (例如 HTTP 请求、Socket 连接等)
        # return await check_connection(endpoint.endpoint)
        return True
    except Exception:
        return False

sandbox = await Sandbox.create(
    "nginx:latest",
    connection_config=config,
    health_check=custom_health_check  # 自定义检查：等待 80 端口可访问
)
```

### 3. 命令执行与流式响应

执行命令并实时处理输出流。

```python
from opensandbox.models.execd import ExecutionHandlers, RunCommandOpts

# 定义异步处理器用于流式输出
async def handle_stdout(msg):
    print(f"STDOUT: {msg.text}")

async def handle_stderr(msg):
    print(f"STDERR: {msg.text}")

async def handle_complete(complete):
    print(f"命令执行耗时: {complete.execution_time_in_millis}ms")

# 创建流式输出处理器 (所有处理器必须是异步函数)
handlers = ExecutionHandlers(
    on_stdout=handle_stdout,
    on_stderr=handle_stderr,
    on_execution_complete=handle_complete
)

# 带处理器的命令执行
result = await sandbox.commands.run(
    "for i in {1..5}; do echo \"Count $i\"; sleep 0.5; done",
    handlers=handlers,
)
```

### 4. 全面的文件操作

管理文件和目录，包括读写、列表、删除和搜索。

```python
from opensandbox.models.filesystem import WriteEntry, SearchEntry

# 1. 写入文件
await sandbox.files.write_files([
    WriteEntry(
        path="/tmp/hello.txt",
        data="Hello World",
        mode=644
    )
])

# 2. 读取文件
content = await sandbox.files.read_file("/tmp/hello.txt")
print(f"文件内容: {content}")

# 3. 搜索/列表文件
files = await sandbox.files.search(
    SearchEntry(
        path="/tmp",
        pattern="*.txt"
    )
)
for f in files:
    print(f"找到文件: {f.path}")

# 4. 删除文件
await sandbox.files.delete_files(["/tmp/hello.txt"])
```

### 5. 沙箱管理 (Sandbox Manager)

使用 `SandboxManager` 进行管理操作，如查询现有沙箱列表。

```python
from opensandbox.manager import SandboxManager
from opensandbox.models.sandboxes import SandboxFilter

# 使用异步上下文管理器创建管理器
async with await SandboxManager.create(connection_config=config) as manager:

    # 列出运行中的沙箱
    sandboxes = await manager.list_sandbox_infos(
        SandboxFilter(
            states=["RUNNING"],
            page_size=10
        )
    )

    for info in sandboxes.sandbox_infos:
        print(f"找到沙箱: {info.id}")
        # 执行管理操作
        await manager.kill_sandbox(info.id)
```

## 配置说明

### 1. 连接配置 (Connection Configuration)

`ConnectionConfig` 类管理与 API 服务器的连接设置。

| 参数              | 描述                                     | 默认值                   | 环境变量               |
| ----------------- | ---------------------------------------- | ------------------------ | ---------------------- |
| `api_key`         | 用于认证的 API Key                       | 必填                     | `OPEN_SANDBOX_API_KEY` |
| `domain`          | 沙箱服务的端点域名                       | 必填 (或 localhost:8080) | `OPEN_SANDBOX_DOMAIN`  |
| `protocol`        | HTTP 协议 (http/https)                   | `http`                   | -                      |
| `request_timeout` | API 请求超时时间                         | 30 秒                    | -                      |
| `debug`           | 是否开启 HTTP 请求的调试日志             | `False`                  | -                      |
| `headers`         | 自定义 HTTP 请求头                       | 空                       | -                      |
| `transport`       | 共享 httpx transport（连接池/代理/重试） | SDK 每实例创建           | -                      |
| `use_server_proxy` | 是否通过沙箱服务代理访问 execd/endpoint（适用于客户端无法直连沙箱的场景） | `False` | -                      |

```python
from datetime import timedelta

# 1. 基础配置
config = ConnectionConfig(
    api_key="your-key",
    domain="api.opensandbox.io",
    request_timeout=timedelta(seconds=60)
)

# 2. 进阶配置：自定义请求头和 transport
# 如果你需要创建大量 Sandbox 实例，建议配置共享 transport 以优化资源使用。
# SDK 默认连接保活时间为 30 秒。
import httpx

config = ConnectionConfig(
    api_key="your-key",
    domain="api.opensandbox.io",
    headers={"X-Custom-Header": "value"},
    transport=httpx.AsyncHTTPTransport(
        limits=httpx.Limits(
            max_connections=100,
            max_keepalive_connections=50,
            keepalive_expiry=30.0,
        )
    ),
)

# 如果你传入自定义 transport，需要你自己负责关闭：
# await config.transport.aclose()
```

### 2. 沙箱创建配置 (Sandbox Creation Configuration)

`Sandbox.create()` 用于配置沙箱环境。

| 参数            | 描述                 | 默认值                          |
| --------------- | -------------------- | ------------------------------- |
| `image`    | Docker 镜像        | 必填                            |
| `timeout`       | 自动终止的超时时间     | 10 分钟                         |
| `entrypoint`    | 容器启动入口命令       | `["tail", "-f", "/dev/null"]`   |
| `resource`      | CPU 和内存限制        | `{"cpu": "1", "memory": "2Gi"}` |
| `env`           | 环境变量             | 空                              |
| `metadata`      | 自定义元数据标签       | 空                              |
| `network_policy` | 可选的出站网络策略（egress） | -                         |
| `ready_timeout` | 等待沙箱就绪的最大时间 | 30 秒                           |

注意：`opensandbox.io/` 前缀下的 metadata key 属于系统保留标签，服务端会拒绝用户传入。

```python
from datetime import timedelta

from opensandbox.models.sandboxes import NetworkPolicy, NetworkRule

sandbox = await Sandbox.create(
    "python:3.11",
    connection_config=config,
    timeout=timedelta(minutes=30),
    resource={"cpu": "2", "memory": "4Gi"},
    env={"PYTHONPATH": "/app"},
    metadata={"project": "demo"},
    network_policy=NetworkPolicy(
        defaultAction="deny",
        egress=[NetworkRule(action="allow", target="pypi.org")],
    ),
)
```

### 3. 运行时 Egress 策略更新

运行时的 egress 查询和 patch 不再通过 lifecycle API 转发，而是由 SDK 先解析沙箱在 `18080` 端口上的 endpoint，再直接调用 sidecar 的 `/policy` API。

```python
policy = await sandbox.get_egress_policy()

await sandbox.patch_egress_rules(
    [
        NetworkRule(action="allow", target="www.github.com"),
        NetworkRule(action="deny", target="pypi.org"),
    ]
)
```


================================================
FILE: sdks/sandbox/python/pyproject.toml
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[build-system]
requires = ["hatchling", "hatch-vcs"]
build-backend = "hatchling.build"

[project]
name = "opensandbox"
dynamic = ["version"]
description = "OpenSandbox Python SDK - Secure, isolated execution environments"
authors = [
    { name = "OpenSandbox Team", email = "ninan.nn@alibaba-inc.com" }
]
license = { file = "LICENSE" }
readme = "README.md"
requires-python = ">=3.10"
keywords = ["sandbox", "code-execution", "docker", "security", "sdk", "opensandbox"]
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3 :: Only",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: Software Development :: Libraries",
    "Typing :: Typed",
]
dependencies = [
    "pydantic>=2.4.2,<3.0",
    "python-dateutil>=2.8.2,<3.0",
    "attrs>=21.3.0",
    "httpx>=0.27.0,<1.0",
]

[project.urls]
Homepage = "https://open-sandbox.ai"
Repository = "https://github.com/alibaba/OpenSandbox"
Documentation = "https://open-sandbox.ai"
Issues = "https://github.com/alibaba/OpenSandbox/issues"

[tool.hatch.version]
source = "vcs"

[tool.hatch.version.raw-options]
# This package is in a subdirectory; explicitly point setuptools-scm at the git root.
root = "../../.."
tag_regex = "^python/sandbox/v(?P<version>\\d+\\.\\d+\\.\\d+(?:[\\.\\w\\+\\-]*)?)$"
git_describe_command = 'git describe --dirty --tags --long --match "python/sandbox/v*"'
fallback_version = "0.1.0"

[tool.hatch.build]
include = [
    "LICENSE",
    "src/**/py.typed",
    "src/opensandbox"
]

[tool.hatch.build.targets.wheel]
packages = ["src/opensandbox"]

[tool.ruff]
target-version = "py310"
line-length = 88
exclude = [
    "src/opensandbox/api/**",
]

[tool.ruff.lint]
select = [
    "E",  # pycodestyle errors
    "W",  # pycodestyle warnings
    "F",  # pyflakes
    "I",  # isort
    "B",  # flake8-bugbear
    "C4", # flake8-comprehensions
    "UP", # pyupgrade
]
ignore = [
    "E501", # line too long, handled by formatter
    "B008", # do not perform function calls in argument defaults
    "C901", # too complex
]

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]

[tool.pyright]
typeCheckingMode = "standard"
pythonVersion = "3.10"
pythonPlatform = "All"

include = ["src"]

exclude = [
    "**/node_modules",
    "**/__pycache__",
    "src/opensandbox/api/**",
]

venvPath = "."
venv = ".venv"

reportMissingImports = true
reportMissingTypeStubs = false

[tool.pytest.ini_options]
minversion = "6.0"
addopts = "-ra -q --strict-markers --strict-config"
testpaths = [
    "tests",
]
python_files = [
    "test_*.py",
    "*_test.py",
]
asyncio_mode = "auto"

[tool.coverage.run]
source = ["src"]
branch = true

[dependency-groups]
dev = [
    "pytest>=7.0.0",
    "pytest-asyncio>=0.21.0",
    "pytest-cov>=4.0.0",
    "ruff>=0.14.8",
    "pyright>=1.1.0",
    "openapi-python-client>=0.28.0",
]


================================================
FILE: sdks/sandbox/python/scripts/generate_api.py
================================================
#!/usr/bin/env python3

#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
OpenAPI client generation script for OpenSandbox Python SDK.

This script generates Python client code from OpenAPI specifications
using openapi-python-client, which generates httpx-based async clients
that support custom httpx.AsyncClient injection.
"""

import shutil
import subprocess
import sys
from pathlib import Path

APACHE_2_LICENSE_HEADER = """#\n# Copyright 2026 Alibaba Group Holding Ltd.\n#\n# Licensed under the Apache License, Version 2.0 (the "License");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an "AS IS" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n"""


def run_command(cmd: list[str], description: str) -> subprocess.CompletedProcess:
    """Execute ``cmd`` as a subprocess, echoing progress and any failure details.

    Args:
        cmd: Program and arguments, passed to ``subprocess.run`` as a list.
        description: Human-readable label printed before the command runs.

    Returns:
        The ``CompletedProcess`` of the successful run, with stdout/stderr
        captured as text.

    Raises:
        subprocess.CalledProcessError: The command exited with a non-zero
            status. Captured output is printed before the error is re-raised.
    """
    print(f"Running: {description}")
    print(f"Command: {' '.join(cmd)}")

    try:
        completed = subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as exc:
        print(f"❌ Error: {exc}")
        # Only echo the streams that actually carried output.
        for label, captured in (("Stdout", exc.stdout), ("Stderr", exc.stderr)):
            if captured:
                print(f"{label}: {captured}")
        raise

    print("✅ Success!")
    if completed.stdout:
        print(f"Output: {completed.stdout}")
    return completed


def generate_execd_api_client() -> None:
    """Generate the execd API client from its OpenAPI spec.

    Runs ``openapi-python-client`` into a scratch directory
    (``temp_execd_client``) and, only after generation has succeeded and a
    package directory has been located, swaps it in as
    ``src/opensandbox/api/execd``. All paths are relative to the current
    working directory.

    The previously generated client is left untouched when the spec is
    missing, the generator fails, or no package directory is produced. The
    scratch directory is always removed before returning.
    """
    print("\n🔧 Generating execd API client...")

    spec_path = Path("../../../specs/execd-api.yaml").resolve()
    output_path = Path("src/opensandbox/api/execd")
    config_path = Path("scripts/openapi_execd_config.yaml")
    temp_output = Path("temp_execd_client")

    if not spec_path.exists():
        print(f"❌ OpenAPI spec not found at {spec_path}")
        print("Please ensure the specs directory is available")
        return

    # Start from a clean scratch directory so stale output of an earlier run
    # cannot be mistaken for freshly generated code.
    if temp_output.exists():
        shutil.rmtree(temp_output)

    # Generate using openapi-python-client
    cmd = [
        "openapi-python-client",
        "generate",
        "--path",
        str(spec_path),
        "--output-path",
        str(temp_output),
        "--config",
        str(config_path),
        "--overwrite",
    ]

    try:
        try:
            run_command(cmd, "Generating execd API client")
        except subprocess.CalledProcessError:
            print("❌ Failed to generate execd API client")
            return

        # openapi-python-client generates the package inside the output directory.
        generated_package = temp_output / "opensandbox_api_execd"
        used_fallback = not generated_package.exists()
        if used_fallback:
            # Package name doesn't match: fall back to the first non-hidden
            # directory (sorted so the choice is deterministic).
            candidates = (
                sorted(
                    item
                    for item in temp_output.iterdir()
                    if item.is_dir() and not item.name.startswith(".")
                )
                if temp_output.is_dir()
                else []
            )
            if not candidates:
                print(f"❌ No generated package found in {temp_output}")
                return
            generated_package = candidates[0]

        # Replace the existing client only now that a new one is in hand.
        if output_path.exists():
            shutil.rmtree(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(generated_package), str(output_path))

        if used_fallback:
            print(f"✅ Moved generated code from {generated_package} to {output_path}")
        else:
            print(f"✅ Moved generated code to {output_path}")
    finally:
        # Never leave the scratch directory behind, whatever the outcome.
        shutil.rmtree(temp_output, ignore_errors=True)


def generate_egress_api_client() -> None:
    """Generate the egress API client from its OpenAPI spec.

    Runs ``openapi-python-client`` into a scratch directory
    (``temp_egress_client``) and, only after generation has succeeded and a
    package directory has been located, swaps it in as
    ``src/opensandbox/api/egress``. All paths are relative to the current
    working directory.

    The previously generated client is left untouched when the spec is
    missing, the generator fails, or no package directory is produced. The
    scratch directory is always removed before returning.
    """
    print("\n🔧 Generating egress API client...")

    spec_path = Path("../../../specs/egress-api.yaml").resolve()
    output_path = Path("src/opensandbox/api/egress")
    config_path = Path("scripts/openapi_egress_config.yaml")
    temp_output = Path("temp_egress_client")

    if not spec_path.exists():
        print(f"❌ OpenAPI spec not found at {spec_path}")
        print("Please ensure the specs directory is available")
        return

    # Start from a clean scratch directory so stale output of an earlier run
    # cannot be mistaken for freshly generated code.
    if temp_output.exists():
        shutil.rmtree(temp_output)

    cmd = [
        "openapi-python-client",
        "generate",
        "--path",
        str(spec_path),
        "--output-path",
        str(temp_output),
        "--config",
        str(config_path),
        "--overwrite",
    ]

    try:
        try:
            run_command(cmd, "Generating egress API client")
        except subprocess.CalledProcessError:
            print("❌ Failed to generate egress API client")
            return

        # openapi-python-client generates the package inside the output directory.
        generated_package = temp_output / "opensandbox_api_egress"
        used_fallback = not generated_package.exists()
        if used_fallback:
            # Package name doesn't match: fall back to the first non-hidden
            # directory (sorted so the choice is deterministic).
            candidates = (
                sorted(
                    item
                    for item in temp_output.iterdir()
                    if item.is_dir() and not item.name.startswith(".")
                )
                if temp_output.is_dir()
                else []
            )
            if not candidates:
                print(f"❌ No generated package found in {temp_output}")
                return
            generated_package = candidates[0]

        # Replace the existing client only now that a new one is in hand.
        if output_path.exists():
            shutil.rmtree(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(generated_package), str(output_path))

        if used_fallback:
            print(f"✅ Moved generated code from {generated_package} to {output_path}")
        else:
            print(f"✅ Moved generated code to {output_path}")
    finally:
        # Never leave the scratch directory behind, whatever the outcome.
        shutil.rmtree(temp_output, ignore_errors=True)


def generate_sandbox_lifecycle_api() -> None:
    """Generate the sandbox lifecycle API client from its OpenAPI spec.

    Runs ``openapi-python-client`` into a scratch directory
    (``temp_lifecycle_client``) and, only after generation has succeeded and a
    package directory has been located, swaps it in as
    ``src/opensandbox/api/lifecycle``. All paths are relative to the current
    working directory.

    The previously generated client is left untouched when the spec is
    missing, the generator fails, or no package directory is produced. The
    scratch directory is always removed before returning.
    """
    print("\n🔧 Generating sandbox lifecycle API client...")

    spec_path = Path("../../../specs/sandbox-lifecycle.yml").resolve()
    output_path = Path("src/opensandbox/api/lifecycle")
    config_path = Path("scripts/openapi_lifecycle_config.yaml")
    temp_output = Path("temp_lifecycle_client")

    if not spec_path.exists():
        print(f"❌ OpenAPI spec not found at {spec_path}")
        return

    # Start from a clean scratch directory so stale output of an earlier run
    # cannot be mistaken for freshly generated code.
    if temp_output.exists():
        shutil.rmtree(temp_output)

    # Generate using openapi-python-client
    cmd = [
        "openapi-python-client",
        "generate",
        "--path",
        str(spec_path),
        "--output-path",
        str(temp_output),
        "--config",
        str(config_path),
        "--overwrite",
    ]

    try:
        try:
            run_command(cmd, "Generating sandbox lifecycle API client")
        except subprocess.CalledProcessError:
            print("❌ Failed to generate lifecycle API client")
            return

        # openapi-python-client generates the package inside the output directory.
        generated_package = temp_output / "opensandbox_api_lifecycle"
        used_fallback = not generated_package.exists()
        if used_fallback:
            # Package name doesn't match: fall back to the first non-hidden
            # directory (sorted so the choice is deterministic).
            candidates = (
                sorted(
                    item
                    for item in temp_output.iterdir()
                    if item.is_dir() and not item.name.startswith(".")
                )
                if temp_output.is_dir()
                else []
            )
            if not candidates:
                print(f"❌ No generated package found in {temp_output}")
                return
            generated_package = candidates[0]

        # Replace the existing client only now that a new one is in hand.
        if output_path.exists():
            shutil.rmtree(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(generated_package), str(output_path))

        if used_fallback:
            print(f"✅ Moved generated code from {generated_package} to {output_path}")
        else:
            print(f"✅ Moved generated code to {output_path}")
    finally:
        # Never leave the scratch directory behind, whatever the outcome.
        shutil.rmtree(temp_output, ignore_errors=True)


def add_license_headers(root: Path) -> None:
    """Prepend the Apache-2.0 header to every ``*.py`` file below ``root``.

    Files whose first 50 lines already mention the Apache license are left
    alone, so running this repeatedly is safe. Does nothing if ``root`` does
    not exist.

    Args:
        root: Directory that is searched recursively for Python files.
    """
    if not root.exists():
        return

    license_marker = "Licensed under the Apache License, Version 2.0"
    updated = 0
    already_licensed = 0

    for py_file in root.rglob("*.py"):
        source = py_file.read_text(encoding="utf-8")

        # Only inspect the top of the file; a lightweight check that tolerates
        # small variations in how the header is written.
        leading_lines = source.splitlines()[:50]
        if license_marker in "\n".join(leading_lines):
            already_licensed += 1
        else:
            py_file.write_text(APACHE_2_LICENSE_HEADER + source, encoding="utf-8")
            updated += 1

    print(
        f"✅ Added license headers under {root} (updated={updated}, skipped={already_licensed})"
    )


def patch_lifecycle_nullable_nested_models(root: Path) -> None:
    """Make selected generated lifecycle models treat ``None`` like ``UNSET``.

    For a fixed list of ``models/*.py`` files under ``root``, the first
    ``isinstance(_field, Unset)`` guard of the named field is widened to also
    accept ``None``. Files that are missing or already patched are skipped.

    Args:
        root: Root directory of the generated lifecycle client package.
    """
    models_dir = root / "models"
    # (model file, nested field whose Unset guard must also accept None)
    nullable_fields = (
        ("image_spec.py", "auth"),
        ("create_sandbox_response.py", "metadata"),
        ("sandbox.py", "metadata"),
        ("sandbox_status.py", "last_transition_at"),
    )

    patched_count = 0
    for filename, field in nullable_fields:
        target = models_dir / filename
        if not target.exists():
            continue

        guard_before = (
            f"        if isinstance(_{field}, Unset):\n            {field} = UNSET\n"
        )
        guard_after = (
            f"        if isinstance(_{field}, Unset) or _{field} is None:\n"
            f"            {field} = UNSET\n"
        )

        original_text = target.read_text(encoding="utf-8")
        # Only the first occurrence is rewritten; a no-op when the guard is absent.
        patched_text = original_text.replace(guard_before, guard_after, 1)
        if patched_text == original_text:
            continue

        target.write_text(patched_text, encoding="utf-8")
        patched_count += 1

    if patched_count:
        print(f"✅ Patched nullable lifecycle model handling in {patched_count} files")


def post_process_generated_code() -> None:
    """Tidy up the generated clients under ``src/opensandbox/api``.

    Creates the package ``__init__.py`` when the directory exists without one,
    adds license headers to each generated client and to the API package
    itself, then applies the lifecycle nullable-model patch.
    """
    print("\n🔧 Post-processing generated code...")

    api_root = Path("src/opensandbox/api")
    package_marker = api_root / "__init__.py"
    if api_root.exists() and not package_marker.exists():
        package_marker.write_text(
            '"""OpenSandbox API clients generated from OpenAPI specs."""\n'
        )
        print(f"✅ Created {package_marker}")

    # Each client first, then the enclosing package (already-licensed files
    # are skipped by add_license_headers).
    for licensed_dir in (
        "src/opensandbox/api/execd",
        "src/opensandbox/api/egress",
        "src/opensandbox/api/lifecycle",
        "src/opensandbox/api",
    ):
        add_license_headers(Path(licensed_dir))

    patch_lifecycle_nullable_nested_models(Path("src/opensandbox/api/lifecycle"))


def main() -> None:
    """Entry point: check the generator is installed, then build every client.

    Exits with status 1 when ``openapi-python-client --version`` cannot be run
    successfully. Otherwise generates the execd, egress and lifecycle clients
    in that order, post-processes them, and prints a short usage summary.
    """
    banner = "=" * 50
    print("🚀 OpenSandbox Python SDK API Generator")
    print(banner)
    print("Using openapi-python-client for httpx-based async clients")
    print(banner)

    # Probe for the generator before touching anything on disk.
    try:
        probe = subprocess.run(
            ["openapi-python-client", "--version"], check=True, capture_output=True
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        for hint in (
            "❌ openapi-python-client not found!",
            "Please install it with: pip install openapi-python-client",
            "Or: uv add --dev openapi-python-client",
        ):
            print(hint)
        sys.exit(1)

    # The version string is taken from stdout, falling back to stderr.
    version = probe.stdout.decode().strip() or probe.stderr.decode().strip()
    print(f"openapi-python-client version: {version}")

    Path("src/opensandbox/api").mkdir(parents=True, exist_ok=True)

    # Generation steps followed by post-processing, in a fixed order.
    for step in (
        generate_execd_api_client,
        generate_egress_api_client,
        generate_sandbox_lifecycle_api,
        post_process_generated_code,
    ):
        step()

    for line in (
        "\n✅ API client generation completed!",
        "Generated clients:",
        "  - src/opensandbox/api/execd/",
        "  - src/opensandbox/api/egress/",
        "  - src/opensandbox/api/lifecycle/",
        "\nThe generated clients support custom httpx.AsyncClient injection:",
        "  from opensandbox.api.execd import Client, AuthenticatedClient",
        '  client = AuthenticatedClient(base_url="...", token="...", httpx_client=custom_client)',
    ):
        print(line)


if __name__ == "__main__":
    main()


================================================
FILE: sdks/sandbox/python/scripts/openapi_egress_config.yaml
================================================
# openapi-python-client configuration for egress API
# This generates a httpx-based async client that supports custom httpx.AsyncClient injection

project_name_override: opensandbox_api_egress
package_name_override: opensandbox_api_egress

use_path_prefix_for_title_model_names: true


================================================
FILE: sdks/sandbox/python/scripts/openapi_execd_config.yaml
================================================
# openapi-python-client configuration for execd API
# This generates a httpx-based async client that supports custom httpx.AsyncClient injection

# Package name without hyphens (will be the module directory name)
project_name_override: opensandbox_api_execd
package_name_override: opensandbox_api_execd

# Prefix title-derived model names with their path to keep generated names unique
use_path_prefix_for_title_model_names: true

# Generate both sync and async clients (async is default, sync is opt-in)
# The generated client will support passing a custom httpx.AsyncClient


================================================
FILE: sdks/sandbox/python/scripts/openapi_lifecycle_config.yaml
================================================
# openapi-python-client configuration for lifecycle API
# This generates a httpx-based async client that supports custom httpx.AsyncClient injection

# Package name without hyphens (will be the module directory name)
project_name_override: opensandbox_api_lifecycle
package_name_override: opensandbox_api_lifecycle

# Prefix title-derived model names with their path to keep generated names unique
use_path_prefix_for_title_model_names: true

# Generate both sync and async clients (async is default, sync is opt-in)
# The generated client will support passing a custom httpx.AsyncClient


================================================
FILE: sdks/sandbox/python/src/opensandbox/__init__.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
OpenSandbox Python SDK

Secure, isolated execution environments for code and applications.

## Basic Usage

```python
import asyncio
from opensandbox import Sandbox
from opensandbox.models.execd import RunCommandOpts
from opensandbox.models.sandboxes import SandboxImageSpec

async def main():
    # Create a sandbox instance.
    #
    # Note on lifecycle:
    # - Exiting the context manager will call `sandbox.close()` (local HTTP resources only).
    # - You must still call `sandbox.kill()` to terminate the remote sandbox instance.
    async with await Sandbox.create("python:3.11") as sandbox:
        # Write a file
        await sandbox.files.write_file("hello.py", "print('Hello World')")

        # Execute a command
        result = await sandbox.commands.run("python hello.py")
        print(result.logs.stdout[0].text)  # Hello World

if __name__ == "__main__":
    asyncio.run(main())
```

## Advanced Usage

```python
import asyncio
from datetime import timedelta
from opensandbox import Sandbox
from opensandbox.config import ConnectionConfig
from opensandbox.models.execd import RunCommandOpts
from opensandbox.models.sandboxes import SandboxImageSpec, SandboxImageAuth

async def main():
    config = ConnectionConfig(
        api_key="your-api-key",
        domain="api.opensandbox.io"
    )

    # With private registry auth
    image_spec = SandboxImageSpec(
        "my-registry.com/python:3.11",
        auth=SandboxImageAuth(username="user", password="secret")
    )

    sandbox = await Sandbox.create(
        image_spec,
        timeout=timedelta(minutes=30),
        env={"PYTHONPATH": "/workspace"},
        connection_config=config,
    )

    try:
        # File operations
        await sandbox.files.write_file("script.py", "print('Hello OpenSandbox!')")

        # Command execution
        result = await sandbox.commands.run("python script.py")
        print(result.logs.stdout[0].text)

        # Get metrics
        metrics = await sandbox.get_metrics()
        print(f"Memory usage: {metrics.memory_used_in_mib}MB")

    finally:
        await sandbox.kill()
        await sandbox.close()

if __name__ == "__main__":
    asyncio.run(main())
```

For advanced code execution with persistent contexts, see the separate
`opensandbox-code-interpreter` package.
"""

from importlib.metadata import PackageNotFoundError
from importlib.metadata import version as _pkg_version

from opensandbox.manager import SandboxManager
from opensandbox.sandbox import Sandbox
from opensandbox.sync import SandboxManagerSync, SandboxSync

# Resolve the package version from the installed distribution metadata.
try:
    __version__ = _pkg_version("opensandbox")
except PackageNotFoundError:  # pragma: no cover
    # Fallback for editable/uninstalled source checkouts.
    __version__ = "0.0.0"

# Names exported by `from opensandbox import *`.
__all__ = [
    "Sandbox",
    "SandboxManager",
    "SandboxSync",
    "SandboxManagerSync",
]


================================================
FILE: sdks/sandbox/python/src/opensandbox/adapters/__init__.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Adapter layer for OpenSandbox SDK.

Implements the service protocols using HTTP API calls.
"""

from opensandbox.adapters.command_adapter import CommandsAdapter
from opensandbox.adapters.egress_adapter import EgressAdapter
from opensandbox.adapters.factory import AdapterFactory
from opensandbox.adapters.filesystem_adapter import FilesystemAdapter
from opensandbox.adapters.health_adapter import HealthAdapter
from opensandbox.adapters.metrics_adapter import MetricsAdapter
from opensandbox.adapters.sandboxes_adapter import SandboxesAdapter

__all__ = [
    "AdapterFactory",
    "SandboxesAdapter",
    "FilesystemAdapter",
    "CommandsAdapter",
    "EgressAdapter",
    "HealthAdapter",
    "MetricsAdapter",
]


================================================
FILE: sdks/sandbox/python/src/opensandbox/adapters/command_adapter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Command service adapter implementation.

Implementation of Commands that adapts openapi-python-client generated CommandApi.
This adapter handles command execution within sandboxes, providing both
synchronous and streaming execution modes with proper session management.
"""

import json
import logging

import httpx

from opensandbox.adapters.converter.command_model_converter import (
    to_command_status,
)
from opensandbox.adapters.converter.event_node import EventNode
from opensandbox.adapters.converter.exception_converter import (
    ExceptionConverter,
)
from opensandbox.adapters.converter.execution_converter import (
    ExecutionConverter,
)
from opensandbox.adapters.converter.execution_event_dispatcher import (
    ExecutionEventDispatcher,
)
from opensandbox.adapters.converter.response_handler import (
    extract_request_id,
    handle_api_error,
)
from opensandbox.config import ConnectionConfig
from opensandbox.exceptions import InvalidArgumentException, SandboxApiException
from opensandbox.models.execd import (
    CommandLogs,
    CommandStatus,
    Execution,
    ExecutionHandlers,
    RunCommandOpts,
)
from opensandbox.models.sandboxes import SandboxEndpoint
from opensandbox.services.command import Commands

logger = logging.getLogger(__name__)


class CommandsAdapter(Commands):
    """
    Implementation of Commands that adapts openapi-python-client generated CommandApi.

    This adapter handles command execution within sandboxes, providing both
    synchronous and streaming execution modes with proper session management.

    The adapter uses direct httpx streaming for command execution to handle
    Server-Sent Events (SSE) properly, while using the generated API client
    for simpler operations like interrupt.
    """

    RUN_COMMAND_PATH = "/command"
    INTERRUPT_COMMAND_PATH = "/command/{execution_id}/interrupt"

    # Line prefixes of an SSE stream that never carry an event payload:
    # ":" starts a comment (often used as keep-alive), the others are
    # non-data fields. None of them can be valid JSON, so they are skipped
    # instead of being reported as parse failures.
    _SSE_NON_DATA_PREFIXES = (":", "event:", "id:", "retry:")

    def __init__(
        self,
        connection_config: ConnectionConfig,
        execd_endpoint: SandboxEndpoint,
    ) -> None:
        """
        Initialize the command service adapter.

        Args:
            connection_config: Connection configuration (shared transport, headers, timeouts)
            execd_endpoint: Endpoint for execd service
        """
        self.connection_config = connection_config
        self.execd_endpoint = execd_endpoint
        from opensandbox.api.execd import Client

        protocol = self.connection_config.protocol
        base_url = f"{protocol}://{self.execd_endpoint.endpoint}"
        timeout_seconds = self.connection_config.request_timeout.total_seconds()
        timeout = httpx.Timeout(timeout_seconds)

        # Later entries win: endpoint headers override connection headers,
        # which in turn override the default User-Agent.
        headers = {
            "User-Agent": self.connection_config.user_agent,
            **self.connection_config.headers,
            **self.execd_endpoint.headers,
        }

        # Execd API does not require authentication
        self._client = Client(
            base_url=base_url,
            timeout=timeout,
        )

        # Inject httpx client (adapter-owned)
        self._httpx_client = httpx.AsyncClient(
            base_url=base_url,
            headers=headers,
            timeout=timeout,
            transport=self.connection_config.transport,
        )
        self._client.set_async_httpx_client(self._httpx_client)

        # SSE client (read timeout disabled); endpoint headers already in headers
        sse_headers = {
            **headers,
            "Accept": "text/event-stream",
            "Cache-Control": "no-cache",
        }
        self._sse_client = httpx.AsyncClient(
            headers=sse_headers,
            timeout=httpx.Timeout(
                connect=timeout_seconds,
                read=None,
                write=timeout_seconds,
                pool=None,
            ),
            transport=self.connection_config.transport,
        )

    async def _get_client(self):
        """Return the client for execd API (no auth required)."""
        return self._client

    def _get_execd_url(self, path: str) -> str:
        """Build URL for execd endpoint."""
        protocol = self.connection_config.protocol
        return f"{protocol}://{self.execd_endpoint.endpoint}{path}"

    async def _get_sse_client(self) -> httpx.AsyncClient:
        """Return SSE client (read timeout disabled) for execd streaming."""
        return self._sse_client

    async def run(
        self,
        command: str,
        *,
        opts: RunCommandOpts | None = None,
        handlers: ExecutionHandlers | None = None,
    ) -> Execution:
        """Execute a shell command within the sandbox.

        This method uses direct httpx streaming to handle SSE responses
        from the execd service.

        Args:
            command: Shell command to run; must contain non-whitespace text.
            opts: Optional run options; defaults to ``RunCommandOpts()``.
            handlers: Optional callbacks invoked as stream events arrive.

        Returns:
            The Execution object populated from the streamed events.

        Raises:
            InvalidArgumentException: If ``command`` is empty or whitespace.
            SandboxException: For any other failure, as produced by
                ``ExceptionConverter.to_sandbox_exception``. A non-200
                response raises ``SandboxApiException`` carrying the status
                code, request id and the parsed error body (when parseable).
        """
        if not command.strip():
            raise InvalidArgumentException("Command cannot be empty")

        try:
            # Convert domain model to API model
            opts = opts or RunCommandOpts()
            json_body = ExecutionConverter.to_api_run_command_json(command, opts)

            # Prepare URL
            url = self._get_execd_url(self.RUN_COMMAND_PATH)

            execution = Execution(
                id=None,
                execution_count=None,
                result=[],
                error=None,
            )

            # Use SSE client for streaming responses (read timeout disabled)
            client = await self._get_sse_client()

            # Use streaming request for SSE
            async with client.stream("POST", url, json=json_body) as response:
                if response.status_code != 200:
                    from opensandbox.adapters.converter.exception_converter import (
                        parse_sandbox_error,
                    )

                    # The body of a streamed response must be read explicitly
                    # before ``response.text`` is available.
                    await response.aread()
                    error_body = response.text
                    logger.error(
                        "Failed to run command. Status: %s, Body: %s",
                        response.status_code,
                        error_body,
                    )
                    raise SandboxApiException(
                        message=f"Failed to run command. Status code: {response.status_code}",
                        status_code=response.status_code,
                        request_id=extract_request_id(response.headers),
                        # Surface the server-provided error details instead of
                        # only logging them.
                        error=parse_sandbox_error(error_body),
                    )

                dispatcher = ExecutionEventDispatcher(execution, handlers)

                async for line in response.aiter_lines():
                    if not line.strip():
                        continue

                    # Comments / non-data SSE fields carry no event payload.
                    if line.startswith(self._SSE_NON_DATA_PREFIXES):
                        continue

                    # Handle potential SSE format "data: ..."
                    data = line
                    if data.startswith("data:"):
                        data = data[5:].strip()

                    try:
                        event_dict = json.loads(data)
                        event_node = EventNode(**event_dict)
                        await dispatcher.dispatch(event_node)
                    except Exception as e:
                        # Best effort: one bad event must not abort the stream.
                        logger.error(f"Failed to parse SSE line: {line}", exc_info=e)

            return execution

        except Exception as e:
            logger.error(
                "Failed to run command (length: %s)",
                len(command),
                exc_info=e,
            )
            raise ExceptionConverter.to_sandbox_exception(e) from e

    async def interrupt(self, execution_id: str) -> None:
        """Interrupt a running command execution."""
        try:
            from opensandbox.api.execd.api.command import interrupt_command

            client = await self._get_client()
            response_obj = await interrupt_command.asyncio_detailed(
                client=client,
                id=execution_id,
            )

            handle_api_error(response_obj, "Interrupt command")

        except Exception as e:
            logger.error("Failed to interrupt command", exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    async def get_command_status(self, execution_id: str) -> CommandStatus:
        """Get the current running status for a command."""
        try:
            from opensandbox.adapters.converter.response_handler import require_parsed
            from opensandbox.api.execd.api.command import get_command_status
            from opensandbox.api.execd.models import CommandStatusResponse

            client = await self._get_client()
            response_obj = await get_command_status.asyncio_detailed(
                client=client,
                id=execution_id,
            )

            handle_api_error(response_obj, "Get command status")
            parsed = require_parsed(response_obj, CommandStatusResponse, "Get command status")
            return to_command_status(parsed)
        except Exception as e:
            logger.error("Failed to get command status", exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    async def get_background_command_logs(
        self, execution_id: str, cursor: int | None = None
    ) -> CommandLogs:
        """Get background command logs (non-streamed).

        The next cursor is taken from the ``EXECD-COMMANDS-TAIL-CURSOR``
        response header; it is ``None`` when the header is missing or is not
        an integer.
        """
        try:
            from opensandbox.adapters.converter.response_handler import require_parsed
            from opensandbox.api.execd.api.command import get_background_command_logs

            client = await self._get_client()
            from opensandbox.api.execd.types import UNSET

            response_obj = await get_background_command_logs.asyncio_detailed(
                client=client,
                id=execution_id,
                cursor=cursor if cursor is not None else UNSET,
            )

            handle_api_error(response_obj, "Get command logs")
            content = require_parsed(response_obj, str, "Get command logs")
            cursor_header = response_obj.headers.get("EXECD-COMMANDS-TAIL-CURSOR")
            next_cursor = None
            if cursor_header:
                try:
                    next_cursor = int(cursor_header)
                except ValueError:
                    next_cursor = None
            return CommandLogs(content=content, cursor=next_cursor)
        except Exception as e:
            logger.error("Failed to get command logs", exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e


================================================
FILE: sdks/sandbox/python/src/opensandbox/adapters/converter/__init__.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Model converter utilities for API/domain model mapping.

This package provides:
- ExceptionConverter / parse_sandbox_error: Convert exceptions and error bodies to SandboxException / SandboxError
- response_handler (handle_api_error): Unified API response handling
- SandboxModelConverter: Convert between API and domain models
- FilesystemModelConverter: Convert filesystem-related models
- MetricsModelConverter: Convert metrics-related models
- ExecutionConverter: Convert execution-related models (not re-exported here; import it from execution_converter)
- to_command_status: Convert execd command status responses
"""

from opensandbox.adapters.converter.command_model_converter import (
    to_command_status,
)
from opensandbox.adapters.converter.exception_converter import (
    ExceptionConverter,
    parse_sandbox_error,
)
from opensandbox.adapters.converter.filesystem_model_converter import (
    FilesystemModelConverter,
)
from opensandbox.adapters.converter.metrics_model_converter import (
    MetricsModelConverter,
)
from opensandbox.adapters.converter.response_handler import (
    handle_api_error,
)
from opensandbox.adapters.converter.sandbox_model_converter import (
    SandboxModelConverter,
)

# Names re-exported by this package; each one is imported above from its
# converter submodule.
__all__ = [
    "ExceptionConverter",
    "parse_sandbox_error",
    "FilesystemModelConverter",
    "MetricsModelConverter",
    "to_command_status",
    "SandboxModelConverter",
    "handle_api_error",
]


================================================
FILE: sdks/sandbox/python/src/opensandbox/adapters/converter/command_model_converter.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
Converters for execd command-related models.
"""

from typing import TypeVar, cast

from opensandbox.api.execd.models import CommandStatusResponse
from opensandbox.api.execd.types import Unset
from opensandbox.models.execd import CommandStatus

# Type variable for the value carried through _unwrap_optional.
T = TypeVar("T")


def _unwrap_optional(value: Unset | T) -> T | None:
    """Translate the generated client's ``Unset`` sentinel into ``None``.

    Any value that is not an ``Unset`` instance is returned unchanged.
    """
    return None if isinstance(value, Unset) else cast(T, value)


def to_command_status(raw: CommandStatusResponse) -> CommandStatus:
    """
    Build an SDK CommandStatus from an OpenAPI CommandStatusResponse.

    Every copied attribute goes through ``_unwrap_optional`` so that unset
    fields of the generated model become ``None`` on the SDK model.
    """
    copied_fields = (
        "id",
        "content",
        "running",
        "exit_code",
        "error",
        "started_at",
        "finished_at",
    )
    unwrapped = {
        field: _unwrap_optional(getattr(raw, field)) for field in copied_fields
    }
    return CommandStatus(**unwrapped)


================================================
FILE: sdks/sandbox/python/src/opensandbox/adapters/converter/event_node.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
EventNode model for parsing Server-Sent Events from execd.
"""

from pydantic import BaseModel, ConfigDict, Field


class EventNodeError(BaseModel):
    """Error payload attached to an event node.

    ``name`` and ``value`` are populated from the ``ename`` / ``evalue`` keys
    of the incoming data; ``traceback`` defaults to an empty list.
    """

    # Read from the "ename" key.
    name: str | None = Field(None, alias="ename")
    # Read from the "evalue" key.
    value: str | None = Field(None, alias="evalue")
    # A fresh list per instance when the key is absent.
    traceback: list[str] = Field(default_factory=list)


class EventNodeResults(BaseModel):
    """Results container in an event node.

    The ``text`` key is read into the ``text`` field; every other key
    (other MIME types) is kept as a pydantic "extra" because the model is
    configured with ``extra="allow"``.
    """

    text: str | None = Field(default=None, alias="text")

    def get_text(self) -> str:
        """Get the text representation of the result.

        Returns:
            ``text`` when it is set and non-empty; otherwise the value stored
            under the ``"text/plain"`` extra key if it is a string; otherwise
            an empty string.
        """
        if self.text:
            return self.text

        # NOTE(review): result payloads look like MIME bundles (see the
        # "other mime types" config below), where plain text would arrive
        # under "text/plain" rather than "text" — confirm against execd.
        extras = self.model_extra or {}
        plain_text = extras.get("text/plain")
        if isinstance(plain_text, str):
            return plain_text
        return ""

    model_config = ConfigDict(extra="allow")  # Allow other mime types


class EventNode(BaseModel):
    """
    One event parsed from the server stream.
    Corresponds to ServerStreamEvent in OpenAPI spec.

    ``type`` and ``timestamp`` are required; all other fields are optional
    and default to ``None``.
    """

    # Event discriminator.
    type: str
    # Free-form text payload of the event, when present.
    text: str | None = None
    execution_count: int | None = Field(None, alias="execution_count")
    # Read from the "execution_time" key of the incoming data.
    execution_time_in_millis: int | None = Field(None, alias="execution_time")
    timestamp: int
    results: EventNodeResults | None = None
    error: EventNodeError | None = None


================================================
FILE: sdks/sandbox/python/src/opensandbox/adapters/converter/exception_converter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Exception converter utilities.

Provides conversion functions from API exceptions to domain exceptions,
similar to the Kotlin SDK ExceptionConverter pattern.

This module handles:
1. Converting openapi-python-client generated exceptions
2. Converting httpx HTTP errors
3. Converting network/IO errors
4. Parsing error response bodies to extract SandboxError information
"""

import json
import logging
from typing import Any

from httpx import (
    ConnectError,
    HTTPStatusError,
    NetworkError,
    ReadTimeout,
    TimeoutException,
    WriteTimeout,
)

from opensandbox.api.execd.errors import UnexpectedStatus as ExecdUnexpectedStatus
from opensandbox.api.lifecycle.errors import (
    UnexpectedStatus as LifecycleUnexpectedStatus,
)
from opensandbox.exceptions import (
    InvalidArgumentException,
    SandboxApiException,
    SandboxError,
    SandboxException,
    SandboxInternalException,
)

logger = logging.getLogger(__name__)

# UnexpectedStatus classes of both generated API clients (lifecycle and execd);
# used by _is_unexpected_status_error.
UNEXPECTED_STATUS_TYPES = (LifecycleUnexpectedStatus, ExecdUnexpectedStatus)
# httpx exception classes treated as network connectivity failures; used by
# _is_httpx_network_error.
# NOTE(review): in httpx's exception hierarchy ReadTimeout/WriteTimeout appear
# to derive from TimeoutException and ConnectError from NetworkError, which
# would make those entries redundant (but harmless) — confirm.
HTTPX_NETWORK_ERROR_TYPES = (
    ConnectError,
    TimeoutException,
    NetworkError,
    ReadTimeout,
    WriteTimeout,
)


class ExceptionConverter:
    """
    Exception converter utilities following Kotlin SDK patterns.

    Offers static helpers that map arbitrary exceptions onto the SDK's
    SandboxException hierarchy, parsing error response bodies where available.
    """

    @staticmethod
    def to_sandbox_exception(e: Exception) -> SandboxException:
        """
        Convert any exception to a SandboxException.

        Mapping, checked in this order:
        - SandboxException -> returned unchanged
        - generated-client UnexpectedStatus -> SandboxApiException
        - httpx HTTPStatusError -> SandboxApiException
        - IOError/OSError/ConnectionError or httpx network errors ->
          SandboxInternalException with a network connectivity message
        - pydantic ValidationError, ValueError, TypeError ->
          InvalidArgumentException
        - NotImplementedError -> SandboxInternalException ("not supported")
        - anything else -> SandboxInternalException ("unexpected SDK error")

        Args:
            e: The original exception

        Returns:
            A SandboxException subclass
        """
        # Already in the SDK hierarchy: nothing to convert.
        if isinstance(e, SandboxException):
            return e

        # Status errors raised by the openapi-python-client generated clients.
        if _is_unexpected_status_error(e):
            return _convert_unexpected_status_to_api_exception(e)

        # Status errors raised by httpx itself.
        if _is_httpx_status_error(e):
            return _convert_httpx_error_to_api_exception(e)

        # OS-level and httpx-level connectivity problems share one message.
        os_level_failure = isinstance(e, (IOError, OSError, ConnectionError))
        if os_level_failure or _is_httpx_network_error(e):
            return SandboxInternalException(
                message=f"Network connectivity error: {e}",
                cause=e,
            )

        # SDK usage errors: invalid model data or invalid arguments.
        try:
            from pydantic import ValidationError  # type: ignore

            if isinstance(e, ValidationError):
                return InvalidArgumentException(message=str(e), cause=e)
        except Exception:
            # If pydantic isn't available for some reason, just ignore and continue
            pass

        if isinstance(e, (ValueError, TypeError)):
            return InvalidArgumentException(message=str(e), cause=e)

        # Everything left is internal; only the wording differs.
        if isinstance(e, NotImplementedError):
            detail = f"Operation not supported: {e}"
        else:
            detail = f"Unexpected SDK error occurred: {e}"
        return SandboxInternalException(
            message=detail,
            cause=e,
        )


def _is_unexpected_status_error(e: Exception) -> bool:
    """Tell whether ``e`` is an UnexpectedStatus from one of the generated API clients."""
    is_unexpected_status = isinstance(e, UNEXPECTED_STATUS_TYPES)
    return is_unexpected_status


def _is_httpx_status_error(e: Exception) -> bool:
    """Tell whether ``e`` is an httpx ``HTTPStatusError``."""
    is_status_error = isinstance(e, HTTPStatusError)
    return is_status_error


def _is_httpx_network_error(e: Exception) -> bool:
    """Tell whether ``e`` is one of the httpx errors listed in HTTPX_NETWORK_ERROR_TYPES."""
    is_network_error = isinstance(e, HTTPX_NETWORK_ERROR_TYPES)
    return is_network_error


def _convert_unexpected_status_to_api_exception(e: Exception) -> SandboxApiException:
    """Build a SandboxApiException from an openapi-python-client UnexpectedStatus.

    ``status_code`` and ``content`` are read from the exception, defaulting to
    ``0`` and ``b""`` when absent; the content is parsed into a SandboxError
    when possible.
    """
    http_status = getattr(e, "status_code", 0)
    raw_body = getattr(e, "content", b"")

    # Best-effort extraction of the structured error from the body.
    parsed_error = _parse_error_body(raw_body)

    return SandboxApiException(
        message=f"API error: HTTP {http_status}",
        status_code=http_status,
        cause=e,
        error=parsed_error,
    )


def _convert_httpx_error_to_api_exception(e: Exception) -> SandboxApiException:
    """Convert httpx HTTPStatusError to SandboxApiException.

    Args:
        e: The httpx error; its ``response`` attribute (if any) supplies the
            status code, body and request id.

    Returns:
        A SandboxApiException with status code ``0`` and no parsed error when
        the exception carries no response.
    """
    response = getattr(e, "response", None)

    status_code = 0
    content: Any = b""
    request_id = None

    # Use an explicit None check throughout instead of relying on the
    # truthiness of a response object.
    if response is not None:
        from opensandbox.adapters.converter.response_handler import extract_request_id

        status_code = response.status_code
        try:
            content = response.content
        except Exception as read_error:
            # The body may be unavailable (e.g. a streamed response that was
            # never read); converting the error must not fail because of it.
            logger.debug("Could not read error response body: %s", read_error)
            content = b""
        request_id = extract_request_id(response.headers)

    # Try to parse error body
    sandbox_error = _parse_error_body(content)

    return SandboxApiException(
        message=f"API error: HTTP {status_code}",
        status_code=status_code,
        cause=e,
        error=sandbox_error,
        request_id=request_id,
    )


def _parse_error_body(body: Any) -> SandboxError | None:
    """
    Extract a SandboxError from an error response body.

    Similar to Kotlin SDK's parseSandboxError function.

    Args:
        body: The error response body (bytes, str, or dict)

    Returns:
        - ``None`` for a missing or empty body, for decoded JSON that is not
          a dict, for a dict without a truthy ``code``, or when parsing fails
        - a SandboxError with code ``UNEXPECTED_RESPONSE`` and the raw text as
          message when the text is not valid JSON
        - a SandboxError built from the dict's ``code`` and ``message``
          otherwise (a missing message becomes ``""``)
    """
    if body is None:
        return None

    try:
        payload = body

        # Bytes are decoded leniently so that bad UTF-8 never raises.
        if isinstance(payload, bytes):
            if len(payload) == 0:
                return None
            payload = payload.decode("utf-8", errors="replace")

        if isinstance(payload, str):
            if not payload:
                return None
            try:
                payload = json.loads(payload)
            except json.JSONDecodeError:
                # Not JSON: report the raw text as the error message.
                return SandboxError(
                    code=SandboxError.UNEXPECTED_RESPONSE,
                    message=payload,
                )

        if not isinstance(payload, dict):
            return None

        code: str | None = payload.get("code")
        message: str | None = payload.get("message")
        if not code:
            return None
        return SandboxError(code=code, message=message or "")

    except Exception as ex:
        logger.debug("Failed to parse error body: %s", ex)
        return None


def parse_sandbox_error(body: Any) -> SandboxError | None:
    """
    Public entry point for turning an error body into a SandboxError.

    Delegates to ``_parse_error_body`` so other modules can parse error
    bodies without importing a private helper.
    """
    parsed = _parse_error_body(body)
    return parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/adapters/converter/execution_converter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Execution model converter utilities.

Provides conversion functions between API models and domain models for execution-related operations,
similar to the Kotlin SDK ExecutionConverter.

This converter is designed to work with openapi-python-client generated models.
"""

from typing import Any

from opensandbox.api.execd.models.run_command_request import (
    RunCommandRequest as ApiRunCommandRequest,
)
from opensandbox.models.execd import RunCommandOpts


class ExecutionConverter:
    """
    Execution model converter utilities.

    Provides static methods to convert between API models and domain models
    for execution-related operations.

    The API models are generated by openapi-python-client and use attrs.
    """

    @staticmethod
    def to_api_run_command_request(command: str, opts: RunCommandOpts) -> ApiRunCommandRequest:
        """Convert domain command + options to API RunCommandRequest.

        Options that are not provided are sent as ``UNSET`` so they are left
        out of the request: a falsy ``working_directory`` or ``background``,
        and ``None`` for ``timeout``, ``uid``, ``gid`` and ``envs``.

        Args:
            command: The command string to execute.
            opts: Domain-level run options.

        Returns:
            The generated API request model; ``timeout`` is expressed in whole
            milliseconds (sub-millisecond remainder truncated).
        """
        from datetime import timedelta

        from opensandbox.api.execd.models.run_command_request_envs import (
            RunCommandRequestEnvs,
        )
        from opensandbox.api.execd.types import UNSET

        # Convert working_directory to cwd, handling None
        cwd = UNSET
        if opts.working_directory:
            cwd = opts.working_directory

        background = UNSET
        if opts.background:
            background = opts.background

        timeout_milliseconds = UNSET
        if opts.timeout is not None:
            # Divide timedeltas instead of multiplying float seconds by 1000:
            # e.g. 1.001 s * 1000 evaluates to 1000.9999999999999, which
            # int() would truncate to 1000 ms instead of 1001 ms.
            timeout_milliseconds = int(opts.timeout / timedelta(milliseconds=1))

        uid = UNSET
        if opts.uid is not None:
            uid = opts.uid

        gid = UNSET
        if opts.gid is not None:
            gid = opts.gid

        envs = UNSET
        if opts.envs is not None:
            envs_payload = RunCommandRequestEnvs()
            for key, value in opts.envs.items():
                envs_payload[key] = value
            envs = envs_payload

        return ApiRunCommandRequest(
            command=command,
            background=background,
            cwd=cwd,  # Domain uses 'working_directory', API uses 'cwd'
            timeout=timeout_milliseconds,
            uid=uid,
            gid=gid,
            envs=envs,
            # Note: handlers are not included in API request as they are for local processing
        )

    @staticmethod
    def to_api_run_command_json(command: str, opts: RunCommandOpts) -> dict[str, Any]:
        """
        Convert command + options to a plain JSON-serializable dict for httpx requests.
        Centralizes the attrs/pydantic differences behind one callsite.
        """
        api_request = ExecutionConverter.to_api_run_command_request(command, opts)
        if hasattr(api_request, "to_dict"):
            return api_request.to_dict()
        # Fallback (shouldn't normally happen for openapi-python-client models).
        return dict(getattr(api_request, "__dict__", {}))


================================================
FILE: sdks/sandbox/python/src/opensandbox/adapters/converter/execution_event_dispatcher.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Dispatcher for processing execution events.
"""

from opensandbox.adapters.converter.event_node import EventNode
from opensandbox.models.execd import (
    Execution,
    ExecutionComplete,
    ExecutionError,
    ExecutionHandlers,
    ExecutionInit,
    ExecutionResult,
    OutputMessage,
)


class ExecutionEventDispatcher:
    """
    Routes server stream events into the Execution object and the optional
    user-supplied handlers.
    """

    def __init__(
        self,
        execution: Execution,
        handlers: ExecutionHandlers | None = None,
    ) -> None:
        self.execution = execution
        self.handlers = handlers

    async def dispatch(self, event_node: EventNode) -> None:
        """Dispatch a single event node asynchronously.

        Event types without a registered route are ignored.
        """
        kind = event_node.type
        ts = event_node.timestamp

        # "execution_count" only updates state and has no handler callback.
        if kind == "execution_count":
            if event_node.execution_count is not None:
                self.execution.execution_count = event_node.execution_count
            return

        routes = {
            "stdout": self._handle_stdout,
            "stderr": self._handle_stderr,
            "result": self._handle_result,
            "error": self._handle_error,
            "execution_complete": self._handle_execution_complete,
            "init": self._handle_init,
        }
        route = routes.get(kind)
        if route is not None:
            await route(event_node, ts)

    async def _notify(self, hook_name: str, payload: object) -> None:
        """Await the named handler with ``payload`` when handlers and that hook are set."""
        if not self.handlers:
            return
        hook = getattr(self.handlers, hook_name)
        if hook:
            await hook(payload)

    async def _handle_init(self, event_node: EventNode, timestamp: int) -> None:
        # The init event carries the execution id in its text field.
        init_event = ExecutionInit(
            id=event_node.text or "",
            timestamp=timestamp,
        )
        self.execution.id = init_event.id
        await self._notify("on_init", init_event)

    async def _handle_stdout(self, event_node: EventNode, timestamp: int) -> None:
        stdout_message = OutputMessage(
            text=event_node.text or "",
            timestamp=timestamp,
            is_error=False,
        )
        self.execution.logs.add_stdout(stdout_message)
        await self._notify("on_stdout", stdout_message)

    async def _handle_stderr(self, event_node: EventNode, timestamp: int) -> None:
        stderr_message = OutputMessage(
            text=event_node.text or "",
            timestamp=timestamp,
            is_error=True,
        )
        self.execution.logs.add_stderr(stderr_message)
        await self._notify("on_stderr", stderr_message)

    async def _handle_result(self, event_node: EventNode, timestamp: int) -> None:
        # A result event without a results container yields empty text.
        if event_node.results:
            rendered = event_node.results.get_text()
        else:
            rendered = ""
        outcome = ExecutionResult(
            text=rendered,
            timestamp=timestamp,
        )
        self.execution.add_result(outcome)
        await self._notify("on_result", outcome)

    async def _handle_error(self, event_node: EventNode, timestamp: int) -> None:
        details = event_node.error
        if not details:
            # Error events without a payload are dropped.
            return

        failure = ExecutionError(
            name=details.name or "",
            value=details.value or "",
            timestamp=timestamp,
            traceback=details.traceback,
        )
        self.execution.error = failure
        await self._notify("on_error", failure)

    async def _handle_execution_complete(self, event_node: EventNode, timestamp: int) -> None:
        # Completion is only forwarded to the handler; nothing is stored on
        # the Execution object here.
        completion = ExecutionComplete(
            timestamp=timestamp,
            execution_time_in_millis=event_node.execution_time_in_millis or 0,
        )
        await self._notify("on_execution_complete", completion)


================================================
FILE: sdks/sandbox/python/src/opensandbox/adapters/converter/filesystem_model_converter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Filesystem model converter utilities.

Provides conversion functions between API models and domain models for filesystem operations,
similar to SandboxModelConverter.

This converter is designed to work with openapi-python-client generated models.
"""

from typing import Any

from opensandbox.api.execd.models import FileInfo
from opensandbox.models.filesystem import (
    ContentReplaceEntry,
    EntryInfo,
    MoveEntry,
    SetPermissionEntry,
    WriteEntry,
)


class FilesystemModelConverter:
    """
    Stateless translation helpers for filesystem payloads.

    Every method is static. ``to_entry_info*`` methods turn API models into
    domain models; ``to_api_*`` methods turn domain entries into the request
    bodies expected by the generated execd client.
    """

    @staticmethod
    def to_entry_info(api_file_info: FileInfo) -> EntryInfo:
        """Build a domain EntryInfo by copying each field of an API FileInfo."""
        copied_fields = (
            "path",
            "mode",
            "owner",
            "group",
            "size",
            "modified_at",
            "created_at",
        )
        return EntryInfo(
            **{name: getattr(api_file_info, name) for name in copied_fields}
        )

    @staticmethod
    def to_entry_info_list(api_file_infos: list[FileInfo]) -> list[EntryInfo]:
        """Convert each API FileInfo in the list; a falsy input yields an empty list."""
        converted: list[EntryInfo] = []
        if api_file_infos:
            for file_info in api_file_infos:
                converted.append(FilesystemModelConverter.to_entry_info(file_info))
        return converted

    @staticmethod
    def to_entry_info_map(api_response: Any) -> dict[str, EntryInfo]:
        """
        Convert an API response into a ``path -> EntryInfo`` mapping.

        The entries are read from ``api_response.additional_properties`` when
        that attribute exists, otherwise from ``api_response`` itself when it
        is a dict. Values that are not FileInfo instances are skipped. A falsy
        or unsupported response yields an empty mapping.
        """
        if not api_response:
            return {}

        if hasattr(api_response, "additional_properties"):
            raw_entries = api_response.additional_properties
        elif isinstance(api_response, dict):
            raw_entries = api_response
        else:
            return {}

        return {
            path: FilesystemModelConverter.to_entry_info(candidate)
            for path, candidate in raw_entries.items()
            if isinstance(candidate, FileInfo)
        }

    @staticmethod
    def to_api_make_dirs_body(entries: list[WriteEntry]):
        """Build a MakeDirsBody keyed by path with each entry's mode/owner/group."""
        from opensandbox.api.execd.models.make_dirs_body import MakeDirsBody

        payload: dict[str, dict[str, Any]] = {}
        for directory in entries:
            payload[directory.path] = {
                "mode": directory.mode,
                "owner": directory.owner,
                "group": directory.group,
            }
        return MakeDirsBody.from_dict(payload)

    @staticmethod
    def to_api_chmod_files_body(entries: list[SetPermissionEntry]):
        """Build a ChmodFilesBody keyed by path with each entry's mode/owner/group."""
        from opensandbox.api.execd.models.chmod_files_body import ChmodFilesBody

        payload: dict[str, dict[str, Any]] = {}
        for permission in entries:
            payload[permission.path] = {
                "mode": permission.mode,
                "owner": permission.owner,
                "group": permission.group,
            }
        return ChmodFilesBody.from_dict(payload)

    @staticmethod
    def to_api_replace_content_body(entries: list[ContentReplaceEntry]):
        """Build a ReplaceContentBody keyed by path from content replacement entries."""
        from opensandbox.api.execd.models.replace_content_body import ReplaceContentBody

        payload: dict[str, dict[str, Any]] = {}
        for replacement in entries:
            # Execd API expects keys "old" and "new" (see execd-api.yaml ReplaceFileContentItem).
            payload[replacement.path] = {
                "old": replacement.old_content,
                "new": replacement.new_content,
            }
        return ReplaceContentBody.from_dict(payload)

    @staticmethod
    def to_api_rename_file_items(entries: list[MoveEntry]):
        """Convert move entries into a list of RenameFileItem (src/dest copied as-is)."""
        from opensandbox.api.execd.models.rename_file_item import RenameFileItem

        rename_items = []
        for move in entries:
            rename_items.append(RenameFileItem(src=move.src, dest=move.dest))
        return rename_items


================================================
FILE: sdks/sandbox/python/src/opensandbox/adapters/converter/metrics_model_converter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Metrics model converter utilities.

Provides conversion functions between API models and domain models for metrics operations.
"""

from opensandbox.api.execd.models import Metrics
from opensandbox.models.sandboxes import SandboxMetrics


class MetricsModelConverter:
    """
    Stateless translation helpers for metrics payloads.

    Exposes static methods that turn API metrics models into the domain
    models used by the SDK.
    """

    @staticmethod
    def to_sandbox_metrics(api_metrics: Metrics) -> SandboxMetrics:
        """Build a domain SandboxMetrics from an API Metrics object.

        The API uses abbreviated attribute names (``cpu_used_pct``,
        ``mem_total_mib``, ``mem_used_mib``); the domain model spells them out.
        """
        # Domain field name -> attribute to read from the API model.
        field_sources = {
            "cpu_count": "cpu_count",
            "cpu_used_percentage": "cpu_used_pct",
            "memory_total_in_mib": "mem_total_mib",
            "memory_used_in_mib": "mem_used_mib",
            "timestamp": "timestamp",
        }
        return SandboxMetrics(
            **{
                domain_field: getattr(api_metrics, api_attr)
                for domain_field, api_attr in field_sources.items()
            }
        )


================================================
FILE: sdks/sandbox/python/src/opensandbox/adapters/converter/response_handler.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Unified response handler for API calls.

Provides a centralized way to handle API responses, including:
1. Status code validation
2. Error response handling
3. Unified exception conversion

This eliminates the need to repeat response handling logic in each adapter method.
"""

import logging
from http import HTTPStatus
from typing import Any, TypeVar

from opensandbox.exceptions import SandboxApiException

logger = logging.getLogger(__name__)


T = TypeVar("T")


def extract_request_id(headers: Any) -> str | None:
    """
    Extract X-Request-ID from response headers in a case-insensitive way.
    """
    if not headers:
        return None
    try:
        # httpx.Headers supports case-insensitive lookup.
        value = headers.get("X-Request-ID") or headers.get("x-request-id")
        if isinstance(value, str):
            value = value.strip()
        return value or None
    except Exception:
        return None


def _status_code_to_int(status_code: Any) -> int:
    """
    Normalize status_code from openapi-python-client responses to a plain int.

    openapi-python-client may use http.HTTPStatus; some callers may already provide an int.
    """
    if isinstance(status_code, HTTPStatus):
        return int(status_code)
    if isinstance(status_code, int):
        return status_code
    value = getattr(status_code, "value", None)
    if isinstance(value, int):
        return value
    try:
        return int(status_code)
    except Exception:
        return 0


def require_parsed(response_obj: Any, expected_type: type[T], operation_name: str) -> T:
    """
    Return ``response_obj.parsed`` after checking that it is usable.

    Intended to run after `handle_api_error()`.

    Args:
        response_obj: Response object from the generated client; its
            ``status_code``, ``headers`` and ``parsed`` attributes are read
            with safe defaults when missing.
        expected_type: Type the parsed payload must be an instance of.
        operation_name: Operation label used as the error message prefix.

    Returns:
        The parsed payload, typed as ``expected_type``.

    Raises:
        SandboxApiException: If the payload is missing (``None``) or is not an
            instance of ``expected_type``. The exception carries the response
            status code and request id.
    """
    status_code = _status_code_to_int(getattr(response_obj, "status_code", 0))
    request_id = extract_request_id(getattr(response_obj, "headers", None))
    payload = getattr(response_obj, "parsed", None)

    if payload is not None and isinstance(payload, expected_type):
        return payload

    if payload is None:
        failure = f"{operation_name} failed: empty response"
    else:
        failure = f"{operation_name} failed: unexpected response type"

    raise SandboxApiException(
        message=failure,
        status_code=status_code,
        request_id=request_id,
    )


def handle_api_error(response_obj: Any, operation_name: str = "API call") -> None:
    """
    Check API response for errors and raise exception if needed.

    Call this before accessing response_obj.parsed to validate the response.

    The error message prefers the parsed payload's ``message``, then its
    ``code``, and finally falls back to ``HTTP <status>``. A ``message`` or
    ``code`` that is present but falsy (``None``, empty string, or a falsy
    placeholder) is skipped so it cannot hide a more useful detail.

    Args:
        response_obj: The Response object from asyncio_detailed or sync_detailed
        operation_name: Name of the operation for error messages

    Raises:
        SandboxApiException: If the response status code is 300 or greater
    """
    status_code = _status_code_to_int(getattr(response_obj, "status_code", 0))
    request_id = extract_request_id(getattr(response_obj, "headers", None))

    # Lazy %-style arguments: the message is only built when DEBUG is enabled.
    logger.debug("%s response: status=%s", operation_name, status_code)

    if status_code < 300:
        return

    error_message = f"{operation_name} failed: HTTP {status_code}"

    parsed = getattr(response_obj, "parsed", None)
    if parsed is not None:
        for detail_field in ("message", "code"):
            detail = getattr(parsed, detail_field, None)
            # NOTE(review): presumably the generated client's UNSET sentinel is
            # falsy as well, so it is skipped here too; confirm against the
            # generated types module.
            if detail:
                error_message = f"{operation_name} failed: {detail}"
                break

    raise SandboxApiException(
        message=error_message,
        status_code=status_code,
        request_id=request_id,
    )


================================================
FILE: sdks/sandbox/python/src/opensandbox/adapters/converter/sandbox_model_converter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Sandbox model converter utilities.

Provides conversion functions between API models and domain models,
similar to the Kotlin SDK SandboxModelConverter.

This converter is designed to work with openapi-python-client generated models,
which use attrs for model definitions.
"""
from datetime import datetime, timedelta, timezone
from typing import Literal, cast

from opensandbox.api.lifecycle.models import (
    CreateSandboxResponse,
    Endpoint,
    ListSandboxesResponse,
    RenewSandboxExpirationRequest,
    RenewSandboxExpirationResponse,
    Sandbox,
)
from opensandbox.api.lifecycle.models import (
    PaginationInfo as ApiPaginationInfo,
)
from opensandbox.api.lifecycle.models import (
    SandboxStatus as ApiSandboxStatus,
)
from opensandbox.api.lifecycle.models.create_sandbox_request import CreateSandboxRequest
from opensandbox.api.lifecycle.models.image_spec import ImageSpec
from opensandbox.models.sandboxes import (
    NetworkPolicy,
    NetworkRule,
    PagedSandboxInfos,
    PaginationInfo,
    SandboxCreateResponse,
    SandboxEndpoint,
    SandboxImageSpec,
    SandboxInfo,
    SandboxRenewResponse,
    SandboxStatus,
    Volume,
)


class SandboxModelConverter:
    """
    Static translation layer between lifecycle API models and SDK domain models.

    The API models are generated by openapi-python-client (attrs-based) and
    mark omitted optional fields with an UNSET sentinel. The helpers below map
    UNSET to ``None`` (or a documented default) when reading API models, and
    map ``None``/empty values to UNSET when building API requests.
    """

    @staticmethod
    def to_api_image_spec(spec: SandboxImageSpec) -> ImageSpec:
        """Build an API ImageSpec from a domain SandboxImageSpec.

        Credentials are forwarded only when ``spec.auth`` is truthy; otherwise
        the ``auth`` field is left UNSET.
        """
        from opensandbox.api.lifecycle.models.image_spec_auth import ImageSpecAuth
        from opensandbox.api.lifecycle.types import UNSET

        credentials = spec.auth
        api_auth = (
            ImageSpecAuth(
                username=credentials.username,
                password=credentials.password,
            )
            if credentials
            else UNSET
        )

        # The API names the image reference 'uri'; the domain model calls it 'image'.
        return ImageSpec(uri=spec.image, auth=api_auth)

    @staticmethod
    def to_api_volume(volume: Volume):
        """Build an API Volume from a domain Volume.

        Each backend (host, pvc, ossfs) and ``sub_path`` is set only when the
        domain value is present; otherwise the API field stays UNSET. An OSSFS
        backend is forwarded only when both ``access_key_id`` and
        ``access_key_secret`` are set; without them the ``ossfs`` field is
        left UNSET.
        """
        from opensandbox.api.lifecycle.models.host import (
            Host as ApiHost,
        )
        from opensandbox.api.lifecycle.models.ossfs import (
            OSSFS as ApiOSSFS,
        )
        from opensandbox.api.lifecycle.models.ossfs_version import OSSFSVersion
        from opensandbox.api.lifecycle.models.pvc import (
            PVC as ApiPVC,
        )
        from opensandbox.api.lifecycle.models.volume import Volume as ApiVolume
        from opensandbox.api.lifecycle.types import UNSET

        api_host = ApiHost(path=volume.host.path) if volume.host is not None else UNSET
        api_pvc = (
            ApiPVC(claim_name=volume.pvc.claim_name)
            if volume.pvc is not None
            else UNSET
        )

        api_ossfs = UNSET
        ossfs = volume.ossfs
        if ossfs is not None:
            key_id = ossfs.access_key_id
            key_secret = ossfs.access_key_secret
            if key_id is not None and key_secret is not None:
                api_ossfs = ApiOSSFS(
                    bucket=ossfs.bucket,
                    endpoint=ossfs.endpoint,
                    access_key_id=key_id,
                    access_key_secret=key_secret,
                    version=OSSFSVersion(ossfs.version),
                    options=UNSET if ossfs.options is None else ossfs.options,
                )

        api_sub_path = UNSET if volume.sub_path is None else volume.sub_path

        return ApiVolume(
            name=volume.name,
            mount_path=volume.mount_path,
            read_only=volume.read_only,
            host=api_host,
            pvc=api_pvc,
            ossfs=api_ossfs,
            sub_path=api_sub_path,
        )

    @staticmethod
    def _to_api_network_policy(network_policy: NetworkPolicy):
        """Build an API NetworkPolicy from a domain NetworkPolicy.

        A falsy ``default_action`` and a ``None`` egress list are both sent as
        UNSET.

        Raises:
            TypeError: If ``network_policy`` is not a domain NetworkPolicy.
        """
        from opensandbox.api.lifecycle.models.network_policy import (
            NetworkPolicy as ApiNetworkPolicy,
        )
        from opensandbox.api.lifecycle.models.network_policy_default_action import (
            NetworkPolicyDefaultAction,
        )
        from opensandbox.api.lifecycle.types import UNSET

        if not isinstance(network_policy, NetworkPolicy):
            raise TypeError(
                "network_policy must be a NetworkPolicy or None, "
                f"got {type(network_policy).__name__}"
            )

        default_action = network_policy.default_action
        api_default_action = (
            NetworkPolicyDefaultAction(default_action) if default_action else UNSET
        )

        egress_rules = network_policy.egress
        api_egress = (
            UNSET
            if egress_rules is None
            else SandboxModelConverter.to_api_network_rules(egress_rules)
        )

        return ApiNetworkPolicy(
            default_action=api_default_action,
            egress=api_egress,
        )

    @staticmethod
    def to_api_create_sandbox_request(
        spec: SandboxImageSpec,
        entrypoint: list[str],
        env: dict[str, str],
        metadata: dict[str, str],
        timeout: timedelta | None,
        resource: dict[str, str],
        network_policy: NetworkPolicy | None,
        extensions: dict[str, str],
        volumes: list[Volume] | None,
    ) -> CreateSandboxRequest:
        """Assemble an API CreateSandboxRequest from domain-level parameters.

        Empty ``env``, ``metadata`` and ``extensions`` dicts, a ``None``
        network policy and a missing or empty ``volumes`` list are sent as
        UNSET. ``timeout`` is set only when provided, as whole seconds
        (fractional seconds are truncated).

        Raises:
            TypeError: If ``network_policy`` is neither ``None`` nor a domain
                NetworkPolicy.
        """
        from opensandbox.api.lifecycle.models.create_sandbox_request_env import (
            CreateSandboxRequestEnv,
        )
        from opensandbox.api.lifecycle.models.create_sandbox_request_extensions import (
            CreateSandboxRequestExtensions,
        )
        from opensandbox.api.lifecycle.models.create_sandbox_request_metadata import (
            CreateSandboxRequestMetadata,
        )
        from opensandbox.api.lifecycle.models.resource_limits import ResourceLimits
        from opensandbox.api.lifecycle.types import UNSET

        api_env = CreateSandboxRequestEnv.from_dict(env) if env else UNSET
        api_metadata = (
            CreateSandboxRequestMetadata.from_dict(metadata) if metadata else UNSET
        )
        api_resource_limits = ResourceLimits.from_dict(resource)

        api_network_policy = (
            UNSET
            if network_policy is None
            else SandboxModelConverter._to_api_network_policy(network_policy)
        )

        api_extensions = (
            CreateSandboxRequestExtensions.from_dict(extensions) if extensions else UNSET
        )

        api_volumes = UNSET
        if volumes is not None and len(volumes) > 0:
            api_volumes = []
            for volume in volumes:
                api_volumes.append(SandboxModelConverter.to_api_volume(volume))

        request = CreateSandboxRequest(
            image=SandboxModelConverter.to_api_image_spec(spec),
            entrypoint=entrypoint,
            env=api_env,
            metadata=api_metadata,
            resource_limits=api_resource_limits,
            network_policy=api_network_policy,
            extensions=api_extensions,
            volumes=api_volumes,
        )
        if timeout is not None:
            request.timeout = int(timeout.total_seconds())
        return request

    @staticmethod
    def to_api_renew_request(
        new_expiration_time: datetime,
    ) -> RenewSandboxExpirationRequest:
        """Wrap an expiration time in an API renew request.

        A naive datetime is interpreted as UTC so that the serialized value is
        unambiguous; timezone-aware datetimes are passed through unchanged.
        """
        expires_at = new_expiration_time
        if expires_at.tzinfo is None:
            expires_at = expires_at.replace(tzinfo=timezone.utc)

        return RenewSandboxExpirationRequest(expires_at=expires_at)

    @staticmethod
    def to_api_network_rules(rules: list[NetworkRule]):
        """Convert domain NetworkRule objects into API NetworkRule objects."""
        from opensandbox.api.lifecycle.models.network_rule import (
            NetworkRule as ApiNetworkRule,
        )
        from opensandbox.api.lifecycle.models.network_rule_action import (
            NetworkRuleAction,
        )

        api_rules = []
        for rule in rules:
            api_rules.append(
                ApiNetworkRule(
                    action=NetworkRuleAction(rule.action),
                    target=rule.target,
                )
            )
        return api_rules

    @staticmethod
    def to_sandbox_network_policy(api_policy):
        """Convert an API NetworkPolicy into a domain NetworkPolicy.

        An UNSET default action becomes ``"deny"``; an UNSET egress list
        becomes ``None``.

        Raises:
            TypeError: If ``api_policy`` is not an API NetworkPolicy.
        """
        from opensandbox.api.lifecycle.models.network_policy import (
            NetworkPolicy as ApiNetworkPolicy,
        )
        from opensandbox.api.lifecycle.types import Unset

        if not isinstance(api_policy, ApiNetworkPolicy):
            raise TypeError(f"Expected NetworkPolicy, got {type(api_policy).__name__}")

        api_action = api_policy.default_action
        default_action: str | None = (
            "deny" if isinstance(api_action, Unset) else str(api_action.value)
        )

        egress: list[NetworkRule] | None = None
        api_egress = api_policy.egress
        if not isinstance(api_egress, Unset):
            egress = []
            for rule in api_egress:
                egress.append(
                    NetworkRule(
                        action=cast(Literal["allow", "deny"], rule.action.value),
                        target=rule.target,
                    )
                )

        payload = {
            "defaultAction": default_action,
            "egress": egress,
        }
        return NetworkPolicy.model_validate(payload)

    @staticmethod
    def to_sandbox_renew_response(
        api_response: RenewSandboxExpirationResponse,
    ) -> SandboxRenewResponse:
        """
        Convert an API RenewSandboxExpirationResponse into a SandboxRenewResponse.

        The public SDK surface intentionally exposes domain models rather than
        the generated OpenAPI client models.

        Raises:
            TypeError: If ``api_response`` is not a RenewSandboxExpirationResponse.
        """
        if isinstance(api_response, RenewSandboxExpirationResponse):
            return SandboxRenewResponse(expires_at=api_response.expires_at)

        raise TypeError(
            f"Expected RenewSandboxExpirationResponse, got {type(api_response).__name__}"
        )

    @staticmethod
    def to_sandbox_create_response(
        api_response: CreateSandboxResponse,
    ) -> SandboxCreateResponse:
        """Convert an API CreateSandboxResponse; the id is coerced to ``str``."""
        sandbox_id = str(api_response.id)
        return SandboxCreateResponse(id=sandbox_id)

    @staticmethod
    def to_sandbox_info(api_sandbox: Sandbox) -> SandboxInfo:
        """Convert an API Sandbox into a domain SandboxInfo.

        Missing or UNSET ``image`` becomes ``None``; image credentials are
        kept only when both username and password are strings. Metadata is
        read from ``additional_properties`` when available, or from a plain
        dict, and defaults to an empty dict. An UNSET ``expires_at`` becomes
        ``None``.
        """
        from opensandbox.api.lifecycle.types import Unset
        from opensandbox.models.sandboxes import SandboxImageAuth

        image_spec = None
        if hasattr(api_sandbox, "image") and not isinstance(api_sandbox.image, Unset):
            api_image = api_sandbox.image
            image_auth = None
            if hasattr(api_image, "auth") and not isinstance(api_image.auth, Unset):
                api_auth = api_image.auth
                username = getattr(api_auth, "username", None)
                password = getattr(api_auth, "password", None)
                if isinstance(username, str) and isinstance(password, str):
                    image_auth = SandboxImageAuth(username=username, password=password)
            image_spec = SandboxImageSpec(
                image=api_image.uri,
                auth=image_auth,
            )

        metadata: dict[str, str] = {}
        if hasattr(api_sandbox, "metadata") and not isinstance(api_sandbox.metadata, Unset):
            api_metadata = api_sandbox.metadata
            absent = object()
            properties = getattr(api_metadata, "additional_properties", absent)
            if properties is not absent and not isinstance(properties, Unset):
                if isinstance(properties, dict):
                    metadata = dict(properties)
            elif isinstance(api_metadata, dict):
                metadata = api_metadata

        api_expires_at = api_sandbox.expires_at
        expires_at = None if isinstance(api_expires_at, Unset) else api_expires_at

        return SandboxInfo(
            id=api_sandbox.id,
            status=SandboxModelConverter._convert_sandbox_status(api_sandbox.status),
            image=image_spec,
            created_at=api_sandbox.created_at,
            expires_at=expires_at,
            entrypoint=api_sandbox.entrypoint,
            metadata=metadata,
        )

    @staticmethod
    def to_paged_sandbox_infos(
        api_response: ListSandboxesResponse,
    ) -> PagedSandboxInfos:
        """Convert an API ListSandboxesResponse into a domain PagedSandboxInfos.

        A response without an ``items`` attribute yields an empty sandbox list.
        """
        api_items = getattr(api_response, "items", [])

        sandbox_infos = []
        for api_sandbox in api_items:
            sandbox_infos.append(SandboxModelConverter.to_sandbox_info(api_sandbox))

        pagination = SandboxModelConverter._convert_pagination_info(
            api_response.pagination
        )
        return PagedSandboxInfos(
            sandbox_infos=sandbox_infos,
            pagination=pagination,
        )

    @staticmethod
    def to_sandbox_endpoint(api_endpoint: Endpoint) -> SandboxEndpoint:
        """Convert an API Endpoint into a domain SandboxEndpoint.

        Headers are copied from ``headers.additional_properties``; UNSET
        headers become an empty dict.
        """
        from opensandbox.api.lifecycle.types import Unset

        api_headers = api_endpoint.headers
        headers: dict[str, str] = (
            {}
            if isinstance(api_headers, Unset)
            else dict(api_headers.additional_properties)
        )
        return SandboxEndpoint(
            endpoint=api_endpoint.endpoint,
            headers=headers,
        )

    @staticmethod
    def _convert_sandbox_status(
        api_status: ApiSandboxStatus | None,
    ) -> SandboxStatus:
        """Convert an API SandboxStatus into a domain SandboxStatus.

        ``None`` maps to a status with state ``"Unknown"``. ``reason`` and
        ``message`` are kept only when they are strings, and
        ``last_transition_at`` only when it is a datetime; any other value
        (including UNSET or a missing attribute) becomes ``None``.
        """
        if api_status is None:
            return SandboxStatus(
                state="Unknown",
                reason=None,
                message=None,
                last_transition_at=None,
            )

        raw_reason = getattr(api_status, "reason", None)
        raw_message = getattr(api_status, "message", None)
        raw_transition = getattr(api_status, "last_transition_at", None)

        reason: str | None = raw_reason if isinstance(raw_reason, str) else None
        message: str | None = raw_message if isinstance(raw_message, str) else None
        last_transition_at: datetime | None = (
            raw_transition if isinstance(raw_transition, datetime) else None
        )

        return SandboxStatus(
            state=api_status.state,
            reason=reason,
            message=message,
            last_transition_at=last_transition_at,
        )

    @staticmethod
    def _convert_pagination_info(
        api_pagination: ApiPaginationInfo | None,
    ) -> PaginationInfo:
        """Convert an API PaginationInfo into a domain PaginationInfo.

        A missing pagination object, or any falsy individual field, falls back
        to the defaults: page 1, page size 10, zero totals, no next page.
        """
        if api_pagination is None:
            page, page_size = 1, 10
            total_pages, total_items = 0, 0
            has_next_page = False
        else:
            page = api_pagination.page or 1
            page_size = api_pagination.page_size or 10
            total_pages = api_pagination.total_pages or 0
            total_items = api_pagination.total_items or 0
            has_next_page = api_pagination.has_next_page or False

        return PaginationInfo(
            page=page,
            page_size=page_size,
            total_pages=total_pages,
            total_items=total_items,
            has_next_page=has_next_page,
        )


================================================
FILE: sdks/sandbox/python/src/opensandbox/adapters/egress_adapter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Direct egress sidecar adapter implementation.
"""

import logging

import httpx

from opensandbox.adapters.converter.exception_converter import ExceptionConverter
from opensandbox.adapters.converter.response_handler import (
    handle_api_error,
    require_parsed,
)
from opensandbox.config import ConnectionConfig
from opensandbox.models.sandboxes import NetworkPolicy, NetworkRule, SandboxEndpoint
from opensandbox.services.egress import Egress

logger = logging.getLogger(__name__)


class EgressAdapter(Egress):
    """Egress service implementation that talks directly to the egress sidecar.

    Requests are issued through the generated egress client, backed by an
    ``httpx.AsyncClient`` that uses the transport from the ConnectionConfig.
    """

    def __init__(self, connection_config: ConnectionConfig, endpoint: SandboxEndpoint) -> None:
        """Create the generated client and its underlying async HTTP client.

        Args:
            connection_config: Supplies protocol, request timeout, user agent,
                default headers and the httpx transport.
            endpoint: Sidecar endpoint (host part of the base URL) plus
                endpoint-specific headers.
        """
        self.connection_config = connection_config
        self.endpoint = endpoint
        from opensandbox.api.egress import Client

        config = self.connection_config
        base_url = f"{config.protocol}://{self.endpoint.endpoint}"
        timeout = httpx.Timeout(config.request_timeout.total_seconds())

        # Later sources override earlier ones: endpoint headers win over the
        # connection-level headers, which win over the default User-Agent.
        headers = {"User-Agent": config.user_agent}
        headers.update(config.headers)
        headers.update(self.endpoint.headers)

        self._client = Client(
            base_url=base_url,
            timeout=timeout,
        )
        self._httpx_client = httpx.AsyncClient(
            base_url=base_url,
            headers=headers,
            timeout=timeout,
            transport=config.transport,
        )
        self._client.set_async_httpx_client(self._httpx_client)

    async def get_policy(self) -> NetworkPolicy:
        """Fetch the current egress policy from the sidecar.

        Returns:
            The policy converted to the domain NetworkPolicy model.

        Raises:
            Exception: Any failure (HTTP error, missing or mistyped policy
                payload, transport error) is logged and re-raised after
                conversion by ``ExceptionConverter.to_sandbox_exception``.
        """
        operation = "Get egress policy"
        try:
            from opensandbox.api.egress.api.policy import get_policy
            from opensandbox.api.egress.models.network_policy import (
                NetworkPolicy as ApiNetworkPolicy,
            )
            from opensandbox.api.egress.models.policy_status_response import (
                PolicyStatusResponse,
            )
            from opensandbox.api.egress.types import Unset

            response_obj = await get_policy.asyncio_detailed(client=self._client)
            handle_api_error(response_obj, operation)
            status = require_parsed(response_obj, PolicyStatusResponse, operation)

            api_policy = status.policy
            if isinstance(api_policy, Unset):
                raise ValueError("Egress policy response missing policy payload")
            if not isinstance(api_policy, ApiNetworkPolicy):
                raise TypeError(f"Expected NetworkPolicy, got {type(api_policy).__name__}")

            policy_payload = api_policy.to_dict()
            return NetworkPolicy.model_validate(policy_payload)
        except Exception as e:
            logger.error("Failed to get egress policy from endpoint %s", self.endpoint.endpoint, exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    async def patch_rules(self, rules: list[NetworkRule]) -> None:
        """Send the given egress rules to the sidecar's patch-policy endpoint.

        Args:
            rules: Domain rules converted one-to-one into API NetworkRule objects.

        Raises:
            Exception: Any failure is logged and re-raised after conversion by
                ``ExceptionConverter.to_sandbox_exception``.
        """
        try:
            from opensandbox.api.egress.api.policy import patch_policy
            from opensandbox.api.egress.models.network_rule import (
                NetworkRule as ApiNetworkRule,
            )
            from opensandbox.api.egress.models.network_rule_action import (
                NetworkRuleAction,
            )

            api_rules = []
            for rule in rules:
                api_rules.append(
                    ApiNetworkRule(
                        action=NetworkRuleAction(rule.action),
                        target=rule.target,
                    )
                )

            response_obj = await patch_policy.asyncio_detailed(
                client=self._client,
                body=api_rules,
            )
            handle_api_error(response_obj, "Patch egress rules")
        except Exception as e:
            logger.error("Failed to patch egress policy via endpoint %s", self.endpoint.endpoint, exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e


================================================
FILE: sdks/sandbox/python/src/opensandbox/adapters/factory.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Service factory for creating adapter instances.

Factory for creating service adapter instances that provide access to
sandbox operations including command execution, file system management,
health monitoring, and metrics collection.

All HTTP clients created by adapters share the same `ConnectionConfig.transport`
to ensure consistent pooling/proxy/retry behavior across services.
"""

from opensandbox.adapters.command_adapter import CommandsAdapter
from opensandbox.adapters.egress_adapter import EgressAdapter
from opensandbox.adapters.filesystem_adapter import FilesystemAdapter
from opensandbox.adapters.health_adapter import HealthAdapter
from opensandbox.adapters.metrics_adapter import MetricsAdapter
from opensandbox.adapters.sandboxes_adapter import SandboxesAdapter
from opensandbox.config import ConnectionConfig
from opensandbox.models.sandboxes import SandboxEndpoint
from opensandbox.services.command import Commands
from opensandbox.services.egress import Egress
from opensandbox.services.filesystem import Filesystem
from opensandbox.services.health import Health
from opensandbox.services.metrics import Metrics
from opensandbox.services.sandbox import Sandboxes


class AdapterFactory:
    """
    Builds the concrete adapter behind each sandbox service interface.

    The factory only stores a ConnectionConfig and hands it to every adapter
    it constructs; endpoint-scoped adapters additionally receive the
    SandboxEndpoint supplied by the caller.

    Usage:
        config = ConnectionConfig(...)
        factory = AdapterFactory(config)
    """

    def __init__(self, connection_config: ConnectionConfig) -> None:
        """
        Store the connection configuration used for every adapter created later.

        Args:
            connection_config: Shared connection configuration, including transport.
        """
        self.connection_config = connection_config

    def create_sandbox_service(self) -> Sandboxes:
        """Build the sandbox lifecycle service.

        Returns:
            A SandboxesAdapter constructed from the stored connection configuration
        """
        config = self.connection_config
        return SandboxesAdapter(config)

    def create_filesystem_service(self, endpoint: SandboxEndpoint) -> Filesystem:
        """Build the filesystem service for one sandbox endpoint.

        Args:
            endpoint: Sandbox endpoint the filesystem adapter should talk to

        Returns:
            A FilesystemAdapter bound to the stored configuration and `endpoint`
        """
        config = self.connection_config
        return FilesystemAdapter(config, endpoint)

    def create_command_service(self, endpoint: SandboxEndpoint) -> Commands:
        """Build the command execution service for one sandbox endpoint.

        Args:
            endpoint: Sandbox endpoint the command adapter should talk to

        Returns:
            A CommandsAdapter bound to the stored configuration and `endpoint`
        """
        config = self.connection_config
        return CommandsAdapter(config, endpoint)

    def create_egress_service(self, endpoint: SandboxEndpoint) -> Egress:
        """Build the egress service for runtime egress policy operations.

        Args:
            endpoint: Sandbox endpoint the egress adapter should talk to

        Returns:
            An EgressAdapter bound to the stored configuration and `endpoint`
        """
        config = self.connection_config
        return EgressAdapter(config, endpoint)

    def create_health_service(self, endpoint: SandboxEndpoint) -> Health:
        """Build the health check service for one sandbox endpoint.

        Args:
            endpoint: Sandbox endpoint the health adapter should talk to

        Returns:
            A HealthAdapter bound to the stored configuration and `endpoint`
        """
        config = self.connection_config
        return HealthAdapter(config, endpoint)

    def create_metrics_service(self, endpoint: SandboxEndpoint) -> Metrics:
        """Build the metrics service for one sandbox endpoint.

        Args:
            endpoint: Sandbox endpoint the metrics adapter should talk to

        Returns:
            A MetricsAdapter bound to the stored configuration and `endpoint`
        """
        config = self.connection_config
        return MetricsAdapter(config, endpoint)


================================================
FILE: sdks/sandbox/python/src/opensandbox/adapters/filesystem_adapter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Filesystem service adapter implementation.

Implementation of FilesystemService that adapts openapi-python-client generated FilesystemApi.
This adapter handles file operations within sandboxes using the auto-generated API client.
"""

import json
import logging
from collections.abc import AsyncIterator
from io import IOBase, TextIOBase
from typing import TypedDict
from urllib.parse import quote

import httpx

from opensandbox.adapters.converter.exception_converter import (
    ExceptionConverter,
)
from opensandbox.adapters.converter.filesystem_model_converter import (
    FilesystemModelConverter,
)
from opensandbox.adapters.converter.response_handler import (
    extract_request_id,
    handle_api_error,
)
from opensandbox.config import ConnectionConfig
from opensandbox.exceptions import InvalidArgumentException, SandboxApiException
from opensandbox.models.filesystem import (
    ContentReplaceEntry,
    EntryInfo,
    MoveEntry,
    SearchEntry,
    SetPermissionEntry,
    WriteEntry,
)
from opensandbox.models.sandboxes import SandboxEndpoint
from opensandbox.services.filesystem import Filesystem

logger = logging.getLogger(__name__)

class _DownloadRequest(TypedDict):
    """Pieces of a file download request, as returned by `_build_download_request`."""

    # Full download URL; `_build_download_request` already appends the
    # percent-encoded `?path=...` query string to it.
    url: str
    # Extra query parameters to send, or None when there are none
    # (`_build_download_request` always sets this to None).
    params: dict[str, str] | None
    # Per-request headers; holds a "Range" entry when a range was requested.
    headers: dict[str, str]


class FilesystemAdapter(Filesystem):
    """
    Filesystem service implementation that talks to a sandbox's execd endpoint.

    Directory, delete, move, chmod, replace, search and stat operations go
    through the openapi-python-client generated execd API. Upload and download
    use the adapter's httpx client directly so multipart bodies and streamed
    responses can be handled explicitly.

    All HTTP clients created by this adapter share `ConnectionConfig.transport`.
    """

    FILESYSTEM_UPLOAD_PATH = "/files/upload"
    FILESYSTEM_DOWNLOAD_PATH = "/files/download"

    def __init__(
        self, connection_config: ConnectionConfig, execd_endpoint: SandboxEndpoint
    ) -> None:
        """
        Initialize the filesystem service adapter.

        Args:
            connection_config: Connection configuration (shared transport, headers, timeouts)
            execd_endpoint: Execd endpoint information for direct HTTP calls
        """
        self.connection_config = connection_config
        self.execd_endpoint = execd_endpoint
        from opensandbox.api.execd import Client

        base_url = self._get_execd_base_url()
        timeout_seconds = self.connection_config.request_timeout.total_seconds()
        timeout = httpx.Timeout(timeout_seconds)
        # Later entries win: endpoint headers override connection headers,
        # which in turn override the default User-Agent.
        headers = {
            "User-Agent": self.connection_config.user_agent,
            **self.connection_config.headers,
            **self.execd_endpoint.headers,
        }

        self._httpx_client = httpx.AsyncClient(
            base_url=base_url,
            headers=headers,
            timeout=timeout,
            transport=self.connection_config.transport,
        )

        # Execd API does not require authentication
        self._client = Client(
            base_url=base_url,
            timeout=timeout,
        )
        # The generated client issues its requests through the adapter-owned
        # httpx client, so both code paths share headers, timeout and transport.
        self._client.set_async_httpx_client(self._httpx_client)

    def _get_execd_base_url(self) -> str:
        """Return `<protocol>://<endpoint>` for the execd service."""
        protocol = self.connection_config.protocol
        return f"{protocol}://{self.execd_endpoint.endpoint}"

    async def _get_httpx_client(self) -> httpx.AsyncClient:
        """Return adapter-owned httpx client for execd (no auth required)."""
        return self._httpx_client

    async def _get_client(self):
        """Return the client for execd API (no auth required)."""
        return self._client

    def _get_execd_url(self, path: str) -> str:
        """Build the absolute execd URL for `path` (expected to start with '/')."""
        return f"{self._get_execd_base_url()}{path}"

    @staticmethod
    async def _iter_and_close(
        response: httpx.Response, chunk_size: int
    ) -> AsyncIterator[bytes]:
        """Yield body chunks of a streamed response and always close it afterwards.

        Closing in `finally` releases the connection even when the consumer
        stops iterating early or an error interrupts the stream.
        """
        try:
            async for chunk in response.aiter_bytes(chunk_size=chunk_size):
                yield chunk
        finally:
            await response.aclose()

    async def read_file(
        self,
        path: str,
        *,
        encoding: str = "utf-8",
        range_header: str | None = None,
    ) -> str:
        """Read file content as string via HTTP API.

        Args:
            path: Path to the file to read
            encoding: Codec used to decode the downloaded bytes
            range_header: Optional range header for partial content requests

        Returns:
            The downloaded bytes decoded with `encoding`
        """
        content = await self.read_bytes(path, range_header=range_header)
        return content.decode(encoding)

    async def read_bytes(
        self,
        path: str,
        *,
        range_header: str | None = None,
    ) -> bytes:
        """Read file content as bytes with support for range requests.

        Args:
            path: Path to the file to read
            range_header: Optional range header for partial content requests

        Returns:
            File content as bytes

        Raises:
            Exception: Whatever `ExceptionConverter.to_sandbox_exception`
                produces for the underlying failure
        """
        logger.debug("Reading file as bytes: %s", path)
        try:
            request_data = self._build_download_request(path, range_header)
            client = await self._get_httpx_client()

            if request_data["params"] is None:
                response = await client.get(
                    request_data["url"],
                    headers=request_data["headers"],
                )
            else:
                response = await client.get(
                    request_data["url"],
                    headers=request_data["headers"],
                    params=request_data["params"],
                )
            response.raise_for_status()
            return response.content
        except Exception as e:
            logger.error("Failed to read file %s", path, exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    async def read_bytes_stream(
        self,
        path: str,
        *,
        chunk_size: int = 64 * 1024,
        range_header: str | None = None,
    ) -> AsyncIterator[bytes]:
        """Stream file content as bytes chunks via HTTP (true streaming).

        Args:
            path: Path to the file to read
            chunk_size: Chunk size passed to the response byte iterator
            range_header: Optional range header for partial content requests

        Returns:
            Async iterator over the response body; the underlying response is
            closed when iteration finishes or the iterator is closed

        Raises:
            Exception: Whatever `ExceptionConverter.to_sandbox_exception`
                produces when the request fails or the status code is >= 300
        """
        logger.debug("Streaming file as bytes: %s (chunk_size=%s)", path, chunk_size)
        try:
            request_data = self._build_download_request(path, range_header)
            client = await self._get_httpx_client()

            url = request_data["url"]
            params = request_data["params"]
            headers = request_data["headers"]

            if params is None:
                request = client.build_request("GET", url, headers=headers)
            else:
                request = client.build_request(
                    "GET",
                    url,
                    headers=headers,
                    params=params,
                )

            response = await client.send(request, stream=True)

            if response.status_code >= 300:
                # Drain and close the failed response before raising so the
                # connection is not left open.
                try:
                    await response.aread()
                finally:
                    await response.aclose()

                raise SandboxApiException(
                    f"Failed to stream file {path}: {response.status_code}",
                    status_code=response.status_code,
                    request_id=extract_request_id(response.headers),
                )
            return self._iter_and_close(response, chunk_size)
        except Exception as e:
            logger.error("Failed to stream file %s", path, exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    async def write_files(self, entries: list[WriteEntry]) -> None:
        """Write multiple files in a single operation using multipart upload.

        Each entry contributes a JSON "metadata" part (path, owner, group, mode)
        followed by a "file" part with the content. Text data is encoded with
        the entry's encoding (UTF-8 when unset) before upload.

        Aligned with Kotlin SDK implementation.

        Args:
            entries: Files to upload; an empty list is a no-op

        Raises:
            Exception: Whatever `ExceptionConverter.to_sandbox_exception`
                produces for invalid entries or a failed upload
        """
        if not entries:
            return

        logger.debug("Writing %d files", len(entries))

        try:
            client = await self._get_httpx_client()
            multipart_parts = []

            for entry in entries:
                if not entry.path:
                    raise InvalidArgumentException("File path cannot be null")
                if entry.data is None:
                    raise InvalidArgumentException("File data cannot be null")

                metadata = {
                    "path": entry.path,
                    "owner": entry.owner,
                    "group": entry.group,
                    "mode": entry.mode,
                }
                metadata_json = json.dumps(metadata)

                multipart_parts.append(
                    ("metadata", ("metadata", metadata_json, "application/json"))
                )

                content: bytes | IOBase
                content_type: str

                if isinstance(entry.data, bytes):
                    content = entry.data
                    content_type = "application/octet-stream"

                elif isinstance(entry.data, str):
                    encoding = entry.encoding or "utf-8"
                    # Encode explicitly: httpx serializes str multipart content
                    # as UTF-8, which would contradict the charset declared
                    # below whenever another encoding is requested.
                    content = entry.data.encode(encoding)
                    content_type = f"text/plain; charset={encoding}"

                elif isinstance(entry.data, TextIOBase):
                    # TextIOBase is an IOBase subclass, so it must be rejected
                    # before the generic binary-stream branch.
                    raise InvalidArgumentException(
                        "File stream must be binary (opened with 'rb'). Text streams are not supported."
                    )

                elif isinstance(entry.data, IOBase):
                    content = entry.data
                    content_type = "application/octet-stream"

                else:
                    raise InvalidArgumentException(
                        f"Unsupported file data type: {type(entry.data)}"
                    )
                multipart_parts.append(("file", (entry.path, content, content_type)))

            url = self._get_execd_url(self.FILESYSTEM_UPLOAD_PATH)
            response = await client.post(url, files=multipart_parts)
            response.raise_for_status()
        except Exception as e:
            logger.error("Failed to write %d files", len(entries), exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    async def write_file(
        self,
        path: str,
        data: str | bytes | IOBase,
        *,
        encoding: str = "utf-8",
        mode: int = 755,
        owner: str | None = None,
        group: str | None = None,
    ) -> None:
        """Write single file (convenience method).

        Args:
            path: Destination path of the file
            data: Content as text, bytes, or a binary stream
            encoding: Encoding applied when `data` is text
            mode: Permission value forwarded unchanged in the upload metadata
            owner: Optional owner forwarded in the upload metadata
            group: Optional group forwarded in the upload metadata
        """
        # NOTE(review): the default is the decimal literal 755, not 0o755;
        # presumably execd reads the digits as octal notation - confirm.
        entry = WriteEntry(
            path=path,
            data=data,
            mode=mode,
            owner=owner,
            group=group,
            encoding=encoding,
        )
        await self.write_files([entry])

    async def create_directories(self, entries: list[WriteEntry]) -> None:
        """Create multiple directories with specified permissions.

        Args:
            entries: List of directory entries with paths and permissions

        Raises:
            Exception: Whatever `ExceptionConverter.to_sandbox_exception`
                produces if directory creation fails
        """
        try:
            from opensandbox.api.execd.api.filesystem import make_dirs

            client = await self._get_client()
            response_obj = await make_dirs.asyncio_detailed(
                client=client,
                body=FilesystemModelConverter.to_api_make_dirs_body(entries),
            )

            handle_api_error(response_obj, "Create directories")

        except Exception as e:
            logger.error("Failed to create directories", exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    async def delete_files(self, paths: list[str]) -> None:
        """Delete files using auto-generated API.

        Args:
            paths: Paths of the files to remove
        """
        try:
            from opensandbox.api.execd.api.filesystem import remove_files

            client = await self._get_client()
            response_obj = await remove_files.asyncio_detailed(
                client=client,
                path=paths,
            )

            handle_api_error(response_obj, "Delete files")

        except Exception as e:
            logger.error("Failed to delete %d files", len(paths), exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    async def delete_directories(self, paths: list[str]) -> None:
        """Delete directories using auto-generated API.

        Args:
            paths: Paths of the directories to remove
        """
        try:
            from opensandbox.api.execd.api.filesystem import remove_dirs

            client = await self._get_client()
            response_obj = await remove_dirs.asyncio_detailed(
                client=client,
                path=paths,
            )

            handle_api_error(response_obj, "Delete directories")

        except Exception as e:
            logger.error("Failed to delete %d directories", len(paths), exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    async def move_files(self, entries: list[MoveEntry]) -> None:
        """Move or rename multiple files and directories.

        Args:
            entries: List of move operations with source and destination paths

        Raises:
            Exception: Whatever `ExceptionConverter.to_sandbox_exception`
                produces if the move operations fail
        """
        try:
            from opensandbox.api.execd.api.filesystem import rename_files
            rename_items = FilesystemModelConverter.to_api_rename_file_items(entries)

            client = await self._get_client()
            response_obj = await rename_files.asyncio_detailed(
                client=client,
                body=rename_items,
            )

            handle_api_error(response_obj, "Move files")

        except Exception as e:
            logger.error("Failed to move files", exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    async def set_permissions(self, entries: list[SetPermissionEntry]) -> None:
        """Set file permissions using auto-generated API.

        Args:
            entries: Permission changes to apply
        """
        try:
            from opensandbox.api.execd.api.filesystem import chmod_files

            client = await self._get_client()
            response_obj = await chmod_files.asyncio_detailed(
                client=client,
                body=FilesystemModelConverter.to_api_chmod_files_body(entries),
            )

            handle_api_error(response_obj, "Set permissions")

        except Exception as e:
            logger.error("Failed to set permissions", exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    async def replace_contents(self, entries: list[ContentReplaceEntry]) -> None:
        """Replace file contents using auto-generated API.

        Args:
            entries: Content replacements to apply
        """
        try:
            from opensandbox.api.execd.api.filesystem import replace_content

            client = await self._get_client()
            response_obj = await replace_content.asyncio_detailed(
                client=client,
                body=FilesystemModelConverter.to_api_replace_content_body(entries),
            )

            handle_api_error(response_obj, "Replace contents")

        except Exception as e:
            logger.error("Failed to replace contents", exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    async def search(self, entry: SearchEntry) -> list[EntryInfo]:
        """Search files using auto-generated API.

        Args:
            entry: Search request carrying the base path and the pattern

        Returns:
            Matching entries; an empty list when the response has no parsed body
        """
        try:
            from opensandbox.api.execd.api.filesystem import search_files
            from opensandbox.api.execd.models import FileInfo

            client = await self._get_client()
            response_obj = await search_files.asyncio_detailed(
                client=client,
                path=entry.path,
                pattern=entry.pattern,
            )

            handle_api_error(response_obj, "Search files")

            parsed = response_obj.parsed
            if not parsed:
                return []

            # Anything other than a list of FileInfo is treated as an API error.
            if isinstance(parsed, list) and all(isinstance(x, FileInfo) for x in parsed):
                return FilesystemModelConverter.to_entry_info_list(parsed)
            raise SandboxApiException(
                message="Search files failed: unexpected response type",
                request_id=extract_request_id(getattr(response_obj, "headers", None)),
            )

        except Exception as e:
            logger.error("Failed to search files", exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    async def get_file_info(self, paths: list[str]) -> dict[str, EntryInfo]:
        """Get file information using auto-generated API.

        Args:
            paths: Paths to look up

        Returns:
            Entry information as converted by
            `FilesystemModelConverter.to_entry_info_map`; an empty dict when
            the response has no parsed body
        """
        try:
            from opensandbox.api.execd.api.filesystem import get_files_info

            client = await self._get_client()
            response_obj = await get_files_info.asyncio_detailed(
                client=client,
                path=paths,
            )

            handle_api_error(response_obj, "Get file info")

            if not response_obj.parsed:
                return {}

            return FilesystemModelConverter.to_entry_info_map(response_obj.parsed)

        except Exception as e:
            logger.error("Failed to get file info for %d paths", len(paths), exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    def _build_download_request(
        self, path: str, range_header: str | None = None
    ) -> _DownloadRequest:
        """Build HTTP request for file download operations.

        Args:
            path: File path to download
            range_header: Optional range header for partial downloads

        Returns:
            Dictionary containing URL, parameters, and headers for the request
        """
        # The path is percent-encoded here ('/' kept literal) and embedded in
        # the URL, so no separate query parameters are returned.
        encoded_path = quote(path, safe="/")
        url = f"{self._get_execd_url(self.FILESYSTEM_DOWNLOAD_PATH)}?path={encoded_path}"
        headers: dict[str, str] = {}

        if range_header:
            headers["Range"] = range_header

        return {
            "url": url,
            "params": None,
            "headers": headers,
        }


================================================
FILE: sdks/sandbox/python/src/opensandbox/adapters/health_adapter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Health service adapter implementation.

Implementation of HealthService that adapts openapi-python-client generated HealthApi.
This adapter provides health check functionality for sandboxes.
"""

import logging

import httpx

from opensandbox.config import ConnectionConfig
from opensandbox.models.sandboxes import SandboxEndpoint
from opensandbox.services.health import Health

logger = logging.getLogger(__name__)


class HealthAdapter(Health):
    """
    Health service implementation backed by the execd ping endpoint.

    The adapter owns an httpx client configured from the connection settings
    and injects it into the openapi-python-client generated execd client.
    """

    def __init__(
        self,
        connection_config: ConnectionConfig,
        execd_endpoint: SandboxEndpoint,
    ) -> None:
        """
        Set up the execd API client used for health checks.

        Args:
            connection_config: Connection configuration (shared transport, headers, timeouts)
            execd_endpoint: Endpoint for execd service
        """
        from opensandbox.api.execd import Client

        self.connection_config = connection_config
        self.execd_endpoint = execd_endpoint

        config = self.connection_config
        base_url = f"{config.protocol}://{self.execd_endpoint.endpoint}"
        timeout = httpx.Timeout(config.request_timeout.total_seconds())

        # Later updates win: endpoint headers override connection headers,
        # which override the default User-Agent.
        headers = {"User-Agent": config.user_agent}
        headers.update(config.headers)
        headers.update(self.execd_endpoint.headers)

        # Execd API does not require authentication
        self._client = Client(base_url=base_url, timeout=timeout)

        self._httpx_client = httpx.AsyncClient(
            base_url=base_url,
            headers=headers,
            timeout=timeout,
            transport=config.transport,
        )
        self._client.set_async_httpx_client(self._httpx_client)

    async def _get_client(self):
        """Return the client for execd API (no auth required)."""
        return self._client

    async def ping(self, sandbox_id: str) -> bool:
        """Report whether the sandbox's execd endpoint answers a ping.

        Any failure (import, transport, or API error) is logged at debug level
        and reported as False instead of being raised.

        Args:
            sandbox_id: Sandbox identifier; only used in the debug log message

        Returns:
            True if the ping call succeeded, False otherwise
        """
        try:
            from opensandbox.adapters.converter.response_handler import (
                handle_api_error,
            )
            from opensandbox.api.execd.api.health import ping

            api_client = await self._get_client()
            result = await ping.asyncio_detailed(client=api_client)
            handle_api_error(result, "Ping")
        except Exception as exc:
            logger.debug(f"Health check failed for sandbox {sandbox_id}: {exc}")
            return False
        return True


================================================
FILE: sdks/sandbox/python/src/opensandbox/adapters/metrics_adapter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Metrics service adapter implementation.

Implementation of MetricsService that adapts openapi-python-client generated MetricApi.
"""

import logging

import httpx

from opensandbox.adapters.converter.exception_converter import (
    ExceptionConverter,
)
from opensandbox.adapters.converter.metrics_model_converter import (
    MetricsModelConverter,
)
from opensandbox.adapters.converter.response_handler import (
    handle_api_error,
    require_parsed,
)
from opensandbox.config import ConnectionConfig
from opensandbox.models.sandboxes import SandboxEndpoint, SandboxMetrics
from opensandbox.services.metrics import Metrics

logger = logging.getLogger(__name__)


class MetricsAdapter(Metrics):
    """
    Metrics service implementation backed by the execd metrics endpoint.

    The adapter owns an httpx client configured from the connection settings
    and injects it into the openapi-python-client generated execd client.
    """

    def __init__(
        self,
        connection_config: ConnectionConfig,
        execd_endpoint: SandboxEndpoint,
    ) -> None:
        """
        Set up the execd API client used for metrics retrieval.

        Args:
            connection_config: Connection configuration (shared transport, headers, timeouts)
            execd_endpoint: Endpoint for execd service
        """
        from opensandbox.api.execd import Client

        self.connection_config = connection_config
        self.execd_endpoint = execd_endpoint

        config = self.connection_config
        base_url = f"{config.protocol}://{self.execd_endpoint.endpoint}"
        timeout = httpx.Timeout(config.request_timeout.total_seconds())

        # Later updates win: endpoint headers override connection headers,
        # which override the default User-Agent.
        headers = {"User-Agent": config.user_agent}
        headers.update(config.headers)
        headers.update(self.execd_endpoint.headers)

        # Execd API does not require authentication
        self._client = Client(base_url=base_url, timeout=timeout)

        self._httpx_client = httpx.AsyncClient(
            base_url=base_url,
            headers=headers,
            timeout=timeout,
            transport=config.transport,
        )
        self._client.set_async_httpx_client(self._httpx_client)

    async def _get_client(self):
        """Return the client for execd API (no auth required)."""
        return self._client

    async def get_metrics(self, sandbox_id: str) -> SandboxMetrics:
        """Fetch the current metrics from the sandbox's execd endpoint.

        Args:
            sandbox_id: Sandbox identifier; only used in log messages

        Returns:
            The API metrics converted by `MetricsModelConverter.to_sandbox_metrics`

        Raises:
            Exception: Whatever `ExceptionConverter.to_sandbox_exception`
                produces if metrics retrieval fails
        """
        logger.debug(f"Retrieving sandbox metrics for {sandbox_id}")

        try:
            from opensandbox.api.execd.api.metric import get_metrics

            api_client = await self._get_client()
            result = await get_metrics.asyncio_detailed(client=api_client)
            handle_api_error(result, "Get metrics")

            # Aliased so the API model does not shadow the service base class
            # that this module imports under the name `Metrics`.
            from opensandbox.api.execd.models import Metrics as ApiMetrics

            api_metrics = require_parsed(result, ApiMetrics, "Get metrics")
            return MetricsModelConverter.to_sandbox_metrics(api_metrics)

        except Exception as exc:
            logger.error(f"Failed to get metrics for sandbox {sandbox_id}", exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc


================================================
FILE: sdks/sandbox/python/src/opensandbox/adapters/sandboxes_adapter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Sandbox service adapter implementation.

Implementation of SandboxService that adapts openapi-python-client generated API.
This adapter provides a clean abstraction layer between business logic and
the auto-generated API client, handling all model conversions and error mapping.
"""

import logging
from datetime import datetime, timedelta

import httpx  # type: ignore[reportMissingImports]

from opensandbox.adapters.converter.exception_converter import (
    ExceptionConverter,
)
from opensandbox.adapters.converter.response_handler import (
    handle_api_error,
    require_parsed,
)
from opensandbox.adapters.converter.sandbox_model_converter import (
    SandboxModelConverter,
)
from opensandbox.api.lifecycle.types import UNSET
from opensandbox.config import ConnectionConfig
from opensandbox.models.sandboxes import (
    NetworkPolicy,
    PagedSandboxInfos,
    SandboxCreateResponse,
    SandboxEndpoint,
    SandboxFilter,
    SandboxImageSpec,
    SandboxInfo,
    SandboxRenewResponse,
    Volume,
)
from opensandbox.services.sandbox import Sandboxes

logger = logging.getLogger(__name__)


class SandboxesAdapter(Sandboxes):
    """
    Implementation of SandboxService that adapts openapi-python-client generated API.

    Every public coroutine follows the same shape: call the generated
    ``asyncio_detailed`` endpoint function, pass the response through
    ``handle_api_error``, convert the parsed payload (if any) with
    ``SandboxModelConverter``, and re-raise any failure through
    ``ExceptionConverter.to_sandbox_exception``.

    The adapter builds one ``httpx.AsyncClient`` from the supplied
    ``ConnectionConfig`` and injects it into the generated client.
    """

    def __init__(self, connection_config: ConnectionConfig) -> None:
        """
        Initialize the sandbox service adapter.

        Builds the generated ``AuthenticatedClient`` plus an adapter-owned
        ``httpx.AsyncClient`` (base URL, headers, user agent, request timeout and
        transport all come from ``connection_config``) and wires the latter into
        the former.

        Args:
            connection_config: Connection configuration (shared transport, headers, timeouts)
        """
        self.connection_config = connection_config
        from opensandbox.api.lifecycle import AuthenticatedClient

        api_key = connection_config.get_api_key()
        timeout = httpx.Timeout(connection_config.request_timeout.total_seconds())

        # Entries from connection_config.headers override the default User-Agent.
        headers = {
            "User-Agent": connection_config.user_agent,
            **connection_config.headers,
        }
        if api_key:
            headers["OPEN-SANDBOX-API-KEY"] = api_key

        base_url = connection_config.get_base_url()

        # Create client with custom auth header for OpenSandbox API
        self._client = AuthenticatedClient(
            base_url=base_url,
            token=api_key or "",
            prefix="",  # No prefix, just the token
            auth_header_name="OPEN-SANDBOX-API-KEY",  # Custom header name
            timeout=timeout,
        )

        # Inject httpx client (adapter-owned)
        self._httpx_client = httpx.AsyncClient(
            base_url=base_url,
            headers=headers,
            timeout=timeout,
            transport=connection_config.transport,
        )
        self._client.set_async_httpx_client(self._httpx_client)

    async def _get_client(self):
        """Return the authenticated client for lifecycle API."""
        return self._client

    async def create_sandbox(
        self,
        spec: SandboxImageSpec,
        entrypoint: list[str],
        env: dict[str, str],
        metadata: dict[str, str],
        timeout: timedelta | None,
        resource: dict[str, str],
        network_policy: NetworkPolicy | None,
        extensions: dict[str, str],
        volumes: list[Volume] | None,
    ) -> SandboxCreateResponse:
        """
        Create a new sandbox instance with the specified configuration.

        All arguments are forwarded unchanged to
        ``SandboxModelConverter.to_api_create_sandbox_request``.

        Returns:
            The converted create response for the new sandbox.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure (chained to the original error).
        """
        logger.info("Creating sandbox with image: %s", spec.image)

        try:
            from opensandbox.api.lifecycle.api.sandboxes import post_sandboxes
            from opensandbox.api.lifecycle.models import CreateSandboxResponse

            create_request = SandboxModelConverter.to_api_create_sandbox_request(
                spec=spec,
                entrypoint=entrypoint,
                env=env,
                metadata=metadata,
                timeout=timeout,
                resource=resource,
                network_policy=network_policy,
                extensions=extensions,
                volumes=volumes,
            )

            client = await self._get_client()
            response_obj = await post_sandboxes.asyncio_detailed(
                client=client,
                body=create_request,
            )

            handle_api_error(response_obj, "Create sandbox")

            parsed = require_parsed(response_obj, CreateSandboxResponse, "Create sandbox")
            response = SandboxModelConverter.to_sandbox_create_response(parsed)
            logger.info("Successfully created sandbox: %s", response.id)
            return response

        except Exception as e:
            logger.error(
                "Failed to create sandbox with image: %s", spec.image, exc_info=e
            )
            raise ExceptionConverter.to_sandbox_exception(e) from e

    async def get_sandbox_info(self, sandbox_id: str) -> SandboxInfo:
        """
        Retrieve detailed information about a sandbox.

        Args:
            sandbox_id: Identifier of the sandbox to look up.

        Returns:
            The converted sandbox information.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure (chained to the original error).
        """
        logger.debug("Retrieving sandbox information: %s", sandbox_id)

        try:
            from opensandbox.api.lifecycle.api.sandboxes import get_sandboxes_sandbox_id
            from opensandbox.api.lifecycle.models import Sandbox

            operation = f"Get sandbox {sandbox_id}"

            client = await self._get_client()
            response_obj = await get_sandboxes_sandbox_id.asyncio_detailed(
                client=client,
                sandbox_id=sandbox_id,
            )

            handle_api_error(response_obj, operation)

            parsed = require_parsed(response_obj, Sandbox, operation)
            return SandboxModelConverter.to_sandbox_info(parsed)

        except Exception as e:
            logger.error("Failed to get sandbox info: %s", sandbox_id, exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    async def list_sandboxes(self, filter: SandboxFilter) -> PagedSandboxInfos:
        """
        List sandboxes with optional filtering criteria.

        Args:
            filter: Filter whose ``states``, ``metadata``, ``page`` and
                ``page_size`` are mapped onto the query parameters; empty or
                ``None`` values are sent as ``UNSET``.

        Returns:
            The converted page of sandbox infos.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure (chained to the original error).
        """
        logger.debug("Listing sandboxes with filter: %s", filter)

        # Prepare metadata parameter similar to Kotlin SDK:
        # the pairs are flattened into a single "k1=v1&k2=v2" string.
        metadata = UNSET
        if filter.metadata:
            metadata = "&".join(
                f"{key}={value}" for key, value in filter.metadata.items()
            )

        try:
            from opensandbox.api.lifecycle.api.sandboxes import get_sandboxes
            from opensandbox.api.lifecycle.models import ListSandboxesResponse

            client = await self._get_client()
            response_obj = await get_sandboxes.asyncio_detailed(
                client=client,
                state=filter.states or UNSET,
                metadata=metadata,
                page=filter.page if filter.page is not None else UNSET,
                page_size=filter.page_size if filter.page_size is not None else UNSET,
            )

            handle_api_error(response_obj, "List sandboxes")

            parsed = require_parsed(response_obj, ListSandboxesResponse, "List sandboxes")
            return SandboxModelConverter.to_paged_sandbox_infos(parsed)

        except Exception as e:
            logger.error("Failed to list sandboxes", exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    async def get_sandbox_endpoint(
        self, sandbox_id: str, port: int, use_server_proxy: bool = False
    ) -> SandboxEndpoint:
        """
        Get network endpoint information for a sandbox service.

        Args:
            sandbox_id: Identifier of the sandbox.
            port: Port of the service inside the sandbox.
            use_server_proxy: Forwarded as-is to the endpoint API call.

        Returns:
            The converted endpoint description.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure (chained to the original error).
        """
        logger.debug("Retrieving sandbox endpoint: %s, port %s", sandbox_id, port)

        try:
            from opensandbox.api.lifecycle.api.sandboxes import (
                get_sandboxes_sandbox_id_endpoints_port,
            )
            from opensandbox.api.lifecycle.models import Endpoint

            client = await self._get_client()
            response_obj = (
                await get_sandboxes_sandbox_id_endpoints_port.asyncio_detailed(
                    client=client,
                    sandbox_id=sandbox_id,
                    port=port,
                    use_server_proxy=use_server_proxy,
                )
            )

            handle_api_error(
                response_obj, f"Get endpoint for sandbox {sandbox_id} port {port}"
            )

            parsed = require_parsed(response_obj, Endpoint, "Get endpoint")
            return SandboxModelConverter.to_sandbox_endpoint(parsed)

        except Exception as e:
            logger.error(
                "Failed to retrieve sandbox endpoint for sandbox %s",
                sandbox_id,
                exc_info=e,
            )
            raise ExceptionConverter.to_sandbox_exception(e) from e

    async def pause_sandbox(self, sandbox_id: str) -> None:
        """
        Pause a running sandbox while preserving its state.

        Args:
            sandbox_id: Identifier of the sandbox to pause.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure (chained to the original error).
        """
        logger.info("Pausing sandbox: %s", sandbox_id)

        try:
            from opensandbox.api.lifecycle.api.sandboxes import (
                post_sandboxes_sandbox_id_pause,
            )

            client = await self._get_client()
            response_obj = await post_sandboxes_sandbox_id_pause.asyncio_detailed(
                client=client,
                sandbox_id=sandbox_id,
            )

            handle_api_error(response_obj, f"Pause sandbox {sandbox_id}")

            logger.info("Initiated pause for sandbox: %s", sandbox_id)

        except Exception as e:
            logger.error("Failed to initiate pause sandbox: %s", sandbox_id, exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    async def resume_sandbox(self, sandbox_id: str) -> None:
        """
        Resume a previously paused sandbox.

        Args:
            sandbox_id: Identifier of the sandbox to resume.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure (chained to the original error).
        """
        logger.info("Resuming sandbox: %s", sandbox_id)

        try:
            from opensandbox.api.lifecycle.api.sandboxes import (
                post_sandboxes_sandbox_id_resume,
            )

            client = await self._get_client()
            response_obj = await post_sandboxes_sandbox_id_resume.asyncio_detailed(
                client=client,
                sandbox_id=sandbox_id,
            )

            handle_api_error(response_obj, f"Resume sandbox {sandbox_id}")

            logger.info("Initiated resume for sandbox: %s", sandbox_id)

        except Exception as e:
            # Message wording aligned with the pause counterpart ("Failed to initiate ...").
            logger.error("Failed to initiate resume sandbox: %s", sandbox_id, exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    async def renew_sandbox_expiration(
        self, sandbox_id: str, new_expiration_time: datetime
    ) -> SandboxRenewResponse:
        """
        Extend the expiration time of a sandbox.

        Args:
            sandbox_id: Identifier of the sandbox to renew.
            new_expiration_time: Requested expiration, converted to the API
                request by ``SandboxModelConverter.to_api_renew_request``.

        Returns:
            The converted renew response.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure (chained to the original error).
        """
        logger.info(
            "Renew sandbox %s expiration to %s", sandbox_id, new_expiration_time
        )

        try:
            from opensandbox.api.lifecycle.api.sandboxes import (
                post_sandboxes_sandbox_id_renew_expiration,
            )
            from opensandbox.api.lifecycle.models.renew_sandbox_expiration_response import (
                RenewSandboxExpirationResponse,
            )

            operation = f"Renew sandbox {sandbox_id} expiration"

            renew_request = SandboxModelConverter.to_api_renew_request(
                new_expiration_time
            )

            client = await self._get_client()
            response_obj = (
                await post_sandboxes_sandbox_id_renew_expiration.asyncio_detailed(
                    client=client,
                    sandbox_id=sandbox_id,
                    body=renew_request,
                )
            )

            handle_api_error(response_obj, operation)

            parsed = require_parsed(
                response_obj,
                RenewSandboxExpirationResponse,
                operation,
            )
            renew_response = SandboxModelConverter.to_sandbox_renew_response(parsed)
            logger.info(
                "Successfully renewed sandbox %s expiration to %s",
                sandbox_id,
                renew_response.expires_at,
            )
            return renew_response

        except Exception as e:
            logger.error(
                "Failed to renew sandbox %s expiration", sandbox_id, exc_info=e
            )
            raise ExceptionConverter.to_sandbox_exception(e) from e

    async def kill_sandbox(self, sandbox_id: str) -> None:
        """
        Permanently terminate a sandbox and clean up its resources.

        Args:
            sandbox_id: Identifier of the sandbox to terminate.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure (chained to the original error).
        """
        logger.info("Terminating sandbox: %s", sandbox_id)

        try:
            from opensandbox.api.lifecycle.api.sandboxes import (
                delete_sandboxes_sandbox_id,
            )

            client = await self._get_client()
            response_obj = await delete_sandboxes_sandbox_id.asyncio_detailed(
                client=client,
                sandbox_id=sandbox_id,
            )

            handle_api_error(response_obj, f"Kill sandbox {sandbox_id}")

            logger.info("Successfully terminated sandbox: %s", sandbox_id)

        except Exception as e:
            logger.error("Failed to terminate sandbox: %s", sandbox_id, exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/__init__.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""OpenSandbox API clients generated from OpenAPI specs."""


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/egress/__init__.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""A client library for accessing OpenSandbox Egress API"""

from .client import AuthenticatedClient, Client

__all__ = (
    "AuthenticatedClient",
    "Client",
)


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/egress/api/__init__.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Contains methods for accessing the API"""


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/egress/api/policy/__init__.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Contains endpoint functions for accessing the API"""


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/egress/api/policy/get_policy.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.policy_status_response import PolicyStatusResponse
from ...types import Response


def _get_kwargs() -> dict[str, Any]:
    _kwargs: dict[str, Any] = {
        "method": "get",
        "url": "/policy",
    }

    return _kwargs


def _parse_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> PolicyStatusResponse | str | None:
    """Turn the raw HTTP response into the documented result type.

    Returns:
        ``PolicyStatusResponse`` parsed from the JSON body for 200, the raw
        response text for 401 and 500, and ``None`` for any other status when
        ``client.raise_on_unexpected_status`` is false.

    Raises:
        errors.UnexpectedStatus: For any other status when
            ``client.raise_on_unexpected_status`` is true.
    """
    status = response.status_code

    if status == 200:
        return PolicyStatusResponse.from_dict(response.json())

    # Documented error statuses carry a plain-text body.
    if status in (401, 500):
        return response.text

    if not client.raise_on_unexpected_status:
        return None
    raise errors.UnexpectedStatus(response.status_code, response.content)


def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[PolicyStatusResponse | str]:
    """Wrap the httpx response in the generated ``Response`` container.

    The status code is converted to ``HTTPStatus`` before the body is parsed,
    so the two steps keep their original evaluation order.
    """
    status = HTTPStatus(response.status_code)
    raw_body = response.content
    raw_headers = response.headers
    parsed_body = _parse_response(client=client, response=response)
    return Response(
        status_code=status,
        content=raw_body,
        headers=raw_headers,
        parsed=parsed_body,
    )


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
) -> Response[PolicyStatusResponse | str]:
    """Get current egress policy

    Returns the currently enforced egress policy and the sidecar's derived
    runtime mode metadata. The request is sent with the client's synchronous
    httpx client.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[PolicyStatusResponse | str]
    """

    request_args = _get_kwargs()
    http_response = client.get_httpx_client().request(**request_args)
    return _build_response(client=client, response=http_response)


def sync(
    *,
    client: AuthenticatedClient | Client,
) -> PolicyStatusResponse | str | None:
    """Get current egress policy

    Returns the currently enforced egress policy and the sidecar's derived
    runtime mode metadata. Convenience wrapper around ``sync_detailed`` that
    hands back only the parsed payload.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        PolicyStatusResponse | str | None
    """

    detailed = sync_detailed(client=client)
    return detailed.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
) -> Response[PolicyStatusResponse | str]:
    """Get current egress policy

    Returns the currently enforced egress policy and the sidecar's derived
    runtime mode metadata. The request is sent with the client's asynchronous
    httpx client.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[PolicyStatusResponse | str]
    """

    request_args = _get_kwargs()
    http_response = await client.get_async_httpx_client().request(**request_args)
    return _build_response(client=client, response=http_response)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
) -> PolicyStatusResponse | str | None:
    """Get current egress policy

    Returns the currently enforced egress policy and the sidecar's derived
    runtime mode metadata. Convenience wrapper around ``asyncio_detailed`` that
    hands back only the parsed payload.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        PolicyStatusResponse | str | None
    """

    detailed = await asyncio_detailed(client=client)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/egress/api/policy/patch_policy.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.network_rule import NetworkRule
from ...models.policy_status_response import PolicyStatusResponse
from ...types import Response


def _get_kwargs(
    *,
    body: list[NetworkRule],
) -> dict[str, Any]:
    """Build the httpx request arguments for ``PATCH /policy``.

    Args:
        body: Rules to send; each one is serialized with its ``to_dict()``.

    Returns:
        A dict with the method, relative URL, JSON payload (a list of rule
        dicts) and a ``Content-Type: application/json`` header.
    """
    serialized_rules = [rule.to_dict() for rule in body]

    return {
        "method": "patch",
        "url": "/policy",
        "json": serialized_rules,
        "headers": {"Content-Type": "application/json"},
    }


def _parse_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> PolicyStatusResponse | str | None:
    """Turn the raw HTTP response into the documented result type.

    Returns:
        ``PolicyStatusResponse`` parsed from the JSON body for 200, the raw
        response text for 400, 401 and 500, and ``None`` for any other status
        when ``client.raise_on_unexpected_status`` is false.

    Raises:
        errors.UnexpectedStatus: For any other status when
            ``client.raise_on_unexpected_status`` is true.
    """
    status = response.status_code

    if status == 200:
        return PolicyStatusResponse.from_dict(response.json())

    # Documented error statuses carry a plain-text body.
    if status in (400, 401, 500):
        return response.text

    if not client.raise_on_unexpected_status:
        return None
    raise errors.UnexpectedStatus(response.status_code, response.content)


def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[PolicyStatusResponse | str]:
    """Wrap the httpx response in the generated ``Response`` container.

    The status code is converted to ``HTTPStatus`` before the body is parsed,
    so the two steps keep their original evaluation order.
    """
    status = HTTPStatus(response.status_code)
    raw_body = response.content
    raw_headers = response.headers
    parsed_body = _parse_response(client=client, response=response)
    return Response(
        status_code=status,
        content=raw_body,
        headers=raw_headers,
        parsed=parsed_body,
    )


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
    body: list[NetworkRule],
) -> Response[PolicyStatusResponse | str]:
    """Patch egress rules

    Merge incoming egress rules with the currently enforced policy, using the
    client's synchronous httpx client.

    This endpoint uses merge semantics:
    - Existing rules remain unless overridden by incoming rules.
    - Incoming rules are applied with higher priority than existing rules.
    - If multiple incoming rules refer to the same `target`, the first one wins.

    Args:
        body (list[NetworkRule]): Rules to merge into the current policy.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[PolicyStatusResponse | str]
    """

    request_args = _get_kwargs(body=body)
    http_response = client.get_httpx_client().request(**request_args)
    return _build_response(client=client, response=http_response)


def sync(
    *,
    client: AuthenticatedClient | Client,
    body: list[NetworkRule],
) -> PolicyStatusResponse | str | None:
    """Patch egress rules

    Merge incoming egress rules with the currently enforced policy.
    Convenience wrapper around ``sync_detailed`` that hands back only the
    parsed payload.

    This endpoint uses merge semantics:
    - Existing rules remain unless overridden by incoming rules.
    - Incoming rules are applied with higher priority than existing rules.
    - If multiple incoming rules refer to the same `target`, the first one wins.

    Args:
        body (list[NetworkRule]): Rules to merge into the current policy.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        PolicyStatusResponse | str | None
    """

    detailed = sync_detailed(client=client, body=body)
    return detailed.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
    body: list[NetworkRule],
) -> Response[PolicyStatusResponse | str]:
    """Patch egress rules

    Merge incoming egress rules with the currently enforced policy, using the
    client's asynchronous httpx client.

    This endpoint uses merge semantics:
    - Existing rules remain unless overridden by incoming rules.
    - Incoming rules are applied with higher priority than existing rules.
    - If multiple incoming rules refer to the same `target`, the first one wins.

    Args:
        body (list[NetworkRule]): Rules to merge into the current policy.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[PolicyStatusResponse | str]
    """

    request_args = _get_kwargs(body=body)
    http_response = await client.get_async_httpx_client().request(**request_args)
    return _build_response(client=client, response=http_response)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
    body: list[NetworkRule],
) -> PolicyStatusResponse | str | None:
    """Patch egress rules

    Merge incoming egress rules with the currently enforced policy.
    Convenience wrapper around ``asyncio_detailed`` that hands back only the
    parsed payload.

    This endpoint uses merge semantics:
    - Existing rules remain unless overridden by incoming rules.
    - Incoming rules are applied with higher priority than existing rules.
    - If multiple incoming rules refer to the same `target`, the first one wins.

    Args:
        body (list[NetworkRule]): Rules to merge into the current policy.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        PolicyStatusResponse | str | None
    """

    detailed = await asyncio_detailed(client=client, body=body)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/egress/client.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import ssl
from typing import Any

import httpx
from attrs import define, evolve, field


@define
class Client:
    """Holds the connection settings for the API and the httpx clients built from them.

    The following keyword arguments are accepted and used when the underlying
    httpx clients are constructed:

        ``base_url``: The base URL for the API, all requests are made to a relative path to this URL

        ``cookies``: A dictionary of cookies to be sent with every request

        ``headers``: A dictionary of headers to be sent with every request

        ``timeout``: The maximum amount of a time a request can take. API functions will raise
        httpx.TimeoutException if this is exceeded.

        ``verify_ssl``: Whether or not to verify the SSL certificate of the API server. This should be True in production,
        but can be set to False for testing purposes.

        ``follow_redirects``: Whether or not to follow redirects. Default value is False.

        ``httpx_args``: A dictionary of additional arguments to be passed to the ``httpx.Client`` and ``httpx.AsyncClient`` constructor.


    Attributes:
        raise_on_unexpected_status: Whether or not to raise an errors.UnexpectedStatus if the API returns a
            status code that was not documented in the source OpenAPI document. Can also be provided as a keyword
            argument to the constructor.
    """

    raise_on_unexpected_status: bool = field(default=False, kw_only=True)
    _base_url: str = field(alias="base_url")
    _cookies: dict[str, str] = field(factory=dict, kw_only=True, alias="cookies")
    _headers: dict[str, str] = field(factory=dict, kw_only=True, alias="headers")
    _timeout: httpx.Timeout | None = field(default=None, kw_only=True, alias="timeout")
    _verify_ssl: str | bool | ssl.SSLContext = field(default=True, kw_only=True, alias="verify_ssl")
    _follow_redirects: bool = field(default=False, kw_only=True, alias="follow_redirects")
    _httpx_args: dict[str, Any] = field(factory=dict, kw_only=True, alias="httpx_args")
    _client: httpx.Client | None = field(default=None, init=False)
    _async_client: httpx.AsyncClient | None = field(default=None, init=False)

    def with_headers(self, headers: dict[str, str]) -> "Client":
        """Return an evolved copy of this client with the extra headers merged in.

        Any httpx client already held by this instance also has its headers
        updated in place.
        """
        for existing in (self._client, self._async_client):
            if existing is not None:
                existing.headers.update(headers)
        merged_headers = {**self._headers, **headers}
        return evolve(self, headers=merged_headers)

    def with_cookies(self, cookies: dict[str, str]) -> "Client":
        """Return an evolved copy of this client with the extra cookies merged in.

        Any httpx client already held by this instance also has its cookies
        updated in place.
        """
        for existing in (self._client, self._async_client):
            if existing is not None:
                existing.cookies.update(cookies)
        merged_cookies = {**self._cookies, **cookies}
        return evolve(self, cookies=merged_cookies)

    def with_timeout(self, timeout: httpx.Timeout) -> "Client":
        """Return an evolved copy of this client using the given timeout.

        Any httpx client already held by this instance also gets the new
        timeout assigned.
        """
        for existing in (self._client, self._async_client):
            if existing is not None:
                existing.timeout = timeout
        return evolve(self, timeout=timeout)

    def set_httpx_client(self, client: httpx.Client) -> "Client":
        """Use the given ``httpx.Client`` as the underlying synchronous client.

        **NOTE**: This will override any other settings on the client, including cookies, headers, and timeout.
        """
        self._client = client
        return self

    def get_httpx_client(self) -> httpx.Client:
        """Return the underlying ``httpx.Client``, building it on first use."""
        if self._client is not None:
            return self._client

        self._client = httpx.Client(
            base_url=self._base_url,
            cookies=self._cookies,
            headers=self._headers,
            timeout=self._timeout,
            verify=self._verify_ssl,
            follow_redirects=self._follow_redirects,
            **self._httpx_args,
        )
        return self._client

    def __enter__(self) -> "Client":
        """Enter the underlying httpx.Client context; it cannot be entered twice (see httpx docs)"""
        sync_client = self.get_httpx_client()
        sync_client.__enter__()
        return self

    def __exit__(self, *args: Any, **kwargs: Any) -> None:
        """Leave the underlying httpx.Client context (see httpx docs)"""
        sync_client = self.get_httpx_client()
        sync_client.__exit__(*args, **kwargs)

    def set_async_httpx_client(self, async_client: httpx.AsyncClient) -> "Client":
        """Use the given ``httpx.AsyncClient`` as the underlying asynchronous client.

        **NOTE**: This will override any other settings on the client, including cookies, headers, and timeout.
        """
        self._async_client = async_client
        return self

    def get_async_httpx_client(self) -> httpx.AsyncClient:
        """Return the underlying ``httpx.AsyncClient``, building it on first use."""
        if self._async_client is not None:
            return self._async_client

        self._async_client = httpx.AsyncClient(
            base_url=self._base_url,
            cookies=self._cookies,
            headers=self._headers,
            timeout=self._timeout,
            verify=self._verify_ssl,
            follow_redirects=self._follow_redirects,
            **self._httpx_args,
        )
        return self._async_client

    async def __aenter__(self) -> "Client":
        """Enter the underlying httpx.AsyncClient context; it cannot be entered twice (see httpx docs)"""
        async_client = self.get_async_httpx_client()
        await async_client.__aenter__()
        return self

    async def __aexit__(self, *args: Any, **kwargs: Any) -> None:
        """Leave the underlying httpx.AsyncClient context (see httpx docs)"""
        async_client = self.get_async_httpx_client()
        await async_client.__aexit__(*args, **kwargs)


@define
class AuthenticatedClient:
    """API client for secured endpoints; it adds a token header to the httpx clients it builds.

    These keyword arguments are accepted and used when constructing the internal httpx clients:

        ``base_url``: Root URL of the API; every request is made to a path relative to it

        ``cookies``: A dictionary of cookies sent with every request

        ``headers``: A dictionary of headers sent with every request

        ``timeout``: Upper bound on how long a request may take. API functions raise
        httpx.TimeoutException when it is exceeded.

        ``verify_ssl``: Whether to verify the API server's SSL certificate. Keep this True in production;
        False is meant for testing only.

        ``follow_redirects``: Whether redirects are followed. Defaults to False.

        ``httpx_args``: Extra keyword arguments forwarded to the ``httpx.Client`` and ``httpx.AsyncClient``
        constructors.


    Attributes:
        raise_on_unexpected_status: When True, API functions raise errors.UnexpectedStatus for a status
            code that the source OpenAPI document does not describe. Can also be passed as a keyword
            argument to the constructor.
        token: The token used for authentication
        prefix: Text placed before the token in the header value; when empty the bare token is sent
        auth_header_name: Name of the header that carries the token
    """

    # Field order defines the positional constructor signature and must not be rearranged.
    raise_on_unexpected_status: bool = field(default=False, kw_only=True)
    _base_url: str = field(alias="base_url")
    _cookies: dict[str, str] = field(factory=dict, kw_only=True, alias="cookies")
    _headers: dict[str, str] = field(factory=dict, kw_only=True, alias="headers")
    _timeout: httpx.Timeout | None = field(default=None, kw_only=True, alias="timeout")
    _verify_ssl: str | bool | ssl.SSLContext = field(default=True, kw_only=True, alias="verify_ssl")
    _follow_redirects: bool = field(default=False, kw_only=True, alias="follow_redirects")
    _httpx_args: dict[str, Any] = field(factory=dict, kw_only=True, alias="httpx_args")
    # Lazily created (or manually injected) httpx clients; not constructor arguments.
    _client: httpx.Client | None = field(default=None, init=False)
    _async_client: httpx.AsyncClient | None = field(default=None, init=False)

    token: str
    prefix: str = "Bearer"
    auth_header_name: str = "Authorization"

    def _store_auth_header(self) -> None:
        """Write the authentication header into this client's own header dictionary."""
        if self.prefix:
            header_value = f"{self.prefix} {self.token}"
        else:
            header_value = self.token
        self._headers[self.auth_header_name] = header_value

    def with_headers(self, headers: dict[str, str]) -> "AuthenticatedClient":
        """Return a new client like this one with ``headers`` merged in.

        Any httpx clients this object already holds have their headers updated in place as well.
        """
        for live_client in (self._client, self._async_client):
            if live_client is not None:
                live_client.headers.update(headers)
        merged_headers = {**self._headers, **headers}
        return evolve(self, headers=merged_headers)

    def with_cookies(self, cookies: dict[str, str]) -> "AuthenticatedClient":
        """Return a new client like this one with ``cookies`` merged in.

        Any httpx clients this object already holds have their cookies updated in place as well.
        """
        for live_client in (self._client, self._async_client):
            if live_client is not None:
                live_client.cookies.update(cookies)
        merged_cookies = {**self._cookies, **cookies}
        return evolve(self, cookies=merged_cookies)

    def with_timeout(self, timeout: httpx.Timeout) -> "AuthenticatedClient":
        """Return a new client like this one that uses ``timeout``.

        Any httpx clients this object already holds get the new timeout assigned as well.
        """
        for live_client in (self._client, self._async_client):
            if live_client is not None:
                live_client.timeout = timeout
        return evolve(self, timeout=timeout)

    def set_httpx_client(self, client: httpx.Client) -> "AuthenticatedClient":
        """Manually install the underlying httpx.Client and return this object.

        **NOTE**: This will override any other settings on the client, including cookies, headers, and timeout.
        """
        self._client = client
        return self

    def get_httpx_client(self) -> httpx.Client:
        """Return the stored httpx.Client, building and caching one on first use.

        Before building, the authentication header is written into this object's headers.
        """
        if self._client is not None:
            return self._client

        self._store_auth_header()
        self._client = httpx.Client(
            base_url=self._base_url,
            cookies=self._cookies,
            headers=self._headers,
            timeout=self._timeout,
            verify=self._verify_ssl,
            follow_redirects=self._follow_redirects,
            **self._httpx_args,
        )
        return self._client

    def __enter__(self) -> "AuthenticatedClient":
        """Enter the wrapped httpx.Client as a context manager—you cannot enter twice (see httpx docs)"""
        sync_client = self.get_httpx_client()
        sync_client.__enter__()
        return self

    def __exit__(self, *args: Any, **kwargs: Any) -> None:
        """Leave the context manager of the internal httpx.Client (see httpx docs)"""
        sync_client = self.get_httpx_client()
        sync_client.__exit__(*args, **kwargs)

    def set_async_httpx_client(self, async_client: httpx.AsyncClient) -> "AuthenticatedClient":
        """Manually install the underlying httpx.AsyncClient and return this object.

        **NOTE**: This will override any other settings on the client, including cookies, headers, and timeout.
        """
        self._async_client = async_client
        return self

    def get_async_httpx_client(self) -> httpx.AsyncClient:
        """Return the stored httpx.AsyncClient, building and caching one on first use.

        Before building, the authentication header is written into this object's headers.
        """
        if self._async_client is not None:
            return self._async_client

        self._store_auth_header()
        self._async_client = httpx.AsyncClient(
            base_url=self._base_url,
            cookies=self._cookies,
            headers=self._headers,
            timeout=self._timeout,
            verify=self._verify_ssl,
            follow_redirects=self._follow_redirects,
            **self._httpx_args,
        )
        return self._async_client

    async def __aenter__(self) -> "AuthenticatedClient":
        """Enter the wrapped httpx.AsyncClient as an async context manager—you cannot enter twice (see httpx docs)"""
        async_client = self.get_async_httpx_client()
        await async_client.__aenter__()
        return self

    async def __aexit__(self, *args: Any, **kwargs: Any) -> None:
        """Leave the async context manager of the underlying httpx.AsyncClient (see httpx docs)"""
        async_client = self.get_async_httpx_client()
        await async_client.__aexit__(*args, **kwargs)


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/egress/errors.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Contains shared errors types that can be raised from API functions"""


class UnexpectedStatus(Exception):
    """Raised by API functions when the response has an undocumented status code and
    Client.raise_on_unexpected_status is True.

    Attributes:
        status_code: The HTTP status code of the response.
        content: The raw response body bytes.
    """

    def __init__(self, status_code: int, content: bytes):
        self.status_code = status_code
        self.content = content

        # Undecodable bytes are dropped so that building the message itself can never fail.
        super().__init__(
            f"Unexpected status code: {status_code}\n\nResponse content:\n{content.decode(errors='ignore')}"
        )

    def __reduce__(self) -> tuple:
        """Describe how to rebuild this exception for ``copy`` and ``pickle``.

        The inherited reduction replays ``self.args`` (the single formatted message) into
        ``__init__``, which requires two arguments and therefore fails with TypeError.
        Replaying the original constructor arguments makes copies and pickles round-trip.
        """
        return type(self), (self.status_code, self.content), dict(vars(self))


__all__ = ["UnexpectedStatus"]


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/egress/models/__init__.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Contains all the data models used in inputs/outputs"""

from .network_policy import NetworkPolicy
from .network_policy_default_action import NetworkPolicyDefaultAction
from .network_rule import NetworkRule
from .network_rule_action import NetworkRuleAction
from .policy_status_response import PolicyStatusResponse

__all__ = (
    "NetworkPolicy",
    "NetworkPolicyDefaultAction",
    "NetworkRule",
    "NetworkRuleAction",
    "PolicyStatusResponse",
)


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/egress/models/network_policy.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, TypeVar

from attrs import define as _attrs_define

from ..models.network_policy_default_action import NetworkPolicyDefaultAction
from ..types import UNSET, Unset

if TYPE_CHECKING:
    from ..models.network_rule import NetworkRule


T = TypeVar("T", bound="NetworkPolicy")


@_attrs_define
class NetworkPolicy:
    """Egress network policy matching the sidecar `/policy` request body.
    If `defaultAction` is omitted, the sidecar defaults to "deny"; passing an empty
    object or null results in allow-all behavior at startup.

        Attributes:
            default_action (NetworkPolicyDefaultAction | Unset): Default action when no egress rule matches. Defaults to
                "deny".
            egress (list[NetworkRule] | Unset): List of egress rules evaluated in order.
    """

    default_action: NetworkPolicyDefaultAction | Unset = UNSET
    egress: list[NetworkRule] | Unset = UNSET

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a JSON-style dict, leaving out every field that is unset.

        Returns:
            A dict that may contain "defaultAction" (the enum's string value) and
            "egress" (a list of serialized rules).
        """
        field_dict: dict[str, Any] = {}

        if not isinstance(self.default_action, Unset):
            field_dict["defaultAction"] = self.default_action.value
        if not isinstance(self.egress, Unset):
            field_dict["egress"] = [rule.to_dict() for rule in self.egress]

        return field_dict

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build a policy from a JSON-style mapping.

        A key that is absent or explicitly ``None`` (JSON ``null``) leaves the
        corresponding field unset, so a null "egress" no longer fails while being
        iterated and a null "defaultAction" is no longer passed to the enum.

        Args:
            src_dict: Mapping that may contain "defaultAction" and "egress"; it is copied, not mutated.

        Returns:
            The populated policy instance.

        Raises:
            ValueError: If "defaultAction" is present but is not a valid enum value.
        """
        # Imported here because the module-level import is only active under TYPE_CHECKING.
        from ..models.network_rule import NetworkRule

        d = dict(src_dict)

        _default_action = d.pop("defaultAction", UNSET)
        default_action: NetworkPolicyDefaultAction | Unset
        if _default_action is None or isinstance(_default_action, Unset):
            default_action = UNSET
        else:
            default_action = NetworkPolicyDefaultAction(_default_action)

        _egress = d.pop("egress", UNSET)
        egress: list[NetworkRule] | Unset = UNSET
        if _egress is not None and not isinstance(_egress, Unset):
            egress = [NetworkRule.from_dict(egress_item_data) for egress_item_data in _egress]

        network_policy = cls(
            default_action=default_action,
            egress=egress,
        )

        return network_policy


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/egress/models/network_policy_default_action.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from enum import Enum


class NetworkPolicyDefaultAction(str, Enum):
    """Allowed values for a policy's default action: "allow" or "deny"."""

    ALLOW = "allow"
    DENY = "deny"

    def __str__(self) -> str:
        # Render the bare value ("allow"/"deny") instead of the qualified member name.
        return f"{self.value}"


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/egress/models/network_rule.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define

from ..models.network_rule_action import NetworkRuleAction

T = TypeVar("T", bound="NetworkRule")


@_attrs_define
class NetworkRule:
    """A single egress rule: an action paired with the target it applies to.

    Attributes:
        action (NetworkRuleAction): Whether to allow or deny matching targets.
        target (str): FQDN or wildcard domain (e.g., "example.com", "*.example.com").
            IP/CIDR not yet supported in the egress MVP.
    """

    action: NetworkRuleAction
    target: str

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a JSON-style dict with the keys "action" and "target"."""
        return {
            "action": self.action.value,
            "target": self.target,
        }

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build a rule from a mapping holding "action" and "target"; the mapping is copied, not mutated."""
        payload = dict(src_dict)
        parsed_action = NetworkRuleAction(payload.pop("action"))
        parsed_target = payload.pop("target")
        return cls(action=parsed_action, target=parsed_target)


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/egress/models/network_rule_action.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from enum import Enum


class NetworkRuleAction(str, Enum):
    """Allowed values for a rule's action: "allow" or "deny"."""

    ALLOW = "allow"
    DENY = "deny"

    def __str__(self) -> str:
        # Render the bare value ("allow"/"deny") instead of the qualified member name.
        raw_value = self.value
        return str(raw_value)


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/egress/models/policy_status_response.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, TypeVar

from attrs import define as _attrs_define

from ..types import UNSET, Unset

if TYPE_CHECKING:
    from ..models.network_policy import NetworkPolicy


T = TypeVar("T", bound="PolicyStatusResponse")


@_attrs_define
class PolicyStatusResponse:
    """Status payload reported by the egress sidecar.

    Attributes:
        status (str | Unset): Operation status reported by the sidecar. Example: ok.
        mode (str | Unset): Derived runtime mode for the current policy. Example: deny_all.
        enforcement_mode (str | Unset): Egress sidecar enforcement backend mode. Example: dns.
        reason (str | Unset): Optional human-readable reason when the sidecar returns extra context.
        policy (NetworkPolicy | Unset): Egress network policy matching the sidecar `/policy` request body.
            If `defaultAction` is omitted, the sidecar defaults to "deny"; passing an empty
            object or null results in allow-all behavior at startup.
    """

    status: str | Unset = UNSET
    mode: str | Unset = UNSET
    enforcement_mode: str | Unset = UNSET
    reason: str | Unset = UNSET
    policy: NetworkPolicy | Unset = UNSET

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a JSON-style dict, leaving out every field that is unset.

        Returns:
            A dict that may contain "status", "mode", "enforcementMode", "reason"
            and "policy" (the serialized nested policy).
        """
        status = self.status

        mode = self.mode

        enforcement_mode = self.enforcement_mode

        reason = self.reason

        policy: dict[str, Any] | Unset = UNSET
        if not isinstance(self.policy, Unset):
            policy = self.policy.to_dict()

        field_dict: dict[str, Any] = {}

        if status is not UNSET:
            field_dict["status"] = status
        if mode is not UNSET:
            field_dict["mode"] = mode
        if enforcement_mode is not UNSET:
            field_dict["enforcementMode"] = enforcement_mode
        if reason is not UNSET:
            field_dict["reason"] = reason
        if policy is not UNSET:
            field_dict["policy"] = policy

        return field_dict

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build a status response from a JSON-style mapping.

        A "policy" key that is absent or explicitly ``None`` (JSON ``null``) leaves
        ``policy`` unset, so a null policy no longer fails while being parsed as a
        nested object. The string fields are passed through as received.

        Args:
            src_dict: Mapping that may contain "status", "mode", "enforcementMode",
                "reason" and "policy"; it is copied, not mutated.

        Returns:
            The populated response instance.
        """
        # Imported here because the module-level import is only active under TYPE_CHECKING.
        from ..models.network_policy import NetworkPolicy

        d = dict(src_dict)
        status = d.pop("status", UNSET)

        mode = d.pop("mode", UNSET)

        enforcement_mode = d.pop("enforcementMode", UNSET)

        reason = d.pop("reason", UNSET)

        _policy = d.pop("policy", UNSET)
        policy: NetworkPolicy | Unset
        if _policy is None or isinstance(_policy, Unset):
            policy = UNSET
        else:
            policy = NetworkPolicy.from_dict(_policy)

        policy_status_response = cls(
            status=status,
            mode=mode,
            enforcement_mode=enforcement_mode,
            reason=reason,
            policy=policy,
        )

        return policy_status_response


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/egress/py.typed
================================================
# Marker file for PEP 561

================================================
FILE: sdks/sandbox/python/src/opensandbox/api/egress/types.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Contains some shared types for properties"""

from collections.abc import Mapping, MutableMapping
from http import HTTPStatus
from typing import IO, BinaryIO, Generic, Literal, TypeVar

from attrs import define


class Unset:
    """Type of the ``UNSET`` sentinel; instances always evaluate as falsy."""

    def __bool__(self) -> Literal[False]:
        # Always falsy, so a truthiness check treats an unset value like an empty one.
        return False


UNSET: Unset = Unset()

# The types that `httpx.Client(files=)` can accept, copied from that library.
FileContent = IO[bytes] | bytes | str
FileTypes = (
    # (filename, file (or bytes), content_type)
    tuple[str | None, FileContent, str | None]
    # (filename, file (or bytes), content_type, headers)
    | tuple[str | None, FileContent, str | None, Mapping[str, str]]
)
RequestFiles = list[tuple[str, FileTypes]]


@define
class File:
    """Describes one file to upload.

    Attributes:
        payload: Binary stream holding the file content.
        file_name: Optional file name to report for the upload.
        mime_type: Optional content type to report for the upload.
    """

    payload: BinaryIO
    file_name: str | None = None
    mime_type: str | None = None

    def to_tuple(self) -> FileTypes:
        """Return the ``(file_name, payload, mime_type)`` tuple that httpx accepts for multipart/form-data"""
        name, stream, content_type = self.file_name, self.payload, self.mime_type
        return (name, stream, content_type)


T = TypeVar("T")


@define
class Response(Generic[T]):
    """A response from an endpoint

    Generic over ``T``, the type of the parsed body.
    """

    # HTTP status of the response as an ``http.HTTPStatus`` member.
    status_code: HTTPStatus
    # Raw response body bytes.
    content: bytes
    # Response headers.
    headers: MutableMapping[str, str]
    # Parsed body, or None when no parsed value is available.
    parsed: T | None


__all__ = ["UNSET", "File", "FileTypes", "RequestFiles", "Response", "Unset"]


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/__init__.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""A client library for accessing OpenSandbox Execd API"""

from .client import AuthenticatedClient, Client

__all__ = (
    "AuthenticatedClient",
    "Client",
)


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/__init__.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Contains methods for accessing the API"""


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/code_interpreting/__init__.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Contains endpoint functions for accessing the API"""


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/code_interpreting/create_code_context.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.code_context import CodeContext
from ...models.code_context_request import CodeContextRequest
from ...models.error_response import ErrorResponse
from ...types import Response


def _get_kwargs(
    *,
    body: CodeContextRequest,
) -> dict[str, Any]:
    """Assemble the httpx request keyword arguments for ``POST /code/context``.

    Args:
        body: Request model; its ``to_dict()`` result is sent as the JSON payload.

    Returns:
        A dict with the keys "method", "url", "json" and "headers".
    """
    request_headers: dict[str, Any] = {"Content-Type": "application/json"}

    return {
        "method": "post",
        "url": "/code/context",
        "json": body.to_dict(),
        "headers": request_headers,
    }


def _parse_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> CodeContext | ErrorResponse | None:
    """Turn the HTTP response into a model according to its status code.

    200 yields a ``CodeContext``; 400 and 500 yield an ``ErrorResponse``. For any other
    status, ``errors.UnexpectedStatus`` is raised when ``client.raise_on_unexpected_status``
    is set, and ``None`` is returned otherwise.
    """
    status = response.status_code

    if status == 200:
        return CodeContext.from_dict(response.json())

    if status in (400, 500):
        return ErrorResponse.from_dict(response.json())

    if not client.raise_on_unexpected_status:
        return None
    raise errors.UnexpectedStatus(status, response.content)


def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[CodeContext | ErrorResponse]:
    """Wrap the httpx response in a ``Response`` carrying status, raw content, headers and the parsed body."""
    # Evaluated in the same order as the keyword arguments below so any failure surfaces identically.
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    response_headers = response.headers
    parsed_body = _parse_response(client=client, response=response)

    return Response(
        status_code=status,
        content=raw_content,
        headers=response_headers,
        parsed=parsed_body,
    )


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
    body: CodeContextRequest,
) -> Response[CodeContext | ErrorResponse]:
    """Create code execution context

    Creates a new code execution environment and returns a session ID that can be used
    for subsequent code execution requests. The context maintains state across multiple
    code executions within the same session.

    Args:
        client: Client whose synchronous httpx client sends the request.
        body (CodeContextRequest): Request to create a code execution context

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[CodeContext | ErrorResponse]
    """

    request_kwargs = _get_kwargs(body=body)
    http_client = client.get_httpx_client()
    raw_response = http_client.request(**request_kwargs)

    return _build_response(client=client, response=raw_response)


def sync(
    *,
    client: AuthenticatedClient | Client,
    body: CodeContextRequest,
) -> CodeContext | ErrorResponse | None:
    """Create code execution context

    Creates a new code execution environment and returns a session ID that can be used
    for subsequent code execution requests. The context maintains state across multiple
    code executions within the same session.

    Args:
        client: Client whose synchronous httpx client sends the request.
        body (CodeContextRequest): Request to create a code execution context

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        CodeContext | ErrorResponse, i.e. the ``parsed`` member of the detailed response
        (which is typed as possibly None).
    """

    detailed = sync_detailed(client=client, body=body)
    return detailed.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
    body: CodeContextRequest,
) -> Response[CodeContext | ErrorResponse]:
    """Create code execution context

    Creates a new code execution environment and returns a session ID that can be used
    for subsequent code execution requests. The context maintains state across multiple
    code executions within the same session.

    Args:
        client: Client whose asynchronous httpx client sends the request.
        body (CodeContextRequest): Request to create a code execution context

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[CodeContext | ErrorResponse]
    """

    request_kwargs = _get_kwargs(body=body)
    http_client = client.get_async_httpx_client()
    raw_response = await http_client.request(**request_kwargs)

    return _build_response(client=client, response=raw_response)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
    body: CodeContextRequest,
) -> CodeContext | ErrorResponse | None:
    """Create code execution context

    Creates a new code execution environment and returns a session ID that can be used
    for subsequent code execution requests. The context maintains state across multiple
    code executions within the same session.

    Args:
        client: Client whose asynchronous httpx client sends the request.
        body (CodeContextRequest): Request to create a code execution context

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        CodeContext | ErrorResponse, i.e. the ``parsed`` member of the detailed response
        (which is typed as possibly None).
    """

    detailed = await asyncio_detailed(client=client, body=body)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/code_interpreting/delete_context.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any, cast
from urllib.parse import quote

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...types import Response


def _get_kwargs(
    context_id: str,
) -> dict[str, Any]:
    _kwargs: dict[str, Any] = {
        "method": "delete",
        "url": "/code/contexts/{context_id}".format(
            context_id=quote(str(context_id), safe=""),
        ),
    }

    return _kwargs


def _parse_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Any | ErrorResponse | None:
    if response.status_code == 200:
        response_200 = cast(Any, None)
        return response_200

    if response.status_code == 400:
        response_400 = ErrorResponse.from_dict(response.json())

        return response_400

    if response.status_code == 404:
        response_404 = ErrorResponse.from_dict(response.json())

        return response_404

    if response.status_code == 500:
        response_500 = ErrorResponse.from_dict(response.json())

        return response_500

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(response.status_code, response.content)
    else:
        return None


def _build_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Response[Any | ErrorResponse]:
    """Wrap the raw httpx response and its parsed payload in a ``Response``.

    The status code is converted to ``HTTPStatus`` before the body is parsed, so a
    failure in that conversion surfaces ahead of any parsing error.
    """
    status = HTTPStatus(response.status_code)
    raw_body = response.content
    header_map = response.headers
    parsed_body = _parse_response(client=client, response=response)
    return Response(
        status_code=status,
        content=raw_body,
        headers=header_map,
        parsed=parsed_body,
    )


def sync_detailed(
    context_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Response[Any | ErrorResponse]:
    """Delete a code execution context by id (blocking, full response)

     Deletes an existing code execution context (session) by id.
    This should terminate the underlying context thread/process and release resources.
    The request is issued through ``client.get_httpx_client()``.

    Args:
        context_id (str): Id of the context to delete.
        client: Client whose httpx client performs the request.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """

    request_args = _get_kwargs(context_id=context_id)
    http_response = client.get_httpx_client().request(**request_args)
    return _build_response(client=client, response=http_response)


def sync(
    context_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Any | ErrorResponse | None:
    """Delete a code execution context by id (blocking, parsed payload only)

     Deletes an existing code execution context (session) by id.
    This should terminate the underlying context thread/process and release resources.

    Args:
        context_id (str): Id of the context to delete.
        client: Client forwarded unchanged to ``sync_detailed``.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ErrorResponse | None: The ``parsed`` attribute of the response from ``sync_detailed``.
    """

    detailed = sync_detailed(context_id=context_id, client=client)
    return detailed.parsed


async def asyncio_detailed(
    context_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Response[Any | ErrorResponse]:
    """Delete a code execution context by id (async, full response)

     Deletes an existing code execution context (session) by id.
    This should terminate the underlying context thread/process and release resources.
    The request is awaited on ``client.get_async_httpx_client()``.

    Args:
        context_id (str): Id of the context to delete.
        client: Client whose async httpx client performs the request.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """

    request_args = _get_kwargs(context_id=context_id)
    async_client = client.get_async_httpx_client()
    http_response = await async_client.request(**request_args)
    return _build_response(client=client, response=http_response)


async def asyncio(
    context_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Any | ErrorResponse | None:
    """Delete a code execution context by id (async, parsed payload only)

     Deletes an existing code execution context (session) by id.
    This should terminate the underlying context thread/process and release resources.

    Args:
        context_id (str): Id of the context to delete.
        client: Client forwarded unchanged to ``asyncio_detailed``.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ErrorResponse | None: The ``parsed`` attribute of the response from ``asyncio_detailed``.
    """

    detailed = await asyncio_detailed(context_id=context_id, client=client)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/code_interpreting/delete_contexts_by_language.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any, cast

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...types import UNSET, Response


def _get_kwargs(
    *,
    language: str,
) -> dict[str, Any]:
    """Assemble the request keyword arguments for the bulk context deletion.

    Args:
        language: Value sent as the ``language`` query parameter. It is left out
            of the query when it is ``UNSET`` or ``None``.

    Returns:
        A dict holding the HTTP ``method``, the ``url`` and the query ``params``.
    """
    query: dict[str, Any] = {}
    if language is not UNSET and language is not None:
        query["language"] = language

    return {
        "method": "delete",
        "url": "/code/contexts",
        "params": query,
    }


def _parse_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Any | ErrorResponse | None:
    if response.status_code == 200:
        response_200 = cast(Any, None)
        return response_200

    if response.status_code == 400:
        response_400 = ErrorResponse.from_dict(response.json())

        return response_400

    if response.status_code == 500:
        response_500 = ErrorResponse.from_dict(response.json())

        return response_500

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(response.status_code, response.content)
    else:
        return None


def _build_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Response[Any | ErrorResponse]:
    """Bundle status, raw content, headers and parsed payload into a ``Response``.

    Fields are computed in the order status, content, headers, parsed, so the
    ``HTTPStatus`` conversion happens before the body is parsed.
    """
    fields: dict[str, Any] = {
        "status_code": HTTPStatus(response.status_code),
        "content": response.content,
        "headers": response.headers,
        "parsed": _parse_response(client=client, response=response),
    }
    return Response(**fields)


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
    language: str,
) -> Response[Any | ErrorResponse]:
    """Delete all contexts under a language (blocking, full response)

     Deletes all existing code execution contexts under the specified `language`/runtime.
    This is a bulk operation intended for code-interpreter context cleanup.
    The request is issued through ``client.get_httpx_client()``.

    Args:
        client: Client whose httpx client performs the request.
        language (str): Sent as the ``language`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """

    call_kwargs = _get_kwargs(language=language)
    raw = client.get_httpx_client().request(**call_kwargs)
    return _build_response(client=client, response=raw)


def sync(
    *,
    client: AuthenticatedClient | Client,
    language: str,
) -> Any | ErrorResponse | None:
    """Delete all contexts under a language (blocking, parsed payload only)

     Deletes all existing code execution contexts under the specified `language`/runtime.
    This is a bulk operation intended for code-interpreter context cleanup.

    Args:
        client: Client forwarded unchanged to ``sync_detailed``.
        language (str): Sent as the ``language`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ErrorResponse | None: The ``parsed`` attribute of the response from ``sync_detailed``.
    """

    outcome = sync_detailed(client=client, language=language)
    return outcome.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
    language: str,
) -> Response[Any | ErrorResponse]:
    """Delete all contexts under a language (async, full response)

     Deletes all existing code execution contexts under the specified `language`/runtime.
    This is a bulk operation intended for code-interpreter context cleanup.
    The request is awaited on ``client.get_async_httpx_client()``.

    Args:
        client: Client whose async httpx client performs the request.
        language (str): Sent as the ``language`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """

    call_kwargs = _get_kwargs(language=language)
    transport = client.get_async_httpx_client()
    raw = await transport.request(**call_kwargs)
    return _build_response(client=client, response=raw)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
    language: str,
) -> Any | ErrorResponse | None:
    """Delete all contexts under a language (async, parsed payload only)

     Deletes all existing code execution contexts under the specified `language`/runtime.
    This is a bulk operation intended for code-interpreter context cleanup.

    Args:
        client: Client forwarded unchanged to ``asyncio_detailed``.
        language (str): Sent as the ``language`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ErrorResponse | None: The ``parsed`` attribute of the response from ``asyncio_detailed``.
    """

    outcome = await asyncio_detailed(client=client, language=language)
    return outcome.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/code_interpreting/get_context.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any
from urllib.parse import quote

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.code_context import CodeContext
from ...models.error_response import ErrorResponse
from ...types import Response


def _get_kwargs(
    context_id: str,
) -> dict[str, Any]:
    _kwargs: dict[str, Any] = {
        "method": "get",
        "url": "/code/contexts/{context_id}".format(
            context_id=quote(str(context_id), safe=""),
        ),
    }

    return _kwargs


def _parse_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> CodeContext | ErrorResponse | None:
    if response.status_code == 200:
        response_200 = CodeContext.from_dict(response.json())

        return response_200

    if response.status_code == 404:
        response_404 = ErrorResponse.from_dict(response.json())

        return response_404

    if response.status_code == 500:
        response_500 = ErrorResponse.from_dict(response.json())

        return response_500

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(response.status_code, response.content)
    else:
        return None


def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[CodeContext | ErrorResponse]:
    """Wrap the raw httpx response and its parsed payload in a ``Response``.

    The status code is converted to ``HTTPStatus`` before the body is parsed, so a
    failure in that conversion surfaces ahead of any parsing error.
    """
    http_status = HTTPStatus(response.status_code)
    body_bytes = response.content
    reply_headers = response.headers
    payload = _parse_response(client=client, response=response)
    return Response(
        status_code=http_status,
        content=body_bytes,
        headers=reply_headers,
        parsed=payload,
    )


def sync_detailed(
    context_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Response[CodeContext | ErrorResponse]:
    """Get a code execution context by id (blocking, full response)

     Retrieves the details of an existing code execution context (session) by id.
    Returns the context ID, language, and any associated metadata.
    The request is issued through ``client.get_httpx_client()``.

    Args:
        context_id (str): Id of the context to fetch.
        client: Client whose httpx client performs the request.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[CodeContext | ErrorResponse]
    """

    req = _get_kwargs(context_id=context_id)
    reply = client.get_httpx_client().request(**req)
    return _build_response(client=client, response=reply)


def sync(
    context_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> CodeContext | ErrorResponse | None:
    """Get a code execution context by id (blocking, parsed payload only)

     Retrieves the details of an existing code execution context (session) by id.
    Returns the context ID, language, and any associated metadata.

    Args:
        context_id (str): Id of the context to fetch.
        client: Client forwarded unchanged to ``sync_detailed``.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        CodeContext | ErrorResponse | None: The ``parsed`` attribute of the response from ``sync_detailed``.
    """

    full = sync_detailed(context_id=context_id, client=client)
    return full.parsed


async def asyncio_detailed(
    context_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Response[CodeContext | ErrorResponse]:
    """Get a code execution context by id (async, full response)

     Retrieves the details of an existing code execution context (session) by id.
    Returns the context ID, language, and any associated metadata.
    The request is awaited on ``client.get_async_httpx_client()``.

    Args:
        context_id (str): Id of the context to fetch.
        client: Client whose async httpx client performs the request.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[CodeContext | ErrorResponse]
    """

    req = _get_kwargs(context_id=context_id)
    async_http = client.get_async_httpx_client()
    reply = await async_http.request(**req)
    return _build_response(client=client, response=reply)


async def asyncio(
    context_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> CodeContext | ErrorResponse | None:
    """Get a code execution context by id (async, parsed payload only)

     Retrieves the details of an existing code execution context (session) by id.
    Returns the context ID, language, and any associated metadata.

    Args:
        context_id (str): Id of the context to fetch.
        client: Client forwarded unchanged to ``asyncio_detailed``.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        CodeContext | ErrorResponse | None: The ``parsed`` attribute of the response from ``asyncio_detailed``.
    """

    full = await asyncio_detailed(context_id=context_id, client=client)
    return full.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/code_interpreting/interrupt_code.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any, cast

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...types import UNSET, Response


def _get_kwargs(
    *,
    id: str,
) -> dict[str, Any]:
    """Assemble the request keyword arguments for interrupting code execution.

    Args:
        id: Value sent as the ``id`` query parameter. It is left out of the query
            when it is ``UNSET`` or ``None``.

    Returns:
        A dict holding the HTTP ``method``, the ``url`` and the query ``params``.
    """
    candidates: dict[str, Any] = {"id": id}
    query = {
        name: value
        for name, value in candidates.items()
        if value is not UNSET and value is not None
    }

    return {"method": "delete", "url": "/code", "params": query}


def _parse_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Any | ErrorResponse | None:
    if response.status_code == 200:
        response_200 = cast(Any, None)
        return response_200

    if response.status_code == 400:
        response_400 = ErrorResponse.from_dict(response.json())

        return response_400

    if response.status_code == 500:
        response_500 = ErrorResponse.from_dict(response.json())

        return response_500

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(response.status_code, response.content)
    else:
        return None


def _build_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Response[Any | ErrorResponse]:
    """Bundle status, raw content, headers and parsed payload into a ``Response``.

    Fields are computed in the order status, content, headers, parsed, so the
    ``HTTPStatus`` conversion happens before the body is parsed.
    """
    status = HTTPStatus(response.status_code)
    content = response.content
    headers = response.headers
    parsed = _parse_response(client=client, response=response)
    return Response(status_code=status, content=content, headers=headers, parsed=parsed)


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
    id: str,
) -> Response[Any | ErrorResponse]:
    """Interrupt code execution (blocking, full response)

     Interrupts the currently running code execution in the specified context.
    This sends a signal to terminate the execution process and releases associated resources.
    The request is issued through ``client.get_httpx_client()``.

    Args:
        client: Client whose httpx client performs the request.
        id (str): Sent as the ``id`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """

    options = _get_kwargs(id=id)
    answer = client.get_httpx_client().request(**options)
    return _build_response(client=client, response=answer)


def sync(
    *,
    client: AuthenticatedClient | Client,
    id: str,
) -> Any | ErrorResponse | None:
    """Interrupt code execution (blocking, parsed payload only)

     Interrupts the currently running code execution in the specified context.
    This sends a signal to terminate the execution process and releases associated resources.

    Args:
        client: Client forwarded unchanged to ``sync_detailed``.
        id (str): Sent as the ``id`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ErrorResponse | None: The ``parsed`` attribute of the response from ``sync_detailed``.
    """

    result = sync_detailed(client=client, id=id)
    return result.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
    id: str,
) -> Response[Any | ErrorResponse]:
    """Interrupt code execution (async, full response)

     Interrupts the currently running code execution in the specified context.
    This sends a signal to terminate the execution process and releases associated resources.
    The request is awaited on ``client.get_async_httpx_client()``.

    Args:
        client: Client whose async httpx client performs the request.
        id (str): Sent as the ``id`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """

    options = _get_kwargs(id=id)
    async_client = client.get_async_httpx_client()
    answer = await async_client.request(**options)
    return _build_response(client=client, response=answer)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
    id: str,
) -> Any | ErrorResponse | None:
    """Interrupt code execution (async, parsed payload only)

     Interrupts the currently running code execution in the specified context.
    This sends a signal to terminate the execution process and releases associated resources.

    Args:
        client: Client forwarded unchanged to ``asyncio_detailed``.
        id (str): Sent as the ``id`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ErrorResponse | None: The ``parsed`` attribute of the response from ``asyncio_detailed``.
    """

    result = await asyncio_detailed(client=client, id=id)
    return result.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/code_interpreting/list_contexts.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.code_context import CodeContext
from ...models.error_response import ErrorResponse
from ...types import UNSET, Response


def _get_kwargs(
    *,
    language: str,
) -> dict[str, Any]:
    """Assemble the request keyword arguments for listing contexts.

    Args:
        language: Value sent as the ``language`` query parameter. It is left out
            of the query when it is ``UNSET`` or ``None``.

    Returns:
        A dict holding the HTTP ``method``, the ``url`` and the query ``params``.
    """
    query: dict[str, Any] = {}
    if language is not UNSET and language is not None:
        query["language"] = language

    request: dict[str, Any] = {"method": "get", "url": "/code/contexts"}
    request["params"] = query
    return request


def _parse_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> ErrorResponse | list[CodeContext] | None:
    if response.status_code == 200:
        response_200 = []
        _response_200 = response.json()
        for response_200_item_data in _response_200:
            response_200_item = CodeContext.from_dict(response_200_item_data)

            response_200.append(response_200_item)

        return response_200

    if response.status_code == 400:
        response_400 = ErrorResponse.from_dict(response.json())

        return response_400

    if response.status_code == 500:
        response_500 = ErrorResponse.from_dict(response.json())

        return response_500

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(response.status_code, response.content)
    else:
        return None


def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[ErrorResponse | list[CodeContext]]:
    """Wrap the raw httpx response and its parsed payload in a ``Response``.

    The status code is converted to ``HTTPStatus`` before the body is parsed, so a
    failure in that conversion surfaces ahead of any parsing error.
    """
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    raw_headers = response.headers
    parsed_items = _parse_response(client=client, response=response)
    return Response(
        status_code=status,
        content=raw_content,
        headers=raw_headers,
        parsed=parsed_items,
    )


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
    language: str,
) -> Response[ErrorResponse | list[CodeContext]]:
    """List active code execution contexts (blocking, full response)

     Lists all active/available code execution contexts.
    If `language` is provided, only contexts under that language/runtime are returned.
    In this function ``language`` is a required argument. The request is issued
    through ``client.get_httpx_client()``.

    Args:
        client: Client whose httpx client performs the request.
        language (str): Sent as the ``language`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | list[CodeContext]]
    """

    request_kwargs = _get_kwargs(language=language)
    httpx_response = client.get_httpx_client().request(**request_kwargs)
    return _build_response(client=client, response=httpx_response)


def sync(
    *,
    client: AuthenticatedClient | Client,
    language: str,
) -> ErrorResponse | list[CodeContext] | None:
    """List active code execution contexts (blocking, parsed payload only)

     Lists all active/available code execution contexts.
    If `language` is provided, only contexts under that language/runtime are returned.
    In this function ``language`` is a required argument.

    Args:
        client: Client forwarded unchanged to ``sync_detailed``.
        language (str): Sent as the ``language`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        ErrorResponse | list[CodeContext] | None: The ``parsed`` attribute of the response from ``sync_detailed``.
    """

    wrapped = sync_detailed(client=client, language=language)
    return wrapped.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
    language: str,
) -> Response[ErrorResponse | list[CodeContext]]:
    """List active code execution contexts (async, full response)

     Lists all active/available code execution contexts.
    If `language` is provided, only contexts under that language/runtime are returned.
    In this function ``language`` is a required argument. The request is awaited on
    ``client.get_async_httpx_client()``.

    Args:
        client: Client whose async httpx client performs the request.
        language (str): Sent as the ``language`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | list[CodeContext]]
    """

    request_kwargs = _get_kwargs(language=language)
    async_httpx = client.get_async_httpx_client()
    httpx_response = await async_httpx.request(**request_kwargs)
    return _build_response(client=client, response=httpx_response)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
    language: str,
) -> ErrorResponse | list[CodeContext] | None:
    """List active code execution contexts (async, parsed payload only)

     Lists all active/available code execution contexts.
    If `language` is provided, only contexts under that language/runtime are returned.
    In this function ``language`` is a required argument.

    Args:
        client: Client forwarded unchanged to ``asyncio_detailed``.
        language (str): Sent as the ``language`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        ErrorResponse | list[CodeContext] | None: The ``parsed`` attribute of the response from ``asyncio_detailed``.
    """

    wrapped = await asyncio_detailed(client=client, language=language)
    return wrapped.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/code_interpreting/run_code.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...models.run_code_request import RunCodeRequest
from ...models.server_stream_event import ServerStreamEvent
from ...types import Response


def _get_kwargs(
    *,
    body: RunCodeRequest,
) -> dict[str, Any]:
    headers: dict[str, Any] = {}

    _kwargs: dict[str, Any] = {
        "method": "post",
        "url": "/code",
    }

    _kwargs["json"] = body.to_dict()

    headers["Content-Type"] = "application/json"

    _kwargs["headers"] = headers
    return _kwargs


def _parse_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> ErrorResponse | ServerStreamEvent | None:
    if response.status_code == 200:
        response_200 = ServerStreamEvent.from_dict(response.text)

        return response_200

    if response.status_code == 400:
        response_400 = ErrorResponse.from_dict(response.json())

        return response_400

    if response.status_code == 500:
        response_500 = ErrorResponse.from_dict(response.json())

        return response_500

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(response.status_code, response.content)
    else:
        return None


def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[ErrorResponse | ServerStreamEvent]:
    """Bundle status, raw content, headers and parsed payload into a ``Response``.

    Fields are computed in the order status, content, headers, parsed, so the
    ``HTTPStatus`` conversion happens before the body is parsed.
    """
    fields: dict[str, Any] = {
        "status_code": HTTPStatus(response.status_code),
        "content": response.content,
        "headers": response.headers,
        "parsed": _parse_response(client=client, response=response),
    }
    return Response(**fields)


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
    body: RunCodeRequest,
) -> Response[ErrorResponse | ServerStreamEvent]:
    """Execute code in context (blocking, full response)

     Executes code using Jupyter kernel in a specified execution context and streams
    the output in real-time using SSE (Server-Sent Events). Supports multiple programming
    languages (Python, JavaScript, etc.) and maintains execution state within the session.
    Returns execution results, output streams, execution count, and any errors.
    The request is issued through ``client.get_httpx_client()``.

    Args:
        client: Client whose httpx client performs the request.
        body (RunCodeRequest): Request to execute code in a context

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | ServerStreamEvent]
    """

    payload_kwargs = _get_kwargs(body=body)
    server_reply = client.get_httpx_client().request(**payload_kwargs)
    return _build_response(client=client, response=server_reply)


def sync(
    *,
    client: AuthenticatedClient | Client,
    body: RunCodeRequest,
) -> ErrorResponse | ServerStreamEvent | None:
    """Execute code in context (blocking, parsed payload only)

     Executes code using Jupyter kernel in a specified execution context and streams
    the output in real-time using SSE (Server-Sent Events). Supports multiple programming
    languages (Python, JavaScript, etc.) and maintains execution state within the session.
    Returns execution results, output streams, execution count, and any errors.

    Args:
        client: Client forwarded unchanged to ``sync_detailed``.
        body (RunCodeRequest): Request to execute code in a context

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        ErrorResponse | ServerStreamEvent | None: The ``parsed`` attribute of the response from ``sync_detailed``.
    """

    envelope = sync_detailed(client=client, body=body)
    return envelope.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
    body: RunCodeRequest,
) -> Response[ErrorResponse | ServerStreamEvent]:
    """Execute code in context (async, full response).

    Per the API description, the endpoint executes code using a Jupyter kernel
    in a specified execution context and reports output via SSE (Server-Sent
    Events), including execution results, output streams, execution count and
    any errors.

    The request arguments come from ``_get_kwargs``; the request is awaited on
    the client's async httpx client and the outcome is wrapped by
    ``_build_response``.

    Args:
        body (RunCodeRequest): Request to execute code in a context

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | ServerStreamEvent]
    """

    request_kwargs = _get_kwargs(body=body)
    http_client = client.get_async_httpx_client()
    raw_response = await http_client.request(**request_kwargs)
    return _build_response(client=client, response=raw_response)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
    body: RunCodeRequest,
) -> ErrorResponse | ServerStreamEvent | None:
    """Execute code in context (async) and return only the parsed payload.

    Awaits ``asyncio_detailed`` with the same arguments and returns the
    ``parsed`` attribute of its result; status code, headers and raw content
    are not exposed by this wrapper.

    Args:
        body (RunCodeRequest): Request to execute code in a context

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        ErrorResponse | ServerStreamEvent | None: the ``parsed`` value produced
        by ``asyncio_detailed``.
    """

    detailed = await asyncio_detailed(client=client, body=body)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/command/__init__.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Contains endpoint functions for accessing the API"""


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/command/get_background_command_logs.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any
from urllib.parse import quote

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...types import UNSET, Response, Unset


def _get_kwargs(
    id: str,
    *,
    cursor: int | Unset = UNSET,
) -> dict[str, Any]:
    """Assemble the httpx request arguments for ``GET /command/{id}/logs``.

    Args:
        id (str): Command ID; it is percent-encoded before being placed in the path.
        cursor (int | Unset): Optional ``cursor`` query value. It is omitted from
            the query parameters when it is ``UNSET`` or ``None``.

    Returns:
        dict[str, Any]: A mapping with ``method``, ``url`` and ``params`` entries.
    """
    query: dict[str, Any] = {"cursor": cursor}
    # Only forward query values the caller actually supplied.
    sent_query = {name: value for name, value in query.items() if not (value is UNSET or value is None)}

    # safe="" also escapes "/", so the id cannot spill into another path segment.
    encoded_id = quote(str(id), safe="")

    return {
        "method": "get",
        "url": "/command/{id}/logs".format(id=encoded_id),
        "params": sent_query,
    }


def _parse_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> ErrorResponse | str | None:
    """Convert the raw HTTP response into the value exposed as ``parsed``.

    Args:
        client: Client whose ``raise_on_unexpected_status`` flag decides how
            undocumented status codes are handled.
        response: The httpx response to interpret.

    Raises:
        errors.UnexpectedStatus: For a status other than 200/400/404/500 when
            ``client.raise_on_unexpected_status`` is true.

    Returns:
        ErrorResponse | str | None: the body text for 200, an ``ErrorResponse``
        built from the JSON body for 400/404/500, otherwise ``None``.
    """
    status = response.status_code

    if status == 200:
        # The success body is returned as text, not decoded as JSON.
        return response.text

    if status in (400, 404, 500):
        return ErrorResponse.from_dict(response.json())

    if not client.raise_on_unexpected_status:
        return None
    raise errors.UnexpectedStatus(status, response.content)


def _build_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Response[ErrorResponse | str]:
    """Wrap an httpx response in the SDK ``Response`` container.

    The container carries the status code as an ``HTTPStatus``, the raw
    content, the headers, and the result of ``_parse_response``.
    """
    # The status code is converted before the body is parsed.
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    response_headers = response.headers
    parsed_body = _parse_response(client=client, response=response)

    return Response(
        status_code=status,
        content=raw_content,
        headers=response_headers,
        parsed=parsed_body,
    )


def sync_detailed(
    id: str,
    *,
    client: AuthenticatedClient | Client,
    cursor: int | Unset = UNSET,
) -> Response[ErrorResponse | str]:
    """Get background command stdout/stderr (non-streamed), full response.

    Per the API description: returns stdout and stderr for a background
    (detached) command by command ID. Foreground commands should be consumed
    via SSE; this endpoint is intended for polling logs of background commands.
    Reads can be incremental, similar to a file seek: pass a starting line via
    the query to fetch output after that line. When no starting line is
    provided, the full logs are returned. The body is plain text, and the
    latest line index is provided via the response header
    `EXECD-COMMANDS-TAIL-CURSOR` for subsequent incremental requests.

    The returned ``Response`` includes the response headers.

    Args:
        id (str): Command ID, sent as part of the URL path.
        cursor (int | Unset): Sent as the ``cursor`` query parameter when set.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | str]
    """

    request_kwargs = _get_kwargs(id=id, cursor=cursor)
    http_client = client.get_httpx_client()
    raw_response = http_client.request(**request_kwargs)
    return _build_response(client=client, response=raw_response)


def sync(
    id: str,
    *,
    client: AuthenticatedClient | Client,
    cursor: int | Unset = UNSET,
) -> ErrorResponse | str | None:
    """Get background command stdout/stderr (non-streamed), parsed body only.

    Calls ``sync_detailed`` with the same arguments and returns the ``parsed``
    attribute of its result. Response headers are not part of the return value,
    so the `EXECD-COMMANDS-TAIL-CURSOR` header mentioned in the API description
    is only reachable through ``sync_detailed``.

    Per the API description, the endpoint returns stdout and stderr of a
    background (detached) command as plain text and supports incremental reads
    from a starting line.

    Args:
        id (str): Command ID, sent as part of the URL path.
        cursor (int | Unset): Sent as the ``cursor`` query parameter when set.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        ErrorResponse | str | None: the ``parsed`` value produced by
        ``sync_detailed``.
    """

    detailed = sync_detailed(id=id, client=client, cursor=cursor)
    return detailed.parsed


async def asyncio_detailed(
    id: str,
    *,
    client: AuthenticatedClient | Client,
    cursor: int | Unset = UNSET,
) -> Response[ErrorResponse | str]:
    """Get background command stdout/stderr (non-streamed), async, full response.

    Per the API description: returns stdout and stderr for a background
    (detached) command by command ID. Foreground commands should be consumed
    via SSE; this endpoint is intended for polling logs of background commands.
    Reads can be incremental, similar to a file seek: pass a starting line via
    the query to fetch output after that line. When no starting line is
    provided, the full logs are returned. The body is plain text, and the
    latest line index is provided via the response header
    `EXECD-COMMANDS-TAIL-CURSOR` for subsequent incremental requests.

    The returned ``Response`` includes the response headers.

    Args:
        id (str): Command ID, sent as part of the URL path.
        cursor (int | Unset): Sent as the ``cursor`` query parameter when set.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | str]
    """

    request_kwargs = _get_kwargs(id=id, cursor=cursor)
    http_client = client.get_async_httpx_client()
    raw_response = await http_client.request(**request_kwargs)
    return _build_response(client=client, response=raw_response)


async def asyncio(
    id: str,
    *,
    client: AuthenticatedClient | Client,
    cursor: int | Unset = UNSET,
) -> ErrorResponse | str | None:
    """Get background command stdout/stderr (non-streamed), async, parsed body only.

    Awaits ``asyncio_detailed`` with the same arguments and returns the
    ``parsed`` attribute of its result. Response headers are not part of the
    return value, so the `EXECD-COMMANDS-TAIL-CURSOR` header mentioned in the
    API description is only reachable through ``asyncio_detailed``.

    Per the API description, the endpoint returns stdout and stderr of a
    background (detached) command as plain text and supports incremental reads
    from a starting line.

    Args:
        id (str): Command ID, sent as part of the URL path.
        cursor (int | Unset): Sent as the ``cursor`` query parameter when set.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        ErrorResponse | str | None: the ``parsed`` value produced by
        ``asyncio_detailed``.
    """

    detailed = await asyncio_detailed(id=id, client=client, cursor=cursor)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/command/get_command_status.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any
from urllib.parse import quote

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.command_status_response import CommandStatusResponse
from ...models.error_response import ErrorResponse
from ...types import Response


def _get_kwargs(
    id: str,
) -> dict[str, Any]:
    _kwargs: dict[str, Any] = {
        "method": "get",
        "url": "/command/status/{id}".format(
            id=quote(str(id), safe=""),
        ),
    }

    return _kwargs


def _parse_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> CommandStatusResponse | ErrorResponse | None:
    """Convert the raw HTTP response into the value exposed as ``parsed``.

    Args:
        client: Client whose ``raise_on_unexpected_status`` flag decides how
            undocumented status codes are handled.
        response: The httpx response to interpret.

    Raises:
        errors.UnexpectedStatus: For a status other than 200/400/404/500 when
            ``client.raise_on_unexpected_status`` is true.

    Returns:
        CommandStatusResponse | ErrorResponse | None: a ``CommandStatusResponse``
        built from the JSON body for 200, an ``ErrorResponse`` built from the
        JSON body for 400/404/500, otherwise ``None``.
    """
    status = response.status_code

    if status == 200:
        return CommandStatusResponse.from_dict(response.json())

    if status in (400, 404, 500):
        return ErrorResponse.from_dict(response.json())

    if not client.raise_on_unexpected_status:
        return None
    raise errors.UnexpectedStatus(status, response.content)


def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[CommandStatusResponse | ErrorResponse]:
    """Wrap an httpx response in the SDK ``Response`` container.

    The container carries the status code as an ``HTTPStatus``, the raw
    content, the headers, and the result of ``_parse_response``.
    """
    # The status code is converted before the body is parsed.
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    response_headers = response.headers
    parsed_body = _parse_response(client=client, response=response)

    return Response(
        status_code=status,
        content=raw_content,
        headers=response_headers,
        parsed=parsed_body,
    )


def sync_detailed(
    id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Response[CommandStatusResponse | ErrorResponse]:
    """Get command running status (full response).

    Per the API description: returns the current status of a command
    (foreground or background) by command ID, including running flag, exit
    code, error (if any), and start/finish timestamps.

    Args:
        id (str): Command ID, sent as part of the URL path.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[CommandStatusResponse | ErrorResponse]
    """

    request_kwargs = _get_kwargs(id=id)
    http_client = client.get_httpx_client()
    raw_response = http_client.request(**request_kwargs)
    return _build_response(client=client, response=raw_response)


def sync(
    id: str,
    *,
    client: AuthenticatedClient | Client,
) -> CommandStatusResponse | ErrorResponse | None:
    """Get command running status (parsed body only).

    Calls ``sync_detailed`` with the same arguments and returns the ``parsed``
    attribute of its result.

    Per the API description, the status covers foreground and background
    commands and includes running flag, exit code, error (if any), and
    start/finish timestamps.

    Args:
        id (str): Command ID, sent as part of the URL path.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        CommandStatusResponse | ErrorResponse | None: the ``parsed`` value
        produced by ``sync_detailed``.
    """

    detailed = sync_detailed(id=id, client=client)
    return detailed.parsed


async def asyncio_detailed(
    id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Response[CommandStatusResponse | ErrorResponse]:
    """Get command running status (async, full response).

    Per the API description: returns the current status of a command
    (foreground or background) by command ID, including running flag, exit
    code, error (if any), and start/finish timestamps.

    Args:
        id (str): Command ID, sent as part of the URL path.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[CommandStatusResponse | ErrorResponse]
    """

    request_kwargs = _get_kwargs(id=id)
    http_client = client.get_async_httpx_client()
    raw_response = await http_client.request(**request_kwargs)
    return _build_response(client=client, response=raw_response)


async def asyncio(
    id: str,
    *,
    client: AuthenticatedClient | Client,
) -> CommandStatusResponse | ErrorResponse | None:
    """Get command running status (async, parsed body only).

    Awaits ``asyncio_detailed`` with the same arguments and returns the
    ``parsed`` attribute of its result.

    Per the API description, the status covers foreground and background
    commands and includes running flag, exit code, error (if any), and
    start/finish timestamps.

    Args:
        id (str): Command ID, sent as part of the URL path.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        CommandStatusResponse | ErrorResponse | None: the ``parsed`` value
        produced by ``asyncio_detailed``.
    """

    detailed = await asyncio_detailed(id=id, client=client)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/command/interrupt_command.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any, cast

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...types import UNSET, Response


def _get_kwargs(
    *,
    id: str,
) -> dict[str, Any]:
    """Assemble the httpx request arguments for ``DELETE /command``.

    Args:
        id (str): Sent as the ``id`` query parameter (not as part of the path).
            It is omitted from the query parameters when it is ``UNSET`` or ``None``.

    Returns:
        dict[str, Any]: A mapping with ``method``, ``url`` and ``params`` entries.
    """
    query: dict[str, Any] = {"id": id}
    # Only forward query values the caller actually supplied.
    sent_query = {name: value for name, value in query.items() if not (value is UNSET or value is None)}

    return {
        "method": "delete",
        "url": "/command",
        "params": sent_query,
    }


def _parse_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Any | ErrorResponse | None:
    """Convert the raw HTTP response into the value exposed as ``parsed``.

    Args:
        client: Client whose ``raise_on_unexpected_status`` flag decides how
            undocumented status codes are handled.
        response: The httpx response to interpret.

    Raises:
        errors.UnexpectedStatus: For a status other than 200/400/500 when
            ``client.raise_on_unexpected_status`` is true.

    Returns:
        Any | ErrorResponse | None: ``None`` for 200 (the body is not read), an
        ``ErrorResponse`` built from the JSON body for 400/500, otherwise ``None``.
    """
    status = response.status_code

    if status == 200:
        # Success carries no parsed payload; the cast only widens the static type.
        return cast(Any, None)

    if status in (400, 500):
        return ErrorResponse.from_dict(response.json())

    if not client.raise_on_unexpected_status:
        return None
    raise errors.UnexpectedStatus(status, response.content)


def _build_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Response[Any | ErrorResponse]:
    """Wrap an httpx response in the SDK ``Response`` container.

    The container carries the status code as an ``HTTPStatus``, the raw
    content, the headers, and the result of ``_parse_response``.
    """
    # The status code is converted before the body is parsed.
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    response_headers = response.headers
    parsed_body = _parse_response(client=client, response=response)

    return Response(
        status_code=status,
        content=raw_content,
        headers=response_headers,
        parsed=parsed_body,
    )


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
    id: str,
) -> Response[Any | ErrorResponse]:
    """Interrupt command execution (full response).

    Per the API description: interrupts the currently running command
    execution in the specified context, sending a signal to terminate the
    execution process and releasing associated resources.

    Args:
        id (str): Sent as the ``id`` query parameter of the request.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """

    request_kwargs = _get_kwargs(id=id)
    http_client = client.get_httpx_client()
    raw_response = http_client.request(**request_kwargs)
    return _build_response(client=client, response=raw_response)


def sync(
    *,
    client: AuthenticatedClient | Client,
    id: str,
) -> Any | ErrorResponse | None:
    """Interrupt command execution (parsed body only).

    Calls ``sync_detailed`` with the same arguments and returns the ``parsed``
    attribute of its result. Use ``sync_detailed`` when the status code is
    needed.

    Args:
        id (str): Sent as the ``id`` query parameter of the request.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ErrorResponse | None: the ``parsed`` value produced by
        ``sync_detailed``.
    """

    detailed = sync_detailed(client=client, id=id)
    return detailed.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
    id: str,
) -> Response[Any | ErrorResponse]:
    """Interrupt command execution (async, full response).

    Per the API description: interrupts the currently running command
    execution in the specified context, sending a signal to terminate the
    execution process and releasing associated resources.

    Args:
        id (str): Sent as the ``id`` query parameter of the request.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """

    request_kwargs = _get_kwargs(id=id)
    http_client = client.get_async_httpx_client()
    raw_response = await http_client.request(**request_kwargs)
    return _build_response(client=client, response=raw_response)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
    id: str,
) -> Any | ErrorResponse | None:
    """Interrupt command execution (async, parsed body only).

    Awaits ``asyncio_detailed`` with the same arguments and returns the
    ``parsed`` attribute of its result. Use ``asyncio_detailed`` when the
    status code is needed.

    Args:
        id (str): Sent as the ``id`` query parameter of the request.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ErrorResponse | None: the ``parsed`` value produced by
        ``asyncio_detailed``.
    """

    detailed = await asyncio_detailed(client=client, id=id)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/command/run_command.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...models.run_command_request import RunCommandRequest
from ...models.server_stream_event import ServerStreamEvent
from ...types import Response


def _get_kwargs(
    *,
    body: RunCommandRequest,
) -> dict[str, Any]:
    """Assemble the httpx request arguments for ``POST /command``.

    Args:
        body (RunCommandRequest): Request model; its ``to_dict()`` result is
            sent as the JSON payload.

    Returns:
        dict[str, Any]: A mapping with ``method``, ``url``, ``json`` and
        ``headers`` entries.
    """
    return {
        "method": "post",
        "url": "/command",
        "json": body.to_dict(),
        "headers": {"Content-Type": "application/json"},
    }


def _parse_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> ErrorResponse | ServerStreamEvent | None:
    """Convert the raw HTTP response into the value exposed as ``parsed``.

    Args:
        client: Client whose ``raise_on_unexpected_status`` flag decides how
            undocumented status codes are handled.
        response: The httpx response to interpret.

    Raises:
        errors.UnexpectedStatus: For a status other than 200/400/500 when
            ``client.raise_on_unexpected_status`` is true.

    Returns:
        ErrorResponse | ServerStreamEvent | None: the result of
        ``ServerStreamEvent.from_dict`` for 200, an ``ErrorResponse`` built
        from the JSON body for 400/500, otherwise ``None``.
    """
    status = response.status_code

    if status == 200:
        # NOTE(review): unlike the error branches, which pass decoded JSON
        # (``response.json()``), this hands the raw ``response.text`` string to
        # ``from_dict``. Presumably a consequence of the SSE content type of
        # this endpoint -- confirm that ``ServerStreamEvent.from_dict`` can
        # accept a str before relying on the parsed value for 200 responses.
        return ServerStreamEvent.from_dict(response.text)

    if status in (400, 500):
        return ErrorResponse.from_dict(response.json())

    if not client.raise_on_unexpected_status:
        return None
    raise errors.UnexpectedStatus(status, response.content)


def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[ErrorResponse | ServerStreamEvent]:
    """Wrap an httpx response in the SDK ``Response`` container.

    The container carries the status code as an ``HTTPStatus``, the raw
    content, the headers, and the result of ``_parse_response``.
    """
    # The status code is converted before the body is parsed.
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    response_headers = response.headers
    parsed_body = _parse_response(client=client, response=response)

    return Response(
        status_code=status,
        content=raw_content,
        headers=response_headers,
        parsed=parsed_body,
    )


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
    body: RunCommandRequest,
) -> Response[ErrorResponse | ServerStreamEvent]:
    """Execute shell command (full response).

    Per the API description: executes a shell command and streams the output
    using SSE (Server-Sent Events). The command can run in foreground or
    background mode, and the response includes stdout, stderr, execution
    status, and completion events. `timeout` (milliseconds) may be given to
    enforce a maximum runtime, after which the server terminates the process.
    `uid`/`gid` select the user/group IDs to run with, and `envs` injects
    environment variables.

    Args:
        body (RunCommandRequest): Request to execute a shell command

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | ServerStreamEvent]
    """

    request_kwargs = _get_kwargs(body=body)
    http_client = client.get_httpx_client()
    raw_response = http_client.request(**request_kwargs)
    return _build_response(client=client, response=raw_response)


def sync(
    *,
    client: AuthenticatedClient | Client,
    body: RunCommandRequest,
) -> ErrorResponse | ServerStreamEvent | None:
    """Execute shell command (parsed body only).

    Calls ``sync_detailed`` with the same arguments and returns the ``parsed``
    attribute of its result; status code, headers and raw content are not
    exposed by this wrapper.

    Per the API description, the endpoint executes a shell command in
    foreground or background mode and reports output via SSE (Server-Sent
    Events). The request may carry `timeout` (milliseconds), `uid`/`gid` and
    `envs`.

    Args:
        body (RunCommandRequest): Request to execute a shell command

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        ErrorResponse | ServerStreamEvent | None: the ``parsed`` value produced
        by ``sync_detailed``.
    """

    detailed = sync_detailed(client=client, body=body)
    return detailed.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
    body: RunCommandRequest,
) -> Response[ErrorResponse | ServerStreamEvent]:
    """Execute shell command (async, full response).

    Per the API description: executes a shell command and streams the output
    using SSE (Server-Sent Events). The command can run in foreground or
    background mode, and the response includes stdout, stderr, execution
    status, and completion events. `timeout` (milliseconds) may be given to
    enforce a maximum runtime, after which the server terminates the process.
    `uid`/`gid` select the user/group IDs to run with, and `envs` injects
    environment variables.

    Args:
        body (RunCommandRequest): Request to execute a shell command

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | ServerStreamEvent]
    """

    request_kwargs = _get_kwargs(body=body)
    http_client = client.get_async_httpx_client()
    raw_response = await http_client.request(**request_kwargs)
    return _build_response(client=client, response=raw_response)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
    body: RunCommandRequest,
) -> ErrorResponse | ServerStreamEvent | None:
    """Execute shell command (async, parsed body only).

    Awaits ``asyncio_detailed`` with the same arguments and returns the
    ``parsed`` attribute of its result; status code, headers and raw content
    are not exposed by this wrapper.

    Per the API description, the endpoint executes a shell command in
    foreground or background mode and reports output via SSE (Server-Sent
    Events). The request may carry `timeout` (milliseconds), `uid`/`gid` and
    `envs`.

    Args:
        body (RunCommandRequest): Request to execute a shell command

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        ErrorResponse | ServerStreamEvent | None: the ``parsed`` value produced
        by ``asyncio_detailed``.
    """

    detailed = await asyncio_detailed(client=client, body=body)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/filesystem/__init__.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Contains endpoint functions for accessing the API"""


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/filesystem/chmod_files.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any, cast

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.chmod_files_body import ChmodFilesBody
from ...models.error_response import ErrorResponse
from ...types import Response


def _get_kwargs(
    *,
    body: ChmodFilesBody,
) -> dict[str, Any]:
    """Assemble the httpx request arguments for ``POST /files/permissions``.

    Args:
        body (ChmodFilesBody): Request model; its ``to_dict()`` result is sent
            as the JSON payload.

    Returns:
        dict[str, Any]: A mapping with ``method``, ``url``, ``json`` and
        ``headers`` entries.
    """
    return {
        "method": "post",
        "url": "/files/permissions",
        "json": body.to_dict(),
        "headers": {"Content-Type": "application/json"},
    }


def _parse_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Any | ErrorResponse | None:
    """Translate the raw HTTP response into the endpoint's parsed result.

    Args:
        client: Client whose ``raise_on_unexpected_status`` flag decides how
            undocumented status codes are handled.
        response: The httpx response to interpret.

    Returns:
        ``None`` for HTTP 200, an ``ErrorResponse`` built from the JSON body
        for HTTP 400 or 500, and ``None`` for any other status when the client
        is not configured to raise.

    Raises:
        errors.UnexpectedStatus: On an undocumented status code when
            ``client.raise_on_unexpected_status`` is truthy.
    """
    status = response.status_code

    # Success carries no payload for this endpoint.
    if status == 200:
        return None

    # Both documented failure codes share the same error schema.
    if status in (400, 500):
        return ErrorResponse.from_dict(response.json())

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(status, response.content)
    return None


def _build_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Response[Any | ErrorResponse]:
    """Wrap an httpx response in the typed ``Response`` container.

    Args:
        client: Client forwarded to ``_parse_response``.
        response: The httpx response to wrap.

    Returns:
        A ``Response`` holding the status code, raw content, headers and the
        parsed payload.
    """
    # Field values are gathered in the same order the container lists them.
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    response_headers = response.headers
    parsed_payload = _parse_response(client=client, response=response)
    return Response(
        status_code=status,
        content=raw_content,
        headers=response_headers,
        parsed=parsed_payload,
    )


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
    body: ChmodFilesBody,
) -> Response[Any | ErrorResponse]:
    """Change file permissions (blocking, full response).

    Changes permissions (mode), owner, and group for one or multiple files.
    The body maps file paths to permission settings including octal mode,
    owner username, and group name.

    Args:
        client: Client providing the synchronous httpx client.
        body (ChmodFilesBody): Request payload.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """
    request_args = _get_kwargs(body=body)
    return _build_response(
        client=client,
        response=client.get_httpx_client().request(**request_args),
    )


def sync(
    *,
    client: AuthenticatedClient | Client,
    body: ChmodFilesBody,
) -> Any | ErrorResponse | None:
    """Change file permissions (blocking, parsed result only).

    Changes permissions (mode), owner, and group for one or multiple files.
    The body maps file paths to permission settings including octal mode,
    owner username, and group name.

    Args:
        client: Client providing the synchronous httpx client.
        body (ChmodFilesBody): Request payload.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        The ``parsed`` attribute of the detailed response: ``None`` on HTTP
        200, an ``ErrorResponse`` on HTTP 400/500, or ``None`` for an
        undocumented status when the client does not raise.
    """
    detailed = sync_detailed(client=client, body=body)
    return detailed.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
    body: ChmodFilesBody,
) -> Response[Any | ErrorResponse]:
    """Change file permissions (async, full response).

    Changes permissions (mode), owner, and group for one or multiple files.
    The body maps file paths to permission settings including octal mode,
    owner username, and group name.

    Args:
        client: Client providing the asynchronous httpx client.
        body (ChmodFilesBody): Request payload.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """
    request_args = _get_kwargs(body=body)
    async_http = client.get_async_httpx_client()
    http_response = await async_http.request(**request_args)
    return _build_response(client=client, response=http_response)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
    body: ChmodFilesBody,
) -> Any | ErrorResponse | None:
    """Change file permissions (async, parsed result only).

    Changes permissions (mode), owner, and group for one or multiple files.
    The body maps file paths to permission settings including octal mode,
    owner username, and group name.

    Args:
        client: Client providing the asynchronous httpx client.
        body (ChmodFilesBody): Request payload.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        The ``parsed`` attribute of the detailed response: ``None`` on HTTP
        200, an ``ErrorResponse`` on HTTP 400/500, or ``None`` for an
        undocumented status when the client does not raise.
    """
    detailed = await asyncio_detailed(client=client, body=body)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/filesystem/download_file.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from io import BytesIO
from typing import Any

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...types import UNSET, File, Response, Unset


def _get_kwargs(
    *,
    path: str,
    range_: str | Unset = UNSET,
) -> dict[str, Any]:
    """Assemble the httpx request arguments for the file download call.

    Args:
        path: Sent as the ``path`` query parameter.
        range_: Optional value for the ``Range`` header; the header is left
            out when this is an ``Unset`` instance.

    Returns:
        Keyword arguments for ``httpx.Client.request``: a ``GET`` to
        ``/files/download`` with query parameters and headers.
    """
    request_headers: dict[str, Any] = {} if isinstance(range_, Unset) else {"Range": range_}

    # Drop query entries that are unset or None before sending.
    query: dict[str, Any] = {
        name: value for name, value in {"path": path}.items() if not (value is UNSET or value is None)
    }

    return {
        "method": "get",
        "url": "/files/download",
        "params": query,
        "headers": request_headers,
    }


def _parse_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> ErrorResponse | File | None:
    """Translate the raw HTTP response into the endpoint's parsed result.

    Args:
        client: Client whose ``raise_on_unexpected_status`` flag decides how
            undocumented status codes are handled.
        response: The httpx response to interpret.

    Returns:
        A ``File`` wrapping the response bytes for HTTP 200 or 206, an
        ``ErrorResponse`` built from the JSON body for HTTP 400, 404, 416 or
        500, and ``None`` for any other status when the client is not
        configured to raise.

    Raises:
        errors.UnexpectedStatus: On an undocumented status code when
            ``client.raise_on_unexpected_status`` is truthy.
    """
    status = response.status_code

    # Full and partial content are both returned as an in-memory file.
    if status in (200, 206):
        return File(payload=BytesIO(response.content))

    # All documented failure codes share the same error schema.
    if status in (400, 404, 416, 500):
        return ErrorResponse.from_dict(response.json())

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(status, response.content)
    return None


def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[ErrorResponse | File]:
    """Wrap an httpx response in the typed ``Response`` container.

    Args:
        client: Client forwarded to ``_parse_response``.
        response: The httpx response to wrap.

    Returns:
        A ``Response`` holding the status code, raw content, headers and the
        parsed payload.
    """
    # Field values are gathered in the same order the container lists them.
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    response_headers = response.headers
    parsed_payload = _parse_response(client=client, response=response)
    return Response(
        status_code=status,
        content=raw_content,
        headers=response_headers,
        parsed=parsed_payload,
    )


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
    path: str,
    range_: str | Unset = UNSET,
) -> Response[ErrorResponse | File]:
    """Download file from sandbox (blocking, full response).

    Downloads a file from the specified path within the sandbox. Supports HTTP
    range requests for resumable downloads and partial content retrieval.
    The file is returned as an octet-stream.

    Args:
        client: Client providing the synchronous httpx client.
        path (str): Sent as the ``path`` query parameter.
        range_ (str | Unset): Optional ``Range`` header value.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | File]
    """
    request_args = _get_kwargs(path=path, range_=range_)
    return _build_response(
        client=client,
        response=client.get_httpx_client().request(**request_args),
    )


def sync(
    *,
    client: AuthenticatedClient | Client,
    path: str,
    range_: str | Unset = UNSET,
) -> ErrorResponse | File | None:
    """Download file from sandbox (blocking, parsed result only).

    Downloads a file from the specified path within the sandbox. Supports HTTP
    range requests for resumable downloads and partial content retrieval.
    The file is returned as an octet-stream.

    Args:
        client: Client providing the synchronous httpx client.
        path (str): Sent as the ``path`` query parameter.
        range_ (str | Unset): Optional ``Range`` header value.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        The ``parsed`` attribute of the detailed response: a ``File`` on HTTP
        200/206, an ``ErrorResponse`` on a documented error status, or
        ``None`` for an undocumented status when the client does not raise.
    """
    detailed = sync_detailed(client=client, path=path, range_=range_)
    return detailed.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
    path: str,
    range_: str | Unset = UNSET,
) -> Response[ErrorResponse | File]:
    """Download file from sandbox (async, full response).

    Downloads a file from the specified path within the sandbox. Supports HTTP
    range requests for resumable downloads and partial content retrieval.
    The file is returned as an octet-stream.

    Args:
        client: Client providing the asynchronous httpx client.
        path (str): Sent as the ``path`` query parameter.
        range_ (str | Unset): Optional ``Range`` header value.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | File]
    """
    request_args = _get_kwargs(path=path, range_=range_)
    async_http = client.get_async_httpx_client()
    http_response = await async_http.request(**request_args)
    return _build_response(client=client, response=http_response)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
    path: str,
    range_: str | Unset = UNSET,
) -> ErrorResponse | File | None:
    """Download file from sandbox (async, parsed result only).

    Downloads a file from the specified path within the sandbox. Supports HTTP
    range requests for resumable downloads and partial content retrieval.
    The file is returned as an octet-stream.

    Args:
        client: Client providing the asynchronous httpx client.
        path (str): Sent as the ``path`` query parameter.
        range_ (str | Unset): Optional ``Range`` header value.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        The ``parsed`` attribute of the detailed response: a ``File`` on HTTP
        200/206, an ``ErrorResponse`` on a documented error status, or
        ``None`` for an undocumented status when the client does not raise.
    """
    detailed = await asyncio_detailed(client=client, path=path, range_=range_)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/filesystem/get_files_info.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...models.get_files_info_response_200 import GetFilesInfoResponse200
from ...types import UNSET, Response


def _get_kwargs(
    *,
    path: list[str],
) -> dict[str, Any]:
    """Assemble the httpx request arguments for the file metadata call.

    Args:
        path: Paths sent as the ``path`` query parameter; the list is passed
            through unchanged.

    Returns:
        Keyword arguments for ``httpx.Client.request``: a ``GET`` to
        ``/files/info`` with the query parameters.
    """
    # Drop query entries that are unset or None before sending.
    query: dict[str, Any] = {
        name: value for name, value in (("path", path),) if not (value is UNSET or value is None)
    }

    return {
        "method": "get",
        "url": "/files/info",
        "params": query,
    }


def _parse_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> ErrorResponse | GetFilesInfoResponse200 | None:
    """Translate the raw HTTP response into the endpoint's parsed result.

    Args:
        client: Client whose ``raise_on_unexpected_status`` flag decides how
            undocumented status codes are handled.
        response: The httpx response to interpret.

    Returns:
        A ``GetFilesInfoResponse200`` built from the JSON body for HTTP 200,
        an ``ErrorResponse`` built from the JSON body for HTTP 404 or 500, and
        ``None`` for any other status when the client is not configured to
        raise.

    Raises:
        errors.UnexpectedStatus: On an undocumented status code when
            ``client.raise_on_unexpected_status`` is truthy.
    """
    status = response.status_code

    if status == 200:
        return GetFilesInfoResponse200.from_dict(response.json())

    # Both documented failure codes share the same error schema.
    if status in (404, 500):
        return ErrorResponse.from_dict(response.json())

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(status, response.content)
    return None


def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[ErrorResponse | GetFilesInfoResponse200]:
    """Wrap an httpx response in the typed ``Response`` container.

    Args:
        client: Client forwarded to ``_parse_response``.
        response: The httpx response to wrap.

    Returns:
        A ``Response`` holding the status code, raw content, headers and the
        parsed payload.
    """
    # Field values are gathered in the same order the container lists them.
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    response_headers = response.headers
    parsed_payload = _parse_response(client=client, response=response)
    return Response(
        status_code=status,
        content=raw_content,
        headers=response_headers,
        parsed=parsed_payload,
    )


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
    path: list[str],
) -> Response[ErrorResponse | GetFilesInfoResponse200]:
    """Get file metadata (blocking, full response).

    Retrieves detailed metadata for one or multiple files including
    permissions, owner, group, size, and modification time. The result maps
    file paths to their corresponding FileInfo objects.

    Args:
        client: Client providing the synchronous httpx client.
        path (list[str]): Sent as the ``path`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | GetFilesInfoResponse200]
    """
    request_args = _get_kwargs(path=path)
    return _build_response(
        client=client,
        response=client.get_httpx_client().request(**request_args),
    )


def sync(
    *,
    client: AuthenticatedClient | Client,
    path: list[str],
) -> ErrorResponse | GetFilesInfoResponse200 | None:
    """Get file metadata (blocking, parsed result only).

    Retrieves detailed metadata for one or multiple files including
    permissions, owner, group, size, and modification time. The result maps
    file paths to their corresponding FileInfo objects.

    Args:
        client: Client providing the synchronous httpx client.
        path (list[str]): Sent as the ``path`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        The ``parsed`` attribute of the detailed response: a
        ``GetFilesInfoResponse200`` on HTTP 200, an ``ErrorResponse`` on HTTP
        404/500, or ``None`` for an undocumented status when the client does
        not raise.
    """
    detailed = sync_detailed(client=client, path=path)
    return detailed.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
    path: list[str],
) -> Response[ErrorResponse | GetFilesInfoResponse200]:
    """Get file metadata (async, full response).

    Retrieves detailed metadata for one or multiple files including
    permissions, owner, group, size, and modification time. The result maps
    file paths to their corresponding FileInfo objects.

    Args:
        client: Client providing the asynchronous httpx client.
        path (list[str]): Sent as the ``path`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | GetFilesInfoResponse200]
    """
    request_args = _get_kwargs(path=path)
    async_http = client.get_async_httpx_client()
    http_response = await async_http.request(**request_args)
    return _build_response(client=client, response=http_response)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
    path: list[str],
) -> ErrorResponse | GetFilesInfoResponse200 | None:
    """Get file metadata (async, parsed result only).

    Retrieves detailed metadata for one or multiple files including
    permissions, owner, group, size, and modification time. The result maps
    file paths to their corresponding FileInfo objects.

    Args:
        client: Client providing the asynchronous httpx client.
        path (list[str]): Sent as the ``path`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        The ``parsed`` attribute of the detailed response: a
        ``GetFilesInfoResponse200`` on HTTP 200, an ``ErrorResponse`` on HTTP
        404/500, or ``None`` for an undocumented status when the client does
        not raise.
    """
    detailed = await asyncio_detailed(client=client, path=path)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/filesystem/make_dirs.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any, cast

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...models.make_dirs_body import MakeDirsBody
from ...types import Response


def _get_kwargs(
    *,
    body: MakeDirsBody,
) -> dict[str, Any]:
    """Assemble the httpx request arguments for the create-directories call.

    Args:
        body: Request payload; serialized through its ``to_dict()`` method.

    Returns:
        Keyword arguments for ``httpx.Client.request``: a ``POST`` to
        ``/directories`` with a JSON body and a JSON content type header.
    """
    return {
        "method": "post",
        "url": "/directories",
        "json": body.to_dict(),
        "headers": {"Content-Type": "application/json"},
    }


def _parse_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Any | ErrorResponse | None:
    """Translate the raw HTTP response into the endpoint's parsed result.

    Args:
        client: Client whose ``raise_on_unexpected_status`` flag decides how
            undocumented status codes are handled.
        response: The httpx response to interpret.

    Returns:
        ``None`` for HTTP 200, an ``ErrorResponse`` built from the JSON body
        for HTTP 400 or 500, and ``None`` for any other status when the client
        is not configured to raise.

    Raises:
        errors.UnexpectedStatus: On an undocumented status code when
            ``client.raise_on_unexpected_status`` is truthy.
    """
    status = response.status_code

    # Success carries no payload for this endpoint.
    if status == 200:
        return None

    # Both documented failure codes share the same error schema.
    if status in (400, 500):
        return ErrorResponse.from_dict(response.json())

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(status, response.content)
    return None


def _build_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Response[Any | ErrorResponse]:
    """Wrap an httpx response in the typed ``Response`` container.

    Args:
        client: Client forwarded to ``_parse_response``.
        response: The httpx response to wrap.

    Returns:
        A ``Response`` holding the status code, raw content, headers and the
        parsed payload.
    """
    # Field values are gathered in the same order the container lists them.
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    response_headers = response.headers
    parsed_payload = _parse_response(client=client, response=response)
    return Response(
        status_code=status,
        content=raw_content,
        headers=response_headers,
        parsed=parsed_payload,
    )


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
    body: MakeDirsBody,
) -> Response[Any | ErrorResponse]:
    """Create directories (blocking, full response).

    Creates one or multiple directories with specified permissions. Parent
    directories are created as needed (similar to mkdir -p). The body maps
    directory paths to permission objects.

    Args:
        client: Client providing the synchronous httpx client.
        body (MakeDirsBody): Request payload.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """
    request_args = _get_kwargs(body=body)
    return _build_response(
        client=client,
        response=client.get_httpx_client().request(**request_args),
    )


def sync(
    *,
    client: AuthenticatedClient | Client,
    body: MakeDirsBody,
) -> Any | ErrorResponse | None:
    """Create directories (blocking, parsed result only).

    Creates one or multiple directories with specified permissions. Parent
    directories are created as needed (similar to mkdir -p). The body maps
    directory paths to permission objects.

    Args:
        client: Client providing the synchronous httpx client.
        body (MakeDirsBody): Request payload.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        The ``parsed`` attribute of the detailed response: ``None`` on HTTP
        200, an ``ErrorResponse`` on HTTP 400/500, or ``None`` for an
        undocumented status when the client does not raise.
    """
    detailed = sync_detailed(client=client, body=body)
    return detailed.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
    body: MakeDirsBody,
) -> Response[Any | ErrorResponse]:
    """Create directories (async, full response).

    Creates one or multiple directories with specified permissions. Parent
    directories are created as needed (similar to mkdir -p). The body maps
    directory paths to permission objects.

    Args:
        client: Client providing the asynchronous httpx client.
        body (MakeDirsBody): Request payload.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """
    request_args = _get_kwargs(body=body)
    async_http = client.get_async_httpx_client()
    http_response = await async_http.request(**request_args)
    return _build_response(client=client, response=http_response)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
    body: MakeDirsBody,
) -> Any | ErrorResponse | None:
    """Create directories (async, parsed result only).

    Creates one or multiple directories with specified permissions. Parent
    directories are created as needed (similar to mkdir -p). The body maps
    directory paths to permission objects.

    Args:
        client: Client providing the asynchronous httpx client.
        body (MakeDirsBody): Request payload.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        The ``parsed`` attribute of the detailed response: ``None`` on HTTP
        200, an ``ErrorResponse`` on HTTP 400/500, or ``None`` for an
        undocumented status when the client does not raise.
    """
    detailed = await asyncio_detailed(client=client, body=body)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/filesystem/remove_dirs.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any, cast

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...types import UNSET, Response


def _get_kwargs(
    *,
    path: list[str],
) -> dict[str, Any]:
    """Assemble the httpx request arguments for the delete-directories call.

    Args:
        path: Paths sent as the ``path`` query parameter; the list is passed
            through unchanged.

    Returns:
        Keyword arguments for ``httpx.Client.request``: a ``DELETE`` to
        ``/directories`` with the query parameters.
    """
    # Drop query entries that are unset or None before sending.
    query: dict[str, Any] = {
        name: value for name, value in (("path", path),) if not (value is UNSET or value is None)
    }

    return {
        "method": "delete",
        "url": "/directories",
        "params": query,
    }


def _parse_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Any | ErrorResponse | None:
    """Translate the raw HTTP response into the endpoint's parsed result.

    Args:
        client: Client whose ``raise_on_unexpected_status`` flag decides how
            undocumented status codes are handled.
        response: The httpx response to interpret.

    Returns:
        ``None`` for HTTP 200, an ``ErrorResponse`` built from the JSON body
        for HTTP 500, and ``None`` for any other status when the client is
        not configured to raise.

    Raises:
        errors.UnexpectedStatus: On an undocumented status code when
            ``client.raise_on_unexpected_status`` is truthy.
    """
    status = response.status_code

    # Success carries no payload for this endpoint.
    if status == 200:
        return None

    if status == 500:
        return ErrorResponse.from_dict(response.json())

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(status, response.content)
    return None


def _build_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Response[Any | ErrorResponse]:
    """Wrap an httpx response in the typed ``Response`` container.

    Args:
        client: Client forwarded to ``_parse_response``.
        response: The httpx response to wrap.

    Returns:
        A ``Response`` holding the status code, raw content, headers and the
        parsed payload.
    """
    # Field values are gathered in the same order the container lists them.
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    response_headers = response.headers
    parsed_payload = _parse_response(client=client, response=response)
    return Response(
        status_code=status,
        content=raw_content,
        headers=response_headers,
        parsed=parsed_payload,
    )


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
    path: list[str],
) -> Response[Any | ErrorResponse]:
    """Delete directories (blocking, full response).

    Recursively deletes one or multiple directories and all their contents.
    Similar to rm -rf. Use with caution as this operation cannot be undone.

    Args:
        client: Client providing the synchronous httpx client.
        path (list[str]): Sent as the ``path`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """
    request_args = _get_kwargs(path=path)
    return _build_response(
        client=client,
        response=client.get_httpx_client().request(**request_args),
    )


def sync(
    *,
    client: AuthenticatedClient | Client,
    path: list[str],
) -> Any | ErrorResponse | None:
    """Delete directories (blocking, parsed result only).

    Recursively deletes one or multiple directories and all their contents.
    Similar to rm -rf. Use with caution as this operation cannot be undone.

    Args:
        client: Client providing the synchronous httpx client.
        path (list[str]): Sent as the ``path`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        The ``parsed`` attribute of the detailed response: ``None`` on HTTP
        200, an ``ErrorResponse`` on HTTP 500, or ``None`` for an
        undocumented status when the client does not raise.
    """
    detailed = sync_detailed(client=client, path=path)
    return detailed.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
    path: list[str],
) -> Response[Any | ErrorResponse]:
    """Delete directories (async, full response).

    Recursively deletes one or multiple directories and all their contents.
    Similar to rm -rf. Use with caution as this operation cannot be undone.

    Args:
        client: Client providing the asynchronous httpx client.
        path (list[str]): Sent as the ``path`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """
    request_args = _get_kwargs(path=path)
    async_http = client.get_async_httpx_client()
    http_response = await async_http.request(**request_args)
    return _build_response(client=client, response=http_response)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
    path: list[str],
) -> Any | ErrorResponse | None:
    """Delete directories (async, parsed result only).

    Recursively deletes one or multiple directories and all their contents.
    Similar to rm -rf. Use with caution as this operation cannot be undone.

    Args:
        client: Client providing the asynchronous httpx client.
        path (list[str]): Sent as the ``path`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        The ``parsed`` attribute of the detailed response: ``None`` on HTTP
        200, an ``ErrorResponse`` on HTTP 500, or ``None`` for an
        undocumented status when the client does not raise.
    """
    detailed = await asyncio_detailed(client=client, path=path)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/filesystem/remove_files.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any, cast

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...types import UNSET, Response


def _get_kwargs(
    *,
    path: list[str],
) -> dict[str, Any]:
    """Assemble the httpx request arguments for the delete-files call.

    Args:
        path: Paths sent as the ``path`` query parameter; the list is passed
            through unchanged.

    Returns:
        Keyword arguments for ``httpx.Client.request``: a ``DELETE`` to
        ``/files`` with the query parameters.
    """
    # Drop query entries that are unset or None before sending.
    query: dict[str, Any] = {
        name: value for name, value in (("path", path),) if not (value is UNSET or value is None)
    }

    return {
        "method": "delete",
        "url": "/files",
        "params": query,
    }


def _parse_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Any | ErrorResponse | None:
    """Translate the raw HTTP response into the endpoint's parsed result.

    Args:
        client: Client whose ``raise_on_unexpected_status`` flag decides how
            undocumented status codes are handled.
        response: The httpx response to interpret.

    Returns:
        ``None`` for HTTP 200, an ``ErrorResponse`` built from the JSON body
        for HTTP 500, and ``None`` for any other status when the client is
        not configured to raise.

    Raises:
        errors.UnexpectedStatus: On an undocumented status code when
            ``client.raise_on_unexpected_status`` is truthy.
    """
    status = response.status_code

    # Success carries no payload for this endpoint.
    if status == 200:
        return None

    if status == 500:
        return ErrorResponse.from_dict(response.json())

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(status, response.content)
    return None


def _build_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Response[Any | ErrorResponse]:
    """Wrap an httpx response in the typed ``Response`` container.

    Args:
        client: Client forwarded to ``_parse_response``.
        response: The httpx response to wrap.

    Returns:
        A ``Response`` holding the status code, raw content, headers and the
        parsed payload.
    """
    # Field values are gathered in the same order the container lists them.
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    response_headers = response.headers
    parsed_payload = _parse_response(client=client, response=response)
    return Response(
        status_code=status,
        content=raw_content,
        headers=response_headers,
        parsed=parsed_payload,
    )


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
    path: list[str],
) -> Response[Any | ErrorResponse]:
    """Delete files (blocking, full response).

    Deletes one or multiple files from the sandbox. Only removes files, not
    directories. Use RemoveDirs for directory removal.

    Args:
        client: Client providing the synchronous httpx client.
        path (list[str]): Sent as the ``path`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """
    request_args = _get_kwargs(path=path)
    return _build_response(
        client=client,
        response=client.get_httpx_client().request(**request_args),
    )


def sync(
    *,
    client: AuthenticatedClient | Client,
    path: list[str],
) -> Any | ErrorResponse | None:
    """Delete files (blocking, parsed result only).

    Deletes one or multiple files from the sandbox. Only removes files, not
    directories. Use RemoveDirs for directory removal.

    Args:
        client: Client providing the synchronous httpx client.
        path (list[str]): Sent as the ``path`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        The ``parsed`` attribute of the detailed response: ``None`` on HTTP
        200, an ``ErrorResponse`` on HTTP 500, or ``None`` for an
        undocumented status when the client does not raise.
    """
    detailed = sync_detailed(client=client, path=path)
    return detailed.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
    path: list[str],
) -> Response[Any | ErrorResponse]:
    """Delete files (async, full response).

    Deletes one or multiple files from the sandbox. Only removes files, not
    directories. Use RemoveDirs for directory removal.

    Args:
        client: Client providing the asynchronous httpx client.
        path (list[str]): Sent as the ``path`` query parameter.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """
    request_args = _get_kwargs(path=path)
    async_http = client.get_async_httpx_client()
    http_response = await async_http.request(**request_args)
    return _build_response(client=client, response=http_response)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
    path: list[str],
) -> Any | ErrorResponse | None:
    """Delete files (async) and return only the parsed result.

    Awaits ``asyncio_detailed`` and hands back the ``parsed`` attribute of its
    response. Per the API description, the endpoint removes one or multiple
    files from the sandbox; directories are not removed (use RemoveDirs for
    those).

    Args:
        path (list[str]): Forwarded unchanged to ``asyncio_detailed``.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ErrorResponse
    """

    detailed = await asyncio_detailed(client=client, path=path)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/filesystem/rename_files.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any, cast

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...models.rename_file_item import RenameFileItem
from ...types import Response


def _get_kwargs(
    *,
    body: list[RenameFileItem],
) -> dict[str, Any]:
    """Assemble the request keyword arguments for ``POST /files/mv``.

    Args:
        body (list[RenameFileItem]): Items to send; each one is converted with
            its ``to_dict()`` method and the results form the JSON array body.

    Returns:
        dict[str, Any]: Mapping with the keys ``method``, ``url``, ``json`` and
        ``headers`` (the latter carrying a JSON ``Content-Type``).
    """
    # Serialize every item in order; an empty input yields an empty JSON array.
    payload = [item.to_dict() for item in body]

    return {
        "method": "post",
        "url": "/files/mv",
        "json": payload,
        "headers": {"Content-Type": "application/json"},
    }


def _parse_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Any | ErrorResponse | None:
    """Turn the raw HTTP response into its parsed form.

    Args:
        client: Consulted for ``raise_on_unexpected_status`` when the status
            code is not one of the handled ones.
        response: The HTTP response to interpret.

    Raises:
        errors.UnexpectedStatus: If the status is not 200/400/404/500 and
            ``client.raise_on_unexpected_status`` is truthy.

    Returns:
        ``None`` for a 200 response, an ``ErrorResponse`` built from the JSON
        body for 400, 404 and 500, and ``None`` for any other status when
        raising is disabled.
    """
    status = response.status_code

    # Success carries no payload to decode.
    if status == 200:
        return cast(Any, None)

    # All handled error statuses share the same body model.
    if status in (400, 404, 500):
        return ErrorResponse.from_dict(response.json())

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(status, response.content)
    return None


def _build_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Response[Any | ErrorResponse]:
    """Wrap the raw HTTP response in a ``Response`` together with its parsed body.

    The status code is converted to ``HTTPStatus`` before the body is parsed,
    so the fields are evaluated in the order: status, content, headers, parsed.
    """
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    response_headers = response.headers
    parsed_body = _parse_response(client=client, response=response)
    return Response(
        status_code=status,
        content=raw_content,
        headers=response_headers,
        parsed=parsed_body,
    )


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
    body: list[RenameFileItem],
) -> Response[Any | ErrorResponse]:
    """Rename or move files and return the full response wrapper.

    Per the API description, one or multiple files are renamed or moved to new
    paths; this covers both renaming inside the same directory and moving to a
    different one. The target directory must exist.

    Args:
        body (list[RenameFileItem]): Items forwarded to ``_get_kwargs`` to form
            the request body.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """

    # Build the request arguments first, then obtain the blocking transport.
    request_kwargs = _get_kwargs(body=body)
    http_client = client.get_httpx_client()
    raw_response = http_client.request(**request_kwargs)
    return _build_response(client=client, response=raw_response)


def sync(
    *,
    client: AuthenticatedClient | Client,
    body: list[RenameFileItem],
) -> Any | ErrorResponse | None:
    """Rename or move files and return only the parsed result.

    Blocking convenience wrapper: delegates to ``sync_detailed`` and hands back
    the ``parsed`` attribute of its response. Per the API description, one or
    multiple files are renamed or moved to new paths, and the target directory
    must exist.

    Args:
        body (list[RenameFileItem]): Forwarded unchanged to ``sync_detailed``.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ErrorResponse
    """

    detailed = sync_detailed(client=client, body=body)
    return detailed.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
    body: list[RenameFileItem],
) -> Response[Any | ErrorResponse]:
    """Rename or move files (async) and return the full response wrapper.

    Per the API description, one or multiple files are renamed or moved to new
    paths; this covers both renaming inside the same directory and moving to a
    different one. The target directory must exist.

    Args:
        body (list[RenameFileItem]): Items forwarded to ``_get_kwargs`` to form
            the request body.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """

    # Build the request arguments first, then obtain the async transport.
    request_kwargs = _get_kwargs(body=body)
    async_client = client.get_async_httpx_client()
    raw_response = await async_client.request(**request_kwargs)
    return _build_response(client=client, response=raw_response)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
    body: list[RenameFileItem],
) -> Any | ErrorResponse | None:
    """Rename or move files (async) and return only the parsed result.

    Awaits ``asyncio_detailed`` and hands back the ``parsed`` attribute of its
    response. Per the API description, one or multiple files are renamed or
    moved to new paths, and the target directory must exist.

    Args:
        body (list[RenameFileItem]): Forwarded unchanged to ``asyncio_detailed``.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ErrorResponse
    """

    detailed = await asyncio_detailed(client=client, body=body)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/filesystem/replace_content.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any, cast

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...models.replace_content_body import ReplaceContentBody
from ...types import Response


def _get_kwargs(
    *,
    body: ReplaceContentBody,
) -> dict[str, Any]:
    """Assemble the request keyword arguments for ``POST /files/replace``.

    Args:
        body (ReplaceContentBody): Converted with ``to_dict()`` to form the
            JSON request body.

    Returns:
        dict[str, Any]: Mapping with the keys ``method``, ``url``, ``json`` and
        ``headers`` (the latter carrying a JSON ``Content-Type``).
    """
    return {
        "method": "post",
        "url": "/files/replace",
        "json": body.to_dict(),
        "headers": {"Content-Type": "application/json"},
    }


def _parse_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Any | ErrorResponse | None:
    """Turn the raw HTTP response into its parsed form.

    Args:
        client: Consulted for ``raise_on_unexpected_status`` when the status
            code is not one of the handled ones.
        response: The HTTP response to interpret.

    Raises:
        errors.UnexpectedStatus: If the status is not 200/400/500 and
            ``client.raise_on_unexpected_status`` is truthy.

    Returns:
        ``None`` for a 200 response, an ``ErrorResponse`` built from the JSON
        body for 400 and 500, and ``None`` for any other status when raising
        is disabled.
    """
    status = response.status_code

    # Success carries no payload to decode.
    if status == 200:
        return cast(Any, None)

    # Both handled error statuses share the same body model.
    if status in (400, 500):
        return ErrorResponse.from_dict(response.json())

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(status, response.content)
    return None


def _build_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Response[Any | ErrorResponse]:
    """Wrap the raw HTTP response in a ``Response`` together with its parsed body.

    The status code is converted to ``HTTPStatus`` before the body is parsed,
    so the fields are evaluated in the order: status, content, headers, parsed.
    """
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    response_headers = response.headers
    parsed_body = _parse_response(client=client, response=response)
    return Response(
        status_code=status,
        content=raw_content,
        headers=response_headers,
        parsed=parsed_body,
    )


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
    body: ReplaceContentBody,
) -> Response[Any | ErrorResponse]:
    """Replace file content and return the full response wrapper.

    Per the API description, text replacement is performed in one or multiple
    files: every occurrence of the old string is replaced with the new string
    (similar to strings.ReplaceAll), and file permissions are preserved.

    Args:
        body (ReplaceContentBody): Forwarded to ``_get_kwargs`` to form the
            request body.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """

    # Build the request arguments first, then obtain the blocking transport.
    request_kwargs = _get_kwargs(body=body)
    http_client = client.get_httpx_client()
    raw_response = http_client.request(**request_kwargs)
    return _build_response(client=client, response=raw_response)


def sync(
    *,
    client: AuthenticatedClient | Client,
    body: ReplaceContentBody,
) -> Any | ErrorResponse | None:
    """Replace file content and return only the parsed result.

    Blocking convenience wrapper: delegates to ``sync_detailed`` and hands back
    the ``parsed`` attribute of its response. Per the API description, every
    occurrence of the old string is replaced with the new string in one or
    multiple files, preserving file permissions.

    Args:
        body (ReplaceContentBody): Forwarded unchanged to ``sync_detailed``.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ErrorResponse
    """

    detailed = sync_detailed(client=client, body=body)
    return detailed.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
    body: ReplaceContentBody,
) -> Response[Any | ErrorResponse]:
    """Replace file content (async) and return the full response wrapper.

    Per the API description, text replacement is performed in one or multiple
    files: every occurrence of the old string is replaced with the new string
    (similar to strings.ReplaceAll), and file permissions are preserved.

    Args:
        body (ReplaceContentBody): Forwarded to ``_get_kwargs`` to form the
            request body.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """

    # Build the request arguments first, then obtain the async transport.
    request_kwargs = _get_kwargs(body=body)
    async_client = client.get_async_httpx_client()
    raw_response = await async_client.request(**request_kwargs)
    return _build_response(client=client, response=raw_response)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
    body: ReplaceContentBody,
) -> Any | ErrorResponse | None:
    """Replace file content (async) and return only the parsed result.

    Awaits ``asyncio_detailed`` and hands back the ``parsed`` attribute of its
    response. Per the API description, every occurrence of the old string is
    replaced with the new string in one or multiple files, preserving file
    permissions.

    Args:
        body (ReplaceContentBody): Forwarded unchanged to ``asyncio_detailed``.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ErrorResponse
    """

    detailed = await asyncio_detailed(client=client, body=body)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/filesystem/search_files.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...models.file_info import FileInfo
from ...types import UNSET, Response, Unset


def _get_kwargs(
    *,
    path: str,
    pattern: str | Unset = "**",
) -> dict[str, Any]:
    """Assemble the request keyword arguments for ``GET /files/search``.

    Args:
        path (str): Sent as the ``path`` query parameter.
        pattern (str | Unset): Sent as the ``pattern`` query parameter.
            Default: '**'.

    Returns:
        dict[str, Any]: Mapping with the keys ``method``, ``url`` and
        ``params``. Query parameters whose value is ``UNSET`` or ``None`` are
        left out of ``params``.
    """
    query: dict[str, Any] = {}
    for key, value in (("path", path), ("pattern", pattern)):
        # Omit parameters the caller did not actually provide.
        if value is UNSET or value is None:
            continue
        query[key] = value

    return {
        "method": "get",
        "url": "/files/search",
        "params": query,
    }


def _parse_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> ErrorResponse | list[FileInfo] | None:
    """Turn the raw HTTP response into its parsed form.

    Args:
        client: Consulted for ``raise_on_unexpected_status`` when the status
            code is not one of the handled ones.
        response: The HTTP response to interpret.

    Raises:
        errors.UnexpectedStatus: If the status is not 200/400/404/500 and
            ``client.raise_on_unexpected_status`` is truthy.

    Returns:
        For 200, a list with one ``FileInfo`` per element of the JSON body;
        for 400, 404 and 500, an ``ErrorResponse`` built from the JSON body;
        ``None`` for any other status when raising is disabled.
    """
    status = response.status_code

    if status == 200:
        return [FileInfo.from_dict(entry) for entry in response.json()]

    # All handled error statuses share the same body model.
    if status in (400, 404, 500):
        return ErrorResponse.from_dict(response.json())

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(status, response.content)
    return None


def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[ErrorResponse | list[FileInfo]]:
    """Wrap the raw HTTP response in a ``Response`` together with its parsed body.

    The status code is converted to ``HTTPStatus`` before the body is parsed,
    so the fields are evaluated in the order: status, content, headers, parsed.
    """
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    response_headers = response.headers
    parsed_body = _parse_response(client=client, response=response)
    return Response(
        status_code=status,
        content=raw_content,
        headers=response_headers,
        parsed=parsed_body,
    )


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
    path: str,
    pattern: str | Unset = "**",
) -> Response[ErrorResponse | list[FileInfo]]:
    """Search for files and return the full response wrapper.

    Per the API description, files matching a glob pattern are searched within
    the given directory and its subdirectories, and file metadata (path,
    permissions, owner, group) is returned. Glob patterns such as ** and *.txt
    are supported; the default pattern ** matches all files.

    Args:
        path (str): Forwarded to ``_get_kwargs``.
        pattern (str | Unset): Forwarded to ``_get_kwargs``. Default: '**'.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | list[FileInfo]]
    """

    # Build the request arguments first, then obtain the blocking transport.
    request_kwargs = _get_kwargs(path=path, pattern=pattern)
    http_client = client.get_httpx_client()
    raw_response = http_client.request(**request_kwargs)
    return _build_response(client=client, response=raw_response)


def sync(
    *,
    client: AuthenticatedClient | Client,
    path: str,
    pattern: str | Unset = "**",
) -> ErrorResponse | list[FileInfo] | None:
    """Search for files and return only the parsed result.

    Blocking convenience wrapper: delegates to ``sync_detailed`` and hands back
    the ``parsed`` attribute of its response. Per the API description, files
    matching a glob pattern are searched within the given directory and its
    subdirectories; the default pattern ** matches all files.

    Args:
        path (str): Forwarded unchanged to ``sync_detailed``.
        pattern (str | Unset): Forwarded unchanged to ``sync_detailed``.
            Default: '**'.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        ErrorResponse | list[FileInfo]
    """

    detailed = sync_detailed(client=client, path=path, pattern=pattern)
    return detailed.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
    path: str,
    pattern: str | Unset = "**",
) -> Response[ErrorResponse | list[FileInfo]]:
    """Search for files (async) and return the full response wrapper.

    Per the API description, files matching a glob pattern are searched within
    the given directory and its subdirectories, and file metadata (path,
    permissions, owner, group) is returned. Glob patterns such as ** and *.txt
    are supported; the default pattern ** matches all files.

    Args:
        path (str): Forwarded to ``_get_kwargs``.
        pattern (str | Unset): Forwarded to ``_get_kwargs``. Default: '**'.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | list[FileInfo]]
    """

    # Build the request arguments first, then obtain the async transport.
    request_kwargs = _get_kwargs(path=path, pattern=pattern)
    async_client = client.get_async_httpx_client()
    raw_response = await async_client.request(**request_kwargs)
    return _build_response(client=client, response=raw_response)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
    path: str,
    pattern: str | Unset = "**",
) -> ErrorResponse | list[FileInfo] | None:
    """Search for files (async) and return only the parsed result.

    Awaits ``asyncio_detailed`` and hands back the ``parsed`` attribute of its
    response. Per the API description, files matching a glob pattern are
    searched within the given directory and its subdirectories; the default
    pattern ** matches all files.

    Args:
        path (str): Forwarded unchanged to ``asyncio_detailed``.
        pattern (str | Unset): Forwarded unchanged to ``asyncio_detailed``.
            Default: '**'.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        ErrorResponse | list[FileInfo]
    """

    detailed = await asyncio_detailed(client=client, path=path, pattern=pattern)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/filesystem/upload_file.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any, cast

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...models.upload_file_body import UploadFileBody
from ...types import Response


def _get_kwargs(
    *,
    body: UploadFileBody,
) -> dict[str, Any]:
    """Assemble the request keyword arguments for ``POST /files/upload``.

    Args:
        body (UploadFileBody): Converted with ``to_multipart()``; the result is
            passed as the ``files`` argument of the request.

    Returns:
        dict[str, Any]: Mapping with the keys ``method``, ``url``, ``files``
        and ``headers``. ``headers`` is an empty dict; no ``Content-Type`` is
        set here.
    """
    return {
        "method": "post",
        "url": "/files/upload",
        "files": body.to_multipart(),
        "headers": {},
    }


def _parse_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Any | ErrorResponse | None:
    """Turn the raw HTTP response into its parsed form.

    Args:
        client: Consulted for ``raise_on_unexpected_status`` when the status
            code is not one of the handled ones.
        response: The HTTP response to interpret.

    Raises:
        errors.UnexpectedStatus: If the status is not 200/400/500 and
            ``client.raise_on_unexpected_status`` is truthy.

    Returns:
        ``None`` for a 200 response, an ``ErrorResponse`` built from the JSON
        body for 400 and 500, and ``None`` for any other status when raising
        is disabled.
    """
    status = response.status_code

    # Success carries no payload to decode.
    if status == 200:
        return cast(Any, None)

    # Both handled error statuses share the same body model.
    if status in (400, 500):
        return ErrorResponse.from_dict(response.json())

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(status, response.content)
    return None


def _build_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Response[Any | ErrorResponse]:
    """Wrap the raw HTTP response in a ``Response`` together with its parsed body.

    The status code is converted to ``HTTPStatus`` before the body is parsed,
    so the fields are evaluated in the order: status, content, headers, parsed.
    """
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    response_headers = response.headers
    parsed_body = _parse_response(client=client, response=response)
    return Response(
        status_code=status,
        content=raw_content,
        headers=response_headers,
        parsed=parsed_body,
    )


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
    body: UploadFileBody,
) -> Response[Any | ErrorResponse]:
    """Upload files to the sandbox and return the full response wrapper.

    Per the API description, one or multiple files are uploaded to specified
    paths within the sandbox. The server reads metadata and file content from
    the multipart form parts in sequence: each upload consists of a metadata
    part (JSON) followed by the actual file part.

    Args:
        body (UploadFileBody): Forwarded to ``_get_kwargs`` to form the
            multipart request.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """

    # Build the request arguments first, then obtain the blocking transport.
    request_kwargs = _get_kwargs(body=body)
    http_client = client.get_httpx_client()
    raw_response = http_client.request(**request_kwargs)
    return _build_response(client=client, response=raw_response)


def sync(
    *,
    client: AuthenticatedClient | Client,
    body: UploadFileBody,
) -> Any | ErrorResponse | None:
    """Upload files to the sandbox and return only the parsed result.

    Blocking convenience wrapper: delegates to ``sync_detailed`` and hands back
    the ``parsed`` attribute of its response. Per the API description, each
    file upload consists of a metadata part (JSON) followed by the actual file
    part, read from the multipart form in sequence.

    Args:
        body (UploadFileBody): Forwarded unchanged to ``sync_detailed``.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ErrorResponse
    """

    detailed = sync_detailed(client=client, body=body)
    return detailed.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
    body: UploadFileBody,
) -> Response[Any | ErrorResponse]:
    """Upload files to the sandbox (async) and return the full response wrapper.

    Per the API description, one or multiple files are uploaded to specified
    paths within the sandbox. The server reads metadata and file content from
    the multipart form parts in sequence: each upload consists of a metadata
    part (JSON) followed by the actual file part.

    Args:
        body (UploadFileBody): Forwarded to ``_get_kwargs`` to form the
            multipart request.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """

    # Build the request arguments first, then obtain the async transport.
    request_kwargs = _get_kwargs(body=body)
    async_client = client.get_async_httpx_client()
    raw_response = await async_client.request(**request_kwargs)
    return _build_response(client=client, response=raw_response)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
    body: UploadFileBody,
) -> Any | ErrorResponse | None:
    """Upload files to the sandbox (async) and return only the parsed result.

    Awaits ``asyncio_detailed`` and hands back the ``parsed`` attribute of its
    response. Per the API description, each file upload consists of a metadata
    part (JSON) followed by the actual file part, read from the multipart form
    in sequence.

    Args:
        body (UploadFileBody): Forwarded unchanged to ``asyncio_detailed``.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ErrorResponse
    """

    detailed = await asyncio_detailed(client=client, body=body)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/health/__init__.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Contains endpoint functions for accessing the API"""


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/health/ping.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...types import Response


def _get_kwargs() -> dict[str, Any]:
    _kwargs: dict[str, Any] = {
        "method": "get",
        "url": "/ping",
    }

    return _kwargs


def _parse_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Any | None:
    """Validate the ping response; there is never a parsed payload.

    Args:
        client: Consulted for ``raise_on_unexpected_status`` when the status
            code is not 200.
        response: The HTTP response to interpret.

    Raises:
        errors.UnexpectedStatus: If the status is not 200 and
            ``client.raise_on_unexpected_status`` is truthy.

    Returns:
        Always ``None`` when no exception is raised.
    """
    # The client flag is only consulted for non-200 statuses.
    if response.status_code != 200 and client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(response.status_code, response.content)
    return None


def _build_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Response[Any]:
    """Wrap the raw HTTP response in a ``Response`` together with its parsed body.

    The status code is converted to ``HTTPStatus`` before the body is parsed,
    so the fields are evaluated in the order: status, content, headers, parsed.
    """
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    response_headers = response.headers
    parsed_body = _parse_response(client=client, response=response)
    return Response(
        status_code=status,
        content=raw_content,
        headers=response_headers,
        parsed=parsed_body,
    )


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
) -> Response[Any]:
    """Call the health check endpoint and return the full response wrapper.

    Per the API description, this is a simple health check verifying that the
    server is running and responsive; a healthy server answers HTTP 200 OK.
    The endpoint is typically used by load balancers, monitoring systems, and
    orchestration platforms (like Kubernetes) to check service availability.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any]
    """

    # Build the request arguments first, then obtain the blocking transport.
    request_kwargs = _get_kwargs()
    http_client = client.get_httpx_client()
    raw_response = http_client.request(**request_kwargs)
    return _build_response(client=client, response=raw_response)


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
) -> Response[Any]:
    """Call the health check endpoint (async) and return the full response wrapper.

    Per the API description, this is a simple health check verifying that the
    server is running and responsive; a healthy server answers HTTP 200 OK.
    The endpoint is typically used by load balancers, monitoring systems, and
    orchestration platforms (like Kubernetes) to check service availability.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any]
    """

    # Build the request arguments first, then obtain the async transport.
    request_kwargs = _get_kwargs()
    async_client = client.get_async_httpx_client()
    raw_response = await async_client.request(**request_kwargs)
    return _build_response(client=client, response=raw_response)


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/metric/__init__.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Contains endpoint functions for accessing the API"""


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/metric/get_metrics.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...models.metrics import Metrics
from ...types import Response


def _get_kwargs() -> dict[str, Any]:
    _kwargs: dict[str, Any] = {
        "method": "get",
        "url": "/metrics",
    }

    return _kwargs


def _parse_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> ErrorResponse | Metrics | None:
    """Turn the raw HTTP response into its parsed form.

    Args:
        client: Consulted for ``raise_on_unexpected_status`` when the status
            code is not one of the handled ones.
        response: The HTTP response to interpret.

    Raises:
        errors.UnexpectedStatus: If the status is neither 200 nor 500 and
            ``client.raise_on_unexpected_status`` is truthy.

    Returns:
        A ``Metrics`` built from the JSON body for 200, an ``ErrorResponse``
        built from the JSON body for 500, and ``None`` for any other status
        when raising is disabled.
    """
    # Map each handled status code to the model that decodes its JSON body.
    decoders = {
        200: Metrics.from_dict,
        500: ErrorResponse.from_dict,
    }
    status = response.status_code
    decode = decoders.get(status)
    if decode is not None:
        return decode(response.json())

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(status, response.content)
    return None


def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[ErrorResponse | Metrics]:
    """Wrap an httpx response in the SDK ``Response`` container.

    The status code, raw body and headers are copied from ``response``; the
    ``parsed`` field is produced by ``_parse_response``.
    """
    # Evaluated in the same order as the keyword arguments are consumed.
    status = HTTPStatus(response.status_code)
    raw_body = response.content
    header_map = response.headers
    parsed_body = _parse_response(client=client, response=response)

    return Response(
        status_code=status,
        content=raw_body,
        headers=header_map,
        parsed=parsed_body,
    )


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
) -> Response[ErrorResponse | Metrics]:
    """Get system metrics

     Retrieves current system resource metrics including CPU usage percentage,
    CPU core count, total memory, used memory, and timestamp. Provides a snapshot
    of system resource utilization at the time of request.

    Args:
        client: Client that supplies the synchronous httpx client used for the call.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | Metrics]
    """

    request_args = _get_kwargs()
    http_client = client.get_httpx_client()
    raw_response = http_client.request(**request_args)

    return _build_response(client=client, response=raw_response)


def sync(
    *,
    client: AuthenticatedClient | Client,
) -> ErrorResponse | Metrics | None:
    """Get system metrics

     Retrieves current system resource metrics including CPU usage percentage,
    CPU core count, total memory, used memory, and timestamp. Provides a snapshot
    of system resource utilization at the time of request.

    Convenience wrapper around ``sync_detailed`` that returns only the parsed body.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        ErrorResponse | Metrics, or None when the status code is undocumented
        and the client is not configured to raise.
    """

    detailed = sync_detailed(client=client)
    return detailed.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
) -> Response[ErrorResponse | Metrics]:
    """Get system metrics

     Retrieves current system resource metrics including CPU usage percentage,
    CPU core count, total memory, used memory, and timestamp. Provides a snapshot
    of system resource utilization at the time of request.

    Args:
        client: Client that supplies the asynchronous httpx client used for the call.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | Metrics]
    """

    request_args = _get_kwargs()
    http_client = client.get_async_httpx_client()
    raw_response = await http_client.request(**request_args)

    return _build_response(client=client, response=raw_response)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
) -> ErrorResponse | Metrics | None:
    """Get system metrics

     Retrieves current system resource metrics including CPU usage percentage,
    CPU core count, total memory, used memory, and timestamp. Provides a snapshot
    of system resource utilization at the time of request.

    Convenience wrapper around ``asyncio_detailed`` that returns only the parsed body.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        ErrorResponse | Metrics, or None when the status code is undocumented
        and the client is not configured to raise.
    """

    detailed = await asyncio_detailed(client=client)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/api/metric/watch_metrics.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...models.metrics import Metrics
from ...types import Response


def _get_kwargs() -> dict[str, Any]:
    _kwargs: dict[str, Any] = {
        "method": "get",
        "url": "/metrics/watch",
    }

    return _kwargs


def _parse_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> ErrorResponse | Metrics | None:
    """Turn a ``/metrics/watch`` response into the model documented for its status.

    A 200 body is event-stream text, not a JSON document, so it cannot be handed
    to ``Metrics.from_dict`` directly (that expects a mapping). The body is
    scanned line by line, an optional ``data:`` prefix is removed, and the most
    recent payload line is JSON-decoded before building the model.

    Args:
        client: Client whose ``raise_on_unexpected_status`` flag decides how
            undocumented status codes are handled.
        response: The httpx response to interpret.

    Returns:
        ``Metrics`` built from the newest payload of a 200 response (``None`` if
        the body carries no payload), ``ErrorResponse`` for a 500, or ``None``
        for any other status when the client is not configured to raise.

    Raises:
        errors.UnexpectedStatus: For any other status when
            ``client.raise_on_unexpected_status`` is true.
        ValueError: If the newest payload line is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    if response.status_code == 200:
        import json  # function-scope import: this module does not import json

        latest_payload: str | None = None
        for raw_line in response.text.splitlines():
            line = raw_line.strip()
            if line.startswith("data:"):
                # SSE data field: keep only the value after the field name.
                line = line[len("data:"):].strip()
            elif line.startswith((":", "event:", "id:", "retry:")):
                # SSE comments and non-data fields carry no metrics payload.
                continue
            if line:
                latest_payload = line

        if latest_payload is None:
            # Nothing to decode (e.g. the stream closed before the first event).
            return None

        response_200 = Metrics.from_dict(json.loads(latest_payload))

        return response_200

    if response.status_code == 500:
        response_500 = ErrorResponse.from_dict(response.json())

        return response_500

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(response.status_code, response.content)
    else:
        return None


def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[ErrorResponse | Metrics]:
    """Wrap an httpx response in the SDK ``Response`` container.

    The status code, raw body and headers are copied from ``response``; the
    ``parsed`` field is produced by ``_parse_response``.
    """
    # Evaluated in the same order as the keyword arguments are consumed.
    status = HTTPStatus(response.status_code)
    raw_body = response.content
    header_map = response.headers
    parsed_body = _parse_response(client=client, response=response)

    return Response(
        status_code=status,
        content=raw_body,
        headers=header_map,
        parsed=parsed_body,
    )


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
) -> Response[ErrorResponse | Metrics]:
    """Watch system metrics in real-time

     Streams system resource metrics in real-time using Server-Sent Events (SSE).
    Updates are sent every second, providing continuous monitoring of CPU usage,
    memory usage, and other system metrics. The connection remains open until
    the client disconnects.

    NOTE(review): the call goes through ``request(...)`` rather than a streaming
    API, so it presumably only returns once the body is complete or the timeout
    fires - confirm against the server behaviour.

    Args:
        client: Client that supplies the synchronous httpx client used for the call.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | Metrics]
    """

    request_args = _get_kwargs()
    http_client = client.get_httpx_client()
    raw_response = http_client.request(**request_args)

    return _build_response(client=client, response=raw_response)


def sync(
    *,
    client: AuthenticatedClient | Client,
) -> ErrorResponse | Metrics | None:
    """Watch system metrics in real-time

     Streams system resource metrics in real-time using Server-Sent Events (SSE).
    Updates are sent every second, providing continuous monitoring of CPU usage,
    memory usage, and other system metrics. The connection remains open until
    the client disconnects.

    Convenience wrapper around ``sync_detailed`` that returns only the parsed body.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        ErrorResponse | Metrics, or None when nothing was parsed.
    """

    detailed = sync_detailed(client=client)
    return detailed.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
) -> Response[ErrorResponse | Metrics]:
    """Watch system metrics in real-time

     Streams system resource metrics in real-time using Server-Sent Events (SSE).
    Updates are sent every second, providing continuous monitoring of CPU usage,
    memory usage, and other system metrics. The connection remains open until
    the client disconnects.

    NOTE(review): the call goes through ``request(...)`` rather than a streaming
    API, so it presumably only returns once the body is complete or the timeout
    fires - confirm against the server behaviour.

    Args:
        client: Client that supplies the asynchronous httpx client used for the call.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | Metrics]
    """

    request_args = _get_kwargs()
    http_client = client.get_async_httpx_client()
    raw_response = await http_client.request(**request_args)

    return _build_response(client=client, response=raw_response)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
) -> ErrorResponse | Metrics | None:
    """Watch system metrics in real-time

     Streams system resource metrics in real-time using Server-Sent Events (SSE).
    Updates are sent every second, providing continuous monitoring of CPU usage,
    memory usage, and other system metrics. The connection remains open until
    the client disconnects.

    Convenience wrapper around ``asyncio_detailed`` that returns only the parsed body.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        ErrorResponse | Metrics, or None when nothing was parsed.
    """

    detailed = await asyncio_detailed(client=client)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/client.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import ssl
from typing import Any

import httpx
from attrs import define, evolve, field


@define
class Client:
    """Connection settings for the API plus lazily created httpx clients.

    The following are accepted as keyword arguments and will be used to construct httpx Clients internally:

        ``base_url``: The base URL for the API, all requests are made to a relative path to this URL

        ``cookies``: A dictionary of cookies to be sent with every request

        ``headers``: A dictionary of headers to be sent with every request

        ``timeout``: The maximum amount of a time a request can take. API functions will raise
        httpx.TimeoutException if this is exceeded.

        ``verify_ssl``: Whether or not to verify the SSL certificate of the API server. This should be True in production,
        but can be set to False for testing purposes.

        ``follow_redirects``: Whether or not to follow redirects. Default value is False.

        ``httpx_args``: A dictionary of additional arguments to be passed to the ``httpx.Client`` and ``httpx.AsyncClient`` constructor.


    Attributes:
        raise_on_unexpected_status: Whether or not to raise an errors.UnexpectedStatus if the API returns a
            status code that was not documented in the source OpenAPI document. Can also be provided as a keyword
            argument to the constructor.
    """

    raise_on_unexpected_status: bool = field(default=False, kw_only=True)
    _base_url: str = field(alias="base_url")
    _cookies: dict[str, str] = field(factory=dict, kw_only=True, alias="cookies")
    _headers: dict[str, str] = field(factory=dict, kw_only=True, alias="headers")
    _timeout: httpx.Timeout | None = field(default=None, kw_only=True, alias="timeout")
    _verify_ssl: str | bool | ssl.SSLContext = field(default=True, kw_only=True, alias="verify_ssl")
    _follow_redirects: bool = field(default=False, kw_only=True, alias="follow_redirects")
    _httpx_args: dict[str, Any] = field(factory=dict, kw_only=True, alias="httpx_args")
    # Cached httpx clients; excluded from __init__ and created on first use.
    _client: httpx.Client | None = field(default=None, init=False)
    _async_client: httpx.AsyncClient | None = field(default=None, init=False)

    def with_headers(self, headers: dict[str, str]) -> "Client":
        """Return an evolved copy whose headers are merged with ``headers``.

        Any httpx client already created on this instance is updated in place too.
        """
        for live_client in (self._client, self._async_client):
            if live_client is not None:
                live_client.headers.update(headers)
        merged_headers = {**self._headers, **headers}
        return evolve(self, headers=merged_headers)

    def with_cookies(self, cookies: dict[str, str]) -> "Client":
        """Return an evolved copy whose cookies are merged with ``cookies``.

        Any httpx client already created on this instance is updated in place too.
        """
        for live_client in (self._client, self._async_client):
            if live_client is not None:
                live_client.cookies.update(cookies)
        merged_cookies = {**self._cookies, **cookies}
        return evolve(self, cookies=merged_cookies)

    def with_timeout(self, timeout: httpx.Timeout) -> "Client":
        """Return an evolved copy that uses ``timeout``.

        Any httpx client already created on this instance gets the new timeout too.
        """
        for live_client in (self._client, self._async_client):
            if live_client is not None:
                live_client.timeout = timeout
        return evolve(self, timeout=timeout)

    def set_httpx_client(self, client: httpx.Client) -> "Client":
        """Manually set the underlying httpx.Client

        **NOTE**: This will override any other settings on the client, including cookies, headers, and timeout.
        """
        self._client = client
        return self

    def get_httpx_client(self) -> httpx.Client:
        """Return the cached httpx.Client, building it from the stored settings on first use."""
        if self._client is not None:
            return self._client

        self._client = httpx.Client(
            base_url=self._base_url,
            cookies=self._cookies,
            headers=self._headers,
            timeout=self._timeout,
            verify=self._verify_ssl,
            follow_redirects=self._follow_redirects,
            **self._httpx_args,
        )
        return self._client

    def __enter__(self) -> "Client":
        """Enter the underlying httpx.Client context; it cannot be entered twice (see httpx docs)."""
        sync_client = self.get_httpx_client()
        sync_client.__enter__()
        return self

    def __exit__(self, *args: Any, **kwargs: Any) -> None:
        """Exit the underlying httpx.Client context (see httpx docs)."""
        sync_client = self.get_httpx_client()
        sync_client.__exit__(*args, **kwargs)

    def set_async_httpx_client(self, async_client: httpx.AsyncClient) -> "Client":
        """Manually set the underlying httpx.AsyncClient

        **NOTE**: This will override any other settings on the client, including cookies, headers, and timeout.
        """
        self._async_client = async_client
        return self

    def get_async_httpx_client(self) -> httpx.AsyncClient:
        """Return the cached httpx.AsyncClient, building it from the stored settings on first use."""
        if self._async_client is not None:
            return self._async_client

        self._async_client = httpx.AsyncClient(
            base_url=self._base_url,
            cookies=self._cookies,
            headers=self._headers,
            timeout=self._timeout,
            verify=self._verify_ssl,
            follow_redirects=self._follow_redirects,
            **self._httpx_args,
        )
        return self._async_client

    async def __aenter__(self) -> "Client":
        """Enter the underlying httpx.AsyncClient context; it cannot be entered twice (see httpx docs)."""
        async_client = self.get_async_httpx_client()
        await async_client.__aenter__()
        return self

    async def __aexit__(self, *args: Any, **kwargs: Any) -> None:
        """Exit the underlying httpx.AsyncClient context (see httpx docs)."""
        async_client = self.get_async_httpx_client()
        await async_client.__aexit__(*args, **kwargs)


@define
class AuthenticatedClient:
    """Connection settings plus credentials for use on secured endpoints.

    The following are accepted as keyword arguments and will be used to construct httpx Clients internally:

        ``base_url``: The base URL for the API, all requests are made to a relative path to this URL

        ``cookies``: A dictionary of cookies to be sent with every request

        ``headers``: A dictionary of headers to be sent with every request

        ``timeout``: The maximum amount of a time a request can take. API functions will raise
        httpx.TimeoutException if this is exceeded.

        ``verify_ssl``: Whether or not to verify the SSL certificate of the API server. This should be True in production,
        but can be set to False for testing purposes.

        ``follow_redirects``: Whether or not to follow redirects. Default value is False.

        ``httpx_args``: A dictionary of additional arguments to be passed to the ``httpx.Client`` and ``httpx.AsyncClient`` constructor.


    Attributes:
        raise_on_unexpected_status: Whether or not to raise an errors.UnexpectedStatus if the API returns a
            status code that was not documented in the source OpenAPI document. Can also be provided as a keyword
            argument to the constructor.
        token: The token to use for authentication
        prefix: The prefix to use for the Authorization header
        auth_header_name: The name of the Authorization header
    """

    raise_on_unexpected_status: bool = field(default=False, kw_only=True)
    _base_url: str = field(alias="base_url")
    _cookies: dict[str, str] = field(factory=dict, kw_only=True, alias="cookies")
    _headers: dict[str, str] = field(factory=dict, kw_only=True, alias="headers")
    _timeout: httpx.Timeout | None = field(default=None, kw_only=True, alias="timeout")
    _verify_ssl: str | bool | ssl.SSLContext = field(default=True, kw_only=True, alias="verify_ssl")
    _follow_redirects: bool = field(default=False, kw_only=True, alias="follow_redirects")
    _httpx_args: dict[str, Any] = field(factory=dict, kw_only=True, alias="httpx_args")
    # Cached httpx clients; excluded from __init__ and created on first use.
    _client: httpx.Client | None = field(default=None, init=False)
    _async_client: httpx.AsyncClient | None = field(default=None, init=False)

    token: str
    prefix: str = "Bearer"
    auth_header_name: str = "Authorization"

    def with_headers(self, headers: dict[str, str]) -> "AuthenticatedClient":
        """Return an evolved copy whose headers are merged with ``headers``.

        Any httpx client already created on this instance is updated in place too.
        """
        for live_client in (self._client, self._async_client):
            if live_client is not None:
                live_client.headers.update(headers)
        merged_headers = {**self._headers, **headers}
        return evolve(self, headers=merged_headers)

    def with_cookies(self, cookies: dict[str, str]) -> "AuthenticatedClient":
        """Return an evolved copy whose cookies are merged with ``cookies``.

        Any httpx client already created on this instance is updated in place too.
        """
        for live_client in (self._client, self._async_client):
            if live_client is not None:
                live_client.cookies.update(cookies)
        merged_cookies = {**self._cookies, **cookies}
        return evolve(self, cookies=merged_cookies)

    def with_timeout(self, timeout: httpx.Timeout) -> "AuthenticatedClient":
        """Return an evolved copy that uses ``timeout``.

        Any httpx client already created on this instance gets the new timeout too.
        """
        for live_client in (self._client, self._async_client):
            if live_client is not None:
                live_client.timeout = timeout
        return evolve(self, timeout=timeout)

    def set_httpx_client(self, client: httpx.Client) -> "AuthenticatedClient":
        """Manually set the underlying httpx.Client

        **NOTE**: This will override any other settings on the client, including cookies, headers, and timeout.
        """
        self._client = client
        return self

    def get_httpx_client(self) -> httpx.Client:
        """Return the cached httpx.Client, building it on first use.

        Before the client is built, the auth header is written into the stored
        headers: ``"<prefix> <token>"``, or the bare token when ``prefix`` is falsy.
        """
        if self._client is not None:
            return self._client

        if self.prefix:
            auth_value = f"{self.prefix} {self.token}"
        else:
            auth_value = self.token
        self._headers[self.auth_header_name] = auth_value

        self._client = httpx.Client(
            base_url=self._base_url,
            cookies=self._cookies,
            headers=self._headers,
            timeout=self._timeout,
            verify=self._verify_ssl,
            follow_redirects=self._follow_redirects,
            **self._httpx_args,
        )
        return self._client

    def __enter__(self) -> "AuthenticatedClient":
        """Enter the underlying httpx.Client context; it cannot be entered twice (see httpx docs)."""
        sync_client = self.get_httpx_client()
        sync_client.__enter__()
        return self

    def __exit__(self, *args: Any, **kwargs: Any) -> None:
        """Exit the underlying httpx.Client context (see httpx docs)."""
        sync_client = self.get_httpx_client()
        sync_client.__exit__(*args, **kwargs)

    def set_async_httpx_client(self, async_client: httpx.AsyncClient) -> "AuthenticatedClient":
        """Manually set the underlying httpx.AsyncClient

        **NOTE**: This will override any other settings on the client, including cookies, headers, and timeout.
        """
        self._async_client = async_client
        return self

    def get_async_httpx_client(self) -> httpx.AsyncClient:
        """Return the cached httpx.AsyncClient, building it on first use.

        Before the client is built, the auth header is written into the stored
        headers: ``"<prefix> <token>"``, or the bare token when ``prefix`` is falsy.
        """
        if self._async_client is not None:
            return self._async_client

        if self.prefix:
            auth_value = f"{self.prefix} {self.token}"
        else:
            auth_value = self.token
        self._headers[self.auth_header_name] = auth_value

        self._async_client = httpx.AsyncClient(
            base_url=self._base_url,
            cookies=self._cookies,
            headers=self._headers,
            timeout=self._timeout,
            verify=self._verify_ssl,
            follow_redirects=self._follow_redirects,
            **self._httpx_args,
        )
        return self._async_client

    async def __aenter__(self) -> "AuthenticatedClient":
        """Enter the underlying httpx.AsyncClient context; it cannot be entered twice (see httpx docs)."""
        async_client = self.get_async_httpx_client()
        await async_client.__aenter__()
        return self

    async def __aexit__(self, *args: Any, **kwargs: Any) -> None:
        """Exit the underlying httpx.AsyncClient context (see httpx docs)."""
        async_client = self.get_async_httpx_client()
        await async_client.__aexit__(*args, **kwargs)


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/errors.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Contains shared errors types that can be raised from API functions"""


class UnexpectedStatus(Exception):
    """Raised by API functions when the response has an undocumented status and Client.raise_on_unexpected_status is True.

    Attributes:
        status_code: The status code that was received.
        content: The raw response body.
    """

    def __init__(self, status_code: int, content: bytes):
        self.status_code = status_code
        self.content = content

        # Undecodable bytes are dropped so building the message never fails on them.
        body_text = content.decode(errors="ignore")
        message = f"Unexpected status code: {status_code}\n\nResponse content:\n{body_text}"
        super().__init__(message)


__all__ = ["UnexpectedStatus"]


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/__init__.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Contains all the data models used in inputs/outputs"""

from .chmod_files_body import ChmodFilesBody
from .code_context import CodeContext
from .code_context_request import CodeContextRequest
from .command_status_response import CommandStatusResponse
from .error_response import ErrorResponse
from .file_info import FileInfo
from .file_metadata import FileMetadata
from .get_files_info_response_200 import GetFilesInfoResponse200
from .make_dirs_body import MakeDirsBody
from .metrics import Metrics
from .permission import Permission
from .rename_file_item import RenameFileItem
from .replace_content_body import ReplaceContentBody
from .replace_file_content_item import ReplaceFileContentItem
from .run_code_request import RunCodeRequest
from .run_command_request import RunCommandRequest
from .run_command_request_envs import RunCommandRequestEnvs
from .server_stream_event import ServerStreamEvent
from .server_stream_event_error import ServerStreamEventError
from .server_stream_event_results import ServerStreamEventResults
from .server_stream_event_type import ServerStreamEventType
from .upload_file_body import UploadFileBody

# Public names of this package: one entry for each model class imported above.
__all__ = (
    "ChmodFilesBody",
    "CodeContext",
    "CodeContextRequest",
    "CommandStatusResponse",
    "ErrorResponse",
    "FileInfo",
    "FileMetadata",
    "GetFilesInfoResponse200",
    "MakeDirsBody",
    "Metrics",
    "Permission",
    "RenameFileItem",
    "ReplaceContentBody",
    "ReplaceFileContentItem",
    "RunCodeRequest",
    "RunCommandRequest",
    "RunCommandRequestEnvs",
    "ServerStreamEvent",
    "ServerStreamEventError",
    "ServerStreamEventResults",
    "ServerStreamEventType",
    "UploadFileBody",
)


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/chmod_files_body.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

if TYPE_CHECKING:
    from ..models.permission import Permission


T = TypeVar("T", bound="ChmodFilesBody")


@_attrs_define
class ChmodFilesBody:
    """Mapping of string keys to ``Permission`` values.

    The model declares no fixed fields; every entry is kept in
    ``additional_properties`` and is reachable through item access.
    """

    additional_properties: dict[str, Permission] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize each entry by calling ``to_dict()`` on its ``Permission``."""
        return {key: permission.to_dict() for key, permission in self.additional_properties.items()}

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance, converting every value with ``Permission.from_dict``."""
        # Runtime import: at module level Permission is only imported under TYPE_CHECKING.
        from ..models.permission import Permission

        raw_entries = dict(src_dict)
        instance = cls()
        instance.additional_properties = {
            key: Permission.from_dict(raw_value) for key, raw_value in raw_entries.items()
        }
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Keys currently stored in ``additional_properties``."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> Permission:
        return self.additional_properties[key]

    def __setitem__(self, key: str, value: Permission) -> None:
        self.additional_properties[key] = value

    def __delitem__(self, key: str) -> None:
        del self.additional_properties[key]

    def __contains__(self, key: str) -> bool:
        return key in self.additional_properties


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/code_context.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

from ..types import UNSET, Unset

T = TypeVar("T", bound="CodeContext")


@_attrs_define
class CodeContext:
    """Code execution context with session identifier

    Attributes:
        language (str): Execution runtime Example: python.
        id (str | Unset): Unique session identifier returned by CreateContext Example: session-abc123.
    """

    language: str
    id: str | Unset = UNSET
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict; ``id`` is omitted while it is ``UNSET``."""
        # Start from the extra properties so the declared fields overwrite clashes.
        payload: dict[str, Any] = dict(self.additional_properties)
        payload["language"] = self.language

        session_id = self.id
        if session_id is not UNSET:
            payload["id"] = session_id

        return payload

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a mapping; unknown keys become additional properties.

        ``language`` is required (a missing key raises ``KeyError``); ``id``
        falls back to ``UNSET``.
        """
        remaining = dict(src_dict)
        language_value = remaining.pop("language")
        id_value = remaining.pop("id", UNSET)

        instance = cls(language=language_value, id=id_value)
        # Whatever was not consumed above is preserved as-is.
        instance.additional_properties = remaining
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Keys currently stored in ``additional_properties``."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> Any:
        return self.additional_properties[key]

    def __setitem__(self, key: str, value: Any) -> None:
        self.additional_properties[key] = value

    def __delitem__(self, key: str) -> None:
        del self.additional_properties[key]

    def __contains__(self, key: str) -> bool:
        return key in self.additional_properties


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/code_context_request.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

from ..types import UNSET, Unset

T = TypeVar("T", bound="CodeContextRequest")


@_attrs_define
class CodeContextRequest:
    """Request to create a code execution context

    Attributes:
        language (str | Unset): Execution runtime (python, bash, java, etc.) Example: python.
    """

    language: str | Unset = UNSET
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict; ``language`` is omitted while it is ``UNSET``."""
        payload: dict[str, Any] = dict(self.additional_properties)

        language_value = self.language
        if language_value is not UNSET:
            payload["language"] = language_value

        return payload

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a mapping; unknown keys become additional properties.

        ``language`` falls back to ``UNSET`` when the key is absent.
        """
        remaining = dict(src_dict)
        language_value = remaining.pop("language", UNSET)

        instance = cls(language=language_value)
        # Whatever was not consumed above is preserved as-is.
        instance.additional_properties = remaining
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Keys currently stored in ``additional_properties``."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> Any:
        return self.additional_properties[key]

    def __setitem__(self, key: str, value: Any) -> None:
        self.additional_properties[key] = value

    def __delitem__(self, key: str) -> None:
        del self.additional_properties[key]

    def __contains__(self, key: str) -> bool:
        return key in self.additional_properties


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/command_status_response.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

import datetime
from collections.abc import Mapping
from typing import Any, TypeVar, cast

from attrs import define as _attrs_define
from attrs import field as _attrs_field
from dateutil.parser import isoparse

from ..types import UNSET, Unset

T = TypeVar("T", bound="CommandStatusResponse")


@_attrs_define
class CommandStatusResponse:
    """Status snapshot of a command execution (foreground or background).

    Attributes:
        id (str | Unset): Command ID returned by RunCommand Example: cmd-abc123.
        content (str | Unset): Original command content Example: ls -la.
        running (bool | Unset): Whether the command is still running
        exit_code (int | None | Unset): Exit code if the command has finished
        error (str | Unset): Error message if the command failed Example: permission denied.
        started_at (datetime.datetime | Unset): Start time in RFC3339 format Example: 2025-12-22T09:08:05Z.
        finished_at (datetime.datetime | None | Unset): Finish time in RFC3339 format (null if still running) Example:
            2025-12-22T09:08:09Z.
    """

    id: str | Unset = UNSET
    content: str | Unset = UNSET
    running: bool | Unset = UNSET
    exit_code: int | None | Unset = UNSET
    error: str | Unset = UNSET
    started_at: datetime.datetime | Unset = UNSET
    finished_at: datetime.datetime | None | Unset = UNSET
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict.

        Fields that are still ``UNSET`` are left out. Datetime values are
        written through ``isoformat()``; a ``None`` ``exit_code`` or
        ``finished_at`` is emitted as ``None``. Declared fields are written on
        top of the extra properties.
        """
        # Any Unset instance is normalised to the UNSET singleton so that the
        # identity filter below drops it.
        exit_code: int | None | Unset = UNSET if isinstance(self.exit_code, Unset) else self.exit_code

        started_at: str | Unset = UNSET
        if not isinstance(self.started_at, Unset):
            started_at = self.started_at.isoformat()

        finished_at: None | str | Unset
        raw_finished = self.finished_at
        if isinstance(raw_finished, datetime.datetime):
            finished_at = raw_finished.isoformat()
        elif isinstance(raw_finished, Unset):
            finished_at = UNSET
        else:
            finished_at = raw_finished

        # Output order of the declared fields.
        candidates: tuple[tuple[str, Any], ...] = (
            ("id", self.id),
            ("content", self.content),
            ("running", self.running),
            ("exit_code", exit_code),
            ("error", self.error),
            ("started_at", started_at),
            ("finished_at", finished_at),
        )

        payload: dict[str, Any] = dict(self.additional_properties)
        payload.update((name, value) for name, value in candidates if value is not UNSET)
        return payload

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a mapping without mutating the input.

        Every declared field is optional and defaults to ``UNSET``. A present
        ``started_at`` is parsed with ``isoparse``. ``finished_at`` is parsed
        only when it is a string that ``isoparse`` accepts; otherwise the raw
        value is kept as-is. Keys that are not declared fields end up in
        ``additional_properties``.
        """
        leftover = dict(src_dict)

        def _decode_exit_code(raw: object) -> int | None | Unset:
            # The value is passed through untouched; the cast is typing-only.
            if raw is None or isinstance(raw, Unset):
                return raw
            return cast(int | None | Unset, raw)

        def _decode_finished_at(raw: object) -> datetime.datetime | None | Unset:
            if raw is None or isinstance(raw, Unset):
                return raw
            if isinstance(raw, str):
                try:
                    return isoparse(raw)
                except (TypeError, ValueError, AttributeError, KeyError):
                    # Unparseable strings fall through and are returned raw.
                    pass
            return cast(datetime.datetime | None | Unset, raw)

        id = leftover.pop("id", UNSET)
        content = leftover.pop("content", UNSET)
        running = leftover.pop("running", UNSET)
        exit_code = _decode_exit_code(leftover.pop("exit_code", UNSET))
        error = leftover.pop("error", UNSET)

        raw_started = leftover.pop("started_at", UNSET)
        started_at: datetime.datetime | Unset = (
            UNSET if isinstance(raw_started, Unset) else isoparse(raw_started)
        )

        finished_at = _decode_finished_at(leftover.pop("finished_at", UNSET))

        instance = cls(
            id=id,
            content=content,
            running=running,
            exit_code=exit_code,
            error=error,
            started_at=started_at,
            finished_at=finished_at,
        )
        instance.additional_properties = leftover
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Names of the extra properties currently stored."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> Any:
        """Return the extra property stored under ``key``."""
        extras = self.additional_properties
        return extras[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store ``value`` as the extra property named ``key``."""
        extras = self.additional_properties
        extras[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the extra property named ``key``."""
        extras = self.additional_properties
        del extras[key]

    def __contains__(self, key: str) -> bool:
        """Tell whether an extra property named ``key`` is stored."""
        extras = self.additional_properties
        return key in extras


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/error_response.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

T = TypeVar("T", bound="ErrorResponse")


@_attrs_define
class ErrorResponse:
    """Standard error payload.

    Attributes:
        code (str): Error code for programmatic handling Example: INVALID_REQUEST_BODY.
        message (str): Human-readable error message Example: error parsing request, MAYBE invalid body format.
    """

    code: str
    message: str
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict.

        Extra properties come first; ``code`` and ``message`` are then written
        on top of them, so the declared fields win on a name clash.
        """
        payload: dict[str, Any] = dict(self.additional_properties)
        payload["code"] = self.code
        payload["message"] = self.message
        return payload

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a mapping without mutating the input.

        Both ``code`` and ``message`` are required; a missing one raises
        ``KeyError``. Remaining keys are kept in ``additional_properties``.
        """
        leftover = dict(src_dict)
        code = leftover.pop("code")
        message = leftover.pop("message")

        instance = cls(code=code, message=message)
        instance.additional_properties = leftover
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Names of the extra properties currently stored."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> Any:
        """Return the extra property stored under ``key``."""
        extras = self.additional_properties
        return extras[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store ``value`` as the extra property named ``key``."""
        extras = self.additional_properties
        extras[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the extra property named ``key``."""
        extras = self.additional_properties
        del extras[key]

    def __contains__(self, key: str) -> bool:
        """Tell whether an extra property named ``key`` is stored."""
        extras = self.additional_properties
        return key in extras


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/file_info.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

import datetime
from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field
from dateutil.parser import isoparse

T = TypeVar("T", bound="FileInfo")


@_attrs_define
class FileInfo:
    """Metadata of a file: path, size, timestamps, ownership and mode.

    Attributes:
        path (str): Absolute file path Example: /workspace/file.txt.
        size (int): File size in bytes Example: 2048.
        modified_at (datetime.datetime): Last modification time Example: 2025-11-16 14:30:45+00:00.
        created_at (datetime.datetime): File creation time Example: 2025-11-16 14:30:45+00:00.
        owner (str): File owner username Example: admin.
        group (str): File group name Example: admin.
        mode (int): File permissions in octal format Example: 755.
    """

    path: str
    size: int
    modified_at: datetime.datetime
    created_at: datetime.datetime
    owner: str
    group: str
    mode: int
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict.

        Both timestamps are rendered with ``isoformat()``. Declared fields are
        written on top of the extra properties.
        """
        # Timestamps are rendered before the payload is assembled.
        modified_text = self.modified_at.isoformat()
        created_text = self.created_at.isoformat()

        return {
            **self.additional_properties,
            "path": self.path,
            "size": self.size,
            "modified_at": modified_text,
            "created_at": created_text,
            "owner": self.owner,
            "group": self.group,
            "mode": self.mode,
        }

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a mapping without mutating the input.

        All seven declared fields are required (``KeyError`` when one is
        missing). ``modified_at`` and ``created_at`` are parsed with
        ``isoparse``. Remaining keys are kept in ``additional_properties``.
        """
        leftover = dict(src_dict)
        # Entries are evaluated top to bottom, i.e. keys are consumed and
        # timestamps parsed in declaration order.
        init_kwargs: dict[str, Any] = {
            "path": leftover.pop("path"),
            "size": leftover.pop("size"),
            "modified_at": isoparse(leftover.pop("modified_at")),
            "created_at": isoparse(leftover.pop("created_at")),
            "owner": leftover.pop("owner"),
            "group": leftover.pop("group"),
            "mode": leftover.pop("mode"),
        }

        instance = cls(**init_kwargs)
        instance.additional_properties = leftover
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Names of the extra properties currently stored."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> Any:
        """Return the extra property stored under ``key``."""
        extras = self.additional_properties
        return extras[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store ``value`` as the extra property named ``key``."""
        extras = self.additional_properties
        extras[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the extra property named ``key``."""
        extras = self.additional_properties
        del extras[key]

    def __contains__(self, key: str) -> bool:
        """Tell whether an extra property named ``key`` is stored."""
        extras = self.additional_properties
        return key in extras


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/file_metadata.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

from ..types import UNSET, Unset

T = TypeVar("T", bound="FileMetadata")


@_attrs_define
class FileMetadata:
    """Metadata accompanying a file upload.

    Attributes:
        path (str | Unset): Target file path Example: /workspace/upload.txt.
        owner (str | Unset): File owner Example: admin.
        group (str | Unset): File group Example: admin.
        mode (int | Unset): File permissions in octal Example: 755.
    """

    path: str | Unset = UNSET
    owner: str | Unset = UNSET
    group: str | Unset = UNSET
    mode: int | Unset = UNSET
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict.

        Extra properties are copied first; each declared field is then written
        on top of them, but only when it is not ``UNSET``.
        """
        payload: dict[str, Any] = dict(self.additional_properties)
        for name in ("path", "owner", "group", "mode"):
            value = getattr(self, name)
            if value is not UNSET:
                payload[name] = value
        return payload

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a mapping without mutating the input.

        Every declared field is optional and defaults to ``UNSET``. Remaining
        keys are kept in ``additional_properties``.
        """
        leftover = dict(src_dict)
        init_kwargs: dict[str, Any] = {
            name: leftover.pop(name, UNSET) for name in ("path", "owner", "group", "mode")
        }

        instance = cls(**init_kwargs)
        instance.additional_properties = leftover
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Names of the extra properties currently stored."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> Any:
        """Return the extra property stored under ``key``."""
        extras = self.additional_properties
        return extras[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store ``value`` as the extra property named ``key``."""
        extras = self.additional_properties
        extras[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the extra property named ``key``."""
        extras = self.additional_properties
        del extras[key]

    def __contains__(self, key: str) -> bool:
        """Tell whether an extra property named ``key`` is stored."""
        extras = self.additional_properties
        return key in extras


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/get_files_info_response_200.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

if TYPE_CHECKING:
    from ..models.file_info import FileInfo


T = TypeVar("T", bound="GetFilesInfoResponse200")


@_attrs_define
class GetFilesInfoResponse200:
    """Free-form object whose every property value is a ``FileInfo``.

    The model declares no fixed fields; all entries live in
    ``additional_properties`` and are reachable through item access.
    """

    additional_properties: dict[str, FileInfo] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict by calling ``to_dict()`` on each entry."""
        return {name: info.to_dict() for name, info in self.additional_properties.items()}

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance, decoding each value with ``FileInfo.from_dict``."""
        # Imported here rather than at module level (the module-level import
        # is guarded by TYPE_CHECKING).
        from ..models.file_info import FileInfo

        raw_entries = dict(src_dict)
        instance = cls()

        instance.additional_properties = {
            name: FileInfo.from_dict(raw) for name, raw in raw_entries.items()
        }
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Names of the entries currently stored."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> FileInfo:
        """Return the ``FileInfo`` stored under ``key``."""
        entries = self.additional_properties
        return entries[key]

    def __setitem__(self, key: str, value: FileInfo) -> None:
        """Store ``value`` under ``key``."""
        entries = self.additional_properties
        entries[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the entry stored under ``key``."""
        entries = self.additional_properties
        del entries[key]

    def __contains__(self, key: str) -> bool:
        """Tell whether an entry named ``key`` is stored."""
        entries = self.additional_properties
        return key in entries


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/make_dirs_body.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

if TYPE_CHECKING:
    from ..models.permission import Permission


T = TypeVar("T", bound="MakeDirsBody")


@_attrs_define
class MakeDirsBody:
    """Free-form object whose every property value is a ``Permission``.

    The model declares no fixed fields; all entries live in
    ``additional_properties`` and are reachable through item access.
    """

    additional_properties: dict[str, Permission] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict by calling ``to_dict()`` on each entry."""
        serialized: dict[str, Any] = {}
        for name in self.additional_properties:
            serialized[name] = self.additional_properties[name].to_dict()
        return serialized

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance, decoding each value with ``Permission.from_dict``."""
        # Imported here rather than at module level (the module-level import
        # is guarded by TYPE_CHECKING).
        from ..models.permission import Permission

        raw_entries = dict(src_dict)
        instance = cls()

        instance.additional_properties = {
            name: Permission.from_dict(raw) for name, raw in raw_entries.items()
        }
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Names of the entries currently stored."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> Permission:
        """Return the ``Permission`` stored under ``key``."""
        entries = self.additional_properties
        return entries[key]

    def __setitem__(self, key: str, value: Permission) -> None:
        """Store ``value`` under ``key``."""
        entries = self.additional_properties
        entries[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the entry stored under ``key``."""
        entries = self.additional_properties
        del entries[key]

    def __contains__(self, key: str) -> bool:
        """Tell whether an entry named ``key`` is stored."""
        entries = self.additional_properties
        return key in entries


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/metrics.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

T = TypeVar("T", bound="Metrics")


@_attrs_define
class Metrics:
    """Snapshot of system resource usage.

    Attributes:
        cpu_count (float): Number of CPU cores Example: 4.0.
        cpu_used_pct (float): CPU usage percentage Example: 45.5.
        mem_total_mib (float): Total memory in MiB Example: 8192.0.
        mem_used_mib (float): Used memory in MiB Example: 4096.0.
        timestamp (int): Timestamp when metrics were collected (Unix milliseconds) Example: 1700000000000.
    """

    cpu_count: float
    cpu_used_pct: float
    mem_total_mib: float
    mem_used_mib: float
    timestamp: int
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict.

        Extra properties come first; the five declared fields are written on
        top of them, so declared fields win on a name clash.
        """
        return {
            **self.additional_properties,
            "cpu_count": self.cpu_count,
            "cpu_used_pct": self.cpu_used_pct,
            "mem_total_mib": self.mem_total_mib,
            "mem_used_mib": self.mem_used_mib,
            "timestamp": self.timestamp,
        }

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a mapping without mutating the input.

        All five declared fields are required; a missing one raises
        ``KeyError``. Remaining keys are kept in ``additional_properties``.
        """
        leftover = dict(src_dict)
        required = ("cpu_count", "cpu_used_pct", "mem_total_mib", "mem_used_mib", "timestamp")
        # Keys are consumed in declaration order.
        init_kwargs: dict[str, Any] = {name: leftover.pop(name) for name in required}

        instance = cls(**init_kwargs)
        instance.additional_properties = leftover
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Names of the extra properties currently stored."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> Any:
        """Return the extra property stored under ``key``."""
        extras = self.additional_properties
        return extras[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store ``value`` as the extra property named ``key``."""
        extras = self.additional_properties
        extras[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the extra property named ``key``."""
        extras = self.additional_properties
        del extras[key]

    def __contains__(self, key: str) -> bool:
        """Tell whether an extra property named ``key`` is stored."""
        extras = self.additional_properties
        return key in extras


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/permission.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

from ..types import UNSET, Unset

T = TypeVar("T", bound="Permission")


@_attrs_define
class Permission:
    """Ownership and mode settings for a file.

    Attributes:
        mode (int): Permission mode in octal format (e.g., 644, 755) Default: 755. Example: 755.
        owner (str | Unset): Owner username Example: root.
        group (str | Unset): Group name Example: root.
    """

    mode: int = 755
    owner: str | Unset = UNSET
    group: str | Unset = UNSET
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict.

        ``mode`` is always emitted; ``owner`` and ``group`` only when they are
        not ``UNSET``. Declared fields are written on top of the extra
        properties.
        """
        payload: dict[str, Any] = dict(self.additional_properties)
        payload["mode"] = self.mode
        for name in ("owner", "group"):
            value = getattr(self, name)
            if value is not UNSET:
                payload[name] = value
        return payload

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a mapping without mutating the input.

        ``mode`` must be present (``KeyError`` otherwise); ``owner`` and
        ``group`` default to ``UNSET``. Remaining keys are kept in
        ``additional_properties``.
        """
        leftover = dict(src_dict)
        # Evaluated top to bottom: the required "mode" key is consumed first.
        init_kwargs: dict[str, Any] = {
            "mode": leftover.pop("mode"),
            "owner": leftover.pop("owner", UNSET),
            "group": leftover.pop("group", UNSET),
        }

        instance = cls(**init_kwargs)
        instance.additional_properties = leftover
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Names of the extra properties currently stored."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> Any:
        """Return the extra property stored under ``key``."""
        extras = self.additional_properties
        return extras[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store ``value`` as the extra property named ``key``."""
        extras = self.additional_properties
        extras[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the extra property named ``key``."""
        extras = self.additional_properties
        del extras[key]

    def __contains__(self, key: str) -> bool:
        """Tell whether an extra property named ``key`` is stored."""
        extras = self.additional_properties
        return key in extras


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/rename_file_item.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

T = TypeVar("T", bound="RenameFileItem")


@_attrs_define
class RenameFileItem:
    """A single file rename/move operation.

    Attributes:
        src (str): Source file path Example: /workspace/old.txt.
        dest (str): Destination file path Example: /workspace/new.txt.
    """

    src: str
    dest: str
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict.

        Extra properties come first; ``src`` and ``dest`` are written on top
        of them, so the declared fields win on a name clash.
        """
        return {
            **self.additional_properties,
            "src": self.src,
            "dest": self.dest,
        }

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a mapping without mutating the input.

        Both ``src`` and ``dest`` are required; a missing one raises
        ``KeyError``. Remaining keys are kept in ``additional_properties``.
        """
        leftover = dict(src_dict)
        source_path = leftover.pop("src")
        target_path = leftover.pop("dest")

        instance = cls(src=source_path, dest=target_path)
        instance.additional_properties = leftover
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Names of the extra properties currently stored."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> Any:
        """Return the extra property stored under ``key``."""
        extras = self.additional_properties
        return extras[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store ``value`` as the extra property named ``key``."""
        extras = self.additional_properties
        extras[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the extra property named ``key``."""
        extras = self.additional_properties
        del extras[key]

    def __contains__(self, key: str) -> bool:
        """Tell whether an extra property named ``key`` is stored."""
        extras = self.additional_properties
        return key in extras


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/replace_content_body.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

if TYPE_CHECKING:
    from ..models.replace_file_content_item import ReplaceFileContentItem


T = TypeVar("T", bound="ReplaceContentBody")


@_attrs_define
class ReplaceContentBody:
    """Free-form object whose every property value is a ``ReplaceFileContentItem``.

    The model declares no fixed fields; all entries live in
    ``additional_properties`` and are reachable through item access.
    """

    additional_properties: dict[str, ReplaceFileContentItem] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict by calling ``to_dict()`` on each entry."""
        return {name: item.to_dict() for name, item in self.additional_properties.items()}

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance, decoding each value with ``ReplaceFileContentItem.from_dict``."""
        # Imported here rather than at module level (the module-level import
        # is guarded by TYPE_CHECKING).
        from ..models.replace_file_content_item import ReplaceFileContentItem

        raw_entries = dict(src_dict)
        instance = cls()

        decoded: dict[str, ReplaceFileContentItem] = {}
        for name in raw_entries:
            decoded[name] = ReplaceFileContentItem.from_dict(raw_entries[name])

        instance.additional_properties = decoded
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Names of the entries currently stored."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> ReplaceFileContentItem:
        """Return the ``ReplaceFileContentItem`` stored under ``key``."""
        entries = self.additional_properties
        return entries[key]

    def __setitem__(self, key: str, value: ReplaceFileContentItem) -> None:
        """Store ``value`` under ``key``."""
        entries = self.additional_properties
        entries[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the entry stored under ``key``."""
        entries = self.additional_properties
        del entries[key]

    def __contains__(self, key: str) -> bool:
        """Tell whether an entry named ``key`` is stored."""
        entries = self.additional_properties
        return key in entries


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/replace_file_content_item.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

T = TypeVar("T", bound="ReplaceFileContentItem")


@_attrs_define
class ReplaceFileContentItem:
    """Content replacement operation.

    Attributes:
        old (str): String to be replaced. Example: localhost.
        new (str): Replacement string. Example: 0.0.0.0.
    """

    old: str
    new: str
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict.

        Extra properties are written first, so the declared ``old``/``new``
        fields override any extra property with the same name.
        """
        serialized: dict[str, Any] = dict(self.additional_properties)
        serialized["old"] = self.old
        serialized["new"] = self.new
        return serialized

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a mapping.

        ``"old"`` and ``"new"`` are required (``KeyError`` if missing); all
        other keys end up in ``additional_properties``.
        """
        # Shallow copy so the caller's mapping is left untouched.
        remaining = dict(src_dict)
        old_value = remaining.pop("old")
        new_value = remaining.pop("new")

        instance = cls(old=old_value, new=new_value)
        instance.additional_properties = remaining
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Return the names of all extra properties as a new list."""
        return [name for name in self.additional_properties.keys()]

    def __getitem__(self, key: str) -> Any:
        """Return the extra property named ``key``; raises ``KeyError`` if it is absent."""
        extras = self.additional_properties
        return extras[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store ``value`` as the extra property named ``key``."""
        extras = self.additional_properties
        extras[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the extra property named ``key``; raises ``KeyError`` if it is absent."""
        extras = self.additional_properties
        del extras[key]

    def __contains__(self, key: str) -> bool:
        """Report whether an extra property named ``key`` exists."""
        extras = self.additional_properties
        return key in extras


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/run_code_request.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

from ..types import UNSET, Unset

if TYPE_CHECKING:
    from ..models.code_context import CodeContext


T = TypeVar("T", bound="RunCodeRequest")


@_attrs_define
class RunCodeRequest:
    """Request to execute code in a context.

    Attributes:
        code (str): Source code to execute. Example: import numpy as np
            result = np.array([1, 2, 3])
            print(result)
            .
        context (CodeContext | Unset): Code execution context with session identifier.
    """

    code: str
    context: CodeContext | Unset = UNSET
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict.

        ``code`` is always emitted; ``context`` is emitted only when it is set.
        Declared fields override extra properties that share their name.
        """
        context: dict[str, Any] | Unset = (
            UNSET if isinstance(self.context, Unset) else self.context.to_dict()
        )

        serialized: dict[str, Any] = dict(self.additional_properties)
        serialized["code"] = self.code
        if context is not UNSET:
            serialized["context"] = context
        return serialized

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a mapping.

        ``"code"`` is required (``KeyError`` if missing). ``"context"``, when
        present, is parsed with ``CodeContext.from_dict``. All other keys are
        stored in ``additional_properties``.
        """
        # Imported at call time: the module-level import is guarded by TYPE_CHECKING.
        from ..models.code_context import CodeContext

        remaining = dict(src_dict)
        code_value = remaining.pop("code")

        raw_context = remaining.pop("context", UNSET)
        context: CodeContext | Unset = (
            UNSET if isinstance(raw_context, Unset) else CodeContext.from_dict(raw_context)
        )

        instance = cls(code=code_value, context=context)
        instance.additional_properties = remaining
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Return the names of all extra properties as a new list."""
        return [name for name in self.additional_properties.keys()]

    def __getitem__(self, key: str) -> Any:
        """Return the extra property named ``key``; raises ``KeyError`` if it is absent."""
        extras = self.additional_properties
        return extras[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store ``value`` as the extra property named ``key``."""
        extras = self.additional_properties
        extras[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the extra property named ``key``; raises ``KeyError`` if it is absent."""
        extras = self.additional_properties
        del extras[key]

    def __contains__(self, key: str) -> bool:
        """Report whether an extra property named ``key`` exists."""
        extras = self.additional_properties
        return key in extras


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/run_command_request.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

from ..types import UNSET, Unset

if TYPE_CHECKING:
    from ..models.run_command_request_envs import RunCommandRequestEnvs


T = TypeVar("T", bound="RunCommandRequest")


@_attrs_define
class RunCommandRequest:
    """Request to execute a shell command.

    Attributes:
        command (str): Shell command to execute. Example: ls -la /workspace.
        cwd (str | Unset): Working directory for command execution. Example: /workspace.
        background (bool | Unset): Whether to run command in detached mode. Default: False.
        timeout (int | Unset): Maximum allowed execution time in milliseconds before the command is forcefully
            terminated by the server. If omitted, the server will not enforce any timeout. Example: 60000.
        uid (int | Unset): Unix user ID used to run the command. If `gid` is provided, `uid` is required.
            Example: 1000.
        gid (int | Unset): Unix group ID used to run the command. Requires `uid` to be provided.
            Example: 1000.
        envs (RunCommandRequestEnvs | Unset): Environment variables injected into the command process. Example:
            {'PATH': '/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin', 'PYTHONUNBUFFERED': '1'}.
    """

    command: str
    cwd: str | Unset = UNSET
    background: bool | Unset = False
    timeout: int | Unset = UNSET
    uid: int | Unset = UNSET
    gid: int | Unset = UNSET
    envs: RunCommandRequestEnvs | Unset = UNSET
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict.

        ``command`` is always emitted. Each optional field is emitted only when
        its value is not the ``UNSET`` sentinel. Declared fields override extra
        properties that share their name.
        """
        envs: dict[str, Any] | Unset = UNSET
        if not isinstance(self.envs, Unset):
            envs = self.envs.to_dict()

        serialized: dict[str, Any] = dict(self.additional_properties)
        serialized["command"] = self.command

        # Optional fields, in the order they appear on the wire.
        optional_fields = (
            ("cwd", self.cwd),
            ("background", self.background),
            ("timeout", self.timeout),
            ("uid", self.uid),
            ("gid", self.gid),
            ("envs", envs),
        )
        for field_name, field_value in optional_fields:
            if field_value is not UNSET:
                serialized[field_name] = field_value

        return serialized

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a mapping.

        ``"command"`` is required (``KeyError`` if missing). Optional keys that
        are absent become ``UNSET``. ``"envs"``, when present, is parsed with
        ``RunCommandRequestEnvs.from_dict``. All other keys are stored in
        ``additional_properties``.
        """
        # Imported at call time: the module-level import is guarded by TYPE_CHECKING.
        from ..models.run_command_request_envs import RunCommandRequestEnvs

        remaining = dict(src_dict)
        command_value = remaining.pop("command")
        cwd_value = remaining.pop("cwd", UNSET)
        background_value = remaining.pop("background", UNSET)
        timeout_value = remaining.pop("timeout", UNSET)
        uid_value = remaining.pop("uid", UNSET)
        gid_value = remaining.pop("gid", UNSET)

        raw_envs = remaining.pop("envs", UNSET)
        envs: RunCommandRequestEnvs | Unset = (
            UNSET if isinstance(raw_envs, Unset) else RunCommandRequestEnvs.from_dict(raw_envs)
        )

        instance = cls(
            command=command_value,
            cwd=cwd_value,
            background=background_value,
            timeout=timeout_value,
            uid=uid_value,
            gid=gid_value,
            envs=envs,
        )
        instance.additional_properties = remaining
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Return the names of all extra properties as a new list."""
        return [name for name in self.additional_properties.keys()]

    def __getitem__(self, key: str) -> Any:
        """Return the extra property named ``key``; raises ``KeyError`` if it is absent."""
        extras = self.additional_properties
        return extras[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store ``value`` as the extra property named ``key``."""
        extras = self.additional_properties
        extras[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the extra property named ``key``; raises ``KeyError`` if it is absent."""
        extras = self.additional_properties
        del extras[key]

    def __contains__(self, key: str) -> bool:
        """Report whether an extra property named ``key`` exists."""
        extras = self.additional_properties
        return key in extras


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/run_command_request_envs.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

T = TypeVar("T", bound="RunCommandRequestEnvs")


@_attrs_define
class RunCommandRequestEnvs:
    """Environment variables injected into the command process.

    The model has no fixed fields; each variable is an entry of
    ``additional_properties``.

    Example:
        {'PATH': '/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin', 'PYTHONUNBUFFERED': '1'}

    """

    additional_properties: dict[str, str] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a new dict holding a shallow copy of all entries."""
        return dict(self.additional_properties)

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance whose entries are a shallow copy of ``src_dict``."""
        entries = dict(src_dict)
        instance = cls()
        instance.additional_properties = entries
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Return the entry names as a new list."""
        return [name for name in self.additional_properties.keys()]

    def __getitem__(self, key: str) -> str:
        """Return the value stored under ``key``; raises ``KeyError`` if it is absent."""
        entries = self.additional_properties
        return entries[key]

    def __setitem__(self, key: str, value: str) -> None:
        """Store ``value`` under ``key``."""
        entries = self.additional_properties
        entries[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the entry stored under ``key``; raises ``KeyError`` if it is absent."""
        entries = self.additional_properties
        del entries[key]

    def __contains__(self, key: str) -> bool:
        """Report whether an entry named ``key`` exists."""
        entries = self.additional_properties
        return key in entries


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/server_stream_event.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

from ..models.server_stream_event_type import ServerStreamEventType
from ..types import UNSET, Unset

if TYPE_CHECKING:
    from ..models.server_stream_event_error import ServerStreamEventError
    from ..models.server_stream_event_results import ServerStreamEventResults


T = TypeVar("T", bound="ServerStreamEvent")


@_attrs_define
class ServerStreamEvent:
    """Server-sent event for streaming execution output.

    Attributes:
        type_ (ServerStreamEventType | Unset): Event type for client-side handling. Example: stdout.
        text (str | Unset): Textual data for status, init, and stream events. Example: Hello, World!
            .
        execution_count (int | Unset): Cell execution number in the session. Example: 1.
        execution_time (int | Unset): Execution duration in milliseconds. Example: 150.
        timestamp (int | Unset): When the event was generated (Unix milliseconds). Example: 1700000000000.
        results (ServerStreamEventResults | Unset): Execution output in various MIME types (e.g., "text/plain",
            "text/html"). Example: {'text/plain': '4'}.
        error (ServerStreamEventError | Unset): Execution error details if an error occurred.
    """

    type_: ServerStreamEventType | Unset = UNSET
    text: str | Unset = UNSET
    execution_count: int | Unset = UNSET
    execution_time: int | Unset = UNSET
    timestamp: int | Unset = UNSET
    results: ServerStreamEventResults | Unset = UNSET
    error: ServerStreamEventError | Unset = UNSET
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict.

        Only fields whose value is not the ``UNSET`` sentinel are emitted. The
        ``type_`` attribute is written under the key ``"type"`` as its enum
        value. Declared fields override extra properties that share their name.
        """
        type_: str | Unset = UNSET
        if not isinstance(self.type_, Unset):
            type_ = self.type_.value

        results: dict[str, Any] | Unset = (
            UNSET if isinstance(self.results, Unset) else self.results.to_dict()
        )
        error: dict[str, Any] | Unset = (
            UNSET if isinstance(self.error, Unset) else self.error.to_dict()
        )

        serialized: dict[str, Any] = dict(self.additional_properties)
        # Wire keys paired with their already-serialized values, in output order.
        candidates = (
            ("type", type_),
            ("text", self.text),
            ("execution_count", self.execution_count),
            ("execution_time", self.execution_time),
            ("timestamp", self.timestamp),
            ("results", results),
            ("error", error),
        )
        for field_name, field_value in candidates:
            if field_value is not UNSET:
                serialized[field_name] = field_value

        return serialized

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a mapping.

        Every declared key is optional and becomes ``UNSET`` when absent.
        ``"type"`` is converted with ``ServerStreamEventType(...)``, which
        raises ``ValueError`` for a value that is not a member. ``"results"``
        and ``"error"`` are parsed with their model's ``from_dict``. All other
        keys are stored in ``additional_properties``.
        """
        # Imported at call time: the module-level imports are guarded by TYPE_CHECKING.
        from ..models.server_stream_event_error import ServerStreamEventError
        from ..models.server_stream_event_results import ServerStreamEventResults

        remaining = dict(src_dict)

        raw_type = remaining.pop("type", UNSET)
        type_: ServerStreamEventType | Unset = (
            UNSET if isinstance(raw_type, Unset) else ServerStreamEventType(raw_type)
        )

        text_value = remaining.pop("text", UNSET)
        execution_count_value = remaining.pop("execution_count", UNSET)
        execution_time_value = remaining.pop("execution_time", UNSET)
        timestamp_value = remaining.pop("timestamp", UNSET)

        raw_results = remaining.pop("results", UNSET)
        results: ServerStreamEventResults | Unset = (
            UNSET if isinstance(raw_results, Unset) else ServerStreamEventResults.from_dict(raw_results)
        )

        raw_error = remaining.pop("error", UNSET)
        error: ServerStreamEventError | Unset = (
            UNSET if isinstance(raw_error, Unset) else ServerStreamEventError.from_dict(raw_error)
        )

        instance = cls(
            type_=type_,
            text=text_value,
            execution_count=execution_count_value,
            execution_time=execution_time_value,
            timestamp=timestamp_value,
            results=results,
            error=error,
        )
        instance.additional_properties = remaining
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Return the names of all extra properties as a new list."""
        return [name for name in self.additional_properties.keys()]

    def __getitem__(self, key: str) -> Any:
        """Return the extra property named ``key``; raises ``KeyError`` if it is absent."""
        extras = self.additional_properties
        return extras[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store ``value`` as the extra property named ``key``."""
        extras = self.additional_properties
        extras[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the extra property named ``key``; raises ``KeyError`` if it is absent."""
        extras = self.additional_properties
        del extras[key]

    def __contains__(self, key: str) -> bool:
        """Report whether an extra property named ``key`` exists."""
        extras = self.additional_properties
        return key in extras


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/server_stream_event_error.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar, cast

from attrs import define as _attrs_define
from attrs import field as _attrs_field

from ..types import UNSET, Unset

T = TypeVar("T", bound="ServerStreamEventError")


@_attrs_define
class ServerStreamEventError:
    """Execution error details if an error occurred.

    Attributes:
        ename (str | Unset): Error name/type. Example: NameError.
        evalue (str | Unset): Error value/message. Example: name 'undefined_var' is not defined.
        traceback (list[str] | Unset): Stack trace lines. Example: ['Traceback (most recent call last):', '  File
            "<stdin>", line 1, in <module>', "NameError: name 'undefined_var' is not defined"].
    """

    ename: str | Unset = UNSET
    evalue: str | Unset = UNSET
    traceback: list[str] | Unset = UNSET
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict.

        Only fields whose value is not the ``UNSET`` sentinel are emitted.
        Declared fields override extra properties that share their name. The
        ``traceback`` list is placed in the result as-is, not copied.
        """
        traceback: list[str] | Unset = UNSET if isinstance(self.traceback, Unset) else self.traceback

        serialized: dict[str, Any] = dict(self.additional_properties)
        candidates = (
            ("ename", self.ename),
            ("evalue", self.evalue),
            ("traceback", traceback),
        )
        for field_name, field_value in candidates:
            if field_value is not UNSET:
                serialized[field_name] = field_value

        return serialized

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a mapping.

        Every declared key is optional and becomes ``UNSET`` when absent. All
        other keys are stored in ``additional_properties``.
        """
        remaining = dict(src_dict)
        ename_value = remaining.pop("ename", UNSET)
        evalue_value = remaining.pop("evalue", UNSET)
        # ``cast`` only informs the type checker; the value is passed through unchanged.
        traceback_value = cast(list[str], remaining.pop("traceback", UNSET))

        instance = cls(ename=ename_value, evalue=evalue_value, traceback=traceback_value)
        instance.additional_properties = remaining
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Return the names of all extra properties as a new list."""
        return [name for name in self.additional_properties.keys()]

    def __getitem__(self, key: str) -> Any:
        """Return the extra property named ``key``; raises ``KeyError`` if it is absent."""
        extras = self.additional_properties
        return extras[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store ``value`` as the extra property named ``key``."""
        extras = self.additional_properties
        extras[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the extra property named ``key``; raises ``KeyError`` if it is absent."""
        extras = self.additional_properties
        del extras[key]

    def __contains__(self, key: str) -> bool:
        """Report whether an extra property named ``key`` exists."""
        extras = self.additional_properties
        return key in extras


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/server_stream_event_results.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

T = TypeVar("T", bound="ServerStreamEventResults")


@_attrs_define
class ServerStreamEventResults:
    """Execution output in various MIME types (e.g., "text/plain", "text/html").

    The model has no fixed fields; each entry lives in
    ``additional_properties``.

    Example:
        {'text/plain': '4'}

    """

    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a new dict holding a shallow copy of all entries."""
        return dict(self.additional_properties)

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance whose entries are a shallow copy of ``src_dict``."""
        entries = dict(src_dict)
        instance = cls()
        instance.additional_properties = entries
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Return the entry names as a new list."""
        return [name for name in self.additional_properties.keys()]

    def __getitem__(self, key: str) -> Any:
        """Return the value stored under ``key``; raises ``KeyError`` if it is absent."""
        entries = self.additional_properties
        return entries[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store ``value`` under ``key``."""
        entries = self.additional_properties
        entries[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the entry stored under ``key``; raises ``KeyError`` if it is absent."""
        entries = self.additional_properties
        del entries[key]

    def __contains__(self, key: str) -> bool:
        """Report whether an entry named ``key`` exists."""
        entries = self.additional_properties
        return key in entries


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/server_stream_event_type.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from enum import Enum


class ServerStreamEventType(str, Enum):
    """String-valued enumeration of the ``type`` values a stream event can carry.

    Members are also ``str`` instances, so they compare equal to their raw
    wire value.
    """

    # Member order is kept as generated; iteration order depends on it.
    ERROR = "error"
    EXECUTION_COMPLETE = "execution_complete"
    EXECUTION_COUNT = "execution_count"
    INIT = "init"
    PING = "ping"
    RESULT = "result"
    STATUS = "status"
    STDERR = "stderr"
    STDOUT = "stdout"

    def __str__(self) -> str:
        """Return the raw wire value instead of the default ``ClassName.MEMBER`` form."""
        raw_value = self.value
        return str(raw_value)


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/models/upload_file_body.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from io import BytesIO
from typing import Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

from .. import types
from ..types import UNSET, File, FileTypes, Unset

T = TypeVar("T", bound="UploadFileBody")


@_attrs_define
class UploadFileBody:
    """Body of a file upload request.

    Attributes:
        metadata (str | Unset): JSON-encoded file metadata (FileMetadata object). Example:
            {"path":"/workspace/file.txt","owner":"admin","group":"admin","mode":755}.
        file (File | Unset): File to upload.
    """

    metadata: str | Unset = UNSET
    file: File | Unset = UNSET
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict.

        Only fields that are set are emitted; ``file`` is emitted as the tuple
        returned by ``File.to_tuple()``. Declared fields override extra
        properties that share their name.
        """
        file: FileTypes | Unset = UNSET
        if not isinstance(self.file, Unset):
            file = self.file.to_tuple()

        serialized: dict[str, Any] = dict(self.additional_properties)
        for field_name, field_value in (("metadata", self.metadata), ("file", file)):
            if field_value is not UNSET:
                serialized[field_name] = field_value

        return serialized

    def to_multipart(self) -> types.RequestFiles:
        """Return the body as a list of ``(name, file-tuple)`` multipart parts.

        ``metadata`` and every extra property are stringified, encoded to bytes
        and sent as ``text/plain`` parts without a filename; ``file`` uses the
        tuple from ``File.to_tuple()``.
        """
        parts: types.RequestFiles = []

        if not isinstance(self.metadata, Unset):
            metadata_part = (None, str(self.metadata).encode(), "text/plain")
            parts.append(("metadata", metadata_part))

        if not isinstance(self.file, Unset):
            parts.append(("file", self.file.to_tuple()))

        parts.extend(
            (extra_name, (None, str(extra_value).encode(), "text/plain"))
            for extra_name, extra_value in self.additional_properties.items()
        )

        return parts

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a mapping.

        Both declared keys are optional. ``"file"``, when present, is wrapped
        in ``BytesIO`` and then in ``File``. All other keys are stored in
        ``additional_properties``.
        """
        remaining = dict(src_dict)
        metadata_value = remaining.pop("metadata", UNSET)

        raw_file = remaining.pop("file", UNSET)
        file: File | Unset = UNSET if isinstance(raw_file, Unset) else File(payload=BytesIO(raw_file))

        instance = cls(metadata=metadata_value, file=file)
        instance.additional_properties = remaining
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Return the names of all extra properties as a new list."""
        return [name for name in self.additional_properties.keys()]

    def __getitem__(self, key: str) -> Any:
        """Return the extra property named ``key``; raises ``KeyError`` if it is absent."""
        extras = self.additional_properties
        return extras[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store ``value`` as the extra property named ``key``."""
        extras = self.additional_properties
        extras[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the extra property named ``key``; raises ``KeyError`` if it is absent."""
        extras = self.additional_properties
        del extras[key]

    def __contains__(self, key: str) -> bool:
        """Report whether an extra property named ``key`` exists."""
        extras = self.additional_properties
        return key in extras


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/py.typed
================================================
# Marker file for PEP 561

================================================
FILE: sdks/sandbox/python/src/opensandbox/api/execd/types.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Contains some shared types for properties"""

from collections.abc import Mapping, MutableMapping
from http import HTTPStatus
from typing import IO, BinaryIO, Generic, Literal, TypeVar

from attrs import define


class Unset:
    """Type of the ``UNSET`` sentinel; instances are always falsy."""

    def __bool__(self) -> Literal[False]:
        # Always falsy, so a truthiness check treats an unset value as empty.
        return False


# Module-level sentinel instance of ``Unset``.
UNSET: Unset = Unset()

# The types that `httpx.Client(files=)` can accept, copied from that library.
# Content of a single uploaded part: a binary stream, raw bytes, or text.
FileContent = IO[bytes] | bytes | str
# One multipart part, as a 3-tuple or (with extra headers) a 4-tuple.
FileTypes = (
    # (filename, file (or bytes), content_type)
    tuple[str | None, FileContent, str | None]
    # (filename, file (or bytes), content_type, headers)
    | tuple[str | None, FileContent, str | None, Mapping[str, str]]
)
# Full multipart payload: a list of (form field name, part) pairs.
RequestFiles = list[tuple[str, FileTypes]]


@define
class File:
    """Describes one file to upload: a binary payload plus an optional name and MIME type."""

    # Binary stream holding the file contents.
    payload: BinaryIO
    # Optional file name; None when not provided.
    file_name: str | None = None
    # Optional MIME type; None when not provided.
    mime_type: str | None = None

    def to_tuple(self) -> FileTypes:
        """Build the ``(file_name, payload, mime_type)`` triple that httpx takes for multipart/form-data"""
        as_triple = (self.file_name, self.payload, self.mime_type)
        return as_triple


# Type variable for the parsed payload type carried by Response.
T = TypeVar("T")


@define
class Response(Generic[T]):
    """A response from an endpoint, generic over the type of its parsed payload"""

    # Status code of the response as an HTTPStatus member.
    status_code: HTTPStatus
    # Raw body bytes.
    content: bytes
    # Response headers as a mutable str -> str mapping.
    headers: MutableMapping[str, str]
    # Parsed form of the response, or None when no parsed value is available.
    parsed: T | None


# Names exported by a star-import of this module.
__all__ = ["UNSET", "File", "FileTypes", "RequestFiles", "Response", "Unset"]


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/__init__.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""A client library for accessing OpenSandbox Lifecycle API"""

from .client import AuthenticatedClient, Client

# Names exported by a star-import of this package: the two client classes imported above.
__all__ = (
    "AuthenticatedClient",
    "Client",
)


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/api/__init__.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Contains methods for accessing the API"""


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/api/sandboxes/__init__.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Contains endpoint functions for accessing the API"""


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/api/sandboxes/delete_sandboxes_sandbox_id.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any, cast
from urllib.parse import quote

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...types import Response


def _get_kwargs(
    sandbox_id: str,
) -> dict[str, Any]:
    _kwargs: dict[str, Any] = {
        "method": "delete",
        "url": "/sandboxes/{sandbox_id}".format(
            sandbox_id=quote(str(sandbox_id), safe=""),
        ),
    }

    return _kwargs


def _parse_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Any | ErrorResponse | None:
    """Turn the HTTP response of a delete call into its parsed value.

    * 204 yields ``None`` (no body is read).
    * 401, 403, 404, 409 and 500 decode the JSON body into an ``ErrorResponse``.
    * Any other status raises ``errors.UnexpectedStatus`` when
      ``client.raise_on_unexpected_status`` is truthy, and yields ``None`` otherwise.
    """
    status = response.status_code

    if status == 204:
        return cast(Any, None)

    if status in (401, 403, 404, 409, 500):
        return ErrorResponse.from_dict(response.json())

    if not client.raise_on_unexpected_status:
        return None
    raise errors.UnexpectedStatus(response.status_code, response.content)


def _build_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Response[Any | ErrorResponse]:
    """Wrap an httpx response in a ``Response`` carrying status, body, headers and parsed value."""
    # The status code is converted before parsing, matching the field order of Response.
    status = HTTPStatus(response.status_code)
    raw_body = response.content
    header_map = response.headers
    parsed_value = _parse_response(client=client, response=response)
    return Response(status_code=status, content=raw_body, headers=header_map, parsed=parsed_value)


def sync_detailed(
    sandbox_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Response[Any | ErrorResponse]:
    """Delete a sandbox and return the full response.

    Sends the delete request through the client's synchronous httpx client. Per the
    API description, deleting a sandbox terminates its execution; the sandbox will
    transition through Stopping state to Terminated.

    Args:
        sandbox_id (str): Identifier of the sandbox to delete.
        client (AuthenticatedClient | Client): Client used to send the request.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """
    request_args = _get_kwargs(sandbox_id=sandbox_id)
    http_response = client.get_httpx_client().request(**request_args)
    return _build_response(client=client, response=http_response)


def sync(
    sandbox_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Any | ErrorResponse | None:
    """Delete a sandbox and return only the parsed result.

    Convenience wrapper around ``sync_detailed`` that discards status, headers and
    raw content. Per the API description, deleting a sandbox terminates its
    execution; the sandbox will transition through Stopping state to Terminated.

    Args:
        sandbox_id (str): Identifier of the sandbox to delete.
        client (AuthenticatedClient | Client): Client used to send the request.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ErrorResponse | None: The ``parsed`` attribute of the detailed response.
    """
    detailed = sync_detailed(sandbox_id=sandbox_id, client=client)
    return detailed.parsed


async def asyncio_detailed(
    sandbox_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Response[Any | ErrorResponse]:
    """Delete a sandbox asynchronously and return the full response.

    Sends the delete request through the client's asynchronous httpx client. Per
    the API description, deleting a sandbox terminates its execution; the sandbox
    will transition through Stopping state to Terminated.

    Args:
        sandbox_id (str): Identifier of the sandbox to delete.
        client (AuthenticatedClient | Client): Client used to send the request.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """
    request_args = _get_kwargs(sandbox_id=sandbox_id)
    http_response = await client.get_async_httpx_client().request(**request_args)
    return _build_response(client=client, response=http_response)


async def asyncio(
    sandbox_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Any | ErrorResponse | None:
    """Delete a sandbox asynchronously and return only the parsed result.

    Convenience wrapper around ``asyncio_detailed`` that discards status, headers
    and raw content. Per the API description, deleting a sandbox terminates its
    execution; the sandbox will transition through Stopping state to Terminated.

    Args:
        sandbox_id (str): Identifier of the sandbox to delete.
        client (AuthenticatedClient | Client): Client used to send the request.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ErrorResponse | None: The ``parsed`` attribute of the detailed response.
    """
    detailed = await asyncio_detailed(sandbox_id=sandbox_id, client=client)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/api/sandboxes/get_sandboxes.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...models.list_sandboxes_response import ListSandboxesResponse
from ...types import UNSET, Response, Unset


def _get_kwargs(
    *,
    state: list[str] | Unset = UNSET,
    metadata: str | Unset = UNSET,
    page: int | Unset = 1,
    page_size: int | Unset = 20,
) -> dict[str, Any]:
    """Assemble the httpx request arguments for listing sandboxes.

    The query string carries ``state``, ``metadata``, ``page`` and ``pageSize``;
    any entry whose value is ``UNSET`` or ``None`` is left out.

    Returns:
        dict[str, Any]: A mapping with the ``method``, ``url`` and ``params`` keys.
    """
    candidates: dict[str, Any] = {
        # Any Unset instance passed as `state` is normalized to the UNSET singleton.
        "state": UNSET if isinstance(state, Unset) else state,
        "metadata": metadata,
        "page": page,
        "pageSize": page_size,
    }

    query = {name: value for name, value in candidates.items() if not (value is UNSET or value is None)}

    return {"method": "get", "url": "/sandboxes", "params": query}


def _parse_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> ErrorResponse | ListSandboxesResponse | None:
    """Turn the HTTP response of a list call into its parsed value.

    * 200 decodes the JSON body into a ``ListSandboxesResponse``.
    * 400, 401 and 500 decode the JSON body into an ``ErrorResponse``.
    * Any other status raises ``errors.UnexpectedStatus`` when
      ``client.raise_on_unexpected_status`` is truthy, and yields ``None`` otherwise.
    """
    status = response.status_code

    if status == 200:
        return ListSandboxesResponse.from_dict(response.json())

    if status in (400, 401, 500):
        return ErrorResponse.from_dict(response.json())

    if not client.raise_on_unexpected_status:
        return None
    raise errors.UnexpectedStatus(response.status_code, response.content)


def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[ErrorResponse | ListSandboxesResponse]:
    """Wrap an httpx response in a ``Response`` carrying status, body, headers and parsed value."""
    # The status code is converted before parsing, matching the field order of Response.
    status = HTTPStatus(response.status_code)
    raw_body = response.content
    header_map = response.headers
    parsed_value = _parse_response(client=client, response=response)
    return Response(status_code=status, content=raw_body, headers=header_map, parsed=parsed_value)


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
    state: list[str] | Unset = UNSET,
    metadata: str | Unset = UNSET,
    page: int | Unset = 1,
    page_size: int | Unset = 20,
) -> Response[ErrorResponse | ListSandboxesResponse]:
    """List sandboxes and return the full response.

    Sends the list request through the client's synchronous httpx client. Per the
    API description, all filter conditions use AND logic, while multiple `state`
    values use OR logic within states.

    Args:
        client (AuthenticatedClient | Client): Client used to send the request.
        state (list[str] | Unset): Optional state filter.
        metadata (str | Unset): Optional metadata filter.
        page (int | Unset):  Default: 1.
        page_size (int | Unset):  Default: 20.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | ListSandboxesResponse]
    """
    request_args = _get_kwargs(state=state, metadata=metadata, page=page, page_size=page_size)
    http_response = client.get_httpx_client().request(**request_args)
    return _build_response(client=client, response=http_response)


def sync(
    *,
    client: AuthenticatedClient | Client,
    state: list[str] | Unset = UNSET,
    metadata: str | Unset = UNSET,
    page: int | Unset = 1,
    page_size: int | Unset = 20,
) -> ErrorResponse | ListSandboxesResponse | None:
    """List sandboxes and return only the parsed result.

    Convenience wrapper around ``sync_detailed`` that discards status, headers and
    raw content. Per the API description, all filter conditions use AND logic,
    while multiple `state` values use OR logic within states.

    Args:
        client (AuthenticatedClient | Client): Client used to send the request.
        state (list[str] | Unset): Optional state filter.
        metadata (str | Unset): Optional metadata filter.
        page (int | Unset):  Default: 1.
        page_size (int | Unset):  Default: 20.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        ErrorResponse | ListSandboxesResponse | None: The ``parsed`` attribute of the detailed response.
    """
    detailed = sync_detailed(
        client=client,
        state=state,
        metadata=metadata,
        page=page,
        page_size=page_size,
    )
    return detailed.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
    state: list[str] | Unset = UNSET,
    metadata: str | Unset = UNSET,
    page: int | Unset = 1,
    page_size: int | Unset = 20,
) -> Response[ErrorResponse | ListSandboxesResponse]:
    """List sandboxes asynchronously and return the full response.

    Sends the list request through the client's asynchronous httpx client. Per the
    API description, all filter conditions use AND logic, while multiple `state`
    values use OR logic within states.

    Args:
        client (AuthenticatedClient | Client): Client used to send the request.
        state (list[str] | Unset): Optional state filter.
        metadata (str | Unset): Optional metadata filter.
        page (int | Unset):  Default: 1.
        page_size (int | Unset):  Default: 20.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | ListSandboxesResponse]
    """
    request_args = _get_kwargs(state=state, metadata=metadata, page=page, page_size=page_size)
    http_response = await client.get_async_httpx_client().request(**request_args)
    return _build_response(client=client, response=http_response)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
    state: list[str] | Unset = UNSET,
    metadata: str | Unset = UNSET,
    page: int | Unset = 1,
    page_size: int | Unset = 20,
) -> ErrorResponse | ListSandboxesResponse | None:
    """List sandboxes asynchronously and return only the parsed result.

    Convenience wrapper around ``asyncio_detailed`` that discards status, headers
    and raw content. Per the API description, all filter conditions use AND logic,
    while multiple `state` values use OR logic within states.

    Args:
        client (AuthenticatedClient | Client): Client used to send the request.
        state (list[str] | Unset): Optional state filter.
        metadata (str | Unset): Optional metadata filter.
        page (int | Unset):  Default: 1.
        page_size (int | Unset):  Default: 20.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        ErrorResponse | ListSandboxesResponse | None: The ``parsed`` attribute of the detailed response.
    """
    detailed = await asyncio_detailed(
        client=client,
        state=state,
        metadata=metadata,
        page=page,
        page_size=page_size,
    )
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/api/sandboxes/get_sandboxes_sandbox_id.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any
from urllib.parse import quote

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...models.sandbox import Sandbox
from ...types import Response


def _get_kwargs(
    sandbox_id: str,
) -> dict[str, Any]:
    _kwargs: dict[str, Any] = {
        "method": "get",
        "url": "/sandboxes/{sandbox_id}".format(
            sandbox_id=quote(str(sandbox_id), safe=""),
        ),
    }

    return _kwargs


def _parse_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> ErrorResponse | Sandbox | None:
    """Turn the HTTP response of a fetch-by-id call into its parsed value.

    * 200 decodes the JSON body into a ``Sandbox``.
    * 401, 403, 404 and 500 decode the JSON body into an ``ErrorResponse``.
    * Any other status raises ``errors.UnexpectedStatus`` when
      ``client.raise_on_unexpected_status`` is truthy, and yields ``None`` otherwise.
    """
    status = response.status_code

    if status == 200:
        return Sandbox.from_dict(response.json())

    if status in (401, 403, 404, 500):
        return ErrorResponse.from_dict(response.json())

    if not client.raise_on_unexpected_status:
        return None
    raise errors.UnexpectedStatus(response.status_code, response.content)


def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[ErrorResponse | Sandbox]:
    """Wrap an httpx response in a ``Response`` carrying status, body, headers and parsed value."""
    # The status code is converted before parsing, matching the field order of Response.
    status = HTTPStatus(response.status_code)
    raw_body = response.content
    header_map = response.headers
    parsed_value = _parse_response(client=client, response=response)
    return Response(status_code=status, content=raw_body, headers=header_map, parsed=parsed_value)


def sync_detailed(
    sandbox_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Response[ErrorResponse | Sandbox]:
    """Fetch a sandbox by id and return the full response.

    Sends the request through the client's synchronous httpx client. Per the API
    description, the result is the complete representation of the sandbox resource:
    - `id`, `status`, `metadata`, `expiresAt`, `createdAt`: Core information
    - `image`: Container image specification (not included in create response)
    - `entrypoint`: Entry process specification

    Args:
        sandbox_id (str): Identifier of the sandbox to fetch.
        client (AuthenticatedClient | Client): Client used to send the request.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | Sandbox]
    """
    request_args = _get_kwargs(sandbox_id=sandbox_id)
    http_response = client.get_httpx_client().request(**request_args)
    return _build_response(client=client, response=http_response)


def sync(
    sandbox_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> ErrorResponse | Sandbox | None:
    """Fetch a sandbox by id and return only the parsed result.

    Convenience wrapper around ``sync_detailed`` that discards status, headers and
    raw content. Per the API description, the result is the complete
    representation of the sandbox resource:
    - `id`, `status`, `metadata`, `expiresAt`, `createdAt`: Core information
    - `image`: Container image specification (not included in create response)
    - `entrypoint`: Entry process specification

    Args:
        sandbox_id (str): Identifier of the sandbox to fetch.
        client (AuthenticatedClient | Client): Client used to send the request.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        ErrorResponse | Sandbox | None: The ``parsed`` attribute of the detailed response.
    """
    detailed = sync_detailed(sandbox_id=sandbox_id, client=client)
    return detailed.parsed


async def asyncio_detailed(
    sandbox_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Response[ErrorResponse | Sandbox]:
    """Fetch a sandbox by id asynchronously and return the full response.

    Sends the request through the client's asynchronous httpx client. Per the API
    description, the result is the complete representation of the sandbox resource:
    - `id`, `status`, `metadata`, `expiresAt`, `createdAt`: Core information
    - `image`: Container image specification (not included in create response)
    - `entrypoint`: Entry process specification

    Args:
        sandbox_id (str): Identifier of the sandbox to fetch.
        client (AuthenticatedClient | Client): Client used to send the request.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | Sandbox]
    """
    request_args = _get_kwargs(sandbox_id=sandbox_id)
    http_response = await client.get_async_httpx_client().request(**request_args)
    return _build_response(client=client, response=http_response)


async def asyncio(
    sandbox_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> ErrorResponse | Sandbox | None:
    """Fetch a sandbox by id asynchronously and return only the parsed result.

    Convenience wrapper around ``asyncio_detailed`` that discards status, headers
    and raw content. Per the API description, the result is the complete
    representation of the sandbox resource:
    - `id`, `status`, `metadata`, `expiresAt`, `createdAt`: Core information
    - `image`: Container image specification (not included in create response)
    - `entrypoint`: Entry process specification

    Args:
        sandbox_id (str): Identifier of the sandbox to fetch.
        client (AuthenticatedClient | Client): Client used to send the request.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        ErrorResponse | Sandbox | None: The ``parsed`` attribute of the detailed response.
    """
    detailed = await asyncio_detailed(sandbox_id=sandbox_id, client=client)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/api/sandboxes/get_sandboxes_sandbox_id_endpoints_port.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any
from urllib.parse import quote

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.endpoint import Endpoint
from ...models.error_response import ErrorResponse
from ...types import UNSET, Response, Unset


def _get_kwargs(
    sandbox_id: str,
    port: int,
    *,
    use_server_proxy: bool | Unset = False,
) -> dict[str, Any]:
    """Assemble the httpx request arguments for looking up a sandbox endpoint.

    ``sandbox_id`` and ``port`` are stringified and percent-encoded with no safe
    characters before being placed in the path. ``use_server_proxy`` is sent as a
    query parameter unless it is ``UNSET`` or ``None``.

    Returns:
        dict[str, Any]: A mapping with the ``method``, ``url`` and ``params`` keys.
    """
    query: dict[str, Any] = {}
    if use_server_proxy is not UNSET and use_server_proxy is not None:
        query["use_server_proxy"] = use_server_proxy

    encoded_id = quote(str(sandbox_id), safe="")
    encoded_port = quote(str(port), safe="")

    return {
        "method": "get",
        "url": f"/sandboxes/{encoded_id}/endpoints/{encoded_port}",
        "params": query,
    }


def _parse_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Endpoint | ErrorResponse | None:
    """Turn the HTTP response of an endpoint lookup into its parsed value.

    * 200 decodes the JSON body into an ``Endpoint``.
    * 401, 403, 404 and 500 decode the JSON body into an ``ErrorResponse``.
    * Any other status raises ``errors.UnexpectedStatus`` when
      ``client.raise_on_unexpected_status`` is truthy, and yields ``None`` otherwise.
    """
    status = response.status_code

    if status == 200:
        return Endpoint.from_dict(response.json())

    if status in (401, 403, 404, 500):
        return ErrorResponse.from_dict(response.json())

    if not client.raise_on_unexpected_status:
        return None
    raise errors.UnexpectedStatus(response.status_code, response.content)


def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[Endpoint | ErrorResponse]:
    """Wrap an httpx response in a ``Response`` carrying status, body, headers and parsed value."""
    # The status code is converted before parsing, matching the field order of Response.
    status = HTTPStatus(response.status_code)
    raw_body = response.content
    header_map = response.headers
    parsed_value = _parse_response(client=client, response=response)
    return Response(status_code=status, content=raw_body, headers=header_map, parsed=parsed_value)


def sync_detailed(
    sandbox_id: str,
    port: int,
    *,
    client: AuthenticatedClient | Client,
    use_server_proxy: bool | Unset = False,
) -> Response[Endpoint | ErrorResponse]:
    """Get a sandbox access endpoint and return the full response.

    Sends the request through the client's synchronous httpx client. Per the API
    description, this returns the public access endpoint URL for a service running
    on a specific port within the sandbox; the service must be listening on that
    port inside the sandbox for the endpoint to be available.

    Args:
        sandbox_id (str): Identifier of the sandbox.
        port (int): Port of the service inside the sandbox.
        client (AuthenticatedClient | Client): Client used to send the request.
        use_server_proxy (bool | Unset):  Default: False.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Endpoint | ErrorResponse]
    """
    request_args = _get_kwargs(sandbox_id=sandbox_id, port=port, use_server_proxy=use_server_proxy)
    http_response = client.get_httpx_client().request(**request_args)
    return _build_response(client=client, response=http_response)


def sync(
    sandbox_id: str,
    port: int,
    *,
    client: AuthenticatedClient | Client,
    use_server_proxy: bool | Unset = False,
) -> Endpoint | ErrorResponse | None:
    """Get a sandbox access endpoint and return only the parsed result.

    Convenience wrapper around ``sync_detailed`` that discards status, headers and
    raw content. Per the API description, this returns the public access endpoint
    URL for a service running on a specific port within the sandbox; the service
    must be listening on that port inside the sandbox for the endpoint to be
    available.

    Args:
        sandbox_id (str): Identifier of the sandbox.
        port (int): Port of the service inside the sandbox.
        client (AuthenticatedClient | Client): Client used to send the request.
        use_server_proxy (bool | Unset):  Default: False.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Endpoint | ErrorResponse | None: The ``parsed`` attribute of the detailed response.
    """
    detailed = sync_detailed(
        sandbox_id=sandbox_id,
        port=port,
        client=client,
        use_server_proxy=use_server_proxy,
    )
    return detailed.parsed


async def asyncio_detailed(
    sandbox_id: str,
    port: int,
    *,
    client: AuthenticatedClient | Client,
    use_server_proxy: bool | Unset = False,
) -> Response[Endpoint | ErrorResponse]:
    """Get a sandbox access endpoint asynchronously and return the full response.

    Sends the request through the client's asynchronous httpx client. Per the API
    description, this returns the public access endpoint URL for a service running
    on a specific port within the sandbox; the service must be listening on that
    port inside the sandbox for the endpoint to be available.

    Args:
        sandbox_id (str): Identifier of the sandbox.
        port (int): Port of the service inside the sandbox.
        client (AuthenticatedClient | Client): Client used to send the request.
        use_server_proxy (bool | Unset):  Default: False.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Endpoint | ErrorResponse]
    """
    request_args = _get_kwargs(sandbox_id=sandbox_id, port=port, use_server_proxy=use_server_proxy)
    http_response = await client.get_async_httpx_client().request(**request_args)
    return _build_response(client=client, response=http_response)


async def asyncio(
    sandbox_id: str,
    port: int,
    *,
    client: AuthenticatedClient | Client,
    use_server_proxy: bool | Unset = False,
) -> Endpoint | ErrorResponse | None:
    """Get a sandbox access endpoint asynchronously and return only the parsed result.

    Convenience wrapper around ``asyncio_detailed`` that discards status, headers
    and raw content. Per the API description, this returns the public access
    endpoint URL for a service running on a specific port within the sandbox; the
    service must be listening on that port inside the sandbox for the endpoint to
    be available.

    Args:
        sandbox_id (str): Identifier of the sandbox.
        port (int): Port of the service inside the sandbox.
        client (AuthenticatedClient | Client): Client used to send the request.
        use_server_proxy (bool | Unset):  Default: False.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Endpoint | ErrorResponse | None: The ``parsed`` attribute of the detailed response.
    """
    detailed = await asyncio_detailed(
        sandbox_id=sandbox_id,
        port=port,
        client=client,
        use_server_proxy=use_server_proxy,
    )
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/api/sandboxes/post_sandboxes.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.create_sandbox_request import CreateSandboxRequest
from ...models.create_sandbox_response import CreateSandboxResponse
from ...models.error_response import ErrorResponse
from ...types import Response


def _get_kwargs(
    *,
    body: CreateSandboxRequest,
) -> dict[str, Any]:
    """Assemble the httpx request arguments for creating a sandbox.

    The request body is ``body.to_dict()`` sent as JSON, with an explicit
    ``Content-Type: application/json`` header.

    Returns:
        dict[str, Any]: A mapping with the ``method``, ``url``, ``json`` and ``headers`` keys.
    """
    return {
        "method": "post",
        "url": "/sandboxes",
        "json": body.to_dict(),
        "headers": {"Content-Type": "application/json"},
    }


def _parse_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> CreateSandboxResponse | ErrorResponse | None:
    if response.status_code == 202:
        response_202 = CreateSandboxResponse.from_dict(response.json())

        return response_202

    if response.status_code == 400:
        response_400 = ErrorResponse.from_dict(response.json())

        return response_400

    if response.status_code == 401:
        response_401 = ErrorResponse.from_dict(response.json())

        return response_401

    if response.status_code == 409:
        response_409 = ErrorResponse.from_dict(response.json())

        return response_409

    if response.status_code == 500:
        response_500 = ErrorResponse.from_dict(response.json())

        return response_500

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(response.status_code, response.content)
    else:
        return None


def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[CreateSandboxResponse | ErrorResponse]:
    """Wrap an httpx response in the SDK ``Response`` container.

    The status code is converted to ``HTTPStatus`` before the body is parsed,
    so a code unknown to ``HTTPStatus`` fails before ``_parse_response`` runs.
    """
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    reply_headers = response.headers
    parsed_body = _parse_response(client=client, response=response)

    return Response(
        status_code=status,
        content=raw_content,
        headers=reply_headers,
        parsed=parsed_body,
    )


def sync_detailed(
    *,
    client: AuthenticatedClient | Client,
    body: CreateSandboxRequest,
) -> Response[CreateSandboxResponse | ErrorResponse]:
    """Create a sandbox from a container image (blocking, full response).

    Sends ``POST /sandboxes``. The sandbox is provisioned directly from the
    image named in the request, with optional resource limits, environment
    variables and metadata; no pre-created template is needed.

    API Key authentication is required via the
    ``OPEN-SANDBOX-API-KEY: <api-key>`` header.

    Args:
        client: Client supplying the synchronous httpx client.
        body (CreateSandboxRequest): Request to create a new sandbox from a container image.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[CreateSandboxResponse | ErrorResponse]
    """
    request_args = _get_kwargs(body=body)
    http_client = client.get_httpx_client()
    raw_reply = http_client.request(**request_args)
    return _build_response(client=client, response=raw_reply)


def sync(
    *,
    client: AuthenticatedClient | Client,
    body: CreateSandboxRequest,
) -> CreateSandboxResponse | ErrorResponse | None:
    """Create a sandbox from a container image (blocking, parsed body only).

    Thin wrapper over ``sync_detailed`` that returns just the ``parsed``
    attribute of the detailed response.

    API Key authentication is required via the
    ``OPEN-SANDBOX-API-KEY: <api-key>`` header.

    Args:
        client: Client supplying the synchronous httpx client.
        body (CreateSandboxRequest): Request to create a new sandbox from a container image.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        CreateSandboxResponse | ErrorResponse, or None when the status code is
        not one the parser handles and raising is disabled.
    """
    detailed = sync_detailed(client=client, body=body)
    return detailed.parsed


async def asyncio_detailed(
    *,
    client: AuthenticatedClient | Client,
    body: CreateSandboxRequest,
) -> Response[CreateSandboxResponse | ErrorResponse]:
    """Create a sandbox from a container image (async, full response).

    Sends ``POST /sandboxes`` through the client's async httpx client. The
    sandbox is provisioned directly from the image named in the request, with
    optional resource limits, environment variables and metadata.

    API Key authentication is required via the
    ``OPEN-SANDBOX-API-KEY: <api-key>`` header.

    Args:
        client: Client supplying the asynchronous httpx client.
        body (CreateSandboxRequest): Request to create a new sandbox from a container image.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[CreateSandboxResponse | ErrorResponse]
    """
    request_args = _get_kwargs(body=body)
    async_http = client.get_async_httpx_client()
    raw_reply = await async_http.request(**request_args)
    return _build_response(client=client, response=raw_reply)


async def asyncio(
    *,
    client: AuthenticatedClient | Client,
    body: CreateSandboxRequest,
) -> CreateSandboxResponse | ErrorResponse | None:
    """Create a sandbox from a container image (async, parsed body only).

    Thin wrapper over ``asyncio_detailed`` that returns just the ``parsed``
    attribute of the detailed response.

    API Key authentication is required via the
    ``OPEN-SANDBOX-API-KEY: <api-key>`` header.

    Args:
        client: Client supplying the asynchronous httpx client.
        body (CreateSandboxRequest): Request to create a new sandbox from a container image.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        CreateSandboxResponse | ErrorResponse, or None when the status code is
        not one the parser handles and raising is disabled.
    """
    detailed = await asyncio_detailed(client=client, body=body)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/api/sandboxes/post_sandboxes_sandbox_id_pause.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any, cast
from urllib.parse import quote

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...types import Response


def _get_kwargs(
    sandbox_id: str,
) -> dict[str, Any]:
    _kwargs: dict[str, Any] = {
        "method": "post",
        "url": "/sandboxes/{sandbox_id}/pause".format(
            sandbox_id=quote(str(sandbox_id), safe=""),
        ),
    }

    return _kwargs


def _parse_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Any | ErrorResponse | None:
    if response.status_code == 202:
        response_202 = cast(Any, None)
        return response_202

    if response.status_code == 401:
        response_401 = ErrorResponse.from_dict(response.json())

        return response_401

    if response.status_code == 403:
        response_403 = ErrorResponse.from_dict(response.json())

        return response_403

    if response.status_code == 404:
        response_404 = ErrorResponse.from_dict(response.json())

        return response_404

    if response.status_code == 409:
        response_409 = ErrorResponse.from_dict(response.json())

        return response_409

    if response.status_code == 500:
        response_500 = ErrorResponse.from_dict(response.json())

        return response_500

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(response.status_code, response.content)
    else:
        return None


def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[Any | ErrorResponse]:
    """Wrap an httpx response in the SDK ``Response`` container.

    The status code is converted to ``HTTPStatus`` before the body is parsed,
    so a code unknown to ``HTTPStatus`` fails before ``_parse_response`` runs.
    """
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    reply_headers = response.headers
    parsed_body = _parse_response(client=client, response=response)

    return Response(
        status_code=status,
        content=raw_content,
        headers=reply_headers,
        parsed=parsed_body,
    )


def sync_detailed(
    sandbox_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Response[Any | ErrorResponse]:
    """Pause execution while retaining state (blocking, full response).

    Pauses a running sandbox while preserving its state. Poll
    GET /sandboxes/{sandboxId} to track the state transition to Paused.

    Args:
        sandbox_id (str): Identifier placed in the request path.
        client: Client supplying the synchronous httpx client.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """
    request_args = _get_kwargs(sandbox_id=sandbox_id)
    http_client = client.get_httpx_client()
    raw_reply = http_client.request(**request_args)
    return _build_response(client=client, response=raw_reply)


def sync(
    sandbox_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Any | ErrorResponse | None:
    """Pause execution while retaining state (blocking, parsed body only).

    Thin wrapper over ``sync_detailed`` that returns just the ``parsed``
    attribute of the detailed response. Poll GET /sandboxes/{sandboxId} to
    track the state transition to Paused.

    Args:
        sandbox_id (str): Identifier placed in the request path.
        client: Client supplying the synchronous httpx client.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ErrorResponse (None on the 202 success path).
    """
    detailed = sync_detailed(sandbox_id=sandbox_id, client=client)
    return detailed.parsed


async def asyncio_detailed(
    sandbox_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Response[Any | ErrorResponse]:
    """Pause execution while retaining state (async, full response).

    Pauses a running sandbox while preserving its state. Poll
    GET /sandboxes/{sandboxId} to track the state transition to Paused.

    Args:
        sandbox_id (str): Identifier placed in the request path.
        client: Client supplying the asynchronous httpx client.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """
    request_args = _get_kwargs(sandbox_id=sandbox_id)
    async_http = client.get_async_httpx_client()
    raw_reply = await async_http.request(**request_args)
    return _build_response(client=client, response=raw_reply)


async def asyncio(
    sandbox_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Any | ErrorResponse | None:
    """Pause execution while retaining state (async, parsed body only).

    Thin wrapper over ``asyncio_detailed`` that returns just the ``parsed``
    attribute of the detailed response. Poll GET /sandboxes/{sandboxId} to
    track the state transition to Paused.

    Args:
        sandbox_id (str): Identifier placed in the request path.
        client: Client supplying the asynchronous httpx client.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ErrorResponse (None on the 202 success path).
    """
    detailed = await asyncio_detailed(sandbox_id=sandbox_id, client=client)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/api/sandboxes/post_sandboxes_sandbox_id_renew_expiration.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any
from urllib.parse import quote

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...models.renew_sandbox_expiration_request import RenewSandboxExpirationRequest
from ...models.renew_sandbox_expiration_response import RenewSandboxExpirationResponse
from ...types import Response


def _get_kwargs(
    sandbox_id: str,
    *,
    body: RenewSandboxExpirationRequest,
) -> dict[str, Any]:
    headers: dict[str, Any] = {}

    _kwargs: dict[str, Any] = {
        "method": "post",
        "url": "/sandboxes/{sandbox_id}/renew-expiration".format(
            sandbox_id=quote(str(sandbox_id), safe=""),
        ),
    }

    _kwargs["json"] = body.to_dict()

    headers["Content-Type"] = "application/json"

    _kwargs["headers"] = headers
    return _kwargs


def _parse_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> ErrorResponse | RenewSandboxExpirationResponse | None:
    if response.status_code == 200:
        response_200 = RenewSandboxExpirationResponse.from_dict(response.json())

        return response_200

    if response.status_code == 400:
        response_400 = ErrorResponse.from_dict(response.json())

        return response_400

    if response.status_code == 401:
        response_401 = ErrorResponse.from_dict(response.json())

        return response_401

    if response.status_code == 403:
        response_403 = ErrorResponse.from_dict(response.json())

        return response_403

    if response.status_code == 404:
        response_404 = ErrorResponse.from_dict(response.json())

        return response_404

    if response.status_code == 409:
        response_409 = ErrorResponse.from_dict(response.json())

        return response_409

    if response.status_code == 500:
        response_500 = ErrorResponse.from_dict(response.json())

        return response_500

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(response.status_code, response.content)
    else:
        return None


def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[ErrorResponse | RenewSandboxExpirationResponse]:
    """Wrap an httpx response in the SDK ``Response`` container.

    The status code is converted to ``HTTPStatus`` before the body is parsed,
    so a code unknown to ``HTTPStatus`` fails before ``_parse_response`` runs.
    """
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    reply_headers = response.headers
    parsed_body = _parse_response(client=client, response=response)

    return Response(
        status_code=status,
        content=raw_content,
        headers=reply_headers,
        parsed=parsed_body,
    )


def sync_detailed(
    sandbox_id: str,
    *,
    client: AuthenticatedClient | Client,
    body: RenewSandboxExpirationRequest,
) -> Response[ErrorResponse | RenewSandboxExpirationResponse]:
    """Renew sandbox expiration (blocking, full response).

    Renews the absolute expiration time of a sandbox.

    Args:
        sandbox_id (str): Identifier placed in the request path.
        client: Client supplying the synchronous httpx client.
        body (RenewSandboxExpirationRequest): Payload sent as JSON.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | RenewSandboxExpirationResponse]
    """
    request_args = _get_kwargs(sandbox_id=sandbox_id, body=body)
    http_client = client.get_httpx_client()
    raw_reply = http_client.request(**request_args)
    return _build_response(client=client, response=raw_reply)


def sync(
    sandbox_id: str,
    *,
    client: AuthenticatedClient | Client,
    body: RenewSandboxExpirationRequest,
) -> ErrorResponse | RenewSandboxExpirationResponse | None:
    """Renew sandbox expiration (blocking, parsed body only).

    Thin wrapper over ``sync_detailed`` that returns just the ``parsed``
    attribute of the detailed response.

    Args:
        sandbox_id (str): Identifier placed in the request path.
        client: Client supplying the synchronous httpx client.
        body (RenewSandboxExpirationRequest): Payload sent as JSON.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        ErrorResponse | RenewSandboxExpirationResponse, or None when the
        status code is not one the parser handles and raising is disabled.
    """
    detailed = sync_detailed(sandbox_id=sandbox_id, client=client, body=body)
    return detailed.parsed


async def asyncio_detailed(
    sandbox_id: str,
    *,
    client: AuthenticatedClient | Client,
    body: RenewSandboxExpirationRequest,
) -> Response[ErrorResponse | RenewSandboxExpirationResponse]:
    """Renew sandbox expiration (async, full response).

    Renews the absolute expiration time of a sandbox.

    Args:
        sandbox_id (str): Identifier placed in the request path.
        client: Client supplying the asynchronous httpx client.
        body (RenewSandboxExpirationRequest): Payload sent as JSON.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[ErrorResponse | RenewSandboxExpirationResponse]
    """
    request_args = _get_kwargs(sandbox_id=sandbox_id, body=body)
    async_http = client.get_async_httpx_client()
    raw_reply = await async_http.request(**request_args)
    return _build_response(client=client, response=raw_reply)


async def asyncio(
    sandbox_id: str,
    *,
    client: AuthenticatedClient | Client,
    body: RenewSandboxExpirationRequest,
) -> ErrorResponse | RenewSandboxExpirationResponse | None:
    """Renew sandbox expiration (async, parsed body only).

    Thin wrapper over ``asyncio_detailed`` that returns just the ``parsed``
    attribute of the detailed response.

    Args:
        sandbox_id (str): Identifier placed in the request path.
        client: Client supplying the asynchronous httpx client.
        body (RenewSandboxExpirationRequest): Payload sent as JSON.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        ErrorResponse | RenewSandboxExpirationResponse, or None when the
        status code is not one the parser handles and raising is disabled.
    """
    detailed = await asyncio_detailed(sandbox_id=sandbox_id, client=client, body=body)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/api/sandboxes/post_sandboxes_sandbox_id_resume.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from http import HTTPStatus
from typing import Any, cast
from urllib.parse import quote

import httpx

from ... import errors
from ...client import AuthenticatedClient, Client
from ...models.error_response import ErrorResponse
from ...types import Response


def _get_kwargs(
    sandbox_id: str,
) -> dict[str, Any]:
    _kwargs: dict[str, Any] = {
        "method": "post",
        "url": "/sandboxes/{sandbox_id}/resume".format(
            sandbox_id=quote(str(sandbox_id), safe=""),
        ),
    }

    return _kwargs


def _parse_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Any | ErrorResponse | None:
    if response.status_code == 202:
        response_202 = cast(Any, None)
        return response_202

    if response.status_code == 401:
        response_401 = ErrorResponse.from_dict(response.json())

        return response_401

    if response.status_code == 403:
        response_403 = ErrorResponse.from_dict(response.json())

        return response_403

    if response.status_code == 404:
        response_404 = ErrorResponse.from_dict(response.json())

        return response_404

    if response.status_code == 409:
        response_409 = ErrorResponse.from_dict(response.json())

        return response_409

    if response.status_code == 500:
        response_500 = ErrorResponse.from_dict(response.json())

        return response_500

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(response.status_code, response.content)
    else:
        return None


def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[Any | ErrorResponse]:
    """Wrap an httpx response in the SDK ``Response`` container.

    The status code is converted to ``HTTPStatus`` before the body is parsed,
    so a code unknown to ``HTTPStatus`` fails before ``_parse_response`` runs.
    """
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    reply_headers = response.headers
    parsed_body = _parse_response(client=client, response=response)

    return Response(
        status_code=status,
        content=raw_content,
        headers=reply_headers,
        parsed=parsed_body,
    )


def sync_detailed(
    sandbox_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Response[Any | ErrorResponse]:
    """Resume a paused sandbox (blocking, full response).

    Resumes execution of a paused sandbox. Poll GET /sandboxes/{sandboxId} to
    track the state transition to Running.

    Args:
        sandbox_id (str): Identifier placed in the request path.
        client: Client supplying the synchronous httpx client.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """
    request_args = _get_kwargs(sandbox_id=sandbox_id)
    http_client = client.get_httpx_client()
    raw_reply = http_client.request(**request_args)
    return _build_response(client=client, response=raw_reply)


def sync(
    sandbox_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Any | ErrorResponse | None:
    """Resume a paused sandbox (blocking, parsed body only).

    Thin wrapper over ``sync_detailed`` that returns just the ``parsed``
    attribute of the detailed response. Poll GET /sandboxes/{sandboxId} to
    track the state transition to Running.

    Args:
        sandbox_id (str): Identifier placed in the request path.
        client: Client supplying the synchronous httpx client.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ErrorResponse (None on the 202 success path).
    """
    detailed = sync_detailed(sandbox_id=sandbox_id, client=client)
    return detailed.parsed


async def asyncio_detailed(
    sandbox_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Response[Any | ErrorResponse]:
    """Resume a paused sandbox (async, full response).

    Resumes execution of a paused sandbox. Poll GET /sandboxes/{sandboxId} to
    track the state transition to Running.

    Args:
        sandbox_id (str): Identifier placed in the request path.
        client: Client supplying the asynchronous httpx client.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ErrorResponse]
    """
    request_args = _get_kwargs(sandbox_id=sandbox_id)
    async_http = client.get_async_httpx_client()
    raw_reply = await async_http.request(**request_args)
    return _build_response(client=client, response=raw_reply)


async def asyncio(
    sandbox_id: str,
    *,
    client: AuthenticatedClient | Client,
) -> Any | ErrorResponse | None:
    """Resume a paused sandbox (async, parsed body only).

    Thin wrapper over ``asyncio_detailed`` that returns just the ``parsed``
    attribute of the detailed response. Poll GET /sandboxes/{sandboxId} to
    track the state transition to Running.

    Args:
        sandbox_id (str): Identifier placed in the request path.
        client: Client supplying the asynchronous httpx client.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ErrorResponse (None on the 202 success path).
    """
    detailed = await asyncio_detailed(sandbox_id=sandbox_id, client=client)
    return detailed.parsed


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/client.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import ssl
from typing import Any

import httpx
from attrs import define, evolve, field


@define
class Client:
    """Holds the API connection settings and lazily builds httpx clients from them.

    The following are accepted as keyword arguments and will be used to construct httpx Clients internally:

        ``base_url``: The base URL for the API, all requests are made to a relative path to this URL

        ``cookies``: A dictionary of cookies to be sent with every request

        ``headers``: A dictionary of headers to be sent with every request

        ``timeout``: The maximum amount of time a request can take. API functions will raise
        httpx.TimeoutException if this is exceeded.

        ``verify_ssl``: Whether or not to verify the SSL certificate of the API server. This should be True in production,
        but can be set to False for testing purposes.

        ``follow_redirects``: Whether or not to follow redirects. Default value is False.

        ``httpx_args``: A dictionary of additional arguments to be passed to the ``httpx.Client`` and ``httpx.AsyncClient`` constructor.


    Attributes:
        raise_on_unexpected_status: Whether or not to raise an errors.UnexpectedStatus if the API returns a
            status code that was not documented in the source OpenAPI document. Can also be provided as a keyword
            argument to the constructor.
    """

    # Public flag; declared keyword-only.
    raise_on_unexpected_status: bool = field(default=False, kw_only=True)
    # Private settings exposed to the constructor under the alias given in each field;
    # they are passed to httpx.Client / httpx.AsyncClient when one is first built.
    _base_url: str = field(alias="base_url")
    _cookies: dict[str, str] = field(factory=dict, kw_only=True, alias="cookies")
    _headers: dict[str, str] = field(factory=dict, kw_only=True, alias="headers")
    _timeout: httpx.Timeout | None = field(default=None, kw_only=True, alias="timeout")
    _verify_ssl: str | bool | ssl.SSLContext = field(default=True, kw_only=True, alias="verify_ssl")
    _follow_redirects: bool = field(default=False, kw_only=True, alias="follow_redirects")
    _httpx_args: dict[str, Any] = field(factory=dict, kw_only=True, alias="httpx_args")
    # Cached httpx clients: not constructor arguments (init=False), filled in by
    # get_httpx_client / get_async_httpx_client or by the set_* methods.
    _client: httpx.Client | None = field(default=None, init=False)
    _async_client: httpx.AsyncClient | None = field(default=None, init=False)

    def with_headers(self, headers: dict[str, str]) -> "Client":
        """Return an evolved copy of this client with ``headers`` merged over the current headers.

        Note that this is not purely functional: if this instance has already built
        (or been given) an httpx client, that live client's headers are updated in place too.
        """
        if self._client is not None:
            self._client.headers.update(headers)
        if self._async_client is not None:
            self._async_client.headers.update(headers)
        # NOTE(review): the cached httpx clients are init=False fields, so the evolved copy
        # presumably starts without them and builds its own on first use - confirm against attrs.evolve.
        return evolve(self, headers={**self._headers, **headers})

    def with_cookies(self, cookies: dict[str, str]) -> "Client":
        """Return an evolved copy of this client with ``cookies`` merged over the current cookies.

        Any httpx client already held by this instance has its cookies updated in place as well.
        """
        if self._client is not None:
            self._client.cookies.update(cookies)
        if self._async_client is not None:
            self._async_client.cookies.update(cookies)
        return evolve(self, cookies={**self._cookies, **cookies})

    def with_timeout(self, timeout: httpx.Timeout) -> "Client":
        """Return an evolved copy of this client using ``timeout``.

        Any httpx client already held by this instance has its timeout replaced in place as well.
        """
        if self._client is not None:
            self._client.timeout = timeout
        if self._async_client is not None:
            self._async_client.timeout = timeout
        return evolve(self, timeout=timeout)

    def set_httpx_client(self, client: httpx.Client) -> "Client":
        """Manually set the underlying httpx.Client and return this same instance.

        **NOTE**: This will override any other settings on the client, including cookies, headers, and timeout.
        """
        self._client = client
        return self

    def get_httpx_client(self) -> httpx.Client:
        """Get the underlying httpx.Client, constructing a new one from the stored settings if not previously set"""
        if self._client is None:
            # Built once and cached; later calls return the same object.
            self._client = httpx.Client(
                base_url=self._base_url,
                cookies=self._cookies,
                headers=self._headers,
                timeout=self._timeout,
                verify=self._verify_ssl,
                follow_redirects=self._follow_redirects,
                **self._httpx_args,
            )
        return self._client

    def __enter__(self) -> "Client":
        """Enter the context manager of the underlying httpx.Client - you cannot enter twice (see httpx docs)"""
        self.get_httpx_client().__enter__()
        return self

    def __exit__(self, *args: Any, **kwargs: Any) -> None:
        """Exit the context manager of the underlying httpx.Client (see httpx docs)"""
        self.get_httpx_client().__exit__(*args, **kwargs)

    def set_async_httpx_client(self, async_client: httpx.AsyncClient) -> "Client":
        """Manually set the underlying httpx.AsyncClient and return this same instance.

        **NOTE**: This will override any other settings on the client, including cookies, headers, and timeout.
        """
        self._async_client = async_client
        return self

    def get_async_httpx_client(self) -> httpx.AsyncClient:
        """Get the underlying httpx.AsyncClient, constructing a new one from the stored settings if not previously set"""
        if self._async_client is None:
            # Built once and cached; later calls return the same object.
            self._async_client = httpx.AsyncClient(
                base_url=self._base_url,
                cookies=self._cookies,
                headers=self._headers,
                timeout=self._timeout,
                verify=self._verify_ssl,
                follow_redirects=self._follow_redirects,
                **self._httpx_args,
            )
        return self._async_client

    async def __aenter__(self) -> "Client":
        """Enter the context manager of the underlying httpx.AsyncClient - you cannot enter twice (see httpx docs)"""
        await self.get_async_httpx_client().__aenter__()
        return self

    async def __aexit__(self, *args: Any, **kwargs: Any) -> None:
        """Exit the context manager of the underlying httpx.AsyncClient (see httpx docs)"""
        await self.get_async_httpx_client().__aexit__(*args, **kwargs)


@define
class AuthenticatedClient:
    """An API client that attaches an authentication header, for use on secured endpoints

    The constructor accepts the following keyword arguments and uses them when it builds httpx clients internally:

        ``base_url``: Root URL of the API; every request is made to a path relative to it

        ``cookies``: A dictionary of cookies sent with every request

        ``headers``: A dictionary of headers sent with every request

        ``timeout``: The maximum amount of time a request can take. API functions will raise
        httpx.TimeoutException if this is exceeded.

        ``verify_ssl``: Whether or not to verify the SSL certificate of the API server. Keep this True in production;
        False is only meant for testing.

        ``follow_redirects``: Whether or not to follow redirects. Default value is False.

        ``httpx_args``: Extra keyword arguments passed to the ``httpx.Client`` and ``httpx.AsyncClient`` constructors.


    Attributes:
        raise_on_unexpected_status: Whether or not to raise an errors.UnexpectedStatus if the API returns a
            status code that was not documented in the source OpenAPI document. Can also be provided as a keyword
            argument to the constructor.
        token: The token to use for authentication
        prefix: The prefix to use for the Authorization header
        auth_header_name: The name of the Authorization header
    """

    # NOTE: attrs generates the constructor from these declarations, so their order is part of the interface.
    raise_on_unexpected_status: bool = field(default=False, kw_only=True)
    _base_url: str = field(alias="base_url")
    _cookies: dict[str, str] = field(factory=dict, kw_only=True, alias="cookies")
    _headers: dict[str, str] = field(factory=dict, kw_only=True, alias="headers")
    _timeout: httpx.Timeout | None = field(default=None, kw_only=True, alias="timeout")
    _verify_ssl: str | bool | ssl.SSLContext = field(default=True, kw_only=True, alias="verify_ssl")
    _follow_redirects: bool = field(default=False, kw_only=True, alias="follow_redirects")
    _httpx_args: dict[str, Any] = field(factory=dict, kw_only=True, alias="httpx_args")
    # Lazily created httpx clients; excluded from the constructor.
    _client: httpx.Client | None = field(default=None, init=False)
    _async_client: httpx.AsyncClient | None = field(default=None, init=False)

    token: str
    prefix: str = "Bearer"
    auth_header_name: str = "Authorization"

    def with_headers(self, headers: dict[str, str]) -> "AuthenticatedClient":
        """Return a new client like this one, with ``headers`` merged over the existing headers.

        Any httpx client already held by this object also has the headers applied in place.
        """
        for underlying in (self._client, self._async_client):
            if underlying is not None:
                underlying.headers.update(headers)
        merged_headers = {**self._headers, **headers}
        return evolve(self, headers=merged_headers)

    def with_cookies(self, cookies: dict[str, str]) -> "AuthenticatedClient":
        """Return a new client like this one, with ``cookies`` merged over the existing cookies.

        Any httpx client already held by this object also has the cookies applied in place.
        """
        for underlying in (self._client, self._async_client):
            if underlying is not None:
                underlying.cookies.update(cookies)
        merged_cookies = {**self._cookies, **cookies}
        return evolve(self, cookies=merged_cookies)

    def with_timeout(self, timeout: httpx.Timeout) -> "AuthenticatedClient":
        """Return a new client like this one, using ``timeout`` as its timeout configuration.

        Any httpx client already held by this object also has its timeout replaced in place.
        """
        for underlying in (self._client, self._async_client):
            if underlying is not None:
                underlying.timeout = timeout
        return evolve(self, timeout=timeout)

    def set_httpx_client(self, client: httpx.Client) -> "AuthenticatedClient":
        """Install a caller-supplied httpx.Client as the underlying sync client.

        **NOTE**: The supplied client is used as-is, so it overrides any other settings
        on this object, including cookies, headers, and timeout.
        """
        self._client = client
        return self

    def get_httpx_client(self) -> httpx.Client:
        """Return the underlying httpx.Client, building and caching one on first use"""
        if self._client is not None:
            return self._client

        # The auth header is written into the stored headers right before the client is built.
        if self.prefix:
            auth_value = f"{self.prefix} {self.token}"
        else:
            auth_value = self.token
        self._headers[self.auth_header_name] = auth_value

        self._client = httpx.Client(
            base_url=self._base_url,
            cookies=self._cookies,
            headers=self._headers,
            timeout=self._timeout,
            verify=self._verify_ssl,
            follow_redirects=self._follow_redirects,
            **self._httpx_args,
        )
        return self._client

    def __enter__(self) -> "AuthenticatedClient":
        """Open the wrapped httpx.Client as a context manager; it cannot be entered twice (see httpx docs)"""
        sync_client = self.get_httpx_client()
        sync_client.__enter__()
        return self

    def __exit__(self, *args: Any, **kwargs: Any) -> None:
        """Leave the context manager of the wrapped httpx.Client (see httpx docs)"""
        sync_client = self.get_httpx_client()
        sync_client.__exit__(*args, **kwargs)

    def set_async_httpx_client(self, async_client: httpx.AsyncClient) -> "AuthenticatedClient":
        """Install a caller-supplied httpx.AsyncClient as the underlying async client.

        **NOTE**: The supplied client is used as-is, so it overrides any other settings
        on this object, including cookies, headers, and timeout.
        """
        self._async_client = async_client
        return self

    def get_async_httpx_client(self) -> httpx.AsyncClient:
        """Return the underlying httpx.AsyncClient, building and caching one on first use"""
        if self._async_client is not None:
            return self._async_client

        # The auth header is written into the stored headers right before the client is built.
        if self.prefix:
            auth_value = f"{self.prefix} {self.token}"
        else:
            auth_value = self.token
        self._headers[self.auth_header_name] = auth_value

        self._async_client = httpx.AsyncClient(
            base_url=self._base_url,
            cookies=self._cookies,
            headers=self._headers,
            timeout=self._timeout,
            verify=self._verify_ssl,
            follow_redirects=self._follow_redirects,
            **self._httpx_args,
        )
        return self._async_client

    async def __aenter__(self) -> "AuthenticatedClient":
        """Open the wrapped httpx.AsyncClient as an async context manager; it cannot be entered twice (see httpx docs)"""
        async_client = self.get_async_httpx_client()
        await async_client.__aenter__()
        return self

    async def __aexit__(self, *args: Any, **kwargs: Any) -> None:
        """Leave the async context manager of the wrapped httpx.AsyncClient (see httpx docs)"""
        async_client = self.get_async_httpx_client()
        await async_client.__aexit__(*args, **kwargs)


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/errors.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Contains shared errors types that can be raised from API functions"""


class UnexpectedStatus(Exception):
    """Raised by api functions when the response has an undocumented status and Client.raise_on_unexpected_status is True

    Attributes:
        status_code: The HTTP status code of the response.
        content: The raw response body.
    """

    def __init__(self, status_code: int, content: bytes):
        """Store the status and body, and build a human-readable message from them.

        Args:
            status_code: The HTTP status code of the response.
            content: The raw response body; undecodable bytes are dropped from the message.
        """
        self.status_code = status_code
        self.content = content

        super().__init__(
            f"Unexpected status code: {status_code}\n\nResponse content:\n{content.decode(errors='ignore')}"
        )

    def __reduce__(self) -> tuple:
        """Describe how to rebuild this exception for ``copy`` and ``pickle``.

        The default ``BaseException.__reduce__`` rebuilds the object from ``self.args``,
        which holds only the formatted message. That does not match the two-argument
        constructor, so copying or unpickling would raise ``TypeError``. Rebuilding from
        the original constructor arguments avoids this.
        """
        return (type(self), (self.status_code, self.content), dict(self.__dict__))


# Names exported by `from <this module> import *`.
__all__ = ["UnexpectedStatus"]


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/__init__.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Contains all the data models used in inputs/outputs"""

from .create_sandbox_request import CreateSandboxRequest
from .create_sandbox_request_env import CreateSandboxRequestEnv
from .create_sandbox_request_extensions import CreateSandboxRequestExtensions
from .create_sandbox_request_metadata import CreateSandboxRequestMetadata
from .create_sandbox_response import CreateSandboxResponse
from .create_sandbox_response_metadata import CreateSandboxResponseMetadata
from .endpoint import Endpoint
from .endpoint_headers import EndpointHeaders
from .error_response import ErrorResponse
from .host import Host
from .image_spec import ImageSpec
from .image_spec_auth import ImageSpecAuth
from .list_sandboxes_response import ListSandboxesResponse
from .network_policy import NetworkPolicy
from .network_policy_default_action import NetworkPolicyDefaultAction
from .network_rule import NetworkRule
from .network_rule_action import NetworkRuleAction
from .ossfs import OSSFS
from .ossfs_version import OSSFSVersion
from .pagination_info import PaginationInfo
from .pvc import PVC
from .renew_sandbox_expiration_request import RenewSandboxExpirationRequest
from .renew_sandbox_expiration_response import RenewSandboxExpirationResponse
from .resource_limits import ResourceLimits
from .sandbox import Sandbox
from .sandbox_metadata import SandboxMetadata
from .sandbox_status import SandboxStatus
from .volume import Volume

# Names exported by `from <this package> import *`.
# Lists every model imported above, in the same order as the imports.
__all__ = (
    "CreateSandboxRequest",
    "CreateSandboxRequestEnv",
    "CreateSandboxRequestExtensions",
    "CreateSandboxRequestMetadata",
    "CreateSandboxResponse",
    "CreateSandboxResponseMetadata",
    "Endpoint",
    "EndpointHeaders",
    "ErrorResponse",
    "Host",
    "ImageSpec",
    "ImageSpecAuth",
    "ListSandboxesResponse",
    "NetworkPolicy",
    "NetworkPolicyDefaultAction",
    "NetworkRule",
    "NetworkRuleAction",
    "OSSFS",
    "OSSFSVersion",
    "PaginationInfo",
    "PVC",
    "RenewSandboxExpirationRequest",
    "RenewSandboxExpirationResponse",
    "ResourceLimits",
    "Sandbox",
    "SandboxMetadata",
    "SandboxStatus",
    "Volume",
)


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/create_sandbox_request.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, TypeVar, cast

from attrs import define as _attrs_define
from attrs import field as _attrs_field

from ..types import UNSET, Unset

if TYPE_CHECKING:
    from ..models.create_sandbox_request_env import CreateSandboxRequestEnv
    from ..models.create_sandbox_request_extensions import CreateSandboxRequestExtensions
    from ..models.create_sandbox_request_metadata import CreateSandboxRequestMetadata
    from ..models.image_spec import ImageSpec
    from ..models.network_policy import NetworkPolicy
    from ..models.resource_limits import ResourceLimits
    from ..models.volume import Volume


# Type variable bound to CreateSandboxRequest; `from_dict` uses it so that calling the
# classmethod on a subclass is typed as returning that subclass.
T = TypeVar("T", bound="CreateSandboxRequest")


@_attrs_define
class CreateSandboxRequest:
    """Request to create a new sandbox from a container image.

    **Note**: API Key authentication is required via the `OPEN-SANDBOX-API-KEY` header.

        Attributes:
            image (ImageSpec): Container image specification for sandbox provisioning.

                Supports public registry images and private registry images with authentication.
            resource_limits (ResourceLimits): Runtime resource constraints as key-value pairs. Similar to Kubernetes
                resource specifications, this allows flexible definition of resource limits.
                Common resource types include:
                - `cpu`: CPU allocation in millicores (e.g., "250m" for 0.25 CPU cores)
                - `memory`: Memory allocation in bytes or human-readable format (e.g., "512Mi", "1Gi")
                - `gpu`: Number of GPU devices (e.g., "1")

                New resource types can be added without API changes.
                 Example: {'cpu': '500m', 'memory': '512Mi', 'gpu': '1'}.
            entrypoint (list[str]): The command to execute as the sandbox's entry process (required).

                Explicitly specifies the user's expected main process, allowing the sandbox management
                service to reliably inject control processes before executing this command.

                Format: [executable, arg1, arg2, ...]

                Examples:
                - ["python", "/app/main.py"]
                - ["/bin/bash"]
                - ["java", "-jar", "/app/app.jar"]
                - ["node", "server.js"]
                 Example: ['python', '/app/main.py'].
            timeout (int | None | Unset): Sandbox timeout in seconds. The sandbox will automatically terminate after
                this duration.
                The maximum is controlled by the server configuration (`server.max_sandbox_timeout_seconds`).
                Omit or set null to disable automatic expiration and require explicit cleanup.
                Note: manual cleanup support is runtime-dependent; Kubernetes providers may reject
                null timeout when the underlying workload provider does not support non-expiring sandboxes.
            env (CreateSandboxRequestEnv | Unset): Environment variables to inject into the sandbox runtime. Example:
                {'API_KEY': 'secret-key', 'DEBUG': 'true', 'LOG_LEVEL': 'info'}.
            metadata (CreateSandboxRequestMetadata | Unset): Custom key-value metadata for management, filtering, and
                tagging.
                Use "name" key for a human-readable identifier.
                 Example: {'name': 'Data Processing Sandbox', 'project': 'data-processing', 'team': 'ml',
                'environment': 'staging'}.
            network_policy (NetworkPolicy | Unset): Egress network policy matching the sidecar `/policy` request body.
                If `defaultAction` is omitted, the sidecar defaults to "deny"; passing an empty
                object or null results in allow-all behavior at startup.
            volumes (list[Volume] | Unset): Storage mounts for the sandbox. Each volume entry specifies a named
                backend-specific storage source and common mount settings. Exactly one backend type must be
                specified per volume entry.
            extensions (CreateSandboxRequestExtensions | Unset): Opaque container for provider-specific or transient
                parameters not supported by the core API.

                **Note**: This field is reserved for internal features, experimental flags, or temporary behaviors.
                Standard parameters should be proposed as core API fields.

                **Best Practices**:
                - **Namespacing**: Use prefixed keys (e.g., `storage.id`) to prevent collisions.
                - **Pass-through**: SDKs and middleware must treat this object as opaque and pass it through
                  transparently.
    """

    image: ImageSpec
    resource_limits: ResourceLimits
    entrypoint: list[str]
    timeout: int | None | Unset = UNSET
    env: CreateSandboxRequestEnv | Unset = UNSET
    metadata: CreateSandboxRequestMetadata | Unset = UNSET
    network_policy: NetworkPolicy | Unset = UNSET
    volumes: list[Volume] | Unset = UNSET
    extensions: CreateSandboxRequestExtensions | Unset = UNSET
    # Keys that are not modelled as fields; excluded from the constructor.
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize this request to a plain dict keyed by the camelCase wire names.

        Optional fields that are unset are left out. Additional properties are written
        first, so a modelled field with the same key overrides them.
        """
        image_payload = self.image.to_dict()
        limits_payload = self.resource_limits.to_dict()

        # Collect the optional fields that are set, in the order they are emitted.
        optional_payload: dict[str, Any] = {}
        if not isinstance(self.timeout, Unset):
            # An explicit None is kept and serialized as-is.
            optional_payload["timeout"] = self.timeout
        if not isinstance(self.env, Unset):
            optional_payload["env"] = self.env.to_dict()
        if not isinstance(self.metadata, Unset):
            optional_payload["metadata"] = self.metadata.to_dict()
        if not isinstance(self.network_policy, Unset):
            optional_payload["networkPolicy"] = self.network_policy.to_dict()
        if not isinstance(self.volumes, Unset):
            optional_payload["volumes"] = [volume.to_dict() for volume in self.volumes]
        if not isinstance(self.extensions, Unset):
            optional_payload["extensions"] = self.extensions.to_dict()

        payload: dict[str, Any] = dict(self.additional_properties)
        payload["image"] = image_payload
        payload["resourceLimits"] = limits_payload
        payload["entrypoint"] = self.entrypoint
        payload.update(optional_payload)
        return payload

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build a request from a mapping keyed by the camelCase wire names.

        ``image``, ``resourceLimits`` and ``entrypoint`` must be present (a missing one
        raises ``KeyError``). Keys that are not modelled end up in ``additional_properties``.
        The input mapping itself is not modified.
        """
        from ..models.create_sandbox_request_env import CreateSandboxRequestEnv
        from ..models.create_sandbox_request_extensions import CreateSandboxRequestExtensions
        from ..models.create_sandbox_request_metadata import CreateSandboxRequestMetadata
        from ..models.image_spec import ImageSpec
        from ..models.network_policy import NetworkPolicy
        from ..models.resource_limits import ResourceLimits
        from ..models.volume import Volume

        # Work on a copy; whatever is left after popping the known keys becomes additional_properties.
        remaining = dict(src_dict)

        image = ImageSpec.from_dict(remaining.pop("image"))
        resource_limits = ResourceLimits.from_dict(remaining.pop("resourceLimits"))
        entrypoint = cast(list[str], remaining.pop("entrypoint"))

        def _parse_timeout(data: object) -> int | None | Unset:
            # None and Unset pass through untouched; anything else is returned as given.
            if data is None or isinstance(data, Unset):
                return data
            return cast(int | None | Unset, data)

        timeout = _parse_timeout(remaining.pop("timeout", UNSET))

        raw_env = remaining.pop("env", UNSET)
        env: CreateSandboxRequestEnv | Unset = (
            UNSET if isinstance(raw_env, Unset) else CreateSandboxRequestEnv.from_dict(raw_env)
        )

        raw_metadata = remaining.pop("metadata", UNSET)
        metadata: CreateSandboxRequestMetadata | Unset = (
            UNSET if isinstance(raw_metadata, Unset) else CreateSandboxRequestMetadata.from_dict(raw_metadata)
        )

        raw_network_policy = remaining.pop("networkPolicy", UNSET)
        network_policy: NetworkPolicy | Unset = (
            UNSET if isinstance(raw_network_policy, Unset) else NetworkPolicy.from_dict(raw_network_policy)
        )

        raw_volumes = remaining.pop("volumes", UNSET)
        volumes: list[Volume] | Unset = UNSET
        if raw_volumes is not UNSET:
            volumes = [Volume.from_dict(entry) for entry in raw_volumes]

        raw_extensions = remaining.pop("extensions", UNSET)
        extensions: CreateSandboxRequestExtensions | Unset = (
            UNSET if isinstance(raw_extensions, Unset) else CreateSandboxRequestExtensions.from_dict(raw_extensions)
        )

        instance = cls(
            image=image,
            resource_limits=resource_limits,
            entrypoint=entrypoint,
            timeout=timeout,
            env=env,
            metadata=metadata,
            network_policy=network_policy,
            volumes=volumes,
            extensions=extensions,
        )
        instance.additional_properties = remaining
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Keys currently stored in ``additional_properties``."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> Any:
        """Look up an additional property by key."""
        extras = self.additional_properties
        return extras[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store an additional property under ``key``."""
        extras = self.additional_properties
        extras[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the additional property stored under ``key``."""
        extras = self.additional_properties
        del extras[key]

    def __contains__(self, key: str) -> bool:
        """Report whether ``key`` is present among the additional properties."""
        extras = self.additional_properties
        return key in extras


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/create_sandbox_request_env.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

# Type variable bound to CreateSandboxRequestEnv; `from_dict` uses it so that calling the
# classmethod on a subclass is typed as returning that subclass.
T = TypeVar("T", bound="CreateSandboxRequestEnv")


@_attrs_define
class CreateSandboxRequestEnv:
    """Environment variables to inject into the sandbox runtime.

    The object has no fixed fields; every entry lives in ``additional_properties``.

    Example:
        {'API_KEY': 'secret-key', 'DEBUG': 'true', 'LOG_LEVEL': 'info'}

    """

    # Backing store for all entries; excluded from the constructor.
    additional_properties: dict[str, str] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a new dict holding a shallow copy of all entries."""
        return dict(self.additional_properties)

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance whose entries are a shallow copy of ``src_dict``."""
        entries = dict(src_dict)
        instance = cls()
        instance.additional_properties = entries
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Keys currently stored in ``additional_properties``."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> str:
        """Look up the value stored under ``key``."""
        entries = self.additional_properties
        return entries[key]

    def __setitem__(self, key: str, value: str) -> None:
        """Store ``value`` under ``key``."""
        entries = self.additional_properties
        entries[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the entry stored under ``key``."""
        entries = self.additional_properties
        del entries[key]

    def __contains__(self, key: str) -> bool:
        """Report whether ``key`` is present."""
        entries = self.additional_properties
        return key in entries


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/create_sandbox_request_extensions.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

# Type variable bound to CreateSandboxRequestExtensions; `from_dict` uses it so that calling
# the classmethod on a subclass is typed as returning that subclass.
T = TypeVar("T", bound="CreateSandboxRequestExtensions")


@_attrs_define
class CreateSandboxRequestExtensions:
    """Opaque container for provider-specific or transient parameters not supported by the core API.

    The object has no fixed fields; every entry lives in ``additional_properties``.

    **Note**: This field is reserved for internal features, experimental flags, or temporary behaviors. Standard
    parameters should be proposed as core API fields.

    **Best Practices**:
    - **Namespacing**: Use prefixed keys (e.g., `storage.id`) to prevent collisions.
    - **Pass-through**: SDKs and middleware must treat this object as opaque and pass it through transparently.

    """

    # Backing store for all entries; excluded from the constructor.
    additional_properties: dict[str, str] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a new dict holding a shallow copy of all entries."""
        return dict(self.additional_properties)

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance whose entries are a shallow copy of ``src_dict``."""
        entries = dict(src_dict)
        instance = cls()
        instance.additional_properties = entries
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Keys currently stored in ``additional_properties``."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> str:
        """Look up the value stored under ``key``."""
        entries = self.additional_properties
        return entries[key]

    def __setitem__(self, key: str, value: str) -> None:
        """Store ``value`` under ``key``."""
        entries = self.additional_properties
        entries[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the entry stored under ``key``."""
        entries = self.additional_properties
        del entries[key]

    def __contains__(self, key: str) -> bool:
        """Report whether ``key`` is present."""
        entries = self.additional_properties
        return key in entries


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/create_sandbox_request_metadata.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

# Type variable bound to CreateSandboxRequestMetadata; `from_dict` uses it so that calling
# the classmethod on a subclass is typed as returning that subclass.
T = TypeVar("T", bound="CreateSandboxRequestMetadata")


@_attrs_define
class CreateSandboxRequestMetadata:
    """Custom key-value metadata for management, filtering, and tagging.
    Use "name" key for a human-readable identifier.

    The object has no fixed fields; every entry lives in ``additional_properties``.

        Example:
            {'name': 'Data Processing Sandbox', 'project': 'data-processing', 'team': 'ml', 'environment': 'staging'}

    """

    # Backing store for all entries; excluded from the constructor.
    additional_properties: dict[str, str] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a new dict holding a shallow copy of all entries."""
        return dict(self.additional_properties)

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance whose entries are a shallow copy of ``src_dict``."""
        entries = dict(src_dict)
        instance = cls()
        instance.additional_properties = entries
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Keys currently stored in ``additional_properties``."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> str:
        """Look up the value stored under ``key``."""
        entries = self.additional_properties
        return entries[key]

    def __setitem__(self, key: str, value: str) -> None:
        """Store ``value`` under ``key``."""
        entries = self.additional_properties
        entries[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the entry stored under ``key``."""
        entries = self.additional_properties
        del entries[key]

    def __contains__(self, key: str) -> bool:
        """Report whether ``key`` is present."""
        entries = self.additional_properties
        return key in entries


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/create_sandbox_response.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

import datetime
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, TypeVar, cast

from attrs import define as _attrs_define
from attrs import field as _attrs_field
from dateutil.parser import isoparse

from ..types import UNSET, Unset

if TYPE_CHECKING:
    from ..models.create_sandbox_response_metadata import CreateSandboxResponseMetadata
    from ..models.sandbox_status import SandboxStatus


# Type variable bound to CreateSandboxResponse.
# NOTE(review): presumably used as the self-type of a `from_dict` classmethod — confirm below.
T = TypeVar("T", bound="CreateSandboxResponse")


@_attrs_define
class CreateSandboxResponse:
    """Payload returned after creating a sandbox; carries the essentials without image and updatedAt.

    Attributes:
        id (str): Unique sandbox identifier
        status (SandboxStatus): Detailed status information with lifecycle state and transition details
        created_at (datetime.datetime): Sandbox creation timestamp
        entrypoint (list[str]): Entry process specification from creation request
        metadata (CreateSandboxResponseMetadata | Unset): Custom metadata from creation request
        expires_at (datetime.datetime | None | Unset): Timestamp when sandbox will auto-terminate. Null when manual
            cleanup is enabled.
    """

    id: str
    status: SandboxStatus
    created_at: datetime.datetime
    entrypoint: list[str]
    metadata: CreateSandboxResponseMetadata | Unset = UNSET
    expires_at: datetime.datetime | None | Unset = UNSET
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize this response into a dict keyed by the camelCase wire names.

        Entries from ``additional_properties`` are written first, so the declared
        fields override any clashing key. ``metadata`` and ``expiresAt`` are left
        out when unset; an ``expires_at`` of ``None`` is emitted as ``None``.
        """
        status_payload = self.status.to_dict()
        created_at_text = self.created_at.isoformat()

        metadata_payload: dict[str, Any] | Unset = (
            UNSET if isinstance(self.metadata, Unset) else self.metadata.to_dict()
        )

        raw_expiry = self.expires_at
        expiry: None | str | Unset
        if isinstance(raw_expiry, Unset):
            expiry = UNSET
        elif isinstance(raw_expiry, datetime.datetime):
            expiry = raw_expiry.isoformat()
        else:
            # Anything else (notably None) is forwarded unchanged.
            expiry = raw_expiry

        payload: dict[str, Any] = {
            **self.additional_properties,
            "id": self.id,
            "status": status_payload,
            "createdAt": created_at_text,
            "entrypoint": self.entrypoint,
        }
        if metadata_payload is not UNSET:
            payload["metadata"] = metadata_payload
        if expiry is not UNSET:
            payload["expiresAt"] = expiry

        return payload

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build a response from a mapping keyed by the camelCase wire names.

        ``id``, ``status``, ``createdAt`` and ``entrypoint`` are required and raise
        ``KeyError`` when missing. A missing or ``None`` ``metadata`` becomes
        ``UNSET``. Keys that are not declared fields end up in
        ``additional_properties``.
        """
        from ..models.create_sandbox_response_metadata import CreateSandboxResponseMetadata
        from ..models.sandbox_status import SandboxStatus

        remaining = dict(src_dict)

        sandbox_id = remaining.pop("id")
        status = SandboxStatus.from_dict(remaining.pop("status"))
        created_at = isoparse(remaining.pop("createdAt"))
        entrypoint = cast(list[str], remaining.pop("entrypoint"))

        raw_metadata = remaining.pop("metadata", UNSET)
        metadata: CreateSandboxResponseMetadata | Unset
        if raw_metadata is None or isinstance(raw_metadata, Unset):
            metadata = UNSET
        else:
            metadata = CreateSandboxResponseMetadata.from_dict(raw_metadata)

        def _parse_expires_at(data: object) -> datetime.datetime | None | Unset:
            # None and Unset are returned as they are.
            if data is None or isinstance(data, Unset):
                return data
            if isinstance(data, str):
                try:
                    return isoparse(data)
                except (TypeError, ValueError, AttributeError, KeyError):
                    # Unparseable text falls through and is returned unchanged.
                    pass
            return cast(datetime.datetime | None | Unset, data)

        expires_at = _parse_expires_at(remaining.pop("expiresAt", UNSET))

        response = cls(
            id=sandbox_id,
            status=status,
            created_at=created_at,
            entrypoint=entrypoint,
            metadata=metadata,
            expires_at=expires_at,
        )
        # Whatever was not consumed above is kept as extra properties.
        response.additional_properties = remaining

        return response

    @property
    def additional_keys(self) -> list[str]:
        """Keys currently stored in ``additional_properties``, as a new list."""
        return [name for name in self.additional_properties.keys()]

    def __getitem__(self, key: str) -> Any:
        """Return the extra property stored under ``key``."""
        extras = self.additional_properties
        return extras[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store ``value`` as the extra property ``key``."""
        extras = self.additional_properties
        extras[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the extra property ``key``."""
        extras = self.additional_properties
        del extras[key]

    def __contains__(self, key: str) -> bool:
        """Report whether ``key`` is one of the extra properties."""
        extras = self.additional_properties
        return key in extras


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/create_sandbox_response_metadata.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

T = TypeVar("T", bound="CreateSandboxResponseMetadata")


@_attrs_define
class CreateSandboxResponseMetadata:
    """Custom metadata from creation request"""

    additional_properties: dict[str, str] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a new dict holding a shallow copy of the stored entries."""
        return dict(self.additional_properties)

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance whose entries are a copy of ``src_dict``."""
        entries = dict(src_dict)
        instance = cls()
        instance.additional_properties = entries

        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Keys currently stored in ``additional_properties``, as a new list."""
        return [name for name in self.additional_properties.keys()]

    def __getitem__(self, key: str) -> str:
        """Return the entry stored under ``key``."""
        entries = self.additional_properties
        return entries[key]

    def __setitem__(self, key: str, value: str) -> None:
        """Store ``value`` under ``key``."""
        entries = self.additional_properties
        entries[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the entry stored under ``key``."""
        entries = self.additional_properties
        del entries[key]

    def __contains__(self, key: str) -> bool:
        """Report whether ``key`` is present."""
        entries = self.additional_properties
        return key in entries


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/endpoint.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, TypeVar

from attrs import define as _attrs_define

from ..types import UNSET, Unset

if TYPE_CHECKING:
    from ..models.endpoint_headers import EndpointHeaders


T = TypeVar("T", bound="Endpoint")


@_attrs_define
class Endpoint:
    """Endpoint for accessing a service running in the sandbox.

    The service must be listening on the specified port inside the sandbox for the
    endpoint to be available.

    Attributes:
        endpoint (str): Public URL to access the service from outside the sandbox.
            Format: {endpoint-host}/sandboxes/{sandboxId}/port/{port}
            Example: endpoint.opensandbox.io/sandboxes/abc123/port/8080
        headers (EndpointHeaders | Unset): Requests targeting the sandbox must include the corresponding header(s).
    """

    endpoint: str
    headers: EndpointHeaders | Unset = UNSET

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a dict; ``headers`` is omitted when it is unset."""
        endpoint = self.endpoint

        headers: dict[str, Any] | Unset = UNSET
        if not isinstance(self.headers, Unset):
            headers = self.headers.to_dict()

        field_dict: dict[str, Any] = {"endpoint": endpoint}
        if headers is not UNSET:
            field_dict["headers"] = headers

        return field_dict

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an ``Endpoint`` from a mapping keyed by the wire names.

        Args:
            src_dict: Mapping with a required ``endpoint`` key and an optional
                ``headers`` key.

        Returns:
            The parsed instance. ``headers`` is ``UNSET`` when the key is missing
            or its value is ``None``.

        Raises:
            KeyError: If ``endpoint`` is missing from ``src_dict``.
        """
        from ..models.endpoint_headers import EndpointHeaders

        d = dict(src_dict)
        url = d.pop("endpoint")

        _headers = d.pop("headers", UNSET)
        headers: EndpointHeaders | Unset
        # A JSON null is treated like a missing key, in line with the sibling
        # models; handing None to EndpointHeaders.from_dict would raise TypeError.
        if isinstance(_headers, Unset) or _headers is None:
            headers = UNSET
        else:
            headers = EndpointHeaders.from_dict(_headers)

        return cls(
            endpoint=url,
            headers=headers,
        )


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/endpoint_headers.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

T = TypeVar("T", bound="EndpointHeaders")


@_attrs_define
class EndpointHeaders:
    """Requests targeting the sandbox must include the corresponding header(s)."""

    additional_properties: dict[str, str] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a new dict holding a shallow copy of the stored headers."""
        return dict(self.additional_properties)

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance whose headers are a copy of ``src_dict``."""
        header_map = dict(src_dict)
        instance = cls()
        instance.additional_properties = header_map

        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Keys currently stored in ``additional_properties``, as a new list."""
        return [name for name in self.additional_properties.keys()]

    def __getitem__(self, key: str) -> str:
        """Return the value stored under ``key``."""
        header_map = self.additional_properties
        return header_map[key]

    def __setitem__(self, key: str, value: str) -> None:
        """Store ``value`` under ``key``."""
        header_map = self.additional_properties
        header_map[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the entry stored under ``key``."""
        header_map = self.additional_properties
        del header_map[key]

    def __contains__(self, key: str) -> bool:
        """Report whether ``key`` is present."""
        header_map = self.additional_properties
        return key in header_map


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/error_response.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define

T = TypeVar("T", bound="ErrorResponse")


@_attrs_define
class ErrorResponse:
    """Standard error response for all non-2xx HTTP responses.

    HTTP status code indicates the error category; code and message provide details.

    Attributes:
        code (str): Machine-readable error code (e.g., INVALID_REQUEST, NOT_FOUND, INTERNAL_ERROR).
            Use this for programmatic error handling.
        message (str): Human-readable error message describing what went wrong and how to fix it.
    """

    code: str
    message: str

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a dict with the ``code`` and ``message`` keys."""
        return {
            "code": self.code,
            "message": self.message,
        }

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a mapping; both keys are required.

        Raises:
            KeyError: If ``code`` or ``message`` is missing from ``src_dict``.
        """
        fields = dict(src_dict)
        error_code = fields.pop("code")
        error_message = fields.pop("message")

        return cls(code=error_code, message=error_message)


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/host.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define

T = TypeVar("T", bound="Host")


@_attrs_define
class Host:
    """Host path bind mount backend. Maps a directory on the host filesystem
    into the container. Only available when the runtime supports host mounts.

    Security note: Host paths are restricted by server-side allowlist.
    Users must specify paths under permitted prefixes.

    Attributes:
        path (str): Absolute path on the host filesystem to mount.
            Must start with '/' and be under an allowed prefix.
    """

    path: str

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a dict with the single ``path`` key."""
        return {"path": self.path}

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a mapping; ``path`` is required.

        Raises:
            KeyError: If ``path`` is missing from ``src_dict``.
        """
        fields = dict(src_dict)
        host_path = fields.pop("path")

        return cls(path=host_path)


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/image_spec.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, TypeVar

from attrs import define as _attrs_define

from ..types import UNSET, Unset

if TYPE_CHECKING:
    from ..models.image_spec_auth import ImageSpecAuth


T = TypeVar("T", bound="ImageSpec")


@_attrs_define
class ImageSpec:
    """Container image specification for sandbox provisioning.

    Supports public registry images and private registry images with authentication.

    Attributes:
        uri (str): Container image URI in standard format.

            Examples:
              - "python:3.11" (Docker Hub)
              - "ubuntu:22.04"
              - "gcr.io/my-project/model-server:v1.0"
              - "private-registry.company.com:5000/app:latest"
        auth (ImageSpecAuth | Unset): Registry authentication credentials (required for private registries)
    """

    uri: str
    auth: ImageSpecAuth | Unset = UNSET

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a dict; ``auth`` is omitted when it is unset."""
        auth_payload: dict[str, Any] | Unset = (
            UNSET if isinstance(self.auth, Unset) else self.auth.to_dict()
        )

        payload: dict[str, Any] = {"uri": self.uri}
        if auth_payload is not UNSET:
            payload["auth"] = auth_payload

        return payload

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a mapping; ``uri`` is required.

        A missing or ``None`` ``auth`` value results in ``UNSET``.

        Raises:
            KeyError: If ``uri`` is missing from ``src_dict``.
        """
        from ..models.image_spec_auth import ImageSpecAuth

        fields = dict(src_dict)
        image_uri = fields.pop("uri")

        raw_auth = fields.pop("auth", UNSET)
        auth: ImageSpecAuth | Unset
        if raw_auth is None or isinstance(raw_auth, Unset):
            auth = UNSET
        else:
            auth = ImageSpecAuth.from_dict(raw_auth)

        return cls(uri=image_uri, auth=auth)


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/image_spec_auth.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define

from ..types import UNSET, Unset

T = TypeVar("T", bound="ImageSpecAuth")


@_attrs_define
class ImageSpecAuth:
    """Registry authentication credentials (required for private registries)

    Attributes:
        username (str | Unset): Registry username or service account
        password (str | Unset): Registry password or authentication token
    """

    username: str | Unset = UNSET
    password: str | Unset = UNSET

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a dict containing only the credentials that are set."""
        candidates = (
            ("username", self.username),
            ("password", self.password),
        )

        payload: dict[str, Any] = {}
        for wire_name, value in candidates:
            if value is not UNSET:
                payload[wire_name] = value

        return payload

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a mapping; absent keys default to ``UNSET``."""
        fields = dict(src_dict)
        user = fields.pop("username", UNSET)
        secret = fields.pop("password", UNSET)

        return cls(username=user, password=secret)


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/list_sandboxes_response.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

if TYPE_CHECKING:
    from ..models.pagination_info import PaginationInfo
    from ..models.sandbox import Sandbox


T = TypeVar("T", bound="ListSandboxesResponse")


@_attrs_define
class ListSandboxesResponse:
    """
    Attributes:
        items (list[Sandbox]):
        pagination (PaginationInfo): Pagination metadata for list responses
    """

    items: list[Sandbox]
    pagination: PaginationInfo
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a dict with ``items`` and ``pagination``.

        Entries from ``additional_properties`` are written first, so the declared
        fields override any clashing key.
        """
        serialized_items = [sandbox.to_dict() for sandbox in self.items]
        serialized_pagination = self.pagination.to_dict()

        return {
            **self.additional_properties,
            "items": serialized_items,
            "pagination": serialized_pagination,
        }

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build a response from a mapping; ``items`` and ``pagination`` are required.

        Keys that are not declared fields end up in ``additional_properties``.

        Raises:
            KeyError: If ``items`` or ``pagination`` is missing from ``src_dict``.
        """
        from ..models.pagination_info import PaginationInfo
        from ..models.sandbox import Sandbox

        remaining = dict(src_dict)
        items = [Sandbox.from_dict(raw_item) for raw_item in remaining.pop("items")]
        pagination = PaginationInfo.from_dict(remaining.pop("pagination"))

        response = cls(items=items, pagination=pagination)
        # Whatever was not consumed above is kept as extra properties.
        response.additional_properties = remaining

        return response

    @property
    def additional_keys(self) -> list[str]:
        """Keys currently stored in ``additional_properties``, as a new list."""
        return [name for name in self.additional_properties.keys()]

    def __getitem__(self, key: str) -> Any:
        """Return the extra property stored under ``key``."""
        extras = self.additional_properties
        return extras[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store ``value`` as the extra property ``key``."""
        extras = self.additional_properties
        extras[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the extra property ``key``."""
        extras = self.additional_properties
        del extras[key]

    def __contains__(self, key: str) -> bool:
        """Report whether ``key`` is one of the extra properties."""
        extras = self.additional_properties
        return key in extras


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/network_policy.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, TypeVar

from attrs import define as _attrs_define

from ..models.network_policy_default_action import NetworkPolicyDefaultAction
from ..types import UNSET, Unset

if TYPE_CHECKING:
    from ..models.network_rule import NetworkRule


T = TypeVar("T", bound="NetworkPolicy")


@_attrs_define
class NetworkPolicy:
    """Egress network policy matching the sidecar `/policy` request body.

    If `defaultAction` is omitted, the sidecar defaults to "deny"; passing an empty
    object or null results in allow-all behavior at startup.

    Attributes:
        default_action (NetworkPolicyDefaultAction | Unset): Default action when no egress rule matches. Defaults to
            "deny".
        egress (list[NetworkRule] | Unset): List of egress rules evaluated in order.
    """

    default_action: NetworkPolicyDefaultAction | Unset = UNSET
    egress: list[NetworkRule] | Unset = UNSET

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a dict; fields that are unset are omitted."""
        default_action: str | Unset = UNSET
        if not isinstance(self.default_action, Unset):
            default_action = self.default_action.value

        egress: list[dict[str, Any]] | Unset = UNSET
        if not isinstance(self.egress, Unset):
            egress = [rule.to_dict() for rule in self.egress]

        field_dict: dict[str, Any] = {}
        if default_action is not UNSET:
            field_dict["defaultAction"] = default_action
        if egress is not UNSET:
            field_dict["egress"] = egress

        return field_dict

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build a policy from a mapping keyed by the wire names.

        Args:
            src_dict: Mapping with optional ``defaultAction`` and ``egress`` keys.

        Returns:
            The parsed policy. A field is ``UNSET`` when its key is missing or its
            value is ``None``.

        Raises:
            ValueError: If ``defaultAction`` is not a valid
                ``NetworkPolicyDefaultAction`` value.
        """
        from ..models.network_rule import NetworkRule

        d = dict(src_dict)

        _default_action = d.pop("defaultAction", UNSET)
        default_action: NetworkPolicyDefaultAction | Unset
        # A JSON null is treated like a missing key, in line with the sibling
        # models; NetworkPolicyDefaultAction(None) would raise ValueError.
        if isinstance(_default_action, Unset) or _default_action is None:
            default_action = UNSET
        else:
            default_action = NetworkPolicyDefaultAction(_default_action)

        _egress = d.pop("egress", UNSET)
        egress: list[NetworkRule] | Unset = UNSET
        # Same for egress: iterating over None would raise TypeError.
        if not isinstance(_egress, Unset) and _egress is not None:
            egress = [NetworkRule.from_dict(egress_item_data) for egress_item_data in _egress]

        return cls(
            default_action=default_action,
            egress=egress,
        )


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/network_policy_default_action.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from enum import Enum


class NetworkPolicyDefaultAction(str, Enum):
    """String-valued enum with the two policy default actions, ``allow`` and ``deny``."""

    ALLOW = "allow"
    DENY = "deny"

    def __str__(self) -> str:
        """Return the member's string value (``"allow"`` / ``"deny"``)."""
        return f"{self.value}"


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/network_rule.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define

from ..models.network_rule_action import NetworkRuleAction

T = TypeVar("T", bound="NetworkRule")


@_attrs_define
class NetworkRule:
    """
    Attributes:
        action (NetworkRuleAction): Whether to allow or deny matching targets.
        target (str): FQDN or wildcard domain (e.g., "example.com", "*.example.com").
            IP/CIDR not yet supported in the egress MVP.
    """

    action: NetworkRuleAction
    target: str

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a dict; ``action`` is written as its enum value."""
        return {
            "action": self.action.value,
            "target": self.target,
        }

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build a rule from a mapping; ``action`` and ``target`` are required.

        Raises:
            KeyError: If ``action`` or ``target`` is missing from ``src_dict``.
            ValueError: If ``action`` is not a valid ``NetworkRuleAction`` value.
        """
        fields = dict(src_dict)
        rule_action = NetworkRuleAction(fields.pop("action"))
        rule_target = fields.pop("target")

        return cls(action=rule_action, target=rule_target)


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/network_rule_action.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from enum import Enum


class NetworkRuleAction(str, Enum):
    """String-valued enum with the two rule actions, ``allow`` and ``deny``."""

    ALLOW = "allow"
    DENY = "deny"

    def __str__(self) -> str:
        """Return the member's string value (``"allow"`` / ``"deny"``)."""
        return f"{self.value}"


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/ossfs.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar, cast

from attrs import define as _attrs_define

from ..models.ossfs_version import OSSFSVersion
from ..types import UNSET, Unset

T = TypeVar("T", bound="OSSFS")


@_attrs_define
class OSSFS:
    """Alibaba Cloud OSS mount backend via ossfs.

    The runtime mounts a host-side OSS path under `storage.ossfs_mount_root`
    and bind-mounts the resolved path into the sandbox container.
    Prefix selection is expressed via `Volume.subPath`.
    In Docker runtime, OSSFS backend requires OpenSandbox Server to run on a Linux host with FUSE support.

        Attributes:
            bucket (str): OSS bucket name.
            endpoint (str): OSS endpoint (e.g., `oss-cn-hangzhou.aliyuncs.com`).
            access_key_id (str): OSS access key ID for inline credentials mode.
            access_key_secret (str): OSS access key secret for inline credentials mode.
            version (OSSFSVersion | Unset): ossfs major version used by runtime mount integration. Default:
                OSSFSVersion.VALUE_1 (whose value is `2.0`).
            options (list[str] | Unset): Additional ossfs mount options.
                Runtime encodes options by `version`:
                - `1.0`: mounts with `ossfs ... -o <option>`
                - `2.0`: mounts with `ossfs2 mount ... -c <config-file>` and encodes options as `--<option>` lines in the config
                file
                Option values must be provided as raw payloads without leading `-`.
    """

    bucket: str
    endpoint: str
    access_key_id: str
    access_key_secret: str
    version: OSSFSVersion | Unset = OSSFSVersion.VALUE_1
    options: list[str] | Unset = UNSET

    def to_dict(self) -> dict[str, Any]:
        """Serialize this backend definition to a JSON-ready dict.

        Returns:
            A dict with the camelCase keys ``bucket``, ``endpoint``,
            ``accessKeyId`` and ``accessKeySecret``. ``version`` (as the enum's
            string value) and ``options`` are included only when they are not
            unset.
        """
        bucket = self.bucket

        endpoint = self.endpoint

        access_key_id = self.access_key_id

        access_key_secret = self.access_key_secret

        # The enum is flattened to its plain string value for the payload.
        version: str | Unset = UNSET
        if not isinstance(self.version, Unset):
            version = self.version.value

        options: list[str] | Unset = UNSET
        if not isinstance(self.options, Unset):
            options = self.options

        field_dict: dict[str, Any] = {}

        field_dict.update(
            {
                "bucket": bucket,
                "endpoint": endpoint,
                "accessKeyId": access_key_id,
                "accessKeySecret": access_key_secret,
            }
        )
        # Optional fields are omitted entirely rather than emitted as null.
        if version is not UNSET:
            field_dict["version"] = version
        if options is not UNSET:
            field_dict["options"] = options

        return field_dict

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a dict using the camelCase payload keys.

        Args:
            src_dict: Mapping that must contain ``bucket``, ``endpoint``,
                ``accessKeyId`` and ``accessKeySecret``; ``version`` and
                ``options`` are optional.

        Returns:
            A new instance. A missing or null ``version`` is stored as UNSET
            (the class-level default is not applied here); a missing
            ``options`` is stored as UNSET.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If ``version`` is present, non-null, and not a valid
                ``OSSFSVersion`` value.
        """
        # Work on a copy so the caller's mapping is never mutated by pop().
        d = dict(src_dict)
        bucket = d.pop("bucket")

        endpoint = d.pop("endpoint")

        access_key_id = d.pop("accessKeyId")

        access_key_secret = d.pop("accessKeySecret")

        _version = d.pop("version", UNSET)
        version: OSSFSVersion | Unset
        # An explicit null is treated like an absent key: OSSFSVersion(None)
        # would otherwise raise ValueError.
        if isinstance(_version, Unset) or _version is None:
            version = UNSET
        else:
            version = OSSFSVersion(_version)

        options = cast(list[str], d.pop("options", UNSET))

        ossfs = cls(
            bucket=bucket,
            endpoint=endpoint,
            access_key_id=access_key_id,
            access_key_secret=access_key_secret,
            version=version,
            options=options,
        )

        return ossfs


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/ossfs_version.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from enum import Enum


class OSSFSVersion(str, Enum):
    """Enumeration of the ossfs version strings ``1.0`` and ``2.0``.

    Member names are positional, not version numbers: ``VALUE_0`` is ``"1.0"``
    and ``VALUE_1`` is ``"2.0"``.
    """

    VALUE_0 = "1.0"
    VALUE_1 = "2.0"

    def __str__(self) -> str:
        """Return the member's raw version string instead of the ``Class.MEMBER`` form."""
        raw_value = self.value
        return str(raw_value)


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/pagination_info.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

T = TypeVar("T", bound="PaginationInfo")


@_attrs_define
class PaginationInfo:
    """Pagination metadata attached to list responses.

    Attributes:
        page (int): Current page number.
        page_size (int): Number of items per page.
        total_items (int): Total number of items matching the filter.
        total_pages (int): Total number of pages.
        has_next_page (bool): Whether more pages follow the current one.
    """

    page: int
    page_size: int
    total_items: int
    total_pages: int
    has_next_page: bool
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a JSON-ready dict using camelCase keys.

        Additional properties are written first, so the declared fields win
        on any key collision.
        """
        payload: dict[str, Any] = {}
        payload.update(self.additional_properties)
        payload["page"] = self.page
        payload["pageSize"] = self.page_size
        payload["totalItems"] = self.total_items
        payload["totalPages"] = self.total_pages
        payload["hasNextPage"] = self.has_next_page
        return payload

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a dict with camelCase keys.

        All five declared keys are required (``KeyError`` if one is missing);
        whatever remains is kept in ``additional_properties``.
        """
        # Copy first: the pops below must not mutate the caller's mapping.
        leftovers = dict(src_dict)
        declared = {
            "page": leftovers.pop("page"),
            "page_size": leftovers.pop("pageSize"),
            "total_items": leftovers.pop("totalItems"),
            "total_pages": leftovers.pop("totalPages"),
            "has_next_page": leftovers.pop("hasNextPage"),
        }

        instance = cls(**declared)
        instance.additional_properties = leftovers
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Names of the stored additional properties."""
        return [*self.additional_properties.keys()]

    def __getitem__(self, key: str) -> Any:
        """Look up an additional property by name."""
        extras = self.additional_properties
        return extras[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store or overwrite an additional property."""
        extras = self.additional_properties
        extras[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove an additional property."""
        extras = self.additional_properties
        del extras[key]

    def __contains__(self, key: str) -> bool:
        """Report whether an additional property with this name exists."""
        extras = self.additional_properties
        return key in extras


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/pvc.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define

T = TypeVar("T", bound="PVC")


@_attrs_define
class PVC:
    """Platform-managed named volume backend: a runtime-neutral reference to a
    pre-existing, platform-managed named volume.

    - Kubernetes: maps to a PersistentVolumeClaim in the same namespace.
    - Docker: maps to a Docker named volume (created via `docker volume create`).

    The volume must already exist on the target platform before the sandbox
    is created.

        Attributes:
            claim_name (str): Name of the volume on the target platform.
                In Kubernetes this is the PVC name; in Docker this is the named
                volume name. Must be a valid DNS label.
    """

    claim_name: str

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a JSON-ready dict with the single key ``claimName``."""
        return {"claimName": self.claim_name}

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a dict; ``claimName`` is required (``KeyError`` if absent)."""
        # Copy first so the pop below does not mutate the caller's mapping.
        payload = dict(src_dict)
        name = payload.pop("claimName")
        return cls(claim_name=name)


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/renew_sandbox_expiration_request.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

import datetime
from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from dateutil.parser import isoparse

T = TypeVar("T", bound="RenewSandboxExpirationRequest")


@_attrs_define
class RenewSandboxExpirationRequest:
    """Payload carrying a new expiration time for a sandbox.

    Attributes:
        expires_at (datetime.datetime): New absolute expiration time in UTC (RFC 3339 format).
            Must be in the future and later than the current expiresAt time.

            Example: "2025-11-16T14:30:45Z"
    """

    expires_at: datetime.datetime

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a JSON-ready dict, rendering ``expiresAt`` with ``datetime.isoformat()``."""
        return {"expiresAt": self.expires_at.isoformat()}

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a dict; ``expiresAt`` is required and parsed with ``isoparse``."""
        # Copy first so the pop below does not mutate the caller's mapping.
        payload = dict(src_dict)
        raw_expiry = payload.pop("expiresAt")
        return cls(expires_at=isoparse(raw_expiry))


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/renew_sandbox_expiration_response.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

import datetime
from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from dateutil.parser import isoparse

T = TypeVar("T", bound="RenewSandboxExpirationResponse")


@_attrs_define
class RenewSandboxExpirationResponse:
    """Payload reporting a sandbox's new expiration time.

    Attributes:
        expires_at (datetime.datetime): The new absolute expiration time in UTC (RFC 3339 format).

            Example: "2025-11-16T14:30:45Z"
    """

    expires_at: datetime.datetime

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a JSON-ready dict, rendering ``expiresAt`` with ``datetime.isoformat()``."""
        return {"expiresAt": self.expires_at.isoformat()}

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a dict; ``expiresAt`` is required and parsed with ``isoparse``."""
        # Copy first so the pop below does not mutate the caller's mapping.
        payload = dict(src_dict)
        raw_expiry = payload.pop("expiresAt")
        return cls(expires_at=isoparse(raw_expiry))


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/resource_limits.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

T = TypeVar("T", bound="ResourceLimits")


@_attrs_define
class ResourceLimits:
    """Runtime resource constraints expressed as free-form key-value pairs.

    Similar to Kubernetes resource specifications. Common resource types include:
    - `cpu`: CPU allocation in millicores (e.g., "250m" for 0.25 CPU cores)
    - `memory`: Memory allocation in bytes or human-readable format (e.g., "512Mi", "1Gi")
    - `gpu`: Number of GPU devices (e.g., "1")

    New resource types can be added without API changes, so the class has no
    declared fields; every entry lives in ``additional_properties``.

        Example:
            {'cpu': '500m', 'memory': '512Mi', 'gpu': '1'}

    """

    additional_properties: dict[str, str] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a new dict holding a shallow copy of all stored entries."""
        snapshot: dict[str, Any] = {}
        snapshot.update(self.additional_properties)
        return snapshot

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance whose entries are a shallow copy of ``src_dict``."""
        entries = dict(src_dict)
        instance = cls()
        instance.additional_properties = entries
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Names of the stored entries."""
        return [*self.additional_properties.keys()]

    def __getitem__(self, key: str) -> str:
        """Look up an entry by name."""
        entries = self.additional_properties
        return entries[key]

    def __setitem__(self, key: str, value: str) -> None:
        """Store or overwrite an entry."""
        entries = self.additional_properties
        entries[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove an entry."""
        entries = self.additional_properties
        del entries[key]

    def __contains__(self, key: str) -> bool:
        """Report whether an entry with this name exists."""
        entries = self.additional_properties
        return key in entries


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/sandbox.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

import datetime
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, TypeVar, cast

from attrs import define as _attrs_define
from attrs import field as _attrs_field
from dateutil.parser import isoparse

from ..types import UNSET, Unset

if TYPE_CHECKING:
    from ..models.image_spec import ImageSpec
    from ..models.sandbox_metadata import SandboxMetadata
    from ..models.sandbox_status import SandboxStatus


T = TypeVar("T", bound="Sandbox")


@_attrs_define
class Sandbox:
    """A runtime execution environment provisioned from a container image.

    Attributes:
        id (str): Unique sandbox identifier.
        image (ImageSpec): Container image specification for sandbox provisioning.
            Supports public registry images and private registry images with authentication.
        status (SandboxStatus): Detailed status information with lifecycle state and transition details.
        entrypoint (list[str]): The command to execute as the sandbox's entry process.
            Always present in responses since entrypoint is required in creation requests.
        created_at (datetime.datetime): Sandbox creation timestamp.
        metadata (SandboxMetadata | Unset): Custom metadata from the creation request.
        expires_at (datetime.datetime | None | Unset): Timestamp when the sandbox will auto-terminate.
            Null when manual cleanup is enabled.
    """

    id: str
    image: ImageSpec
    status: SandboxStatus
    entrypoint: list[str]
    created_at: datetime.datetime
    metadata: SandboxMetadata | Unset = UNSET
    expires_at: datetime.datetime | None | Unset = UNSET
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a JSON-ready dict using camelCase keys.

        Nested models are serialized through their own ``to_dict``; datetimes
        are rendered with ``isoformat()``. ``metadata`` and ``expiresAt`` are
        omitted when unset, while a ``None`` expiry is emitted as-is.
        Additional properties are written first, so declared fields win on
        any key collision.
        """
        image_payload = self.image.to_dict()
        status_payload = self.status.to_dict()
        created_at_text = self.created_at.isoformat()

        metadata_payload: dict[str, Any] | Unset = (
            UNSET if isinstance(self.metadata, Unset) else self.metadata.to_dict()
        )

        expiry = self.expires_at
        expires_at_payload: None | str | Unset
        if isinstance(expiry, Unset):
            expires_at_payload = UNSET
        else:
            # Only real datetimes are formatted; None (or anything else) passes through.
            expires_at_payload = expiry.isoformat() if isinstance(expiry, datetime.datetime) else expiry

        payload: dict[str, Any] = {}
        payload.update(self.additional_properties)
        payload["id"] = self.id
        payload["image"] = image_payload
        payload["status"] = status_payload
        payload["entrypoint"] = self.entrypoint
        payload["createdAt"] = created_at_text

        if metadata_payload is not UNSET:
            payload["metadata"] = metadata_payload
        if expires_at_payload is not UNSET:
            payload["expiresAt"] = expires_at_payload

        return payload

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a dict with camelCase keys.

        ``id``, ``image``, ``status``, ``entrypoint`` and ``createdAt`` are
        required (``KeyError`` if missing). A missing or null ``metadata``
        becomes UNSET. ``expiresAt`` keeps ``None``/UNSET as they are, parses
        strings with ``isoparse``, and falls back to the raw value when it is
        not a string or cannot be parsed. Unconsumed keys end up in
        ``additional_properties``.
        """
        # Imported at call time; the module only imports these under TYPE_CHECKING.
        from ..models.image_spec import ImageSpec
        from ..models.sandbox_metadata import SandboxMetadata
        from ..models.sandbox_status import SandboxStatus

        # Copy first: the pops below must not mutate the caller's mapping.
        remaining = dict(src_dict)

        sandbox_id = remaining.pop("id")
        image = ImageSpec.from_dict(remaining.pop("image"))
        status = SandboxStatus.from_dict(remaining.pop("status"))
        entrypoint = cast(list[str], remaining.pop("entrypoint"))
        created_at = isoparse(remaining.pop("createdAt"))

        raw_metadata = remaining.pop("metadata", UNSET)
        metadata: SandboxMetadata | Unset = UNSET
        if raw_metadata is not None and not isinstance(raw_metadata, Unset):
            metadata = SandboxMetadata.from_dict(raw_metadata)

        raw_expiry = remaining.pop("expiresAt", UNSET)
        expires_at: datetime.datetime | None | Unset
        if raw_expiry is None or isinstance(raw_expiry, Unset):
            expires_at = raw_expiry
        elif isinstance(raw_expiry, str):
            try:
                expires_at = isoparse(raw_expiry)
            except (TypeError, ValueError, AttributeError, KeyError):
                # Best effort: an unparseable string is kept verbatim.
                expires_at = cast(datetime.datetime | None | Unset, raw_expiry)
        else:
            # Non-string values are kept verbatim as well.
            expires_at = cast(datetime.datetime | None | Unset, raw_expiry)

        instance = cls(
            id=sandbox_id,
            image=image,
            status=status,
            entrypoint=entrypoint,
            created_at=created_at,
            metadata=metadata,
            expires_at=expires_at,
        )
        instance.additional_properties = remaining
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Names of the stored additional properties."""
        return [*self.additional_properties.keys()]

    def __getitem__(self, key: str) -> Any:
        """Look up an additional property by name."""
        extras = self.additional_properties
        return extras[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store or overwrite an additional property."""
        extras = self.additional_properties
        extras[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove an additional property."""
        extras = self.additional_properties
        del extras[key]

    def __contains__(self, key: str) -> bool:
        """Report whether an additional property with this name exists."""
        extras = self.additional_properties
        return key in extras


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/sandbox_metadata.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field

T = TypeVar("T", bound="SandboxMetadata")


@_attrs_define
class SandboxMetadata:
    """Custom metadata from the creation request.

    The class has no declared fields; every entry lives in
    ``additional_properties``.
    """

    additional_properties: dict[str, str] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a new dict holding a shallow copy of all stored entries."""
        snapshot: dict[str, Any] = {}
        snapshot.update(self.additional_properties)
        return snapshot

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance whose entries are a shallow copy of ``src_dict``."""
        entries = dict(src_dict)
        instance = cls()
        instance.additional_properties = entries
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Names of the stored entries."""
        return [*self.additional_properties.keys()]

    def __getitem__(self, key: str) -> str:
        """Look up an entry by name."""
        entries = self.additional_properties
        return entries[key]

    def __setitem__(self, key: str, value: str) -> None:
        """Store or overwrite an entry."""
        entries = self.additional_properties
        entries[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove an entry."""
        entries = self.additional_properties
        del entries[key]

    def __contains__(self, key: str) -> bool:
        """Report whether an entry with this name exists."""
        entries = self.additional_properties
        return key in entries


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/sandbox_status.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

import datetime
from collections.abc import Mapping
from typing import Any, TypeVar

from attrs import define as _attrs_define
from attrs import field as _attrs_field
from dateutil.parser import isoparse

from ..types import UNSET, Unset

T = TypeVar("T", bound="SandboxStatus")


@_attrs_define
class SandboxStatus:
    """Detailed status information with lifecycle state and transition details.

    Attributes:
        state (str): High-level lifecycle state of the sandbox.

            Common state values:
            - Pending: Sandbox is being provisioned
            - Running: Sandbox is running and ready to accept requests
            - Pausing: Sandbox is in the process of pausing
            - Paused: Sandbox has been paused while retaining its state
            - Stopping: Sandbox is being terminated
            - Terminated: Sandbox has been successfully terminated
            - Failed: Sandbox encountered a critical error

            State transitions:
            - Pending → Running (after creation completes)
            - Running → Pausing (when pause is requested)
            - Pausing → Paused (pause operation completes)
            - Paused → Running (when resume is requested)
            - Running/Paused → Stopping (when kill is requested or TTL expires)
            - Stopping → Terminated (kill/timeout operation completes)
            - Pending/Running/Paused → Failed (on error)

            Note: New state values may be added in future versions.
            Clients should handle unknown state values gracefully.
        reason (str | Unset): Short machine-readable reason code for the current state.
            Examples: "user_delete", "ttl_expiry", "provision_timeout", "runtime_error"
        message (str | Unset): Human-readable message describing the current state or the
            reason for the state transition.
        last_transition_at (datetime.datetime | Unset): Timestamp of the last state transition.
    """

    state: str
    reason: str | Unset = UNSET
    message: str | Unset = UNSET
    last_transition_at: datetime.datetime | Unset = UNSET
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a JSON-ready dict using camelCase keys.

        ``state`` is always present; ``reason``, ``message`` and
        ``lastTransitionAt`` (rendered with ``isoformat()``) are included only
        when they are not UNSET. Additional properties are written first, so
        declared fields win on any key collision.
        """
        transition_text: str | Unset = (
            UNSET if isinstance(self.last_transition_at, Unset) else self.last_transition_at.isoformat()
        )

        payload: dict[str, Any] = {}
        payload.update(self.additional_properties)
        payload["state"] = self.state

        optional_fields = (
            ("reason", self.reason),
            ("message", self.message),
            ("lastTransitionAt", transition_text),
        )
        for wire_key, value in optional_fields:
            if value is not UNSET:
                payload[wire_key] = value

        return payload

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a dict with camelCase keys.

        ``state`` is required (``KeyError`` if missing). ``reason`` and
        ``message`` are taken as-is and default to UNSET. A missing or null
        ``lastTransitionAt`` becomes UNSET; any other value is parsed with
        ``isoparse``. Unconsumed keys end up in ``additional_properties``.
        """
        # Copy first: the pops below must not mutate the caller's mapping.
        remaining = dict(src_dict)

        state = remaining.pop("state")
        reason = remaining.pop("reason", UNSET)
        message = remaining.pop("message", UNSET)

        raw_transition = remaining.pop("lastTransitionAt", UNSET)
        last_transition_at: datetime.datetime | Unset = UNSET
        if raw_transition is not None and not isinstance(raw_transition, Unset):
            last_transition_at = isoparse(raw_transition)

        instance = cls(
            state=state,
            reason=reason,
            message=message,
            last_transition_at=last_transition_at,
        )
        instance.additional_properties = remaining
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Names of the stored additional properties."""
        return [*self.additional_properties.keys()]

    def __getitem__(self, key: str) -> Any:
        """Look up an additional property by name."""
        extras = self.additional_properties
        return extras[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store or overwrite an additional property."""
        extras = self.additional_properties
        extras[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove an additional property."""
        extras = self.additional_properties
        del extras[key]

    def __contains__(self, key: str) -> bool:
        """Report whether an additional property with this name exists."""
        extras = self.additional_properties
        return key in extras


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/models/volume.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, TypeVar

from attrs import define as _attrs_define

from ..types import UNSET, Unset

if TYPE_CHECKING:
    from ..models.host import Host
    from ..models.ossfs import OSSFS
    from ..models.pvc import PVC


T = TypeVar("T", bound="Volume")


@_attrs_define
class Volume:
    """Storage mount definition for a sandbox.

    Every volume entry is made of three parts:
    - a unique name identifier,
    - exactly one backend struct (host, pvc, ossfs, etc.) carrying backend-specific fields,
    - common mount settings (mountPath, readOnly, subPath).

    Attributes:
        name (str): Unique identifier for the volume within the sandbox.
            Must be a valid DNS label (lowercase alphanumeric, hyphens allowed, max 63 chars).
        mount_path (str): Absolute path inside the container where the volume is mounted.
            Must start with '/'.
        host (Host | Unset): Host path bind mount backend. Maps a directory on the host
            filesystem into the container. Only available when the runtime supports host mounts.

            Security note: host paths are restricted by a server-side allowlist, so users
            must specify paths under permitted prefixes.
        pvc (PVC | Unset): Platform-managed named volume backend; a runtime-neutral way to
            reference a pre-existing, platform-managed named volume.

            - Kubernetes: maps to a PersistentVolumeClaim in the same namespace.
            - Docker: maps to a Docker named volume (created via `docker volume create`).

            The volume must already exist on the target platform before sandbox creation.
        ossfs (OSSFS | Unset): Alibaba Cloud OSS mount backend via ossfs.

            The runtime mounts a host-side OSS path under `storage.ossfs_mount_root`
            and bind-mounts the resolved path into the sandbox container.
            Prefix selection is expressed via `Volume.subPath`.
            In Docker runtime, the OSSFS backend requires OpenSandbox Server to run on a
            Linux host with FUSE support.
        read_only (bool | Unset): If true, the volume is mounted read-only.
            Default: False (read-write).
        sub_path (str | Unset): Optional subdirectory under the backend path to mount.
            For the `ossfs` backend this field is used as the bucket prefix.
            Must be a relative path without '..' components.
    """

    name: str
    mount_path: str
    host: Host | Unset = UNSET
    pvc: PVC | Unset = UNSET
    ossfs: OSSFS | Unset = UNSET
    read_only: bool | Unset = False
    sub_path: str | Unset = UNSET

    def to_dict(self) -> dict[str, Any]:
        """Serialise the volume to a plain dict keyed by the camelCase wire names.

        ``name`` and ``mountPath`` are always emitted; every other key is emitted
        only when its value is not the ``UNSET`` sentinel.
        """
        # Nested backends are converted first, in declaration order.
        host_payload: dict[str, Any] | Unset = (
            UNSET if isinstance(self.host, Unset) else self.host.to_dict()
        )
        pvc_payload: dict[str, Any] | Unset = (
            UNSET if isinstance(self.pvc, Unset) else self.pvc.to_dict()
        )
        ossfs_payload: dict[str, Any] | Unset = (
            UNSET if isinstance(self.ossfs, Unset) else self.ossfs.to_dict()
        )

        serialized: dict[str, Any] = {
            "name": self.name,
            "mountPath": self.mount_path,
        }

        # Optional entries keep the same insertion order as the field declarations.
        optional_entries = (
            ("host", host_payload),
            ("pvc", pvc_payload),
            ("ossfs", ossfs_payload),
            ("readOnly", self.read_only),
            ("subPath", self.sub_path),
        )
        for wire_key, value in optional_entries:
            if value is not UNSET:
                serialized[wire_key] = value

        return serialized

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build a ``Volume`` from a mapping that uses the camelCase wire names.

        ``name`` and ``mountPath`` are required (a missing key raises ``KeyError``);
        the remaining keys fall back to ``UNSET`` when absent. The input mapping is
        copied first and is never mutated.
        """
        # Imported here rather than at module level (module level only imports them
        # under TYPE_CHECKING).
        from ..models.host import Host
        from ..models.ossfs import OSSFS
        from ..models.pvc import PVC

        remaining = dict(src_dict)
        name = remaining.pop("name")
        mount_path = remaining.pop("mountPath")

        raw_host = remaining.pop("host", UNSET)
        host: Host | Unset = UNSET if isinstance(raw_host, Unset) else Host.from_dict(raw_host)

        raw_pvc = remaining.pop("pvc", UNSET)
        pvc: PVC | Unset = UNSET if isinstance(raw_pvc, Unset) else PVC.from_dict(raw_pvc)

        raw_ossfs = remaining.pop("ossfs", UNSET)
        ossfs: OSSFS | Unset = UNSET if isinstance(raw_ossfs, Unset) else OSSFS.from_dict(raw_ossfs)

        read_only = remaining.pop("readOnly", UNSET)
        sub_path = remaining.pop("subPath", UNSET)

        return cls(
            name=name,
            mount_path=mount_path,
            host=host,
            pvc=pvc,
            ossfs=ossfs,
            read_only=read_only,
            sub_path=sub_path,
        )


================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/py.typed
================================================
# Marker file for PEP 561

================================================
FILE: sdks/sandbox/python/src/opensandbox/api/lifecycle/types.py
================================================
#
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Contains some shared types for properties"""

from collections.abc import Mapping, MutableMapping
from http import HTTPStatus
from typing import IO, BinaryIO, Generic, Literal, TypeVar

from attrs import define


class Unset:
    """Sentinel type for a value that was not supplied; every instance is falsy."""

    def __bool__(self) -> Literal[False]:
        # Always falsy, so `value or default` and `if value:` treat it as absent.
        return False


# Module-level `Unset` instance; models compare against it (`is not UNSET`) and
# use it as the default for optional fields.
UNSET: Unset = Unset()

# The types that `httpx.Client(files=)` can accept, copied from that library.
# FileContent: the file body itself, either an open binary stream, raw bytes, or text.
FileContent = IO[bytes] | bytes | str
FileTypes = (
    # (filename, file (or bytes), content_type)
    tuple[str | None, FileContent, str | None]
    # (filename, file (or bytes), content_type, headers)
    | tuple[str | None, FileContent, str | None, Mapping[str, str]]
)
# A list of (str, FileTypes) pairs; the str is presumably the multipart form
# field name — confirm against httpx usage.
RequestFiles = list[tuple[str, FileTypes]]


@define
class File:
    """Bundles a binary payload with an optional file name and MIME type for uploads."""

    payload: BinaryIO
    file_name: str | None = None
    mime_type: str | None = None

    def to_tuple(self) -> FileTypes:
        """Return ``(file_name, payload, mime_type)``, the tuple form httpx accepts for multipart/form-data."""
        name, stream, content_type = self.file_name, self.payload, self.mime_type
        return (name, stream, content_type)


T = TypeVar("T")


@define
class Response(Generic[T]):
    """A response from an endpoint.

    Generic over ``T``, the type of the parsed body.
    """

    # HTTP status of the response.
    status_code: HTTPStatus
    # Raw response body bytes.
    content: bytes
    # Response headers as a mutable str -> str mapping.
    headers: MutableMapping[str, str]
    # Body parsed into T, or None (presumably when nothing could be parsed for
    # this status — confirm against the endpoint modules).
    parsed: T | None


__all__ = ["UNSET", "File", "FileTypes", "RequestFiles", "Response", "Unset"]


================================================
FILE: sdks/sandbox/python/src/opensandbox/config/__init__.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Configuration module for OpenSandbox SDK.
"""

from opensandbox.config.connection import ConnectionConfig
from opensandbox.config.connection_sync import ConnectionConfigSync

__all__ = ["ConnectionConfig", "ConnectionConfigSync"]


================================================
FILE: sdks/sandbox/python/src/opensandbox/config/connection.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Connection configuration for OpenSandbox operations.
"""

import os
from datetime import timedelta

import httpx  # type: ignore[reportMissingImports]
from pydantic import (  # type: ignore[reportMissingImports]
    BaseModel,
    ConfigDict,
    Field,
    PrivateAttr,
    field_validator,
)


class ConnectionConfig(BaseModel):
    """
    Sandbox operations connection configuration.

    Transport lifecycle:
    - If `transport` is NOT provided, the SDK creates a default `httpx.AsyncHTTPTransport`
      per Sandbox/Manager instance. In this case, `Sandbox.close()` / `SandboxManager.close()`
      will close the transport.
    - If `transport` IS provided by the user, the SDK will NOT close it; the user owns it.

    Note:
    - Async transports are generally expected to be used within a single asyncio event loop.
      If your test runner creates multiple event loops (common in pytest-asyncio defaults),
      avoid sharing a single `ConnectionConfig(transport=...)` instance across loops.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # True when the SDK created the transport and is therefore responsible for closing it.
    _owns_transport: bool = PrivateAttr(default=True)

    api_key: str | None = Field(
        default=None, description="API key for authentication with sandbox service"
    )
    domain: str | None = Field(
        default=None, description="Base domain for the sandbox management API"
    )
    protocol: str = Field(default="http", description="Protocol to use (http/https)")
    request_timeout: timedelta = Field(
        default=timedelta(seconds=30),
        description="Timeout for HTTP requests to the management API",
    )
    debug: bool = Field(
        default=False, description="Enable debug logging for HTTP requests"
    )
    user_agent: str = Field(
        default="OpenSandbox-Python-SDK/0.1.5", description="User agent string"
    )
    headers: dict[str, str] = Field(
        default_factory=dict, description="User defined headers"
    )
    transport: httpx.AsyncBaseTransport | None = Field(
        default=None,
        description=(
            "Shared httpx transport instance used by all HTTP clients within a "
            "Sandbox/Manager instance. Pass a custom transport (e.g. AsyncHTTPTransport "
            "with custom settings) to control connection pooling, proxies, retries, etc."
        ),
    )
    use_server_proxy: bool = Field(
        default=False,
        description=(
            # Adjacent literals are concatenated verbatim, so the separator must be
            # part of the first literal.
            "Using sandbox server as proxy for process execd requests. "
            "It's useful when client sdk can't access the created sandbox directly"
        ),
    )

    # Environment variable names
    _ENV_API_KEY = "OPEN_SANDBOX_API_KEY"
    _ENV_DOMAIN = "OPEN_SANDBOX_DOMAIN"
    _DEFAULT_DOMAIN = "localhost:8080"
    _API_VERSION = "v1"

    def model_post_init(self, __context: object) -> None:
        """Record transport ownership once the model has been constructed."""
        # If the user explicitly provided `transport`, the SDK must not close it.
        self._owns_transport = "transport" not in self.model_fields_set

    def with_transport_if_missing(self) -> "ConnectionConfig":
        """
        Ensure a transport exists for this SDK resource.

        If `transport` is missing, return a copy with a default transport and
        mark it as SDK-owned. If present, return self unchanged.

        Returns:
            Either `self` (transport already set) or a copy carrying a freshly
            created `httpx.AsyncHTTPTransport`.
        """
        if self.transport is not None:
            return self
        transport = httpx.AsyncHTTPTransport(
            limits=httpx.Limits(
                max_connections=100,
                max_keepalive_connections=20,
                keepalive_expiry=30.0,
            ),
        )
        config = self.model_copy(update={"transport": transport})
        # The copy inherits the private flag from `self`; force it because the
        # transport on the copy was created right here by the SDK.
        config._owns_transport = True
        return config

    async def close_transport_if_owned(self) -> None:
        """
        Close the transport only if the SDK owns it.

        Does nothing when no transport is set or when the transport was supplied
        by the user. Errors raised while closing are suppressed.
        """
        if self.transport is None or not self._owns_transport:
            return
        try:
            await self.transport.aclose()
        except Exception:
            # Deliberate best-effort: avoid raising during cleanup paths.
            pass

    @field_validator("protocol")
    @classmethod
    def protocol_must_be_valid(cls, v: str) -> str:
        """Lower-case the protocol and reject anything other than http/https."""
        v = v.lower()
        if v not in ("http", "https"):
            raise ValueError("Protocol must be 'http' or 'https'")
        return v

    @field_validator("request_timeout")
    @classmethod
    def timeout_must_be_positive(cls, v: timedelta) -> timedelta:
        """Reject zero or negative request timeouts."""
        if v.total_seconds() <= 0:
            raise ValueError(f"Request timeout must be positive, got: {v}")
        return v

    def get_api_key(self) -> str:
        """
        Get API key from config or environment variable.
        Returns:
            API key string (may be empty if not configured)
        Note: An empty API key may cause authentication failures.
        Consider checking if the key is set before making API calls.
        """
        return self.api_key or os.getenv(self._ENV_API_KEY, "")

    def get_domain(self) -> str:
        """
        Get domain from config or environment variable.

        Falls back to the built-in default domain when neither is set.
        """
        return self.domain or os.getenv(self._ENV_DOMAIN, self._DEFAULT_DOMAIN)

    def get_base_url(self) -> str:
        """
        Get the full base URL for API requests.

        Returns:
            `<scheme>://<domain>/<api version>`. A scheme embedded in the domain
            takes precedence over `protocol`.
        """
        # Drop trailing slashes so "host/" does not produce "host//v1".
        domain = self.get_domain().rstrip("/")
        # Allow domain to override protocol if it explicitly starts with a scheme.
        # URL schemes are case-insensitive, so "HTTP://host" must match as well.
        if domain.lower().startswith(("http://", "https://")):
            return f"{domain}/{self._API_VERSION}"
        return f"{self.protocol}://{domain}/{self._API_VERSION}"


================================================
FILE: sdks/sandbox/python/src/opensandbox/config/connection_sync.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous connection configuration for OpenSandbox SDK.

This mirrors ConnectionConfig (async) but uses httpx sync transports.
"""

import os
from datetime import timedelta

import httpx
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, field_validator


class ConnectionConfigSync(BaseModel):
    """
    Synchronous connection configuration shared across all sync SDK HTTP clients.

    Ownership rules:
    - If `transport` is not provided, the SDK creates a default HTTPTransport per
      Sandbox/Manager instance and will close it.
    - If `transport` is provided, the SDK will NOT close it (user owns it).
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # True when the SDK created the transport and is therefore responsible for closing it.
    _owns_transport: bool = PrivateAttr(default=True)

    api_key: str | None = Field(
        default=None, description="API key for authentication with sandbox service"
    )
    domain: str | None = Field(
        default=None, description="Base domain for the sandbox management API"
    )
    protocol: str = Field(default="http", description="Protocol to use (http/https)")
    request_timeout: timedelta = Field(
        default=timedelta(seconds=30),
        description="Timeout for HTTP requests to the management API",
    )
    debug: bool = Field(default=False, description="Enable debug logging for HTTP requests")
    user_agent: str = Field(
        default="OpenSandbox-Python-SDK/0.1.5", description="User agent string"
    )
    headers: dict[str, str] = Field(default_factory=dict, description="User defined headers")

    transport: httpx.BaseTransport | None = Field(
        default=None,
        description=(
            "Shared httpx transport instance used by all HTTP clients within a "
            "Sandbox/Manager instance. Pass a custom transport (e.g. HTTPTransport "
            "with custom limits/proxies) to control connection pooling, proxies, retries, etc."
        ),
    )
    use_server_proxy: bool = Field(
        default=False,
        description=(
            # Adjacent literals are concatenated verbatim, so the separator must be
            # part of the first literal.
            "Using sandbox server as proxy for process execd requests. "
            "It's useful when client sdk can't access the created sandbox directly"
        ),
    )

    # Environment variable names and built-in defaults
    _ENV_API_KEY = "OPEN_SANDBOX_API_KEY"
    _ENV_DOMAIN = "OPEN_SANDBOX_DOMAIN"
    _DEFAULT_DOMAIN = "localhost:8080"
    _API_VERSION = "v1"

    def model_post_init(self, __context: object) -> None:
        """Record transport ownership once the model has been constructed."""
        # If the user explicitly provided `transport`, the SDK must not close it.
        self._owns_transport = "transport" not in self.model_fields_set

    def with_transport_if_missing(self) -> "ConnectionConfigSync":
        """
        Ensure a transport exists for this SDK resource.

        If `transport` is missing, return a copy with a default transport and
        mark it as SDK-owned. If present, return self unchanged.

        Returns:
            Either `self` (transport already set) or a copy carrying a freshly
            created `httpx.HTTPTransport`.
        """
        if self.transport is not None:
            return self
        transport = httpx.HTTPTransport(
            limits=httpx.Limits(
                max_connections=100,
                max_keepalive_connections=20,
                keepalive_expiry=30.0,
            ),
        )
        config = self.model_copy(update={"transport": transport})
        # The copy inherits the private flag from `self`; force it because the
        # transport on the copy was created right here by the SDK.
        config._owns_transport = True
        return config

    def close_transport_if_owned(self) -> None:
        """
        Close the transport only if the SDK owns it.

        Does nothing when no transport is set or when the transport was supplied
        by the user. Errors raised while closing are suppressed.
        """
        if self.transport is None or not self._owns_transport:
            return
        try:
            self.transport.close()
        except Exception:
            # Deliberate best-effort: avoid raising during cleanup paths.
            pass

    @field_validator("protocol")
    @classmethod
    def protocol_must_be_valid(cls, v: str) -> str:
        """Lower-case the protocol and reject anything other than http/https."""
        v = v.lower()
        if v not in ("http", "https"):
            raise ValueError("Protocol must be 'http' or 'https'")
        return v

    @field_validator("request_timeout")
    @classmethod
    def timeout_must_be_positive(cls, v: timedelta) -> timedelta:
        """Reject zero or negative request timeouts."""
        if v.total_seconds() <= 0:
            raise ValueError(f"Request timeout must be positive, got: {v}")
        return v

    def get_api_key(self) -> str:
        """
        Get API key from config or environment variable.

        Returns:
            API key string (empty when neither source provides one).
        """
        return self.api_key or os.getenv(self._ENV_API_KEY, "")

    def get_domain(self) -> str:
        """
        Get domain from config or environment variable.

        Falls back to the built-in default domain when neither is set.
        """
        return self.domain or os.getenv(self._ENV_DOMAIN, self._DEFAULT_DOMAIN)

    def get_base_url(self) -> str:
        """
        Get the full base URL for API requests.

        Returns:
            `<scheme>://<domain>/<api version>`. A scheme embedded in the domain
            takes precedence over `protocol`.
        """
        # Drop trailing slashes so "host/" does not produce "host//v1".
        domain = self.get_domain().rstrip("/")
        # Allow domain to override protocol if it explicitly starts with a scheme.
        # URL schemes are case-insensitive, so "HTTP://host" must match as well.
        if domain.lower().startswith(("http://", "https://")):
            return f"{domain}/{self._API_VERSION}"
        return f"{self.protocol}://{domain}/{self._API_VERSION}"


================================================
FILE: sdks/sandbox/python/src/opensandbox/constants.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Shared constants for the Sandbox SDK.
"""

# Default TCP port for the execd service (presumably the in-sandbox execution
# daemon's listening port — confirm against the code that consumes it).
DEFAULT_EXECD_PORT = 44772
# Default TCP port for the egress component (presumably its listening port —
# confirm against the code that consumes it).
DEFAULT_EGRESS_PORT = 18080


================================================
FILE: sdks/sandbox/python/src/opensandbox/exceptions/__init__.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Exception definitions for OpenSandbox SDK.
"""

from opensandbox.exceptions.sandbox import (
    InvalidArgumentException,
    SandboxApiException,
    SandboxError,
    SandboxException,
    SandboxInternalException,
    SandboxReadyTimeoutException,
    SandboxUnhealthyException,
)

__all__ = [
    "SandboxException",
    "SandboxApiException",
    "SandboxInternalException",
    "SandboxUnhealthyException",
    "SandboxReadyTimeoutException",
    "InvalidArgumentException",
    "SandboxError",
]


================================================
FILE: sdks/sandbox/python/src/opensandbox/exceptions/sandbox.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Sandbox-related exception definitions.
"""


class SandboxError:
    """
    Defines standardized common error codes and messages for the Sandbox SDK.
    """

    INTERNAL_UNKNOWN_ERROR = "INTERNAL_UNKNOWN_ERROR"
    READY_TIMEOUT = "READY_TIMEOUT"
    UNHEALTHY = "UNHEALTHY"
    INVALID_ARGUMENT = "INVALID_ARGUMENT"
    UNEXPECTED_RESPONSE = "UNEXPECTED_RESPONSE"

    def __init__(self, code: str, message: str | None = None) -> None:
        self.code = code
        self.message = message

    def __repr__(self) -> str:
        return f"SandboxError(code='{self.code}', message='{self.message}')"


class SandboxException(Exception):
    """
    Root of the sandbox exception hierarchy.

    Every other sandbox exception derives from this class, so all of them expose
    the same structure: a message, an optional cause, a ``SandboxError`` and an
    optional request id.
    """

    def __init__(
        self,
        message: str | None = None,
        cause: Exception | None = None,
        error: SandboxError | None = None,
        request_id: str | None = None,
    ) -> None:
        """
        Args:
            message: Text passed on to ``Exception``.
            cause: Underlying exception, stored as ``__cause__``.
            error: Structured error; a falsy value is replaced by an
                INTERNAL_UNKNOWN_ERROR ``SandboxError``.
            request_id: Identifier of the related request, if any.
        """
        super().__init__(message)
        self.__cause__ = cause
        if not error:
            error = SandboxError(SandboxError.INTERNAL_UNKNOWN_ERROR)
        self.error = error
        self.request_id = request_id


class SandboxApiException(SandboxException):
    """
    Raised when the Sandbox API answers with an error response (e.g., HTTP 4xx or 5xx)
    or when an unexpected error occurs while calling the API.
    """

    def __init__(
        self,
        message: str | None = None,
        cause: Exception | None = None,
        status_code: int | None = None,
        error: SandboxError | None = None,
        request_id: str | None = None,
    ) -> None:
        """
        Args:
            message: Text passed on to the base exception.
            cause: Underlying exception, if any.
            status_code: HTTP status code, stored on the instance.
            error: Structured error; a falsy value is replaced by an
                UNEXPECTED_RESPONSE ``SandboxError``.
            request_id: Identifier of the related request, if any.
        """
        effective_error = (
            error if error else SandboxError(SandboxError.UNEXPECTED_RESPONSE)
        )
        super().__init__(
            message=message,
            cause=cause,
            error=effective_error,
            request_id=request_id,
        )
        self.status_code = status_code


class SandboxInternalException(SandboxException):
    """
    Raised when an unexpected internal error occurs within the SDK.
    """

    def __init__(
        self,
        message: str | None = None,
        cause: Exception | None = None,
    ) -> None:
        """Attach an INTERNAL_UNKNOWN_ERROR ``SandboxError`` (created without a message)."""
        internal_error = SandboxError(SandboxError.INTERNAL_UNKNOWN_ERROR)
        super().__init__(message=message, cause=cause, error=internal_error)


class SandboxUnhealthyException(SandboxException):
    """
    Raised when the sandbox is determined to be unhealthy.
    """

    def __init__(
        self,
        message: str | None = None,
        cause: Exception | None = None,
    ) -> None:
        """Attach an UNHEALTHY ``SandboxError`` that carries the same message."""
        unhealthy_error = SandboxError(SandboxError.UNHEALTHY, message)
        super().__init__(message=message, cause=cause, error=unhealthy_error)


class SandboxReadyTimeoutException(SandboxException):
    """
    Raised when waiting for the sandbox to become ready times out.
    """

    def __init__(
        self,
        message: str | None = None,
        cause: Exception | None = None,
    ) -> None:
        """Attach a READY_TIMEOUT ``SandboxError`` that carries the same message."""
        timeout_error = SandboxError(SandboxError.READY_TIMEOUT, message)
        super().__init__(message=message, cause=cause, error=timeout_error)


class InvalidArgumentException(SandboxException):
    """
    Raised when an SDK method receives an invalid argument.
    Plays the role of ValueError inside the SDK's own exception hierarchy.
    """

    def __init__(
        self,
        message: str | None = None,
        cause: Exception | None = None,
    ) -> None:
        """Attach an INVALID_ARGUMENT ``SandboxError`` that carries the same message."""
        argument_error = SandboxError(SandboxError.INVALID_ARGUMENT, message)
        super().__init__(message=message, cause=cause, error=argument_error)


================================================
FILE: sdks/sandbox/python/src/opensandbox/manager.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Sandbox management interface for administrative operations.

This module provides a centralized interface for managing sandbox instances,
enabling administrative operations and sandbox discovery following the Kotlin SDK pattern.
"""

import logging
from datetime import datetime, timedelta, timezone

from opensandbox.adapters.factory import AdapterFactory
from opensandbox.config import ConnectionConfig
from opensandbox.models.sandboxes import (
    PagedSandboxInfos,
    SandboxFilter,
    SandboxInfo,
    SandboxRenewResponse,
)
from opensandbox.services.sandbox import Sandboxes

logger = logging.getLogger(__name__)


class SandboxManager:
    """
    Sandbox management interface for administrative operations and monitoring sandbox instances.

    This class provides a centralized interface for managing sandbox instances,
    enabling administrative operations and sandbox discovery.
    It focuses on high-level management operations rather than individual sandbox interactions.

    Key Features:

    - **Sandbox Discovery**: List and filter sandbox instances by various criteria
    - **Administrative Operations**: Individual sandbox management operations
    - **Connection Pool Management**: Efficient HTTP client reuse for multiple operations

    Usage Example:

    ```python
    # Create manager
    manager = await SandboxManager.create(connection_config=connection_config)

    # List all running sandboxes
    running_sandboxes = await manager.list_sandbox_infos(
        SandboxFilter(states=["RUNNING"])
    )

    # Individual operations
    sandbox_id = "sandbox-id"
    await manager.get_sandbox_info(sandbox_id)
    await manager.pause_sandbox(sandbox_id)
    await manager.resume_sandbox(sandbox_id)
    await manager.kill_sandbox(sandbox_id)

    # Cleanup
    await manager.close()
    ```

    The manager can also be used as an async context manager; leaving the
    ``async with`` block calls ``close()``.

    **Note**: This class is designed for administrative operations.
    For individual sandbox interactions, use the Sandbox class directly.
    """

    def __init__(
        self,
        sandbox_service: Sandboxes,
        connection_config: ConnectionConfig,
    ) -> None:
        """
        Internal constructor for SandboxManager.

        Note: Use SandboxManager.create() instead.

        Args:
            sandbox_service: Service for sandbox operations
            connection_config: Connection configuration (shared transport, headers, timeouts)
        """
        self._sandbox_service = sandbox_service
        self._connection_config = connection_config

    @property
    def connection_config(self) -> ConnectionConfig:
        """Provides access to the connection configuration (including shared transport)."""
        return self._connection_config

    @classmethod
    async def create(
        cls, connection_config: ConnectionConfig | None = None
    ) -> "SandboxManager":
        """
        Creates a SandboxManager instance with the provided configuration.

        Args:
            connection_config: Connection configuration for the manager.
                             If None, default configuration will be used.

        Returns:
            SandboxManager: Configured sandbox manager instance
        """
        # Guarantee a transport exists; one created here is SDK-owned and is
        # released again by close().
        config = (connection_config or ConnectionConfig()).with_transport_if_missing()
        factory = AdapterFactory(config)
        sandbox_service = factory.create_sandbox_service()
        return cls(sandbox_service, config)

    async def list_sandbox_infos(self, filter: SandboxFilter) -> PagedSandboxInfos:
        """
        List sandboxes with filtering options.

        Args:
            filter: Filter criteria for sandbox listing

        Returns:
            Paged sandbox information matching the filter criteria

        Raises:
            SandboxException: if the operation fails
        """
        return await self._sandbox_service.list_sandboxes(filter)

    async def get_sandbox_info(self, sandbox_id: str) -> SandboxInfo:
        """
        Get information for a single sandbox by its ID.

        Args:
            sandbox_id: Sandbox ID to retrieve information for

        Returns:
            SandboxInfo for the specified sandbox

        Raises:
            SandboxException: if the operation fails
        """
        # Logging arguments are passed separately so the message is only
        # formatted when the record is actually emitted.
        logger.debug("Getting info for sandbox: %s", sandbox_id)
        return await self._sandbox_service.get_sandbox_info(sandbox_id)

    async def kill_sandbox(self, sandbox_id: str) -> None:
        """
        Terminate a single sandbox.

        Args:
            sandbox_id: Sandbox ID to terminate

        Raises:
            SandboxException: if the operation fails
        """
        logger.info("Terminating sandbox: %s", sandbox_id)
        await self._sandbox_service.kill_sandbox(sandbox_id)
        logger.info("Successfully terminated sandbox: %s", sandbox_id)

    async def renew_sandbox(self, sandbox_id: str, timeout: timedelta) -> SandboxRenewResponse:
        """
        Renew expiration time for a single sandbox.

        The new expiration time will be set to the current time plus the provided duration.

        Args:
            sandbox_id: Sandbox ID to renew
            timeout: Duration to add to the current time to set the new expiration

        Returns:
            The response returned by the sandbox service for the renewal request

        Raises:
            SandboxException: if the operation fails
        """
        # Use timezone-aware UTC datetime to avoid cross-timezone ambiguity.
        new_expiration = datetime.now(timezone.utc) + timeout
        logger.info(
            "Renew expiration for sandbox %s to %s", sandbox_id, new_expiration
        )
        return await self._sandbox_service.renew_sandbox_expiration(
            sandbox_id, new_expiration
        )

    async def pause_sandbox(self, sandbox_id: str) -> None:
        """
        Pause a single sandbox while preserving its state.

        Args:
            sandbox_id: Sandbox ID to pause

        Raises:
            SandboxException: if the operation fails
        """
        logger.info("Pausing sandbox: %s", sandbox_id)
        await self._sandbox_service.pause_sandbox(sandbox_id)

    async def resume_sandbox(self, sandbox_id: str) -> None:
        """
        Resume a previously paused sandbox.

        Args:
            sandbox_id: Sandbox ID to resume

        Raises:
            SandboxException: if the operation fails
        """
        logger.info("Resuming sandbox: %s", sandbox_id)
        await self._sandbox_service.resume_sandbox(sandbox_id)

    async def close(self) -> None:
        """
        Close local resources associated with this sandbox manager.

        This method closes HTTP client resources and other local resources.

        Note: This method logs errors but does not raise exceptions to avoid
        issues in context manager cleanup.
        """
        try:
            # Close transport only when SDK owns it (default transport).
            await self._connection_config.close_transport_if_owned()
        except Exception as e:
            logger.warning(
                "Error closing resources for sandbox manager: %s",
                e,
                exc_info=True,
            )

    async def __aenter__(self) -> "SandboxManager":
        """Async context manager entry."""
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: object,
    ) -> None:
        """Async context manager exit; closes the manager and lets any exception propagate."""
        await self.close()


================================================
FILE: sdks/sandbox/python/src/opensandbox/models/__init__.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
OpenSandbox data models.

Core Pydantic models for sandbox operations.
"""

from opensandbox.models.execd import (
    CommandLogs,
    CommandStatus,
    Execution,
    ExecutionComplete,
    ExecutionError,
    ExecutionInit,
    ExecutionLogs,
    ExecutionResult,
    OutputMessage,
)
from opensandbox.models.filesystem import (
    ContentReplaceEntry,
    EntryInfo,
    MoveEntry,
    SearchEntry,
    SetPermissionEntry,
    WriteEntry,
)
from opensandbox.models.sandboxes import (
    OSSFS,
    PVC,
    Host,
    NetworkPolicy,
    NetworkRule,
    PagedSandboxInfos,
    PaginationInfo,
    SandboxCreateResponse,
    SandboxEndpoint,
    SandboxFilter,
    SandboxImageAuth,
    SandboxImageSpec,
    SandboxInfo,
    SandboxMetrics,
    SandboxState,
    SandboxStatus,
    Volume,
)

# Public names re-exported by ``opensandbox.models``; this list controls what
# ``from opensandbox.models import *`` provides.
__all__ = [
    # Execution models
    "Execution",
    "ExecutionLogs",
    "OutputMessage",
    "ExecutionResult",
    "ExecutionError",
    "ExecutionComplete",
    "ExecutionInit",
    "CommandStatus",
    "CommandLogs",
    # Filesystem models
    "EntryInfo",
    "WriteEntry",
    "MoveEntry",
    "SetPermissionEntry",
    "ContentReplaceEntry",
    "SearchEntry",
    # Sandbox models
    "SandboxInfo",
    "SandboxStatus",
    "SandboxState",
    "NetworkPolicy",
    "NetworkRule",
    "SandboxCreateResponse",
    "SandboxEndpoint",
    "SandboxImageSpec",
    "SandboxImageAuth",
    "SandboxFilter",
    "SandboxMetrics",
    "PagedSandboxInfos",
    "PaginationInfo",
    # Volume models (every backend accepted by ``Volume`` is exported)
    "Volume",
    "Host",
    "PVC",
    "OSSFS",
]


================================================
FILE: sdks/sandbox/python/src/opensandbox/models/execd.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Execution-related data models.

Models for code execution, results, and output handling.
"""

from collections.abc import Awaitable, Callable
from datetime import datetime, timedelta
from typing import Any

from pydantic import BaseModel, ConfigDict, Field, model_validator


class OutputMessage(BaseModel):
    """
    A single chunk of output emitted while code is executing.

    Each message carries its text, the time it was generated, and a flag
    telling whether it came from stderr rather than stdout.
    """

    model_config = ConfigDict(populate_by_name=True)

    text: str = Field(description="The text content of the output message")
    timestamp: int = Field(
        description="Unix timestamp in milliseconds when message was generated",
    )
    is_error: bool = Field(
        description="True if message came from stderr",
        default=False,
    )


class ExecutionResult(BaseModel):
    """
    One result produced by a code execution.

    A result has an optional text payload, a creation timestamp, and a
    string-to-string mapping of additional content.
    """

    model_config = ConfigDict(populate_by_name=True)

    text: str | None = Field(
        description="UTF-8 encoded text content",
        default=None,
    )
    timestamp: int = Field(
        description="Unix timestamp in milliseconds when result was created",
    )
    extra_properties: dict[str, str] = Field(
        alias="extra_properties",
        description="Additional result content in UTF-8 format",
        default_factory=dict,
    )


class ExecutionError(BaseModel):
    """
    Describes a failed code execution.

    Holds the error type name, the error message, the time the error
    occurred, and the stack trace lines (empty when none were provided).
    """

    model_config = ConfigDict(populate_by_name=True)

    name: str = Field(
        description="Error name/type (e.g., 'SyntaxError', 'RuntimeError')",
    )
    value: str = Field(description="Error message explaining what went wrong")
    timestamp: int = Field(
        description="Unix timestamp in milliseconds when error occurred",
    )
    traceback: list[str] = Field(
        description="Stack trace lines",
        default_factory=list,
    )


class ExecutionLogs(BaseModel):
    """
    Holds the output messages of an execution, split by stream.

    ``stdout`` and ``stderr`` are kept in separate lists so callers can
    process each stream on its own. Both start out empty.
    """

    stdout: list["OutputMessage"] = Field(
        description="Standard output messages",
        default_factory=list,
    )
    stderr: list["OutputMessage"] = Field(
        description="Standard error messages",
        default_factory=list,
    )

    def add_stdout(self, message: OutputMessage) -> None:
        """Append ``message`` to the end of the stdout list."""
        stdout_messages = self.stdout
        stdout_messages.append(message)

    def add_stderr(self, message: OutputMessage) -> None:
        """Append ``message`` to the end of the stderr list."""
        stderr_messages = self.stderr
        stderr_messages.append(message)


class ExecutionComplete(BaseModel):
    """
    Event signalling that a code execution has finished.

    Records when the execution completed and how long it took.
    """

    model_config = ConfigDict(populate_by_name=True)

    timestamp: int = Field(description="Unix timestamp when execution completed")
    execution_time_in_millis: int = Field(
        alias="execution_time_in_millis",
        description="Execution time in milliseconds",
    )


class ExecutionInit(BaseModel):
    """
    Event signalling that a code execution has been initialized.

    Carries the execution identifier and the start timestamp.
    """

    model_config = ConfigDict(populate_by_name=True)

    id: str = Field(description="Execution identifier")
    timestamp: int = Field(
        description="Unix timestamp when execution started",
    )


class Execution(BaseModel):
    """
    Complete code execution session.

    This is the main model that tracks the entire lifecycle of code execution,
    including results, errors, and output logs. It serves as the central container
    for all execution-related data that is exposed to users.
    """

    id: str | None = Field(default=None, description="Unique execution identifier")
    execution_count: int | None = Field(
        default=None,
        description="Sequential execution counter",
        alias="execution_count",
    )
    result: list["ExecutionResult"] = Field(
        default_factory=list, description="Execution results"
    )
    error: ExecutionError | None = Field(
        default=None, description="Error information if failed"
    )
    logs: ExecutionLogs = Field(
        default_factory=ExecutionLogs, description="Output logs"
    )

    def add_result(self, result: ExecutionResult) -> None:
        """Append ``result`` to the list of execution results."""
        self.result.append(result)

    @property
    def text(self) -> str:
        """Return combined stdout and result text.

        Includes both stdout log messages and execution results,
        stripping trailing newlines from each chunk to avoid double
        line breaks when messages already contain trailing newlines
        (e.g. code-interpreter streaming output).

        Results whose ``text`` is ``None`` or empty are skipped.

        Returns:
            The chunks joined with a single newline, or an empty string when
            there are no chunks.
        """
        chunks: list[str] = []

        for msg in self.logs.stdout:
            chunks.append(msg.text.rstrip("\n"))

        for res in self.result:
            if res.text:
                chunks.append(res.text.rstrip("\n"))

        return "\n".join(chunks)

    def __str__(self) -> str:
        """Return a human-readable summary of the execution.

        The summary consists of up to three sections joined by newlines:
        the combined text (see ``text``), a ``[stderr]`` section, and an
        ``[error]`` line. Sections with nothing to show are omitted.
        """
        parts: list[str] = []

        # Mirror the filter used by ``text``: a result without text contributes
        # nothing there, so it must not trigger an empty section here (which
        # would show up as a stray leading blank line before later sections).
        if self.logs.stdout or any(res.text for res in self.result):
            parts.append(self.text)

        if self.logs.stderr:
            stderr_text = "\n".join(msg.text.rstrip("\n") for msg in self.logs.stderr)
            parts.append(f"[stderr]\n{stderr_text}")

        if self.error:
            parts.append(f"[error] {self.error.name}: {self.error.value}")

        return "\n".join(parts)

    model_config = ConfigDict(populate_by_name=True)


# Type alias for async handlers: a callable taking one argument (the event
# payload, typed as Any) and returning an awaitable that resolves to None.
AsyncOutputHandler = Callable[[Any], Awaitable[None]]


class ExecutionHandlers(BaseModel):
    """
    Async handlers for code execution output processing.

    Provides optional async callback handlers for different types of execution events.
    All handlers are async functions that will be awaited when events occur, so
    each handler must return an awaitable (a plain synchronous function or
    lambda returning ``None`` does not satisfy ``AsyncOutputHandler``).

    Every handler defaults to ``None``, meaning no callback is registered for
    that event.

    Example:
        ```python
        async def handle_stdout(msg: OutputMessage):
            print(f"Output: {msg.text}")
            # Can perform async operations
            await log_to_database(msg.text)

        async def handle_stderr(msg: OutputMessage):
            print(f"Error: {msg.text}")

        handlers = ExecutionHandlers(
            on_stdout=handle_stdout,
            on_stderr=handle_stderr,
        )
        ```
    """

    on_stdout: AsyncOutputHandler | None = Field(
        default=None, description="Async handler for stdout messages"
    )
    on_stderr: AsyncOutputHandler | None = Field(
        default=None, description="Async handler for stderr messages"
    )
    on_result: AsyncOutputHandler | None = Field(
        default=None, description="Async handler for execution results"
    )
    on_execution_complete: AsyncOutputHandler | None = Field(
        default=None,
        description="Async handler for execution completion",
        alias="on_execution_complete",
    )
    on_error: AsyncOutputHandler | None = Field(
        default=None, description="Async handler for execution errors"
    )
    on_init: AsyncOutputHandler | None = Field(
        default=None, description="Async handler for execution init"
    )

    # arbitrary_types_allowed is set alongside populate_by_name for the
    # callable-typed fields above.
    model_config = ConfigDict(populate_by_name=True, arbitrary_types_allowed=True)


class RunCommandOpts(BaseModel):
    """
    Options controlling how a command is executed.

    Covers background execution, working directory, timeout, the Unix
    user/group to run as, and extra environment variables. ``gid`` may only
    be supplied together with ``uid``.
    """

    model_config = ConfigDict(populate_by_name=True, arbitrary_types_allowed=True)

    background: bool = Field(
        description="Whether to run in background (detached)",
        default=False,
    )
    working_directory: str | None = Field(
        alias="working_directory",
        description="Directory to execute command in",
        default=None,
    )
    timeout: timedelta | None = Field(
        description="Maximum execution time; server will terminate the command when reached. If omitted, the server will not enforce any timeout.",
        default=None,
    )
    uid: int | None = Field(
        description="Unix user ID used to run the command process.",
        default=None,
        ge=0,
    )
    gid: int | None = Field(
        description="Unix group ID used to run the command process. Requires uid to be set.",
        default=None,
        ge=0,
    )
    envs: dict[str, str] | None = Field(
        description="Environment variables injected into the command process.",
        default=None,
    )

    @model_validator(mode="after")
    def validate_uid_gid_dependency(self) -> "RunCommandOpts":
        """Reject a ``gid`` given without a ``uid``, matching the server contract."""
        gid_without_uid = self.uid is None and self.gid is not None
        if gid_without_uid:
            raise ValueError("uid is required when gid is provided")
        return self


class CommandStatus(BaseModel):
    """
    Status snapshot of a foreground or background command.

    Every field is optional and defaults to ``None``.
    """

    model_config = ConfigDict(populate_by_name=True)

    id: str | None = Field(description="Command ID", default=None)
    content: str | None = Field(
        description="Original command content",
        default=None,
    )
    running: bool | None = Field(
        description="True if command is still running",
        default=None,
    )
    exit_code: int | None = Field(
        description="Exit code if the command has finished",
        default=None,
    )
    error: str | None = Field(
        description="Error message if the command failed",
        default=None,
    )
    started_at: datetime | None = Field(
        alias="started_at",
        description="Command start time (RFC3339)",
        default=None,
    )
    finished_at: datetime | None = Field(
        alias="finished_at",
        description="Command finish time (RFC3339)",
        default=None,
    )


class CommandLogs(BaseModel):
    """
    Logs of a background command.

    Contains the raw log content and, optionally, the latest tail cursor
    that can be used for incremental reads.
    """

    content: str = Field(description="Raw stdout/stderr content")
    cursor: int | None = Field(
        description="Latest tail cursor for incremental reads",
        default=None,
    )


================================================
FILE: sdks/sandbox/python/src/opensandbox/models/execd_sync.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous execution-related models.

This mirrors `opensandbox.models.execd` but uses synchronous handlers.
Core data models (Execution, OutputMessage, etc.) are reused from the async module.
"""

from collections.abc import Callable
from typing import Any

from pydantic import BaseModel, ConfigDict, Field

# A synchronous handler takes one argument (the event payload) and returns None.
SyncOutputHandler = Callable[[Any], None]


class ExecutionHandlersSync(BaseModel):
    """
    Synchronous callbacks for streaming execution output.

    Each field is an optional ``SyncOutputHandler``; ``None`` (the default)
    means no callback is registered for that event.
    """

    model_config = ConfigDict(populate_by_name=True, arbitrary_types_allowed=True)

    on_stdout: SyncOutputHandler | None = Field(default=None)
    on_stderr: SyncOutputHandler | None = Field(default=None)
    on_result: SyncOutputHandler | None = Field(default=None)
    on_execution_complete: SyncOutputHandler | None = Field(
        alias="on_execution_complete",
        default=None,
    )
    on_error: SyncOutputHandler | None = Field(default=None)
    on_init: SyncOutputHandler | None = Field(default=None)


================================================
FILE: sdks/sandbox/python/src/opensandbox/models/filesystem.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Filesystem-related data models.

Models for file operations, directory listings, and filesystem metadata.
"""

from datetime import datetime
from io import IOBase

from pydantic import BaseModel, ConfigDict, Field, field_validator


class EntryInfo(BaseModel):
    """
    Filesystem metadata for one file or directory in the sandbox.

    Covers the entry's path, permission mode, owner and group, size, and its
    modification and creation timestamps.
    """

    model_config = ConfigDict(populate_by_name=True)

    path: str = Field(description="Absolute path of the file or directory")
    mode: int = Field(
        description="Unix file mode/permissions as integer (e.g., 644)",
    )
    owner: str = Field(description="Owner username of the file or directory")
    group: str = Field(description="Group name of the file or directory")
    size: int = Field(
        description="Size of the file in bytes (0 for directories)",
    )
    modified_at: datetime = Field(
        alias="modified_at",
        description="Timestamp when entry was last modified",
    )
    created_at: datetime = Field(
        alias="created_at",
        description="Timestamp when entry was created",
    )


class WriteEntry(BaseModel):
    """
    Request to write content to a file.

    Creates or overwrites a file with the specified content, permissions, and ownership.
    Supports text, binary data, and file-like objects through the flexible
    ``data`` parameter (``str``, ``bytes``, or an ``IOBase`` instance).

    Validation:
        - ``path`` and ``encoding`` must not be blank (whitespace-only counts as blank).
        - ``mode`` must be non-negative.
    """

    path: str = Field(description="Destination file path where content will be written")
    # The annotation accepts IOBase in addition to str/bytes; the description
    # is kept in sync with it so generated schemas do not under-document it.
    data: str | bytes | IOBase | None = Field(
        default=None,
        description="Content to write - can be str, bytes, or a file-like object (IOBase)",
    )
    # NOTE(review): the default is the decimal literal 755; presumably the
    # digits are interpreted as octal-style permissions downstream - confirm.
    mode: int = Field(default=755, description="Unix file permissions as integer")
    owner: str | None = Field(default=None, description="Owner username to set")
    group: str | None = Field(default=None, description="Group name to set")
    encoding: str = Field(
        default="utf-8", description="Character encoding for string data"
    )
    # Required because IOBase is not a type pydantic knows how to validate.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    @field_validator("path")
    @classmethod
    def path_must_not_be_empty(cls, v: str) -> str:
        """Reject an empty or whitespace-only path; return it unchanged otherwise."""
        if not v.strip():
            raise ValueError("Path cannot be blank")
        return v

    @field_validator("mode")
    @classmethod
    def mode_must_be_non_negative(cls, v: int) -> int:
        """Reject a negative mode; return it unchanged otherwise."""
        if v < 0:
            raise ValueError("Mode must be non-negative")
        return v

    @field_validator("encoding")
    @classmethod
    def encoding_must_not_be_empty(cls, v: str) -> str:
        """Reject an empty or whitespace-only encoding; return it unchanged otherwise."""
        if not v.strip():
            raise ValueError("Encoding cannot be blank")
        return v


class MoveEntry(BaseModel):
    """
    Request to move or rename a file or directory inside the sandbox.

    ``src`` and ``dest`` can be populated by field name or through their
    aliases ``source`` and ``destination``. Both must be non-blank.
    """

    model_config = ConfigDict(populate_by_name=True)

    src: str = Field(
        alias="source",
        description="Source path of the file or directory to move",
    )
    dest: str = Field(
        alias="destination",
        description="Destination path where the file or directory should be moved",
    )

    @field_validator("src")
    @classmethod
    def src_must_not_be_empty(cls, v: str) -> str:
        """Return ``v`` unchanged unless it is empty or whitespace-only."""
        if v.strip():
            return v
        raise ValueError("Source path cannot be blank")

    @field_validator("dest")
    @classmethod
    def dest_must_not_be_empty(cls, v: str) -> str:
        """Return ``v`` unchanged unless it is empty or whitespace-only."""
        if v.strip():
            return v
        raise ValueError("Destination path cannot be blank")


class SetPermissionEntry(BaseModel):
    """
    Request to change the permissions and/or ownership of an existing entry.

    ``path`` is required and must be non-blank; ``owner`` and ``group`` are
    optional; ``mode`` defaults to 755 and must be non-negative.
    """

    path: str = Field(description="Target path of the file or directory to modify")
    owner: str | None = Field(description="New owner username", default=None)
    group: str | None = Field(description="New group name", default=None)
    mode: int = Field(
        description="New Unix file permissions as integer",
        default=755,
    )

    @field_validator("path")
    @classmethod
    def path_must_not_be_empty(cls, v: str) -> str:
        """Return ``v`` unchanged unless it is empty or whitespace-only."""
        if v.strip():
            return v
        raise ValueError("Path cannot be blank")

    @field_validator("mode")
    @classmethod
    def mode_must_be_non_negative(cls, v: int) -> int:
        """Return ``v`` unchanged unless it is negative."""
        if v >= 0:
            return v
        raise ValueError("Mode must be non-negative")


class ContentReplaceEntry(BaseModel):
    """
    Request to replace a string inside a file.

    Names the target file, the exact content to look for, and the content
    to substitute in its place. ``path`` must be non-blank.
    """

    model_config = ConfigDict(populate_by_name=True)

    path: str = Field(description="Target file path containing content to replace")
    old_content: str = Field(
        alias="old_content",
        description="Exact string content to find and replace",
    )
    new_content: str = Field(
        alias="new_content",
        description="Replacement string content to substitute",
    )

    @field_validator("path")
    @classmethod
    def path_must_not_be_empty(cls, v: str) -> str:
        """Return ``v`` unchanged unless it is empty or whitespace-only."""
        if v.strip():
            return v
        raise ValueError("Path cannot be blank")


class SearchEntry(BaseModel):
    """
    Request to search for files matching a pattern.

    The search starts at ``path`` and looks for entries matching ``pattern``.
    Both values must be non-blank.
    """

    path: str = Field(description="Starting directory path for the search")
    pattern: str = Field(
        description="Search pattern (supports glob patterns like *.py, *.txt)",
    )

    @field_validator("path")
    @classmethod
    def path_must_not_be_empty(cls, v: str) -> str:
        """Return ``v`` unchanged unless it is empty or whitespace-only."""
        if v.strip():
            return v
        raise ValueError("Path cannot be blank")

    @field_validator("pattern")
    @classmethod
    def pattern_must_not_be_empty(cls, v: str) -> str:
        """Return ``v`` unchanged unless it is empty or whitespace-only."""
        if v.strip():
            return v
        raise ValueError("Pattern cannot be blank")


================================================
FILE: sdks/sandbox/python/src/opensandbox/models/sandboxes.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Sandbox-related data models.

Models for sandbox creation, configuration, status, and lifecycle management.
"""

from datetime import datetime
from typing import Literal

from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator


class SandboxImageAuth(BaseModel):
    """
    Credentials used to authenticate against a container registry.

    Both ``username`` and ``password`` are required and must be non-blank.
    """

    username: str = Field(description="Registry username")
    password: str = Field(description="Registry password or access token")

    @field_validator("username")
    @classmethod
    def username_must_not_be_empty(cls, v: str) -> str:
        """Return ``v`` unchanged unless it is empty or whitespace-only."""
        if v.strip():
            return v
        raise ValueError("Username cannot be blank")

    @field_validator("password")
    @classmethod
    def password_must_not_be_empty(cls, v: str) -> str:
        """Return ``v`` unchanged unless it is empty or whitespace-only."""
        if v.strip():
            return v
        raise ValueError("Password cannot be blank")


class SandboxImageSpec(BaseModel):
    """
    Describes the container image a sandbox is created from.

    The image reference may be passed positionally or by keyword; registry
    credentials are optional.

    Usage:
        # Simple creation with just image
        spec = SandboxImageSpec("python:3.11")

        # With private registry auth
        spec = SandboxImageSpec(
            "my-registry.com/image:tag",
            auth=SandboxImageAuth(username="user", password="pass")
        )
    """

    image: str = Field(
        description="Image reference (e.g., 'ubuntu:22.04', 'python:3.11')",
    )
    auth: SandboxImageAuth | None = Field(
        description="Authentication for private registries",
        default=None,
    )

    def __init__(
        self, image: str | None = None, *, auth: SandboxImageAuth | None = None, **data: object
    ) -> None:
        """
        Build the spec, allowing ``image`` to be given positionally.

        Args:
            image: Container image reference (positional or keyword)
            auth: Optional authentication for private registries
            **data: Remaining keyword arguments, forwarded to the base
                model initializer.
        """
        # Only explicitly supplied (non-None) values are merged into the
        # keyword data, in the order image then auth.
        explicit = {"image": image, "auth": auth}
        for key, value in explicit.items():
            if value is not None:
                data[key] = value
        super().__init__(**data)

    @field_validator("image")
    @classmethod
    def image_must_not_be_empty(cls, v: str) -> str:
        """Return ``v`` unchanged unless it is empty or whitespace-only."""
        if v.strip():
            return v
        raise ValueError("Image cannot be blank")


class NetworkRule(BaseModel):
    """
    A single egress rule: an action applied to a matching network target.

    ``action`` is restricted to ``"allow"`` or ``"deny"``; ``target`` must be
    non-blank.
    """

    action: Literal["allow", "deny"] = Field(
        description='Whether to allow or deny matching targets. One of "allow" or "deny".',
    )
    target: str = Field(
        description='FQDN or wildcard domain (e.g., "example.com", "*.example.com").',
    )

    @field_validator("target")
    @classmethod
    def target_must_not_be_empty(cls, v: str) -> str:
        """Return ``v`` unchanged unless it is empty or whitespace-only."""
        if v.strip():
            return v
        raise ValueError("Network rule target cannot be blank")


class NetworkPolicy(BaseModel):
    """
    Egress network policy, shaped like the sidecar `/policy` request body.

    ``default_action`` (alias ``defaultAction``) applies when no rule matches
    and defaults to ``"deny"``; ``egress`` is an optional ordered rule list.
    """

    model_config = ConfigDict(populate_by_name=True)

    default_action: Literal["allow", "deny"] | None = Field(
        alias="defaultAction",
        description='Default action when no rule matches. Defaults to "deny".',
        default="deny",
    )
    egress: list[NetworkRule] | None = Field(
        description="List of egress rules evaluated in order.",
        default=None,
    )


# ============================================================================
# Volume Models
# ============================================================================


class Host(BaseModel):
    """
    Volume backend that bind-mounts a host directory.

    Maps a directory on the host filesystem into the container.
    Only available when the runtime supports host mounts.
    """

    path: str = Field(
        description="Absolute path on the host filesystem to mount.",
    )

    @field_validator("path")
    @classmethod
    def path_must_be_absolute(cls, v: str) -> str:
        """Return ``v`` unchanged when it starts with ``/``; reject it otherwise."""
        if v.startswith("/"):
            return v
        raise ValueError("Host path must be an absolute path starting with '/'")


class PVC(BaseModel):
    """
    Volume backend that mounts a Kubernetes PersistentVolumeClaim.

    References an existing PVC in the same namespace as the sandbox pod.
    Only available in Kubernetes runtime.
    """

    model_config = ConfigDict(populate_by_name=True)

    claim_name: str = Field(
        alias="claimName",
        description="Name of the PersistentVolumeClaim in the same namespace.",
    )

    @field_validator("claim_name")
    @classmethod
    def claim_name_must_not_be_empty(cls, v: str) -> str:
        """Return ``v`` unchanged unless it is empty or whitespace-only."""
        if v.strip():
            return v
        raise ValueError("PVC claim name cannot be blank")


class OSSFS(BaseModel):
    """
    Alibaba Cloud OSS mount backend via ossfs.

    Although the credential fields are declared optional, validation fails
    unless both ``access_key_id`` and ``access_key_secret`` are non-empty.
    """

    model_config = ConfigDict(populate_by_name=True)

    bucket: str = Field(description="OSS bucket name.")
    endpoint: str = Field(
        description="OSS endpoint (e.g., oss-cn-hangzhou.aliyuncs.com).",
    )
    version: Literal["1.0", "2.0"] = Field(
        description="ossfs major version used by runtime mount integration.",
        default="2.0",
    )
    options: list[str] | None = Field(
        description="Additional ossfs mount options.",
        default=None,
    )
    access_key_id: str | None = Field(
        description="OSS access key ID for inline credentials mode.",
        alias="accessKeyId",
        default=None,
    )
    access_key_secret: str | None = Field(
        description="OSS access key secret for inline credentials mode.",
        alias="accessKeySecret",
        default=None,
    )

    @model_validator(mode="after")
    def validate_inline_credentials(self) -> "OSSFS":
        """Require both inline credential fields to be present and non-empty."""
        has_both_credentials = bool(self.access_key_id and self.access_key_secret)
        if has_both_credentials:
            return self
        raise ValueError(
            "OSSFS inline credentials are required: accessKeyId and accessKeySecret."
        )


class Volume(BaseModel):
    """
    A storage mount attached to a sandbox.

    A volume is made of:
    - A unique name identifier
    - Exactly one backend (host, pvc, ossfs) with backend-specific fields
    - Common mount settings (mount_path, read_only, sub_path)

    Usage:
        # Host path mount (read-write by default)
        volume = Volume(
            name="workdir",
            host=Host(path="/data/opensandbox"),
            mount_path="/mnt/work",
        )

        # PVC mount (read-only)
        volume = Volume(
            name="models",
            pvc=PVC(claim_name="shared-models-pvc"),
            mount_path="/mnt/models",
            read_only=True,
        )
    """

    model_config = ConfigDict(populate_by_name=True)

    name: str = Field(
        description="Unique identifier for the volume within the sandbox.",
    )
    host: Host | None = Field(
        description="Host path bind mount backend.",
        default=None,
    )
    pvc: PVC | None = Field(
        description="Kubernetes PersistentVolumeClaim mount backend.",
        default=None,
    )
    ossfs: OSSFS | None = Field(
        description="OSSFS mount backend.",
        default=None,
    )
    mount_path: str = Field(
        alias="mountPath",
        description="Absolute path inside the container where the volume is mounted.",
    )
    read_only: bool = Field(
        alias="readOnly",
        description="If true, the volume is mounted as read-only. Defaults to false (read-write).",
        default=False,
    )
    sub_path: str | None = Field(
        alias="subPath",
        description="Optional subdirectory under the backend path to mount.",
        default=None,
    )

    @field_validator("name")
    @classmethod
    def name_must_not_be_empty(cls, v: str) -> str:
        """Return ``v`` unchanged unless it is empty or whitespace-only."""
        if v.strip():
            return v
        raise ValueError("Volume name cannot be blank")

    @field_validator("mount_path")
    @classmethod
    def mount_path_must_be_absolute(cls, v: str) -> str:
        """Return ``v`` unchanged when it starts with ``/``; reject it otherwise."""
        if v.startswith("/"):
            return v
        raise ValueError("Mount path must be an absolute path starting with '/'")

    @model_validator(mode="after")
    def validate_exactly_one_backend(self) -> "Volume":
        """Check that one, and only one, of host / pvc / ossfs is set."""
        candidates = (self.host, self.pvc, self.ossfs)
        provided = sum(1 for backend in candidates if backend is not None)
        if provided == 0:
            raise ValueError(
                "Exactly one backend (host, pvc, ossfs) must be specified, but none was provided."
            )
        if provided > 1:
            raise ValueError(
                "Exactly one backend (host, pvc, ossfs) must be specified, but multiple were provided."
            )
        return self


class SandboxStatus(BaseModel):
    """
    Current status of a sandbox.

    ``state`` is required; the reason code, human-readable message, and last
    transition timestamp are optional and default to ``None``.
    """

    model_config = ConfigDict(populate_by_name=True)

    state: str = Field(
        description="Current state (e.g., RUNNING, PENDING, PAUSED, TERMINATED)",
    )
    reason: str | None = Field(
        description="Short reason code for current state",
        default=None,
    )
    message: str | None = Field(
        description="Human-readable status message",
        default=None,
    )
    last_transition_at: datetime | None = Field(
        alias="last_transition_at",
        description="Timestamp of last state transition",
        default=None,
    )


class SandboxInfo(BaseModel):
    """
    Full description of a sandbox instance.

    Includes its identifier, status, entrypoint, creation time, and the
    optional expiration time, image specification, and custom metadata.
    """

    model_config = ConfigDict(populate_by_name=True)

    id: str = Field(description="Unique identifier of the sandbox")
    status: SandboxStatus = Field(description="Current status of the sandbox")
    entrypoint: list[str] = Field(
        description="Command line arguments used to start the sandbox",
    )
    expires_at: datetime | None = Field(
        alias="expires_at",
        description="Scheduled termination timestamp. Null means manual cleanup mode.",
        default=None,
    )
    created_at: datetime = Field(
        alias="created_at",
        description="Creation timestamp",
    )
    image: SandboxImageSpec | None = Field(
        description="Image specification used to create sandbox",
        default=None,
    )
    metadata: dict[str, str] | None = Field(
        description="Custom metadata",
        default=None,
    )


class SandboxCreateResponse(BaseModel):
    """
    Payload returned by a sandbox creation call.

    Carries only the identifier of the sandbox that was created.
    """

    id: str = Field(
        description="Unique identifier of the newly created sandbox",
    )


class SandboxRenewResponse(BaseModel):
    """
    Payload returned after a sandbox expiration time has been renewed.
    """

    # Allow population by field name in addition to the declared alias.
    model_config = ConfigDict(populate_by_name=True)

    expires_at: datetime = Field(
        alias="expires_at",
        description="The new absolute expiration time in UTC (RFC 3339 format).",
    )


class SandboxEndpoint(BaseModel):
    """
    How to reach a sandbox: the endpoint string plus any required headers.
    """

    endpoint: str = Field(
        description="Sandbox connection endpoint",
    )
    # A fresh empty dict is created per instance when no headers are given.
    headers: dict[str, str] = Field(
        description="Headers that must be included on every request targeting this endpoint (e.g. when the server requires them for routing or auth). Empty if not required.",
        default_factory=dict,
    )


class PaginationInfo(BaseModel):
    """
    Paging details that accompany a paginated listing.
    """

    # Allow population by field name in addition to the declared aliases.
    model_config = ConfigDict(populate_by_name=True)

    page: int = Field(
        description="Current page number (0-indexed)",
    )
    page_size: int = Field(
        alias="page_size",
        description="Number of items per page",
    )
    total_items: int = Field(
        alias="total_items",
        description="Total number of items across all pages",
    )
    total_pages: int = Field(
        alias="total_pages",
        description="Total number of pages",
    )
    has_next_page: bool = Field(
        alias="has_next_page",
        description="True if there is a next page available",
    )


class PagedSandboxInfos(BaseModel):
    """
    One page of sandbox descriptions together with its pagination metadata.
    """

    # Allow population by field name in addition to the declared alias.
    model_config = ConfigDict(populate_by_name=True)

    sandbox_infos: list[SandboxInfo] = Field(
        alias="sandbox_infos",
        description="List of sandbox details for current page",
    )
    pagination: PaginationInfo = Field(
        description="Pagination metadata",
    )


class SandboxFilter(BaseModel):
    """
    Optional criteria used when listing sandboxes.

    Every field defaults to None. When supplied, ``page_size`` must be
    strictly positive and ``page`` must not be negative.
    """

    # Allow population by field name in addition to the declared alias.
    model_config = ConfigDict(populate_by_name=True)

    states: list[str] | None = Field(
        description="Filter by sandbox states",
        default=None,
    )
    metadata: dict[str, str] | None = Field(
        description="Filter by metadata key-value pairs",
        default=None,
    )
    page_size: int | None = Field(
        alias="page_size",
        description="Number of items per page",
        default=None,
    )
    page: int | None = Field(
        description="Page number (0-indexed)",
        default=None,
    )

    @field_validator("page_size")
    @classmethod
    def page_size_must_be_positive(cls, v: int | None) -> int | None:
        """Accept None or a value greater than zero; reject anything else."""
        if v is None or v > 0:
            return v
        raise ValueError("Page size must be positive")

    @field_validator("page")
    @classmethod
    def page_must_be_non_negative(cls, v: int | None) -> int | None:
        """Accept None or a value of zero or more; reject negative pages."""
        if v is None or v >= 0:
            return v
        raise ValueError("Page must be non-negative")


class SandboxMetrics(BaseModel):
    """
    Point-in-time CPU and memory usage figures for a sandbox.
    """

    # Allow population by field name in addition to the declared aliases.
    model_config = ConfigDict(populate_by_name=True)

    # CPU figures.
    cpu_count: float = Field(
        alias="cpu_count",
        description="Number of CPU cores available/allocated",
    )
    cpu_used_percentage: float = Field(
        alias="cpu_used_percentage",
        description="Current CPU usage as percentage (0.0 - 100.0)",
    )

    # Memory figures, in MiB.
    memory_total_in_mib: float = Field(
        alias="memory_total_in_mib",
        description="Total memory available in Mebibytes",
    )
    memory_used_in_mib: float = Field(
        alias="memory_used_in_mib",
        description="Memory currently used in Mebibytes",
    )

    # When the sample was taken.
    timestamp: int = Field(
        description="Timestamp of metric collection (Unix epoch milliseconds)",
    )


class SandboxState:
    """High-level lifecycle state of the sandbox.

    This class provides constant string values for sandbox states.
    Note that the sandbox service may introduce new states in future
    versions; clients should handle unknown string values gracefully.

    Common States:
        PENDING (str): Sandbox is being provisioned.
        RUNNING (str): Sandbox is running and ready to accept requests.
        PAUSING (str): Sandbox is in the process of pausing.
        PAUSED (str): Sandbox has been paused while retaining its state.
        STOPPING (str): Sandbox is being terminated.
        TERMINATED (str): Sandbox has been successfully terminated.
        FAILED (str): Sandbox encountered a critical error.
        UNKNOWN (str): State is unknown or unsupported by the current version.

    State Transitions:
        - Pending -> Running: After creation completes.
        - Running -> Pausing: When pause is requested.
        - Pausing -> Paused: After pause operation completes.
        - Paused -> Running: When resume is requested.
        - Running/Paused -> Stopping: When kill is requested or TTL expires.
        - Stopping -> Terminated: After kill/timeout operation completes.
        - Pending/Running/Paused -> Failed: On critical error.
    """

    PENDING = "Pending"
    RUNNING = "Running"
    PAUSING = "Pausing"
    PAUSED = "Paused"
    STOPPING = "Stopping"
    TERMINATED = "Terminated"
    FAILED = "Failed"
    UNKNOWN = "Unknown"

    @classmethod
    def values(cls) -> set[str]:
        """Return the set of all known state values.

        Every public, upper-case, string-valued class attribute counts as a
        state constant. Names are resolved with ``dir``/``getattr`` rather than
        ``cls.__dict__`` so that a subclass adding extra states still reports
        the inherited ones (and an overridden constant reports its new value).

        Returns:
            The distinct state strings defined on this class and its bases.
        """
        known: set[str] = set()
        for name in dir(cls):
            # "_FOO".isupper() is True, so private names need an explicit check.
            if not name.isupper() or name.startswith("_"):
                continue
            value = getattr(cls, name)
            # Only strings are state values; skip any other upper-case attribute.
            if isinstance(value, str):
                known.add(value)
        return known


================================================
FILE: sdks/sandbox/python/src/opensandbox/py.typed
================================================


================================================
FILE: sdks/sandbox/python/src/opensandbox/sandbox.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Main Sandbox client implementation.
"""

import asyncio
import logging
import time
from collections.abc import Awaitable, Callable
from datetime import datetime, timedelta, timezone
from typing import Any

from opensandbox.adapters.factory import AdapterFactory
from opensandbox.config import ConnectionConfig
from opensandbox.constants import DEFAULT_EGRESS_PORT, DEFAULT_EXECD_PORT
from opensandbox.exceptions import (
    InvalidArgumentException,
    SandboxException,
    SandboxInternalException,
    SandboxReadyTimeoutException,
)
from opensandbox.models.sandboxes import (
    NetworkPolicy,
    NetworkRule,
    SandboxEndpoint,
    SandboxImageSpec,
    SandboxInfo,
    SandboxMetrics,
    SandboxRenewResponse,
    Volume,
)
from opensandbox.services import (
    Commands,
    Egress,
    Filesystem,
    Health,
    Metrics,
    Sandboxes,
)

logger = logging.getLogger(__name__)


class Sandbox:
    """
    Main entrypoint for the Open Sandbox SDK providing secure, isolated execution environments.

    This class provides a comprehensive interface for interacting with containerized sandbox
    environments, combining lifecycle management with high-level operations for file system
    access, command execution, and real-time monitoring.

    Key Features:

    - **Secure Isolation**: Complete Linux OS access in isolated containers
    - **File System Operations**: Create, read, update, delete files and directories
    - **Multi-language Execution**: Support for Python, Java, Bash, and other languages
    - **Real-time Command Execution**: Streaming output with timeout handling
    - **Resource Management**: CPU, memory, and storage constraints
    - **Lifecycle Management**: Create, pause, resume, terminate operations
    - **Health Monitoring**: Automatic readiness detection and status tracking

    Usage Example:

    ```python
    from datetime import timedelta

    from opensandbox.models.sandboxes import SandboxImageSpec, SandboxImageAuth
    from opensandbox.models.execd import RunCommandOpts
    from opensandbox.sandbox import Sandbox

    # Create with simple image (positional argument)
    sandbox = await Sandbox.create(
        "python:3.11",
        resource={"cpu": "1", "memory": "500Mi"},
        timeout=timedelta(minutes=30)
    )

    # Or with private registry auth
    sandbox = await Sandbox.create(
        SandboxImageSpec(
            "my-registry.com/my-image:latest",
            auth=SandboxImageAuth(username="user", password="pass")
        ),
    )

    # Use the sandbox
    await sandbox.files.write_file("script.py", "print('Hello World')")
    result = await sandbox.commands.run("python script.py")
    print(result.logs.stdout[0].text)  # Output: Hello World

    # Always clean up resources
    await sandbox.kill()
    await sandbox.close()
    ```
    """

    def __init__(
        self,
        sandbox_id: str,
        sandbox_service: Sandboxes,
        filesystem_service: Filesystem,
        command_service: Commands,
        health_service: Health,
        metrics_service: Metrics,
        egress_service: Egress,
        connection_config: ConnectionConfig,
        custom_health_check: Callable[["Sandbox"], Awaitable[bool]] | None = None,
    ) -> None:
        """
        Bind a Sandbox handle to its already-constructed service adapters.

        Internal constructor: use Sandbox.create() or Sandbox.connect() instead.
        It only stores the given collaborators; no I/O is performed here.
        """
        # Identity and connection settings.
        self.id = sandbox_id
        self._connection_config = connection_config

        # Optional user-supplied readiness probe; None means "use ping".
        self._custom_health_check = custom_health_check

        # Service adapters, stored as provided by the caller.
        self._sandbox_service = sandbox_service
        self._filesystem_service = filesystem_service
        self._command_service = command_service
        self._health_service = health_service
        self._metrics_service = metrics_service
        self._egress_service = egress_service

    @property
    def files(self) -> Filesystem:
        """
        File system service bound to this sandbox.

        Returns the filesystem adapter that was passed to the constructor.
        """
        filesystem = self._filesystem_service
        return filesystem

    @property
    def commands(self) -> Commands:
        """
        Command execution service bound to this sandbox.

        Returns the command adapter that was passed to the constructor.
        """
        command_runner = self._command_service
        return command_runner

    @property
    def metrics(self) -> Metrics:
        """
        Metrics service bound to this sandbox.

        Returns the metrics adapter that was passed to the constructor.
        """
        metrics_service = self._metrics_service
        return metrics_service

    @property
    def connection_config(self) -> ConnectionConfig:
        """Connection configuration (including shared transport) used by this sandbox."""
        config = self._connection_config
        return config

    async def get_info(self) -> SandboxInfo:
        """
        Fetch the current information record for this sandbox.

        Delegates to the sandbox service using this instance's id.

        Returns:
            The sandbox info reported by the service (status, metadata, ...)

        Raises:
            SandboxException: if status cannot be retrieved
        """
        info = await self._sandbox_service.get_sandbox_info(self.id)
        return info

    async def get_endpoint(self, port: int) -> SandboxEndpoint:
        """
        Resolve the network endpoint of this sandbox for a given port.

        The connection config's ``use_server_proxy`` flag is forwarded to the
        sandbox service together with the sandbox id and the port.

        Args:
            port: The port number to get the endpoint for

        Returns:
            Endpoint information including connection details

        Raises:
            SandboxException: if endpoint cannot be retrieved
        """
        via_proxy = self.connection_config.use_server_proxy
        endpoint = await self._sandbox_service.get_sandbox_endpoint(
            self.id, port, via_proxy
        )
        return endpoint

    async def get_metrics(self) -> SandboxMetrics:
        """
        Fetch the current resource usage metrics for this sandbox.

        Delegates to the metrics service using this instance's id.

        Returns:
            Current sandbox metrics (CPU and memory usage with a timestamp)

        Raises:
            SandboxException: if metrics cannot be retrieved
        """
        snapshot = await self._metrics_service.get_metrics(self.id)
        return snapshot

    async def renew(self, timeout: timedelta) -> SandboxRenewResponse:
        """
        Push back the sandbox expiration to delay automatic termination.

        The requested expiration is computed locally as "now (UTC) + timeout"
        and sent to the sandbox service.

        Args:
            timeout: Duration to add to the current time to set the new expiration

        Returns:
            Renew response including the new expiration time.

        Raises:
            SandboxException: if the operation fails
        """
        # Timezone-aware UTC "now" keeps the result unambiguous across timezones.
        now_utc = datetime.now(timezone.utc)
        target_expiration = now_utc + timeout
        logger.info(
            f"Renewing sandbox {self.id} timeout, estimated expiration: {target_expiration}"
        )
        response = await self._sandbox_service.renew_sandbox_expiration(
            self.id, target_expiration
        )
        return response

    async def get_egress_policy(self) -> NetworkPolicy:
        """
        Fetch the egress policy currently reported by the egress service.
        """
        policy = await self._egress_service.get_policy()
        return policy

    async def patch_egress_rules(self, rules: list[NetworkRule]) -> None:
        """
        Apply a patch of egress rules to this sandbox (sidecar merge semantics).

        Merge contract:
        - A rule in the patch overrides an existing rule with the same target.
        - Existing rules for other targets are left untouched.
        - If the patch contains several rules for one target, the first wins.

        The policy is not replaced as a whole and the current defaultAction
        is not changed by this operation.

        Args:
            rules: Rules to merge into the current egress policy
        """
        egress = self._egress_service
        await egress.patch_rules(rules)

    async def pause(self) -> None:
        """
        Ask the sandbox service to pause this sandbox, preserving its state.

        A paused sandbox can be brought back later with Sandbox.resume().

        Raises:
            SandboxException: if pause operation fails
        """
        logger.info(f"Pausing sandbox: {self.id}")
        service = self._sandbox_service
        await service.pause_sandbox(self.id)


    async def kill(self) -> None:
        """
        Ask the sandbox service to terminate the remote sandbox instance.

        Termination is irreversible.

        Note: only the remote kill request is issued here. Local resources
        stay open; call close() or use the async context manager to release
        them.

        Raises:
            SandboxException: if termination fails
        """
        logger.info(f"Killing sandbox: {self.id}")
        service = self._sandbox_service
        await service.kill_sandbox(self.id)

    async def close(self) -> None:
        """
        Release the local resources held for this sandbox.

        Closes the transport when the connection config owns it. The remote
        sandbox is NOT terminated; call kill() first if that is what you want.

        Note: failures are logged as warnings and never raised, so this method
        is safe to call from context manager cleanup.
        """
        config = self._connection_config
        try:
            # The transport is closed only when the SDK owns it (default transport).
            await config.close_transport_if_owned()
            logger.debug(f"Closed resources for sandbox {self.id}")
        except Exception as close_error:
            warning_text = f"Error closing resources for sandbox {self.id}: {close_error}"
            logger.warning(warning_text, exc_info=True)

    async def is_healthy(self) -> bool:
        """
        Report whether the sandbox currently passes its health check.

        Uses the custom health check supplied at construction time when there
        is one; otherwise falls back to the built-in ping.

        Returns:
            True if sandbox is healthy, False otherwise
        """
        probe = self._custom_health_check
        if not probe:
            return await self._ping()
        return await probe(self)

    async def _ping(self) -> bool:
        """
        Check if the sandbox is alive by pinging the health service.

        Returns:
            The health service's ping result, or False if the ping raised.
            Failures are deliberately not propagated (best-effort probe), but
            the cause is logged at debug level so it is not lost entirely.
        """
        try:
            return await self._health_service.ping(self.id)
        except Exception as exc:
            # Keep the best-effort contract, but record why the ping failed.
            logger.debug("Ping failed for sandbox %s: %s", self.id, exc)
            return False

    async def check_ready(
        self,
        timeout: timedelta,
        polling_interval: timedelta,
    ) -> None:
        """
        Wait for the sandbox to pass health checks with polling.

        The deadline is tracked on the monotonic clock, so system clock
        adjustments cannot shorten or extend the wait. The pause between
        attempts is capped at the time remaining, so the method does not
        overshoot the timeout by up to a full polling interval.

        Args:
            timeout: Maximum time to wait for health check to pass
            polling_interval: Time between health check attempts

        Raises:
            SandboxReadyTimeoutException: if health check doesn't pass within timeout
        """
        timeout_seconds = timeout.total_seconds()
        interval_seconds = polling_interval.total_seconds()
        logger.info(
            f"Waiting for sandbox {self.id} to pass health check (timeout: {timeout_seconds}s)"
        )

        # time.monotonic() never goes backwards, unlike wall-clock time.time().
        deadline = time.monotonic() + timeout_seconds
        attempt = 0
        last_exception: Exception | None = None

        while time.monotonic() < deadline:
            attempt += 1
            logger.debug(f"Health check attempt #{attempt} for sandbox {self.id}")

            try:
                if await self.is_healthy():
                    logger.info(
                        f"Sandbox {self.id} passed health check after {attempt} attempts"
                    )
                    return
                # A clean "False" supersedes any error seen on an earlier attempt.
                last_exception = None
                logger.debug(f"Health check attempt #{attempt} returned false")
            except Exception as e:
                last_exception = e
                logger.debug(
                    f"Health check attempt #{attempt} failed with exception: {e}"
                )

            # Not healthy yet: wait before retrying, but never past the deadline.
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            await asyncio.sleep(min(interval_seconds, remaining))

        # Explicit None check: do not rely on the truthiness of an exception object.
        error_detail = (
            f"Last error: {last_exception}"
            if last_exception is not None
            else "Health check returned false continuously"
        )
        connection_detail = (
            f"ConnectionConfig(domain={self.connection_config.get_domain()}, "
            f"use_server_proxy={self.connection_config.use_server_proxy})"
        )
        if self.connection_config.use_server_proxy:
            hint = (
                "Hint: server proxy mode is enabled. Check server-to-sandbox connectivity "
                "and server API key/auth configuration."
            )
        else:
            hint = (
                "Hint: direct sandbox endpoint access is enabled. If the SDK cannot directly "
                "reach sandbox network/ports, set ConnectionConfig(use_server_proxy=True). "
                "For Docker bridge deployments where server runs in a container, also configure "
                "server [docker].host_ip to a host-reachable address."
            )

        final_message = (
            f"Sandbox health check timed out after {timeout_seconds}s "
            f"({attempt} attempts). {error_detail}. {connection_detail}. {hint}"
        )

        logger.error(final_message)
        raise SandboxReadyTimeoutException(final_message)

    @classmethod
    async def create(
        cls,
        image: SandboxImageSpec | str,
        *,
        timeout: timedelta | None = timedelta(minutes=10),
        ready_timeout: timedelta = timedelta(seconds=30),
        env: dict[str, str] | None = None,
        metadata: dict[str, str] | None = None,
        resource: dict[str, str] | None = None,
        network_policy: NetworkPolicy | None = None,
        extensions: dict[str, str] | None = None,
        entrypoint: list[str] | None = None,
        volumes: list[Volume] | None = None,
        connection_config: ConnectionConfig | None = None,
        health_check: Callable[["Sandbox"], Awaitable[bool]] | None = None,
        health_check_polling_interval: timedelta = timedelta(milliseconds=200),
        skip_health_check: bool = False,
    ) -> "Sandbox":
        """
        Create a new sandbox instance with the specified configuration.

        If anything fails after the sandbox was created remotely, a best-effort
        kill of that sandbox is attempted and the SDK-owned transport (if any)
        is closed before the error is raised. Failures during this cleanup are
        logged and never replace the original error.

        Args:
            image: Container image specification including image reference and optional auth.
                A plain string is wrapped into ``SandboxImageSpec(image=...)``.
            timeout: Maximum sandbox lifetime. Pass None to require explicit cleanup.
            ready_timeout: Maximum time to wait for sandbox to become ready
            env: Environment variables for the sandbox (defaults to empty)
            metadata: Custom metadata for the sandbox (defaults to empty)
            resource: Resource limits (CPU, memory, etc.).
                Defaults to ``{"cpu": "1", "memory": "2Gi"}``.
            network_policy: Optional outbound network policy (egress).
            extensions: Opaque extension parameters passed through to the server as-is.
                Prefer namespaced keys (e.g. ``storage.id``).
            entrypoint: Command to run as entrypoint.
                Defaults to ``["tail", "-f", "/dev/null"]``.
            volumes: Optional list of volume mounts for persistent storage.
                Each volume specifies exactly one backend (host path, PVC, or OSSFS)
                and mount configuration.
            connection_config: Connection configuration
            health_check: Custom async health check function
            health_check_polling_interval: Time between health check attempts
            skip_health_check: If True, do NOT wait for sandbox readiness/health; returned instance may not be ready yet.

        Returns:
            Fully configured Sandbox instance (ready unless skip_health_check is True)

        Raises:
            SandboxException: if sandbox creation or initialization fails
                (unexpected errors are wrapped in SandboxInternalException)
        """
        config = (connection_config or ConnectionConfig()).with_transport_if_missing()
        entrypoint = entrypoint or ["tail", "-f", "/dev/null"]
        env = env or {}
        metadata = metadata or {}
        resource = resource or {"cpu": "1", "memory": "2Gi"}
        extensions = extensions or {}

        if isinstance(image, str):
            image = SandboxImageSpec(image=image)

        timeout_log = "manual-cleanup" if timeout is None else f"{timeout.total_seconds()}s"
        logger.info(
            "Creating sandbox with image: %s (timeout: %s)",
            image.image,
            timeout_log,
        )
        factory = AdapterFactory(config)
        # Tracked outside the try block so the error handler knows whether a
        # remote sandbox already exists and needs to be torn down.
        sandbox_id: str | None = None
        sandbox_service: Sandboxes | None = None

        try:
            sandbox_service = factory.create_sandbox_service()
            response = await sandbox_service.create_sandbox(
                image,
                entrypoint,
                env,
                metadata,
                timeout,
                resource,
                network_policy,
                extensions,
                volumes,
            )
            sandbox_id = response.id

            execd_endpoint = await sandbox_service.get_sandbox_endpoint(
                response.id, DEFAULT_EXECD_PORT, config.use_server_proxy
            )
            egress_endpoint = await sandbox_service.get_sandbox_endpoint(
                response.id, DEFAULT_EGRESS_PORT, config.use_server_proxy
            )

            sandbox = cls(
                sandbox_id=response.id,
                sandbox_service=sandbox_service,
                filesystem_service=factory.create_filesystem_service(execd_endpoint),
                command_service=factory.create_command_service(execd_endpoint),
                health_service=factory.create_health_service(execd_endpoint),
                metrics_service=factory.create_metrics_service(execd_endpoint),
                egress_service=factory.create_egress_service(egress_endpoint),
                connection_config=config,
                custom_health_check=health_check,
            )

            if not skip_health_check:
                await sandbox.check_ready(ready_timeout, health_check_polling_interval)
                logger.info("Sandbox %s is ready", sandbox.id)
            else:
                logger.info(
                    "Sandbox %s created (skip_health_check=true, sandbox may not be ready yet)",
                    sandbox.id,
                )

            return sandbox
        except Exception as e:
            if sandbox_id and sandbox_service:
                try:
                    logger.warning(
                        "Sandbox creation failed during initialization. Attempting to terminate zombie sandbox: %s",
                        sandbox_id,
                    )
                    await sandbox_service.kill_sandbox(sandbox_id)
                except Exception as cleanup_ex:
                    logger.error(
                        "Failed to clean up sandbox %s after creation failure",
                        sandbox_id,
                        exc_info=cleanup_ex,
                    )

            # Closing the transport is best-effort: a failure here must not
            # replace the original creation error seen by the caller.
            try:
                await config.close_transport_if_owned()
            except Exception as close_ex:
                logger.warning(
                    "Failed to close transport after sandbox creation failure",
                    exc_info=close_ex,
                )

            if isinstance(e, SandboxException):
                raise
            logger.error("Unexpected exception during sandbox creation", exc_info=e)
            raise SandboxInternalException(
                f"Internal exception when creating sandbox: {e}"
            ) from e

    @classmethod
    async def connect(
        cls,
        sandbox_id: str,
        connection_config: ConnectionConfig | None = None,
        health_check: Callable[["Sandbox"], Awaitable[bool]] | None = None,
        connect_timeout: timedelta = timedelta(seconds=30),
        health_check_polling_interval: timedelta = timedelta(milliseconds=200),
        skip_health_check: bool = False,
    ) -> "Sandbox":
        """
        Connect to an existing sandbox instance by ID.

        Resolves the execd and egress endpoints of the sandbox, builds the
        service adapters and, unless skipped, waits for the health check to
        pass. On failure the SDK-owned transport (if any) is closed before the
        error is raised; a failure while closing is logged and never replaces
        the original error.

        Args:
            sandbox_id: ID of the existing sandbox
            connection_config: Connection configuration
            health_check: Custom async health check function
            connect_timeout: Max time to wait for sandbox readiness/health after connecting.
            health_check_polling_interval: Polling interval used while waiting for readiness/health.
            skip_health_check: If True, do NOT wait for readiness/health; returned instance may not be ready yet.

        Returns:
            Connected Sandbox instance

        Raises:
            InvalidArgumentException: if required configuration is missing
            SandboxException: if sandbox connection fails
                (unexpected errors are wrapped in SandboxInternalException)
        """
        if not sandbox_id:
            raise InvalidArgumentException("Sandbox ID must be specified")
        # Accept any string identifier.
        sandbox_id = str(sandbox_id)

        config = (connection_config or ConnectionConfig()).with_transport_if_missing()

        logger.info("Connecting to sandbox: %s", sandbox_id)
        factory = AdapterFactory(config)

        try:
            sandbox_service = factory.create_sandbox_service()
            execd_endpoint = await sandbox_service.get_sandbox_endpoint(
                sandbox_id, DEFAULT_EXECD_PORT, config.use_server_proxy
            )
            egress_endpoint = await sandbox_service.get_sandbox_endpoint(
                sandbox_id, DEFAULT_EGRESS_PORT, config.use_server_proxy
            )

            sandbox = cls(
                sandbox_id=sandbox_id,
                sandbox_service=sandbox_service,
                filesystem_service=factory.create_filesystem_service(execd_endpoint),
                command_service=factory.create_command_service(execd_endpoint),
                health_service=factory.create_health_service(execd_endpoint),
                metrics_service=factory.create_metrics_service(execd_endpoint),
                egress_service=factory.create_egress_service(egress_endpoint),
                connection_config=config,
                custom_health_check=health_check,
            )

            # Exactly one success message is logged per call, whichever branch runs.
            if not skip_health_check:
                await sandbox.check_ready(connect_timeout, health_check_polling_interval)
                logger.info("Connected to sandbox %s", sandbox_id)
            else:
                logger.info(
                    "Connected to sandbox %s (skip_health_check=true, sandbox may not be ready yet)",
                    sandbox_id,
                )

            return sandbox
        except Exception as e:
            # Closing the transport is best-effort: a failure here must not
            # replace the original connection error seen by the caller.
            try:
                await config.close_transport_if_owned()
            except Exception as close_ex:
                logger.warning(
                    "Failed to close transport after sandbox connection failure",
                    exc_info=close_ex,
                )

            if isinstance(e, SandboxException):
                raise
            logger.error("Unexpected exception during sandbox connection", exc_info=e)
            raise SandboxInternalException(f"Failed to connect to sandbox: {e}") from e

    @classmethod
    async def resume(
        cls,
        sandbox_id: str,
        connection_config: ConnectionConfig | None = None,
        health_check: Callable[["Sandbox"], Awaitable[bool]] | None = None,
        resume_timeout: timedelta = timedelta(seconds=30),
        health_check_polling_interval: timedelta = timedelta(milliseconds=200),
        skip_health_check: bool = False,
    ) -> "Sandbox":
        """
        Resume a paused sandbox by ID and return a new, usable Sandbox instance.

        This method performs the server-side resume operation, then re-resolves the execd and
        egress endpoints (which may change across pause/resume on some backends), rebuilds
        service adapters, and optionally waits for readiness/health. On failure the SDK-owned
        transport (if any) is closed before the error is raised; a failure while closing is
        logged and never replaces the original error.

        Args:
            sandbox_id: ID of the paused sandbox to resume.
            connection_config: Connection configuration (shared transport, headers, timeouts).
            health_check: Optional custom async health check function (falls back to ping).
            resume_timeout: Max time to wait for sandbox readiness/health after resuming.
            health_check_polling_interval: Polling interval used while waiting for readiness/health.
            skip_health_check: If True, do NOT wait for readiness/health; returned instance may not be ready yet.

        Returns:
            Sandbox instance bound to the resumed sandbox

        Raises:
            InvalidArgumentException: if sandbox_id is empty
            SandboxException: if the resume operation or the readiness wait fails
                (unexpected errors are wrapped in SandboxInternalException)
        """
        if not sandbox_id:
            raise InvalidArgumentException("Sandbox ID must be specified")
        # Accept any string identifier.
        sandbox_id = str(sandbox_id)

        config = (connection_config or ConnectionConfig()).with_transport_if_missing()

        logger.info("Resuming sandbox: %s", sandbox_id)
        factory = AdapterFactory(config)

        try:
            sandbox_service = factory.create_sandbox_service()
            await sandbox_service.resume_sandbox(sandbox_id)

            # Endpoints are resolved only after the resume call has returned.
            execd_endpoint = await sandbox_service.get_sandbox_endpoint(
                sandbox_id, DEFAULT_EXECD_PORT, config.use_server_proxy
            )
            egress_endpoint = await sandbox_service.get_sandbox_endpoint(
                sandbox_id, DEFAULT_EGRESS_PORT, config.use_server_proxy
            )

            sandbox = cls(
                sandbox_id=sandbox_id,
                sandbox_service=sandbox_service,
                filesystem_service=factory.create_filesystem_service(execd_endpoint),
                command_service=factory.create_command_service(execd_endpoint),
                health_service=factory.create_health_service(execd_endpoint),
                metrics_service=factory.create_metrics_service(execd_endpoint),
                egress_service=factory.create_egress_service(egress_endpoint),
                connection_config=config,
                custom_health_check=health_check,
            )

            if not skip_health_check:
                await sandbox.check_ready(resume_timeout, health_check_polling_interval)
                logger.info("Resumed sandbox %s", sandbox_id)
            else:
                logger.info(
                    "Resumed sandbox %s (skip_health_check=true, sandbox may not be ready yet)",
                    sandbox_id,
                )

            return sandbox
        except Exception as e:
            # Closing the transport is best-effort: a failure here must not
            # replace the original resume error seen by the caller.
            try:
                await config.close_transport_if_owned()
            except Exception as close_ex:
                logger.warning(
                    "Failed to close transport after sandbox resume failure",
                    exc_info=close_ex,
                )

            if isinstance(e, SandboxException):
                raise
            logger.error("Unexpected exception during sandbox resume", exc_info=e)
            raise SandboxInternalException(f"Failed to resume sandbox: {e}") from e

    async def __aenter__(self) -> "Sandbox":
        """Enter the async context manager; the sandbox itself is the context value."""
        entered = self
        return entered

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Leave the async context manager by closing local resources via close()."""
        # The exception details are ignored; close() runs on every exit path.
        await self.close()
        return None


================================================
FILE: sdks/sandbox/python/src/opensandbox/services/__init__.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
OpenSandbox service interfaces.

Protocol definitions for sandbox services.
"""

from opensandbox.services.command import Commands
from opensandbox.services.egress import Egress
from opensandbox.services.filesystem import Filesystem
from opensandbox.services.health import Health
from opensandbox.services.metrics import Metrics
from opensandbox.services.sandbox import Sandboxes

# Public API of ``opensandbox.services``: the service Protocol classes imported
# above. This list is also what ``from opensandbox.services import *`` binds.
__all__ = [
    "Commands",
    "Egress",
    "Filesystem",
    "Health",
    "Metrics",
    "Sandboxes",
]


================================================
FILE: sdks/sandbox/python/src/opensandbox/services/command.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Command service interface.

Protocol for sandbox command execution operations.
"""

from typing import Protocol

from opensandbox.models.execd import (
    CommandLogs,
    CommandStatus,
    Execution,
    ExecutionHandlers,
    RunCommandOpts,
)


class Commands(Protocol):
    """
    Command execution service for sandbox environments.

    This service provides secure command execution capabilities within sandbox
    environments, with support for streaming output, timeout handling, and
    session management.

    This class is a structural ``typing.Protocol``: it only declares the async
    method signatures an implementation must provide and contains no behavior
    of its own (every body is ``...``).
    """

    async def run(
        self,
        command: str,
        *,
        opts: RunCommandOpts | None = None,
        handlers: ExecutionHandlers | None = None,
    ) -> Execution:
        """
        Execute a shell command in the sandbox environment.

        The command can be executed in foreground (streaming) or background mode
        based on the request configuration.

        ``opts`` and ``handlers`` are keyword-only.

        Args:
            command: Shell command text to execute
            opts: Command execution options (e.g. background, working_directory).
                None leaves the choice of defaults to the implementation.
            handlers: Optional async handlers for streaming events
                (stdout/stderr/result/init/complete/error). None means no
                callbacks are registered.

        Returns:
            An Execution handle representing the running command instance

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def interrupt(self, execution_id: str) -> None:
        """
        Interrupt and terminate a running command execution.

        This sends a termination signal (usually SIGTERM/SIGKILL) to the process
        associated with the given execution ID.

        Args:
            execution_id: Unique identifier of the execution to interrupt

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def get_command_status(self, execution_id: str) -> CommandStatus:
        """
        Get the current running status for a command.

        Args:
            execution_id: Unique identifier of the execution to query

        Returns:
            CommandStatus describing running state and exit code if available

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def get_background_command_logs(
        self, execution_id: str, cursor: int | None = None
    ) -> CommandLogs:
        """
        Get background command logs (non-streamed).

        Args:
            execution_id: Unique identifier of the execution to query
            cursor: Optional line cursor for incremental reads. None (the
                default) sends no cursor. Presumably the cursor returned by a
                previous call is passed back here to continue reading --
                confirm against the server API.

        Returns:
            CommandLogs containing raw output and latest cursor

        Raises:
            SandboxException: if the operation fails
        """
        ...


================================================
FILE: sdks/sandbox/python/src/opensandbox/services/egress.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Egress service interface.

Protocol for direct egress sidecar operations.
"""

from typing import Protocol

from opensandbox.models.sandboxes import NetworkPolicy, NetworkRule


class Egress(Protocol):
    """
    Direct runtime egress policy service.

    Structural ``typing.Protocol`` declaring the async operations used to read
    and patch the egress policy held by the sidecar; it carries no
    implementation itself.
    """

    async def get_policy(self) -> NetworkPolicy:
        """
        Retrieve the current egress policy from the sidecar.

        Returns:
            The NetworkPolicy currently reported by the sidecar

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def patch_rules(self, rules: list[NetworkRule]) -> None:
        """
        Patch egress rules via the sidecar policy API.

        Merge semantics:
        - Incoming rules take priority over existing rules with the same target.
        - Existing rules for other targets remain in place.
        - Within one patch payload, the first rule for a target wins.
        - The current defaultAction is preserved.

        Args:
            rules: Rules to merge into the existing policy, following the
                merge semantics above

        Raises:
            SandboxException: if the operation fails
        """
        ...


================================================
FILE: sdks/sandbox/python/src/opensandbox/services/filesystem.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Filesystem service interface.

Protocol for sandbox filesystem operations.
"""
from collections.abc import AsyncIterator
from io import IOBase
from typing import Protocol

from opensandbox.models.filesystem import (
    ContentReplaceEntry,
    EntryInfo,
    MoveEntry,
    SearchEntry,
    SetPermissionEntry,
    WriteEntry,
)


class Filesystem(Protocol):
    """
    Filesystem operations service for sandbox environments.

    This service provides comprehensive file system management capabilities
    within sandbox environments, including file operations, directory management,
    and metadata handling with proper security controls.

    This class is a structural ``typing.Protocol``: it declares the async
    method signatures only; concrete behavior lives in the implementations.
    """

    async def read_file(
        self,
        path: str,
        *,
        encoding: str = "utf-8",
        range_header: str | None = None,
    ) -> str:
        """
        Read the content of a file as a string with specified encoding.

        ``encoding`` and ``range_header`` are keyword-only.

        Args:
            path: The absolute or relative path to the file to read
            encoding: Character encoding for the file content (default: UTF-8)
            range_header: HTTP byte range to read (e.g., "bytes=0-1023").
                None (the default) requests no range.

        Returns:
            The file content as a string

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def read_bytes(
        self,
        path: str,
        *,
        range_header: str | None = None,
    ) -> bytes:
        """
        Read the content of a file as bytes.

        Args:
            path: The absolute or relative path to the file to read
            range_header: HTTP byte range to read (e.g., "bytes=0-1023").
                Keyword-only; None (the default) requests no range.

        Returns:
            The file content as bytes

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def read_bytes_stream(
        self,
        path: str,
        *,
        chunk_size: int = 64 * 1024,
        range_header: str | None = None,
    ) -> AsyncIterator[bytes]:
        """
        Stream file content as bytes chunks (read_* naming).

        Streaming counterpart of ``read_bytes``: the content is delivered
        through an async iterator instead of one ``bytes`` object.

        Args:
            path: The absolute or relative path to the file to read
            chunk_size: Chunk size for the stream, presumably in bytes
                (default: 64 * 1024)
            range_header: HTTP byte range to read (e.g., "bytes=0-1023")

        Returns:
            An async iterator yielding ``bytes`` chunks

        Raises:
            SandboxException: presumably on failure, as for the other methods
                of this protocol -- not stated in the original documentation

        NOTE(review): this is declared ``async def`` with an ``AsyncIterator``
        return annotation, so as typed the call must itself be awaited to
        obtain the iterator. Confirm that the concrete implementations and
        callers agree with this shape.
        """
        ...

    async def write_files(self, entries: list[WriteEntry]) -> None:
        """
        Write content to files based on the provided write entries.

        Args:
            entries: List of WriteEntry objects specifying files to write and their content

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def write_file(
        self,
        path: str,
        data: str | bytes | IOBase,
        *,
        encoding: str = "utf-8",
        mode: int = 755,
        owner: str | None = None,
        group: str | None = None,
    ) -> None:
        """
        Write content to a single file (convenience method).

        Everything after ``data`` is keyword-only.

        Args:
            path: Destination file path
            data: Content to write, given as text, raw bytes, or a file-like
                ``IOBase`` object
            encoding: Character encoding
            mode: Unix file permissions. The default is the decimal integer
                ``755``, not the octal literal ``0o755``.
                NOTE(review): presumably the digits are interpreted as an
                octal permission string downstream -- confirm before passing
                values such as ``0o644``.
            owner: Owner username
            group: Group name

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def create_directories(self, entries: list[WriteEntry]) -> None:
        """
        Create directories based on the provided entries.

        Args:
            entries: List of WriteEntry objects specifying directories to create

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def delete_files(self, paths: list[str]) -> None:
        """
        Delete the specified files.

        Args:
            paths: List of file paths to delete

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def delete_directories(self, paths: list[str]) -> None:
        """
        Delete the specified directories.

        Args:
            paths: List of directory paths to delete

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def move_files(self, entries: list[MoveEntry]) -> None:
        """
        Move files from source to destination paths.

        Args:
            entries: List of MoveEntry objects specifying source and destination paths

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def set_permissions(self, entries: list[SetPermissionEntry]) -> None:
        """
        Set file system permissions for the specified entries.

        Args:
            entries: List of SetPermissionEntry objects specifying files and their new permissions

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def replace_contents(self, entries: list[ContentReplaceEntry]) -> None:
        """
        Replace content in files based on search and replace patterns.

        Args:
            entries: List of ContentReplaceEntry objects specifying replacement operations

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def search(self, entry: SearchEntry) -> list[EntryInfo]:
        """
        Search for files and directories based on the specified criteria.

        Args:
            entry: SearchEntry object containing search parameters and criteria

        Returns:
            List of EntryInfo objects containing metadata for matching files/directories

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def get_file_info(self, paths: list[str]) -> dict[str, EntryInfo]:
        """
        Retrieve file information for the specified paths.

        Args:
            paths: List of file/directory paths to get information for

        Returns:
            Map where keys are file paths and values are EntryInfo objects containing file metadata

        Raises:
            SandboxException: if the operation fails
        """
        ...


================================================
FILE: sdks/sandbox/python/src/opensandbox/services/health.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Health service interface.

Protocol for sandbox health monitoring operations.
"""

from typing import Protocol


class Health(Protocol):
    """
    Health monitoring service for sandbox environments.

    This service provides health checking and monitoring capabilities
    for sandbox instances.

    Structural ``typing.Protocol`` with a single async operation, ``ping``.
    """

    async def ping(self, sandbox_id: str) -> bool:
        """
        Check if a sandbox is alive and responsive.

        Note the two failure channels in this contract: an unhealthy sandbox is
        reported through the ``False`` return value, while a failure of the
        operation itself is reported by raising.

        Args:
            sandbox_id: Unique identifier of the sandbox

        Returns:
            True if the sandbox is healthy, False otherwise

        Raises:
            SandboxException: if the operation fails
        """
        ...


================================================
FILE: sdks/sandbox/python/src/opensandbox/services/metrics.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Metrics service interface.

Protocol for sandbox metrics and monitoring operations.
"""

from typing import Protocol

from opensandbox.models.sandboxes import SandboxMetrics


class Metrics(Protocol):
    """
    Metrics and monitoring service for sandbox environments.

    This service provides resource usage monitoring and performance
    metrics for sandbox instances.

    Structural ``typing.Protocol`` with a single async operation,
    ``get_metrics``.
    """

    async def get_metrics(self, sandbox_id: str) -> SandboxMetrics:
        """
        Retrieve real-time metrics for a sandbox.

        Args:
            sandbox_id: Unique identifier of the sandbox

        Returns:
            Current sandbox metrics including CPU, memory, and I/O statistics,
            as a SandboxMetrics object

        Raises:
            SandboxException: if the operation fails
        """
        ...


================================================
FILE: sdks/sandbox/python/src/opensandbox/services/sandbox.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Sandbox service interface.

Protocol for sandbox lifecycle management operations.
"""

from datetime import datetime, timedelta
from typing import Protocol

from opensandbox.models.sandboxes import (
    NetworkPolicy,
    PagedSandboxInfos,
    SandboxCreateResponse,
    SandboxEndpoint,
    SandboxFilter,
    SandboxImageSpec,
    SandboxInfo,
    SandboxRenewResponse,
    Volume,
)


class Sandboxes(Protocol):
    """
    Core sandbox lifecycle management service.

    This service provides a clean abstraction over sandbox creation, management,
    and termination operations, completely isolating business logic from API implementation details.

    This class is a structural ``typing.Protocol``: it declares the async
    method signatures only and contains no behavior of its own.
    """

    async def create_sandbox(
        self,
        spec: SandboxImageSpec,
        entrypoint: list[str],
        env: dict[str, str],
        metadata: dict[str, str],
        timeout: timedelta | None,
        resource: dict[str, str],
        network_policy: NetworkPolicy | None,
        extensions: dict[str, str],
        volumes: list[Volume] | None,
    ) -> SandboxCreateResponse:
        """
        Create a new sandbox with the specified configuration.

        None of the parameters has a default, so callers must supply every
        argument (passing None explicitly where the type allows it).

        Args:
            spec: Container image specification for provisioning the sandbox.
            entrypoint: Command to run as the sandbox's main process.
            env: Environment variables injected into the sandbox runtime.
            metadata: User-defined metadata used for management and filtering.
            timeout: Sandbox lifetime. Pass None to create a sandbox that requires explicit cleanup.
            resource: Runtime resource limits (e.g. cpu/memory). Exact semantics are server-defined.
            network_policy: Optional outbound network policy (egress).
            extensions: Opaque extension parameters passed through to the server as-is.
                Prefer namespaced keys (e.g. ``storage.id``).
            volumes: Optional list of volume mounts for persistent storage.

        Returns:
            Sandbox create response

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def get_sandbox_info(self, sandbox_id: str) -> SandboxInfo:
        """
        Retrieve information about an existing sandbox.

        Args:
            sandbox_id: Unique identifier of the sandbox

        Returns:
            Current sandbox information

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def list_sandboxes(self, filter: SandboxFilter) -> PagedSandboxInfos:
        """
        List sandboxes matching the given filter.

        Args:
            filter: Filter criteria. The argument itself is required by this
                signature (it has no default and is not typed as optional).

        Returns:
            A PagedSandboxInfos object holding the sandbox information that
            matches the filter

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def get_sandbox_endpoint(
        self, sandbox_id: str, port: int, use_server_proxy: bool = False
    ) -> SandboxEndpoint:
        """
        Get sandbox endpoint.

        Args:
            sandbox_id: Sandbox ID
            port: Endpoint port number
            use_server_proxy: Whether to use server proxy for endpoint
                (default: False)

        Returns:
            Target sandbox endpoint

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def pause_sandbox(self, sandbox_id: str) -> None:
        """
        Pause a running sandbox, preserving its state.

        Args:
            sandbox_id: Unique identifier of the sandbox

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def resume_sandbox(self, sandbox_id: str) -> None:
        """
        Resume a paused sandbox.

        Args:
            sandbox_id: Unique identifier of the sandbox

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def renew_sandbox_expiration(
        self, sandbox_id: str, new_expiration_time: datetime
    ) -> SandboxRenewResponse:
        """
        Renew the expiration time of a sandbox.

        Args:
            sandbox_id: Unique identifier of the sandbox
            new_expiration_time: New expiration timestamp.
                NOTE(review): whether a timezone-aware datetime is required is
                not visible from this interface -- confirm against the
                implementation.

        Returns:
            Renew response including the new expiration time.

        Raises:
            SandboxException: if the operation fails
        """
        ...

    async def kill_sandbox(self, sandbox_id: str) -> None:
        """
        Terminate a sandbox and release all associated resources.

        Args:
            sandbox_id: Unique identifier of the sandbox

        Raises:
            SandboxException: if the operation fails
        """
        ...


================================================
FILE: sdks/sandbox/python/src/opensandbox/sync/__init__.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous OpenSandbox SDK entrypoints.
"""

from opensandbox.sync.manager import SandboxManagerSync
from opensandbox.sync.sandbox import SandboxSync

# Public entry points re-exported by the ``opensandbox.sync`` package.
__all__ = ["SandboxSync", "SandboxManagerSync"]


================================================
FILE: sdks/sandbox/python/src/opensandbox/sync/adapters/__init__.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous adapter implementations.
"""

from opensandbox.sync.adapters.command_adapter import CommandsAdapterSync
from opensandbox.sync.adapters.egress_adapter import EgressAdapterSync
from opensandbox.sync.adapters.factory import AdapterFactorySync
from opensandbox.sync.adapters.filesystem_adapter import FilesystemAdapterSync
from opensandbox.sync.adapters.health_adapter import HealthAdapterSync
from opensandbox.sync.adapters.metrics_adapter import MetricsAdapterSync
from opensandbox.sync.adapters.sandboxes_adapter import SandboxesAdapterSync

# Public API of ``opensandbox.sync.adapters``: the synchronous adapter classes
# imported above, plus the factory that is listed last.
__all__ = [
    "CommandsAdapterSync",
    "EgressAdapterSync",
    "FilesystemAdapterSync",
    "HealthAdapterSync",
    "MetricsAdapterSync",
    "SandboxesAdapterSync",
    "AdapterFactorySync",
]


================================================
FILE: sdks/sandbox/python/src/opensandbox/sync/adapters/command_adapter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous command adapter implementation (including SSE streaming).
"""

import json
import logging

import httpx

from opensandbox.adapters.converter.event_node import EventNode
from opensandbox.adapters.converter.exception_converter import (
    ExceptionConverter,
)
from opensandbox.adapters.converter.execution_converter import (
    ExecutionConverter,
)
from opensandbox.adapters.converter.response_handler import (
    extract_request_id,
    handle_api_error,
)
from opensandbox.config.connection_sync import ConnectionConfigSync
from opensandbox.exceptions import InvalidArgumentException, SandboxApiException
from opensandbox.models.execd import (
    CommandLogs,
    CommandStatus,
    Execution,
    RunCommandOpts,
)
from opensandbox.models.execd_sync import ExecutionHandlersSync
from opensandbox.models.sandboxes import SandboxEndpoint
from opensandbox.sync.adapters.converter.execution_event_dispatcher import (
    ExecutionEventDispatcherSync,
)
from opensandbox.sync.services.command import CommandsSync

logger = logging.getLogger(__name__)


class CommandsAdapterSync(CommandsSync):
    """
    Blocking implementation of :class:`~opensandbox.sync.services.command.CommandsSync`.

    Plain request/response calls are delegated to the openapi-python-client
    generated execd client. Command execution is different: its output arrives
    as an SSE (Server-Sent Events) stream, which is read directly with ``httpx``
    through a dedicated client whose read timeout is disabled.
    """

    RUN_COMMAND_PATH = "/command"

    def __init__(self, connection_config: ConnectionConfigSync, execd_endpoint: SandboxEndpoint) -> None:
        """
        Build the generated API client and the SSE streaming client.

        Args:
            connection_config: Connection configuration (shared transport, headers, timeouts)
            execd_endpoint: Endpoint for execd service
        """
        self.connection_config = connection_config
        self.execd_endpoint = execd_endpoint

        from opensandbox.api.execd import Client

        execd_base = f"{connection_config.protocol}://{execd_endpoint.endpoint}"
        request_seconds = connection_config.request_timeout.total_seconds()
        default_timeout = httpx.Timeout(request_seconds)

        # Later sources win on key clashes: connection headers override the
        # User-Agent entry, endpoint headers override both.
        shared_headers = {"User-Agent": connection_config.user_agent}
        shared_headers.update(connection_config.headers)
        shared_headers.update(execd_endpoint.headers)

        # The generated client is pointed at our own httpx client so that it
        # uses the shared transport and the merged headers.
        self._client = Client(base_url=execd_base, timeout=default_timeout)
        self._httpx_client = httpx.Client(
            base_url=execd_base,
            headers=shared_headers,
            timeout=default_timeout,
            transport=connection_config.transport,
        )
        self._client.set_httpx_client(self._httpx_client)

        # Streaming client: read and pool timeouts are disabled, connect/write
        # keep the configured request timeout. No base_url is set here; run()
        # passes an absolute URL.
        stream_timeout = httpx.Timeout(
            connect=request_seconds,
            read=None,
            write=request_seconds,
            pool=None,
        )
        self._sse_client = httpx.Client(
            headers={
                **shared_headers,
                "Accept": "text/event-stream",
                "Cache-Control": "no-cache",
            },
            timeout=stream_timeout,
            transport=connection_config.transport,
        )

    def _get_execd_url(self, path: str) -> str:
        """Return the absolute execd URL for ``path`` (protocol + endpoint + path)."""
        config, endpoint = self.connection_config, self.execd_endpoint
        return f"{config.protocol}://{endpoint.endpoint}{path}"

    def run(
        self,
        command: str,
        *,
        opts: RunCommandOpts | None = None,
        handlers: ExecutionHandlersSync | None = None,
    ) -> Execution:
        """
        Run a command and consume its SSE event stream until it ends.

        Each non-blank line of the stream is JSON-decoded (after removing an
        optional ``data:`` prefix), wrapped in an ``EventNode`` and passed to
        the dispatcher, which updates the returned ``Execution`` and invokes
        ``handlers``. A line that fails anywhere in that chain is logged and
        skipped; it does not abort the stream.

        Args:
            command: Command text; must contain a non-whitespace character
            opts: Execution options; a default ``RunCommandOpts()`` is used when falsy
            handlers: Optional sync handlers handed to the event dispatcher

        Returns:
            The Execution object populated from the streamed events

        Raises:
            InvalidArgumentException: if ``command`` is empty or whitespace only
            Exception: any other failure is logged and re-raised as the result
                of ``ExceptionConverter.to_sandbox_exception``
        """
        if not command.strip():
            raise InvalidArgumentException("Command cannot be empty")

        try:
            effective_opts = opts if opts else RunCommandOpts()
            request_body = ExecutionConverter.to_api_run_command_json(command, effective_opts)
            target_url = self._get_execd_url(self.RUN_COMMAND_PATH)

            execution = Execution(id=None, execution_count=None, result=[], error=None)
            dispatcher = ExecutionEventDispatcherSync(execution, handlers)

            with self._sse_client.stream("POST", target_url, json=request_body) as response:
                if response.status_code != 200:
                    # Drain the body before raising so the response is fully read.
                    response.read()
                    raise SandboxApiException(
                        message=f"Failed to run command. Status code: {response.status_code}",
                        status_code=response.status_code,
                        request_id=extract_request_id(response.headers),
                    )

                for raw_line in response.iter_lines():
                    if not (raw_line and raw_line.strip()):
                        continue
                    # Lines may arrive with or without the SSE "data:" prefix.
                    payload = raw_line[5:].strip() if raw_line.startswith("data:") else raw_line
                    try:
                        dispatcher.dispatch(EventNode(**json.loads(payload)))
                    except Exception as e:
                        logger.error("Failed to parse SSE line: %s", raw_line, exc_info=e)

            return execution

        except Exception as e:
            logger.error("Failed to run command (length: %s)", len(command), exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    def interrupt(self, execution_id: str) -> None:
        """
        Ask execd to interrupt a running command.

        Args:
            execution_id: Execution id returned by execd for the running command
        """
        try:
            from opensandbox.api.execd.api.command import interrupt_command

            api_response = interrupt_command.sync_detailed(client=self._client, id=execution_id)
            handle_api_error(api_response, "Interrupt command")
        except Exception as e:
            logger.error("Failed to interrupt command", exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    def get_command_status(self, execution_id: str) -> CommandStatus:
        """
        Fetch the current status of a command from execd.

        Args:
            execution_id: Execution id of the command to query

        Returns:
            The CommandStatus converted from the API response
        """
        try:
            from opensandbox.adapters.converter.command_model_converter import (
                to_command_status,
            )
            from opensandbox.adapters.converter.response_handler import require_parsed
            from opensandbox.api.execd.api.command import get_command_status
            from opensandbox.api.execd.models import CommandStatusResponse

            operation = "Get command status"
            api_response = get_command_status.sync_detailed(
                client=self._client,
                id=execution_id,
            )
            handle_api_error(api_response, operation)
            return to_command_status(
                require_parsed(api_response, CommandStatusResponse, operation)
            )
        except Exception as e:
            logger.error("Failed to get command status", exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    def get_background_command_logs(
        self, execution_id: str, cursor: int | None = None
    ) -> CommandLogs:
        """
        Fetch logs of a background command in one non-streamed request.

        Args:
            execution_id: Execution id of the command to query
            cursor: Optional cursor; when None the parameter is left unset

        Returns:
            CommandLogs with the response content and the cursor parsed from the
            ``EXECD-COMMANDS-TAIL-CURSOR`` response header (None when the header
            is missing, empty, or not an integer)
        """
        try:
            from opensandbox.adapters.converter.response_handler import require_parsed
            from opensandbox.api.execd.api.command import get_background_command_logs
            from opensandbox.api.execd.types import UNSET

            operation = "Get command logs"
            api_response = get_background_command_logs.sync_detailed(
                client=self._client,
                id=execution_id,
                cursor=UNSET if cursor is None else cursor,
            )
            handle_api_error(api_response, operation)
            content = require_parsed(api_response, str, operation)

            next_cursor = None
            raw_cursor = api_response.headers.get("EXECD-COMMANDS-TAIL-CURSOR")
            if raw_cursor:
                try:
                    next_cursor = int(raw_cursor)
                except ValueError:
                    # A malformed header is treated the same as a missing one.
                    pass
            return CommandLogs(content=content, cursor=next_cursor)
        except Exception as e:
            logger.error("Failed to get command logs", exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e


================================================
FILE: sdks/sandbox/python/src/opensandbox/sync/adapters/converter/__init__.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from opensandbox.sync.adapters.converter.execution_event_dispatcher import (
    ExecutionEventDispatcherSync,
)

__all__ = ["ExecutionEventDispatcherSync"]


================================================
FILE: sdks/sandbox/python/src/opensandbox/sync/adapters/converter/execution_event_dispatcher.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous dispatcher for processing execution events.
"""

from opensandbox.adapters.converter.event_node import EventNode
from opensandbox.models.execd import (
    Execution,
    ExecutionComplete,
    ExecutionError,
    ExecutionInit,
    ExecutionResult,
    OutputMessage,
)
from opensandbox.models.execd_sync import ExecutionHandlersSync


class ExecutionEventDispatcherSync:
    """
    Feeds server stream events into an Execution object and optional sync handlers.

    Every event is translated into the matching execd model, stored on the
    wrapped ``Execution`` where the event type calls for it, and then passed to
    the corresponding ``ExecutionHandlersSync`` callback if one is configured.
    """

    def __init__(self, execution: Execution, handlers: ExecutionHandlersSync | None = None) -> None:
        """
        Args:
            execution: Object updated with ids, logs, results, errors and counts.
            handlers: Optional callbacks invoked after the execution is updated.
        """
        self.handlers = handlers
        self.execution = execution

    def dispatch(self, event_node: EventNode) -> None:
        """
        Route a single event to the handler registered for its ``type``.

        Events whose type is not recognised are silently ignored.
        """
        kind = event_node.type
        ts = event_node.timestamp

        routes = (
            ("stdout", self._handle_stdout),
            ("stderr", self._handle_stderr),
            ("result", self._handle_result),
            ("error", self._handle_error),
            ("execution_complete", self._handle_execution_complete),
            ("init", self._handle_init),
        )
        for name, handler in routes:
            if kind == name:
                handler(event_node, ts)
                return

        # Counter updates only touch the execution object; no callback is involved.
        if kind == "execution_count" and event_node.execution_count is not None:
            self.execution.execution_count = event_node.execution_count

    def _notify(self, hook: str, payload: object) -> None:
        """Call the handler attribute named ``hook`` with ``payload`` when it is set."""
        if not self.handlers:
            return
        callback = getattr(self.handlers, hook)
        if callback:
            callback(payload)

    def _handle_init(self, event_node: EventNode, timestamp: int) -> None:
        """Store the execution id carried in the event text and fire ``on_init``."""
        init_event = ExecutionInit(id=event_node.text or "", timestamp=timestamp)
        self.execution.id = init_event.id
        self._notify("on_init", init_event)

    def _handle_stdout(self, event_node: EventNode, timestamp: int) -> None:
        """Append a stdout message to the execution logs and fire ``on_stdout``."""
        text = event_node.text or ""
        message = OutputMessage(text=text, timestamp=timestamp, is_error=False)
        self.execution.logs.add_stdout(message)
        self._notify("on_stdout", message)

    def _handle_stderr(self, event_node: EventNode, timestamp: int) -> None:
        """Append a stderr message to the execution logs and fire ``on_stderr``."""
        text = event_node.text or ""
        message = OutputMessage(text=text, timestamp=timestamp, is_error=True)
        self.execution.logs.add_stderr(message)
        self._notify("on_stderr", message)

    def _handle_result(self, event_node: EventNode, timestamp: int) -> None:
        """Record a result (empty text when the event has no results) and fire ``on_result``."""
        if event_node.results:
            result_text = event_node.results.get_text()
        else:
            result_text = ""
        result = ExecutionResult(text=result_text, timestamp=timestamp)
        self.execution.add_result(result)
        self._notify("on_result", result)

    def _handle_error(self, event_node: EventNode, timestamp: int) -> None:
        """Record the error on the execution and fire ``on_error``; no-op without error data."""
        error_data = event_node.error
        if not error_data:
            return
        error = ExecutionError(
            name=error_data.name or "",
            value=error_data.value or "",
            timestamp=timestamp,
            traceback=error_data.traceback,
        )
        self.execution.error = error
        self._notify("on_error", error)

    def _handle_execution_complete(self, event_node: EventNode, timestamp: int) -> None:
        """Fire ``on_execution_complete``; nothing is stored on the execution itself."""
        elapsed_ms = event_node.execution_time_in_millis or 0
        complete = ExecutionComplete(timestamp=timestamp, execution_time_in_millis=elapsed_ms)
        self._notify("on_execution_complete", complete)


================================================
FILE: sdks/sandbox/python/src/opensandbox/sync/adapters/egress_adapter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous direct egress sidecar adapter implementation.
"""

import logging

import httpx

from opensandbox.adapters.converter.exception_converter import ExceptionConverter
from opensandbox.adapters.converter.response_handler import (
    handle_api_error,
    require_parsed,
)
from opensandbox.config.connection_sync import ConnectionConfigSync
from opensandbox.models.sandboxes import NetworkPolicy, NetworkRule, SandboxEndpoint
from opensandbox.sync.services.egress import EgressSync

logger = logging.getLogger(__name__)


class EgressAdapterSync(EgressSync):
    """Blocking direct egress sidecar adapter using the generated egress client."""

    def __init__(self, connection_config: ConnectionConfigSync, endpoint: SandboxEndpoint) -> None:
        """
        Build the generated egress client on top of a dedicated ``httpx.Client``.

        Headers are merged in order: default ``User-Agent``, then the connection
        headers, then the endpoint headers, so later sources win on conflicts.
        """
        from opensandbox.api.egress import Client

        self.connection_config = connection_config
        self.endpoint = endpoint

        root_url = f"{connection_config.protocol}://{endpoint.endpoint}"
        request_timeout = httpx.Timeout(connection_config.request_timeout.total_seconds())

        merged_headers = {"User-Agent": connection_config.user_agent}
        merged_headers.update(connection_config.headers)
        merged_headers.update(endpoint.headers)

        self._client = Client(base_url=root_url, timeout=request_timeout)
        self._httpx_client = httpx.Client(
            base_url=root_url,
            headers=merged_headers,
            timeout=request_timeout,
            transport=connection_config.transport,
        )
        # The generated client sends its requests through our configured httpx client.
        self._client.set_httpx_client(self._httpx_client)

    def get_policy(self) -> NetworkPolicy:
        """
        Fetch the current egress policy from the sidecar.

        Returns:
            The policy converted into the SDK ``NetworkPolicy`` model.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure, including a missing or mistyped policy payload.
        """
        try:
            from opensandbox.api.egress.api.policy import get_policy
            from opensandbox.api.egress.models.network_policy import (
                NetworkPolicy as ApiNetworkPolicy,
            )
            from opensandbox.api.egress.models.policy_status_response import (
                PolicyStatusResponse,
            )
            from opensandbox.api.egress.types import Unset

            reply = get_policy.sync_detailed(client=self._client)
            handle_api_error(reply, "Get egress policy")
            status = require_parsed(reply, PolicyStatusResponse, "Get egress policy")

            api_policy = status.policy
            if isinstance(api_policy, Unset):
                raise ValueError("Egress policy response missing policy payload")
            if not isinstance(api_policy, ApiNetworkPolicy):
                raise TypeError(f"Expected NetworkPolicy, got {type(api_policy).__name__}")

            return NetworkPolicy.model_validate(api_policy.to_dict())
        except Exception as exc:
            logger.error("Failed to get egress policy from endpoint %s", self.endpoint.endpoint, exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    def patch_rules(self, rules: list[NetworkRule]) -> None:
        """
        Send the given rules to the sidecar's patch-policy endpoint.

        Args:
            rules: SDK rules; each is converted to the generated API rule model.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure.
        """
        try:
            from opensandbox.api.egress.api.policy import patch_policy
            from opensandbox.api.egress.models.network_rule import (
                NetworkRule as ApiNetworkRule,
            )
            from opensandbox.api.egress.models.network_rule_action import (
                NetworkRuleAction,
            )

            api_rules = [
                ApiNetworkRule(action=NetworkRuleAction(item.action), target=item.target)
                for item in rules
            ]
            reply = patch_policy.sync_detailed(client=self._client, body=api_rules)
            handle_api_error(reply, "Patch egress rules")
        except Exception as exc:
            logger.error("Failed to patch egress policy via endpoint %s", self.endpoint.endpoint, exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc


================================================
FILE: sdks/sandbox/python/src/opensandbox/sync/adapters/factory.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous service factory for creating sync adapter instances.
"""

from opensandbox.config.connection_sync import ConnectionConfigSync
from opensandbox.models.sandboxes import SandboxEndpoint
from opensandbox.sync.adapters.command_adapter import CommandsAdapterSync
from opensandbox.sync.adapters.egress_adapter import EgressAdapterSync
from opensandbox.sync.adapters.filesystem_adapter import FilesystemAdapterSync
from opensandbox.sync.adapters.health_adapter import HealthAdapterSync
from opensandbox.sync.adapters.metrics_adapter import MetricsAdapterSync
from opensandbox.sync.adapters.sandboxes_adapter import SandboxesAdapterSync
from opensandbox.sync.services import (
    CommandsSync,
    EgressSync,
    FilesystemSync,
    HealthSync,
    MetricsSync,
    SandboxesSync,
)


class AdapterFactorySync:
    """Creates the synchronous service adapters, all sharing one connection configuration."""

    def __init__(self, connection_config: ConnectionConfigSync) -> None:
        """Keep the connection configuration that is passed to every adapter built later."""
        self.connection_config = connection_config

    def create_sandbox_service(self) -> SandboxesSync:
        """Create the sandboxes adapter; it needs only the connection configuration."""
        service: SandboxesSync = SandboxesAdapterSync(self.connection_config)
        return service

    def create_filesystem_service(self, endpoint: SandboxEndpoint) -> FilesystemSync:
        """Create the filesystem adapter bound to ``endpoint``."""
        service: FilesystemSync = FilesystemAdapterSync(self.connection_config, endpoint)
        return service

    def create_command_service(self, endpoint: SandboxEndpoint) -> CommandsSync:
        """Create the command adapter bound to ``endpoint``."""
        service: CommandsSync = CommandsAdapterSync(self.connection_config, endpoint)
        return service

    def create_egress_service(self, endpoint: SandboxEndpoint) -> EgressSync:
        """Create the egress adapter bound to ``endpoint``."""
        service: EgressSync = EgressAdapterSync(self.connection_config, endpoint)
        return service

    def create_health_service(self, endpoint: SandboxEndpoint) -> HealthSync:
        """Create the health adapter bound to ``endpoint``."""
        service: HealthSync = HealthAdapterSync(self.connection_config, endpoint)
        return service

    def create_metrics_service(self, endpoint: SandboxEndpoint) -> MetricsSync:
        """Create the metrics adapter bound to ``endpoint``."""
        service: MetricsSync = MetricsAdapterSync(self.connection_config, endpoint)
        return service


================================================
FILE: sdks/sandbox/python/src/opensandbox/sync/adapters/filesystem_adapter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous filesystem service adapter implementation.
"""

import json
import logging
from collections.abc import Iterator
from io import IOBase, TextIOBase
from typing import TypedDict
from urllib.parse import quote

import httpx

from opensandbox.adapters.converter.exception_converter import (
    ExceptionConverter,
)
from opensandbox.adapters.converter.filesystem_model_converter import (
    FilesystemModelConverter,
)
from opensandbox.adapters.converter.response_handler import (
    extract_request_id,
    handle_api_error,
)
from opensandbox.config.connection_sync import ConnectionConfigSync
from opensandbox.exceptions import InvalidArgumentException, SandboxApiException
from opensandbox.models.filesystem import (
    ContentReplaceEntry,
    EntryInfo,
    MoveEntry,
    SearchEntry,
    SetPermissionEntry,
    WriteEntry,
)
from opensandbox.models.sandboxes import SandboxEndpoint
from opensandbox.sync.services.filesystem import FilesystemSync

logger = logging.getLogger(__name__)

class _DownloadRequest(TypedDict):
    url: str
    params: dict[str, str] | None
    headers: dict[str, str]


class FilesystemAdapterSync(FilesystemSync):
    FILESYSTEM_UPLOAD_PATH = "/files/upload"
    FILESYSTEM_DOWNLOAD_PATH = "/files/download"

    def __init__(self, connection_config: ConnectionConfigSync, execd_endpoint: SandboxEndpoint) -> None:
        """
        Set up the raw ``httpx.Client`` and the generated execd client that wraps it.

        Headers are merged in order: default ``User-Agent``, then the connection
        headers, then the endpoint headers, so later sources win on conflicts.
        """
        from opensandbox.api.execd import Client

        self.connection_config = connection_config
        self.execd_endpoint = execd_endpoint

        root_url = self._get_execd_base_url()
        request_timeout = httpx.Timeout(connection_config.request_timeout.total_seconds())

        merged_headers = {"User-Agent": connection_config.user_agent}
        merged_headers.update(connection_config.headers)
        merged_headers.update(execd_endpoint.headers)

        self._httpx_client = httpx.Client(
            base_url=root_url,
            headers=merged_headers,
            timeout=request_timeout,
            transport=connection_config.transport,
        )
        # The generated client issues its requests through the httpx client above.
        self._client = Client(base_url=root_url, timeout=request_timeout)
        self._client.set_httpx_client(self._httpx_client)

    def _get_execd_base_url(self) -> str:
        """Return the ``<protocol>://<endpoint>`` root URL of the execd endpoint."""
        scheme = self.connection_config.protocol
        host = self.execd_endpoint.endpoint
        return f"{scheme}://{host}"

    def _get_execd_url(self, path: str) -> str:
        """Return the execd root URL with ``path`` appended verbatim."""
        scheme = self.connection_config.protocol
        host = self.execd_endpoint.endpoint
        return f"{scheme}://{host}{path}"

    def _build_download_request(self, path: str, range_header: str | None = None) -> _DownloadRequest:
        """
        Describe the GET request used to download ``path``.

        The file path is percent-encoded (``/`` is kept) and embedded directly in
        the URL query string, so ``params`` is always ``None``. A ``Range`` header
        is included only when ``range_header`` is truthy.
        """
        query_value = quote(path, safe="/")
        endpoint_url = self._get_execd_url(self.FILESYSTEM_DOWNLOAD_PATH)
        extra_headers: dict[str, str] = {"Range": range_header} if range_header else {}
        return {
            "url": f"{endpoint_url}?path={query_value}",
            "params": None,
            "headers": extra_headers,
        }

    def read_file(
        self,
        path: str,
        *,
        encoding: str = "utf-8",
        range_header: str | None = None,
    ) -> str:
        """
        Download ``path`` and decode it as text.

        Args:
            path: File path inside the sandbox.
            encoding: Codec used to decode the downloaded bytes.
            range_header: Optional ``Range`` header value forwarded to the download.

        Returns:
            The decoded file content.
        """
        raw = self.read_bytes(path, range_header=range_header)
        text = raw.decode(encoding)
        return text

    def read_bytes(self, path: str, *, range_header: str | None = None) -> bytes:
        """
        Download ``path`` in one request and return the whole body.

        Args:
            path: File path inside the sandbox.
            range_header: Optional ``Range`` header value.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure, including a non-success HTTP status.
        """
        logger.debug("Reading file as bytes: %s", path)
        try:
            spec = self._build_download_request(path, range_header)
            # Pass ``params`` only when the request description carries some.
            get_kwargs = {"headers": spec["headers"]}
            if spec["params"] is not None:
                get_kwargs["params"] = spec["params"]
            reply = self._httpx_client.get(spec["url"], **get_kwargs)
            reply.raise_for_status()
            return reply.content
        except Exception as exc:
            logger.error("Failed to read file %s", path, exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    def read_bytes_stream(
        self, path: str, *, chunk_size: int = 64 * 1024, range_header: str | None = None
    ) -> Iterator[bytes]:
        """
        Download ``path`` as a stream of byte chunks.

        The request is sent as soon as this method is called; only the response
        body is consumed lazily through the returned iterator. The response is
        closed once iteration finishes or the iterator is closed.

        Args:
            path: File path inside the sandbox.
            chunk_size: Chunk size passed to ``iter_bytes``.
            range_header: Optional ``Range`` header value.

        Raises:
            SandboxApiException: When the response status code is 300 or higher.
        """
        logger.debug("Streaming file as bytes: %s (chunk_size=%s)", path, chunk_size)
        spec = self._build_download_request(path, range_header)

        # Pass ``params`` only when the request description carries some.
        build_kwargs = {"headers": spec["headers"]}
        if spec["params"] is not None:
            build_kwargs["params"] = spec["params"]
        outgoing = self._httpx_client.build_request("GET", spec["url"], **build_kwargs)
        reply = self._httpx_client.send(outgoing, stream=True)

        if reply.status_code >= 300:
            # Drain the body, then always release the connection before failing.
            try:
                reply.read()
            finally:
                reply.close()
            raise SandboxApiException(
                f"Failed to stream file {path}: {reply.status_code}",
                status_code=reply.status_code,
                request_id=extract_request_id(reply.headers),
            )

        def _chunks() -> Iterator[bytes]:
            try:
                yield from reply.iter_bytes(chunk_size=chunk_size)
            finally:
                reply.close()

        return _chunks()

    def write_files(self, entries: list[WriteEntry]) -> None:
        """
        Upload one or more files to the sandbox in a single multipart request.

        Each entry contributes two multipart parts: a JSON ``metadata`` part
        (path, owner, group, mode) followed by a ``file`` part with the content.
        Nothing is sent when ``entries`` is empty.

        Supported ``entry.data`` types:
            * ``bytes`` - sent as ``application/octet-stream``.
            * ``str`` - encoded with ``entry.encoding`` (default ``utf-8``) and
              sent as ``text/plain`` with the matching ``charset``.
            * binary ``IOBase`` stream - sent as ``application/octet-stream``.

        Args:
            entries: Files to write.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure: a missing path or data, a text-mode stream, an
            unsupported data type, an encoding error, or a failed HTTP request.
        """
        if not entries:
            return
        logger.debug("Writing %s files", len(entries))
        try:
            multipart_parts = []
            for entry in entries:
                if not entry.path:
                    raise InvalidArgumentException("File path cannot be null")
                if entry.data is None:
                    raise InvalidArgumentException("File data cannot be null")

                metadata = {
                    "path": entry.path,
                    "owner": entry.owner,
                    "group": entry.group,
                    "mode": entry.mode,
                }
                multipart_parts.append(("metadata", ("metadata", json.dumps(metadata), "application/json")))

                content: bytes | IOBase
                content_type: str
                if isinstance(entry.data, bytes):
                    content = entry.data
                    content_type = "application/octet-stream"
                elif isinstance(entry.data, str):
                    encoding = entry.encoding or "utf-8"
                    # Encode explicitly: httpx serialises ``str`` file content as
                    # UTF-8, which would contradict the charset declared below
                    # whenever a different encoding is requested.
                    content = entry.data.encode(encoding)
                    content_type = f"text/plain; charset={encoding}"
                elif isinstance(entry.data, IOBase):
                    if isinstance(entry.data, TextIOBase):
                        raise InvalidArgumentException(
                            "File stream must be binary (opened with 'rb'). Text streams are not supported."
                        )
                    content = entry.data
                    content_type = "application/octet-stream"
                else:
                    raise InvalidArgumentException(f"Unsupported file data type: {type(entry.data)}")

                multipart_parts.append(("file", (entry.path, content, content_type)))

            url = self._get_execd_url(self.FILESYSTEM_UPLOAD_PATH)
            response = self._httpx_client.post(url, files=multipart_parts)
            response.raise_for_status()
        except Exception as e:
            logger.error("Failed to write %s files", len(entries), exc_info=e)
            raise ExceptionConverter.to_sandbox_exception(e) from e

    def write_file(
        self,
        path: str,
        data: str | bytes | IOBase,
        *,
        encoding: str = "utf-8",
        mode: int = 755,
        owner: str | None = None,
        group: str | None = None,
    ) -> None:
        """
        Write a single file by wrapping the arguments in a ``WriteEntry``.

        All arguments are passed unchanged to ``WriteEntry``; the upload itself
        is performed by ``write_files``.
        """
        self.write_files(
            [
                WriteEntry(
                    path=path,
                    data=data,
                    mode=mode,
                    owner=owner,
                    group=group,
                    encoding=encoding,
                )
            ]
        )

    def create_directories(self, entries: list[WriteEntry]) -> None:
        """
        Create directories through the execd ``make_dirs`` API.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure.
        """
        try:
            from opensandbox.api.execd.api.filesystem import make_dirs

            payload = FilesystemModelConverter.to_api_make_dirs_body(entries)
            reply = make_dirs.sync_detailed(client=self._client, body=payload)
            handle_api_error(reply, "Create directories")
        except Exception as exc:
            logger.error("Failed to create directories", exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    def delete_files(self, paths: list[str]) -> None:
        """
        Delete the given files through the execd ``remove_files`` API.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure.
        """
        try:
            from opensandbox.api.execd.api.filesystem import remove_files

            reply = remove_files.sync_detailed(
                client=self._client,
                path=paths,
            )
            handle_api_error(reply, "Delete files")
        except Exception as exc:
            logger.error("Failed to delete %s files", len(paths), exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    def delete_directories(self, paths: list[str]) -> None:
        """
        Delete the given directories through the execd ``remove_dirs`` API.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure.
        """
        try:
            from opensandbox.api.execd.api.filesystem import remove_dirs

            reply = remove_dirs.sync_detailed(
                client=self._client,
                path=paths,
            )
            handle_api_error(reply, "Delete directories")
        except Exception as exc:
            logger.error("Failed to delete %s directories", len(paths), exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    def move_files(self, entries: list[MoveEntry]) -> None:
        """
        Move files through the execd ``rename_files`` API.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure.
        """
        try:
            from opensandbox.api.execd.api.filesystem import rename_files

            reply = rename_files.sync_detailed(
                client=self._client,
                body=FilesystemModelConverter.to_api_rename_file_items(entries),
            )
            handle_api_error(reply, "Move files")
        except Exception as exc:
            logger.error("Failed to move files", exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    def set_permissions(self, entries: list[SetPermissionEntry]) -> None:
        """
        Apply permission changes through the execd ``chmod_files`` API.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure.
        """
        try:
            from opensandbox.api.execd.api.filesystem import chmod_files

            payload = FilesystemModelConverter.to_api_chmod_files_body(entries)
            reply = chmod_files.sync_detailed(client=self._client, body=payload)
            handle_api_error(reply, "Set permissions")
        except Exception as exc:
            logger.error("Failed to set permissions", exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    def replace_contents(self, entries: list[ContentReplaceEntry]) -> None:
        """
        Apply content replacements through the execd ``replace_content`` API.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure.
        """
        try:
            from opensandbox.api.execd.api.filesystem import replace_content

            payload = FilesystemModelConverter.to_api_replace_content_body(entries)
            reply = replace_content.sync_detailed(client=self._client, body=payload)
            handle_api_error(reply, "Replace contents")
        except Exception as exc:
            logger.error("Failed to replace contents", exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    def search(self, entry: SearchEntry) -> list[EntryInfo]:
        """
        Search for files under ``entry.path`` matching ``entry.pattern``.

        Returns:
            The matching entries, or an empty list when the parsed response is
            empty or missing.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure, including a parsed payload that is not a list of
            ``FileInfo`` objects.
        """
        try:
            from opensandbox.api.execd.api.filesystem import search_files
            from opensandbox.api.execd.models import FileInfo

            reply = search_files.sync_detailed(
                client=self._client,
                path=entry.path,
                pattern=entry.pattern,
            )
            handle_api_error(reply, "Search files")

            found = reply.parsed
            if not found:
                return []

            is_file_info_list = isinstance(found, list) and all(isinstance(item, FileInfo) for item in found)
            if not is_file_info_list:
                raise SandboxApiException(
                    message="Search files failed: unexpected response type",
                    request_id=extract_request_id(getattr(reply, "headers", None)),
                )
            return FilesystemModelConverter.to_entry_info_list(found)
        except Exception as exc:
            logger.error("Failed to search files", exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    def get_file_info(self, paths: list[str]) -> dict[str, EntryInfo]:
        """
        Fetch file information for ``paths`` through the execd ``get_files_info`` API.

        Returns:
            The converted entry-info mapping, or an empty dict when the parsed
            response is empty or missing.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure.
        """
        try:
            from opensandbox.api.execd.api.filesystem import get_files_info

            reply = get_files_info.sync_detailed(client=self._client, path=paths)
            handle_api_error(reply, "Get file info")
            info = reply.parsed
            if info:
                return FilesystemModelConverter.to_entry_info_map(info)
            return {}
        except Exception as exc:
            logger.error("Failed to get file info for %s paths", len(paths), exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc


================================================
FILE: sdks/sandbox/python/src/opensandbox/sync/adapters/health_adapter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous health service adapter implementation.
"""

import logging

import httpx

from opensandbox.adapters.converter.response_handler import handle_api_error
from opensandbox.config.connection_sync import ConnectionConfigSync
from opensandbox.models.sandboxes import SandboxEndpoint
from opensandbox.sync.services.health import HealthSync

logger = logging.getLogger(__name__)


class HealthAdapterSync(HealthSync):
    """Blocking health-check adapter built on the generated execd client."""

    def __init__(self, connection_config: ConnectionConfigSync, execd_endpoint: SandboxEndpoint) -> None:
        """
        Build the generated execd client on top of a dedicated ``httpx.Client``.

        Headers are merged in order: default ``User-Agent``, then the connection
        headers, then the endpoint headers, so later sources win on conflicts.
        """
        from opensandbox.api.execd import Client

        self.connection_config = connection_config
        self.execd_endpoint = execd_endpoint

        root_url = f"{connection_config.protocol}://{execd_endpoint.endpoint}"
        request_timeout = httpx.Timeout(connection_config.request_timeout.total_seconds())

        merged_headers = {"User-Agent": connection_config.user_agent}
        merged_headers.update(connection_config.headers)
        merged_headers.update(execd_endpoint.headers)

        self._client = Client(base_url=root_url, timeout=request_timeout)
        self._httpx_client = httpx.Client(
            base_url=root_url,
            headers=merged_headers,
            timeout=request_timeout,
            transport=connection_config.transport,
        )
        # The generated client issues its requests through the httpx client above.
        self._client.set_httpx_client(self._httpx_client)

    def ping(self, sandbox_id: str) -> bool:
        """
        Check whether the execd endpoint answers the ping API successfully.

        Args:
            sandbox_id: Used only in the debug log message on failure.

        Returns:
            ``True`` when the ping succeeds, ``False`` on any exception (the
            failure is logged at debug level and never propagated).
        """
        try:
            from opensandbox.api.execd.api.health import ping

            reply = ping.sync_detailed(client=self._client)
            handle_api_error(reply, "Ping")
        except Exception as exc:
            logger.debug("Health check failed for sandbox %s: %s", sandbox_id, exc)
            return False
        return True


================================================
FILE: sdks/sandbox/python/src/opensandbox/sync/adapters/metrics_adapter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous metrics service adapter implementation.
"""

import logging

import httpx

from opensandbox.adapters.converter.exception_converter import (
    ExceptionConverter,
)
from opensandbox.adapters.converter.metrics_model_converter import (
    MetricsModelConverter,
)
from opensandbox.adapters.converter.response_handler import (
    handle_api_error,
    require_parsed,
)
from opensandbox.config.connection_sync import ConnectionConfigSync
from opensandbox.models.sandboxes import SandboxEndpoint, SandboxMetrics
from opensandbox.sync.services.metrics import MetricsSync

logger = logging.getLogger(__name__)


class MetricsAdapterSync(MetricsSync):
    """Blocking metrics adapter built on the generated execd client."""

    def __init__(self, connection_config: ConnectionConfigSync, execd_endpoint: SandboxEndpoint) -> None:
        """
        Build the generated execd client on top of a dedicated ``httpx.Client``.

        Headers are merged in order: default ``User-Agent``, then the connection
        headers, then the endpoint headers, so later sources win on conflicts.
        """
        from opensandbox.api.execd import Client

        self.connection_config = connection_config
        self.execd_endpoint = execd_endpoint

        root_url = f"{connection_config.protocol}://{execd_endpoint.endpoint}"
        request_timeout = httpx.Timeout(connection_config.request_timeout.total_seconds())

        merged_headers = {"User-Agent": connection_config.user_agent}
        merged_headers.update(connection_config.headers)
        merged_headers.update(execd_endpoint.headers)

        self._client = Client(base_url=root_url, timeout=request_timeout)
        self._httpx_client = httpx.Client(
            base_url=root_url,
            headers=merged_headers,
            timeout=request_timeout,
            transport=connection_config.transport,
        )
        # The generated client issues its requests through the httpx client above.
        self._client.set_httpx_client(self._httpx_client)

    def get_metrics(self, sandbox_id: str) -> SandboxMetrics:
        """
        Fetch metrics from the execd endpoint and convert them to the SDK model.

        Args:
            sandbox_id: Used only in the error log message on failure.

        Raises:
            The exception produced by ``ExceptionConverter.to_sandbox_exception``
            for any failure.
        """
        try:
            from opensandbox.api.execd.api.metric import get_metrics
            from opensandbox.api.execd.models import Metrics

            reply = get_metrics.sync_detailed(client=self._client)
            handle_api_error(reply, "Get metrics")
            api_metrics = require_parsed(reply, Metrics, "Get metrics")
            return MetricsModelConverter.to_sandbox_metrics(api_metrics)
        except Exception as exc:
            logger.error("Failed to get metrics for sandbox %s", sandbox_id, exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc


================================================
FILE: sdks/sandbox/python/src/opensandbox/sync/adapters/sandboxes_adapter.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous sandbox service adapter implementation.
"""

import logging
from datetime import datetime, timedelta

import httpx

from opensandbox.adapters.converter.exception_converter import (
    ExceptionConverter,
)
from opensandbox.adapters.converter.response_handler import (
    handle_api_error,
    require_parsed,
)
from opensandbox.adapters.converter.sandbox_model_converter import (
    SandboxModelConverter,
)
from opensandbox.api.lifecycle.types import UNSET
from opensandbox.config.connection_sync import ConnectionConfigSync
from opensandbox.models.sandboxes import (
    NetworkPolicy,
    PagedSandboxInfos,
    SandboxCreateResponse,
    SandboxEndpoint,
    SandboxFilter,
    SandboxImageSpec,
    SandboxInfo,
    SandboxRenewResponse,
    Volume,
)
from opensandbox.sync.services.sandbox import SandboxesSync

logger = logging.getLogger(__name__)


class SandboxesAdapterSync(SandboxesSync):
    """
    Blocking adapter implementing :class:`SandboxesSync` on top of the generated
    lifecycle API client.

    Every operation calls the matching generated ``sync_detailed`` function, checks
    the response with ``handle_api_error`` (and ``require_parsed`` when a body is
    expected) and maps API models to SDK models via ``SandboxModelConverter``.
    Any exception raised along the way is logged and re-raised through
    ``ExceptionConverter.to_sandbox_exception``.
    """

    def __init__(self, connection_config: ConnectionConfigSync) -> None:
        """
        Build the lifecycle API client from the given connection configuration.

        Args:
            connection_config: Source of the base URL, API key, request timeout,
                user agent, extra headers and the httpx transport.
        """
        self.connection_config = connection_config
        from opensandbox.api.lifecycle import AuthenticatedClient

        config = self.connection_config
        api_key = config.get_api_key()
        timeout = httpx.Timeout(config.request_timeout.total_seconds())

        # Configured headers are applied after the default User-Agent (so they may
        # override it); the API key header is written last and only when a key exists.
        headers = {"User-Agent": config.user_agent}
        headers.update(config.headers)
        if api_key:
            headers["OPEN-SANDBOX-API-KEY"] = api_key

        self._client = AuthenticatedClient(
            base_url=config.get_base_url(),
            token=api_key if api_key else "",
            prefix="",
            auth_header_name="OPEN-SANDBOX-API-KEY",
            timeout=timeout,
        )

        # The generated client is handed an explicitly constructed httpx client so
        # that the headers and transport from the connection config are used.
        self._httpx_client = httpx.Client(
            base_url=config.get_base_url(),
            headers=headers,
            timeout=timeout,
            transport=config.transport,
        )
        self._client.set_httpx_client(self._httpx_client)

    def _get_client(self):
        """Return the lifecycle API client built in ``__init__``."""
        return self._client

    @staticmethod
    def _encode_metadata_filter(metadata: dict[str, str]) -> str:
        """
        Encode a metadata filter as ``key=value`` pairs joined by ``&``.

        Keys and values are each percent-encoded twice with no characters treated
        as safe (the same double-encoding the async adapter applies).
        """
        from urllib.parse import quote

        def encode_twice(text: str) -> str:
            return quote(quote(text, safe=""), safe="")

        return "&".join(
            f"{encode_twice(key)}={encode_twice(value)}" for key, value in metadata.items()
        )

    def create_sandbox(
        self,
        spec: SandboxImageSpec,
        entrypoint: list[str],
        env: dict[str, str],
        metadata: dict[str, str],
        timeout: timedelta | None,
        resource: dict[str, str],
        network_policy: NetworkPolicy | None,
        extensions: dict[str, str],
        volumes: list[Volume] | None,
    ) -> SandboxCreateResponse:
        """
        Create a sandbox through the lifecycle API.

        All arguments are forwarded unchanged to
        ``SandboxModelConverter.to_api_create_sandbox_request`` to build the request.

        Returns:
            The creation response converted to the SDK model.

        Raises:
            Exception: the exception produced by
                ``ExceptionConverter.to_sandbox_exception`` on any failure.
        """
        logger.info("Creating sandbox with image: %s", spec.image)
        try:
            from opensandbox.api.lifecycle.api.sandboxes import post_sandboxes
            from opensandbox.api.lifecycle.models import (
                CreateSandboxResponse as ApiCreateSandboxResponse,
            )

            request_body = SandboxModelConverter.to_api_create_sandbox_request(
                spec=spec,
                entrypoint=entrypoint,
                env=env,
                metadata=metadata,
                timeout=timeout,
                resource=resource,
                network_policy=network_policy,
                extensions=extensions,
                volumes=volumes,
            )
            response = post_sandboxes.sync_detailed(
                client=self._get_client(),
                body=request_body,
            )
            handle_api_error(response, "Create sandbox")

            created = require_parsed(response, ApiCreateSandboxResponse, "Create sandbox")
            return SandboxModelConverter.to_sandbox_create_response(created)
        except Exception as exc:
            logger.error("Failed to create sandbox with image: %s", spec.image, exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    def get_sandbox_info(self, sandbox_id: str) -> SandboxInfo:
        """
        Retrieve a single sandbox by ID and convert it to the SDK model.

        Args:
            sandbox_id: ID of the sandbox to look up.

        Raises:
            Exception: the exception produced by
                ``ExceptionConverter.to_sandbox_exception`` on any failure.
        """
        try:
            from opensandbox.api.lifecycle.api.sandboxes import get_sandboxes_sandbox_id
            from opensandbox.api.lifecycle.models import Sandbox as ApiSandbox

            operation = f"Get sandbox {sandbox_id}"
            response = get_sandboxes_sandbox_id.sync_detailed(
                client=self._get_client(),
                sandbox_id=sandbox_id,
            )
            handle_api_error(response, operation)
            api_sandbox = require_parsed(response, ApiSandbox, operation)
            return SandboxModelConverter.to_sandbox_info(api_sandbox)
        except Exception as exc:
            logger.error("Failed to get sandbox info: %s", sandbox_id, exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    def list_sandboxes(self, filter: SandboxFilter) -> PagedSandboxInfos:
        """
        List sandboxes matching ``filter``.

        Filter fields that are not set are sent as the API's ``UNSET`` sentinel;
        a non-empty metadata filter is sent as a double percent-encoded query string.

        Args:
            filter: States, metadata and paging options to apply.

        Raises:
            Exception: the exception produced by
                ``ExceptionConverter.to_sandbox_exception`` on any failure.
        """
        # The metadata string is built before entering the try block, exactly like
        # the async adapter does.
        if filter.metadata:
            metadata_query = self._encode_metadata_filter(filter.metadata)
        else:
            metadata_query = UNSET

        try:
            from opensandbox.api.lifecycle.api.sandboxes import get_sandboxes
            from opensandbox.api.lifecycle.models import (
                ListSandboxesResponse as ApiListSandboxesResponse,
            )
            from opensandbox.api.lifecycle.types import UNSET as API_UNSET

            response = get_sandboxes.sync_detailed(
                client=self._get_client(),
                state=filter.states or API_UNSET,
                metadata=metadata_query,
                page=API_UNSET if filter.page is None else filter.page,
                page_size=API_UNSET if filter.page_size is None else filter.page_size,
            )
            handle_api_error(response, "List sandboxes")
            listing = require_parsed(response, ApiListSandboxesResponse, "List sandboxes")
            return SandboxModelConverter.to_paged_sandbox_infos(listing)
        except Exception as exc:
            logger.error("Failed to list sandboxes", exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    def get_sandbox_endpoint(
        self, sandbox_id: str, port: int, use_server_proxy: bool = False
    ) -> SandboxEndpoint:
        """
        Retrieve the endpoint of a sandbox port.

        Args:
            sandbox_id: ID of the sandbox.
            port: Port number inside the sandbox.
            use_server_proxy: Forwarded unchanged to the lifecycle API call.

        Raises:
            Exception: the exception produced by
                ``ExceptionConverter.to_sandbox_exception`` on any failure.
        """
        try:
            from opensandbox.api.lifecycle.api.sandboxes import (
                get_sandboxes_sandbox_id_endpoints_port,
            )
            from opensandbox.api.lifecycle.models import Endpoint as ApiEndpoint

            response = get_sandboxes_sandbox_id_endpoints_port.sync_detailed(
                sandbox_id=sandbox_id,
                port=port,
                client=self._get_client(),
                use_server_proxy=use_server_proxy,
            )
            handle_api_error(response, f"Get endpoint for sandbox {sandbox_id} port {port}")
            api_endpoint = require_parsed(response, ApiEndpoint, "Get endpoint")
            return SandboxModelConverter.to_sandbox_endpoint(api_endpoint)
        except Exception as exc:
            logger.error(
                "Failed to retrieve sandbox endpoint for sandbox %s", sandbox_id, exc_info=exc
            )
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    def pause_sandbox(self, sandbox_id: str) -> None:
        """
        Request a pause of the given sandbox.

        Raises:
            Exception: the exception produced by
                ``ExceptionConverter.to_sandbox_exception`` on any failure.
        """
        try:
            from opensandbox.api.lifecycle.api.sandboxes import (
                post_sandboxes_sandbox_id_pause,
            )

            response = post_sandboxes_sandbox_id_pause.sync_detailed(
                client=self._get_client(),
                sandbox_id=sandbox_id,
            )
            handle_api_error(response, f"Pause sandbox {sandbox_id}")
        except Exception as exc:
            logger.error("Failed to pause sandbox: %s", sandbox_id, exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    def resume_sandbox(self, sandbox_id: str) -> None:
        """
        Request a resume of the given sandbox.

        Raises:
            Exception: the exception produced by
                ``ExceptionConverter.to_sandbox_exception`` on any failure.
        """
        try:
            from opensandbox.api.lifecycle.api.sandboxes import (
                post_sandboxes_sandbox_id_resume,
            )

            response = post_sandboxes_sandbox_id_resume.sync_detailed(
                client=self._get_client(),
                sandbox_id=sandbox_id,
            )
            handle_api_error(response, f"Resume sandbox {sandbox_id}")
        except Exception as exc:
            logger.error("Failed to resume sandbox: %s", sandbox_id, exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    def renew_sandbox_expiration(
        self, sandbox_id: str, new_expiration_time: datetime
    ) -> SandboxRenewResponse:
        """
        Set a new expiration time for the given sandbox.

        Args:
            sandbox_id: ID of the sandbox to renew.
            new_expiration_time: Expiration time to send, converted by
                ``SandboxModelConverter.to_api_renew_request``.

        Returns:
            The renew response converted to the SDK model.

        Raises:
            Exception: the exception produced by
                ``ExceptionConverter.to_sandbox_exception`` on any failure.
        """
        try:
            from opensandbox.api.lifecycle.api.sandboxes import (
                post_sandboxes_sandbox_id_renew_expiration,
            )
            from opensandbox.api.lifecycle.models.renew_sandbox_expiration_response import (
                RenewSandboxExpirationResponse,
            )

            request_body = SandboxModelConverter.to_api_renew_request(new_expiration_time)
            response = post_sandboxes_sandbox_id_renew_expiration.sync_detailed(
                client=self._get_client(),
                sandbox_id=sandbox_id,
                body=request_body,
            )
            handle_api_error(response, f"Renew sandbox {sandbox_id} expiration")
            renewed = require_parsed(
                response,
                RenewSandboxExpirationResponse,
                f"Renew sandbox {sandbox_id} expiration",
            )
            return SandboxModelConverter.to_sandbox_renew_response(renewed)
        except Exception as exc:
            logger.error("Failed to renew sandbox %s expiration", sandbox_id, exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc

    def kill_sandbox(self, sandbox_id: str) -> None:
        """
        Delete (terminate) the given sandbox through the lifecycle API.

        Raises:
            Exception: the exception produced by
                ``ExceptionConverter.to_sandbox_exception`` on any failure.
        """
        try:
            from opensandbox.api.lifecycle.api.sandboxes import (
                delete_sandboxes_sandbox_id,
            )

            response = delete_sandboxes_sandbox_id.sync_detailed(
                client=self._get_client(),
                sandbox_id=sandbox_id,
            )
            handle_api_error(response, f"Kill sandbox {sandbox_id}")
        except Exception as exc:
            logger.error("Failed to kill sandbox: %s", sandbox_id, exc_info=exc)
            raise ExceptionConverter.to_sandbox_exception(exc) from exc


================================================
FILE: sdks/sandbox/python/src/opensandbox/sync/manager.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous SandboxManager implementation.
"""

import logging
from datetime import datetime, timedelta, timezone
from typing import Any

from opensandbox.config.connection_sync import ConnectionConfigSync
from opensandbox.models.sandboxes import (
    PagedSandboxInfos,
    SandboxFilter,
    SandboxInfo,
    SandboxRenewResponse,
)
from opensandbox.sync.adapters.factory import AdapterFactorySync
from opensandbox.sync.services.sandbox import SandboxesSync

logger = logging.getLogger(__name__)


class SandboxManagerSync:
    """
    Blocking counterpart of the async :class:`opensandbox.manager.SandboxManager`.

    The manager targets administrative, multi-sandbox workflows: listing and
    filtering sandboxes and controlling them by ID. Every call runs in the
    current thread and blocks until it completes. To work with the files,
    commands or metrics of one sandbox, use
    :class:`opensandbox.sync.sandbox.SandboxSync` instead.

    Usage Example:

    ```python
    from opensandbox.models.sandboxes import SandboxFilter
    from opensandbox.sync.manager import SandboxManagerSync

    manager = SandboxManagerSync.create()
    infos = manager.list_sandbox_infos(SandboxFilter(states=["RUNNING"]))
    manager.close()
    ```
    """

    def __init__(
        self, sandbox_service: SandboxesSync, connection_config: ConnectionConfigSync
    ) -> None:
        """
        Store the collaborators of the manager.

        Prefer :meth:`create`, which builds the service from a connection config.

        Args:
            sandbox_service: Service every sandbox operation is delegated to
            connection_config: Connection configuration (shared transport, headers, timeouts)
        """
        self._connection_config = connection_config
        self._sandbox_service = sandbox_service

    @property
    def connection_config(self) -> ConnectionConfigSync:
        """The connection configuration this manager was built with."""
        return self._connection_config

    @classmethod
    def create(cls, connection_config: ConnectionConfigSync | None = None) -> "SandboxManagerSync":
        """
        Build a manager from a connection configuration (blocking).

        Args:
            connection_config: Configuration to use; when omitted, a default
                ``ConnectionConfigSync()`` is created.

        Returns:
            A manager whose sandbox service was created by ``AdapterFactorySync``
        """
        resolved_config = (connection_config or ConnectionConfigSync()).with_transport_if_missing()
        service = AdapterFactorySync(resolved_config).create_sandbox_service()
        return cls(service, resolved_config)

    def list_sandbox_infos(self, filter: SandboxFilter) -> PagedSandboxInfos:
        """
        Return one page of sandboxes that match the given filter.

        Args:
            filter: Criteria (and paging options) applied to the listing

        Returns:
            The matching sandboxes as a paged result

        Raises:
            SandboxException: if the operation fails
        """
        page = self._sandbox_service.list_sandboxes(filter)
        return page

    def get_sandbox_info(self, sandbox_id: str) -> SandboxInfo:
        """
        Look up one sandbox by ID.

        Args:
            sandbox_id: ID of the sandbox to look up

        Returns:
            Information about that sandbox

        Raises:
            SandboxException: if the operation fails
        """
        logger.debug("Getting info for sandbox: %s", sandbox_id)
        info = self._sandbox_service.get_sandbox_info(sandbox_id)
        return info

    def kill_sandbox(self, sandbox_id: str) -> None:
        """
        Terminate one sandbox by ID.

        Args:
            sandbox_id: ID of the sandbox to terminate

        Raises:
            SandboxException: if the operation fails
        """
        service = self._sandbox_service
        logger.info("Terminating sandbox: %s", sandbox_id)
        service.kill_sandbox(sandbox_id)
        # Only reached when the service call did not raise.
        logger.info("Successfully terminated sandbox: %s", sandbox_id)

    def renew_sandbox(self, sandbox_id: str, timeout: timedelta) -> SandboxRenewResponse:
        """
        Push back the expiration of one sandbox.

        The requested expiration is "now" (UTC) plus ``timeout``.

        Args:
            sandbox_id: ID of the sandbox to renew
            timeout: Duration added to the current time to get the new expiration

        Returns:
            The renew response returned by the sandbox service

        Raises:
            SandboxException: if the operation fails
        """
        # An aware UTC timestamp keeps the value unambiguous across time zones.
        expires_at = datetime.now(tz=timezone.utc) + timeout
        logger.info("Renew expiration for sandbox %s to %s", sandbox_id, expires_at)
        return self._sandbox_service.renew_sandbox_expiration(sandbox_id, expires_at)

    def pause_sandbox(self, sandbox_id: str) -> None:
        """
        Pause one sandbox, preserving its state.

        Args:
            sandbox_id: ID of the sandbox to pause

        Raises:
            SandboxException: if the operation fails
        """
        logger.info("Pausing sandbox: %s", sandbox_id)
        service = self._sandbox_service
        service.pause_sandbox(sandbox_id)

    def resume_sandbox(self, sandbox_id: str) -> None:
        """
        Resume one sandbox that was paused earlier.

        Args:
            sandbox_id: ID of the sandbox to resume

        Raises:
            SandboxException: if the operation fails
        """
        logger.info("Resuming sandbox: %s", sandbox_id)
        service = self._sandbox_service
        service.resume_sandbox(sandbox_id)

    def close(self) -> None:
        """
        Release the local resources held by this manager.

        Delegates to ``close_transport_if_owned()`` on the connection config.
        Failures are logged as warnings and never raised, so the method is safe
        to call from context-manager cleanup.
        """
        config = self._connection_config
        try:
            config.close_transport_if_owned()
        except Exception as exc:
            logger.warning("Error closing resources for sandbox manager: %s", exc, exc_info=True)

    def __enter__(self) -> "SandboxManagerSync":
        """Enter the context manager; yields the manager itself."""
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Leave the context manager, closing local resources via :meth:`close`."""
        self.close()


================================================
FILE: sdks/sandbox/python/src/opensandbox/sync/sandbox.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous Sandbox client implementation.
"""

import logging
import time
from collections.abc import Callable
from datetime import datetime, timedelta, timezone
from typing import Any

from opensandbox.config.connection_sync import ConnectionConfigSync
from opensandbox.constants import DEFAULT_EGRESS_PORT, DEFAULT_EXECD_PORT
from opensandbox.exceptions import (
    InvalidArgumentException,
    SandboxException,
    SandboxInternalException,
    SandboxReadyTimeoutException,
)
from opensandbox.models.sandboxes import (
    NetworkPolicy,
    NetworkRule,
    SandboxEndpoint,
    SandboxImageSpec,
    SandboxInfo,
    SandboxMetrics,
    SandboxRenewResponse,
    Volume,
)
from opensandbox.sync.adapters.factory import AdapterFactorySync
from opensandbox.sync.services import (
    CommandsSync,
    EgressSync,
    FilesystemSync,
    HealthSync,
    MetricsSync,
    SandboxesSync,
)

logger = logging.getLogger(__name__)


class SandboxSync:
    """
    Main synchronous entrypoint for the Open Sandbox SDK.

    This class mirrors the async :class:`opensandbox.sandbox.Sandbox` API, but all
    operations are **blocking** and executed in the current thread.

    Key Features:

    - **Secure Isolation**: Complete Linux OS access in isolated containers
    - **File System Operations**: Create, read, update, delete files and directories
    - **Multi-language Execution**: Support for Python, Java, Bash, and other languages
    - **Real-time Command Execution**: Streaming output via SSE (Server-Sent Events)
    - **Resource Management**: CPU, memory, and storage constraints
    - **Lifecycle Management**: Create, pause, resume, terminate operations
    - **Health Monitoring**: Readiness polling and status tracking

    Notes:

    - **Blocking**: Do not call these methods directly from an asyncio event loop thread.
      If you need non-blocking behavior, prefer the async :class:`~opensandbox.sandbox.Sandbox`.
    - **Resource cleanup**: :meth:`close` closes *local* HTTP resources only. It does **not**
      terminate the remote sandbox instance. Call :meth:`kill` to stop the remote sandbox.

    Usage Example:

    ```python
    from datetime import timedelta
    from opensandbox.models.sandboxes import SandboxImageSpec
    from opensandbox.models.execd import RunCommandOpts
    from opensandbox.sync.sandbox import SandboxSync

    # Create a sandbox (blocking)
    sandbox = SandboxSync.create(
        "python:3.11",
        resource={"cpu": "1", "memory": "500Mi"},
        timeout=timedelta(minutes=30),
    )

    # Use the sandbox
    sandbox.files.write_file("script.py", "print('Hello World')")
    result = sandbox.commands.run("python script.py")

    # Always clean up resources
    sandbox.kill()   # terminate remote sandbox
    sandbox.close()  # close local HTTP resources

    # Or use a context manager for automatic close():
    with SandboxSync.create("python:3.11") as sandbox:
        # Note on lifecycle:
        # - Exiting the context manager will call `sandbox.close()` (local HTTP resources only).
        # - You must still call `sandbox.kill()` to terminate the remote sandbox instance.
        sandbox.commands.run("python -c \"print('hi')\"")
        sandbox.kill()
    ```
    """

    def __init__(
        self,
        sandbox_id: str,
        sandbox_service: SandboxesSync,
        filesystem_service: FilesystemSync,
        command_service: CommandsSync,
        health_service: HealthSync,
        metrics_service: MetricsSync,
        egress_service: EgressSync,
        connection_config: ConnectionConfigSync,
        custom_health_check: Callable[["SandboxSync"], bool] | None = None,
    ) -> None:
        """
        Internal constructor for SandboxSync. Use :meth:`create` or :meth:`connect` instead.
        """
        self.id = sandbox_id
        self._sandbox_service = sandbox_service
        self._filesystem_service = filesystem_service
        self._command_service = command_service
        self._health_service = health_service
        self._metrics_service = metrics_service
        self._egress_service = egress_service
        self._connection_config = connection_config
        self._custom_health_check = custom_health_check

    @property
    def files(self) -> FilesystemSync:
        """
        Provides access to file system operations within the sandbox.

        Allows writing, reading, listing, and deleting files and directories.
        """
        return self._filesystem_service

    @property
    def commands(self) -> CommandsSync:
        """
        Provides access to command execution operations.

        Supports both one-shot command execution and SSE streaming output.
        """
        return self._command_service

    @property
    def metrics(self) -> MetricsSync:
        """
        Provides access to sandbox metrics and monitoring.

        Allows retrieving resource usage statistics (CPU, memory) and other performance metrics.
        """
        return self._metrics_service

    @property
    def connection_config(self) -> ConnectionConfigSync:
        """Provides access to the connection configuration (including shared transport)."""
        return self._connection_config

    def get_info(self) -> SandboxInfo:
        """
        Get the current status of this sandbox.

        Returns:
            Current sandbox status including state and metadata

        Raises:
            SandboxException: if status cannot be retrieved
        """
        return self._sandbox_service.get_sandbox_info(self.id)

    def get_endpoint(self, port: int) -> SandboxEndpoint:
        """
        Get a specific network endpoint for this sandbox.

        Args:
            port: The port number to get the endpoint for

        Returns:
            Endpoint information including connection details

        Raises:
            SandboxException: if endpoint cannot be retrieved
        """
        return self._sandbox_service.get_sandbox_endpoint(
            self.id, port, self.connection_config.use_server_proxy
        )

    def get_metrics(self) -> SandboxMetrics:
        """
        Get the current resource usage metrics for this sandbox.

        Returns:
            Current sandbox metrics including CPU, memory, and I/O statistics

        Raises:
            SandboxException: if metrics cannot be retrieved
        """
        return self._metrics_service.get_metrics(self.id)

    def renew(self, timeout: timedelta) -> SandboxRenewResponse:
        """
        Push back the expiration of this sandbox to delay automatic termination.

        The requested expiration is "now" (UTC) plus ``timeout``.

        Args:
            timeout: Duration to add to the current time to set the new expiration

        Returns:
            Renew response including the new expiration time.

        Raises:
            SandboxException: if the operation fails
        """
        # An aware UTC timestamp keeps the value unambiguous across time zones.
        expires_at = datetime.now(tz=timezone.utc) + timeout
        logger.info("Renewing sandbox %s timeout, estimated expiration: %s", self.id, expires_at)
        service = self._sandbox_service
        return service.renew_sandbox_expiration(self.id, expires_at)

    def get_egress_policy(self) -> NetworkPolicy:
        """
        Get current egress policy for this sandbox.
        """
        return self._egress_service.get_policy()

    def patch_egress_rules(self, rules: list[NetworkRule]) -> None:
        """
        Patch egress rules for this sandbox using sidecar merge semantics.

        Rules in this patch payload take priority over existing rules with the
        same target. Existing rules for other targets remain unchanged. Within a
        single patch payload, the first rule for a target wins.

        This operation does not replace the entire policy and does not change
        the current defaultAction.
        """
        self._egress_service.patch_rules(rules)

    def pause(self) -> None:
        """
        Pause this sandbox, preserving its state.

        After a successful pause the sandbox is in the PAUSED state, its running
        processes are suspended, and it can be resumed later.

        Raises:
            SandboxException: if pause operation fails
        """
        sandbox_id = self.id
        logger.info("Pausing sandbox: %s", sandbox_id)
        self._sandbox_service.pause_sandbox(sandbox_id)


    def kill(self) -> None:
        """
        Terminate the remote sandbox instance.

        The operation is irreversible and stops the sandbox immediately.

        Note: Local resources stay open. Call :meth:`close`, or use the sync
        context manager, to release them.

        Raises:
            SandboxException: if termination fails
        """
        sandbox_id = self.id
        logger.info("Killing sandbox: %s", sandbox_id)
        self._sandbox_service.kill_sandbox(sandbox_id)

    def close(self) -> None:
        """
        Release the local resources held by this sandbox object.

        Delegates to ``close_transport_if_owned()`` on the connection config.
        The remote sandbox instance keeps running; call :meth:`kill` first if it
        should be terminated.

        Note: Failures are logged as warnings and never raised, so the method is
        safe to call from context-manager cleanup.
        """
        config = self._connection_config
        try:
            config.close_transport_if_owned()
            logger.debug("Closed resources for sandbox %s", self.id)
        except Exception as exc:
            logger.warning(
                "Error closing resources for sandbox %s: %s", self.id, exc, exc_info=True
            )

    def is_healthy(self) -> bool:
        """
        Check if the sandbox is healthy and responsive.

        Returns:
            True if sandbox is healthy, False otherwise
        """
        if self._custom_health_check:
            return self._custom_health_check(self)
        try:
            return self._health_service.ping(self.id)
        except Exception:
            return False

    def check_ready(self, timeout: timedelta, polling_interval: timedelta) -> None:
        """
        Wait for the sandbox to pass health checks with polling.

        :meth:`is_healthy` is called repeatedly until it returns True or the
        timeout elapses. The deadline is tracked with a monotonic clock, so
        adjustments of the system wall clock (NTP sync, manual changes) can neither
        shorten nor extend the wait. The pause between attempts is capped at the
        time remaining, so the method does not overshoot ``timeout`` by up to a
        full polling interval.

        Args:
            timeout: Maximum time to wait for health check to pass
            polling_interval: Time between health check attempts

        Raises:
            SandboxReadyTimeoutException: if health check doesn't pass within timeout
            SandboxException: if health check fails
        """
        timeout_seconds = timeout.total_seconds()
        interval_seconds = polling_interval.total_seconds()

        logger.info(
            "Waiting for sandbox %s to pass health check (timeout: %ss)",
            self.id,
            timeout_seconds,
        )

        # time.monotonic() never goes backwards and is unaffected by system clock
        # updates, unlike time.time().
        deadline = time.monotonic() + timeout_seconds
        attempt = 0
        last_exception: Exception | None = None

        while time.monotonic() < deadline:
            attempt += 1
            logger.debug("Health check attempt #%s for sandbox %s", attempt, self.id)
            try:
                if self.is_healthy():
                    logger.info(
                        "Sandbox %s passed health check after %s attempts",
                        self.id,
                        attempt,
                    )
                    return
                # A clean "not healthy" answer supersedes any earlier error.
                last_exception = None
            except Exception as e:
                # Only a custom health check can raise here; remember the error
                # for the timeout message and keep polling.
                last_exception = e

            # Never sleep past the deadline: skip the pause when time is up and
            # otherwise cap it at the remaining time.
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            time.sleep(min(interval_seconds, remaining))

        error_detail = (
            f"Last error: {last_exception}"
            if last_exception
            else "Health check returned false continuously"
        )
        connection_detail = (
            f"ConnectionConfig(domain={self.connection_config.get_domain()}, "
            f"use_server_proxy={self.connection_config.use_server_proxy})"
        )
        if self.connection_config.use_server_proxy:
            hint = (
                "Hint: server proxy mode is enabled. Check server-to-sandbox connectivity "
                "and server API key/auth configuration."
            )
        else:
            hint = (
                "Hint: direct sandbox endpoint access is enabled. If the SDK cannot directly "
                "reach sandbox network/ports, set ConnectionConfigSync(use_server_proxy=True). "
                "For Docker bridge deployments where server runs in a container, also configure "
                "server [docker].host_ip to a host-reachable address."
            )
        final_message = (
            f"Sandbox health check timed out after {timeout_seconds}s "
            f"({attempt} attempts). {error_detail}. {connection_detail}. {hint}"
        )
        logger.error(final_message)
        raise SandboxReadyTimeoutException(final_message)

    @classmethod
    def create(
        cls,
        image: SandboxImageSpec | str,
        *,
        timeout: timedelta | None = timedelta(minutes=10),
        ready_timeout: timedelta = timedelta(seconds=30),
        env: dict[str, str] | None = None,
        metadata: dict[str, str] | None = None,
        resource: dict[str, str] | None = None,
        network_policy: NetworkPolicy | None = None,
        extensions: dict[str, str] | None = None,
        entrypoint: list[str] | None = None,
        volumes: list[Volume] | None = None,
        connection_config: ConnectionConfigSync | None = None,
        health_check: Callable[["SandboxSync"], bool] | None = None,
        health_check_polling_interval: timedelta = timedelta(milliseconds=200),
        skip_health_check: bool = False,
    ) -> "SandboxSync":
        """
        Create a new sandbox instance with the specified configuration (blocking).

        The sandbox is created through the sandbox service, its execd and egress
        endpoints are resolved, the per-sandbox service adapters are built, and
        (unless ``skip_health_check`` is set) the call waits for readiness.

        If anything fails after the server has returned a sandbox ID, a best-effort
        ``kill_sandbox`` is attempted so the sandbox is not left running; a failure of
        that cleanup is logged and never replaces the original error.

        Args:
            image: Container image specification including image reference and optional auth.
                A plain string is wrapped into ``SandboxImageSpec(image=...)``.
            timeout: Maximum sandbox lifetime. Pass None to require explicit cleanup.
            ready_timeout: Maximum time to wait for sandbox to become ready
            env: Environment variables for the sandbox (defaults to an empty dict)
            metadata: Custom metadata for the sandbox (defaults to an empty dict)
            resource: Resource limits (CPU, memory, etc.). Defaults to
                ``{"cpu": "1", "memory": "2Gi"}`` when not provided or empty.
            network_policy: Optional outbound network policy (egress).
            extensions: Opaque extension parameters passed through to the server as-is.
                Prefer namespaced keys (e.g. ``storage.id``).
            entrypoint: Command to run as entrypoint. Defaults to
                ``["tail", "-f", "/dev/null"]`` when not provided or empty.
            volumes: Optional list of volumes to mount in the sandbox.
            connection_config: Connection configuration. A default
                ``ConnectionConfigSync()`` is used when omitted.
            health_check: Custom sync health check function
            health_check_polling_interval: Time between health check attempts
            skip_health_check: If True, do NOT wait for sandbox readiness/health; returned instance may not be ready yet.

        Returns:
            Fully configured SandboxSync instance. It has passed the readiness check
            unless ``skip_health_check`` was True.

        Raises:
            SandboxException: if sandbox creation or initialization fails
                (re-raised unchanged).
            SandboxInternalException: wraps any other exception raised during creation.
        """
        config = (connection_config or ConnectionConfigSync()).with_transport_if_missing()
        entrypoint = entrypoint or ["tail", "-f", "/dev/null"]
        env = env or {}
        metadata = metadata or {}
        resource = resource or {"cpu": "1", "memory": "2Gi"}
        extensions = extensions or {}

        if isinstance(image, str):
            image = SandboxImageSpec(image=image)

        timeout_log = "manual-cleanup" if timeout is None else f"{timeout.total_seconds()}s"
        logger.info(
            "Creating sandbox with image: %s (timeout: %s)",
            image.image,
            timeout_log,
        )
        factory = AdapterFactorySync(config)
        # Tracked outside the try block so the error path knows whether a sandbox
        # already exists on the server and therefore needs to be terminated.
        sandbox_id: str | None = None
        sandbox_service: SandboxesSync | None = None

        try:
            sandbox_service = factory.create_sandbox_service()
            response = sandbox_service.create_sandbox(
                image,
                entrypoint,
                env,
                metadata,
                timeout,
                resource,
                network_policy,
                extensions,
                volumes,
            )
            sandbox_id = response.id
            execd_endpoint = sandbox_service.get_sandbox_endpoint(
                response.id, DEFAULT_EXECD_PORT, config.use_server_proxy
            )
            egress_endpoint = sandbox_service.get_sandbox_endpoint(
                response.id, DEFAULT_EGRESS_PORT, config.use_server_proxy
            )

            sandbox = cls(
                sandbox_id=response.id,
                sandbox_service=sandbox_service,
                filesystem_service=factory.create_filesystem_service(execd_endpoint),
                command_service=factory.create_command_service(execd_endpoint),
                health_service=factory.create_health_service(execd_endpoint),
                metrics_service=factory.create_metrics_service(execd_endpoint),
                egress_service=factory.create_egress_service(egress_endpoint),
                connection_config=config,
                custom_health_check=health_check,
            )

            if not skip_health_check:
                sandbox.check_ready(ready_timeout, health_check_polling_interval)
                logger.info("Sandbox %s is ready", sandbox.id)
            else:
                logger.info(
                    "Sandbox %s created (skip_health_check=true, sandbox may not be ready yet)",
                    sandbox.id,
                )

            return sandbox
        except Exception as e:
            if sandbox_id and sandbox_service:
                try:
                    logger.warning(
                        "Sandbox creation failed during initialization. Attempting to terminate zombie sandbox: %s",
                        sandbox_id,
                    )
                    sandbox_service.kill_sandbox(sandbox_id)
                except Exception:
                    # Cleanup stays best-effort so the original error is the one that
                    # propagates, but the failure is recorded: otherwise an orphaned
                    # sandbox would keep running with no trace of why.
                    logger.warning(
                        "Failed to terminate zombie sandbox %s; manual cleanup may be required",
                        sandbox_id,
                        exc_info=True,
                    )
            config.close_transport_if_owned()
            if isinstance(e, SandboxException):
                raise
            raise SandboxInternalException(f"Internal exception when creating sandbox: {e}") from e

    @classmethod
    def connect(
        cls,
        sandbox_id: str,
        connection_config: ConnectionConfigSync | None = None,
        health_check: Callable[["SandboxSync"], bool] | None = None,
        connect_timeout: timedelta = timedelta(seconds=30),
        health_check_polling_interval: timedelta = timedelta(milliseconds=200),
        skip_health_check: bool = False,
    ) -> "SandboxSync":
        """
        Connect to an existing sandbox instance by ID (blocking).

        Resolves the sandbox's execd and egress endpoints, builds the per-sandbox
        service adapters, and (unless ``skip_health_check`` is set) waits for the
        sandbox to pass the readiness check. The sandbox itself is not modified.

        Args:
            sandbox_id: ID of the existing sandbox
            connection_config: Connection configuration. A default
                ``ConnectionConfigSync()`` is used when omitted.
            health_check: Custom sync health check function
            connect_timeout: Max time to wait for sandbox readiness/health after connecting.
            health_check_polling_interval: Polling interval used while waiting for readiness/health.
            skip_health_check: If True, do NOT wait for readiness/health; returned instance may not be ready yet.

        Returns:
            Connected SandboxSync instance

        Raises:
            InvalidArgumentException: if ``sandbox_id`` is empty
            SandboxException: if sandbox connection fails (re-raised unchanged)
            SandboxInternalException: wraps any other exception raised while connecting
        """
        if not sandbox_id:
            raise InvalidArgumentException("Sandbox ID must be specified")
        # Accept any string identifier.
        sandbox_id = str(sandbox_id)

        config = (connection_config or ConnectionConfigSync()).with_transport_if_missing()
        logger.info("Connecting to sandbox: %s", sandbox_id)
        factory = AdapterFactorySync(config)

        try:
            sandbox_service = factory.create_sandbox_service()
            execd_endpoint = sandbox_service.get_sandbox_endpoint(
                sandbox_id, DEFAULT_EXECD_PORT, config.use_server_proxy
            )
            egress_endpoint = sandbox_service.get_sandbox_endpoint(
                sandbox_id, DEFAULT_EGRESS_PORT, config.use_server_proxy
            )

            sandbox = cls(
                sandbox_id=sandbox_id,
                sandbox_service=sandbox_service,
                filesystem_service=factory.create_filesystem_service(execd_endpoint),
                command_service=factory.create_command_service(execd_endpoint),
                health_service=factory.create_health_service(execd_endpoint),
                metrics_service=factory.create_metrics_service(execd_endpoint),
                egress_service=factory.create_egress_service(egress_endpoint),
                connection_config=config,
                custom_health_check=health_check,
            )

            # Emit exactly one success message per path: the unqualified one only
            # after the readiness check has actually passed, so the log never
            # claims a plain "Connected" for a sandbox that was not verified.
            if not skip_health_check:
                sandbox.check_ready(connect_timeout, health_check_polling_interval)
                logger.info("Connected to sandbox %s", sandbox_id)
            else:
                logger.info(
                    "Connected to sandbox %s (skip_health_check=true, sandbox may not be ready yet)",
                    sandbox_id,
                )

            return sandbox
        except Exception as e:
            # Release the transport only if this call created it; a caller-supplied
            # transport is the caller's to close.
            config.close_transport_if_owned()
            if isinstance(e, SandboxException):
                raise
            raise SandboxInternalException(f"Failed to connect to sandbox: {e}") from e

    @classmethod
    def resume(
        cls,
        sandbox_id: str,
        connection_config: ConnectionConfigSync | None = None,
        health_check: Callable[["SandboxSync"], bool] | None = None,
        resume_timeout: timedelta = timedelta(seconds=30),
        health_check_polling_interval: timedelta = timedelta(milliseconds=200),
        skip_health_check: bool = False,
    ) -> "SandboxSync":
        """
        Resume a paused sandbox and hand back a fresh SandboxSync bound to it (blocking).

        The server-side resume is issued first. The execd and egress endpoints are then
        looked up again (they may differ after a pause/resume cycle on some backends),
        new service adapters are built on top of them, and the call optionally waits
        until the sandbox passes its readiness/health check.

        Args:
            sandbox_id: ID of the paused sandbox to resume.
            connection_config: Connection configuration (shared transport, headers, timeouts).
                A default ``ConnectionConfigSync()`` is used when omitted.
            health_check: Optional custom sync health check function (falls back to ping).
            resume_timeout: Max time to wait for sandbox readiness/health after resuming.
            health_check_polling_interval: Polling interval used while waiting for readiness/health.
            skip_health_check: If True, do NOT wait for readiness/health; returned instance may not be ready yet.

        Returns:
            A new SandboxSync instance for the resumed sandbox.

        Raises:
            InvalidArgumentException: if ``sandbox_id`` is empty.
            SandboxException: if resuming or initialization fails (re-raised unchanged).
            SandboxInternalException: wraps any other exception raised while resuming.
        """
        if not sandbox_id:
            raise InvalidArgumentException("Sandbox ID must be specified")

        # Normalise whatever identifier was passed into a plain string.
        sandbox_id = str(sandbox_id)

        base_config = connection_config or ConnectionConfigSync()
        conn = base_config.with_transport_if_missing()

        logger.info("Resuming sandbox: %s", sandbox_id)
        adapters = AdapterFactorySync(conn)

        try:
            lifecycle = adapters.create_sandbox_service()
            lifecycle.resume_sandbox(sandbox_id)

            # Endpoints are resolved only after the resume call has returned.
            execd_endpoint = lifecycle.get_sandbox_endpoint(
                sandbox_id, DEFAULT_EXECD_PORT, conn.use_server_proxy
            )
            egress_endpoint = lifecycle.get_sandbox_endpoint(
                sandbox_id, DEFAULT_EGRESS_PORT, conn.use_server_proxy
            )

            filesystem = adapters.create_filesystem_service(execd_endpoint)
            commands = adapters.create_command_service(execd_endpoint)
            health = adapters.create_health_service(execd_endpoint)
            metrics = adapters.create_metrics_service(execd_endpoint)
            egress = adapters.create_egress_service(egress_endpoint)

            resumed = cls(
                sandbox_id=sandbox_id,
                sandbox_service=lifecycle,
                filesystem_service=filesystem,
                command_service=commands,
                health_service=health,
                metrics_service=metrics,
                egress_service=egress,
                connection_config=conn,
                custom_health_check=health_check,
            )

            if skip_health_check:
                logger.info(
                    "Resumed sandbox %s (skip_health_check=true, sandbox may not be ready yet)",
                    sandbox_id,
                )
            else:
                resumed.check_ready(resume_timeout, health_check_polling_interval)

            return resumed
        except Exception as exc:
            # Drop the transport only when this call created it.
            conn.close_transport_if_owned()
            if not isinstance(exc, SandboxException):
                raise SandboxInternalException(f"Failed to resume sandbox: {exc}") from exc
            raise


    def __enter__(self) -> "SandboxSync":
        """
        Sync context manager entry.

        Returns:
            This same instance, so it can be bound with ``with ... as sandbox``.
        """
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """
        Sync context manager exit: calls ``close()`` on this instance.

        Args:
            exc_type: Type of the exception raised inside the ``with`` block, if any (unused).
            exc_val: The exception instance raised inside the ``with`` block, if any (unused).
            exc_tb: Traceback of that exception, if any (unused).

        Returns:
            None, so an exception raised inside the ``with`` block is not suppressed.
        """
        self.close()


================================================
FILE: sdks/sandbox/python/src/opensandbox/sync/services/__init__.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous service interfaces (Protocols) for the sync SDK.
"""

from opensandbox.sync.services.command import CommandsSync
from opensandbox.sync.services.egress import EgressSync
from opensandbox.sync.services.filesystem import FilesystemSync
from opensandbox.sync.services.health import HealthSync
from opensandbox.sync.services.metrics import MetricsSync
from opensandbox.sync.services.sandbox import SandboxesSync

# Names exported by ``from opensandbox.sync.services import *``; each entry is one
# of the service interface classes imported above.
__all__ = [
    "CommandsSync",
    "EgressSync",
    "FilesystemSync",
    "HealthSync",
    "MetricsSync",
    "SandboxesSync",
]


================================================
FILE: sdks/sandbox/python/src/opensandbox/sync/services/command.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous command service interface.

Defines the contract for **blocking** command execution operations inside a sandbox.
This is the sync counterpart of :mod:`opensandbox.services.command`.
"""

from typing import Protocol

from opensandbox.models.execd import (
    CommandLogs,
    CommandStatus,
    Execution,
    RunCommandOpts,
)
from opensandbox.models.execd_sync import ExecutionHandlersSync


class CommandsSync(Protocol):
    """
    Blocking contract for executing commands inside a sandbox.

    Implementations run commands securely within a sandbox environment and support
    SSE-streamed output, timeout handling, and interruption of running commands.

    Notes:
        - Every method is **blocking** and runs in the calling thread.
        - Streamed output arrives over SSE and is collected into an ``Execution`` object.
    """

    def run(
        self,
        command: str,
        *,
        opts: RunCommandOpts | None = None,
        handlers: ExecutionHandlersSync | None = None,
    ) -> Execution:
        """
        Run a shell command in the sandbox.

        Depending on the request configuration and the optional handlers, the command
        may be executed in streaming (SSE) mode.

        Args:
            command: Text of the shell command to run.
            opts: Execution options, e.g. background or working_directory.
            handlers: Optional callbacks invoked for streaming events.

        Returns:
            The ``Execution`` holding the result/events of the command.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def interrupt(self, execution_id: str) -> None:
        """
        Stop a command execution that is still running.

        Typically this delivers a termination signal to the process tied to the
        given execution ID.

        Args:
            execution_id: Identifier of the execution to interrupt.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def get_command_status(self, execution_id: str) -> CommandStatus:
        """
        Report the current running status of a command.

        Args:
            execution_id: Identifier of the execution to look up.

        Returns:
            A CommandStatus with the running state and, when available, the exit code.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def get_background_command_logs(
        self,
        execution_id: str,
        cursor: int | None = None,
    ) -> CommandLogs:
        """
        Fetch the logs of a background command without streaming.

        Args:
            execution_id: Identifier of the execution to look up.
            cursor: Optional line cursor used for incremental reads.

        Returns:
            A CommandLogs with the raw output and the latest cursor.

        Raises:
            SandboxException: If the operation fails.
        """
        ...


================================================
FILE: sdks/sandbox/python/src/opensandbox/sync/services/egress.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous egress service interface.
"""

from typing import Protocol

from opensandbox.models.sandboxes import NetworkPolicy, NetworkRule


class EgressSync(Protocol):
    """Blocking service for the runtime egress policy, talking directly to the sidecar."""

    def get_policy(self) -> NetworkPolicy:
        """Fetch the egress policy currently held by the sidecar."""
        ...

    def patch_rules(self, rules: list[NetworkRule]) -> None:
        """Merge the given egress rules into the sidecar policy.

        Merge semantics of the sidecar policy API:

        - An incoming rule overrides an existing rule that has the same target.
        - Existing rules for other targets are left untouched.
        - If one patch payload names a target more than once, the first rule wins.
        - The current defaultAction is preserved.
        """
        ...


================================================
FILE: sdks/sandbox/python/src/opensandbox/sync/services/filesystem.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous filesystem service interface.

Defines the contract for **blocking** filesystem operations inside a sandbox.
This is the sync counterpart of :mod:`opensandbox.services.filesystem`.
"""

from collections.abc import Iterator
from io import IOBase
from typing import Protocol

from opensandbox.models.filesystem import (
    ContentReplaceEntry,
    EntryInfo,
    MoveEntry,
    SearchEntry,
    SetPermissionEntry,
    WriteEntry,
)


class FilesystemSync(Protocol):
    """
    Blocking contract for filesystem operations inside a sandbox.

    Covers file operations, directory management, and metadata handling within a
    sandbox environment.

    Notes:
        - Every method is **blocking**.
        - A path may be absolute, or relative to the sandbox working directory
          (server-defined).
    """

    def read_file(
        self, path: str, *, encoding: str = "utf-8", range_header: str | None = None
    ) -> str:
        """
        Return a file's content decoded as text.

        Args:
            path: Absolute or relative path of the file to read.
            encoding: Character encoding used for the content (default: UTF-8).
            range_header: HTTP byte range to read, e.g. "bytes=0-1023".

        Returns:
            The content of the file as a string.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def read_bytes(
        self,
        path: str,
        *,
        range_header: str | None = None,
    ) -> bytes:
        """
        Return a file's content as raw bytes.

        Args:
            path: Absolute or relative path of the file to read.
            range_header: HTTP byte range to read, e.g. "bytes=0-1023".

        Returns:
            The content of the file as bytes.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def read_bytes_stream(
        self,
        path: str,
        *,
        chunk_size: int = 64 * 1024,
        range_header: str | None = None,
    ) -> Iterator[bytes]:
        """
        Iterate over a file's content in byte chunks (blocking iterator).

        Args:
            path: Path of the file to read.
            chunk_size: Size of each chunk in bytes (default: 64KiB).
            range_header: Optional HTTP range header.

        Yields:
            Successive byte chunks of the file.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def write_files(self, entries: list[WriteEntry]) -> None:
        """
        Write the files described by the given write entries.

        Args:
            entries: WriteEntry objects naming the files to write and their content.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def write_file(
        self,
        path: str,
        data: str | bytes | IOBase,
        *,
        encoding: str = "utf-8",
        mode: int = 755,
        owner: str | None = None,
        group: str | None = None,
    ) -> None:
        """
        Convenience method that writes one file.

        Args:
            path: Path of the destination file.
            data: Content to write; a str, bytes, or file-like object.
            encoding: Character encoding, used when ``data`` is a str.
            mode: Unix file permissions (implementation-defined). Note that the
                default is the decimal integer ``755``, not the octal literal
                ``0o755``; how the number is interpreted is up to the implementation.
            owner: Owner username.
            group: Group name.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def create_directories(self, entries: list[WriteEntry]) -> None:
        """
        Create the directories described by the given entries.

        Args:
            entries: WriteEntry objects naming the directories to create.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def delete_files(self, paths: list[str]) -> None:
        """
        Remove the given files.

        Args:
            paths: Paths of the files to delete.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def delete_directories(self, paths: list[str]) -> None:
        """
        Remove the given directories.

        Args:
            paths: Paths of the directories to delete.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def move_files(self, entries: list[MoveEntry]) -> None:
        """
        Move files from their source paths to their destination paths.

        Args:
            entries: MoveEntry objects giving the source and destination of each move.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def set_permissions(self, entries: list[SetPermissionEntry]) -> None:
        """
        Apply new filesystem permissions to the given entries.

        Args:
            entries: SetPermissionEntry objects naming the files and their new permissions.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def replace_contents(self, entries: list[ContentReplaceEntry]) -> None:
        """
        Perform search-and-replace edits on file content.

        Args:
            entries: ContentReplaceEntry objects describing the replacement operations.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def search(self, entry: SearchEntry) -> list[EntryInfo]:
        """
        Find files and directories that match the given criteria.

        Args:
            entry: SearchEntry holding the search parameters and criteria.

        Returns:
            EntryInfo objects with the metadata of every matching file/directory.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def get_file_info(self, paths: list[str]) -> dict[str, EntryInfo]:
        """
        Look up metadata for the given paths.

        Args:
            paths: File/directory paths to describe.

        Returns:
            A mapping from each path to the EntryInfo holding its metadata.

        Raises:
            SandboxException: If the operation fails.
        """
        ...


================================================
FILE: sdks/sandbox/python/src/opensandbox/sync/services/health.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous health service interface.

Defines the contract for **blocking** health checks against a sandbox instance.
This is the sync counterpart of :mod:`opensandbox.services.health`.
"""

from typing import Protocol


class HealthSync(Protocol):
    """
    Blocking contract for sandbox health checks.

    Offers a lightweight way to confirm that a sandbox, together with its execd
    service, can be reached and answers requests.
    """

    def ping(
        self,
        sandbox_id: str,
    ) -> bool:
        """
        Check liveness by pinging the sandbox's execd service.

        Args:
            sandbox_id: Identifier of the sandbox to ping.

        Returns:
            True when the sandbox answers successfully, False otherwise.

        Raises:
            SandboxException: If the underlying request fails in a non-recoverable way.
        """
        ...


================================================
FILE: sdks/sandbox/python/src/opensandbox/sync/services/metrics.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous metrics service interface.

Defines the contract for **blocking** metrics retrieval from a sandbox instance.
This is the sync counterpart of :mod:`opensandbox.services.metrics`.
"""

from typing import Protocol

from opensandbox.models.sandboxes import SandboxMetrics


class MetricsSync(Protocol):
    """
    Blocking contract for reading sandbox metrics.

    Exposes resource usage statistics (CPU, memory, etc.) of a running sandbox.
    """

    def get_metrics(
        self,
        sandbox_id: str,
    ) -> SandboxMetrics:
        """
        Fetch the current metrics of the sandbox with the given id.

        Args:
            sandbox_id: Identifier of the sandbox to query.

        Returns:
            The sandbox's current metrics, including CPU/memory and other usage information.

        Raises:
            SandboxException: If the operation fails.
        """
        ...


================================================
FILE: sdks/sandbox/python/src/opensandbox/sync/services/sandbox.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Synchronous sandbox service interface.

Defines the contract for **blocking** sandbox lifecycle operations.
This is the sync counterpart of :mod:`opensandbox.services.sandbox`.
"""

from datetime import datetime, timedelta
from typing import Protocol

from opensandbox.models.sandboxes import (
    NetworkPolicy,
    PagedSandboxInfos,
    SandboxCreateResponse,
    SandboxEndpoint,
    SandboxFilter,
    SandboxImageSpec,
    SandboxInfo,
    SandboxRenewResponse,
    Volume,
)


class SandboxesSync(Protocol):
    """
    Blocking contract for core sandbox lifecycle management.

    Abstracts sandbox creation, management, and termination so that business logic
    stays isolated from the details of the API implementation.
    """

    def create_sandbox(
        self,
        spec: SandboxImageSpec,
        entrypoint: list[str],
        env: dict[str, str],
        metadata: dict[str, str],
        timeout: timedelta | None,
        resource: dict[str, str],
        network_policy: NetworkPolicy | None,
        extensions: dict[str, str],
        volumes: list[Volume] | None,
    ) -> SandboxCreateResponse:
        """
        Create a sandbox from the given configuration (blocking).

        Args:
            spec: Image specification of the sandbox.
            entrypoint: Command executed as the entrypoint.
            env: Environment variables.
            metadata: Custom metadata.
            timeout: Lifetime / expiration duration of the sandbox. Pass None to
                require explicit cleanup.
            resource: Resource limits.
            network_policy: Optional outbound network policy (egress).
            extensions: Opaque extension parameters forwarded to the server as-is.
                Prefer namespaced keys (e.g. ``storage.id``).
            volumes: Optional list of volumes to mount in the sandbox.

        Returns:
            The sandbox create response.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def get_sandbox_info(self, sandbox_id: str) -> SandboxInfo:
        """
        Fetch information about an existing sandbox.

        Args:
            sandbox_id: Identifier of the sandbox.

        Returns:
            The sandbox's current information.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def list_sandboxes(self, filter: SandboxFilter) -> PagedSandboxInfos:
        """
        List sandboxes, optionally narrowed by a filter.

        Args:
            filter: Criteria the listed sandboxes must match.

        Returns:
            A paged list of information for the sandboxes matching the filter.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def get_sandbox_endpoint(
        self,
        sandbox_id: str,
        port: int,
        use_server_proxy: bool = False,
    ) -> SandboxEndpoint:
        """
        Resolve the endpoint of a port exposed by a sandbox.

        Args:
            sandbox_id: Sandbox id.
            port: Port number of the endpoint.
            use_server_proxy: Whether the endpoint should go through the server proxy.

        Returns:
            The endpoint of the target sandbox.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def pause_sandbox(self, sandbox_id: str) -> None:
        """
        Pause a running sandbox while keeping its state.

        Args:
            sandbox_id: Identifier of the sandbox.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def resume_sandbox(self, sandbox_id: str) -> None:
        """
        Resume a sandbox that was paused.

        Args:
            sandbox_id: Identifier of the sandbox.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def renew_sandbox_expiration(
        self,
        sandbox_id: str,
        new_expiration_time: datetime,
    ) -> SandboxRenewResponse:
        """
        Extend a sandbox's expiration time.

        Args:
            sandbox_id: Identifier of the sandbox.
            new_expiration_time: New expiration timestamp (timezone-aware recommended).

        Returns:
            The renew response, which includes the new expiration time.

        Raises:
            SandboxException: If the operation fails.
        """
        ...

    def kill_sandbox(self, sandbox_id: str) -> None:
        """
        Terminate a sandbox and free every resource associated with it.

        Args:
            sandbox_id: Identifier of the sandbox.

        Raises:
            SandboxException: If the operation fails.
        """
        ...


================================================
FILE: sdks/sandbox/python/tests/test_adapters_eager_init.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pytest

from opensandbox.adapters.command_adapter import CommandsAdapter
from opensandbox.adapters.filesystem_adapter import FilesystemAdapter
from opensandbox.adapters.health_adapter import HealthAdapter
from opensandbox.adapters.metrics_adapter import MetricsAdapter
from opensandbox.adapters.sandboxes_adapter import SandboxesAdapter
from opensandbox.config import ConnectionConfig
from opensandbox.models.sandboxes import SandboxEndpoint


def test_sandbox_service_adapter_eager_init() -> None:
    """Building a SandboxesAdapter from a domain/api-key config yields an object."""
    connection = ConnectionConfig(domain="localhost:8080", api_key="x")
    assert SandboxesAdapter(connection) is not None


@pytest.mark.asyncio
async def test_execd_service_adapters_eager_init_and_urls() -> None:
    """All four execd adapters expose a client, and URL helpers keep the path suffix."""
    connection = ConnectionConfig(protocol="http")
    execd_endpoint = SandboxEndpoint(endpoint="localhost:44772", port=44772)

    commands = CommandsAdapter(connection, execd_endpoint)
    filesystem = FilesystemAdapter(connection, execd_endpoint)
    health = HealthAdapter(connection, execd_endpoint)
    metrics = MetricsAdapter(connection, execd_endpoint)

    # The generated execd URL must end with exactly the path that was asked for.
    for adapter, path in ((commands, "/ping"), (filesystem, "/files/download")):
        assert adapter._get_execd_url(path).endswith(path)

    # Ensure openapi clients are available without lazy init
    for adapter in (commands, filesystem, health, metrics):
        assert await adapter._get_client() is not None


================================================
FILE: sdks/sandbox/python/tests/test_command_service_adapter_streaming.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations

import json

import httpx
import pytest

from opensandbox.adapters.command_adapter import CommandsAdapter
from opensandbox.config import ConnectionConfig
from opensandbox.exceptions import InvalidArgumentException, SandboxApiException
from opensandbox.models.sandboxes import SandboxEndpoint


class _SseTransport(httpx.AsyncBaseTransport):
    """In-memory httpx transport that fakes the execd ``/command`` SSE endpoint.

    A POST to ``/command`` whose JSON body has ``command == "echo hi"`` is
    answered with a canned event stream; every other request gets a 500 with
    body ``boom``. The most recent request is kept in ``last_request`` so tests
    can inspect what was sent.
    """

    def __init__(self) -> None:
        self.last_request: httpx.Request | None = None

    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
        """Record ``request`` and return the canned SSE stream or a 500."""
        self.last_request = request

        raw = request.content
        text = raw.decode("utf-8") if isinstance(raw, (bytes, bytearray)) else ""
        data = json.loads(text) if text else {}

        is_echo_command = (
            request.url.path == "/command" and data.get("command") == "echo hi"
        )
        if not is_echo_command:
            return httpx.Response(500, content=b"boom", request=request)

        # The stream deliberately contains a stray blank line and a non-JSON
        # frame in between the well-formed events (presumably to check that the
        # adapter tolerates them -- confirm against the adapter's parser).
        frames = (
            b'data: {"type":"init","text":"exec-1","timestamp":1}\n\n',
            b'\n',
            b'data: {"type":"stdout","text":"hi","timestamp":2}\n\n',
            b"not-json\n\n",
            b'data: {"type":"result","results":{"text":"ok"},"timestamp":3}\n\n',
            b'data: {"type":"execution_complete","timestamp":4,"execution_time":5}\n\n',
        )
        return httpx.Response(
            200,
            headers={"Content-Type": "text/event-stream"},
            content=b"".join(frames),
            request=request,
        )


@pytest.mark.asyncio
async def test_run_command_streaming_happy_path_updates_execution() -> None:
    """Running ``echo hi`` fills id, stdout and result from the canned SSE stream."""
    fake_transport = _SseTransport()
    adapter = CommandsAdapter(
        ConnectionConfig(protocol="http", transport=fake_transport),
        SandboxEndpoint(endpoint="localhost:44772", port=44772),
    )

    outcome = await adapter.run("echo hi")

    assert outcome.id == "exec-1"
    assert outcome.logs.stdout[0].text == "hi"
    assert outcome.result[0].text == "ok"

    # The request that reached the transport must ask for an event stream.
    sent = fake_transport.last_request
    assert sent is not None
    assert sent.headers.get("accept") == "text/event-stream"


@pytest.mark.asyncio
async def test_run_command_rejects_blank_command() -> None:
    """A whitespace-only command raises InvalidArgumentException."""
    adapter = CommandsAdapter(
        ConnectionConfig(protocol="http"),
        SandboxEndpoint(endpoint="localhost:44772", port=44772),
    )

    with pytest.raises(InvalidArgumentException):
        await adapter.run("   ")


@pytest.mark.asyncio
async def test_run_command_non_200_raises_api_exception() -> None:
    """A 500 reply from the transport surfaces as SandboxApiException."""
    fake_transport = _SseTransport()
    adapter = CommandsAdapter(
        ConnectionConfig(protocol="http", transport=fake_transport),
        SandboxEndpoint(endpoint="localhost:44772", port=44772),
    )

    # _SseTransport answers 500 for any command other than "echo hi".
    with pytest.raises(SandboxApiException):
        await adapter.run("other")


================================================
FILE: sdks/sandbox/python/tests/test_command_service_sse_client_config.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from opensandbox.adapters.command_adapter import CommandsAdapter
from opensandbox.config import ConnectionConfig
from opensandbox.models.sandboxes import SandboxEndpoint


def test_sse_client_has_event_stream_headers_and_no_read_timeout() -> None:
    """The adapter's SSE client sends ``Accept: text/event-stream`` and has no read timeout."""
    adapter = CommandsAdapter(
        ConnectionConfig(protocol="http"),
        SandboxEndpoint(endpoint="localhost:44772", port=44772),
    )

    client = adapter._sse_client
    assert client is not None
    assert client.headers.get("Accept") == "text/event-stream"
    assert client.timeout.read is None


================================================
FILE: sdks/sandbox/python/tests/test_connection_config.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import httpx
import pytest

from opensandbox.config import ConnectionConfig


def test_protocol_validation() -> None:
    """``http`` and ``https`` are accepted; ``ftp`` raises ValueError."""
    # Accepted protocols: construction alone must not raise.
    ConnectionConfig(protocol="http")
    ConnectionConfig(protocol="https")

    # Anything else is rejected at construction time.
    with pytest.raises(ValueError):
        ConnectionConfig(protocol="ftp")  # type: ignore[arg-type]


def test_get_base_url_with_domain_and_protocol() -> None:
    """The base URL is ``<protocol>://<domain>/v1``."""
    base_url = ConnectionConfig(
        domain="example.com:1234", protocol="https"
    ).get_base_url()
    assert base_url == "https://example.com:1234/v1"


def test_get_base_url_domain_can_include_scheme() -> None:
    """A scheme embedded in ``domain`` is kept even when ``protocol`` differs."""
    base_url = ConnectionConfig(
        domain="https://example.com:9999", protocol="http"
    ).get_base_url()
    assert base_url == "https://example.com:9999/v1"


@pytest.mark.asyncio
async def test_close_transport_if_owned_default_transport() -> None:
    """Closing a config whose transport came from with_transport_if_missing() does not raise."""
    base_config = ConnectionConfig()
    config_with_transport = base_config.with_transport_if_missing()
    # default transport should be closable and owned
    await config_with_transport.close_transport_if_owned()


@pytest.mark.asyncio
async def test_close_transport_if_owned_does_not_close_user_transport() -> None:
    """A transport supplied by the caller is left open by close_transport_if_owned()."""

    class CustomTransport(httpx.AsyncBaseTransport):
        """Transport that only records whether ``aclose`` was awaited."""

        def __init__(self) -> None:
            self.closed = False

        async def handle_async_request(self, request: httpx.Request) -> httpx.Response:  # pragma: no cover
            raise RuntimeError("not used")

        async def aclose(self) -> None:
            self.closed = True

    user_transport = CustomTransport()
    config = ConnectionConfig(transport=user_transport)

    await config.close_transport_if_owned()

    assert user_transport.closed is False


================================================
FILE: sdks/sandbox/python/tests/test_connection_config_env_and_timeout.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from datetime import timedelta

import pytest

from opensandbox.config import ConnectionConfig


def test_get_api_key_from_env(monkeypatch: pytest.MonkeyPatch) -> None:
    """With ``api_key=None`` the key is read from ``OPEN_SANDBOX_API_KEY``."""
    monkeypatch.setenv("OPEN_SANDBOX_API_KEY", "k1")
    resolved_key = ConnectionConfig(api_key=None).get_api_key()
    assert resolved_key == "k1"


def test_get_domain_from_env_and_default(monkeypatch: pytest.MonkeyPatch) -> None:
    """The domain defaults to ``localhost:8080`` and honours ``OPEN_SANDBOX_DOMAIN``."""
    # Without the environment variable the built-in default is returned.
    monkeypatch.delenv("OPEN_SANDBOX_DOMAIN", raising=False)
    default_domain = ConnectionConfig(domain=None).get_domain()
    assert default_domain == "localhost:8080"

    # Once the variable is set, a freshly built config picks it up.
    monkeypatch.setenv("OPEN_SANDBOX_DOMAIN", "example.com:8081")
    env_domain = ConnectionConfig(domain=None).get_domain()
    assert env_domain == "example.com:8081"


def test_timeout_must_be_positive() -> None:
    """A one-second request timeout is accepted; a zero timeout raises ValueError."""
    one_second = timedelta(seconds=1)
    zero = timedelta(seconds=0)

    ConnectionConfig(request_timeout=one_second)
    with pytest.raises(ValueError):
        ConnectionConfig(request_timeout=zero)


================================================
FILE: sdks/sandbox/python/tests/test_converters_and_error_handling.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations

from datetime import datetime, timedelta

import pytest
from httpx import HTTPStatusError, Request, Response

from opensandbox.adapters.converter.exception_converter import (
    ExceptionConverter,
    parse_sandbox_error,
)
from opensandbox.adapters.converter.execution_converter import (
    ExecutionConverter,
)
from opensandbox.adapters.converter.filesystem_model_converter import (
    FilesystemModelConverter,
)
from opensandbox.adapters.converter.metrics_model_converter import (
    MetricsModelConverter,
)
from opensandbox.adapters.converter.response_handler import (
    handle_api_error,
    require_parsed,
)
from opensandbox.adapters.converter.sandbox_model_converter import (
    SandboxModelConverter,
)
from opensandbox.api.lifecycle.errors import UnexpectedStatus
from opensandbox.exceptions import (
    InvalidArgumentException,
    SandboxApiException,
    SandboxInternalException,
)
from opensandbox.models.execd import RunCommandOpts
from opensandbox.models.sandboxes import NetworkPolicy, NetworkRule, SandboxImageSpec


def test_parse_sandbox_error_from_json_bytes() -> None:
    """A JSON error body given as bytes yields its ``code`` and ``message``."""
    parsed = parse_sandbox_error(b'{"code":"X","message":"m"}')
    assert parsed is not None
    assert (parsed.code, parsed.message) == ("X", "m")


def test_parse_sandbox_error_from_plain_text_string() -> None:
    """A non-JSON string becomes an ``UNEXPECTED_RESPONSE`` error carrying the raw text."""
    parsed = parse_sandbox_error("not-json")
    assert parsed is not None
    assert (parsed.code, parsed.message) == ("UNEXPECTED_RESPONSE", "not-json")


def test_parse_sandbox_error_from_invalid_utf8_bytes_fallback_message() -> None:
    """Undecodable bytes still produce an error whose message holds U+FFFD."""
    parsed = parse_sandbox_error(b"\xff\xfe")
    assert parsed is not None
    assert parsed.code == "UNEXPECTED_RESPONSE"

    fallback_message = parsed.message
    assert fallback_message is not None
    # The replacement character shows the bytes were decoded leniently.
    assert "\ufffd" in fallback_message


def test_handle_api_error_raises_with_parsed_message() -> None:
    """A 400 response raises SandboxApiException with the parsed message and request id."""

    class Parsed:
        """Stand-in for a parsed error body."""

        message = "bad request"

    class Resp:
        """Stand-in for a detailed API response."""

        status_code = 400
        parsed = Parsed()
        headers = {"X-Request-ID": "req-123"}

    with pytest.raises(SandboxApiException) as exc_info:
        handle_api_error(Resp(), "Op")

    raised = exc_info.value
    assert "bad request" in str(raised)
    assert raised.request_id == "req-123"


def test_handle_api_error_noop_on_success() -> None:
    """A 200 response passes through handle_api_error without raising."""

    class Resp:
        """Stand-in for a successful response with no parsed body."""

        status_code = 200
        parsed = None

    ok_response = Resp()
    handle_api_error(ok_response, "Op")


def test_require_parsed_includes_request_id_on_invalid_payload() -> None:
    """A missing parsed payload raises SandboxApiException carrying the request id header."""

    class Resp:
        """Stand-in for a 200 response whose body failed to parse."""

        status_code = 200
        parsed = None
        # Lower-case header name on purpose.
        headers = {"x-request-id": "req-456"}

    with pytest.raises(SandboxApiException) as exc_info:
        require_parsed(Resp(), expected_type=str, operation_name="Op")

    assert exc_info.value.request_id == "req-456"


def test_exception_converter_maps_common_types() -> None:
    """ValueError maps to InvalidArgumentException, OSError to SandboxInternalException."""
    expectations = (
        (ValueError("x"), InvalidArgumentException),
        (OSError("x"), SandboxInternalException),
    )
    for raw_error, expected_type in expectations:
        converted = ExceptionConverter.to_sandbox_exception(raw_error)
        assert isinstance(converted, expected_type)


def test_exception_converter_maps_generated_unexpected_status_to_api_exception() -> (
    None
):
    """An UnexpectedStatus becomes SandboxApiException with status and parsed error code."""
    unexpected = UnexpectedStatus(400, b'{"code":"X","message":"bad"}')

    result = ExceptionConverter.to_sandbox_exception(unexpected)

    assert isinstance(result, SandboxApiException)
    assert result.status_code == 400
    parsed_error = result.error
    assert parsed_error is not None
    assert parsed_error.code == "X"


def test_exception_converter_maps_httpx_status_error_to_api_exception() -> None:
    """An httpx HTTPStatusError becomes SandboxApiException with status and parsed error code."""
    req = Request("GET", "https://example.test")
    resp = Response(
        502, request=req, content=b'{"code":"UPSTREAM","message":"gateway"}'
    )
    status_error = HTTPStatusError("bad gateway", request=req, response=resp)

    result = ExceptionConverter.to_sandbox_exception(status_error)

    assert isinstance(result, SandboxApiException)
    assert result.status_code == 502
    parsed_error = result.error
    assert parsed_error is not None
    assert parsed_error.code == "UPSTREAM"


def test_execution_converter_to_api_run_command_request() -> None:
    """Check how RunCommandOpts fields are serialized into the API run-command request.

    Covers four cases: default options, a working directory, a timeout, and
    uid/gid/envs. ``timedelta`` comes from the module-level import; it is not
    re-imported here.
    """
    from opensandbox.api.execd.types import UNSET

    # Defaults: only the command is present, no cwd key.
    api = ExecutionConverter.to_api_run_command_request("echo hi", RunCommandOpts())
    d = api.to_dict()
    assert d["command"] == "echo hi"
    assert "cwd" not in d

    # working_directory is serialized under the "cwd" key.
    api2 = ExecutionConverter.to_api_run_command_request(
        "echo hi",
        RunCommandOpts(working_directory="/tmp"),
    )
    d2 = api2.to_dict()
    assert d2["cwd"] == "/tmp"
    # background defaults to False in domain opts; when False we omit it from the API request.
    assert d2.get("background", UNSET) is UNSET

    # A 60 second timeout is serialized as 60_000 (i.e. milliseconds).
    api3 = ExecutionConverter.to_api_run_command_request(
        "sleep 10",
        RunCommandOpts(timeout=timedelta(seconds=60)),
    )
    d3 = api3.to_dict()
    assert d3["command"] == "sleep 10"
    assert d3["timeout"] == 60_000
    # timeout omitted when not set (backward compat)
    assert (
        "timeout"
        not in ExecutionConverter.to_api_run_command_request(
            "x", RunCommandOpts()
        ).to_dict()
    )

    # uid, gid and envs pass through unchanged; cwd stays absent.
    api4 = ExecutionConverter.to_api_run_command_request(
        "id",
        RunCommandOpts(
            uid=1000,
            gid=1000,
            envs={"APP_ENV": "test", "LOG_LEVEL": "debug"},
        ),
    )
    d4 = api4.to_dict()
    assert d4["uid"] == 1000
    assert d4["gid"] == 1000
    assert d4["envs"] == {"APP_ENV": "test", "LOG_LEVEL": "debug"}
    assert "cwd" not in d4


def test_run_command_opts_validates_gid_requires_uid() -> None:
    """Passing ``gid`` without ``uid`` raises ValueError with the documented message."""
    expected_message = "uid is required when gid is provided"
    with pytest.raises(ValueError, match=expected_message):
        RunCommandOpts(gid=1000)


def test_filesystem_and_metrics_converters() -> None:
    """Smoke-test the filesystem and metrics converters on minimal API models.

    Only ``timezone`` is imported locally; ``datetime`` is already imported at
    module level.
    """
    from datetime import timezone

    from opensandbox.api.execd.models import FileInfo, Metrics

    fi = FileInfo(
        path="/a",
        mode=644,
        owner="u",
        group="g",
        size=1,
        modified_at=datetime(2025, 1, 1, tzinfo=timezone.utc),
        created_at=datetime(2025, 1, 1, tzinfo=timezone.utc),
    )
    entry = FilesystemModelConverter.to_entry_info(fi)
    assert entry.path == "/a"

    api_metrics = Metrics(
        cpu_count=1.0,
        cpu_used_pct=2.0,
        mem_total_mib=3.0,
        mem_used_mib=4.0,
        timestamp=5,
    )
    m = MetricsModelConverter.to_sandbox_metrics(api_metrics)
    # cpu_used_pct on the API model surfaces as cpu_used_percentage.
    assert m.cpu_used_percentage == 2.0


def test_sandbox_model_converter_to_api_create_request_and_renew_tz() -> None:
    """Check the create-request payload shape and that renew requests get a UTC tzinfo."""
    from datetime import timezone

    image = SandboxImageSpec("python:3.11")
    policy = NetworkPolicy(
        defaultAction="deny",
        egress=[NetworkRule(action="allow", target="pypi.org")],
    )
    create_request = SandboxModelConverter.to_api_create_sandbox_request(
        spec=image,
        entrypoint=["/bin/sh"],
        env={},
        metadata={},
        timeout=timedelta(seconds=3),
        resource={"cpu": "100m"},
        network_policy=policy,
        extensions={},
        volumes=None,
    )
    payload = create_request.to_dict()

    assert payload["image"]["uri"] == "python:3.11"
    assert payload["timeout"] == 3
    # Empty env/metadata mappings are left out of the payload entirely.
    assert "env" not in payload
    assert "metadata" not in payload

    dumped_policy = payload["networkPolicy"]
    assert dumped_policy["defaultAction"] == "deny"
    assert dumped_policy["egress"] == [{"action": "allow", "target": "pypi.org"}]

    # A naive datetime goes in; the resulting expires_at carries timezone.utc.
    renew_request = SandboxModelConverter.to_api_renew_request(datetime(2025, 1, 1))
    assert renew_request.expires_at.tzinfo is timezone.utc


def test_sandbox_model_converter_omits_timeout_for_manual_cleanup() -> None:
    """With ``timeout=None`` the create-request payload has no ``timeout`` key."""
    create_request = SandboxModelConverter.to_api_create_sandbox_request(
        spec=SandboxImageSpec("python:3.11"),
        entrypoint=["/bin/sh"],
        env={},
        metadata={},
        timeout=None,
        resource={"cpu": "100m"},
        network_policy=None,
        extensions={},
        volumes=None,
    )

    payload = create_request.to_dict()
    assert "timeout" not in payload


================================================
FILE: sdks/sandbox/python/tests/test_filesystem_search_error_handling.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from types import SimpleNamespace

import pytest

from opensandbox.adapters.filesystem_adapter import FilesystemAdapter
from opensandbox.config import ConnectionConfig
from opensandbox.config.connection_sync import ConnectionConfigSync
from opensandbox.exceptions import SandboxApiException
from opensandbox.models.filesystem import SearchEntry
from opensandbox.models.sandboxes import SandboxEndpoint
from opensandbox.sync.adapters.filesystem_adapter import FilesystemAdapterSync


@pytest.mark.asyncio
async def test_async_search_unexpected_response_without_headers_still_raises_api_exception(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A header-less response with an unexpected payload still raises SandboxApiException."""

    async def _fake_asyncio_detailed(**_: object) -> SimpleNamespace:
        # No ``headers`` attribute, and ``parsed`` is an arbitrary object.
        return SimpleNamespace(status_code=200, parsed=object())

    from opensandbox.api.execd.api.filesystem import search_files

    monkeypatch.setattr(search_files, "asyncio_detailed", _fake_asyncio_detailed)

    adapter = FilesystemAdapter(
        ConnectionConfig(protocol="http"),
        SandboxEndpoint(endpoint="localhost:44772", port=44772),
    )

    async def _fake_get_client() -> object:
        return object()

    monkeypatch.setattr(adapter, "_get_client", _fake_get_client)

    with pytest.raises(SandboxApiException) as exc_info:
        await adapter.search(SearchEntry(path="/tmp", pattern="*.log"))

    raised = exc_info.value
    assert "unexpected response type" in str(raised)
    assert raised.request_id is None


def test_sync_search_unexpected_response_without_headers_still_raises_api_exception(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Sync variant: a header-less unexpected payload still raises SandboxApiException."""

    def _fake_sync_detailed(**_: object) -> SimpleNamespace:
        # No ``headers`` attribute, and ``parsed`` is an arbitrary object.
        return SimpleNamespace(status_code=200, parsed=object())

    from opensandbox.api.execd.api.filesystem import search_files

    monkeypatch.setattr(search_files, "sync_detailed", _fake_sync_detailed)

    adapter = FilesystemAdapterSync(
        ConnectionConfigSync(protocol="http"),
        SandboxEndpoint(endpoint="localhost:44772", port=44772),
    )

    with pytest.raises(SandboxApiException) as exc_info:
        adapter.search(SearchEntry(path="/tmp", pattern="*.log"))

    raised = exc_info.value
    assert "unexpected response type" in str(raised)
    assert raised.request_id is None


================================================
FILE: sdks/sandbox/python/tests/test_models_stability.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations

from datetime import datetime, timezone

import pytest

from opensandbox.api.lifecycle.models.create_sandbox_response import (
    CreateSandboxResponse as ApiCreateSandboxResponse,
)
from opensandbox.api.lifecycle.models.image_spec import ImageSpec as ApiImageSpec
from opensandbox.api.lifecycle.models.sandbox import Sandbox as ApiSandbox
from opensandbox.api.lifecycle.types import UNSET
from opensandbox.models.execd import (
    Execution,
    ExecutionError,
    ExecutionLogs,
    ExecutionResult,
    OutputMessage,
)
from opensandbox.models.filesystem import MoveEntry, WriteEntry
from opensandbox.models.sandboxes import (
    OSSFS,
    PVC,
    Host,
    SandboxFilter,
    SandboxImageAuth,
    SandboxImageSpec,
    SandboxInfo,
    SandboxStatus,
    Volume,
)


def test_sandbox_image_spec_supports_positional_image() -> None:
    """The image reference may be passed positionally and is exposed as ``.image``."""
    image_ref = "python:3.11"
    assert SandboxImageSpec(image_ref).image == image_ref


def test_sandbox_image_spec_rejects_blank_image() -> None:
    """A whitespace-only image reference raises ValueError."""
    blank_image = "   "
    with pytest.raises(ValueError):
        SandboxImageSpec(blank_image)


def test_api_image_spec_tolerates_null_auth() -> None:
    """``auth: null`` in the wire dict is parsed as UNSET instead of failing."""
    wire = {"uri": "python:3.11", "auth": None}
    parsed = ApiImageSpec.from_dict(wire)
    assert parsed.uri == "python:3.11"
    assert parsed.auth is UNSET


def test_api_create_sandbox_response_tolerates_null_metadata() -> None:
    """Null ``metadata``, ``expiresAt`` and ``lastTransitionAt`` parse without error."""
    wire = {
        "id": "sandbox-1",
        "status": {"state": "Running", "lastTransitionAt": None},
        "createdAt": "2025-01-01T00:00:00Z",
        "entrypoint": ["/bin/sh"],
        "metadata": None,
        "expiresAt": None,
    }
    parsed = ApiCreateSandboxResponse.from_dict(wire)

    # metadata and lastTransitionAt collapse to UNSET; expiresAt stays None.
    assert parsed.metadata is UNSET
    assert parsed.expires_at is None
    assert parsed.status.last_transition_at is UNSET


def test_api_sandbox_tolerates_null_metadata() -> None:
    """An API sandbox dict with null optional fields parses without error."""
    wire = {
        "id": "sandbox-1",
        "image": {"uri": "python:3.11", "auth": None},
        "status": {"state": "Running", "lastTransitionAt": None},
        "entrypoint": ["/bin/sh"],
        "createdAt": "2025-01-01T00:00:00Z",
        "metadata": None,
        "expiresAt": None,
    }
    parsed = ApiSandbox.from_dict(wire)

    # metadata and lastTransitionAt collapse to UNSET; expiresAt stays None.
    assert parsed.metadata is UNSET
    assert parsed.expires_at is None
    assert parsed.status.last_transition_at is UNSET


def test_sandbox_image_auth_rejects_blank_username_and_password() -> None:
    """A blank username or a blank password each raise ValueError."""
    # Blank username, valid password.
    with pytest.raises(ValueError):
        SandboxImageAuth(username=" ", password="x")

    # Valid username, blank password.
    with pytest.raises(ValueError):
        SandboxImageAuth(username="u", password=" ")


def test_sandbox_filter_validations() -> None:
    """``page=0`` / ``page_size=1`` are accepted; ``page=-1`` and ``page_size=0`` are not."""
    # Smallest values that construct successfully.
    SandboxFilter(page=0, page_size=1)

    # One step below each of those values is rejected.
    with pytest.raises(ValueError):
        SandboxFilter(page=-1)
    with pytest.raises(ValueError):
        SandboxFilter(page_size=0)


def test_sandbox_status_and_info_alias_dump_is_stable() -> None:
    """Dumping SandboxInfo by alias keeps snake_case keys and UTC-suffixed timestamps."""
    # A plain import replaces the original ``__import__("uuid")`` expression.
    from uuid import uuid4

    status = SandboxStatus(
        state="RUNNING", last_transition_at=datetime(2025, 1, 1, tzinfo=timezone.utc)
    )
    info = SandboxInfo(
        id=str(uuid4()),
        status=status,
        entrypoint=["/bin/sh"],
        expires_at=datetime(2025, 1, 2, tzinfo=timezone.utc),
        created_at=datetime(2025, 1, 1, tzinfo=timezone.utc),
        image=SandboxImageSpec("python:3.11"),
        metadata={"k": "v"},
    )

    dumped = info.model_dump(by_alias=True, mode="json")
    assert "expires_at" in dumped
    assert "created_at" in dumped
    # Either UTC spelling is accepted for the serialized timestamp.
    assert dumped["status"]["last_transition_at"].endswith(("Z", "+00:00"))


def test_sandbox_info_supports_manual_cleanup_expiration() -> None:
    """SandboxInfo accepts ``expires_at=None`` and dumps it as ``None``."""
    # A plain import replaces the original ``__import__("uuid")`` expression.
    from uuid import uuid4

    info = SandboxInfo(
        id=str(uuid4()),
        status=SandboxStatus(state="RUNNING"),
        entrypoint=["/bin/sh"],
        expires_at=None,
        created_at=datetime(2025, 1, 1, tzinfo=timezone.utc),
        image=SandboxImageSpec("python:3.11"),
    )

    dumped = info.model_dump(by_alias=True, mode="json")
    assert dumped["expires_at"] is None


def test_filesystem_models_aliases_and_validation() -> None:
    """MoveEntry accepts ``source``/``destination``; WriteEntry rejects a blank path."""
    move = MoveEntry(source="/a", destination="/b")
    # Values given as source/destination are readable as src/dest.
    assert (move.src, move.dest) == ("/a", "/b")

    with pytest.raises(ValueError):
        WriteEntry(path="  ", data="x")


# ============================================================================
# Volume Model Tests
# ============================================================================


def test_host_backend_requires_absolute_path() -> None:
    """Host keeps an absolute path and rejects a relative one."""
    absolute = "/data/shared"
    assert Host(path=absolute).path == absolute

    with pytest.raises(ValueError, match="absolute path"):
        Host(path="relative/path")


def test_pvc_backend_rejects_blank_claim_name() -> None:
    """PVC exposes ``claimName`` as ``claim_name`` and rejects a blank value."""
    claim = PVC(claimName="my-pvc")
    assert claim.claim_name == "my-pvc"

    with pytest.raises(ValueError, match="blank"):
        PVC(claimName="   ")


def test_ossfs_backend_default_version_is_2_0() -> None:
    """An OSSFS backend built without a version reports ``"2.0"``."""
    ossfs = OSSFS(
        bucket="bucket-test-3",
        endpoint="oss-cn-hangzhou.aliyuncs.com",
        accessKeyId="ak",
        accessKeySecret="sk",
    )
    default_version = ossfs.version
    assert default_version == "2.0"


def test_volume_with_host_backend() -> None:
    """A host-backed Volume exposes its fields and the defaults for optional ones."""
    host_backend = Host(path="/data/shared")
    volume = Volume(
        name="data",
        host=host_backend,
        mountPath="/mnt/data",
    )

    assert volume.name == "data"
    assert volume.host is not None
    assert volume.host.path == "/data/shared"
    assert volume.pvc is None
    assert volume.mount_path == "/mnt/data"
    assert volume.read_only is False  # default is read-write
    assert volume.sub_path is None


def test_volume_with_pvc_backend() -> None:
    """A PVC-backed Volume exposes the claim plus the explicit readOnly/subPath values."""
    pvc_backend = PVC(claimName="shared-models")
    volume = Volume(
        name="models",
        pvc=pvc_backend,
        mountPath="/mnt/models",
        readOnly=True,
        subPath="v1",
    )

    assert volume.name == "models"
    assert volume.host is None
    assert volume.pvc is not None
    assert volume.pvc.claim_name == "shared-models"
    assert volume.mount_path == "/mnt/models"
    assert volume.read_only is True
    assert volume.sub_path == "v1"


def test_volume_rejects_blank_name() -> None:
    """A whitespace-only volume name raises ValueError mentioning ``blank``."""
    blank_name = "   "
    with pytest.raises(ValueError, match="blank"):
        Volume(name=blank_name, host=Host(path="/data"), mountPath="/mnt")


def test_volume_requires_absolute_mount_path() -> None:
    """A relative ``mountPath`` raises ValueError mentioning ``absolute path``."""
    relative_mount = "relative/path"
    with pytest.raises(ValueError, match="absolute path"):
        Volume(name="test", host=Host(path="/data"), mountPath=relative_mount)


def test_volume_serialization_uses_aliases() -> None:
    """Dumping a Volume by alias emits the camelCase keys."""
    volume = Volume(
        name="test",
        pvc=PVC(claimName="my-pvc"),
        mountPath="/mnt/test",
        readOnly=True,
        subPath="sub",
    )
    payload = volume.model_dump(by_alias=True, mode="json")

    for alias_key in ("mountPath", "readOnly", "subPath"):
        assert alias_key in payload
    assert payload["pvc"]["claimName"] == "my-pvc"
    assert payload["readOnly"] is True


def test_volume_rejects_no_backend() -> None:
    """A Volume built without any backend raises ValueError (``none was provided``)."""
    expected_fragment = "none was provided"
    with pytest.raises(ValueError, match=expected_fragment):
        Volume(name="test", mountPath="/mnt/test")


def test_volume_rejects_multiple_backends() -> None:
    """A Volume given both host and pvc raises ValueError (``multiple were provided``)."""
    expected_fragment = "multiple were provided"
    with pytest.raises(ValueError, match=expected_fragment):
        Volume(
            name="test",
            host=Host(path="/data"),
            pvc=PVC(claimName="my-pvc"),
            mountPath="/mnt/test",
        )


# ============================================================================
# Execution __str__ and .text Tests
# ============================================================================


def _make_output(text: str, *, is_error: bool = False) -> OutputMessage:
    """Build an OutputMessage with the given text, timestamp 0 and ``is_error`` flag."""
    message = OutputMessage(text=text, timestamp=0, is_error=is_error)
    return message


def _make_result(text: str) -> ExecutionResult:
    """Build an ExecutionResult with the given text and timestamp 0."""
    result = ExecutionResult(text=text, timestamp=0)
    return result


def test_execution_str_stdout_only() -> None:
    """``str()`` of a stdout-only Execution joins the chunks with newlines."""
    stdout_chunks = [_make_output("hello"), _make_output("world")]
    execution = Execution(logs=ExecutionLogs(stdout=stdout_chunks))
    assert str(execution) == "hello\nworld"


def test_execution_str_with_stderr() -> None:
    """``str()`` puts stderr after stdout under a ``[stderr]`` marker line."""
    logs = ExecutionLogs(
        stdout=[_make_output("ok")],
        stderr=[_make_output("warn", is_error=True)],
    )
    execution = Execution(logs=logs)
    assert str(execution) == "ok\n[stderr]\nwarn"


def test_execution_str_with_error() -> None:
    """``str()`` of an error-only Execution renders ``[error] <name>: <value>``."""
    failure = ExecutionError(name="RuntimeError", value="boom", timestamp=0)
    execution = Execution(error=failure)
    assert str(execution) == "[error] RuntimeError: boom"


def test_execution_str_empty() -> None:
    """``str()`` of a default-constructed Execution is the empty string."""
    assert str(Execution()) == ""


def test_execution_text_property() -> None:
    """``.text`` joins stdout chunks with newlines and leaves stderr out."""
    logs = ExecutionLogs(
        stdout=[_make_output("line1"), _make_output("line2")],
        stderr=[_make_output("ignored", is_error=True)],
    )
    execution = Execution(logs=logs)
    assert execution.text == "line1\nline2"


def test_execution_text_includes_results() -> None:
    """code-interpreter stores return values in result, not stdout."""
    execution = Execution(result=[_make_result("4")])
    # With no stdout at all, both views show just the result text.
    for rendered in (execution.text, str(execution)):
        assert rendered == "4"


def test_execution_text_combines_stdout_and_results() -> None:
    """``.text`` lists stdout first and the result text after it, newline-separated."""
    stdout_logs = ExecutionLogs(stdout=[_make_output("3.11.14")])
    results = [_make_result("4")]

    execution = Execution(logs=stdout_logs, result=results)

    assert execution.text == "3.11.14\n4"


def test_execution_text_strips_trailing_newlines() -> None:
    """code-interpreter streaming sends chunks with trailing newlines."""
    streamed = [_make_output(chunk) for chunk in ("1\n", "2\n")]
    execution = Execution(logs=ExecutionLogs(stdout=streamed))
    expected = "1\n2"

    # Neither rendering may show the blank lines the raw chunks would produce.
    assert execution.text == expected
    assert str(execution) == expected


================================================
FILE: sdks/sandbox/python/tests/test_sandbox_business_logic.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations

from datetime import datetime, timedelta, timezone
from uuid import uuid4

import pytest

from opensandbox.config import ConnectionConfig
from opensandbox.constants import DEFAULT_EGRESS_PORT, DEFAULT_EXECD_PORT
from opensandbox.exceptions import SandboxReadyTimeoutException
from opensandbox.models.sandboxes import NetworkPolicy, NetworkRule, SandboxEndpoint
from opensandbox.sandbox import Sandbox


class _SandboxServiceStub:
    """Sandbox-service test double that records renew and endpoint lookups."""

    def __init__(self) -> None:
        # Entries are (sandbox_id, expires_at).
        self.renew_calls: list[tuple[object, datetime]] = []
        # Entries are (sandbox_id, port, use_server_proxy).
        self.endpoint_calls: list[tuple[object, int, bool]] = []

    async def renew_sandbox_expiration(self, sandbox_id, expires_at: datetime) -> None:
        """Record the renew request; nothing is returned."""
        record = (sandbox_id, expires_at)
        self.renew_calls.append(record)

    async def get_sandbox_endpoint(self, sandbox_id, port: int, use_server_proxy: bool = False) -> SandboxEndpoint:
        """Record the lookup and return a canned endpoint for ``port``."""
        record = (sandbox_id, port, use_server_proxy)
        self.endpoint_calls.append(record)
        address = f"sbx.internal:{port}"
        return SandboxEndpoint(endpoint=address, headers={"X-Egress": "1"})


class _HealthServiceStub:
    def __init__(self, *, should_raise: bool = False) -> None:
        self.should_raise = should_raise
        self.ping_calls: list[object] = []

    async def ping(self, sandbox_id) -> bool:
        self.ping_calls.append(sandbox_id)
        if self.should_raise:
            raise RuntimeError("boom")
        return True


class _Noop:
    pass


class _EgressServiceStub:
    """Egress-service test double: canned policy plus a log of patched rules."""

    def __init__(self) -> None:
        # One entry per ``patch_rules`` call, holding the rules list it received.
        self.patch_calls: list[list[NetworkRule]] = []

    async def get_policy(self) -> NetworkPolicy:
        """Return a deny-by-default policy with a single allow rule for pypi.org."""
        allow_pypi = NetworkRule(action="allow", target="pypi.org")
        return NetworkPolicy(defaultAction="deny", egress=[allow_pypi])

    async def patch_rules(self, rules: list[NetworkRule]) -> None:
        """Record ``rules`` exactly as received."""
        self.patch_calls.append(rules)


def _make_sandbox(
    *,
    health_service,
    sandbox_service,
    custom_health_check=None,
    connection_config: ConnectionConfig | None = None,
) -> Sandbox:
    """Assemble a ``Sandbox`` around the given stubs.

    The sandbox gets a random id, ``_Noop`` filesystem/command/metrics services
    and a fresh ``_EgressServiceStub``. A default ``ConnectionConfig`` is used
    when ``connection_config`` is falsy.
    """
    if not connection_config:
        connection_config = ConnectionConfig()

    collaborators = {
        "sandbox_id": str(uuid4()),
        "sandbox_service": sandbox_service,
        "filesystem_service": _Noop(),
        "command_service": _Noop(),
        "health_service": health_service,
        "metrics_service": _Noop(),
        "egress_service": _EgressServiceStub(),
        "connection_config": connection_config,
        "custom_health_check": custom_health_check,
    }
    return Sandbox(**collaborators)


@pytest.mark.asyncio
async def test_is_healthy_uses_ping_and_swallows_ping_errors() -> None:
    """``is_healthy`` returns ``False`` when the health service's ping raises."""
    failing_health = _HealthServiceStub(should_raise=True)
    sbx = _make_sandbox(
        health_service=failing_health,
        sandbox_service=_SandboxServiceStub(),
    )

    healthy = await sbx.is_healthy()

    assert healthy is False


@pytest.mark.asyncio
async def test_check_ready_succeeds_after_retries_without_real_sleep(monkeypatch: pytest.MonkeyPatch) -> None:
    """``check_ready`` keeps polling until the custom health check passes.

    The health check fails twice and passes on its third call. The sleep function
    reachable at ``opensandbox.sandbox.asyncio.sleep`` is replaced so the test
    never actually waits, even though the polling interval is non-zero.
    """

    async def _no_sleep(_: float) -> None:
        # Returns immediately regardless of the requested delay.
        return None

    monkeypatch.setattr("opensandbox.sandbox.asyncio.sleep", _no_sleep)

    attempts = 0

    async def _custom_health(_: Sandbox) -> bool:
        nonlocal attempts
        attempts += 1
        return attempts >= 3

    sbx = _make_sandbox(
        health_service=_HealthServiceStub(),
        sandbox_service=_SandboxServiceStub(),
        custom_health_check=_custom_health,
    )

    overall_timeout = timedelta(seconds=1)
    poll_every = timedelta(seconds=0.01)
    await sbx.check_ready(timeout=overall_timeout, polling_interval=poll_every)

    assert attempts == 3


@pytest.mark.asyncio
async def test_check_ready_timeout_raises() -> None:
    """A health check that never passes makes ``check_ready`` raise on timeout."""

    async def _always_false(_: Sandbox) -> bool:
        return False

    health_stub = _HealthServiceStub()
    service_stub = _SandboxServiceStub()
    sbx = _make_sandbox(
        health_service=health_stub,
        sandbox_service=service_stub,
        custom_health_check=_always_false,
    )

    tiny_timeout = timedelta(seconds=0.01)
    no_delay = timedelta(seconds=0)
    with pytest.raises(SandboxReadyTimeoutException):
        await sbx.check_ready(timeout=tiny_timeout, polling_interval=no_delay)


@pytest.mark.asyncio
async def test_check_ready_timeout_message_includes_troubleshooting_hints() -> None:
    """The timeout error names the active connection settings and a proxy hint."""

    async def _always_false(_: Sandbox) -> bool:
        return False

    direct_config = ConnectionConfig(domain="10.0.0.1:8080", use_server_proxy=False)
    sbx = _make_sandbox(
        health_service=_HealthServiceStub(),
        sandbox_service=_SandboxServiceStub(),
        custom_health_check=_always_false,
        connection_config=direct_config,
    )

    with pytest.raises(SandboxReadyTimeoutException) as exc_info:
        await sbx.check_ready(timeout=timedelta(seconds=0.01), polling_interval=timedelta(seconds=0))

    message = str(exc_info.value)
    expected_hints = (
        "ConnectionConfig(domain=10.0.0.1:8080, use_server_proxy=False)",
        "ConnectionConfig(use_server_proxy=True)",
    )
    for hint in expected_hints:
        assert hint in message


@pytest.mark.asyncio
async def test_renew_passes_timezone_aware_utc_datetime() -> None:
    """``renew`` hands the service a UTC-aware expiry that reflects the timeout.

    The expiry recorded by the stub must carry ``timezone.utc`` and must be no
    earlier than "time before the call + timeout". The previous lower bound was
    just the time before the call, which a ``renew`` that ignored the timeout
    would also satisfy.

    NOTE(review): assumes ``Sandbox.renew`` computes the expiry as "now + timeout";
    confirm against the implementation.
    """
    svc = _SandboxServiceStub()
    sbx = _make_sandbox(
        health_service=_HealthServiceStub(),
        sandbox_service=svc,
    )
    ttl = timedelta(seconds=10)
    # Same 2s of upper-bound tolerance the original assertion allowed (10s + 2s).
    slack = timedelta(seconds=2)

    before = datetime.now(timezone.utc)
    await sbx.renew(ttl)
    after = datetime.now(timezone.utc)

    assert len(svc.renew_calls) == 1
    _, expires_at = svc.renew_calls[0]
    assert expires_at.tzinfo is timezone.utc
    assert before + ttl <= expires_at <= after + ttl + slack


@pytest.mark.asyncio
async def test_get_egress_policy_uses_injected_egress_service() -> None:
    """``get_egress_policy`` returns the policy served by the injected egress stub."""
    proxied_config = ConnectionConfig(use_server_proxy=True)
    sbx = _make_sandbox(
        health_service=_HealthServiceStub(),
        sandbox_service=_SandboxServiceStub(),
        connection_config=proxied_config,
    )

    policy = await sbx.get_egress_policy()

    assert policy.default_action == "deny"
    rules = policy.egress
    assert rules is not None
    first_rule = rules[0]
    assert first_rule.target == "pypi.org"


@pytest.mark.asyncio
async def test_patch_egress_rules_uses_injected_egress_service(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``patch_egress_rules`` goes to the injected egress service only.

    The rules must reach the egress stub unchanged, and the sandbox service must
    see no endpoint lookups. The ``monkeypatch`` fixture is requested but not
    used by this test body.
    """
    recording_service = _SandboxServiceStub()
    egress_stub = _EgressServiceStub()

    sandbox_kwargs = {
        "sandbox_id": str(uuid4()),
        "sandbox_service": recording_service,
        "filesystem_service": _Noop(),
        "command_service": _Noop(),
        "health_service": _HealthServiceStub(),
        "metrics_service": _Noop(),
        "egress_service": egress_stub,
        "connection_config": ConnectionConfig(use_server_proxy=False),
    }
    sbx = Sandbox(**sandbox_kwargs)
    allow_github = [NetworkRule(action="allow", target="www.github.com")]

    await sbx.patch_egress_rules(allow_github)

    assert recording_service.endpoint_calls == []
    assert egress_stub.patch_calls == [allow_github]


@pytest.mark.asyncio
async def test_create_resolves_egress_endpoint_and_builds_service(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``Sandbox.create`` resolves the egress endpoint and passes it to the factory.

    ``AdapterFactory`` in ``opensandbox.sandbox`` is swapped for a stub. The test
    then checks that endpoints were looked up for the execd and egress ports, in
    that order, and that ``create_egress_service`` received exactly the egress
    endpoint.
    """
    egress_stub = _EgressServiceStub()
    egress_endpoints_seen: list[SandboxEndpoint] = []

    class _CreateResponse:
        id = "sbx-created"

    class _SandboxServiceCreateStub:
        """Sandbox service that fakes creation and records endpoint lookups."""

        def __init__(self) -> None:
            self.endpoint_calls: list[tuple[str, int, bool]] = []

        async def create_sandbox(self, *_args, **_kwargs):
            return _CreateResponse()

        async def get_sandbox_endpoint(self, sandbox_id, port: int, use_server_proxy: bool = False) -> SandboxEndpoint:
            lookup = (sandbox_id, port, use_server_proxy)
            self.endpoint_calls.append(lookup)
            return SandboxEndpoint(endpoint=f"sbx.internal:{port}", headers={"X-Port": str(port)})

        async def kill_sandbox(self, _sandbox_id: str) -> None:
            return None

    sandbox_service = _SandboxServiceCreateStub()

    class _FactoryStub:
        """Replacement factory: hands out the stubs defined in this test."""

        def __init__(self, connection_config: ConnectionConfig) -> None:
            self.connection_config = connection_config

        def create_sandbox_service(self):
            return sandbox_service

        def create_filesystem_service(self, endpoint: SandboxEndpoint):
            return _Noop()

        def create_command_service(self, endpoint: SandboxEndpoint):
            return _Noop()

        def create_health_service(self, endpoint: SandboxEndpoint):
            return _Noop()

        def create_metrics_service(self, endpoint: SandboxEndpoint):
            return _Noop()

        def create_egress_service(self, endpoint: SandboxEndpoint) -> _EgressServiceStub:
            # Remember which endpoint the egress service was built from.
            egress_endpoints_seen.append(endpoint)
            return egress_stub

    monkeypatch.setattr("opensandbox.sandbox.AdapterFactory", _FactoryStub)

    async def _healthy(_sbx: Sandbox) -> bool:
        return True

    await Sandbox.create(
        "python:3.11",
        connection_config=ConnectionConfig(use_server_proxy=False),
        health_check=_healthy,
    )

    expected_lookups = [
        ("sbx-created", port, False)
        for port in (DEFAULT_EXECD_PORT, DEFAULT_EGRESS_PORT)
    ]
    assert sandbox_service.endpoint_calls == expected_lookups

    assert len(egress_endpoints_seen) == 1
    expected_endpoint = SandboxEndpoint(
        endpoint=f"sbx.internal:{DEFAULT_EGRESS_PORT}",
        headers={"X-Port": str(DEFAULT_EGRESS_PORT)},
    )
    assert egress_endpoints_seen == [expected_endpoint]


================================================
FILE: sdks/sandbox/python/tests/test_sandbox_close_and_connect_validation.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import httpx
import pytest

from opensandbox.config import ConnectionConfig
from opensandbox.exceptions import InvalidArgumentException
from opensandbox.models.sandboxes import NetworkPolicy, NetworkRule
from opensandbox.sandbox import Sandbox


class _NoopService:
    pass


class _NoopEgressService:
    """Egress-service placeholder with a canned policy and a no-op patch."""

    async def get_policy(self) -> NetworkPolicy:  # pragma: no cover
        allow_pypi = NetworkRule(action="allow", target="pypi.org")
        return NetworkPolicy(defaultAction="deny", egress=[allow_pypi])

    async def patch_rules(self, rules: list[NetworkRule]) -> None:  # pragma: no cover
        return None


@pytest.mark.asyncio
async def test_sandbox_close_does_not_close_user_transport() -> None:
    """Closing a sandbox must leave a caller-supplied transport open.

    A custom transport that flips ``closed`` in ``aclose`` is passed through
    ``ConnectionConfig``; after ``Sandbox.close()`` the flag must still be
    ``False``.
    """
    # Function-scope import replaces the inline ``__import__("uuid")`` call.
    from uuid import uuid4

    class CustomTransport(httpx.AsyncBaseTransport):
        def __init__(self) -> None:
            self.closed = False

        async def handle_async_request(self, request: httpx.Request) -> httpx.Response:  # pragma: no cover
            raise RuntimeError("not used")

        async def aclose(self) -> None:
            self.closed = True

    t = CustomTransport()
    cfg = ConnectionConfig(transport=t)

    sbx = Sandbox(
        sandbox_id=str(uuid4()),
        sandbox_service=_NoopService(),
        filesystem_service=_NoopService(),
        command_service=_NoopService(),
        health_service=_NoopService(),
        metrics_service=_NoopService(),
        egress_service=_NoopEgressService(),
        connection_config=cfg,
        custom_health_check=None,
    )

    await sbx.close()
    assert t.closed is False


@pytest.mark.asyncio
async def test_sandbox_connect_requires_id() -> None:
    """``Sandbox.connect`` rejects an empty sandbox id."""
    default_config = ConnectionConfig()
    with pytest.raises(InvalidArgumentException):
        await Sandbox.connect(sandbox_id="", connection_config=default_config)


================================================
FILE: sdks/sandbox/python/tests/test_sandbox_manager_business_logic.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations

from datetime import datetime, timedelta, timezone
from uuid import uuid4

import httpx
import pytest

from opensandbox.config import ConnectionConfig
from opensandbox.manager import SandboxManager


class _SandboxServiceStub:
    def __init__(self) -> None:
        self.renew_calls: list[tuple[object, datetime]] = []
        self.pause_calls: list[object] = []

    async def list_sandboxes(self, _filter):  # pragma: no cover
        raise RuntimeError("not used")

    async def get_sandbox_info(self, _sandbox_id):  # pragma: no cover
        raise RuntimeError("not used")

    async def kill_sandbox(self, _sandbox_id):  # pragma: no cover
        raise RuntimeError("not used")

    async def renew_sandbox_expiration(self, sandbox_id, new_expiration_time: datetime) -> None:
        self.renew_calls.append((sandbox_id, new_expiration_time))

    async def pause_sandbox(self, sandbox_id) -> None:
        self.pause_calls.append(sandbox_id)

    async def resume_sandbox(self, _sandbox_id):  # pragma: no cover
        raise RuntimeError("not used")


@pytest.mark.asyncio
async def test_manager_renew_uses_utc_datetime() -> None:
    """``renew_sandbox`` passes the service an expiry whose tzinfo is UTC."""
    service = _SandboxServiceStub()
    manager = SandboxManager(service, ConnectionConfig())
    sandbox_id = str(uuid4())

    await manager.renew_sandbox(sandbox_id, timedelta(seconds=5))

    recorded = service.renew_calls
    assert len(recorded) == 1
    expiry = recorded[0][1]
    assert expiry.tzinfo is timezone.utc


@pytest.mark.asyncio
async def test_manager_close_does_not_close_user_transport() -> None:
    """Closing the manager must leave a caller-supplied transport open."""

    class CustomTransport(httpx.AsyncBaseTransport):
        """Transport that only remembers whether ``aclose`` was awaited."""

        def __init__(self) -> None:
            self.closed = False

        async def handle_async_request(self, request: httpx.Request) -> httpx.Response:  # pragma: no cover
            raise RuntimeError("not used")

        async def aclose(self) -> None:
            self.closed = True

    user_transport = CustomTransport()
    config = ConnectionConfig(transport=user_transport)
    manager = SandboxManager(_SandboxServiceStub(), config)

    await manager.close()

    assert user_transport.closed is False


================================================
FILE: sdks/sandbox/python/tests/test_sandbox_manager_sync_business_logic.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations

from datetime import datetime, timedelta, timezone
from uuid import uuid4

import httpx

from opensandbox.config.connection_sync import ConnectionConfigSync
from opensandbox.sync.manager import SandboxManagerSync


class _SandboxServiceStub:
    def __init__(self) -> None:
        self.renew_calls: list[tuple[object, datetime]] = []

    def list_sandboxes(self, _filter):  # pragma: no cover
        raise RuntimeError("not used")

    def get_sandbox_info(self, _sandbox_id):  # pragma: no cover
        raise RuntimeError("not used")

    def kill_sandbox(self, _sandbox_id):  # pragma: no cover
        raise RuntimeError("not used")

    def renew_sandbox_expiration(self, sandbox_id, new_expiration_time: datetime) -> None:
        self.renew_calls.append((sandbox_id, new_expiration_time))

    def pause_sandbox(self, _sandbox_id) -> None:  # pragma: no cover
        raise RuntimeError("not used")

    def resume_sandbox(self, _sandbox_id):  # pragma: no cover
        raise RuntimeError("not used")


def test_sync_manager_renew_uses_utc_datetime() -> None:
    """The sync ``renew_sandbox`` passes the service an expiry whose tzinfo is UTC."""
    service = _SandboxServiceStub()
    manager = SandboxManagerSync(service, ConnectionConfigSync())
    sandbox_id = str(uuid4())

    manager.renew_sandbox(sandbox_id, timedelta(seconds=5))

    recorded = service.renew_calls
    assert len(recorded) == 1
    expiry = recorded[0][1]
    assert expiry.tzinfo is timezone.utc


def test_sync_manager_close_does_not_close_user_transport() -> None:
    """Closing the sync manager must leave a caller-supplied transport open."""

    class CustomTransport(httpx.BaseTransport):
        """Transport that only remembers whether ``close`` was called."""

        def __init__(self) -> None:
            self.closed = False

        def handle_request(self, request: httpx.Request) -> httpx.Response:  # pragma: no cover
            raise RuntimeError("not used")

        def close(self) -> None:
            self.closed = True

    user_transport = CustomTransport()
    config = ConnectionConfigSync(transport=user_transport)
    manager = SandboxManagerSync(_SandboxServiceStub(), config)

    manager.close()

    assert user_transport.closed is False


================================================
FILE: sdks/sandbox/python/tests/test_sandbox_service_adapter_lifecycle.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations

from datetime import datetime, timedelta, timezone
from uuid import uuid4

import pytest

from opensandbox.adapters.sandboxes_adapter import SandboxesAdapter
from opensandbox.config import ConnectionConfig
from opensandbox.exceptions import SandboxApiException
from opensandbox.models.sandboxes import (
    NetworkPolicy,
    NetworkRule,
    SandboxFilter,
    SandboxImageSpec,
)


class _Resp:
    def __init__(self, *, status_code: int, parsed) -> None:
        self.status_code = status_code
        self.parsed = parsed


def _api_create_sandbox_response(sandbox_id: str):
    """Build a lifecycle-API ``CreateSandboxResponse`` for ``sandbox_id``.

    The response is in state ``Running``, created on 2025-01-01 UTC and expiring
    on 2025-01-02 UTC, with ``/bin/sh`` as its entrypoint.
    """
    from opensandbox.api.lifecycle.models.create_sandbox_response import (
        CreateSandboxResponse,
    )
    from opensandbox.api.lifecycle.models.sandbox_status import SandboxStatus

    running = SandboxStatus(state="Running")
    expires = datetime(2025, 1, 2, tzinfo=timezone.utc)
    created = datetime(2025, 1, 1, tzinfo=timezone.utc)
    return CreateSandboxResponse(
        id=sandbox_id,
        status=running,
        expires_at=expires,
        created_at=created,
        entrypoint=["/bin/sh"],
    )


def _api_list_sandboxes_response():
    """Build a lifecycle-API ``ListSandboxesResponse`` holding one running sandbox.

    The pagination block describes a single page: page 0, size 10, one item.
    """
    from opensandbox.api.lifecycle.models.image_spec import ImageSpec
    from opensandbox.api.lifecycle.models.list_sandboxes_response import (
        ListSandboxesResponse,
    )
    from opensandbox.api.lifecycle.models.pagination_info import PaginationInfo
    from opensandbox.api.lifecycle.models.sandbox import Sandbox
    from opensandbox.api.lifecycle.models.sandbox_status import SandboxStatus

    only_sandbox = Sandbox(
        id=str(uuid4()),
        image=ImageSpec(uri="python:3.11"),
        status=SandboxStatus(state="Running"),
        entrypoint=["/bin/sh"],
        expires_at=datetime(2025, 1, 2, tzinfo=timezone.utc),
        created_at=datetime(2025, 1, 1, tzinfo=timezone.utc),
    )
    single_page = PaginationInfo(
        page=0,
        page_size=10,
        total_items=1,
        total_pages=1,
        has_next_page=False,
    )
    return ListSandboxesResponse(items=[only_sandbox], pagination=single_page)


@pytest.mark.asyncio
async def test_create_sandbox_success(monkeypatch: pytest.MonkeyPatch) -> None:
    """``create_sandbox`` sends image, extensions and network policy in the body.

    The ``post_sandboxes.asyncio_detailed`` call is faked to capture the request
    body and return a successful response.
    """
    captured = {}

    async def _fake_asyncio_detailed(*, client, body):
        captured["body"] = body
        return _Resp(status_code=200, parsed=_api_create_sandbox_response(str(uuid4())))

    monkeypatch.setattr(
        "opensandbox.api.lifecycle.api.sandboxes.post_sandboxes.asyncio_detailed",
        _fake_asyncio_detailed,
    )

    adapter = SandboxesAdapter(ConnectionConfig(domain="example.com:8080", api_key="k"))
    deny_all_but_pypi = NetworkPolicy(
        defaultAction="deny",
        egress=[NetworkRule(action="allow", target="pypi.org")],
    )

    created = await adapter.create_sandbox(
        spec=SandboxImageSpec("python:3.11"),
        entrypoint=["/bin/sh"],
        env={},
        metadata={},
        timeout=timedelta(seconds=3),
        resource={"cpu": "100m"},
        network_policy=deny_all_but_pypi,
        extensions={"storage.id": "abc123", "debug": "true"},
        volumes=None,
    )

    assert isinstance(created.id, str)

    payload = captured["body"].to_dict()
    assert "image" in payload
    assert payload["extensions"] == {"storage.id": "abc123", "debug": "true"}
    sent_policy = payload["networkPolicy"]
    assert sent_policy["defaultAction"] == "deny"
    assert sent_policy["egress"] == [{"action": "allow", "target": "pypi.org"}]


@pytest.mark.asyncio
async def test_create_sandbox_manual_cleanup_omits_timeout(monkeypatch: pytest.MonkeyPatch) -> None:
    """With ``timeout=None`` the serialized request body has no ``timeout`` key."""
    captured = {}

    async def _fake_asyncio_detailed(*, client, body):
        captured["body"] = body
        return _Resp(status_code=200, parsed=_api_create_sandbox_response(str(uuid4())))

    monkeypatch.setattr(
        "opensandbox.api.lifecycle.api.sandboxes.post_sandboxes.asyncio_detailed",
        _fake_asyncio_detailed,
    )

    config = ConnectionConfig(domain="example.com:8080", api_key="k")
    adapter = SandboxesAdapter(config)
    await adapter.create_sandbox(
        spec=SandboxImageSpec("python:3.11"),
        entrypoint=["/bin/sh"],
        env={},
        metadata={},
        timeout=None,
        resource={"cpu": "100m"},
        network_policy=None,
        extensions={},
        volumes=None,
    )

    payload = captured["body"].to_dict()
    assert "timeout" not in payload


@pytest.mark.asyncio
async def test_create_sandbox_empty_response_raises(monkeypatch: pytest.MonkeyPatch) -> None:
    """A 200 response with no parsed body makes ``create_sandbox`` raise."""

    async def _fake_asyncio_detailed(*, client, body):
        # Status is OK but the payload is missing.
        return _Resp(status_code=200, parsed=None)

    monkeypatch.setattr(
        "opensandbox.api.lifecycle.api.sandboxes.post_sandboxes.asyncio_detailed",
        _fake_asyncio_detailed,
    )

    adapter = SandboxesAdapter(ConnectionConfig())
    request_kwargs = {
        "spec": SandboxImageSpec("python:3.11"),
        "entrypoint": ["/bin/sh"],
        "env": {},
        "metadata": {},
        "timeout": timedelta(seconds=1),
        "resource": {"cpu": "100m"},
        "extensions": {"debug": "true"},
        "network_policy": NetworkPolicy(),
        "volumes": None,
    }
    with pytest.raises(SandboxApiException):
        await adapter.create_sandbox(**request_kwargs)


@pytest.mark.asyncio
async def test_list_sandboxes_metadata_double_encoded(monkeypatch: pytest.MonkeyPatch) -> None:
    """``list_sandboxes`` passes the metadata filter as a ``key=value`` string.

    Despite the test name, the value asserted here is the plain, un-encoded
    ``k k=v/v``. With no state filter set, ``state`` must be the API ``UNSET``
    sentinel.
    """
    from opensandbox.api.lifecycle.types import UNSET as API_UNSET

    captured = {}

    async def _fake_asyncio_detailed(*, client, state, metadata, page, page_size):
        captured["state"] = state
        captured["metadata"] = metadata
        captured["page"] = page
        captured["page_size"] = page_size
        return _Resp(status_code=200, parsed=_api_list_sandboxes_response())

    monkeypatch.setattr(
        "opensandbox.api.lifecycle.api.sandboxes.get_sandboxes.asyncio_detailed",
        _fake_asyncio_detailed,
    )

    adapter = SandboxesAdapter(ConnectionConfig())
    metadata_filter = SandboxFilter(metadata={"k k": "v/v"})
    await adapter.list_sandboxes(metadata_filter)

    assert captured["metadata"] == "k k=v/v"
    assert captured["state"] is API_UNSET


@pytest.mark.asyncio
async def test_pause_resume_kill_call_openapi(monkeypatch: pytest.MonkeyPatch) -> None:
    """pause, resume and kill each call their own generated API function.

    Each ``asyncio_detailed`` target is replaced by a recorder returning 204;
    the recorded calls must match the adapter calls in order and sandbox id.
    """
    sbx_id = str(uuid4())
    calls: list[tuple[str, str]] = []

    def _recorder(action: str):
        # Build a fake API function that logs ``action`` with the id it was given.
        async def _fake(*, client, sandbox_id):
            calls.append((action, sandbox_id))
            return _Resp(status_code=204, parsed=None)

        return _fake

    patch_targets = (
        ("pause", "opensandbox.api.lifecycle.api.sandboxes.post_sandboxes_sandbox_id_pause.asyncio_detailed"),
        ("resume", "opensandbox.api.lifecycle.api.sandboxes.post_sandboxes_sandbox_id_resume.asyncio_detailed"),
        ("kill", "opensandbox.api.lifecycle.api.sandboxes.delete_sandboxes_sandbox_id.asyncio_detailed"),
    )
    for action, target in patch_targets:
        monkeypatch.setattr(target, _recorder(action))

    adapter = SandboxesAdapter(ConnectionConfig())
    await adapter.pause_sandbox(sbx_id)
    await adapter.resume_sandbox(sbx_id)
    await adapter.kill_sandbox(sbx_id)

    assert calls == [("pause", sbx_id), ("resume", sbx_id), ("kill", sbx_id)]


@pytest.mark.asyncio
async def test_renew_sandbox_expiration_sends_timezone_aware(monkeypatch: pytest.MonkeyPatch) -> None:
    """A naive datetime given to the adapter reaches the API body tagged as UTC."""
    captured = {}

    async def _fake_asyncio_detailed(*, client, sandbox_id, body):
        from opensandbox.api.lifecycle.models.renew_sandbox_expiration_response import (
            RenewSandboxExpirationResponse,
        )

        sent_expiry = body.expires_at
        captured["expires_at"] = sent_expiry
        # Echo the expiry back, as a successful renew response.
        echoed = RenewSandboxExpirationResponse(expires_at=body.expires_at)
        return _Resp(status_code=200, parsed=echoed)

    monkeypatch.setattr(
        "opensandbox.api.lifecycle.api.sandboxes.post_sandboxes_sandbox_id_renew_expiration.asyncio_detailed",
        _fake_asyncio_detailed,
    )

    adapter = SandboxesAdapter(ConnectionConfig())
    naive_expiry = datetime(2025, 1, 1)  # naive
    await adapter.renew_sandbox_expiration(str(uuid4()), naive_expiry)

    assert captured["expires_at"].tzinfo is timezone.utc


================================================
FILE: sdks/sandbox/python/tests/test_sandbox_sync_business_logic.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations

from datetime import timedelta
from uuid import uuid4

import pytest

from opensandbox.config.connection_sync import ConnectionConfigSync
from opensandbox.constants import DEFAULT_EGRESS_PORT, DEFAULT_EXECD_PORT
from opensandbox.exceptions import SandboxReadyTimeoutException
from opensandbox.models.sandboxes import NetworkPolicy, NetworkRule, SandboxEndpoint
from opensandbox.sync.sandbox import SandboxSync


class _Noop:
    pass


class _SandboxServiceStub:
    """Synchronous sandbox-service test double that records endpoint lookups."""

    def __init__(self) -> None:
        # Entries are (sandbox_id, port, use_server_proxy).
        self.endpoint_calls: list[tuple[object, int, bool]] = []

    def get_sandbox_endpoint(self, sandbox_id, port: int, use_server_proxy: bool = False) -> SandboxEndpoint:
        """Record the lookup and return a canned endpoint for ``port``."""
        lookup = (sandbox_id, port, use_server_proxy)
        self.endpoint_calls.append(lookup)
        address = f"sync-egress:{port}"
        return SandboxEndpoint(endpoint=address, headers={"X-Egress": "1"})


class _EgressServiceStub:
    """Synchronous egress-service test double: canned policy plus a patch log."""

    def __init__(self) -> None:
        # One entry per ``patch_rules`` call, holding the rules list it received.
        self.patch_calls: list[list[NetworkRule]] = []

    def get_policy(self) -> NetworkPolicy:
        """Return a deny-by-default policy with a single allow rule for pypi.org."""
        allow_pypi = NetworkRule(action="allow", target="pypi.org")
        return NetworkPolicy(defaultAction="deny", egress=[allow_pypi])

    def patch_rules(self, rules: list[NetworkRule]) -> None:
        """Record ``rules`` exactly as received."""
        self.patch_calls.append(rules)


def test_sync_check_ready_timeout_message_includes_troubleshooting_hints() -> None:
    """The sync timeout error names the active connection settings and a proxy hint."""

    def _always_false(_: SandboxSync) -> bool:
        return False

    direct_config = ConnectionConfigSync(
        domain="10.0.0.2:8080",
        use_server_proxy=False,
    )
    sandbox_kwargs = {
        "sandbox_id": str(uuid4()),
        "sandbox_service": _Noop(),
        "filesystem_service": _Noop(),
        "command_service": _Noop(),
        "health_service": _Noop(),
        "metrics_service": _Noop(),
        "egress_service": _EgressServiceStub(),
        "connection_config": direct_config,
        "custom_health_check": _always_false,
    }
    sbx = SandboxSync(**sandbox_kwargs)

    with pytest.raises(SandboxReadyTimeoutException) as exc_info:
        sbx.check_ready(timeout=timedelta(seconds=0.01), polling_interval=timedelta(seconds=0))

    message = str(exc_info.value)
    expected_hints = (
        "ConnectionConfig(domain=10.0.0.2:8080, use_server_proxy=False)",
        "ConnectionConfigSync(use_server_proxy=True)",
    )
    for hint in expected_hints:
        assert hint in message


def test_sync_get_egress_policy_uses_injected_egress_service() -> None:
    """The sync ``get_egress_policy`` returns the policy served by the injected stub."""
    sandbox_kwargs = {
        "sandbox_id": str(uuid4()),
        "sandbox_service": _SandboxServiceStub(),
        "filesystem_service": _Noop(),
        "command_service": _Noop(),
        "health_service": _Noop(),
        "metrics_service": _Noop(),
        "egress_service": _EgressServiceStub(),
        "connection_config": ConnectionConfigSync(use_server_proxy=True),
    }
    sbx = SandboxSync(**sandbox_kwargs)

    policy = sbx.get_egress_policy()

    assert policy.default_action == "deny"
    rules = policy.egress
    assert rules is not None
    first_rule = rules[0]
    assert first_rule.target == "pypi.org"


def test_sync_patch_egress_rules_uses_injected_egress_service() -> None:
    """The sync ``patch_egress_rules`` goes to the injected egress service only.

    The rules must reach the egress stub unchanged, and the sandbox service must
    see no endpoint lookups.
    """
    recording_service = _SandboxServiceStub()
    egress_stub = _EgressServiceStub()

    sandbox_kwargs = {
        "sandbox_id": str(uuid4()),
        "sandbox_service": recording_service,
        "filesystem_service": _Noop(),
        "command_service": _Noop(),
        "health_service": _Noop(),
        "metrics_service": _Noop(),
        "egress_service": egress_stub,
        "connection_config": ConnectionConfigSync(use_server_proxy=False),
    }
    sbx = SandboxSync(**sandbox_kwargs)
    allow_github = [NetworkRule(action="allow", target="www.github.com")]

    sbx.patch_egress_rules(allow_github)

    assert recording_service.endpoint_calls == []
    assert egress_stub.patch_calls == [allow_github]


def test_sync_create_resolves_egress_endpoint_and_builds_service(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``SandboxSync.create`` resolves the egress endpoint and hands it to the factory.

    ``AdapterFactorySync`` is monkeypatched with a local stub so that no real
    adapters are built. After ``create`` returns, the test expects:

    * two endpoint lookups on the sandbox service, for the execd port and
      then the egress port, both with ``use_server_proxy=False``;
    * exactly one ``create_egress_service`` call, receiving the endpoint
      that the sandbox service returned for the egress port.
    """
    egress_stub = _EgressServiceStub()
    egress_factory_args: list[SandboxEndpoint] = []

    class _CreateResponse:
        id = "sync-created"

    class _SandboxServiceCreateStub:
        """Lifecycle stub that records every endpoint lookup it receives."""

        def __init__(self) -> None:
            self.endpoint_calls: list[tuple[str, int, bool]] = []

        def create_sandbox(self, *_args, **_kwargs):
            return _CreateResponse()

        def get_sandbox_endpoint(self, sandbox_id, port: int, use_server_proxy: bool = False) -> SandboxEndpoint:
            lookup = (sandbox_id, port, use_server_proxy)
            self.endpoint_calls.append(lookup)
            # Encode the port in both fields so the assertions can tell lookups apart.
            return SandboxEndpoint(endpoint=f"sync-egress:{port}", headers={"X-Port": str(port)})

        def kill_sandbox(self, _sandbox_id: str) -> None:
            return None

    sandbox_service = _SandboxServiceCreateStub()

    class _FactoryStub:
        """Stand-in for ``AdapterFactorySync`` returning canned services."""

        def __init__(self, connection_config: ConnectionConfigSync) -> None:
            self.connection_config = connection_config

        def create_sandbox_service(self):
            return sandbox_service

        def create_filesystem_service(self, endpoint: SandboxEndpoint):
            return _Noop()

        def create_command_service(self, endpoint: SandboxEndpoint):
            return _Noop()

        def create_health_service(self, endpoint: SandboxEndpoint):
            return _Noop()

        def create_metrics_service(self, endpoint: SandboxEndpoint):
            return _Noop()

        def create_egress_service(self, endpoint: SandboxEndpoint) -> _EgressServiceStub:
            # Capture the endpoint so the test can verify what create() resolved.
            egress_factory_args.append(endpoint)
            return egress_stub

    monkeypatch.setattr("opensandbox.sync.sandbox.AdapterFactorySync", _FactoryStub)

    def _always_healthy(_sbx) -> bool:
        return True

    SandboxSync.create(
        "python:3.11",
        connection_config=ConnectionConfigSync(use_server_proxy=False),
        health_check=_always_healthy,
    )

    expected_lookups = [
        ("sync-created", DEFAULT_EXECD_PORT, False),
        ("sync-created", DEFAULT_EGRESS_PORT, False),
    ]
    assert sandbox_service.endpoint_calls == expected_lookups

    expected_egress_endpoint = SandboxEndpoint(
        endpoint=f"sync-egress:{DEFAULT_EGRESS_PORT}",
        headers={"X-Port": str(DEFAULT_EGRESS_PORT)},
    )
    assert len(egress_factory_args) == 1
    assert egress_factory_args == [expected_egress_endpoint]


================================================
FILE: sdks/tsconfig.base.json
================================================
{
  "compilerOptions": {
    "target": "ES2022",
    "module": "NodeNext",
    "moduleResolution": "NodeNext",
    "lib": ["ES2022", "DOM"],

    "declaration": true,
    "declarationMap": true,
    "sourceMap": true,
    "removeComments": false,

    "strict": true,
    "noImplicitAny": true,
    "strictNullChecks": true,
    "noUnusedLocals": true,
    "noUnusedParameters": true,
    "noImplicitReturns": true,
    "noFallthroughCasesInSwitch": true,

    "esModuleInterop": true,
    "forceConsistentCasingInFileNames": true,
    "skipLibCheck": true
  }
}


================================================
FILE: server/.python-version
================================================
3.10


================================================
FILE: server/DEVELOPMENT.md
================================================
# Development Guide

This guide provides comprehensive information for developers working on OpenSandbox Server, including environment setup, architecture deep-dive, testing strategies, and contribution workflows.

## 📋 Table of Contents

- [Development Environment Setup](#development-environment-setup)
- [Project Structure](#project-structure)
- [Architecture Deep Dive](#architecture-deep-dive)
- [Development Workflow](#development-workflow)
- [Testing Guide](#testing-guide)
- [Working with Docker Runtime](#working-with-docker-runtime)
- [Working with Kubernetes Runtime](#working-with-kubernetes-runtime)
- [Code Style and Standards](#code-style-and-standards)
- [Debugging](#debugging)
- [Performance Optimization](#performance-optimization)
- [Contributing](#contributing)

## Development Environment Setup

### Prerequisites

- **Python 3.10+**: Check version with `python --version`
- **uv**: Install from [https://github.com/astral-sh/uv](https://github.com/astral-sh/uv)
- **Docker**: For local development and testing
- **Git**: Version control
- **IDE**: VS Code, PyCharm, or Cursor (recommended for AI assistance)

### Initial Setup

1. **Clone and Navigate**
   ```bash
   git clone https://github.com/alibaba/OpenSandbox.git
   cd OpenSandbox/server
   ```

2. **Install Dependencies**
   ```bash
   uv sync
   ```

3. **Verify Installation**
   ```bash
   uv run python -c "import fastapi; print(fastapi.__version__)"
   ```

4. **Configure Development Environment**
   ```bash
   cp example.config.toml ~/.sandbox.toml
   ```

   Edit `~/.sandbox.toml` for local development:
   ```toml
   [server]
   host = "0.0.0.0"
   port = 8080
   log_level = "DEBUG"
   api_key = "your-secret-api-key-change-this"

   [runtime]
   type = "docker"
   execd_image = "opensandbox/execd:v1.0.7"

   [docker]
   network_mode = "host"
   ```

5. **Run Development Server**
   ```bash
   uv run python -m src.main
   ```

### IDE Configuration

#### VS Code / Cursor

Create `.vscode/launch.json`:

```json
{
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python: FastAPI",
            "type": "python",
            "request": "launch",
            "module": "src.main",
            "justMyCode": false,
            "env": {
                "SANDBOX_CONFIG_PATH": "${workspaceFolder}/.sandbox.toml"
            }
        }
    ]
}
```

#### PyCharm

1. Open project in PyCharm
2. Configure Python interpreter: **Settings → Project → Python Interpreter**
3. Select the virtual environment created by `uv sync`
4. Enable pytest: **Settings → Tools → Python Integrated Tools → Testing → pytest**

## Project Structure

```
server/
├── src/                          # Source code
│   ├── main.py                   # FastAPI application entry point
│   ├── config.py                 # Configuration management
│   ├── api/                      # API layer
│   │   ├── lifecycle.py          # Sandbox lifecycle routes
│   │   └── schema.py             # Pydantic models
│   ├── middleware/               # Middleware components
│   │   └── auth.py               # API Key authentication
│   └── services/                 # Business logic layer
│       ├── sandbox_service.py    # Abstract base class
│       ├── docker.py             # Docker implementation
│       └── factory.py            # Service factory
├── tests/                        # Test suite
├── scripts/                      # Utility scripts
├── pyproject.toml                # Project metadata and dependencies
└── example.config.toml           # Example configuration
```

## Architecture Deep Dive

### Layered Architecture

The server follows a clean layered architecture:

1. **HTTP Layer** (FastAPI routes) - Request validation and response serialization
2. **Middleware Layer** - Authentication and cross-cutting concerns
3. **Service Layer** - Business logic abstraction
4. **Runtime Implementation Layer** - Docker/Kubernetes specific code

### Request Flow

#### Create Sandbox (Async)

```
Client → POST /sandboxes
  ↓
Auth Middleware validates API key
  ↓
lifecycle.create_sandbox() receives CreateSandboxRequest
  ↓
sandbox_service.create_sandbox_async(request)
  ↓
Returns 202 Accepted with Pending status immediately
  ↓
Background thread provisions the sandbox
```

### Internal Systems

#### Expiration Timer System

Tracks sandbox timeouts using in-memory data structures:
- `_sandbox_expirations: Dict[str, datetime]` - Expiration times
- `_expiration_timers: Dict[str, Timer]` - Active timer threads
- `_expiration_lock: Lock` - Thread synchronization

#### Async Provisioning System

Avoids blocking API requests during slow operations by:
1. Storing sandboxes in pending state
2. Starting background provisioning thread
3. Returning 202 Accepted immediately
4. Transitioning to running state when ready

## Development Workflow

### Feature Development

```bash
git checkout -b feature/my-feature
# Implement feature
uv run pytest
git commit -m "feat: add my feature"
git push origin feature/my-feature
```

### Bug Fixes

```bash
git checkout -b fix/bug-description
# Write failing test
# Fix bug
uv run pytest
git commit -m "fix: resolve bug"
```

## Testing Guide

### Running Tests
> **Note**: A local Docker daemon is required to run the full test suite, as integration tests interact with the Docker Engine.

```bash
# All tests
uv run pytest

# Specific file
uv run pytest tests/test_docker_service.py

# With coverage
uv run pytest --cov=src --cov-report=html
```

### Writing Tests

Example unit test:

```python
@patch("src.services.docker.docker")
def test_create_sandbox_validates_entrypoint(mock_docker):
    service = DockerSandboxService(config=test_config())
    request = CreateSandboxRequest(
        image=ImageSpec(uri="python:3.11"),
        timeout=120,
        entrypoint=[]  # Invalid
    )
    with pytest.raises(HTTPException):
        service.create_sandbox(request)
```

## Working with Docker Runtime

### Local Development

```bash
# Use local Docker
export DOCKER_HOST="unix:///var/run/docker.sock"
uv run python -m src.main

# Use remote Docker
export DOCKER_HOST="ssh://user@remote-host"
uv run python -m src.main
```

### Network Modes

**Host Mode (Default):**
- Sandboxes share host network
- Direct port access
- Endpoint format: `http://{domain}/{sandbox_id}/{port}`

**Bridge Mode:**
- Isolated networks
- HTTP proxy required
- Endpoint format: `http://{server}/route/{sandbox_id}/{port}/path`

### Egress sidecar (bridge + `networkPolicy`)

- Config: set `[egress].image`; sidecar starts only when the request carries `networkPolicy`. Requires Docker `network_mode="bridge"`.
- Network & privileges: main container shares the sidecar netns (`network_mode=container:<sidecar>`); main container explicitly drops `NET_ADMIN`; sidecar keeps `NET_ADMIN` to manage iptables / DNS transparent redirect.
- Ports: host port bindings live on the sidecar; main container labels record the mapped ports for upstream endpoint resolution.
- Lifecycle: on create failure / delete / expiration / abnormal recovery, the sidecar is cleaned up; startup also removes orphaned sidecars.
- Injection: `OPENSANDBOX_EGRESS_RULES` env passes the `networkPolicy` JSON; sidecar image is pulled/ensured before start.

## Working with Kubernetes Runtime

> **Status:** Planned / Configuration Ready

Architecture will include:
- Pod management with execd init container
- Service/Ingress for networking
- CronJob or operator for expiration handling

## Code Style and Standards

Follow PEP 8 with Ruff enforcement:

```bash
uv run ruff check src tests
```

### Naming Conventions

- Functions: `snake_case`
- Classes: `PascalCase`
- Constants: `UPPER_SNAKE_CASE`
- Private: `_leading_underscore`

### Type Hints

Always use type hints:

```python
def get_sandbox(self, sandbox_id: str) -> Sandbox:
    pass
```

## Debugging

### Enable Debug Logging

```toml
[server]
log_level = "DEBUG"
```

### Interactive Debugging

Use VS Code/Cursor breakpoints or:

```python
breakpoint()  # Python 3.7+
```

### Docker Debugging

```python
import logging
logging.getLogger("docker").setLevel(logging.DEBUG)
```

## Performance Optimization

### Profiling

```bash
python -m cProfile -o profile.stats -m src.main
```

### Optimization Tips

1. **Async Operations**: Use async provisioning to avoid blocking
2. **Connection Pooling**: Reuse Docker client connections
3. **Caching**: Cache configuration and frequently accessed data
4. **Resource Limits**: Set appropriate container resource limits
5. **Monitoring**: Track container creation/deletion metrics

## Contributing

### Pull Request Process

1. Fork the repository
2. Create feature branch from `main`
3. Write tests for new functionality
4. Ensure all tests pass: `uv run pytest`
5. Run linter: `uv run ruff check`
6. Write clear commit messages
7. Submit PR with description

### Code Review Guidelines

- Focus on readability and maintainability
- Ensure test coverage for new code
- Check for proper error handling
- Verify documentation updates
- Test Docker and potential Kubernetes compatibility

### Commit Message Format

```
<type>: <description>

Types: feat, fix, docs, style, refactor, test, chore
```

Examples:
- `feat: add Kubernetes runtime support`
- `fix: resolve expiration timer memory leak`
- `docs: update API documentation`

---

For questions or support, please open an issue on the project repository.


================================================
FILE: server/Dockerfile
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ---------------------------------------------------------------------------
# Stage 1 (builder): install uv, resolve the locked dependencies into
# /app/.venv, then install the project itself into that same environment.
# ---------------------------------------------------------------------------
FROM python:3.10-slim AS builder

# PYTHONDONTWRITEBYTECODE / PYTHONUNBUFFERED: no .pyc files, unbuffered stdio.
# UV_PROJECT_ENV pins the virtualenv location so the runtime stage can copy it
# from a known path.
ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    UV_PROJECT_ENV=/app/.venv \
    UV_LINK_MODE=copy

WORKDIR /app

# curl and CA certificates are only needed to download the uv installer below;
# the apt lists are removed in the same layer to keep it small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends curl ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Install uv via its official install script.
# NOTE(review): the script is fetched unpinned at build time, so the uv version
# may differ between builds -- confirm whether pinning is desired.
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
# Presumably the installer drops the uv binary into one of these directories;
# both are put on PATH so the following `uv` calls resolve.
ENV PATH="/root/.local/bin:/root/.cargo/bin:${PATH}"

# Copy only the dependency manifests first so the dependency layer below is
# rebuilt only when pyproject.toml or uv.lock change, not on every source edit.
COPY pyproject.toml uv.lock ./
# Sync exactly what uv.lock specifies (--frozen), without dev dependencies and
# without installing the project itself yet.
RUN uv sync --frozen --no-dev --no-install-project

# Bring in the source tree plus the files shipped alongside it (license,
# READMEs, example configs and the BatchSandbox template).
COPY src ./src
COPY LICENSE README.md README_zh.md example.config.toml example.config.zh.toml \
     example.config.k8s.toml example.config.k8s.zh.toml example.batchsandbox-template.yaml ./

# Install the project itself into the venv (deps already synced).
# The install is editable, so the venv refers back to the /app source tree;
# the runtime stage therefore copies /app/src to the same path.
RUN uv pip install --no-deps --editable .

# ---------------------------------------------------------------------------
# Stage 2 (runtime): slim image containing only the prepared virtualenv, the
# source tree and default configuration files. uv, curl and the build-time
# apt packages are not carried over.
# ---------------------------------------------------------------------------
FROM python:3.10-slim AS runtime

# The venv's bin directory is placed first on PATH so its executables are
# found; SANDBOX_CONFIG_PATH points at the default config copied below.
ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    UV_PROJECT_ENV=/app/.venv \
    PATH="/app/.venv/bin:${PATH}" \
    SANDBOX_CONFIG_PATH=/etc/opensandbox/config.toml

WORKDIR /app

COPY --from=builder /app/.venv /app/.venv
COPY --from=builder /app/src /app/src
# The Kubernetes example config is shipped as the image's default config.toml;
# mount or override /etc/opensandbox/config.toml to use a different setup.
COPY --from=builder /app/example.config.k8s.toml /etc/opensandbox/config.toml
COPY --from=builder /app/example.config.k8s.zh.toml /etc/opensandbox/config.zh.toml
COPY --from=builder /app/example.batchsandbox-template.yaml /etc/opensandbox/example.batchsandbox-template.yaml

# Documents the listening port (the example configs in the docs use 8080).
EXPOSE 8080

# `opensandbox-server` is presumably the console script installed by the
# project into /app/.venv/bin. CMD supplies the default arguments and can be
# replaced at `docker run` time without changing the entrypoint.
ENTRYPOINT ["opensandbox-server"]
CMD ["--config", "/etc/opensandbox/config.toml"]


================================================
FILE: server/LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: server/README.md
================================================
# OpenSandbox Server

English | [中文](README_zh.md)

A production-grade, FastAPI-based service for managing the lifecycle of containerized sandboxes. It acts as the control plane to create, run, monitor, and dispose of isolated execution environments across container platforms.

## Features

### Core capabilities
- **Lifecycle APIs**: Standardized REST interfaces for create, start, pause, resume, delete
- **Pluggable runtimes**:
  - **Docker**: Production-ready
  - **Kubernetes**: Production-ready (see `kubernetes/` for deployment)
- **Lifecycle cleanup modes**: Configurable TTL with renewal, or manual cleanup with explicit delete
- **Access control**: API Key authentication (`OPEN-SANDBOX-API-KEY`); can be disabled for local/dev
- **Networking modes**:
  - Host: shared host network, performance first
  - Bridge: isolated network with built-in HTTP routing
- **Resource quotas**: CPU/memory limits with Kubernetes-style specs
- **Observability**: Unified status with transition tracking
- **Registry support**: Public and private images

### Extended capabilities
- **Async provisioning**: Background creation to reduce latency
- **Timer restoration**: Expiration timers restored after restart
- **Env/metadata injection**: Per-sandbox environment and metadata
- **Port resolution**: Dynamic endpoint generation
- **Structured errors**: Standard error codes and messages

Metadata keys under the reserved prefix `opensandbox.io/` are system-managed
and cannot be supplied by users.

## Requirements

- **Python**: 3.10 or higher
- **Package Manager**: [uv](https://github.com/astral-sh/uv) (recommended) or pip
- **Runtime Backend**:
  - Docker Engine 20.10+ (for Docker runtime)
  - Kubernetes 1.21.1+ (for Kubernetes runtime)
- **Operating System**: Linux, macOS, or Windows with WSL2

## Quick Start

### Installation

1. **Install from PyPI**:
   > For source development or contributions, you can still clone the repo and run `uv sync` inside `server/`.
   ```bash
   uv pip install opensandbox-server
   ```

### Configuration

The server uses a TOML configuration file to select and configure the underlying runtime.

**Init configuration from simple example**:
```bash
# run opensandbox-server -h for help
opensandbox-server init-config ~/.sandbox.toml --example docker
```

**Create K8S configuration file**

The Kubernetes runtime requires the Sandbox Operator to be deployed in the cluster first; see the `kubernetes/` directory for deployment instructions.
```bash
# run opensandbox-server -h for help
opensandbox-server init-config ~/.sandbox.toml --example k8s
```

**[optional] Edit configuration for your environment**

- For a quick e2e run or demo, start from one of the bundled examples (choose one with `--example`):
  ```bash
  opensandbox-server init-config ~/.sandbox.toml --example docker  # or docker-zh|k8s|k8s-zh
  # add --force to overwrite existing file
  ```
- Render the full schema-driven skeleton (no defaults, just placeholders) by omitting --example:
  ```bash
  opensandbox-server init-config ~/.sandbox.toml
  # add --force to overwrite existing file
  ```

**[optional] Edit `~/.sandbox.toml` for your environment**

Before you start the server, edit the configuration file to suit your environment. You can also generate a new, empty configuration skeleton with `opensandbox-server init-config ~/.sandbox.toml`.

**Docker runtime + host networking**
   ```toml
   [server]
   host = "0.0.0.0"
   port = 8080
   log_level = "INFO"
   api_key = "your-secret-api-key-change-this"
   max_sandbox_timeout_seconds = 86400  # Maximum TTL for requests that specify timeout

   [runtime]
   type = "docker"
   execd_image = "opensandbox/execd:v1.0.7"

   [docker]
   network_mode = "host"  # Containers share host network; only one sandbox instance at a time
   ```

**Docker runtime + bridge networking**
   ```toml
   [server]
   host = "0.0.0.0"
   port = 8080
   log_level = "INFO"
   api_key = "your-secret-api-key-change-this"
   max_sandbox_timeout_seconds = 86400  # Maximum TTL for requests that specify timeout

   [runtime]
   type = "docker"
   execd_image = "opensandbox/execd:v1.0.7"

   [docker]
   network_mode = "bridge"  # Isolated container networking
   ```

**Docker Compose deployment (server runs in a container)**

When `opensandbox-server` itself runs inside Docker Compose and manages sandboxes via
mounted `/var/run/docker.sock`, configure a reachable host value for bridge-mode endpoint
resolution:

```toml
[docker]
network_mode = "bridge"
host_ip = "host.docker.internal"  # or host LAN IP (for Linux: explicit host IP is recommended)
```

Why this matters:
- In bridge mode, sandbox containers get internal Docker IPs.
- External callers usually cannot reach those internal IPs directly.
- `host_ip` lets endpoint resolution return host-reachable addresses.

For SDK/API clients that cannot directly reach sandbox bridge addresses, request proxied
endpoints through the server:

```bash
curl -H "OPEN-SANDBOX-API-KEY: your-secret-api-key" \
  "http://localhost:8080/v1/sandboxes/<sandbox-id>/endpoints/44772?use_server_proxy=true"
```

The returned endpoint is rewritten to the server proxy route:
- `<server-host>/sandboxes/<sandbox-id>/proxy/<port>`

Reference runtime compose file:
- `server/docker-compose.example.yaml`

**Sandbox TTL configuration**

- `timeout` requests must be at least 60 seconds.
- The maximum allowed TTL is controlled by `server.max_sandbox_timeout_seconds`.
- Omit `timeout` or set it to `null` in the create request to use manual cleanup mode instead of automatic expiration.

**Upgrade order for manual cleanup**

- Existing TTL-only clients can continue to work without changes as long as they do not encounter manual-cleanup sandboxes.
- Manual cleanup changes the lifecycle response contract: `expiresAt` may be `null`, and other nullable lifecycle fields may also be serialized explicitly as `null`.
- In practice this can include fields such as `metadata`, `status.reason`, `status.message`, and `status.lastTransitionAt`, depending on the sandbox state and the server response model.
- Before creating any manual-cleanup sandbox, upgrade every SDK/client that may call `create`, `get`, or `list` on the lifecycle API.
- Recommended rollout order:
  1. Upgrade SDKs/clients
  2. Upgrade the server
  3. Start creating sandboxes with `timeout` omitted or `null`
- Do not introduce manual-cleanup sandboxes into a shared environment while old SDKs are still actively reading lifecycle responses.

**Security hardening (applies to all Docker modes)**
   ```toml
   [docker]
   # Drop dangerous capabilities and block privilege escalation by default
   drop_capabilities = ["AUDIT_WRITE", "MKNOD", "NET_ADMIN", "NET_RAW", "SYS_ADMIN", "SYS_MODULE", "SYS_PTRACE", "SYS_TIME", "SYS_TTY_CONFIG"]
   no_new_privileges = true
   apparmor_profile = ""        # e.g. "docker-default" when AppArmor is available
   # Limit fork bombs and optionally enforce seccomp / read-only rootfs
   pids_limit = 512             # set to null to disable
   seccomp_profile = ""        # path or profile name; empty uses Docker default
   ```
   Further reading on Docker container security: https://docs.docker.com/engine/security/

For common issues and solutions, see [Troubleshooting](TROUBLESHOOTING.md).

**Secure container runtime (optional)**

OpenSandbox supports secure container runtimes for enhanced isolation:

```toml
[secure_runtime]
type = "gvisor"              # Options: "", "gvisor", "kata", "firecracker"
docker_runtime = "runsc"      # Docker OCI runtime name (for gVisor, Kata)
# k8s_runtime_class = "gvisor"  # Kubernetes RuntimeClass name (for K8s)
```

- `type=""` (default): No secure runtime, uses runc
- `type="gvisor"`: Uses gVisor (runsc) for user-space kernel isolation
- `type="kata"`: Uses Kata Containers for VM-level isolation
- `type="firecracker"`: Uses Firecracker microVM (Kubernetes only)

> **Detailed guide**: See [Secure Container Runtime Guide](../docs/secure-container.md) for complete installation instructions, system requirements, and troubleshooting.

**Docker daemon setup** for gVisor (add to `/etc/docker/daemon.json`, then restart the Docker daemon):
```json
{
  "runtimes": {
    "runsc": {
      "path": "/usr/bin/runsc"
    }
  }
}
```

**Kubernetes setup**: create the RuntimeClass before use:
```bash
kubectl create -f - <<EOF
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  name: gvisor
handler: runsc
EOF
```

**Ingress exposure (direct | gateway)**
   ```toml
   [ingress]
   mode = "direct"  # docker runtime only supports direct
   # gateway.address = "*.example.com"         # host only (domain or IP[:port]); scheme is not allowed
   # gateway.route.mode = "wildcard"            # wildcard | uri | header
   ```
   - `mode=direct`: default; required when `runtime.type=docker` (client ↔ sandbox direct reachability, no L7 gateway).
   - `mode=gateway`: configure external ingress.
     - `gateway.address`: wildcard domain required when `gateway.route.mode=wildcard`; otherwise must be domain, IP, or IP:port. Do not include scheme; clients decide http/https.
     - `gateway.route.mode`: `wildcard` (host-based wildcard), `uri` (path-prefix), `header` (header-based routing).
     - Response format examples:
       - `wildcard`: `<sandbox-id>-<port>.example.com/path/to/request`
       - `uri`: `10.0.0.1:8000/<sandbox-id>/<port>/path/to/request`
       - `header`: `gateway.example.com` with header `OpenSandbox-Ingress-To: <sandbox-id>-<port>`

**Kubernetes runtime**
   ```toml
   [runtime]
   type = "kubernetes"
   execd_image = "opensandbox/execd:v1.0.7"

   [kubernetes]
   kubeconfig_path = "~/.kube/config"
   namespace = "opensandbox"
   workload_provider = "batchsandbox"   # or "agent-sandbox"
   informer_enabled = true              # Beta: enable watch-based cache
   informer_resync_seconds = 300        # Beta: full list interval
   informer_watch_timeout_seconds = 60  # Beta: watch restart interval
   ```
   - Informer settings are **beta** and enabled by default to reduce API calls; set `informer_enabled = false` to turn off.
   - Resync and watch timeouts control how often the cache refreshes; tune for your cluster API limits.

### Egress configuration

The **`[egress]`** block configures the **egress sidecar** image and enforcement mode. The server only starts this sidecar when a sandbox is created **with** a `networkPolicy` (outbound allow/deny rules). If the create request omits `networkPolicy`, no egress sidecar is added and outbound traffic is not restricted by this mechanism.

#### Keys

| Key | Type | Default | Required | Description |
|-----|------|---------|----------|-------------|
| `image` | string | — | **Yes** whenever `networkPolicy` is used in a create request | OCI image containing the egress binary. Pulled before the sidecar starts. |
| `mode` | `dns` or `dns+nft` | `dns` | No | How the sidecar enforces policy. Written to the sidecar as `OPENSANDBOX_EGRESS_MODE` (see below). |

#### `mode` values

- **`dns`**: DNS-based enforcement via the in-sidecar DNS proxy only. The second enforcement layer (nftables) is not used in this mode. **CIDR and static IP targets in the policy are not enforced** (use domain-style rules only if you rely on `dns` mode).
- **`dns+nft`**: Same DNS path, plus nftables where available (see the [egress component README](../components/egress/README.md) for capabilities and fallbacks). **CIDR and static IP allow/deny rules are supported** via nftables when the table is applied successfully.

#### Per-request `networkPolicy`

- Rules are defined on **`CreateSandboxRequest.networkPolicy`** (default action and ordered egress rules: hostnames / patterns, and IP or CIDR entries when using **`dns+nft`**).
- The serialized policy is passed into the sidecar as **`OPENSANDBOX_EGRESS_RULES`** (JSON).
- An auth token may be attached for the egress HTTP API; see runtime behavior below.

#### Docker runtime

- **`egress.image` must be set** in config when clients send `networkPolicy`; otherwise the request is rejected.
- Outbound policy requires **`docker.network_mode = "bridge"`**. Requests with `networkPolicy` are rejected for `network_mode=host` or for user-defined Docker networks that are incompatible with the sidecar attachment model.
- The main sandbox container shares the sidecar’s network namespace, **drops `NET_ADMIN`**, and relies on the sidecar for policy; the sidecar **keeps `NET_ADMIN`**.
- **IPv6** is disabled in the shared namespace so allow/deny behavior stays consistent.

#### Kubernetes runtime

- When `networkPolicy` is present, the workload pod includes an **egress** sidecar built from `egress.image`, in addition to the main sandbox container.
- **`egress.image`** is required in the same way as for Docker.

#### Operational notes

- The sidecar image is pulled (or validated) before start; delete, expiry, and failure paths attempt to remove the sidecar.
- For deeper behavior (DNS proxy, nftables, limits), refer to the **egress** component documentation under `components/egress/`.

#### Example (`~/.sandbox.toml`)

```toml
[runtime]
type = "docker"
execd_image = "opensandbox/execd:v1.0.7"

[egress]
image = "opensandbox/egress:v1.0.3"
mode = "dns"
```

#### Example create request with `networkPolicy`

```json
{
  "image": {"uri": "python:3.11-slim"},
  "entrypoint": ["python", "-m", "http.server", "8000"],
  "timeout": 3600,
  "resourceLimits": {"cpu": "500m", "memory": "512Mi"},
  "networkPolicy": {
    "defaultAction": "deny",
    "egress": [
      {"action": "allow", "target": "pypi.org"},
      {"action": "allow", "target": "*.python.org"}
    ]
  }
}
```

### Run the server

Start the server using the installed CLI (reads `~/.sandbox.toml` by default):

```bash
opensandbox-server
```

The server will start at `http://0.0.0.0:8080` (or your configured host/port).

### Run the server (installed package)

After installing the package (wheel or PyPI), you can use the CLI entrypoint:

```bash
opensandbox-server --config ~/.sandbox.toml
```

**Health check**

```bash
curl http://localhost:8080/health
```

Expected response:
```json
{"status": "healthy"}
```

## API documentation

Once the server is running, interactive API documentation is available:

- **Swagger UI**: [http://localhost:8080/docs](http://localhost:8080/docs)
- **ReDoc**: [http://localhost:8080/redoc](http://localhost:8080/redoc)

Further reading on Docker container security: https://docs.docker.com/engine/security/

### API authentication

Authentication is enforced only when `server.api_key` is set. If the value is empty or missing, the middleware skips API Key checks (intended for local/dev). For production, always set a non-empty `server.api_key` and send it via the `OPEN-SANDBOX-API-KEY` header.

All API endpoints (except `/health`, `/docs`, `/redoc`) require authentication via the `OPEN-SANDBOX-API-KEY` header when authentication is enabled:

```bash
curl -H "OPEN-SANDBOX-API-KEY: your-secret-api-key" \
  http://localhost:8080/v1/sandboxes
```

### Example usage

**Create a Sandbox**

```bash
curl -X POST "http://localhost:8080/v1/sandboxes" \
  -H "OPEN-SANDBOX-API-KEY: your-secret-api-key" \
  -H "Content-Type: application/json" \
  -d '{
    "image": {
      "uri": "python:3.11-slim"
    },
    "entrypoint": [
      "python",
      "-m",
      "http.server",
      "8000"
    ],
    "timeout": 3600,
    "resourceLimits": {
      "cpu": "500m",
      "memory": "512Mi"
    },
    "env": {
      "PYTHONUNBUFFERED": "1"
    },
    "metadata": {
      "team": "backend",
      "project": "api-testing"
    }
  }'
```

Response:
```json
{
  "id": "a1b2c3d4-5678-90ab-cdef-1234567890ab",
  "status": {
    "state": "Pending",
    "reason": "CONTAINER_STARTING",
    "message": "Sandbox container is starting.",
    "lastTransitionAt": "2024-01-15T10:30:00Z"
  },
  "metadata": {
    "team": "backend",
    "project": "api-testing"
  },
  "expiresAt": "2024-01-15T11:30:00Z",
  "createdAt": "2024-01-15T10:30:00Z",
  "entrypoint": ["python", "-m", "http.server", "8000"]
}
```

**Get Sandbox Details**

```bash
curl -H "OPEN-SANDBOX-API-KEY: your-secret-api-key" \
  http://localhost:8080/v1/sandboxes/a1b2c3d4-5678-90ab-cdef-1234567890ab
```

**Get Service Endpoint**

```bash
curl -H "OPEN-SANDBOX-API-KEY: your-secret-api-key" \
  http://localhost:8080/v1/sandboxes/a1b2c3d4-5678-90ab-cdef-1234567890ab/endpoints/8000

# execd (agent) endpoint
curl -H "OPEN-SANDBOX-API-KEY: your-secret-api-key" \
  http://localhost:8080/v1/sandboxes/a1b2c3d4-5678-90ab-cdef-1234567890ab/endpoints/44772
```

Response:
```json
{
  "endpoint": "sandbox.example.com/a1b2c3d4-5678-90ab-cdef-1234567890ab/8000"
}
```

**Renew Expiration**

```bash
curl -X POST "http://localhost:8080/v1/sandboxes/a1b2c3d4-5678-90ab-cdef-1234567890ab/renew-expiration" \
  -H "OPEN-SANDBOX-API-KEY: your-secret-api-key" \
  -H "Content-Type: application/json" \
  -d '{
    "expiresAt": "2024-01-15T12:30:00Z"
  }'
```

**Delete a Sandbox**

```bash
curl -X DELETE \
  -H "OPEN-SANDBOX-API-KEY: your-secret-api-key" \
  http://localhost:8080/v1/sandboxes/a1b2c3d4-5678-90ab-cdef-1234567890ab
```

## Architecture

### Component responsibilities

- **API Layer** (`src/api/`): HTTP request handling, validation, and response formatting
- **Service Layer** (`src/services/`): Business logic for sandbox lifecycle operations
- **Middleware** (`src/middleware/`): Cross-cutting concerns (authentication, logging)
- **Configuration** (`src/config.py`): Centralized configuration management
- **Runtime Implementations**: Platform-specific sandbox orchestration

### Sandbox lifecycle states

```
       create()
          │
          ▼
     ┌─────────┐
     │ Pending │────────────────────┐
     └────┬────┘                    │
          │                         │
          │ (provisioning)          │
          ▼                         │
     ┌─────────┐    pause()         │
     │ Running │───────────────┐    │
     └────┬────┘               │    │
          │      resume()      │    │
          │   ┌────────────────┘    │
          │   │                     │
          │   ▼                     │
          │ ┌────────┐              │
          ├─│ Paused │              │
          │ └────────┘              │
          │                         │
          │ delete() or expire()    │
          ▼                         │
     ┌──────────┐                   │
     │ Stopping │                   │
     └────┬─────┘                   │
          │                         │
          ├────────────────┬────────┘
          │                │
          ▼                ▼
     ┌────────────┐   ┌────────┐
     │ Terminated │   │ Failed │
     └────────────┘   └────────┘
```

## Configuration reference

### Server configuration

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `server.host` | string | `"0.0.0.0"` | Interface to bind |
| `server.port` | integer | `8080` | Port to listen on |
| `server.log_level` | string | `"INFO"` | Python logging level |
| `server.api_key` | string | `null` | API key for authentication |
| `server.eip` | string | `null` | Bound public IP; when set, used as the host part when returning sandbox endpoints (Docker runtime) |

### Runtime configuration

| Key                    | Type   | Required | Description                                           |
|------------------------|--------|----------|-------------------------------------------------------|
| `runtime.type`         | string | Yes      | Runtime implementation (`"docker"` or `"kubernetes"`) |
| `runtime.execd_image`  | string | Yes      | Container image with execd binary                     |

### Egress configuration

| Key | Type | Default | Required if using `networkPolicy` | Description |
|-----|------|---------|-----------------------------------|-------------|
| `egress.image` | string | — | Yes | Egress sidecar image (OCI reference). |
| `egress.mode` | `dns` \| `dns+nft` | `dns` | No | `OPENSANDBOX_EGRESS_MODE`. CIDR/IP rules need `dns+nft`; `dns` is domain-oriented only. |

### Docker configuration

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `docker.network_mode` | string | `"host"` | Network mode (`"host"` or `"bridge"`) |

### Agent-sandbox configuration

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `agent_sandbox.template_file` | string | `null` | Sandbox CR YAML template for agent-sandbox (used when `kubernetes.workload_provider = "agent-sandbox"`) |
| `agent_sandbox.shutdown_policy` | string | `"Delete"` | Shutdown policy on expiry (`"Delete"` or `"Retain"`) |
| `agent_sandbox.ingress_enabled` | boolean | `true` | Whether ingress routing is expected to be enabled |

### Environment variables

| Variable | Description |
|----------|-------------|
| `SANDBOX_CONFIG_PATH` | Override config file location |
| `DOCKER_HOST` | Docker daemon URL (e.g., `unix:///var/run/docker.sock`) |
| `PENDING_FAILURE_TTL` | TTL for failed pending sandboxes in seconds (default: 3600) |

## Development

### Code quality

**Run linter**:
```bash
uv run ruff check
```

**Auto-fix issues**:
```bash
uv run ruff check --fix
```

**Format code**:
```bash
uv run ruff format
```

### Testing

**Run all tests**:
```bash
uv run pytest
```

**Run with coverage**:
```bash
uv run pytest --cov=src --cov-report=html
```

**Run specific test**:
```bash
uv run pytest tests/test_docker_service.py::test_create_sandbox_requires_entrypoint
```

## License

This project is licensed under the terms specified in the LICENSE file in the repository root.

## Contributing

Contributions are welcome. Suggested flow:

1. Fork the repository
2. Create a feature branch (`git checkout -b feature/amazing-feature`)
3. Write tests for new functionality
4. Ensure all tests pass (`uv run pytest`)
5. Run linting (`uv run ruff check`)
6. Commit with clear messages
7. Push to your fork
8. Open a Pull Request

## Support

- Documentation: See `DEVELOPMENT.md` for development guidance
- Issues: Report defects via GitHub Issues
- Discussions: Use GitHub Discussions for Q&A and ideas


================================================
FILE: server/README_zh.md
================================================
# OpenSandbox Server（沙箱服务端）

中文 | [English](README.md)

基于 FastAPI 的生产级容器化沙箱生命周期管理服务。作为控制平面，协调在不同容器编排环境中的隔离运行时的创建、执行、监控与销毁。

## 功能特性

### 核心能力
- **生命周期管理**：标准化 REST API 覆盖创建、启动、暂停、恢复、删除
- **可插拔运行时**：
  - **Docker**：已支持生产部署
  - **Kubernetes**：已支持生产部署
- **自动过期**：可配置 TTL，支持续期
- **访问控制**：API Key 认证（`OPEN-SANDBOX-API-KEY`），本地/开发可配置为空跳过
- **网络模式**：
  - Host：共享宿主网络，性能优先
  - Bridge：隔离网络，内置 HTTP 代理路由
- **资源配额**：CPU/内存限制，Kubernetes 风格规范
- **状态可观测性**：统一状态与转换跟踪
- **镜像仓库**：支持公共与私有镜像

### 扩展能力
- **异步供应**：后台创建，降低请求延迟
- **定时恢复**：重启后自动恢复过期定时器
- **环境与元数据注入**：按沙箱注入 env 与 metadata
- **端口解析**：动态生成访问端点
- **结构化错误**：标准错误码与消息，便于排障

## 环境要求

- **Python**：3.10 或更高版本
- **包管理器**：[uv](https://github.com/astral-sh/uv)（推荐）或 pip
- **运行时后端**：
  - Docker Engine 20.10+（使用 Docker 运行时）
  - Kubernetes 1.21.1+（使用 Kubernetes 运行时）
- **操作系统**：Linux、macOS 或带 WSL2 的 Windows

## 快速开始

### 安装步骤

1. **通过 PyPI 安装**（无需克隆仓库）：

```bash
uv pip install opensandbox-server
```
> 如需源码开发或贡献，仍可克隆仓库并在 `server/` 下执行 `uv sync`。

### 配置指南

服务端使用 TOML 配置文件来选择和配置底层运行时。

**从简单示例初始化配置**：
```bash
# 运行 opensandbox-server -h 查看帮助
opensandbox-server init-config ~/.sandbox.toml --example docker-zh
```

**创建 K8S 配置文件**

需要在集群中部署 K8S 版本的 Sandbox Operator，参考 Kubernetes 目录。
```bash
# 运行 opensandbox-server -h 查看帮助
opensandbox-server init-config ~/.sandbox.toml --example k8s-zh
```

**[可选] 编辑配置以适配您的环境**

- 用于快速 e2e/demo：
  ```bash
  opensandbox-server init-config ~/.sandbox.toml --example docker-zh  # 或 docker|k8s|k8s-zh
  # 已有文件需覆盖时加 --force
  ```
- 省略 `--example` 时生成“配置框架”（无默认值，只有占位符）：
  ```bash
  opensandbox-server init-config ~/.sandbox.toml
  # 已有文件需覆盖时加 --force
  ```

**[可选] 编辑 `~/.sandbox.toml`** 适配您的环境

在启动服务器前，编辑配置文件以适配您的环境。您也可以通过 `opensandbox-server init-config ~/.sandbox.toml` 生成一个新的完整配置模板。

**Docker 运行时 + Host 网络模式**
   ```toml
   [server]
   host = "0.0.0.0"
   port = 8080
   log_level = "INFO"
   api_key = "your-secret-api-key-change-this"

   [runtime]
   type = "docker"
   execd_image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.7"

   [docker]
   network_mode = "host"  # 容器共享宿主机网络，只能创建一个sandbox实例
   ```

**Docker 运行时 + Bridge 网络模式**
   ```toml
   [server]
   host = "0.0.0.0"
   port = 8080
   log_level = "INFO"
   api_key = "your-secret-api-key-change-this"

   [runtime]
   type = "docker"
   execd_image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.7"

   [docker]
   network_mode = "bridge"  # 容器隔离网络
   ```

**Docker Compose 部署（server 本身运行在容器中）**

当 `opensandbox-server` 运行在 Docker Compose 容器内，并通过挂载
`/var/run/docker.sock` 管理沙箱时，需要为 bridge 模式端点解析配置一个可达的宿主地址：

```toml
[docker]
network_mode = "bridge"
host_ip = "host.docker.internal"  # 或宿主机 LAN IP（Linux 建议显式填写）
```

原因：
- bridge 模式下沙箱容器会分配 Docker 内部 IP。
- 外部调用方通常无法直接访问这些内部 IP。
- `host_ip` 会让端点解析返回对调用方可达的宿主地址。

对于无法直连 sandbox bridge 地址的 SDK/API 调用方，可通过 server 代理获取端点：

```bash
curl -H "OPEN-SANDBOX-API-KEY: your-secret-api-key" \
  "http://localhost:8080/v1/sandboxes/<sandbox-id>/endpoints/44772?use_server_proxy=true"
```

返回端点会被重写为 server 代理路径：
- `<server-host>/sandboxes/<sandbox-id>/proxy/<port>`

可参考 Compose 运行示例：
- `server/docker-compose.example.yaml`

**安全加固（适用于所有 Docker 模式）**
   ```toml
   [docker]
   # 默认关闭危险能力、防止提权
   drop_capabilities = ["AUDIT_WRITE", "MKNOD", "NET_ADMIN", "NET_RAW", "SYS_ADMIN", "SYS_MODULE", "SYS_PTRACE", "SYS_TIME", "SYS_TTY_CONFIG"]
   no_new_privileges = true
   apparmor_profile = ""        # 例如当 AppArmor 可用时使用 "docker-default"
   # 限制进程数量
   pids_limit = 512             # 设为 null 可关闭
   seccomp_profile = ""        # 配置文件路径或名称；为空使用 Docker 默认
   ```
   更多 Docker 容器安全参考：https://docs.docker.com/engine/security/

常见问题及解决方案请参阅 [故障排查](TROUBLESHOOTING_zh.md)。

**安全容器运行时（可选）**

OpenSandbox 支持安全容器运行时以增强隔离性：

```toml
[secure_runtime]
type = "gvisor"              # 选项: "", "gvisor", "kata", "firecracker"
docker_runtime = "runsc"      # Docker OCI 运行时名称（用于 gVisor、Kata）
# k8s_runtime_class = "gvisor"  # Kubernetes RuntimeClass 名称（用于 K8s）
```

- `type=""`（默认）：不使用安全运行时，使用 runc
- `type="gvisor"`：使用 gVisor (runsc) 实现用户态内核隔离
- `type="kata"`：使用 Kata Containers 实现 VM 级隔离
- `type="firecracker"`：使用 Firecracker 微虚拟机（仅 Kubernetes）

> **详细指南**：参阅 [安全容器运行时指南](../docs/secure-container.md) 获取完整的安装说明、系统要求和故障排除。

**Docker daemon 配置**（gVisor 示例，写入 `/etc/docker/daemon.json` 后重启 Docker 守护进程）：
```json
{
  "runtimes": {
    "runsc": {
      "path": "/usr/bin/runsc"
    }
  }
}
```

**Kubernetes 配置**：使用前需先创建 RuntimeClass：
```bash
kubectl create -f - <<EOF
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  name: gvisor
handler: runsc
EOF
```

**Ingress 暴露（direct | gateway）**
```toml
[ingress]
mode = "direct"  # Docker 运行时仅支持 direct（直连，无 L7 网关）
# gateway.address = "*.example.com"  # 仅主机（域名/IP 或 IP:port），不允许带 scheme
# gateway.route.mode = "wildcard"            # wildcard | uri | header
```
- `mode=direct`：默认；当 `runtime.type=docker` 时必须使用（客户端与 sandbox 直连，不经过网关）。
- `mode=gateway`：配置外部入口。
  - `gateway.address`：当 `gateway.route.mode=wildcard` 时必须是泛域名；其他模式需为域名/IP 或 IP:port。不允许携带 scheme，客户端自行选择 http/https。
  - `gateway.route.mode`：`wildcard`（域名泛匹配）、`uri`（基于路径前缀）、`header`（基于请求头路由）。
  - 返回示例：
    - `wildcard`：`<sandbox-id>-<port>.example.com/path/to/request`
    - `uri`：`10.0.0.1:8000/<sandbox-id>/<port>/path/to/request`
    - `header`：`gateway.example.com`，请求头 `OpenSandbox-Ingress-To: <sandbox-id>-<port>`

**Kubernetes 运行时**
   ```toml
   [runtime]
   type = "kubernetes"
   execd_image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.7"

   [kubernetes]
   kubeconfig_path = "~/.kube/config"
   namespace = "opensandbox"
   workload_provider = "batchsandbox"        # 或 "agent-sandbox"
   informer_enabled = true                   # Beta：启用 watch 缓存
   informer_resync_seconds = 300             # Beta：全量刷新间隔
   informer_watch_timeout_seconds = 60       # Beta：watch 超时重连间隔
   ```
   - Informer 配置为 **Beta**，默认开启以减少 API 压力；若需关闭设置 `informer_enabled = false`。
   - resync / watch 超时用于控制缓存刷新频率，可根据集群 API 限流调优。

### Egress 配置（`[egress]` 配置块）

**`[egress]`** 用于配置 **egress 侧车** 的镜像与执行模式。仅当创建沙箱的请求中带有 **`networkPolicy`**（出站允许/拒绝规则）时，服务器才会注入该侧车；若请求未带 `networkPolicy`，不会添加 egress 侧车，也不会通过该机制限制出站流量。

#### 配置项

| 键 | 类型 | 默认值 | 何时必填 | 说明 |
|----|------|--------|----------|------|
| `image` | string | — | 任意一次创建请求携带 `networkPolicy` 时 **必填** | 包含 egress 可执行文件的容器镜像；侧车启动前会拉取或校验镜像。 |
| `mode` | `dns` 或 `dns+nft` | `dns` | 否 | 侧车如何执行策略，写入环境变量 `OPENSANDBOX_EGRESS_MODE`（见下）。 |

#### `mode` 取值

- **`dns`**：仅通过侧车内 DNS 代理做基于域名的策略；该模式下不启用第二层（nftables）规则。**策略中的 CIDR、静态 IP 类目标不会被强制执行**（若只用 `dns` 模式，请使用域名类规则）。
- **`dns+nft`**：在 `dns` 的基础上启用 nftables（能力与回退行为见 [egress 组件说明](../components/egress/README.md)）。**支持 CIDR 与静态 IP 的放行/拒绝规则**（nftables 表成功下发时生效）。

#### 请求体中的 `networkPolicy`

- 规则在 **`CreateSandboxRequest.networkPolicy`** 中声明（默认动作与有序的 egress 规则：域名/通配符；在使用 **`dns+nft`** 时还可包含 IP 或 CIDR 条目）。
- 序列化后的策略以 JSON 形式注入侧车环境变量 **`OPENSANDBOX_EGRESS_RULES`**。
- 可能同时下发用于 egress HTTP API 的鉴权信息（与运行时行为一致）。

#### Docker 运行时

- 客户端传入 `networkPolicy` 时，配置中必须设置 **`egress.image`**，否则请求会被拒绝。
- 出站策略要求 **`docker.network_mode = "bridge"`**；`network_mode=host` 或与侧车挂载模型不兼容的用户自定义网络下，携带 `networkPolicy` 的请求会被拒绝。
- 主沙箱容器与侧车 **共享网络命名空间**，主容器 **drop `NET_ADMIN`**，由侧车保留 **`NET_ADMIN`** 完成策略相关操作。
- 共享 netns 内会 **禁用 IPv6**，以保证放行/拒绝行为一致。

#### Kubernetes 运行时

- 当请求带有 `networkPolicy` 时，工作负载 Pod 中除主容器外，还会增加基于 **`egress.image`** 的 **egress** 侧车。
- **`egress.image`** 的必填规则与 Docker 相同。

#### 运维说明

- 侧车镜像在启动前拉取或校验；删除、过期、失败等路径会尽量清理侧车。
- DNS 代理、nftables、能力边界等详见仓库内 **`components/egress/`** 文档。

#### 配置示例（`~/.sandbox.toml`）

```toml
[runtime]
type = "docker"
execd_image = "opensandbox/execd:v1.0.7"

[egress]
image = "opensandbox/egress:v1.0.3"
mode = "dns"
```

#### 带 `networkPolicy` 的创建请求示例

```json
{
  "image": {"uri": "python:3.11-slim"},
  "entrypoint": ["python", "-m", "http.server", "8000"],
  "timeout": 3600,
  "resourceLimits": {"cpu": "500m", "memory": "512Mi"},
  "networkPolicy": {
    "defaultAction": "deny",
    "egress": [
      {"action": "allow", "target": "pypi.org"},
      {"action": "allow", "target": "*.python.org"}
    ]
  }
}
```

### 启动服务

使用安装后的 CLI 启动（默认读取 `~/.sandbox.toml`）：

```bash
opensandbox-server
```

服务将在 `http://0.0.0.0:8080`（或您配置的主机/端口）启动。

### 启动服务（安装包方式）

安装为 Python 包后，可直接使用 CLI 启动：

```bash
opensandbox-server --config ~/.sandbox.toml
```

**健康检查**

```bash
curl http://localhost:8080/health
```

预期响应：
```json
{"status": "healthy"}
```

## API 文档

服务启动后，可访问交互式 API 文档：

- **Swagger UI**：[http://localhost:8080/docs](http://localhost:8080/docs)
- **ReDoc**：[http://localhost:8080/redoc](http://localhost:8080/redoc)

### API 认证

仅当 `server.api_key` 设置为非空值时才启用鉴权；当该值为空或缺省时，中间件会跳过 API Key 校验（适合本地/开发调试）。生产环境请务必设置非空的 `server.api_key`，并通过 `OPEN-SANDBOX-API-KEY` 请求头发送。

当鉴权开启时，除 `/health`、`/docs`、`/redoc` 外的 API 端点均需要通过 `OPEN-SANDBOX-API-KEY` 请求头进行认证：

```bash
curl -H "OPEN-SANDBOX-API-KEY: your-secret-api-key" \
  http://localhost:8080/v1/sandboxes
```

### 使用示例

**创建沙箱**

```bash
curl -X POST "http://localhost:8080/v1/sandboxes" \
  -H "OPEN-SANDBOX-API-KEY: your-secret-api-key" \
  -H "Content-Type: application/json" \
  -d '{
    "image": {
      "uri": "python:3.11-slim"
    },
    "entrypoint": [
      "python",
      "-m",
      "http.server",
      "8000"
    ],
    "timeout": 3600,
    "resourceLimits": {
      "cpu": "500m",
      "memory": "512Mi"
    },
    "env": {
      "PYTHONUNBUFFERED": "1"
    },
    "metadata": {
      "team": "backend",
      "project": "api-testing"
    }
  }'
```

响应：
```json
{
  "id": "a1b2c3d4-5678-90ab-cdef-1234567890ab",
  "status": {
    "state": "Pending",
    "reason": "CONTAINER_STARTING",
    "message": "Sandbox container is starting.",
    "lastTransitionAt": "2024-01-15T10:30:00Z"
  },
  "metadata": {
    "team": "backend",
    "project": "api-testing"
  },
  "expiresAt": "2024-01-15T11:30:00Z",
  "createdAt": "2024-01-15T10:30:00Z",
  "entrypoint": ["python", "-m", "http.server", "8000"]
}
```

**获取沙箱详情**

```bash
curl -H "OPEN-SANDBOX-API-KEY: your-secret-api-key" \
  http://localhost:8080/v1/sandboxes/a1b2c3d4-5678-90ab-cdef-1234567890ab
```

**获取服务端点**

```bash
# 获取自定义服务端点
curl -H "OPEN-SANDBOX-API-KEY: your-secret-api-key" \
  http://localhost:8080/v1/sandboxes/a1b2c3d4-5678-90ab-cdef-1234567890ab/endpoints/8000

# 获取OpenSandbox守护进程（execd）端点
curl -H "OPEN-SANDBOX-API-KEY: your-secret-api-key" \
  http://localhost:8080/v1/sandboxes/a1b2c3d4-5678-90ab-cdef-1234567890ab/endpoints/44772
```

响应：
```json
{
  "endpoint": "sandbox.example.com/a1b2c3d4-5678-90ab-cdef-1234567890ab/8000"
}
```

**续期沙箱**

```bash
curl -X POST "http://localhost:8080/v1/sandboxes/a1b2c3d4-5678-90ab-cdef-1234567890ab/renew-expiration" \
  -H "OPEN-SANDBOX-API-KEY: your-secret-api-key" \
  -H "Content-Type: application/json" \
  -d '{
    "expiresAt": "2024-01-15T12:30:00Z"
  }'
```

**删除沙箱**

```bash
curl -X DELETE \
  -H "OPEN-SANDBOX-API-KEY: your-secret-api-key" \
  http://localhost:8080/v1/sandboxes/a1b2c3d4-5678-90ab-cdef-1234567890ab
```

## 系统架构

### 组件职责

- **API 层**（`src/api/`）：HTTP 请求处理、验证和响应格式化
- **服务层**（`src/services/`）：沙箱生命周期操作的业务逻辑
- **中间件**（`src/middleware/`）：横切关注点（认证、日志）
- **配置**（`src/config.py`）：集中式配置管理
- **运行时实现**：平台特定的沙箱编排

### 沙箱生命周期状态

```
       create()
          │
          ▼
     ┌─────────┐
     │ Pending │────────────────────┐
     └────┬────┘                    │
          │                         │
          │ (provisioning)          │
          ▼                         │
     ┌─────────┐    pause()         │
     │ Running │───────────────┐    │
     └────┬────┘               │    │
          │      resume()      │    │
          │   ┌────────────────┘    │
          │   │                     │
          │   ▼                     │
          │ ┌────────┐              │
          ├─│ Paused │              │
          │ └────────┘              │
          │                         │
          │ delete() or expire()    │
          ▼                         │
     ┌──────────┐                   │
     │ Stopping │                   │
     └────┬─────┘                   │
          │                         │
          ├────────────────┬────────┘
          │                │
          ▼                ▼
     ┌────────────┐   ┌────────┐
     │ Terminated │   │ Failed │
     └────────────┘   └────────┘
```

## 配置参考

### 服务器配置

| 键 | 类型 | 默认值 | 描述 |
|----|------|--------|------|
| `server.host` | string | `"0.0.0.0"` | 绑定的网络接口 |
| `server.port` | integer | `8080` | 监听端口 |
| `server.log_level` | string | `"INFO"` | Python 日志级别 |
| `server.api_key` | string | `null` | API 认证密钥 |
| `server.eip` | string | `null` | 绑定的公网 IP；配置后，返回 sandbox endpoint 时作为地址的 host 部分（Docker 运行时） |

### 运行时配置

| 键                      | 类型     | 必需 | 描述                                 |
|------------------------|--------|----|------------------------------------|
| `runtime.type`         | string | 是  | 运行时实现（`"docker"` 或 `"kubernetes"`） |
| `runtime.execd_image`  | string | 是  | 包含 execd 二进制文件的容器镜像                |

### Egress 配置

| 键 | 类型 | 默认值 | 使用 `networkPolicy` 时是否必填 | 说明 |
|----|------|--------|--------------------------------|------|
| `egress.image` | string | — | 是 | Egress 侧车镜像（OCI 引用）。 |
| `egress.mode` | `dns` \| `dns+nft` | `dns` | 否 | `OPENSANDBOX_EGRESS_MODE`。CIDR/IP 类规则需 `dns+nft`；`dns` 仅面向域名类策略。 |

### Docker 配置

| 键 | 类型 | 默认值 | 描述 |
|----|------|--------|------|
| `docker.network_mode` | string | `"host"` | 网络模式（`"host"` 或 `"bridge"`）|

### Agent-sandbox 配置

| 键 | 类型 | 默认值 | 描述 |
|----|------|--------|------|
| `agent_sandbox.template_file` | string | `null` | agent-sandbox 的 Sandbox CR YAML 模板路径（仅在 `kubernetes.workload_provider = "agent-sandbox"` 时使用） |
| `agent_sandbox.shutdown_policy` | string | `"Delete"` | 过期时的关停策略（`"Delete"` 或 `"Retain"`） |
| `agent_sandbox.ingress_enabled` | boolean | `true` | 是否启用 ingress 路由 |

### 环境变量

| 变量 | 描述 |
|------|------|
| `SANDBOX_CONFIG_PATH` | 覆盖配置文件位置 |
| `DOCKER_HOST` | Docker 守护进程 URL（例如 `unix:///var/run/docker.sock`）|
| `PENDING_FAILURE_TTL` | 失败的待处理沙箱的 TTL（秒，默认：3600）|

## 开发

### 代码质量

**运行代码检查**：
```bash
uv run ruff check
```

**自动修复问题**：
```bash
uv run ruff check --fix
```

**格式化代码**：
```bash
uv run ruff format
```

### 测试

**运行所有测试**：
```bash
uv run pytest
```

**带覆盖率运行**：
```bash
uv run pytest --cov=src --cov-report=html
```

**运行特定测试**：
```bash
uv run pytest tests/test_docker_service.py::test_create_sandbox_requires_entrypoint
```

## 许可证

本项目遵循仓库根目录下的 LICENSE 文件条款。

## 贡献

欢迎提交改进，建议遵循以下流程：

1. Fork 仓库
2. 创建特性分支（`git checkout -b feature/amazing-feature`）
3. 为新功能编写测试
4. 确保所有测试通过（`uv run pytest`）
5. 运行代码检查（`uv run ruff check`）
6. 使用清晰的消息提交
7. 推送到您的 fork
8. 打开 Pull Request

## 支持

- 文档：参阅 `DEVELOPMENT.md` 获取开发指南
- 问题报告：通过 GitHub Issues 报告缺陷
- 讨论：在 GitHub Discussions 进行答疑与交流


================================================
FILE: server/TROUBLESHOOTING.md
================================================
# Troubleshooting

English | [中文](TROUBLESHOOTING_zh.md)

## `exec /opt/opensandbox/bootstrap.sh: operation not permitted`

If sandbox logs show:

```text
exec /opt/opensandbox/bootstrap.sh: operation not permitted
```

check the following first:

1. Verify the script exists and is executable inside the sandbox container:
   ```bash
   docker exec -it <sandbox-container> ls -l /opt/opensandbox/bootstrap.sh
   ```
2. Verify runtime security/mount constraints are not blocking execution (for example strict
   confinement or `noexec` mount behavior in host/container runtime setup).
3. If you are running Docker from Snap-based environments (for example Ubuntu Core), prefer
   Docker CE package deployments for production OpenSandbox workloads, because strict runtime
   confinement may block this bootstrap execution path in some setups.
4. Re-run with the latest server and execd images to ensure you include the latest runtime fixes.

If this still reproduces, collect:
- `docker info`
- `docker logs opensandbox-server`
- `docker logs <sandbox-container>`
- your `config.toml` (mask secrets)

## Sandbox health check timed out (e.g. on Alibaba Cloud ECS)

If the client reports:

```text
opensandbox.exceptions.sandbox.SandboxReadyTimeoutException: Sandbox health check timed out after 30.0s (2 attempts). Health check returned false continuously
```

when the server runs on a cloud VM (e.g. [Alibaba Cloud ECS](https://github.com/alibaba/OpenSandbox/issues/297)), the client is likely trying to reach the sandbox at an address it cannot access. The server may be returning a bind address such as `127.0.0.1` or an internal LAN IP in the endpoint URL, so the health check from the client side fails.

**Solution:** Set the bound public IP so that the server returns a reachable address in the sandbox endpoint API. In your config (e.g. `~/.sandbox.toml`), under `[server]`, set `eip` to the VM’s public IP (or the hostname that clients use to reach the server):

```toml
[server]
host = "0.0.0.0"
port = 8080
eip = "47.x.x.x"   # Your ECS public IP, or the hostname clients use to reach this server
```

After restarting the server, the get-endpoint API will use `eip` as the host part of the returned URL, so the client can reach the sandbox for the health check. This applies to the Docker runtime; the server skips resolving `host` when `eip` is set.


================================================
FILE: server/TROUBLESHOOTING_zh.md
================================================
# 故障排查

[English](TROUBLESHOOTING.md) | 中文

## `exec /opt/opensandbox/bootstrap.sh: operation not permitted`

如果沙箱日志出现：

```text
exec /opt/opensandbox/bootstrap.sh: operation not permitted
```

建议先检查：

1. 确认脚本在沙箱容器内存在且可执行：
   ```bash
   docker exec -it <sandbox-container> ls -l /opt/opensandbox/bootstrap.sh
   ```
2. 检查运行时安全策略和挂载约束是否阻止执行（例如严格沙箱约束或 `noexec` 挂载行为）。
3. 如果使用 Snap 版本 Docker（如 Ubuntu Core 场景），生产环境建议优先使用 Docker CE 安装方式，因为部分严格约束环境会影响该 bootstrap 执行路径。
4. 升级并复现：使用最新 server / execd 镜像确认是否已包含修复。

如果仍可复现，建议附带以下信息提 issue：
- `docker info`
- `docker logs opensandbox-server`
- `docker logs <sandbox-container>`
- `config.toml`（注意脱敏）

## 沙箱健康检查超时（如阿里云 ECS）

若服务端部署在云主机（例如 [阿里云 ECS](https://github.com/alibaba/OpenSandbox/issues/297)），客户端创建沙箱时出现：

```text
opensandbox.exceptions.sandbox.SandboxReadyTimeoutException: Sandbox health check timed out after 30.0s (2 attempts). Health check returned false continuously
```

通常是因为服务端返回的 endpoint 地址（如 `127.0.0.1` 或内网 IP）对客户端不可达，客户端无法完成健康检查。

**解决办法：** 配置绑定的公网 IP，让服务端在返回 sandbox endpoint 时使用客户端可访问的地址。在配置文件（如 `~/.sandbox.toml`）的 `[server]` 下设置 `eip` 为云主机的公网 IP（或客户端访问该服务时使用的主机名）：

```toml
[server]
host = "0.0.0.0"
port = 8080
eip = "47.x.x.x"   # 你的 ECS 公网 IP，或客户端用来访问本机的主机名
```

重启服务后，获取 endpoint 的 API 会使用 `eip` 作为返回地址的 host 部分，客户端即可连通沙箱并通过健康检查。该行为针对 Docker 运行时；配置了 `eip` 后，服务端将不再根据 `host` 解析地址。


================================================
FILE: server/build.sh
================================================
#!/bin/bash
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Build and push the multi-architecture (linux/amd64, linux/arm64) server image.
#
# Environment:
#   TAG  Image tag to publish (default: "latest"). When TAG starts with "v",
#        the image is additionally tagged as ":latest" in both registries.
#
# -e: abort on the first failing command; -x: echo each command as it runs.
set -ex

TAG=${TAG:-latest}

# Recreate the buildx builder from scratch. Removal is allowed to fail so the
# script still works when no builder named "server-builder" exists yet.
docker buildx rm server-builder || true

docker buildx create --use --name server-builder

docker buildx inspect --bootstrap

docker buildx ls

# Extra "-t" arguments, populated only for version-style tags (v*).
LATEST_TAGS=()
if [[ "${TAG}" == v* ]]; then
  LATEST_TAGS+=(-t opensandbox/server:latest -t sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/server:latest)
fi

# The image references are quoted so that an unusual TAG value cannot be
# word-split or glob-expanded into additional docker arguments.
docker buildx build \
  -t "opensandbox/server:${TAG}" \
  -t "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/server:${TAG}" \
  "${LATEST_TAGS[@]}" \
  --platform linux/amd64,linux/arm64 \
  --push \
  .

================================================
FILE: server/docker-compose.example.yaml
================================================
configs:
  opensandbox-config:
    content: |
      [server]
      host = "0.0.0.0"
      port = 8090
      log_level = "INFO"

      [runtime]
      type = "docker"
      # execd_image = "opensandbox/execd:v1.0.7"
      execd_image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.7"

      [egress]
      image = "opensandbox/egress:v1.0.3"
      # image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/egress:v1.0.3"

      [docker]
      network_mode = "bridge"
      # When server runs in a container, set host_ip to the host's IP or hostname so bridge-mode endpoints are reachable (e.g. host.docker.internal or the host LAN IP).
      # This is required when the server itself runs as a Docker container on the host.
      host_ip = "host.docker.internal"
      drop_capabilities = ["AUDIT_WRITE", "MKNOD", "NET_ADMIN", "NET_RAW", "SYS_ADMIN", "SYS_MODULE", "SYS_PTRACE", "SYS_TIME", "SYS_TTY_CONFIG"]
      no_new_privileges = true
      # TODO: For production environments, it is recommended to set this to '4096' or higher to avoid
      # "can't start new thread" errors when multiple sandboxes are running concurrently.
      # See: https://github.com/alibaba/OpenSandbox/issues/447
      pids_limit = 4096

      [ingress]
      mode = "direct"

version: '3.8'

services:
  opensandbox-server:
    image: opensandbox/server:latest
    container_name: opensandbox-server
    networks:
      - opensandbox-net
    ports:
      - "8090:8090"
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    configs:
      - source: opensandbox-config
        target: /etc/opensandbox/config.toml
    environment:
      - SANDBOX_CONFIG_PATH=/etc/opensandbox/config.toml

  sdk-client:
    image: python:3.11-slim
    container_name: sdk-client
    networks:
      - opensandbox-net
    command: >
      sh -c "pip install opensandbox && tail -f /dev/null"
    environment:
      - OPENSANDBOX_SERVER_URL=http://opensandbox-server:8090

networks:
  opensandbox-net:
    driver: bridge

================================================
FILE: server/example.batchsandbox-template.yaml
================================================
# Example BatchSandbox CR template for OpenSandbox Kubernetes runtime
# This is a complete BatchSandbox CR template that will be merged with runtime values
#
# Usage in config.toml:
#   [kubernetes]
#   batchsandbox_template_file = "/path/to/this/file.yaml"

# Metadata template (will be merged with runtime-generated metadata)
metadata:
# Spec template
spec:
  replicas: 1
  # Pod template specification
  template:
    spec:
      restartPolicy: Never
      tolerations:
        - operator: "Exists"


================================================
FILE: server/example.config.k8s.toml
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Example Kubernetes Runtime Configuration for OpenSandbox Server
#
# This configuration file demonstrates how to configure the OpenSandbox server
# to use Kubernetes as the sandbox runtime.
#
# Usage:
#   1. Copy this file to ~/.sandbox.toml (or set SANDBOX_CONFIG_PATH environment variable)
#   2. Update the configuration values according to your environment
#   3. Start the server: uvicorn src.main:app --host 0.0.0.0 --port 8080

[server]
host = "0.0.0.0"
port = 8080
log_level = "INFO"
# api_key = "your-secret-api-key"  # Optional: Uncomment to enable API key authentication

[runtime]
type = "kubernetes"
execd_image = "opensandbox/execd:v1.0.7"

[storage]
# Volume and storage configuration
# -----------------------------------------------------------------
# Allowlist of host path prefixes permitted for bind mounts.
# If empty, all host paths are allowed (not recommended for production).
# Example: allowed_host_paths = ["/data/opensandbox", "/tmp/sandbox"]
allowed_host_paths = []

[kubernetes]
# Path to kubeconfig file. Omit this key (TOML has no null value) to use in-cluster configuration
# Replace with your path
kubeconfig_path = "~/.kube/config"

# Namespace for sandbox workloads
namespace = "opensandbox"

# [Beta] Enable informer-backed cache to reduce API calls.
# Set to false to disable the watch-based cache.
informer_enabled = true
informer_resync_seconds = 300
informer_watch_timeout_seconds = 60

# Workload provider type: available providers are registered in the provider factory
# If not specified, uses the first registered provider (typically "batchsandbox")
workload_provider = "batchsandbox"

# Path to the BatchSandbox template file
# Replace with your path
batchsandbox_template_file = "~/batchsandbox-template.yaml"

[ingress]
# Ingress exposure mode: direct (default) or gateway
mode = "direct"

[egress]
# Egress configuration
# -----------------------------------------------------------------
image = "opensandbox/egress:v1.0.3"
# Enforcement: "dns" (DNS proxy only) or "dns+nft" (nftables + DNS).
mode = "dns"


================================================
FILE: server/example.config.k8s.zh.toml
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Example Kubernetes Runtime Configuration for OpenSandbox Server
#
# This configuration file demonstrates how to configure the OpenSandbox server
# to use Kubernetes as the sandbox runtime.
#
# Usage:
#   1. Copy this file to ~/.sandbox.toml (or set SANDBOX_CONFIG_PATH environment variable)
#   2. Update the configuration values according to your environment
#   3. Start the server: uvicorn src.main:app --host 0.0.0.0 --port 8080

[server]
host = "0.0.0.0"
port = 8080
log_level = "INFO"
# api_key = "your-secret-api-key"  # Optional: Uncomment to enable API key authentication

[runtime]
type = "kubernetes"
execd_image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.7"

[storage]
# 卷存储配置
# -----------------------------------------------------------------
# 允许进行 bind mount 的宿主机路径前缀白名单。
# 仅匹配这些前缀的路径才能被挂载到沙箱中。
# 如果为空，则允许所有路径（不建议在生产环境使用）。
# 示例：allowed_host_paths = ["/data/opensandbox", "/tmp/sandbox"]
allowed_host_paths = []

[kubernetes]
# Path to kubeconfig file. Omit this key (TOML has no null value) to use in-cluster configuration
# Replace with your path
kubeconfig_path = "~/.kube/config"

# Namespace for sandbox workloads
namespace = "opensandbox"

# [Beta] 启用 informer 缓存以减少 API 调用。
# 如需关闭 watch 缓存，将该项设为 false。
informer_enabled = true
informer_resync_seconds = 300
informer_watch_timeout_seconds = 60

# Workload provider type: available providers are registered in the provider factory
# If not specified, uses the first registered provider (typically "batchsandbox")
workload_provider = "batchsandbox"

# Path to the BatchSandbox template file
# Replace with your path
batchsandbox_template_file = "~/batchsandbox-template.yaml"

[ingress]
# Ingress exposure mode: direct (default) or gateway
mode = "direct"

[egress]
# Egress configuration
# -----------------------------------------------------------------
image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/egress:v1.0.3"
# Enforcement: "dns" (DNS proxy only) or "dns+nft" (nftables + DNS).
mode = "dns"


================================================
FILE: server/example.config.toml
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Example OpenSandbox configuration.
# Copy this file to ~/.sandbox.toml or set SANDBOX_CONFIG_PATH to point at it.
# Each top-level block mirrors the sections supported by src/config.py.

[server]
# Lifecycle API host/port and logging settings
# -----------------------------------------------------------------
host = "127.0.0.1"
port = 8080
log_level = "INFO"
# api_key = "your-secret-api-key"  # Optional: Uncomment to enable API key authentication
# eip = "1.2.3.4"  # Optional: External IP/hostname for endpoint URLs when returning sandbox endpoints
# Maximum TTL for sandboxes that specify timeout. Comment out this line to disable the upper bound.
max_sandbox_timeout_seconds = 86400

[runtime]
# Runtime selection (docker | kubernetes)
# -----------------------------------------------------------------
type = "docker"
execd_image = "opensandbox/execd:v1.0.7"

[egress]
# Egress configuration
# -----------------------------------------------------------------
image = "opensandbox/egress:v1.0.3"
# Enforcement: "dns" (DNS proxy only) or "dns+nft" (nftables + DNS).
mode = "dns"

[storage]
# Volume and storage configuration
# -----------------------------------------------------------------
# Allowlist of host path prefixes permitted for bind mounts.
# If empty, all host paths are allowed (not recommended for production).
# Example: allowed_host_paths = ["/data/opensandbox", "/tmp/sandbox"]
allowed_host_paths = []

[docker]
# Docker-specific knobs
# -----------------------------------------------------------------
# Use bridge for network isolation
network_mode = "bridge"
# Docker API timeout (seconds). If unset, default 180
# api_timeout = 300
# When server runs in a container, host IP/hostname for bridge-mode endpoints
# host_ip = "10.57.1.91"
# Drop dangerous capabilities and block privilege escalation
drop_capabilities = ["AUDIT_WRITE", "MKNOD", "NET_ADMIN", "NET_RAW", "SYS_ADMIN", "SYS_MODULE", "SYS_PTRACE", "SYS_TIME", "SYS_TTY_CONFIG"]
no_new_privileges = true
# Optional: set an AppArmor profile name (e.g., "docker-default") when AppArmor is enabled
apparmor_profile = ""
# Limit process count to reduce host impact from fork bombs; set to null to disable
# TODO: For production environments, it is recommended to set this to '4096' or higher to avoid
# "can't start new thread" errors when multiple sandboxes are running concurrently.
# See: https://github.com/alibaba/OpenSandbox/issues/447
pids_limit = 4096
# Seccomp profile: empty string uses Docker default; set to an absolute path for a custom profile
seccomp_profile = ""

[ingress]
# Ingress exposure mode: direct (default) or gateway
mode = "direct"


================================================
FILE: server/example.config.zh.toml
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Example OpenSandbox configuration.
# Copy this file to ~/.sandbox.toml or set SANDBOX_CONFIG_PATH to point at it.
# Each top-level block mirrors the sections supported by src/config.py.

[server]
# Lifecycle API host/port and logging settings
# -----------------------------------------------------------------
host = "127.0.0.1"
port = 8080
log_level = "INFO"
# api_key = "your-secret-api-key"  # Optional: Uncomment to enable API key authentication

[runtime]
# Runtime selection (docker | kubernetes)
# -----------------------------------------------------------------
type = "docker"
execd_image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.7"

[egress]
# Egress configuration
# -----------------------------------------------------------------
image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/egress:v1.0.3"
# Enforcement: "dns" (DNS proxy only) or "dns+nft" (nftables + DNS).
mode = "dns"

[storage]
# 卷存储配置
# -----------------------------------------------------------------
# 允许进行 bind mount 的宿主机路径前缀白名单。
# 仅匹配这些前缀的路径才能被挂载到沙箱中。
# 如果为空，则允许所有路径（不建议在生产环境使用）。
# 示例：allowed_host_paths = ["/data/opensandbox", "/tmp/sandbox"]
allowed_host_paths = []

[docker]
# Docker-specific knobs
# -----------------------------------------------------------------
# Supported values for network_mode: "host", "bridge"
network_mode = "bridge"
# Drop dangerous capabilities and block privilege escalation
drop_capabilities = ["AUDIT_WRITE", "MKNOD", "NET_ADMIN", "NET_RAW", "SYS_ADMIN", "SYS_MODULE", "SYS_PTRACE", "SYS_TIME", "SYS_TTY_CONFIG"]
no_new_privileges = true
# Optional: set an AppArmor profile name (e.g., "docker-default") when AppArmor is enabled
apparmor_profile = ""
# Limit process count to reduce host impact from fork bombs; set to null to disable
# TODO: 生产环境建议设置为 4096 或更高，避免多沙箱并发时出现 "can't start new thread" 错误
# See: https://github.com/alibaba/OpenSandbox/issues/447
pids_limit = 4096
# Seccomp profile: empty string uses Docker default; set to an absolute path for a custom profile
seccomp_profile = ""

[ingress]
# Ingress exposure mode: direct (default) or gateway
mode = "direct"


================================================
FILE: server/pyproject.toml
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[build-system]
requires = ["hatchling", "hatch-vcs"]
build-backend = "hatchling.build"

[project]
name = "opensandbox-server"
dynamic = ["version"]
description = "FastAPI control plane for OpenSandbox that manages sandbox lifecycle on Docker (ready) and Kubernetes (planned) runtimes."
readme = "README.md"
authors = [
    { name = "OpenSandbox Team", email = "pangjiping.pjp@alibaba-inc.com" }
]
license = { text = "Apache-2.0" }
requires-python = ">=3.10"
keywords = ["sandbox", "server", "control-plane", "fastapi", "opensandbox"]
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3 :: Only",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: Software Development :: Libraries",
    "Typing :: Typed",
]
dependencies = [
    "docker",
    "fastapi",
    "httpx[socks]",
    "kubernetes",
    "pydantic",
    "pydantic-settings",
    "pyyaml",
    "tomli; python_version < \"3.11\"",
    "uvicorn",
]

[project.urls]
Homepage = "https://github.com/alibaba/OpenSandbox"
Repository = "https://github.com/alibaba/OpenSandbox"
Issues = "https://github.com/alibaba/OpenSandbox/issues"

[project.scripts]
opensandbox-server = "src.cli:main"

[tool.hatch.version]
source = "vcs"

[tool.hatch.version.raw-options]
# This package is in a subdirectory; explicitly point setuptools-scm at the git root.
root = ".."
tag_regex = "^server/v(?P<version>\\d+\\.\\d+\\.\\d+(?:[\\.\\w\\+\\-]*)?)$"
git_describe_command = 'git describe --dirty --tags --long --match "server/v*"'
fallback_version = "0.1.0.dev0"

[tool.hatch.build]
include = [
    "LICENSE",
    "example.config.toml",
    "example.config.zh.toml",
    "example.config.k8s.toml",
    "example.config.k8s.zh.toml",
    "example.batchsandbox-template.yaml",
    "src/**/py.typed",
    "src",
]

[tool.hatch.build.targets.wheel]
packages = ["src"]

[tool.hatch.build.targets.wheel.force-include]
"example.config.toml" = "example.config.toml"
"example.config.zh.toml" = "example.config.zh.toml"
"example.config.k8s.toml" = "example.config.k8s.toml"
"example.config.k8s.zh.toml" = "example.config.k8s.zh.toml"
"example.batchsandbox-template.yaml" = "example.batchsandbox-template.yaml"

[dependency-groups]
dev = [
    "pytest>=7.0.0",
    "pytest-asyncio>=0.21.0",
    "pytest-cov>=4.0.0",
    "ruff>=0.14.8",
    "pyright>=1.1.0",
]

[tool.ruff]
target-version = "py310"
line-length = 100
src = ["src", "tests"]

[tool.ruff.lint]
select = ["E4", "E7", "E9", "F"]

[tool.pyright]
typeCheckingMode = "standard"
pythonVersion = "3.10"
pythonPlatform = "All"

include = ["src", "tests"]

exclude = [
    "**/node_modules",
    "**/__pycache__",
]

venvPath = "."
venv = ".venv"

reportMissingImports = true
reportMissingTypeStubs = false


================================================
FILE: server/src/__init__.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

def hello() -> str:
    """Return the fixed greeting string of the sandbox-server package."""
    greeting = "Hello from sandbox-server!"
    return greeting


================================================
FILE: server/src/api/__init__.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: server/src/api/lifecycle.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
API routes for OpenSandbox Lifecycle API.

This module defines FastAPI routes that map to the OpenAPI specification endpoints.
All business logic is delegated to the service layer that backs each operation.
"""

from typing import List, Optional

import httpx
from fastapi import APIRouter, Header, Query, Request, status
from fastapi.exceptions import HTTPException
from fastapi.responses import Response, StreamingResponse

from src.api.schema import (
    CreateSandboxRequest,
    CreateSandboxResponse,
    Endpoint,
    ErrorResponse,
    ListSandboxesRequest,
    ListSandboxesResponse,
    PaginationRequest,
    RenewSandboxExpirationRequest,
    RenewSandboxExpirationResponse,
    Sandbox,
    SandboxFilter,
)
from src.services.factory import create_sandbox_service

# Hop-by-hop header names, written in lowercase, as enumerated in
# RFC 2616 Section 13.5.1.
# NOTE(review): RFC 2616 has since been obsoleted (see RFC 9110 Section 7.6.1
# for connection-specific headers) — confirm this list is still the one wanted.
HOP_BY_HOP_HEADERS = {
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailer",
    "transfer-encoding",
    "upgrade",
}

# Headers that shouldn't be forwarded to untrusted/internal backends.
# Names are lowercase; presumably they are matched against lowercased request
# header names by the code that consumes this set — verify at the call site.
SENSITIVE_HEADERS = {
    "authorization",
    "cookie",
}

# Router that collects every route declared in this module under the
# "Sandboxes" tag.
router = APIRouter(tags=["Sandboxes"])

# Initialize service based on configuration from config.toml (defaults to docker).
# This runs once, at import time; the handlers below all use this one
# module-level instance.
sandbox_service = create_sandbox_service()


# ============================================================================
# Sandbox CRUD Operations
# ============================================================================

@router.post(
    "/sandboxes",
    status_code=status.HTTP_202_ACCEPTED,
    response_model=CreateSandboxResponse,
    responses={
        202: {
            "description": "Sandbox creation accepted for asynchronous provisioning",
        },
        400: {
            "model": ErrorResponse,
            "description": "The request was invalid or malformed",
        },
        401: {
            "model": ErrorResponse,
            "description": "Authentication credentials are missing or invalid",
        },
        409: {
            "model": ErrorResponse,
            "description": "The operation conflicts with the current state",
        },
        500: {
            "model": ErrorResponse,
            "description": "An unexpected server error occurred",
        },
    },
)
async def create_sandbox(
    request: CreateSandboxRequest,
    x_request_id: Optional[str] = Header(
        None,
        alias="X-Request-ID",
        description="Unique request identifier for tracing",
    ),
) -> CreateSandboxResponse:
    """
    Create a sandbox from a container image.

    Creates a new sandbox from a container image with optional resource limits,
    environment variables, and metadata. Sandboxes are provisioned directly from
    the specified image without requiring a pre-created template.

    Args:
        request: Sandbox creation request
        x_request_id: Unique request identifier for tracing (optional; server generates if omitted).

    Returns:
        CreateSandboxResponse: Accepted sandbox creation request

    Raises:
        HTTPException: If sandbox creation scheduling fails
    """
    # The docstring is kept verbatim on purpose: FastAPI publishes a path
    # operation's docstring as its OpenAPI description.
    # x_request_id is declared as part of the HTTP contract only; nothing in
    # this body reads it.
    accepted = await sandbox_service.create_sandbox(request)
    return accepted


# Search endpoint
@router.get(
    "/sandboxes",
    response_model=ListSandboxesResponse,
    responses={
        200: {"description": "Paginated collection of sandboxes"},
        400: {"model": ErrorResponse, "description": "The request was invalid or malformed"},
        401: {"model": ErrorResponse, "description": "Authentication credentials are missing or invalid"},
        500: {"model": ErrorResponse, "description": "An unexpected server error occurred"},
    },
)
async def list_sandboxes(
    state: Optional[List[str]] = Query(None, description="Filter by lifecycle state. Pass multiple times for OR logic."),
    metadata: Optional[str] = Query(None, description="Arbitrary metadata key-value pairs for filtering (URL encoded)."),
    page: int = Query(1, ge=1, description="Page number for pagination"),
    page_size: int = Query(20, ge=1, le=200, alias="pageSize", description="Number of items per page"),
    x_request_id: Optional[str] = Header(None, alias="X-Request-ID", description="Unique request identifier for tracing"),
) -> ListSandboxesResponse:
    """
    List sandboxes with optional filtering and pagination.

    List all sandboxes with optional filtering and pagination using query parameters.
    All filter conditions use AND logic. Multiple `state` parameters use OR logic within states.

    Args:
        state: Filter by lifecycle state.
        metadata: Arbitrary metadata key-value pairs for filtering.
        page: Page number for pagination.
        page_size: Number of items per page.
        x_request_id: Unique request identifier for tracing (optional; server generates if omitted).

    Returns:
        ListSandboxesResponse: Paginated list of sandboxes
    """
    # The docstring above is left untouched because FastAPI publishes it as the
    # OpenAPI description of this operation.
    #
    # Raises:
    #     HTTPException (400, code INVALID_METADATA_FORMAT): when `metadata` is
    #     not a well-formed "key=value&key2=value2" query string.
    #
    # Function-scope imports keep this handler self-contained; HTTPException is
    # already imported at module level, so it is not re-imported here.
    import logging
    from urllib.parse import parse_qsl

    # Parse metadata query string into dictionary
    metadata_dict = {}
    if metadata:
        try:
            # Parse query string format: key=value&key2=value2
            # strict_parsing=True rejects malformed segments like "a=1&broken"
            # by raising ValueError, which is the only failure handled here.
            parsed = parse_qsl(metadata, keep_blank_values=True, strict_parsing=True)
        except ValueError as e:
            # Chain the original error so the root cause stays in the traceback.
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={"code": "INVALID_METADATA_FORMAT", "message": f"Invalid metadata format: {str(e)}"},
            ) from e
        # For a repeated key, dict() keeps the last value seen.
        metadata_dict = dict(parsed)

    # Construct request object; an empty metadata filter is passed as None.
    request = ListSandboxesRequest(
        filter=SandboxFilter(state=state, metadata=metadata_dict if metadata_dict else None),
        pagination=PaginationRequest(page=page, pageSize=page_size)
    )

    logger = logging.getLogger(__name__)
    logger.info("ListSandboxes: %s", request.filter)

    # Delegate to the service layer for filtering and pagination
    return sandbox_service.list_sandboxes(request)


@router.get(
    "/sandboxes/{sandbox_id}",
    response_model=Sandbox,
    responses={
        200: {
            "description": "Sandbox current state and metadata",
        },
        401: {
            "model": ErrorResponse,
            "description": "Authentication credentials are missing or invalid",
        },
        403: {
            "model": ErrorResponse,
            "description": "The authenticated user lacks permission for this operation",
        },
        404: {
            "model": ErrorResponse,
            "description": "The requested resource does not exist",
        },
        500: {
            "model": ErrorResponse,
            "description": "An unexpected server error occurred",
        },
    },
)
async def get_sandbox(
    sandbox_id: str,
    x_request_id: Optional[str] = Header(
        None,
        alias="X-Request-ID",
        description="Unique request identifier for tracing",
    ),
) -> Sandbox:
    """
    Fetch a sandbox by id.

    Returns the complete sandbox information including image specification,
    status, metadata, and timestamps.

    Args:
        sandbox_id: Unique sandbox identifier
        x_request_id: Unique request identifier for tracing (optional; server generates if omitted).

    Returns:
        Sandbox: Complete sandbox information

    Raises:
        HTTPException: If sandbox not found or access denied
    """
    # The docstring is kept verbatim on purpose: FastAPI publishes a path
    # operation's docstring as its OpenAPI description.
    # x_request_id belongs to the HTTP contract only; this body never reads it.
    # The lookup itself is owned by the service layer.
    found = sandbox_service.get_sandbox(sandbox_id)
    return found


@router.delete(
    "/sandboxes/{sandbox_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    responses={
        204: {"description": "Sandbox successfully deleted"},
        401: {"model": ErrorResponse, "description": "Authentication credentials are missing or invalid"},
        403: {"model": ErrorResponse, "description": "The authenticated user lacks permission for this operation"},
        404: {"model": ErrorResponse, "description": "The requested resource does not exist"},
        409: {"model": ErrorResponse, "description": "The operation conflicts with the current state"},
        500: {"model": ErrorResponse, "description": "An unexpected server error occurred"},
    },
)
async def delete_sandbox(
    sandbox_id: str,
    x_request_id: Optional[str] = Header(None, alias="X-Request-ID", description="Unique request identifier for tracing"),
) -> Response:
    """
    Delete a sandbox.

    Terminates sandbox execution. The sandbox will transition through Stopping state to Terminated.

    Args:
        sandbox_id: Unique sandbox identifier
        x_request_id: Unique request identifier for tracing (optional; server generates if omitted).

    Returns:
        Response: 204 No Content

    Raises:
        HTTPException: If sandbox not found or deletion fails
    """
    # Delegate to the service layer for deletion
    sandbox_service.delete_sandbox(sandbox_id)
    return Response(status_code=status.HTTP_204_NO_CONTENT)


# ============================================================================
# Sandbox Lifecycle Operations
# ============================================================================

@router.post(
    "/sandboxes/{sandbox_id}/pause",
    status_code=status.HTTP_202_ACCEPTED,
    responses={
        202: {"description": "Pause operation accepted"},
        401: {"model": ErrorResponse, "description": "Authentication credentials are missing or invalid"},
        403: {"model": ErrorResponse, "description": "The authenticated user lacks permission for this operation"},
        404: {"model": ErrorResponse, "description": "The requested resource does not exist"},
        409: {"model": ErrorResponse, "description": "The operation conflicts with the current state"},
        500: {"model": ErrorResponse, "description": "An unexpected server error occurred"},
    },
)
async def pause_sandbox(
    sandbox_id: str,
    x_request_id: Optional[str] = Header(None, alias="X-Request-ID", description="Unique request identifier for tracing"),
) -> Response:
    """
    Pause execution while retaining state.

    Pauses a running sandbox while preserving its state.
    Poll GET /sandboxes/{sandboxId} to track state transition to Paused.

    Args:
        sandbox_id: Unique sandbox identifier
        x_request_id: Unique request identifier for tracing (optional; server generates if omitted).

    Returns:
        Response: 202 Accepted

    Raises:
        HTTPException: If sandbox not found or cannot be paused
    """
    # x_request_id is part of the declared header contract but is not read in this body.
    # The service performs the pause; this route only acknowledges with an empty 202.
    sandbox_service.pause_sandbox(sandbox_id)
    accepted = Response(status_code=status.HTTP_202_ACCEPTED)
    return accepted


@router.post(
    "/sandboxes/{sandbox_id}/resume",
    status_code=status.HTTP_202_ACCEPTED,
    responses={
        202: {"description": "Resume operation accepted"},
        401: {"model": ErrorResponse, "description": "Authentication credentials are missing or invalid"},
        403: {"model": ErrorResponse, "description": "The authenticated user lacks permission for this operation"},
        404: {"model": ErrorResponse, "description": "The requested resource does not exist"},
        409: {"model": ErrorResponse, "description": "The operation conflicts with the current state"},
        500: {"model": ErrorResponse, "description": "An unexpected server error occurred"},
    },
)
async def resume_sandbox(
    sandbox_id: str,
    x_request_id: Optional[str] = Header(None, alias="X-Request-ID", description="Unique request identifier for tracing"),
) -> Response:
    """
    Resume a paused sandbox.

    Resumes execution of a paused sandbox.
    Poll GET /sandboxes/{sandboxId} to track state transition to Running.

    Args:
        sandbox_id: Unique sandbox identifier
        x_request_id: Unique request identifier for tracing (optional; server generates if omitted).

    Returns:
        Response: 202 Accepted

    Raises:
        HTTPException: If sandbox not found or cannot be resumed
    """
    # x_request_id is part of the declared header contract but is not read in this body.
    # The service performs the resume; this route only acknowledges with an empty 202.
    sandbox_service.resume_sandbox(sandbox_id)
    accepted = Response(status_code=status.HTTP_202_ACCEPTED)
    return accepted


@router.post(
    "/sandboxes/{sandbox_id}/renew-expiration",
    response_model=RenewSandboxExpirationResponse,
    response_model_exclude_none=True,
    responses={
        200: {"description": "Sandbox expiration updated successfully"},
        400: {"model": ErrorResponse, "description": "The request was invalid or malformed"},
        401: {"model": ErrorResponse, "description": "Authentication credentials are missing or invalid"},
        403: {"model": ErrorResponse, "description": "The authenticated user lacks permission for this operation"},
        404: {"model": ErrorResponse, "description": "The requested resource does not exist"},
        409: {"model": ErrorResponse, "description": "The operation conflicts with the current state"},
        500: {"model": ErrorResponse, "description": "An unexpected server error occurred"},
    },
)
async def renew_sandbox_expiration(
    sandbox_id: str,
    request: RenewSandboxExpirationRequest,
    x_request_id: Optional[str] = Header(None, alias="X-Request-ID", description="Unique request identifier for tracing"),
) -> RenewSandboxExpirationResponse:
    """
    Renew sandbox expiration.

    Renews the absolute expiration time of a sandbox.
    The new expiration time must be in the future and after the current expiresAt time.

    Args:
        sandbox_id: Unique sandbox identifier
        request: Renewal request with new expiration time
        x_request_id: Unique request identifier for tracing (optional; server generates if omitted).

    Returns:
        RenewSandboxExpirationResponse: Updated expiration time

    Raises:
        HTTPException: If sandbox not found or renewal fails
    """
    # x_request_id is part of the declared header contract but is not read in this body.
    # The parsed request body is handed to the service unchanged; its result is the response.
    renewal = sandbox_service.renew_expiration(sandbox_id, request)
    return renewal


# ============================================================================
# Sandbox Endpoints
# ============================================================================

@router.get(
    "/sandboxes/{sandbox_id}/endpoints/{port}",
    response_model=Endpoint,
    response_model_exclude_none=True,
    responses={
        200: {"description": "Endpoint retrieved successfully"},
        401: {"model": ErrorResponse, "description": "Authentication credentials are missing or invalid"},
        403: {"model": ErrorResponse, "description": "The authenticated user lacks permission for this operation"},
        404: {"model": ErrorResponse, "description": "The requested resource does not exist"},
        500: {"model": ErrorResponse, "description": "An unexpected server error occurred"},
    },
)
async def get_sandbox_endpoint(
    request: Request,
    sandbox_id: str,
    port: int,
    use_server_proxy: bool = Query(False, description="Whether to return a server-proxied URL"),
    x_request_id: Optional[str] = Header(None, alias="X-Request-ID", description="Unique request identifier for tracing"),
) -> Endpoint:
    """
    Get sandbox access endpoint.

    Returns the public access endpoint URL for accessing a service running on a specific port
    within the sandbox. The service must be listening on the specified port inside the sandbox
    for the endpoint to be available.

    Args:
        request: FastAPI request object
        sandbox_id: Unique sandbox identifier
        port: Port number where the service is listening inside the sandbox (1-65535)
        use_server_proxy: Whether to return a server-proxied URL
        x_request_id: Unique request identifier for tracing (optional; server generates if omitted).

    Returns:
        Endpoint: Public endpoint URL

    Raises:
        HTTPException: If sandbox not found or endpoint not available
    """
    # The service lookup runs in both modes, so any error it raises applies to proxy mode too.
    endpoint = sandbox_service.get_endpoint(sandbox_id, port)

    if not use_server_proxy:
        return endpoint

    # Proxy mode: overwrite the resolved address with this server's own proxy route.
    # The result is scheme-less ("host[:port][/root]/sandboxes/<id>/proxy/<port>").
    server_address = str(request.base_url).rstrip("/")
    for scheme in ("https://", "http://"):
        server_address = server_address.replace(scheme, "")
    endpoint.endpoint = f"{server_address}/sandboxes/{sandbox_id}/proxy/{port}"
    return endpoint


@router.api_route(
    "/sandboxes/{sandbox_id}/proxy/{port}/{full_path:path}",
    methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
)
async def proxy_sandbox_endpoint_request(request: Request, sandbox_id: str, port: int, full_path: str):
    """
    Receives all incoming requests, determines the target sandbox from path parameter,
    and asynchronously proxies the request to it.
    """
    # Flow: resolve the sandbox's internal address, forward the request with
    # hop-by-hop / sensitive headers removed, then stream the upstream body back.
    # Raises HTTPException 400 for websocket upgrades, 502 when the upstream cannot
    # be connected to, and 500 for any other failure inside the proxy.

    def _hop_by_hop_names(connection_header):
        # Static hop-by-hop set plus every header name listed in the peer's
        # `Connection` header value (lower-cased, blanks ignored).
        names = set(HOP_BY_HOP_HEADERS)
        if connection_header:
            names.update(
                token.strip().lower()
                for token in connection_header.split(",")
                if token.strip()
            )
        return names

    endpoint = sandbox_service.get_endpoint(sandbox_id, port, resolve_internal=True)

    target_host = endpoint.endpoint
    query_string = request.url.query

    # Shared client stored on the application state.
    client: httpx.AsyncClient = request.app.state.http_client

    try:
        upgrade_header = request.headers.get("Upgrade", "")
        if upgrade_header.lower() == "websocket":
            raise HTTPException(status_code=400, detail="Websocket upgrade is not supported yet")

        # Filter request headers: drop Host, hop-by-hop and sensitive headers.
        request_hop_by_hop = _hop_by_hop_names(request.headers.get("connection"))
        headers = {}
        for key, value in request.headers.items():
            key_lower = key.lower()
            if (
                key_lower != "host"
                and key_lower not in request_hop_by_hop
                and key_lower not in SENSITIVE_HEADERS
            ):
                headers[key] = value

        req = client.build_request(
            method=request.method,
            url=f"http://{target_host}/{full_path}",
            params=query_string if query_string else None,
            headers=headers,
            # Only body-carrying methods stream the incoming request body upstream.
            content=request.stream() if request.method in ("POST", "PUT", "PATCH", "DELETE") else None,
        )

        resp = await client.send(req, stream=True)

        response_hop_by_hop = _hop_by_hop_names(resp.headers.get("connection"))
        response_headers = {
            key: value
            for key, value in resp.headers.items()
            if key.lower() not in response_hop_by_hop
        }

        async def _relay_body():
            # A streamed httpx response holds its connection until it is closed.
            # Close it on every exit path (full read, client disconnect, error)
            # so the connection is always released.
            # NOTE(review): aiter_bytes() yields decoded content; if upstream
            # Content-Encoding / Content-Length are not in HOP_BY_HOP_HEADERS they are
            # forwarded as-is and may not match the relayed body — confirm.
            try:
                async for chunk in resp.aiter_bytes():
                    yield chunk
            finally:
                await resp.aclose()

        return StreamingResponse(
            content=_relay_body(),
            status_code=resp.status_code,
            headers=response_headers,
        )
    except httpx.ConnectError as e:
        raise HTTPException(
            status_code=502,
            detail=f"Could not connect to the backend sandbox {endpoint}: {e}",
        ) from e
    except HTTPException:
        # Preserve explicit HTTP exceptions raised above (e.g. websocket upgrade not supported).
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"An internal error occurred in the proxy: {e}"
        ) from e


================================================
FILE: server/src/api/schema.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Pydantic schemas for OpenSandbox Lifecycle API.

This module defines data models based on the OpenAPI specification
for request/response validation and serialization.
"""

from datetime import datetime
from typing import Dict, List, Literal, Optional

from pydantic import BaseModel, ConfigDict, Field, RootModel, model_validator


# ============================================================================
# Image Specification
# ============================================================================

class ImageAuth(BaseModel):
    """
    Registry authentication credentials for private container registries.
    """
    # Both fields are required (no default is declared for either).
    username: str = Field(
        description="Registry username or service account",
    )
    password: str = Field(
        description="Registry password or authentication token",
    )


class ImageSpec(BaseModel):
    """
    Container image specification for sandbox provisioning.

    Supports public registry images and private registry images with authentication.
    """
    # Required image reference.
    uri: str = Field(
        description="Container image URI in standard format (e.g., 'python:3.11', 'gcr.io/my-project/app:v1.0')",
    )
    # Optional credentials; defaults to None when the request carries no auth object.
    auth: Optional[ImageAuth] = Field(
        default=None,
        description="Registry authentication credentials (required for private registries)",
    )


# ============================================================================
# Resource Limits
# ============================================================================

class ResourceLimits(RootModel[Dict[str, str]]):
    """
    Runtime resource constraints as key-value pairs.

    Similar to Kubernetes resource specifications, allows flexible definition
    of resource limits. Common resource types include cpu, memory, and gpu.
    """
    # The model is the mapping itself (root model); it defaults to an empty dict.
    # The schema example goes through `json_schema_extra`: passing arbitrary extra
    # keyword arguments such as `example=` to Field is deprecated in Pydantic v2.
    root: Dict[str, str] = Field(
        default_factory=dict,
        json_schema_extra={"example": {"cpu": "500m", "memory": "512Mi", "gpu": "1"}},
    )


class NetworkRule(BaseModel):
    """
    Egress rule: allow/deny a specific domain or wildcard.
    """

    # Pydantic v2 configuration; replaces the deprecated nested `class Config`.
    # No aliases are declared on this model, so populate_by_name has no visible effect here.
    model_config = ConfigDict(populate_by_name=True)

    # Typed as a plain string: the "allow | deny" vocabulary is documented, not validated here.
    action: str = Field(..., description="Whether to allow or deny matching targets (allow | deny).")
    target: str = Field(
        ...,
        description="FQDN or wildcard domain (e.g., 'example.com', '*.example.com').",
        min_length=1,
    )


class NetworkPolicy(BaseModel):
    """
    Egress network policy matching the sidecar /policy payload.
    """

    # Pydantic v2 configuration; replaces the deprecated nested `class Config`.
    # Lets input use either `default_action` or its alias `defaultAction`.
    model_config = ConfigDict(populate_by_name=True)

    # Typed as a plain string: the "allow | deny" vocabulary is documented, not validated here.
    default_action: Optional[str] = Field(
        default=None,
        alias="defaultAction",
        description="Default action when no egress rule matches (allow | deny). If omitted, sidecar defaults to deny.",
    )
    # Defaults to a fresh empty list per instance.
    egress: list[NetworkRule] = Field(
        default_factory=list,
        description="Ordered egress rules. Empty/omitted yields allow-all at startup.",
    )


# ============================================================================
# Volume Definitions
# ============================================================================


class Host(BaseModel):
    """
    Host path bind mount backend.

    Maps a directory on the host filesystem into the container.
    Only available when the runtime supports host mounts.

    Security note: Host paths are restricted by server-side allowlist.
    Users must specify paths under permitted prefixes.
    """

    # The pattern only checks the prefix: a leading "/" or a drive letter followed by
    # ":" and a slash or backslash. No allowlist check is performed in this model.
    path: str = Field(
        description="Absolute path on the host filesystem to mount.",
        pattern=r"^(/|[A-Za-z]:[\\/])",
    )


class PVC(BaseModel):
    """
    Platform-managed named volume backend.

    A runtime-neutral abstraction for referencing a pre-existing, platform-managed
    named volume. The semantics are identical across runtimes: claim an existing
    volume by name, mount it into the container, and leave volume lifecycle
    management to the user.

    - Kubernetes: maps to a PersistentVolumeClaim in the same namespace.
    - Docker: maps to a Docker named volume (created via ``docker volume create``).
    """

    # Pydantic v2 configuration; replaces the deprecated nested `class Config`.
    # Lets input use either `claim_name` or its alias `claimName`.
    model_config = ConfigDict(populate_by_name=True)

    # Lowercase alphanumerics and "-", starting and ending with an alphanumeric; at most 253 chars.
    claim_name: str = Field(
        ...,
        alias="claimName",
        description=(
            "Name of the volume on the target platform. "
            "In Kubernetes this is the PVC name; in Docker this is the named volume name."
        ),
        pattern=r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?$",
        max_length=253,
    )


class OSSFS(BaseModel):
    """
    Alibaba Cloud OSS mount backend via ossfs.

    The runtime mounts a host-side OSS path under ``storage.ossfs_mount_root``
    and then bind-mounts the resolved path into the sandbox container. Prefix
    selection is expressed via ``Volume.subPath``.
    In Docker runtime, OSSFS backend requires the server host to be Linux with FUSE support.
    """

    # Pydantic v2 configuration; replaces the deprecated nested `class Config`.
    # Lets input use either the snake_case field names or their camelCase aliases.
    model_config = ConfigDict(populate_by_name=True)

    bucket: str = Field(
        ...,
        description="OSS bucket name.",
        min_length=3,
        max_length=63,
    )
    endpoint: str = Field(
        ...,
        description="OSS endpoint, e.g. 'oss-cn-hangzhou.aliyuncs.com'.",
        min_length=1,
    )
    # Only the two listed literals are accepted; "2.0" is used when omitted.
    version: Literal["1.0", "2.0"] = Field(
        "2.0",
        description="ossfs major version used by runtime mount integration.",
    )
    options: Optional[List[str]] = Field(
        None,
        description=(
            "Additional ossfs mount options. Runtime encodes options by version: "
            "1.0 => 'ossfs ... -o <option>', 2.0 => 'ossfs2 config line --<option>'. "
            "Provide raw option payloads without leading '-'."
        ),
    )
    # Both credential fields are typed Optional but are required in practice:
    # validate_inline_credentials below rejects a model missing either one.
    access_key_id: Optional[str] = Field(
        None,
        alias="accessKeyId",
        description="OSS access key ID for inline credentials mode.",
        min_length=1,
    )
    access_key_secret: Optional[str] = Field(
        None,
        alias="accessKeySecret",
        description="OSS access key secret for inline credentials mode.",
        min_length=1,
    )

    @model_validator(mode="after")
    def validate_inline_credentials(self) -> "OSSFS":
        """Ensure inline credentials are provided for current OSSFS mode."""
        # Runs after field validation; a missing (None) key ID or secret is an error.
        if not self.access_key_id or not self.access_key_secret:
            raise ValueError(
                "OSSFS inline credentials are required: accessKeyId and accessKeySecret."
            )
        return self


class Volume(BaseModel):
    """
    Storage mount definition for a sandbox.

    Each volume entry contains:
    - A unique name identifier
    - Exactly one backend struct (host, pvc, etc.) with backend-specific fields
    - Common mount settings (mountPath, readOnly, subPath)
    """

    # Pydantic v2 configuration; replaces the deprecated nested `class Config`.
    # Lets input use either the snake_case field names or their camelCase aliases.
    model_config = ConfigDict(populate_by_name=True)

    # Lowercase alphanumerics and "-", starting and ending with an alphanumeric; at most 63 chars.
    name: str = Field(
        ...,
        description="Unique identifier for the volume within the sandbox.",
        pattern=r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?$",
        max_length=63,
    )
    # Backend selectors: exactly one of host / pvc / ossfs must be set
    # (enforced by validate_exactly_one_backend below).
    host: Optional[Host] = Field(
        None,
        description="Host path bind mount backend.",
    )
    pvc: Optional[PVC] = Field(
        None,
        description="Platform-managed named volume backend (PVC in Kubernetes, named volume in Docker).",
    )
    ossfs: Optional[OSSFS] = Field(
        None,
        description="OSSFS mount backend.",
    )
    # The pattern only requires a leading "/".
    mount_path: str = Field(
        ...,
        alias="mountPath",
        description="Absolute path inside the container where the volume is mounted.",
        pattern=r"^/.*",
    )
    read_only: bool = Field(
        False,
        alias="readOnly",
        description="If true, the volume is mounted as read-only. Defaults to false (read-write).",
    )
    sub_path: Optional[str] = Field(
        None,
        alias="subPath",
        description="Optional subdirectory under the backend path to mount.",
    )

    @model_validator(mode="after")
    def validate_exactly_one_backend(self) -> "Volume":
        """Ensure exactly one backend type is specified."""
        specified = [b for b in (self.host, self.pvc, self.ossfs) if b is not None]
        if not specified:
            raise ValueError("Exactly one backend (host, pvc, ossfs) must be specified, but none was provided.")
        if len(specified) > 1:
            raise ValueError("Exactly one backend (host, pvc, ossfs) must be specified, but multiple were provided.")
        return self


# ============================================================================
# Sandbox Status
# ============================================================================

class SandboxStatus(BaseModel):
    """
    Detailed status information with lifecycle state and transition details.
    """
    # Pydantic v2 configuration; replaces the deprecated nested `class Config`.
    # Lets input use either `last_transition_at` or its alias `lastTransitionAt`.
    model_config = ConfigDict(populate_by_name=True)

    # Typed as a plain string: the listed state names are documented, not validated here.
    state: str = Field(
        ...,
        description="Current lifecycle state (Pending, Running, Pausing, Paused, Stopping, Terminated, Failed)",
    )
    reason: Optional[str] = Field(
        None,
        description="Short machine-readable reason code for the current state",
    )
    message: Optional[str] = Field(
        None,
        description="Human-readable message describing the current state or reason for state transition",
    )
    last_transition_at: Optional[datetime] = Field(
        None,
        alias="lastTransitionAt",
        description="Timestamp of the last state transition",
    )


# ============================================================================
# Sandbox Models
# ============================================================================

class CreateSandboxRequest(BaseModel):
    """
    Request to create a new sandbox from a container image.
    """
    # Pydantic v2 configuration; replaces the deprecated nested `class Config`.
    # Lets input use either the snake_case field names or their camelCase aliases.
    model_config = ConfigDict(populate_by_name=True)

    image: ImageSpec = Field(..., description="Container image specification for the sandbox")
    # Only the lower bound (60) is enforced by this model.
    timeout: Optional[int] = Field(
        None,
        ge=60,
        description=(
            "Sandbox timeout in seconds (minimum 60). "
            "The maximum is controlled by server.max_sandbox_timeout_seconds. "
            "When omitted or null, the sandbox will not auto-terminate and must be deleted explicitly. "
            "Note: manual cleanup support is runtime-dependent; Kubernetes providers may reject "
            "null timeout when the workload provider does not support non-expiring sandboxes."
        ),
    )
    resource_limits: ResourceLimits = Field(
        ...,
        alias="resourceLimits",
        description="Runtime resource constraints for the sandbox instance",
    )
    # Values may be None as well as strings.
    env: Optional[Dict[str, Optional[str]]] = Field(
        None,
        description="Environment variables to inject into the sandbox runtime",
    )
    metadata: Optional[Dict[str, str]] = Field(
        None,
        description="Custom key-value metadata for management, filtering, and tagging",
    )
    # Required and non-empty (min_length=1). The schema example goes through
    # `json_schema_extra`: passing arbitrary extra keyword arguments such as
    # `example=` to Field is deprecated in Pydantic v2.
    entrypoint: List[str] = Field(
        ...,
        min_length=1,
        description="The command to execute as the sandbox's entry process",
        json_schema_extra={"example": ["python", "/app/main.py"]},
    )
    network_policy: Optional[NetworkPolicy] = Field(
        None,
        alias="networkPolicy",
        description=(
            "Optional outbound network policy. Shape matches the egress sidecar /policy endpoint. "
            "Empty/omitted means allow-all until updated."
        ),
    )
    volumes: Optional[List[Volume]] = Field(
        None,
        description=(
            "Storage mounts for the sandbox. Each volume entry specifies a named backend-specific "
            "storage source and common mount settings. Exactly one backend type must be specified per volume entry."
        ),
    )
    extensions: Optional[Dict[str, str]] = Field(
        None,
        description="Opaque container for provider-specific or transient parameters not covered by the core API",
    )


class CreateSandboxResponse(BaseModel):
    """
    Response from creating a new sandbox.

    Contains essential information without image and updatedAt.
    """
    # Pydantic v2 configuration; replaces the deprecated nested `class Config`.
    # Lets the model be built with either the snake_case field names or their camelCase aliases.
    model_config = ConfigDict(populate_by_name=True)

    id: str = Field(..., description="Unique sandbox identifier")
    status: SandboxStatus = Field(..., description="Current lifecycle status and detailed state information")
    metadata: Optional[Dict[str, str]] = Field(None, description="Custom metadata from creation request")
    expires_at: Optional[datetime] = Field(
        None,
        alias="expiresAt",
        description="Timestamp when sandbox will auto-terminate. Null when manual cleanup is enabled.",
    )
    created_at: datetime = Field(..., alias="createdAt", description="Sandbox creation timestamp")
    entrypoint: List[str] = Field(..., description="Entry process specification from creation request")


class Sandbox(BaseModel):
    """
    Runtime execution environment provisioned from a container image.

    This is the complete representation of the sandbox resource.
    """
    # Pydantic v2 configuration; replaces the deprecated nested `class Config`.
    # Lets the model be built with either the snake_case field names or their camelCase aliases.
    model_config = ConfigDict(populate_by_name=True)

    id: str = Field(..., description="Unique sandbox identifier")
    image: ImageSpec = Field(..., description="Container image specification used to provision this sandbox")
    status: SandboxStatus = Field(..., description="Current lifecycle status and detailed state information")
    metadata: Optional[Dict[str, str]] = Field(None, description="Custom metadata from creation request")
    entrypoint: List[str] = Field(..., description="The command to execute as the sandbox's entry process")
    expires_at: Optional[datetime] = Field(
        None,
        alias="expiresAt",
        description="Timestamp when sandbox will auto-terminate. Null when manual cleanup is enabled.",
    )
    created_at: datetime = Field(..., alias="createdAt", description="Sandbox creation timestamp")


# ============================================================================
# List Sandboxes
# ============================================================================

class SandboxFilter(BaseModel):
    """
    Filtering criteria for listing sandboxes.
    """
    # Both criteria are optional. When a state list is given it must be non-empty (min_length=1).
    state: Optional[List[str]] = Field(
        default=None,
        min_length=1,
        description="Filter by lifecycle state (status.state) - supports OR logic",
    )
    metadata: Optional[Dict[str, str]] = Field(
        default=None,
        description="Filter by metadata key-value pairs (AND logic)",
    )


class PaginationRequest(BaseModel):
    """
    Pagination parameters for list requests.
    """
    # Pydantic v2 configuration; replaces the deprecated nested `class Config`.
    # Lets input use either `page_size` or its alias `pageSize`.
    model_config = ConfigDict(populate_by_name=True)

    # Pages are 1-based; the first page is used when omitted.
    page: int = Field(1, ge=1, description="Page number")
    # Defaults to 20 and is bounded to the range 1..200.
    page_size: int = Field(
        20,
        ge=1,
        le=200,
        alias="pageSize",
        description="Number of items per page",
    )


class ListSandboxesRequest(BaseModel):
    """
    Request body for complex listing queries.
    """
    # An omitted filter becomes a fresh SandboxFilter with no criteria set.
    filter: SandboxFilter = Field(
        default_factory=SandboxFilter,
        description="Filtering criteria (all conditions combined with AND logic)",
    )
    # Left as None when the caller sends no pagination block.
    pagination: Optional[PaginationRequest] = Field(
        default=None,
        description="Pagination parameters",
    )


class PaginationInfo(BaseModel):
    """
    Pagination metadata for list responses.
    """
    # Pydantic v2 configuration; replaces the deprecated nested `class Config`.
    # Lets the model be built with either the snake_case field names or their camelCase aliases.
    model_config = ConfigDict(populate_by_name=True)

    # All fields are required; only lower bounds are enforced on the numeric ones.
    page: int = Field(..., ge=1, description="Current page number")
    page_size: int = Field(..., ge=1, alias="pageSize", description="Number of items per page")
    total_items: int = Field(..., ge=0, alias="totalItems", description="Total number of items matching the filter")
    total_pages: int = Field(..., ge=0, alias="totalPages", description="Total number of pages")
    has_next_page: bool = Field(..., alias="hasNextPage", description="Whether there are more pages after the current one")


class ListSandboxesResponse(BaseModel):
    """
    Paginated collection of sandboxes.
    """
    # Both parts are required: the sandbox list and its pagination metadata.
    items: List[Sandbox] = Field(
        description="List of sandboxes",
    )
    pagination: PaginationInfo = Field(
        description="Pagination metadata",
    )


# ============================================================================
# Renew Expiration
# ============================================================================

class RenewSandboxExpirationRequest(BaseModel):
    """
    Request to renew sandbox expiration time.
    """
    # Pydantic v2 configuration; replaces the deprecated nested `class Config`.
    # Lets input use either `expires_at` or its alias `expiresAt`.
    model_config = ConfigDict(populate_by_name=True)

    # Only parsed as a datetime here; the "must be in the future" rule is not checked by this model.
    expires_at: datetime = Field(
        ...,
        alias="expiresAt",
        description="New absolute expiration time in UTC (RFC 3339 format). Must be in the future.",
    )


class RenewSandboxExpirationResponse(BaseModel):
    """
    Response for renewing sandbox expiration.
    """
    # Pydantic v2 configuration; replaces the deprecated nested `class Config`.
    # Lets the model be built with either `expires_at` or its alias `expiresAt`.
    model_config = ConfigDict(populate_by_name=True)

    expires_at: datetime = Field(
        ...,
        alias="expiresAt",
        description="The new absolute expiration time in UTC (RFC 3339 format)",
    )


# ============================================================================
# Endpoint
# ============================================================================

class Endpoint(BaseModel):
    """
    Endpoint for accessing a service running in the sandbox.
    """
    # Required address string; this model applies no format validation to it.
    endpoint: str = Field(
        description="Public endpoint string (host[:port]/path) exposed for the sandbox service",
    )
    # Optional header map; None when no extra headers are needed.
    headers: Optional[Dict[str, str]] = Field(
        default=None,
        description="Optional headers required when accessing the endpoint (e.g., for header-based routing).",
    )


# ============================================================================
# Error Response
# ============================================================================

class ErrorResponse(BaseModel):
    """
    Standard error response for all non-2xx HTTP responses.

    HTTP status code indicates the error category; code and message provide details.
    """
    # Both fields are required free-form strings.
    code: str = Field(
        description="Machine-readable error code (e.g., INVALID_REQUEST, NOT_FOUND, INTERNAL_ERROR)",
    )
    message: str = Field(
        description="Human-readable error message describing what went wrong and how to fix it",
    )


================================================
FILE: server/src/cli.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import argparse
import os
import shutil
from pathlib import Path

import uvicorn

from src.config import (
    AgentSandboxRuntimeConfig,
    CONFIG_ENV_VAR,
    DEFAULT_CONFIG_PATH,
    DockerConfig,
    EgressConfig,
    IngressConfig,
    KubernetesRuntimeConfig,
    RuntimeConfig,
    ServerConfig,
    StorageConfig,
)

# Maps each supported example kind to the file name of its packaged config template.
# The keys double as the accepted values of `init-config --example`, and
# copy_example_config() looks the file name up relative to this module's parent directory.
EXAMPLE_FILE_MAP: dict[str, str] = {
    "docker": "example.config.toml",
    "docker-zh": "example.config.zh.toml",
    "k8s": "example.config.k8s.toml",
    "k8s-zh": "example.config.k8s.zh.toml",
}


def _build_parser() -> argparse.ArgumentParser:
    """
    Build the command-line parser for the server entry point.

    The top-level parser takes ``--config`` and ``--reload``. A single
    ``init-config`` subcommand (stored under ``command``) takes an optional
    destination path plus ``--example`` and ``--force``.

    Returns:
        argparse.ArgumentParser: The configured top-level parser.
    """
    # Raw-text formatting keeps the hand-laid-out epilog line breaks intact.
    subcommand_summary = (
        "Subcommands:\n"
        "  init-config [path] [--example {docker,docker-zh,k8s,k8s-zh}] [--force]\n"
        "    Generate a config file. Without --example it renders the full skeleton (placeholders only).\n"
        "    --example    Copy a packaged example config.\n"
        "    --force      Overwrite destination if it exists.\n"
    )
    root = argparse.ArgumentParser(
        description="Run the OpenSandbox server.",
        formatter_class=argparse.RawTextHelpFormatter,
        epilog=subcommand_summary,
    )
    root.add_argument(
        "--config",
        help="Path to the server config TOML file (overrides SANDBOX_CONFIG_PATH).",
    )
    root.add_argument(
        "--reload",
        action="store_true",
        help="Enable auto-reload (development only).",
    )

    # The chosen subcommand name lands in `args.command` (None when omitted).
    commands = root.add_subparsers(dest="command")
    init_cmd = commands.add_parser(
        "init-config",
        help="Generate a config file from packaged examples or the schema skeleton.",
    )
    init_cmd.add_argument(
        "path",
        nargs="?",
        default=str(DEFAULT_CONFIG_PATH),
        help="Destination path for the config file (default: ~/.sandbox.toml).",
    )
    init_cmd.add_argument(
        "--example",
        choices=sorted(EXAMPLE_FILE_MAP),
        help=(
            "Packaged example to copy (docker, docker-zh, k8s, k8s-zh). "
            "Omit to render the full skeleton with placeholders."
        ),
    )
    init_cmd.add_argument(
        "--force",
        action="store_true",
        help="Overwrite existing file when generating config.",
    )

    return root


def copy_example_config(
    destination: str | Path | None = None, *, force: bool = False, kind: str = "default"
) -> Path:
    """
    Copy a packaged example config template to the target path.

    Args:
        destination: Target file path; falls back to DEFAULT_CONFIG_PATH when
            omitted or empty. ``~`` is expanded.
        force: Overwrite the destination if it already exists.
        kind: Key of EXAMPLE_FILE_MAP selecting the template. The signature
            default ``"default"`` is accepted as an alias for ``"docker"``.

    Returns:
        Path: The path the template was written to.

    Raises:
        ValueError: If ``kind`` is not a supported example name.
        FileNotFoundError: If the packaged template file is missing.
        FileExistsError: If the destination exists and ``force`` is false.
    """
    # The signature default "default" has no entry of its own in the map, so a
    # call without `kind` used to fail unconditionally. Resolve it to the
    # unsuffixed docker example unless the map ever defines "default" itself.
    resolved_kind = kind
    if kind == "default" and "default" not in EXAMPLE_FILE_MAP:
        resolved_kind = "docker"

    if resolved_kind not in EXAMPLE_FILE_MAP:
        supported = ", ".join(EXAMPLE_FILE_MAP)
        raise ValueError(f"Unsupported example kind '{kind}'. Choices: {supported}")

    filename = EXAMPLE_FILE_MAP[resolved_kind]
    # Templates are looked up two directory levels above this module's resolved path.
    src_path = Path(__file__).resolve().parent.parent / filename
    if not src_path.exists():
        raise FileNotFoundError(f"Missing example config template at {src_path}")

    dest_path = Path(destination or DEFAULT_CONFIG_PATH).expanduser()
    dest_path.parent.mkdir(parents=True, exist_ok=True)
    if dest_path.exists() and not force:
        raise FileExistsError(f"Config file already exists at {dest_path}. Use --force to overwrite.")

    shutil.copyfile(src_path, dest_path)
    return dest_path


def render_full_config(destination: str | Path | None = None, *, force: bool = False) -> Path:
    """
    Write a complete, comment-annotated config skeleton derived from the config models.

    Every key is emitted with an empty placeholder instead of its default so
    the user has to fill values in explicitly. The comment above each key is
    the pydantic Field description, keeping the skeleton in sync with the schema.

    Args:
        destination: Target file path; DEFAULT_CONFIG_PATH when omitted or empty.
        force: Overwrite the destination if it already exists.

    Returns:
        Path: The path the skeleton was written to.

    Raises:
        FileExistsError: If the destination exists and ``force`` is false.
    """

    def _empty_value(field_info) -> str:
        """Return the intentionally empty TOML literal for a field."""
        annotation = field_info.annotation
        is_list = annotation is not None and (
            annotation is list or getattr(annotation, "__origin__", None) is list
        )
        # Non-list fields (including bool/int) get an empty string the user must replace.
        return "[]" if is_list else '""'

    def _section_text(
        name: str,
        schema,
        *,
        overrides: dict[str, str] | None = None,
        header_notes: list[str] | None = None,
    ) -> str:
        """Render one ``[name]`` table: optional notes, header, then one entry per field."""
        header = [f"# {note}" for note in (header_notes or [])]
        header.append(f"[{name}]")
        overrides = overrides or {}

        entries: list[str] = []
        for attr_name, info in schema.model_fields.items():
            key = info.alias or attr_name
            rendered = overrides.get(key, _empty_value(info))
            entry = [f"# {info.description}"] if info.description else []
            entry.append(f"{key} = {rendered}")
            entries.append("\n".join(entry))

        text = "\n".join(header)
        if entries:
            # Entries are separated by a blank line; none trails the last entry.
            text += "\n" + "\n\n".join(entries)
        return text

    target = Path(destination or DEFAULT_CONFIG_PATH).expanduser()
    target.parent.mkdir(parents=True, exist_ok=True)
    if not force and target.exists():
        raise FileExistsError(f"Config file already exists at {target}. Use --force to overwrite.")

    parts = ["# Generated from OpenSandbox config schema. Remove sections you do not use."]
    parts.append(_section_text("server", ServerConfig))
    parts.append(_section_text("runtime", RuntimeConfig))
    parts.append(_section_text("docker", DockerConfig))
    parts.append(
        _section_text(
            "egress",
            EgressConfig,
            header_notes=["Used when networkPolicy is provided. Requires docker.network_mode = \"bridge\"."],
        )
    )
    parts.append(
        _section_text(
            "kubernetes",
            KubernetesRuntimeConfig,
            header_notes=["Only used when runtime.type = \"kubernetes\""],
        )
    )
    parts.append(
        _section_text(
            "agent_sandbox",
            AgentSandboxRuntimeConfig,
            header_notes=["Requires kubernetes.workload_provider = \"agent-sandbox\""],
        )
    )
    parts.append(_section_text("ingress", IngressConfig))
    parts.append(_section_text("storage", StorageConfig))

    target.write_text("\n\n".join(parts) + "\n", encoding="utf-8")
    return target


def main() -> None:
    """
    CLI entry point.

    ``init-config`` writes a config file (packaged example or schema skeleton)
    and returns; any failure is printed and turned into exit status 1.
    Otherwise the server is started through uvicorn, with ``--config`` exported
    via the CONFIG_ENV_VAR environment variable beforehand.
    """
    args = _build_parser().parse_args()

    if args.command == "init-config":
        try:
            if not args.example:
                written = render_full_config(args.path, force=args.force)
                print(f"Wrote full config skeleton to {written}\n")
            else:
                written = copy_example_config(args.path, force=args.force, kind=args.example)
                print(f"Wrote example config ({args.example}) to {written}\n")
        except Exception as exc:  # noqa: BLE001
            print(f"Failed to write config template: {exc}\n")
            raise SystemExit(1)
        return

    if args.config:
        os.environ[CONFIG_ENV_VAR] = args.config

    # Imported only now so the environment variable set above is in place
    # before src.main is loaded.
    from src import main as server_main

    server_settings = server_main.app_config.server
    uvicorn.run(
        "src.main:app",
        host=server_settings.host,
        port=server_settings.port,
        reload=args.reload,
        log_config=server_main._log_config,
    )


# Run the CLI when this module is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: server/src/config.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Application configuration management for sandbox server.

Loads configuration from a TOML file (default: ~/.sandbox.toml) and exposes
helpers to access the parsed settings throughout the application.
"""

from __future__ import annotations

import ipaddress
import logging
import os
import re
from pathlib import Path
from typing import Any, Dict, Literal, Optional

from pydantic import BaseModel, Field, ValidationError, model_validator

try:  # Python 3.11+
    import tomllib  # type: ignore[attr-defined]
except ModuleNotFoundError:  # Python 3.10 fallback
    import tomli as tomllib  # type: ignore[import]

# Module-level logger used by the config loading helpers below.
logger = logging.getLogger(__name__)

# Environment variable consulted by _resolve_config_path() when no explicit path is given.
CONFIG_ENV_VAR = "SANDBOX_CONFIG_PATH"
# Fallback location used when neither an explicit path nor the env var is set.
DEFAULT_CONFIG_PATH = Path.home() / ".sandbox.toml"

# Domain: 1-253 characters overall, at least two dot-separated labels of 1-63
# characters from [A-Za-z0-9-], and the first character must not be a hyphen.
_DOMAIN_RE = re.compile(r"^(?=.{1,253}$)(?!-)[A-Za-z0-9-]{1,63}(?:\.[A-Za-z0-9-]{1,63})+$")
# Wildcard domain: a literal "*." followed by at least two labels (e.g. *.example.com).
_WILDCARD_DOMAIN_RE = re.compile(r"^\*\.(?!-)[A-Za-z0-9-]{1,63}(?:\.[A-Za-z0-9-]{1,63})+$")
# Dotted-quad shape with an optional ":port" of 1-5 digits; numeric ranges are
# checked separately in _is_valid_ip_or_ip_port().
_IPV4_WITH_PORT_RE = re.compile(r"^(?P<ip>(?:\d{1,3}\.){3}\d{1,3})(?::(?P<port>\d{1,5}))?$")

# Allowed values for ingress.mode.
INGRESS_MODE_DIRECT = "direct"
INGRESS_MODE_GATEWAY = "gateway"
# Allowed values for ingress.gateway.route.mode.
GATEWAY_ROUTE_MODE_WILDCARD = "wildcard"
GATEWAY_ROUTE_MODE_HEADER = "header"
GATEWAY_ROUTE_MODE_URI = "uri"

# Allowed values for egress.mode.
EGRESS_MODE_DNS = "dns"
EGRESS_MODE_DNS_NFT = "dns+nft"


def _is_valid_ip(host: str) -> bool:
    try:
        ipaddress.ip_address(host)
        return True
    except ValueError:
        return False


def _is_valid_ip_or_ip_port(address: str) -> bool:
    """
    Return True when ``address`` is a dotted-quad IP, optionally followed by ``:port``.

    The regex only checks the shape; the IP part is then validated with
    ``_is_valid_ip`` and the port, when present, must be within 1-65535.
    """
    parsed = _IPV4_WITH_PORT_RE.match(address)
    if parsed is None or not _is_valid_ip(parsed.group("ip")):
        return False

    port_text = parsed.group("port")
    if port_text is None:
        # Bare IP without a port.
        return True

    try:
        port_number = int(port_text)
    except ValueError:
        return False
    return 0 < port_number < 65536


def _is_valid_domain(host: str) -> bool:
    """Return True when ``host`` matches the multi-label domain pattern ``_DOMAIN_RE``."""
    return _DOMAIN_RE.match(host) is not None


def _is_wildcard_domain(host: str) -> bool:
    """Return True when ``host`` matches the ``*.``-prefixed pattern ``_WILDCARD_DOMAIN_RE``."""
    return _WILDCARD_DOMAIN_RE.match(host) is not None


class GatewayRouteModeConfig(BaseModel):
    """Routing strategy for gateway ingress exposure."""

    # Pydantic v2 style configuration. The nested class-based `Config` is
    # deprecated in v2 and triggers a deprecation warning when the class is
    # defined; a plain dict is accepted for `model_config`, so no extra import
    # is needed.
    model_config = {"populate_by_name": True}

    # Required: one of "wildcard", "header" or "uri".
    mode: Literal[
        GATEWAY_ROUTE_MODE_WILDCARD,
        GATEWAY_ROUTE_MODE_HEADER,
        GATEWAY_ROUTE_MODE_URI,
    ] = Field(
        ...,
        description="Routing mode used by the gateway (wildcard, header, uri).",
    )


class GatewayConfig(BaseModel):
    """Gateway mode configuration for ingress exposure."""

    # Required, non-empty. Only the length is enforced here; the format
    # (scheme, wildcard, domain/IP) is checked by IngressConfig's validator.
    address: str = Field(
        ...,
        description="Gateway host used to expose sandboxes (domain or IP, may include :port; scheme is not allowed).",
        min_length=1,
    )
    # Required nested block selecting the routing mode.
    route: GatewayRouteModeConfig = Field(
        ...,
        description="Routing mode configuration used by the gateway.",
    )


class IngressConfig(BaseModel):
    """Configuration for exposing sandbox ingress."""

    mode: Literal[INGRESS_MODE_DIRECT, INGRESS_MODE_GATEWAY] = Field(
        default=INGRESS_MODE_DIRECT,
        description="Ingress exposure mode (direct or gateway).",
    )
    gateway: Optional[GatewayConfig] = Field(
        default=None,
        description="Gateway configuration required when mode = 'gateway'.",
    )

    @model_validator(mode="after")
    def validate_ingress_mode(self) -> "IngressConfig":
        """
        Cross-check ``mode`` against the ``gateway`` block.

        Direct mode must not carry a gateway block. Gateway mode requires one,
        and its address must be scheme-less and match the routing mode: a
        wildcard domain for wildcard routing, otherwise a plain domain, IP, or
        IP:port without ``*``.
        """
        gateway = self.gateway

        if self.mode == INGRESS_MODE_DIRECT:
            if gateway is not None:
                raise ValueError("gateway block must be omitted unless ingress.mode = 'gateway'.")
            return self

        if self.mode != INGRESS_MODE_GATEWAY:
            return self

        if gateway is None:
            raise ValueError("gateway block must be provided when ingress.mode = 'gateway'.")

        address = gateway.address
        if "://" in address:
            raise ValueError("ingress.gateway.address must not include a scheme; clients choose http/https.")

        if gateway.route.mode == GATEWAY_ROUTE_MODE_WILDCARD:
            if not _is_wildcard_domain(address):
                raise ValueError(
                    "ingress.gateway.address must be a wildcard domain (e.g., *.example.com) "
                    "when gateway.route.mode is wildcard."
                )
            return self

        # Header/URI routing: a concrete host is required.
        if "*" in address:
            raise ValueError(
                "ingress.gateway.address must not contain wildcard when gateway.route.mode is not wildcard."
            )
        if not _is_valid_domain(address) and not _is_valid_ip_or_ip_port(address):
            raise ValueError(
                "ingress.gateway.address must be a valid domain, IP, or IP:port when gateway.route.mode is not wildcard."
            )
        return self


class ServerConfig(BaseModel):
    """FastAPI server configuration."""

    # Every field has a default, so the whole [server] table may be omitted.
    host: str = Field(
        default="0.0.0.0",
        description="Interface bound by the lifecycle API server.",
        min_length=1,
    )
    # Restricted to the valid TCP port range.
    port: int = Field(
        default=8080,
        ge=1,
        le=65535,
        description="Port exposed by the lifecycle API server.",
    )
    # NOTE(review): only a minimum length is enforced here; the value is not
    # checked against known logging level names in this class.
    log_level: str = Field(
        default="INFO",
        description="Python logging level for the server process.",
        min_length=3,
    )
    api_key: Optional[str] = Field(
        default=None,
        description="Global API key for authenticating incoming lifecycle API calls.",
    )
    eip: Optional[str] = Field(
        default=None,
        description="Bound public IP. When set, used as the host part when returning sandbox endpoints.",
    )
    # When set, must be at least 60 seconds.
    max_sandbox_timeout_seconds: Optional[int] = Field(
        default=None,
        ge=60,
        description=(
            "Maximum allowed sandbox TTL in seconds for requests that specify timeout. "
            "Omit from config to disable the server-side upper bound."
        ),
    )


class KubernetesRuntimeConfig(BaseModel):
    """Kubernetes-specific runtime configuration."""

    # Every field has a default, so AppConfig can instantiate this model with
    # no arguments when the [kubernetes] table is absent.
    kubeconfig_path: Optional[str] = Field(
        default=None,
        description="Absolute path to the kubeconfig file used for API authentication.",
    )

    # --- Informer cache settings (marked Beta in the descriptions) ---
    informer_enabled: bool = Field(
        default=True,
        description=(
            "[Beta] Enable informer-backed cache for workload reads. "
            "Keeps a watch to reduce API pressure; set false to disable."
        ),
    )
    informer_resync_seconds: int = Field(
        default=300,
        ge=1,
        description=(
            "[Beta] Full resync interval for informer cache (seconds). "
            "Shorter intervals refresh the cache more eagerly."
        ),
    )
    informer_watch_timeout_seconds: int = Field(
        default=60,
        ge=1,
        description=(
            "[Beta] Watch timeout (seconds) before restarting the informer stream."
        ),
    )

    # --- API rate limiting; 0 disables the limit or derives the burst ---
    read_qps: float = Field(
        default=0.0,
        ge=0,
        description=(
            "Maximum read requests per second to the Kubernetes API (get/list). "
            "0 means unlimited (no rate limiting)."
        ),
    )
    read_burst: int = Field(
        default=0,
        ge=0,
        description=(
            "Burst size for the read rate limiter. "
            "0 means use read_qps as burst (minimum 1)."
        ),
    )
    write_qps: float = Field(
        default=0.0,
        ge=0,
        description=(
            "Maximum write requests per second to the Kubernetes API (create/delete/patch). "
            "0 means unlimited (no rate limiting)."
        ),
    )
    write_burst: int = Field(
        default=0,
        ge=0,
        description=(
            "Burst size for the write rate limiter. "
            "0 means use write_qps as burst (minimum 1)."
        ),
    )

    # --- Workload placement and provider selection ---
    namespace: Optional[str] = Field(
        default=None,
        description="Namespace used for sandbox workloads.",
    )
    service_account: Optional[str] = Field(
        default=None,
        description="Service account bound to sandbox workloads.",
    )
    # AppConfig lower-cases this value and compares it with "agent-sandbox".
    workload_provider: Optional[str] = Field(
        default=None,
        description="Workload provider type. If not specified, uses the first registered provider.",
    )
    batchsandbox_template_file: Optional[str] = Field(
        default=None,
        description="Path to BatchSandbox CR YAML template file. Used when workload_provider is 'batchsandbox'.",
    )

    # --- Sandbox creation wait loop ---
    sandbox_create_timeout_seconds: int = Field(
        default=60,
        ge=1,
        description="Timeout in seconds to wait for a sandbox to become ready (IP assigned) after creation.",
    )
    # Must be strictly positive (gt=0).
    sandbox_create_poll_interval_seconds: float = Field(
        default=1.0,
        gt=0,
        description="Polling interval in seconds when waiting for a sandbox to become ready after creation.",
    )
    # Forward reference: ExecdInitResources is declared after this class.
    execd_init_resources: Optional["ExecdInitResources"] = Field(
        default=None,
        description=(
            "Resource requests/limits for the execd init container. "
            "If unset, no resource constraints are applied."
        ),
    )


class ExecdInitResources(BaseModel):
    """Resource requests and limits for the execd init container."""

    # Both fields are optional string-to-string maps; the descriptions show the
    # expected TOML inline-table shape.
    limits: Optional[Dict[str, str]] = Field(
        default=None,
        description='Resource limits, e.g. {cpu = "100m", memory = "128Mi"}.',
    )
    requests: Optional[Dict[str, str]] = Field(
        default=None,
        description='Resource requests, e.g. {cpu = "50m", memory = "64Mi"}.',
    )


class AgentSandboxRuntimeConfig(BaseModel):
    """Agent-sandbox runtime configuration."""

    # Every field has a default, so AppConfig can instantiate this model with
    # no arguments when the provider is "agent-sandbox" and the table is absent.
    template_file: Optional[str] = Field(
        default=None,
        description="Path to Sandbox CR YAML template file for agent-sandbox.",
    )
    # Case-sensitive: only the exact strings "Delete" and "Retain" are accepted.
    shutdown_policy: Literal["Delete", "Retain"] = Field(
        default="Delete",
        description="Shutdown policy applied when a sandbox expires (Delete or Retain).",
    )
    ingress_enabled: bool = Field(
        default=True,
        description="Whether ingress routing to agent-sandbox pods is expected to be enabled.",
    )


class StorageConfig(BaseModel):
    """Volume and storage configuration for sandbox mounts."""

    # default_factory gives each instance its own empty list.
    # NOTE(review): the description requires absolute paths, but no validator
    # in this class enforces that.
    allowed_host_paths: list[str] = Field(
        default_factory=list,
        description=(
            "Allowlist of host path prefixes permitted for host bind mounts. "
            "If empty, all host paths are allowed (not recommended for production). "
            "Each entry must be an absolute path (e.g., '/data/opensandbox')."
        ),
    )
    ossfs_mount_root: str = Field(
        default="/mnt/ossfs",
        description=(
            "Host-side root directory where OSSFS mounts are resolved. "
            "Resolved OSSFS host paths are built as "
            "'ossfs_mount_root/<bucket>/<volume.subPath?>'."
        ),
    )


class EgressConfig(BaseModel):
    """Egress sidecar configuration."""

    # Optional, but must be non-empty when provided (min_length=1).
    image: Optional[str] = Field(
        default=None,
        description="Container image for the egress sidecar (used when network policy is requested).",
        min_length=1,
    )
    # One of "dns" (default) or "dns+nft".
    mode: Literal[
        EGRESS_MODE_DNS,
        EGRESS_MODE_DNS_NFT,
    ] = Field(
        default=EGRESS_MODE_DNS,
        description="Egress enforcement passed to the sidecar as OPENSANDBOX_EGRESS_MODE (dns or dns+nft).",
    )


class RuntimeConfig(BaseModel):
    """Runtime selection (docker, kubernetes, etc.)."""

    # Both fields are required (no defaults), so a [runtime] table must always
    # be present in the config.
    type: Literal["docker", "kubernetes"] = Field(
        ...,
        description="Active sandbox runtime implementation.",
    )
    execd_image: str = Field(
        ...,
        description="Container image that contains the execd binary for sandbox initialization.",
        min_length=1,
    )


class SecureRuntimeConfig(BaseModel):
    """Secure container runtime configuration (gVisor, Kata, Firecracker)."""

    type: Literal["", "gvisor", "kata", "firecracker"] = Field(
        default="",
        description=(
            "Secure runtime type. Empty means no secure runtime. "
            "gVisor uses runsc OCI runtime. "
            "Kata uses kata-runtime (OCI) or kata-qemu (RuntimeClass). "
            "Firecracker uses kata-fc (RuntimeClass, Kubernetes only)."
        ),
    )
    docker_runtime: Optional[str] = Field(
        default=None,
        description=(
            "OCI runtime name for Docker (e.g., 'runsc' for gVisor, 'kata-runtime' for Kata). "
            "When specified, the Docker daemon will use this runtime instead of runc."
        ),
    )
    k8s_runtime_class: Optional[str] = Field(
        default=None,
        description=(
            "Kubernetes RuntimeClass name for secure containers. "
            "Common values: 'gvisor', 'kata-qemu', 'kata-fc'. "
            "When specified, pods will have runtimeClassName set to this value."
        ),
    )

    @model_validator(mode="after")
    def validate_secure_runtime(self) -> "SecureRuntimeConfig":
        """
        Enforce which runtime names may or must accompany each ``type``.

        - empty type: neither runtime name may be set;
        - firecracker: ``k8s_runtime_class`` is mandatory (``docker_runtime``
          is tolerated);
        - gvisor / kata: at least one of the two runtime names is required.
        """
        has_docker_runtime = self.docker_runtime is not None
        has_runtime_class = self.k8s_runtime_class is not None
        kind = self.type

        if kind == "":
            if has_docker_runtime or has_runtime_class:
                raise ValueError(
                    "docker_runtime and k8s_runtime_class must be omitted when secure_runtime.type is empty."
                )
        elif kind == "firecracker":
            if not has_runtime_class:
                raise ValueError(
                    "secure_runtime.k8s_runtime_class is required when secure_runtime.type is 'firecracker'."
                )
        elif kind in ("gvisor", "kata"):
            if not (has_docker_runtime or has_runtime_class):
                raise ValueError(
                    "At least one of secure_runtime.docker_runtime or secure_runtime.k8s_runtime_class "
                    f"must be specified when secure_runtime.type is '{kind}'."
                )

        return self


class DockerConfig(BaseModel):
    """Docker runtime specific settings."""

    # Every field has a default, so the whole [docker] table may be omitted.
    network_mode: str = Field(
        default="host",
        description="Docker network mode for sandbox containers (host, bridge, or a custom user-defined network name).",
    )
    # When set, must be at least 1 second.
    api_timeout: Optional[int] = Field(
        default=None,
        ge=1,
        description="Docker API timeout in seconds. If unset, default is 180.",
    )
    host_ip: Optional[str] = Field(
        default=None,
        description=(
            "Docker host IP or hostname for bridge-mode endpoint URLs when the server runs in a container."
        ),
    )
    # The factory builds a fresh list per instance, so the default is never
    # shared between instances.
    drop_capabilities: list[str] = Field(
        default_factory=lambda: [
            "AUDIT_WRITE",
            "MKNOD",
            "NET_ADMIN",
            "NET_RAW",
            "SYS_ADMIN",
            "SYS_MODULE",
            "SYS_PTRACE",
            "SYS_TIME",
            "SYS_TTY_CONFIG",
        ],
        description=(
            "Linux capabilities to drop from sandbox containers. Defaults to a conservative set to reduce host impact."
        ),
    )
    apparmor_profile: Optional[str] = Field(
        default=None,
        description=(
            "Optional AppArmor profile name applied to sandbox containers. Leave unset to let Docker choose the default."
        ),
    )
    no_new_privileges: bool = Field(
        default=True,
        description="Enable the kernel no_new_privileges flag to block privilege escalation inside the container.",
    )
    seccomp_profile: Optional[str] = Field(
        default=None,
        description=(
            "Optional seccomp profile name or path applied to sandbox containers. Leave unset to use Docker's default profile."
        ),
    )
    # NOTE(review): the description says "set to null", but TOML has no null
    # literal and omitting the key yields the default 4096 — confirm how users
    # are expected to disable the limit from a config file.
    pids_limit: Optional[int] = Field(
        default=4096,
        ge=1,
        description="Maximum number of processes allowed per sandbox container. Set to null to disable the limit.",
    )


class AppConfig(BaseModel):
    """Root application configuration model."""

    server: ServerConfig = Field(default_factory=ServerConfig)
    runtime: RuntimeConfig = Field(..., description="Sandbox runtime configuration.")
    kubernetes: Optional[KubernetesRuntimeConfig] = None
    agent_sandbox: Optional["AgentSandboxRuntimeConfig"] = None
    ingress: Optional[IngressConfig] = None
    docker: DockerConfig = Field(default_factory=DockerConfig)
    storage: StorageConfig = Field(default_factory=StorageConfig)
    egress: Optional[EgressConfig] = None
    secure_runtime: Optional[SecureRuntimeConfig] = Field(
        default=None,
        description="Secure container runtime configuration (gVisor, Kata, Firecracker).",
    )

    @model_validator(mode="after")
    def validate_runtime_blocks(self) -> "AppConfig":
        """
        Check that the optional blocks are consistent with ``runtime.type``.

        Docker runtime rejects the kubernetes and agent_sandbox blocks,
        non-direct ingress and the firecracker secure runtime. Kubernetes
        runtime fills in a default ``kubernetes`` block when it is missing,
        and a default ``agent_sandbox`` block when the workload provider is
        "agent-sandbox"; an ``agent_sandbox`` block with any other provider is
        rejected.
        """
        runtime_type = self.runtime.type
        if runtime_type not in ("docker", "kubernetes"):
            raise ValueError(f"Unsupported runtime type '{self.runtime.type}'.")

        if runtime_type == "docker":
            if self.kubernetes is not None:
                raise ValueError("Kubernetes block must be omitted when runtime.type = 'docker'.")
            if self.agent_sandbox is not None:
                raise ValueError("agent_sandbox block must be omitted when runtime.type = 'docker'.")
            ingress = self.ingress
            if ingress is not None and ingress.mode != INGRESS_MODE_DIRECT:
                raise ValueError("ingress.mode must be 'direct' when runtime.type = 'docker'.")
            secure = self.secure_runtime
            if secure is not None and secure.type == "firecracker":
                raise ValueError(
                    "secure_runtime.type 'firecracker' is only compatible with runtime.type='kubernetes'."
                )
            return self

        # Kubernetes runtime from here on.
        if self.kubernetes is None:
            self.kubernetes = KubernetesRuntimeConfig()

        # The provider name is compared case-insensitively.
        uses_agent_sandbox = (self.kubernetes.workload_provider or "").lower() == "agent-sandbox"
        if uses_agent_sandbox:
            if self.agent_sandbox is None:
                self.agent_sandbox = AgentSandboxRuntimeConfig()
        elif self.agent_sandbox is not None:
            raise ValueError(
                "agent_sandbox block requires kubernetes.workload_provider = 'agent-sandbox'."
            )
        return self


# Module-level cache: the most recently loaded configuration and the path it
# was resolved from. Set by load_config(); lazily filled by get_config() and
# get_config_path().
_config: AppConfig | None = None
_config_path: Path | None = None


def _resolve_config_path(path: str | Path | None = None) -> Path:
    """Resolve configuration file path from explicit value, env var, or default."""
    if path:
        return Path(path).expanduser()
    env_path = os.environ.get(CONFIG_ENV_VAR)
    if env_path:
        return Path(env_path).expanduser()
    return DEFAULT_CONFIG_PATH


def _load_toml_data(path: Path) -> dict[str, Any]:
    """
    Parse the TOML file at ``path``.

    A missing file is not an error: it is logged at INFO level and an empty
    dict is returned. Read or parse failures are logged and re-raised.
    """
    if not path.exists():
        logger.info("Config file %s not found. Using default configuration.", path)
        return {}

    try:
        with path.open("rb") as stream:
            parsed = tomllib.load(stream)
            logger.info("Loaded configuration from %s", path)
    except Exception as err:  # noqa: BLE001
        logger.error("Failed to read config file %s: %s", path, err)
        raise
    return parsed


def load_config(path: str | Path | None = None) -> AppConfig:
    """
    Read the TOML config, validate it, and cache it at module level.

    Args:
        path: Optional explicit config path. When omitted, the
              SANDBOX_CONFIG_PATH env var is used, then ~/.sandbox.toml.

    Returns:
        AppConfig: The validated configuration, also stored as the module's
        active config.

    Raises:
        ValidationError: If the TOML contents do not match the AppConfig schema.
        Exception: For any IO or parsing errors.
    """
    global _config, _config_path

    config_file = _resolve_config_path(path)
    settings = _load_toml_data(config_file)

    try:
        parsed = AppConfig(**settings)
    except ValidationError as err:
        logger.error("Invalid configuration in %s: %s", config_file, err)
        raise

    # The cache is only updated after validation succeeded.
    _config, _config_path = parsed, config_file
    return parsed


def get_config() -> AppConfig:
    """
    Return the active configuration, loading it on first use.

    Returns:
        AppConfig: The cached configuration; ``load_config()`` is called with
        no arguments when nothing has been loaded yet.
    """
    global _config
    active = _config
    if active is None:
        active = _config = load_config()
    return active


def get_config_path() -> Path:
    """Return the cached config path, resolving and caching it on first use."""
    global _config_path
    cached = _config_path
    if cached is None:
        cached = _config_path = _resolve_config_path()
    return cached


# Names exported by `from ... import *`.
# NOTE(review): AgentSandboxRuntimeConfig, ExecdInitResources and the
# GATEWAY_ROUTE_MODE_* constants are defined in this module but not listed
# here — confirm whether the omission is intentional.
__all__ = [
    "AppConfig",
    "ServerConfig",
    "RuntimeConfig",
    "IngressConfig",
    "GatewayConfig",
    "GatewayRouteModeConfig",
    "INGRESS_MODE_DIRECT",
    "INGRESS_MODE_GATEWAY",
    "DockerConfig",
    "StorageConfig",
    "KubernetesRuntimeConfig",
    "EgressConfig",
    "EGRESS_MODE_DNS",
    "EGRESS_MODE_DNS_NFT",
    "SecureRuntimeConfig",
    "DEFAULT_CONFIG_PATH",
    "CONFIG_ENV_VAR",
    "get_config",
    "get_config_path",
    "load_config",
]


================================================
FILE: server/src/main.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
FastAPI application entry point for OpenSandbox Lifecycle API.

This module initializes the FastAPI application with middleware, routes,
and configuration for the sandbox lifecycle management service.
"""

import copy
import logging.config
from contextlib import asynccontextmanager
from typing import Any

import httpx
from fastapi import FastAPI, Request
from fastapi.exceptions import HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse

from src.config import load_config
from uvicorn.config import LOGGING_CONFIG as UVICORN_LOGGING_CONFIG

# Load configuration before initializing routers/middleware
app_config = load_config()

# Unify logging format (including uvicorn access/error logs) with timestamp prefix.
# A deep copy is used so uvicorn's own LOGGING_CONFIG dict is left untouched.
_log_config = copy.deepcopy(UVICORN_LOGGING_CONFIG)
# %(request_id)s is not a standard LogRecord attribute; it relies on the
# "request_id" filter attached to the handlers below.
_fmt = "%(levelprefix)s %(asctime)s [%(request_id)s] %(name)s: %(message)s"
_datefmt = "%Y-%m-%d %H:%M:%S%z"

# Inject request_id into log records so one request's logs can be correlated.
_log_config["filters"] = {
    "request_id": {"()": "src.middleware.request_id.RequestIdFilter"},
}
_log_config["handlers"]["default"]["filters"] = ["request_id"]
_log_config["handlers"]["access"]["filters"] = ["request_id"]

# Enable colors and set format for both default and access loggers
_log_config["formatters"]["default"]["fmt"] = _fmt
_log_config["formatters"]["default"]["datefmt"] = _datefmt
_log_config["formatters"]["default"]["use_colors"] = True

_log_config["formatters"]["access"]["fmt"] = _fmt
_log_config["formatters"]["access"]["datefmt"] = _datefmt
_log_config["formatters"]["access"]["use_colors"] = True

# Ensure project loggers (src.*) emit at configured level using the default handler.
# NOTE(review): the level is passed to dictConfig as-is (upper-cased), while the
# root logger below falls back to INFO for unknown names — confirm that an
# invalid server.log_level is meant to fail here rather than fall back.
_log_config["loggers"]["src"] = {
    "handlers": ["default"],
    "level": app_config.server.log_level.upper(),
    "propagate": False,
}

logging.config.dictConfig(_log_config)
# Root logger level: the configured name if `logging` defines it, else INFO.
logging.getLogger().setLevel(
    getattr(logging, app_config.server.log_level.upper(), logging.INFO)
)

from src.api.lifecycle import router  # noqa: E402
from src.middleware.auth import AuthMiddleware  # noqa: E402
from src.middleware.request_id import RequestIdMiddleware  # noqa: E402
from src.services.runtime_resolver import (  # noqa: E402
    validate_secure_runtime_on_startup,
)

logger = logging.getLogger(__name__)

@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Application lifespan: set up shared resources, validate the secure runtime, clean up.

    On startup a shared ``httpx.AsyncClient`` (180 s timeout) is stored on
    ``app.state.http_client`` and the secure runtime configuration is validated
    against the backend selected by ``runtime.type``. Validation errors are
    logged and re-raised so startup fails.

    The HTTP client is closed in a ``finally`` block, so it is released both on
    normal shutdown and when startup validation (or the running application)
    raises.
    """
    app.state.http_client = httpx.AsyncClient(timeout=180.0)

    try:
        # Validate secure runtime configuration at startup
        try:
            # Determine which runtime client to create based on config
            docker_client = None
            k8s_client = None
            runtime_type = app_config.runtime.type

            if runtime_type == "docker":
                # Imported lazily so the docker package is only needed for this runtime.
                import docker

                docker_client = docker.from_env()
                logger.info("Validating secure runtime for Docker backend")
            elif runtime_type == "kubernetes":
                from src.services.k8s.client import K8sClient

                k8s_client = K8sClient(app_config.kubernetes)
                logger.info("Validating secure runtime for Kubernetes backend")

            await validate_secure_runtime_on_startup(
                app_config,
                docker_client=docker_client,
                k8s_client=k8s_client,
            )

        except Exception as exc:
            logger.error("Secure runtime validation failed: %s", exc)
            raise

        yield
    finally:
        # Always release the connection pool, including on failed startup.
        await app.state.http_client.aclose()


# Initialize FastAPI application
app = FastAPI(
    title="OpenSandbox Lifecycle API",
    version="0.1.0",
    description="The Sandbox Lifecycle API coordinates how untrusted workloads are created, "
                "executed, paused, resumed, and finally disposed.",
    docs_url="/docs",
    redoc_url="/redoc",
    lifespan=lifespan,
)

# Attach global config for runtime access
app.state.config = app_config

# Middleware run in reverse order of addition: last added = first to run (outermost).
# Add auth and CORS first so they run after RequestIdMiddleware.
app.add_middleware(AuthMiddleware, config=app_config)
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is a
# very permissive CORS policy — confirm it is intended outside development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# RequestIdMiddleware last = outermost: runs first, so every response (including
# 401 from AuthMiddleware) gets X-Request-ID and logs have request_id in context.
app.add_middleware(RequestIdMiddleware)

# Include API routes at root and versioned prefix
# The same router is mounted twice, so each route is reachable both with and
# without the "/v1" prefix.
app.include_router(router)
app.include_router(router, prefix="/v1")

DEFAULT_ERROR_CODE = "GENERAL::UNKNOWN_ERROR"
DEFAULT_ERROR_MESSAGE = "An unexpected error occurred."


def _normalize_error_detail(detail: Any) -> dict[str, str]:
    """
    Ensure HTTP errors always conform to {"code": "...", "message": "..."}.
    """
    if isinstance(detail, dict):
        code = detail.get("code") or DEFAULT_ERROR_CODE
        message = detail.get("message") or DEFAULT_ERROR_MESSAGE
        return {"code": code, "message": message}
    message = str(detail) if detail else DEFAULT_ERROR_MESSAGE
    return {"code": DEFAULT_ERROR_CODE, "message": message}


@app.exception_handler(HTTPException)
async def sandbox_http_exception_handler(request: Request, exc: HTTPException):
    """
    Render an HTTPException as the flat ``{"code", "message"}`` error body.

    Args:
        request: The request being handled (unused; required by the handler signature).
        exc: The raised exception; its status code and headers are passed through.

    Returns:
        JSONResponse carrying the normalized error payload.
    """
    return JSONResponse(
        status_code=exc.status_code,
        content=_normalize_error_detail(exc.detail),
        headers=exc.headers,
    )


@app.get("/health")
async def health_check():
    """
    Liveness probe.

    Returns:
        dict: A constant payload, ``{"status": "healthy"}``.
    """
    payload = {"status": "healthy"}
    return payload


if __name__ == "__main__":
    import uvicorn

    # Direct-execution entry point: serve the app with auto-reload enabled,
    # binding to the host/port taken from the loaded server configuration.
    server_settings = app_config.server
    uvicorn.run(
        "src.main:app",
        host=server_settings.host,
        port=server_settings.port,
        reload=True,
        log_config=_log_config,
    )


================================================
FILE: server/src/middleware/__init__.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: server/src/middleware/auth.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Authentication middleware for OpenSandbox Lifecycle API.

This module implements API Key authentication as specified in the OpenAPI spec.
API keys are configured via config.toml and validated against the OPEN-SANDBOX-API-KEY header.
"""

import re
from typing import Callable, Optional

from fastapi import Request, Response, status
from fastapi.responses import JSONResponse
from starlette.middleware.base import BaseHTTPMiddleware

from src.config import AppConfig, get_config

class AuthMiddleware(BaseHTTPMiddleware):
    """
    Middleware for API Key authentication.

    Validates the OPEN-SANDBOX-API-KEY header for all requests except the
    exempt paths and the sandbox proxy route. Returns 401 Unauthorized if
    authentication fails. When no API key is configured, every request is
    let through.
    """

    # Name of the request header that carries the API key.
    API_KEY_HEADER = "OPEN-SANDBOX-API-KEY"

    # Paths that don't require authentication. Each entry exempts the path
    # itself and anything below it (e.g. "/docs" and "/docs/oauth2-redirect"),
    # but NOT siblings that merely share the prefix (e.g. "/docs-internal").
    EXEMPT_PATHS = ["/health", "/docs", "/redoc", "/openapi.json"]

    # Strict pattern for proxy-to-sandbox: /sandboxes/{id}/proxy/{port}/... with numeric port only.
    # Matches the actual route in lifecycle.py; rejects path traversal (..) and malformed port.
    _PROXY_PATH_RE = re.compile(r"^(/v1)?/sandboxes/[^/]+/proxy/\d+(/|$)")

    @staticmethod
    def _is_proxy_path(path: str) -> bool:
        """True only for the exact proxy-route shape; rejects path traversal (..)."""
        if ".." in path:
            return False
        return bool(AuthMiddleware._PROXY_PATH_RE.match(path))

    def _is_exempt_path(self, path: str) -> bool:
        """
        Return True when ``path`` is an exempt path or lies beneath one.

        Matching is done on path-segment boundaries so that an unrelated path
        that merely starts with the same characters (``/healthXYZ``) is not
        silently exempted from authentication.
        """
        for exempt in self.EXEMPT_PATHS:
            # Normalize a trailing slash so "/docs" and "/docs/" behave alike.
            prefix = exempt.rstrip("/")
            if path == prefix or path.startswith(prefix + "/"):
                return True
        return False

    def __init__(self, app, config: Optional[AppConfig] = None):
        """
        Initialize authentication middleware.

        Args:
            app: FastAPI application instance
            config: Optional application configuration (for dependency injection)
        """
        super().__init__(app)
        self.config = config or get_config()
        # Read the API key directly from config; suitable for dev/test usage
        self.valid_api_keys = self._load_api_keys()

    def _load_api_keys(self) -> set:
        """
        Load valid API keys from configuration.

        Returns:
            set: Set of valid API keys (empty when no key is configured)
        """
        # Supports a single API key from config; extend later for secret managers
        api_key = self.config.server.api_key
        # Treat empty string as no key configured
        if api_key and api_key.strip():
            return {api_key}
        return set()

    def _is_valid_api_key(self, candidate: str) -> bool:
        """
        Check ``candidate`` against the configured keys in constant time.

        ``hmac.compare_digest`` avoids leaking how many leading characters of
        the secret matched through response timing. Every configured key is
        compared (no early exit) for the same reason.
        """
        import hmac  # local import keeps this block self-contained

        candidate_bytes = candidate.encode("utf-8")
        matched = False
        for key in self.valid_api_keys:
            if hmac.compare_digest(candidate_bytes, key.encode("utf-8")):
                matched = True
        return matched

    async def dispatch(self, request: Request, call_next: Callable) -> Response:
        """
        Process each request and validate authentication.

        Args:
            request: Incoming HTTP request
            call_next: Next middleware or route handler

        Returns:
            Response: The downstream response, or a 401 JSON error when the
            API key is missing or invalid.
        """
        path = request.url.path

        # Skip authentication for exempt paths (segment-boundary match).
        if self._is_exempt_path(path):
            return await call_next(request)

        # Skip authentication only for the exact proxy-to-sandbox route shape
        # (no path traversal, no loose substring match)
        if self._is_proxy_path(path):
            return await call_next(request)

        # If no API keys are configured, skip authentication
        if not self.valid_api_keys:
            return await call_next(request)

        # Extract API key from header
        api_key = request.headers.get(self.API_KEY_HEADER)

        # Validate API key
        if not api_key:
            return JSONResponse(
                status_code=status.HTTP_401_UNAUTHORIZED,
                content={
                    "code": "MISSING_API_KEY",
                    "message": "Authentication credentials are missing. "
                              "Provide API key via OPEN-SANDBOX-API-KEY header.",
                },
            )

        # Keys are configured at this point, so the comparison is always enforced.
        if not self._is_valid_api_key(api_key):
            return JSONResponse(
                status_code=status.HTTP_401_UNAUTHORIZED,
                content={
                    "code": "INVALID_API_KEY",
                    "message": "Authentication credentials are invalid. "
                              "Check your API key and try again.",
                },
            )

        # Authentication successful, proceed to next middleware/handler
        return await call_next(request)


================================================
FILE: server/src/middleware/request_id.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Request ID middleware and logging context for OpenSandbox Lifecycle API.

Reads X-Request-ID from incoming requests (or generates one), stores it in
contextvars so that all logs emitted during that request can be correlated
by request_id. Response includes X-Request-ID for client-side tracing.
"""

import logging
import uuid
from contextvars import ContextVar
from typing import Callable

from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import Response

# Context variable holding the current request ID for this async context.
# Used by RequestIdFilter to attach request_id to log records.
request_id_ctx: ContextVar[str | None] = ContextVar("request_id", default=None)

X_REQUEST_ID_HEADER = "X-Request-ID"


def get_request_id() -> str | None:
    """Return the current request ID in this async context, or None."""
    return request_id_ctx.get()


class RequestIdMiddleware(BaseHTTPMiddleware):
    """
    Binds a request ID to each request and echoes it on the response.

    The ID comes from the incoming X-Request-ID header when it is present and
    non-blank; otherwise a random hex UUID is generated. It is published via
    the ``request_id_ctx`` context variable so downstream code and
    RequestIdFilter can read it without it being passed around explicitly.
    """

    async def dispatch(self, request: Request, call_next: Callable) -> Response:
        incoming = request.headers.get(X_REQUEST_ID_HEADER)
        # Whitespace-only or absent header values count as "not supplied".
        stripped = incoming.strip() if incoming else incoming
        request_id = stripped if stripped else uuid.uuid4().hex

        ctx_token = request_id_ctx.set(request_id)
        try:
            response = await call_next(request)
            response.headers[X_REQUEST_ID_HEADER] = request_id
            return response
        finally:
            # Always restore the previous value, even if the handler raised.
            request_id_ctx.reset(ctx_token)


class RequestIdFilter(logging.Filter):
    """
    Logging filter that stamps every record with a ``request_id`` attribute.

    Install it on handlers whose format string contains %(request_id)s. When
    no request ID is bound to the current context the attribute is "-".
    The filter never suppresses a record.
    """

    def filter(self, record: logging.LogRecord) -> bool:
        current = get_request_id()
        record.request_id = current or "-"
        return True


================================================
FILE: server/src/py.typed
================================================


================================================
FILE: server/src/services/__init__.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Sandbox service implementations."""

from src.services.docker import DockerSandboxService
from src.services.k8s.kubernetes_service import KubernetesSandboxService
from src.services.factory import create_sandbox_service
from src.services.sandbox_service import SandboxService

# Public names re-exported by the services package: the abstract service, its
# two runtime implementations, and the factory that creates a service.
__all__ = [
    "SandboxService",
    "DockerSandboxService",
    "KubernetesSandboxService",
    "create_sandbox_service",
]


================================================
FILE: server/src/services/constants.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Shared constants for sandbox services."""

# Namespace prefix shared by every OpenSandbox-owned label key defined below.
# NOTE(review): presumably user-supplied metadata may not use this prefix —
# confirm against the metadata validators.
RESERVED_LABEL_PREFIX = "opensandbox.io/"

# Label keys written on sandbox containers/resources.
SANDBOX_ID_LABEL = "opensandbox.io/id"
SANDBOX_EXPIRES_AT_LABEL = "opensandbox.io/expires-at"
SANDBOX_MANUAL_CLEANUP_LABEL = "opensandbox.io/manual-cleanup"
# Host-mapped ports recorded on containers (bridge mode).
SANDBOX_EMBEDDING_PROXY_PORT_LABEL = "opensandbox.io/embedding-proxy-port"  # maps container 44772 -> host port
SANDBOX_HTTP_PORT_LABEL = "opensandbox.io/http-port"  # maps container 8080 -> host port
SANDBOX_OSSFS_MOUNTS_LABEL = "opensandbox.io/ossfs-mounts"
# HTTP header names used for ingress routing and egress authentication.
OPEN_SANDBOX_INGRESS_HEADER = "OpenSandbox-Ingress-To"
OPEN_SANDBOX_EGRESS_AUTH_HEADER = "OPENSANDBOX-EGRESS-AUTH"
# Metadata key under which a sandbox's egress auth token is stored.
SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY = "opensandbox.io/egress-auth-token"

# Environment variable name for passing network policy to egress sidecar
EGRESS_RULES_ENV = "OPENSANDBOX_EGRESS_RULES"
# Must match components/egress/pkg/constants/configuration.go EnvEgressMode
EGRESS_MODE_ENV = "OPENSANDBOX_EGRESS_MODE"
# Must match components/egress/pkg/constants/configuration.go EnvEgressToken
OPENSANDBOX_EGRESS_TOKEN = "OPENSANDBOX_EGRESS_TOKEN"


class SandboxErrorCodes:
    """
    Namespace of string error codes returned by sandbox service operations.

    Every value has the form ``<DOMAIN>::<REASON>`` where the domain is one of
    DOCKER, KUBERNETES, SANDBOX or VOLUME. The class is used purely as a
    constant holder and is not meant to be instantiated.
    """

    # --- Docker runtime ---------------------------------------------------
    DOCKER_INITIALIZATION_ERROR = "DOCKER::INITIALIZATION_ERROR"
    CONTAINER_QUERY_FAILED = "DOCKER::SANDBOX_QUERY_FAILED"
    SANDBOX_NOT_FOUND = "DOCKER::SANDBOX_NOT_FOUND"
    IMAGE_PULL_FAILED = "DOCKER::SANDBOX_IMAGE_PULL_FAILED"
    CONTAINER_START_FAILED = "DOCKER::SANDBOX_START_FAILED"
    SANDBOX_DELETE_FAILED = "DOCKER::SANDBOX_DELETE_FAILED"
    SANDBOX_NOT_RUNNING = "DOCKER::SANDBOX_NOT_RUNNING"
    SANDBOX_PAUSE_FAILED = "DOCKER::SANDBOX_PAUSE_FAILED"
    SANDBOX_NOT_PAUSED = "DOCKER::SANDBOX_NOT_PAUSED"
    SANDBOX_RESUME_FAILED = "DOCKER::SANDBOX_RESUME_FAILED"
    INVALID_EXPIRATION = "DOCKER::INVALID_EXPIRATION"
    EXPIRATION_NOT_EXTENDED = "DOCKER::EXPIRATION_NOT_EXTENDED"
    EXECD_START_FAILED = "DOCKER::SANDBOX_EXECD_START_FAILED"
    EXECD_DISTRIBUTION_FAILED = "DOCKER::SANDBOX_EXECD_DISTRIBUTION_FAILED"
    BOOTSTRAP_INSTALL_FAILED = "DOCKER::SANDBOX_BOOTSTRAP_INSTALL_FAILED"
    INVALID_ENTRYPOINT = "DOCKER::INVALID_ENTRYPOINT"
    INVALID_PORT = "DOCKER::INVALID_PORT"
    NETWORK_MODE_ENDPOINT_UNAVAILABLE = "DOCKER::NETWORK_MODE_ENDPOINT_UNAVAILABLE"

    # --- Kubernetes runtime -----------------------------------------------
    K8S_INITIALIZATION_ERROR = "KUBERNETES::INITIALIZATION_ERROR"
    K8S_SANDBOX_NOT_FOUND = "KUBERNETES::SANDBOX_NOT_FOUND"
    K8S_POD_FAILED = "KUBERNETES::POD_FAILED"
    K8S_POD_READY_TIMEOUT = "KUBERNETES::POD_READY_TIMEOUT"
    K8S_API_ERROR = "KUBERNETES::API_ERROR"
    K8S_POD_IP_NOT_AVAILABLE = "KUBERNETES::POD_IP_NOT_AVAILABLE"

    # --- Runtime-independent ----------------------------------------------
    UNKNOWN_ERROR = "SANDBOX::UNKNOWN_ERROR"
    API_NOT_SUPPORTED = "SANDBOX::API_NOT_SUPPORTED"
    INVALID_METADATA_LABEL = "SANDBOX::INVALID_METADATA_LABEL"
    INVALID_PARAMETER = "SANDBOX::INVALID_PARAMETER"

    # --- Volumes: generic validation ---------------------------------------
    INVALID_VOLUME_NAME = "VOLUME::INVALID_NAME"
    DUPLICATE_VOLUME_NAME = "VOLUME::DUPLICATE_NAME"
    INVALID_VOLUME_BACKEND = "VOLUME::INVALID_BACKEND"
    INVALID_MOUNT_PATH = "VOLUME::INVALID_MOUNT_PATH"
    INVALID_SUB_PATH = "VOLUME::INVALID_SUB_PATH"
    INVALID_HOST_PATH = "VOLUME::INVALID_HOST_PATH"
    HOST_PATH_NOT_ALLOWED = "VOLUME::HOST_PATH_NOT_ALLOWED"
    INVALID_PVC_NAME = "VOLUME::INVALID_PVC_NAME"
    UNSUPPORTED_VOLUME_BACKEND = "VOLUME::UNSUPPORTED_BACKEND"
    # --- Volumes: host path / PVC backends ---------------------------------
    HOST_PATH_NOT_FOUND = "VOLUME::HOST_PATH_NOT_FOUND"
    HOST_PATH_CREATE_FAILED = "VOLUME::HOST_PATH_CREATE_FAILED"
    PVC_VOLUME_NOT_FOUND = "VOLUME::PVC_NOT_FOUND"
    PVC_VOLUME_INSPECT_FAILED = "VOLUME::PVC_INSPECT_FAILED"
    PVC_SUBPATH_UNSUPPORTED_DRIVER = "VOLUME::PVC_SUBPATH_UNSUPPORTED_DRIVER"
    # --- Volumes: OSSFS backend --------------------------------------------
    INVALID_OSSFS_VERSION = "VOLUME::INVALID_OSSFS_VERSION"
    INVALID_OSSFS_ENDPOINT = "VOLUME::INVALID_OSSFS_ENDPOINT"
    INVALID_OSSFS_BUCKET = "VOLUME::INVALID_OSSFS_BUCKET"
    INVALID_OSSFS_OPTION = "VOLUME::INVALID_OSSFS_OPTION"
    INVALID_OSSFS_CREDENTIALS = "VOLUME::INVALID_OSSFS_CREDENTIALS"
    INVALID_OSSFS_MOUNT_ROOT = "VOLUME::INVALID_OSSFS_MOUNT_ROOT"
    OSSFS_PATH_NOT_FOUND = "VOLUME::OSSFS_PATH_NOT_FOUND"
    OSSFS_MOUNT_FAILED = "VOLUME::OSSFS_MOUNT_FAILED"
    OSSFS_UNMOUNT_FAILED = "VOLUME::OSSFS_UNMOUNT_FAILED"


# Explicit export list: every module-level constant defined above plus the
# SandboxErrorCodes class.
__all__ = [
    "RESERVED_LABEL_PREFIX",
    "SANDBOX_ID_LABEL",
    "SANDBOX_EXPIRES_AT_LABEL",
    "SANDBOX_MANUAL_CLEANUP_LABEL",
    "SANDBOX_EMBEDDING_PROXY_PORT_LABEL",
    "SANDBOX_HTTP_PORT_LABEL",
    "SANDBOX_OSSFS_MOUNTS_LABEL",
    "OPEN_SANDBOX_INGRESS_HEADER",
    "OPEN_SANDBOX_EGRESS_AUTH_HEADER",
    "SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY",
    "EGRESS_RULES_ENV",
    "EGRESS_MODE_ENV",
    "OPENSANDBOX_EGRESS_TOKEN",
    "SandboxErrorCodes",
]


================================================
FILE: server/src/services/docker.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Docker-based implementation of SandboxService.

This module provides a Docker implementation of the sandbox service interface,
using Docker containers for sandbox lifecycle management.
"""

from __future__ import annotations

import inspect
import io
import json
import logging
import math
import os
import posixpath
import random
import socket
import tarfile
import time
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from threading import Lock, Timer
from typing import Any, Dict, Optional
from uuid import uuid4

import docker
from docker.errors import DockerException, ImageNotFound, NotFound as DockerNotFound
from fastapi import HTTPException, status

from src.api.schema import (
    CreateSandboxRequest,
    CreateSandboxResponse,
    Endpoint,
    ImageSpec,
    ListSandboxesRequest,
    ListSandboxesResponse,
    NetworkPolicy,
    PaginationInfo,
    RenewSandboxExpirationRequest,
    RenewSandboxExpirationResponse,
    Sandbox,
    SandboxStatus,
)
from src.config import AppConfig, get_config
from src.services.constants import (
    EGRESS_MODE_ENV,
    EGRESS_RULES_ENV,
    OPENSANDBOX_EGRESS_TOKEN,
    SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY,
    SANDBOX_EMBEDDING_PROXY_PORT_LABEL,
    SANDBOX_EXPIRES_AT_LABEL,
    SANDBOX_HTTP_PORT_LABEL,
    SANDBOX_ID_LABEL,
    SANDBOX_MANUAL_CLEANUP_LABEL,
    SANDBOX_OSSFS_MOUNTS_LABEL,
    SandboxErrorCodes,
)
from src.services.endpoint_auth import (
    build_egress_auth_headers,
    generate_egress_token,
    merge_endpoint_headers,
)
from src.services.helpers import (
    matches_filter,
    parse_memory_limit,
    parse_nano_cpus,
    parse_timestamp,
)
from src.services.ossfs_mixin import OSSFSMixin
from src.services.sandbox_service import SandboxService
from src.services.runtime_resolver import SecureRuntimeResolver
from src.services.validators import (
    calculate_expiration_or_raise,
    ensure_egress_configured,
    ensure_entrypoint,
    ensure_future_expiration,
    ensure_metadata_labels,
    ensure_timeout_within_limit,
    ensure_valid_host_path,
    ensure_volumes_valid,
)
logger = logging.getLogger(__name__)


def _running_inside_docker_container() -> bool:
    """Return True if the current process is running inside a Docker container."""
    return os.path.exists("/.dockerenv")


# Base directory (a POSIX path) under which the execd and bootstrap paths
# below are built.
OPENSANDBOX_DIR = "/opt/opensandbox"
# Use posixpath for container-internal paths so they always use forward slashes,
# even when the server runs on Windows.
EXECED_INSTALL_PATH = posixpath.join(OPENSANDBOX_DIR, "execd")
BOOTSTRAP_PATH = posixpath.join(OPENSANDBOX_DIR, "bootstrap.sh")

# Docker network mode names; HOST_NETWORK_MODE is the fallback used by
# DockerSandboxService when [docker].network_mode is not set.
HOST_NETWORK_MODE = "host"
BRIDGE_NETWORK_MODE = "bridge"
# Read once at import time from the PENDING_FAILURE_TTL environment variable
# (default "3600"). A non-integer value raises ValueError during import.
# NOTE(review): presumably how long failed pending sandboxes are retained, in
# seconds — confirm against the code that uses it.
PENDING_FAILURE_TTL_SECONDS = int(os.environ.get("PENDING_FAILURE_TTL", "3600"))
# Label whose presence marks a container as an egress sidecar; its value is
# read as the ID of the sandbox the sidecar belongs to.
EGRESS_SIDECAR_LABEL = "opensandbox.io/egress-sidecar-for"


@dataclass
class PendingSandbox:
    """
    In-memory record stored in ``DockerSandboxService._pending_sandboxes``.

    NOTE(review): presumably represents a sandbox whose container is not (yet)
    available — e.g. creation still in progress or failed — confirm against the
    code that populates the pending map.
    """

    # The creation request associated with this entry.
    request: CreateSandboxRequest
    # Timestamp recorded for the entry's creation.
    created_at: datetime
    # Expiration timestamp, or None when no expiration applies.
    expires_at: Optional[datetime]
    # Status object reported for this entry.
    status: SandboxStatus


class DockerSandboxService(OSSFSMixin, SandboxService):
    """
    Docker-based implementation of SandboxService.

    This class implements sandbox lifecycle operations using Docker containers.
    """

    def __init__(self, config: Optional[AppConfig] = None):
        """
        Initialize Docker sandbox service.

        Initializes Docker service from environment variables.
        The service will read configuration from:
        - DOCKER_HOST: Docker daemon URL (e.g., 'unix://var/run/docker.sock' or 'tcp://127.0.0.1:2376')
        - DOCKER_TLS_CERTDIR: Directory containing TLS certificates
        - Other Docker environment variables as needed

        Note: Connection is not verified at initialization time.
        Connection errors will be raised when Docker operations are performed.

        Args:
            config: Application configuration; when omitted, ``get_config()`` is used.

        Raises:
            ValueError: If ``runtime.type`` in the configuration is not ``"docker"``.
            HTTPException: 503 with code ``DOCKER::INITIALIZATION_ERROR`` when the
                Docker client cannot be created.
        """
        self.app_config = config or get_config()
        runtime_config = self.app_config.runtime
        if runtime_config.type != "docker":
            raise ValueError("DockerSandboxService requires runtime.type = 'docker'.")

        self.execd_image = runtime_config.execd_image
        # Network mode falls back to host networking when not configured.
        self.network_mode = (self.app_config.docker.network_mode or HOST_NETWORK_MODE).lower()
        self._execd_archive_cache: Optional[bytes] = None
        self._api_timeout = self._resolve_api_timeout()
        try:
            # Initialize Docker service from environment variables
            client_kwargs = {}
            try:
                # Pass the timeout as a keyword only if from_env advertises it
                # as a named parameter.
                signature = inspect.signature(docker.from_env)
                if "timeout" in signature.parameters:
                    client_kwargs["timeout"] = self._api_timeout
            except (ValueError, TypeError):
                logger.debug(
                    "Unable to introspect docker.from_env signature; using default parameters."
                )
            self.docker_client = docker.from_env(**client_kwargs)
            if not client_kwargs:
                # The timeout could not be passed at construction time, so set it
                # on the low-level API client instead (best effort).
                try:
                    self.docker_client.api.timeout = self._api_timeout
                except AttributeError:
                    logger.debug("Docker client API does not expose timeout attribute.")
            logger.info("Docker service initialized from environment")
        except Exception as e:  # noqa: BLE001
            # Common failure mode on macOS/dev machines: Docker daemon not running or socket path wrong.
            hint = ""
            msg = str(e)
            if isinstance(e, FileNotFoundError) or "No such file or directory" in msg:
                docker_host = os.environ.get("DOCKER_HOST", "")
                hint = (
                    " Docker daemon seems unavailable (unix socket not found). "
                    "Make sure Docker Desktop (or Colima/Rancher Desktop) is running. "
                    "If you use Colima on macOS, you may need to set "
                    "DOCKER_HOST=unix://${HOME}/.colima/default/docker.sock before starting the server. "
                    f"(current DOCKER_HOST='{docker_host}')"
                )
            raise HTTPException(
                status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
                detail={
                    "code": SandboxErrorCodes.DOCKER_INITIALIZATION_ERROR,
                    "message": f"Failed to initialize Docker service: {str(e)}.{hint}",
                },
            )
        # In-memory bookkeeping; each map is paired with the lock created next to it.
        self._expiration_lock = Lock()
        self._execd_archive_lock = Lock()
        self._sandbox_expirations: Dict[str, datetime] = {}
        self._expiration_timers: Dict[str, Timer] = {}
        self._pending_sandboxes: Dict[str, PendingSandbox] = {}
        self._pending_lock = Lock()
        self._pending_cleanup_timers: Dict[str, Timer] = {}
        self._ossfs_mount_lock = Lock()
        self._ossfs_mount_ref_counts: Dict[str, int] = {}
        # Must run after the locks/maps above exist: it rebuilds expiration state
        # from the containers Docker already knows about.
        # NOTE(review): this runs before self.resolver / self.docker_runtime are
        # assigned below — confirm nothing on the restore path reads them.
        self._restore_existing_sandboxes()

        # Initialize secure runtime resolver
        self.resolver = SecureRuntimeResolver(self.app_config)
        self.docker_runtime = self.resolver.get_docker_runtime()

    def _resolve_api_timeout(self) -> int:
        """Docker API timeout in seconds: [docker].api_timeout if set, else default 180."""
        cfg = self.app_config.docker.api_timeout
        if cfg is not None and cfg >= 1:
            return cfg
        return 180

    @contextmanager
    def _docker_operation(self, action: str, sandbox_id: Optional[str] = None):
        """
        Time the wrapped block and log the outcome.

        Args:
            action: Short label for the operation, included in the log line.
            sandbox_id: Sandbox the operation targets; logged as "shared" when falsy.

        A successful block is logged at INFO; a block that raises ``Exception``
        is logged at WARNING (with the error) and the exception is re-raised.
        The logged duration is the elapsed perf-counter time multiplied by 1000.
        """
        op_label = sandbox_id if sandbox_id else "shared"
        started_at = time.perf_counter()

        def _elapsed_ms() -> float:
            return (time.perf_counter() - started_at) * 1000

        try:
            yield
        except Exception as exc:
            logger.warning(
                "sandbox=%s | action=%s | duration=%.2f | error=%s",
                op_label,
                action,
                _elapsed_ms(),
                exc,
            )
            raise
        else:
            logger.info(
                "sandbox=%s | action=%s | duration=%.2f",
                op_label,
                action,
                _elapsed_ms(),
            )

    def _get_container_by_sandbox_id(self, sandbox_id: str):
        """
        Look up the container labelled with the given sandbox ID.

        Args:
            sandbox_id: Value expected in the sandbox ID label.

        Returns:
            The first container (running or not) whose label matches.

        Raises:
            HTTPException: 500 when the Docker query itself fails, 404 when no
                container carries the label.
        """
        query_filters = {"label": f"{SANDBOX_ID_LABEL}={sandbox_id}"}
        try:
            matches = self.docker_client.containers.list(all=True, filters=query_filters)
        except DockerException as exc:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.CONTAINER_QUERY_FAILED,
                    "message": f"Failed to query sandbox containers: {str(exc)}",
                },
            ) from exc

        if matches:
            return matches[0]

        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail={
                "code": SandboxErrorCodes.SANDBOX_NOT_FOUND,
                "message": f"Sandbox {sandbox_id} not found.",
            },
        )

    def _schedule_expiration(
        self,
        sandbox_id: str,
        expires_at: datetime,
        *,
        update_expiration: bool = True,
        **expire_kwargs,
    ) -> None:
        """Schedule automatic sandbox termination at expiration time."""
        # Delay might already be negative if the timer should fire immediately
        delay = max(0.0, (expires_at - datetime.now(timezone.utc)).total_seconds())
        timer = Timer(
            delay,
            self._expire_sandbox,
            args=(sandbox_id,),
            kwargs=expire_kwargs or None,
        )
        timer.daemon = True
        with self._expiration_lock:
            # Replace existing timer (if any) so renew operations take effect immediately
            existing = self._expiration_timers.pop(sandbox_id, None)
            if existing:
                existing.cancel()
            if update_expiration:
                self._sandbox_expirations[sandbox_id] = expires_at
            self._expiration_timers[sandbox_id] = timer
        timer.start()

    def _remove_expiration_tracking(self, sandbox_id: str) -> None:
        """Remove expiration tracking and cancel any pending timers."""
        with self._expiration_lock:
            timer = self._expiration_timers.pop(sandbox_id, None)
            if timer:
                timer.cancel()
            self._sandbox_expirations.pop(sandbox_id, None)

    @staticmethod
    def _has_manual_cleanup(labels: Dict[str, str]) -> bool:
        """Tell whether the manual-cleanup label is set to "true" (case-insensitive)."""
        flag = labels.get(SANDBOX_MANUAL_CLEANUP_LABEL, "")
        return flag.lower() == "true"

    def _get_tracked_expiration(
        self,
        sandbox_id: str,
        labels: Dict[str, str],
    ) -> Optional[datetime]:
        """
        Resolve a sandbox's expiration time.

        The in-memory record wins; if there is none, the expires-at label is
        parsed instead.

        Returns:
            The expiration timestamp, or None when neither source has a value.
        """
        with self._expiration_lock:
            in_memory = self._sandbox_expirations.get(sandbox_id)
        if in_memory:
            return in_memory

        raw_label = labels.get(SANDBOX_EXPIRES_AT_LABEL)
        return parse_timestamp(raw_label) if raw_label else None

    def _expire_sandbox(
        self,
        sandbox_id: str,
        fallback_mount_keys: Optional[list[str]] = None,
    ) -> None:
        """
        Timer callback to terminate expired sandboxes.

        Args:
            sandbox_id: Sandbox whose expiration timer fired.
            fallback_mount_keys: OSSFS mount keys to release when the container
                can no longer be found (its labels are then unavailable).

        Behaviour:
            - Container not found (404): tracking is removed and any fallback
              mount keys are released.
            - Container lookup failed for another reason: unless the sandbox was
              renewed in the meantime, the expiration is retried in 30 seconds.
            - Container found but the tracked expiration is still in the future:
              nothing is done (the sandbox was renewed).
            - Otherwise the container is killed (if running) and force-removed,
              then tracking, the egress sidecar and the OSSFS mounts listed on
              the container's label are cleaned up.

        Docker errors during kill/remove are logged and not raised.
        """
        mount_keys: list[str] = []
        try:
            container = self._get_container_by_sandbox_id(sandbox_id)
        except HTTPException as exc:
            if exc.status_code == status.HTTP_404_NOT_FOUND:
                # Container is already gone: just drop our bookkeeping.
                self._remove_expiration_tracking(sandbox_id)
                if fallback_mount_keys:
                    self._release_ossfs_mounts(fallback_mount_keys)
            else:
                # Lookup failed for another reason; decide whether to retry.
                with self._expiration_lock:
                    current_expires = self._sandbox_expirations.get(sandbox_id)
                now = datetime.now(timezone.utc)
                if current_expires and current_expires > now:
                    logger.info(
                        "Sandbox %s expiration was renewed; skipping retry.",
                        sandbox_id,
                    )
                else:
                    logger.warning(
                        "Failed to fetch sandbox %s for expiration: %s — "
                        "scheduling retry in 30s",
                        sandbox_id,
                        exc.detail,
                    )
                    retry_at = now + timedelta(seconds=30)
                    # update_expiration=False keeps the recorded expiration
                    # untouched; only the timer is re-armed.
                    self._schedule_expiration(
                        sandbox_id,
                        retry_at,
                        update_expiration=False,
                        fallback_mount_keys=fallback_mount_keys,
                    )
            return

        # Re-check under the lock: a renewal may have landed after this timer fired.
        with self._expiration_lock:
            current_expires = self._sandbox_expirations.get(sandbox_id)
        if current_expires and current_expires > datetime.now(timezone.utc):
            logger.info(
                "Sandbox %s was renewed (expires %s); aborting expiration.",
                sandbox_id,
                current_expires,
            )
            return

        # Recover the OSSFS mount keys from the container label (a JSON list);
        # anything that is not a non-empty string is ignored.
        labels = container.attrs.get("Config", {}).get("Labels") or {}
        mount_keys_raw = labels.get(SANDBOX_OSSFS_MOUNTS_LABEL, "[]")
        try:
            parsed_mount_keys = json.loads(mount_keys_raw)
            if isinstance(parsed_mount_keys, list):
                mount_keys = [key for key in parsed_mount_keys if isinstance(key, str) and key]
        except (TypeError, json.JSONDecodeError):
            mount_keys = []

        # Best effort: kill only when the inspected state says it is running.
        try:
            state = container.attrs.get("State", {})
            if state.get("Running", False):
                container.kill()
        except DockerException as exc:
            logger.warning("Failed to stop expired sandbox %s: %s", sandbox_id, exc)

        try:
            container.remove(force=True)
        except DockerException as exc:
            logger.warning("Failed to remove expired sandbox %s: %s", sandbox_id, exc)

        # NOTE(review): the cleanup below runs even when kill/remove failed above,
        # so a container that could not be removed is no longer tracked — confirm
        # this is the intended best-effort behaviour.
        self._remove_expiration_tracking(sandbox_id)
        # Ensure sidecar is also cleaned up on expiration
        self._cleanup_egress_sidecar(sandbox_id)
        self._release_ossfs_mounts(mount_keys)

    def _restore_existing_sandboxes(self) -> None:
        """
        Rebuild in-memory bookkeeping from the containers Docker already knows about.

        The method lists every container (running or not) and then:

        1. Records egress sidecar containers (identified by ``EGRESS_SIDECAR_LABEL``)
           and skips them for sandbox processing.
        2. For each container carrying ``SANDBOX_ID_LABEL``, accumulates OSSFS mount
           reference counts from ``SANDBOX_OSSFS_MOUNTS_LABEL`` and either schedules
           an expiration timer, counts it as a manual-cleanup sandbox, defers it for
           immediate expiration, or logs a warning when no expiry label exists.
        3. Replaces ``self._ossfs_mount_ref_counts`` with the accumulated counts.
        4. Expires the deferred (already expired) sandboxes.
        5. Removes sidecars whose sandbox container lookup reports 404.

        A failure to list containers is logged and aborts the restore without raising.
        """
        try:
            containers = self.docker_client.containers.list(all=True)
        except DockerException as exc:
            logger.warning("Failed to restore existing sandboxes: %s", exc)
            return

        restored = 0
        # Sandbox IDs for which an egress sidecar container was seen.
        seen_sidecars: set[str] = set()
        # Mount key -> number of sandbox containers whose label references it.
        restored_mount_refs: dict[str, int] = {}
        # (sandbox_id, mount_keys) pairs whose expiry is already in the past.
        expired_entries: list[tuple[str, list[str]]] = []
        now = datetime.now(timezone.utc)

        def _parse_and_accumulate_mount_refs(labels: dict) -> list[str]:
            # Decode the JSON mount-key label; anything that is not a list of
            # non-empty strings is ignored.  Each accepted key bumps its count in
            # restored_mount_refs as a side effect.
            mount_keys_raw = labels.get(SANDBOX_OSSFS_MOUNTS_LABEL, "[]")
            try:
                parsed = json.loads(mount_keys_raw)
            except (TypeError, json.JSONDecodeError):
                parsed = []
            keys: list[str] = []
            if isinstance(parsed, list):
                for key in parsed:
                    if isinstance(key, str) and key:
                        keys.append(key)
                        restored_mount_refs[key] = restored_mount_refs.get(key, 0) + 1
            return keys

        # Pass 1: collect ref counts for ALL sandbox containers (alive + expired)
        # and schedule timers for alive ones.  Expired sandboxes are deferred to
        # pass 2 so that ref counts are fully populated before any release.
        for container in containers:
            labels = container.attrs.get("Config", {}).get("Labels") or {}
            sidecar_for = labels.get(EGRESS_SIDECAR_LABEL)
            if sidecar_for:
                # Sidecars are only remembered for the orphan check below.
                seen_sidecars.add(sidecar_for)
                continue

            sandbox_id = labels.get(SANDBOX_ID_LABEL)
            if not sandbox_id:
                # Not a container managed as a sandbox; leave it alone.
                continue

            mount_keys = _parse_and_accumulate_mount_refs(labels)

            expires_label = labels.get(SANDBOX_EXPIRES_AT_LABEL)
            if expires_label:
                expires_at = parse_timestamp(expires_label)
            elif self._has_manual_cleanup(labels):
                # Counted as restored even though no timer is scheduled for it.
                restored += 1
                continue
            else:
                logger.warning(
                    "Sandbox %s missing expires-at label; skipping expiration scheduling.",
                    sandbox_id,
                )
                continue

            if expires_at <= now:
                logger.info("Sandbox %s already expired; terminating now.", sandbox_id)
                expired_entries.append((sandbox_id, mount_keys))
                continue

            self._schedule_expiration(sandbox_id, expires_at)
            restored += 1

        # Populate ref counts before expiring anything so the mount release
        # performed during expiration can properly decrement and unmount.
        with self._ossfs_mount_lock:
            self._ossfs_mount_ref_counts = restored_mount_refs

        # Pass 2: expire deferred sandboxes (ref counts are now available).
        # Cached mount keys are passed as fallback so that mounts are still
        # released even if the container vanishes between pass 1 and pass 2.
        for sandbox_id, cached_mount_keys in expired_entries:
            self._expire_sandbox(sandbox_id, fallback_mount_keys=cached_mount_keys)

        # Cleanup orphan sidecars (no matching sandbox container)
        for orphan_id in seen_sidecars:
            try:
                self._get_container_by_sandbox_id(orphan_id)
            except HTTPException as exc:
                # Only a definite 404 means the sandbox is gone; other errors are
                # logged and the sidecar is left in place.
                if exc.status_code == status.HTTP_404_NOT_FOUND:
                    self._cleanup_egress_sidecar(orphan_id)
                else:
                    logger.warning(
                        "Failed to check sandbox %s for orphan sidecar cleanup: %s", orphan_id, exc
                    )

        if restored:
            logger.info("Restored expiration timers for %d sandbox(es).", restored)

    def _fetch_execd_archive(self) -> bytes:
        """
        Fetch (and memoize) the execd archive from a temporary execd container.

        The first caller creates a short-lived container from ``self.execd_image``,
        reads the ``/execd`` path out of it as a tar stream and stores the bytes in
        ``self._execd_archive_cache``.  Later callers get the cached bytes.

        Returns:
            The raw tar archive bytes of ``/execd``.

        Raises:
            HTTPException: 503 (``EXECD_START_FAILED``) when the image cannot be
                pulled or the temporary container cannot be created/started;
                500 (``EXECD_DISTRIBUTION_FAILED``) when the archive cannot be read.
        """
        if self._execd_archive_cache is not None:
            return self._execd_archive_cache

        with self._execd_archive_lock:
            # Re-check under the lock so only one thread populates the cache.
            if self._execd_archive_cache is not None:
                return self._execd_archive_cache

            container = None
            # The outer try/finally guarantees the temporary container is removed
            # on every exit path, including a failure between create and start
            # (previously that path left the container behind).
            try:
                try:
                    try:
                        # Prefer a locally built image (e.g., opensandbox/execd:local);
                        # pull only if missing.
                        self.docker_client.images.get(self.execd_image)
                        logger.info(
                            "Found execd image %s locally; skipping pull", self.execd_image
                        )
                    except ImageNotFound:
                        with self._docker_operation(
                            f"pull execd image {self.execd_image}",
                            "execd-cache",
                        ):
                            self.docker_client.images.pull(self.execd_image)

                    with self._docker_operation("execd cache create container", "execd-cache"):
                        container = self.docker_client.containers.create(
                            image=self.execd_image,
                            command=["tail", "-f", "/dev/null"],
                            name=f"sandbox-execd-{uuid4()}",
                            detach=True,
                            auto_remove=False,
                        )
                    with self._docker_operation("execd cache start container", "execd-cache"):
                        container.start()
                        container.reload()
                        logger.info(
                            "Created sandbox execd archive for container %s", container.id
                        )
                except DockerException as exc:
                    raise HTTPException(
                        status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
                        detail={
                            "code": SandboxErrorCodes.EXECD_START_FAILED,
                            "message": f"Failed to start execd container: {str(exc)}",
                        },
                    ) from exc

                try:
                    with self._docker_operation("execd cache read archive", "execd-cache"):
                        stream, _ = container.get_archive("/execd")
                        data = b"".join(stream)
                except DockerException as exc:
                    raise HTTPException(
                        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                        detail={
                            "code": SandboxErrorCodes.EXECD_DISTRIBUTION_FAILED,
                            "message": f"Failed to read execd artifacts: {str(exc)}",
                        },
                    ) from exc
            finally:
                if container is not None:
                    # Best-effort removal: a cleanup failure must not mask the
                    # result (or the original error) of the fetch itself.
                    try:
                        with self._docker_operation("execd cache cleanup container", "execd-cache"):
                            container.remove(force=True)
                    except DockerException as cleanup_exc:
                        logger.warning(
                            "Failed to cleanup temporary execd container: %s", cleanup_exc
                        )

            self._execd_archive_cache = data
            logger.info("Dumped execd archive to memory")
            return data

    def _container_to_sandbox(self, container, sandbox_id: Optional[str] = None) -> Sandbox:
        """
        Translate a Docker container object into the API-level ``Sandbox`` model.

        Args:
            container: Docker container whose ``attrs`` hold the inspect data.
            sandbox_id: Optional explicit ID; when omitted the ID is taken from
                the container's ``SANDBOX_ID_LABEL`` label.

        Returns:
            A ``Sandbox`` describing the container's state, image, metadata,
            entrypoint and timestamps.

        Raises:
            HTTPException: 500 when neither ``sandbox_id`` nor the label yields an ID.
        """
        labels = container.attrs.get("Config", {}).get("Labels") or {}
        resolved_id = sandbox_id or labels.get(SANDBOX_ID_LABEL)
        if not resolved_id:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.SANDBOX_NOT_FOUND,
                    "message": "Container missing sandbox ID label.",
                },
            )

        docker_state = container.attrs.get("State", {})
        raw_status = (docker_state.get("Status") or container.status or "").lower()
        exit_code = docker_state.get("ExitCode")
        finished_at = docker_state.get("FinishedAt")

        # Map the Docker state onto a (state, reason, message) triple.  A paused
        # flag takes precedence over running, which takes precedence over the rest.
        if docker_state.get("Paused", False):
            phase = ("Paused", "CONTAINER_PAUSED", "Sandbox container is paused.")
        elif docker_state.get("Running", False):
            phase = ("Running", "CONTAINER_RUNNING", "Sandbox container is running.")
        elif docker_state.get("Restarting", False):
            phase = ("Running", "CONTAINER_RESTARTING", "Sandbox container is restarting.")
        elif raw_status in ("created", "starting"):
            phase = ("Pending", "CONTAINER_STARTING", "Sandbox container is starting.")
        elif raw_status in ("exited", "dead"):
            if exit_code == 0:
                phase = (
                    "Terminated",
                    "CONTAINER_EXITED",
                    "Sandbox container exited successfully.",
                )
            else:
                phase = (
                    "Failed",
                    "CONTAINER_EXITED_ERROR",
                    f"Sandbox container exited with code {exit_code}.",
                )
        else:
            phase = (
                "Unknown",
                "CONTAINER_STATE_UNKNOWN",
                f"Sandbox container is in state '{raw_status or 'unknown'}'.",
            )
        state, reason, message = phase

        # Bookkeeping labels are not exposed as user metadata.
        internal_labels = (
            SANDBOX_ID_LABEL,
            SANDBOX_EXPIRES_AT_LABEL,
            SANDBOX_MANUAL_CLEANUP_LABEL,
        )
        visible_labels = {
            name: value for name, value in labels.items() if name not in internal_labels
        }
        metadata = visible_labels if visible_labels else None

        command = container.attrs.get("Config", {}).get("Cmd") or []
        entrypoint = [command] if isinstance(command, str) else command

        # Fall back to the image short ID when the image carries no tags.
        tags = container.image.tags
        if tags:
            image_uri = tags[0]
        else:
            image_uri = container.image.short_id
        image_spec = ImageSpec(uri=image_uri)

        created_at = parse_timestamp(container.attrs.get("Created"))
        # "0001-01-01T00:00:00Z" is treated as "no finish time recorded".
        if finished_at and finished_at != "0001-01-01T00:00:00Z":
            last_transition_at = parse_timestamp(finished_at)
        else:
            last_transition_at = created_at
        expires_at = self._get_tracked_expiration(resolved_id, labels)

        return Sandbox(
            id=resolved_id,
            image=image_spec,
            status=SandboxStatus(
                state=state,
                reason=reason,
                message=message,
                last_transition_at=last_transition_at,
            ),
            metadata=metadata,
            entrypoint=entrypoint,
            expiresAt=expires_at,
            createdAt=created_at,
        )

    def _ensure_directory(self, container, path: str, sandbox_id: Optional[str] = None) -> None:
        """Create a directory within the target container if it does not exist."""
        if not path or path == "/":
            return
        normalized_path = path.rstrip("/")
        if not normalized_path:
            return
        tar_stream = io.BytesIO()
        with tarfile.open(fileobj=tar_stream, mode="w") as tar:
            dir_info = tarfile.TarInfo(name=normalized_path.lstrip("/"))
            dir_info.type = tarfile.DIRTYPE
            dir_info.mode = 0o755
            dir_info.mtime = int(time.time())
            tar.addfile(dir_info)
        tar_stream.seek(0)
        try:
            with self._docker_operation(f"ensure directory {normalized_path}", sandbox_id):
                container.put_archive(path="/", data=tar_stream.getvalue())
        except DockerException as exc:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.EXECD_DISTRIBUTION_FAILED,
                    "message": f"Failed to create directory {path} in sandbox: {str(exc)}",
                },
            ) from exc

    def _copy_execd_to_container(self, container, sandbox_id: str) -> None:
        """
        Upload the cached execd archive into the sandbox container.

        The archive is extracted into the parent directory of
        ``EXECED_INSTALL_PATH``; that directory is created first.

        Raises:
            HTTPException: 500 (``EXECD_DISTRIBUTION_FAILED``) when the upload fails.
        """
        execd_payload = self._fetch_execd_archive()
        install_root = posixpath.dirname(EXECED_INSTALL_PATH.rstrip("/"))
        if not install_root:
            install_root = "/"
        self._ensure_directory(container, install_root, sandbox_id)

        try:
            with self._docker_operation("copy execd archive to sandbox", sandbox_id):
                container.put_archive(path=install_root, data=execd_payload)
        except DockerException as docker_err:
            failure = {
                "code": SandboxErrorCodes.EXECD_DISTRIBUTION_FAILED,
                "message": f"Failed to copy execd into sandbox: {str(docker_err)}",
            }
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=failure,
            ) from docker_err

    def _install_bootstrap_script(self, container, sandbox_id: str) -> None:
        """
        Write the bootstrap shell launcher to ``BOOTSTRAP_PATH`` in the container.

        The launcher starts the execd binary in the background (logging to
        ``/tmp/execd.log``) and then ``exec``s the command it was given.

        Raises:
            HTTPException: 500 (``BOOTSTRAP_INSTALL_FAILED``) when the upload fails.
        """
        self._ensure_directory(container, posixpath.dirname(BOOTSTRAP_PATH), sandbox_id)

        launcher_lines = (
            "#!/bin/sh",
            "set -e",
            f"{EXECED_INSTALL_PATH} >/tmp/execd.log 2>&1 &",
            'exec "$@"',
        )
        # Every line, including the last, is newline-terminated.
        launcher = "".join(line + "\n" for line in launcher_lines).encode("utf-8")

        member = tarfile.TarInfo(name=BOOTSTRAP_PATH.lstrip("/"))
        member.mode = 0o755
        member.size = len(launcher)
        member.mtime = int(time.time())

        buffer = io.BytesIO()
        with tarfile.open(fileobj=buffer, mode="w") as archive:
            archive.addfile(member, io.BytesIO(launcher))

        try:
            with self._docker_operation("install bootstrap script", sandbox_id):
                container.put_archive(path="/", data=buffer.getvalue())
        except DockerException as docker_err:
            failure = {
                "code": SandboxErrorCodes.BOOTSTRAP_INSTALL_FAILED,
                "message": f"Failed to install bootstrap launcher: {str(docker_err)}",
            }
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=failure,
            ) from docker_err

    def _prepare_sandbox_runtime(self, container, sandbox_id: str) -> None:
        """
        Copy execd artifacts and bootstrap launcher into the sandbox container.

        Args:
            container: Target sandbox container.
            sandbox_id: Sandbox ID forwarded to both installation steps.

        Raises:
            HTTPException: Propagated from either step when an upload fails.
        """
        # execd is copied first, then the launcher script that starts it.
        self._copy_execd_to_container(container, sandbox_id)
        self._install_bootstrap_script(container, sandbox_id)

    def _prepare_creation_context(
        self,
        request: CreateSandboxRequest,
    ) -> tuple[str, datetime, Optional[datetime]]:
        """
        Produce the identity and timestamps for a sandbox about to be created.

        Returns:
            ``(sandbox_id, created_at, expires_at)`` where ``created_at`` is the
            current UTC time and ``expires_at`` is ``None`` when the request has
            no timeout.
        """
        new_id = self.generate_sandbox_id()
        now_utc = datetime.now(timezone.utc)
        if request.timeout is None:
            return new_id, now_utc, None
        return new_id, now_utc, calculate_expiration_or_raise(now_utc, request.timeout)

    @staticmethod
    def _allocate_host_port(
        min_port: int = 40000, max_port: int = 60000, attempts: int = 50
    ) -> Optional[int]:
        """Find an available TCP port on the host within the given range."""
        for _ in range(attempts):
            port = random.randint(min_port, max_port)
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                try:
                    sock.bind(("0.0.0.0", port))
                except OSError:
                    continue
                return port
        return None

    async def create_sandbox(self, request: CreateSandboxRequest) -> CreateSandboxResponse:
        """
        Validate a creation request and provision the sandbox with Docker.

        Validation runs in a fixed order (entrypoint, metadata labels, timeout
        limit, network policy support, network existence, volumes) before any
        container is provisioned.

        Args:
            request: Sandbox creation request

        Returns:
            CreateSandboxResponse: Created sandbox information

        Raises:
            HTTPException: If validation or sandbox creation fails
        """
        ensure_entrypoint(request.entrypoint)
        ensure_metadata_labels(request.metadata)
        timeout_ceiling = self.app_config.server.max_sandbox_timeout_seconds
        ensure_timeout_within_limit(request.timeout, timeout_ceiling)
        self._ensure_network_policy_support(request)
        self._validate_network_exists()

        # Volume validation also yields the PVC inspect data reused when provisioning.
        inspected_volumes = self._validate_volumes(request)
        new_id, created, expiry = self._prepare_creation_context(request)
        response = self._provision_sandbox(new_id, request, created, expiry, inspected_volumes)
        return response

    def _async_provision_worker(
        self,
        sandbox_id: str,
        request: CreateSandboxRequest,
        created_at: datetime,
        expires_at: Optional[datetime],
        pvc_inspect_cache: Optional[dict[str, dict]] = None,
    ) -> None:
        """
        Provision a sandbox and reconcile its pending-state entry afterwards.

        On success the pending entry is removed.  On any failure the pending
        entry is marked failed, leftover containers are cleaned up, and removal
        of the pending entry is scheduled.
        """

        def _record_failure(reason: str) -> None:
            # Shared failure path: flag the entry, remove debris, schedule expiry.
            self._mark_pending_failed(sandbox_id, reason)
            self._cleanup_failed_containers(sandbox_id)
            self._schedule_pending_cleanup(sandbox_id)

        try:
            self._provision_sandbox(sandbox_id, request, created_at, expires_at, pvc_inspect_cache)
        except HTTPException as http_err:
            detail = http_err.detail
            if isinstance(detail, dict):
                reason = detail.get("message")
            else:
                reason = str(http_err)
            _record_failure(reason or "Sandbox provisioning failed.")
            return
        except Exception as err:  # noqa: BLE001
            logger.exception("Unexpected error provisioning sandbox %s: %s", sandbox_id, err)
            _record_failure(str(err))
            return

        self._remove_pending_sandbox(sandbox_id)

    def _mark_pending_failed(self, sandbox_id: str, message: str) -> None:
        with self._pending_lock:
            pending = self._pending_sandboxes.get(sandbox_id)
            if not pending:
                return
            pending.status = SandboxStatus(
                state="Failed",
                reason="PROVISIONING_ERROR",
                message=message,
                last_transition_at=datetime.now(timezone.utc),
            )

    def _cleanup_failed_containers(self, sandbox_id: str) -> None:
        """
        Best-effort cleanup for containers left behind after a failed provision.

        Every container labelled with this sandbox ID is force-removed and the
        OSSFS mounts recorded in its ``SANDBOX_OSSFS_MOUNTS_LABEL`` label are
        released, whether or not the removal succeeded.  The egress sidecar
        cleanup is always attempted, including when listing containers fails.

        Args:
            sandbox_id: ID of the sandbox whose leftovers should be removed.
        """
        label_selector = f"{SANDBOX_ID_LABEL}={sandbox_id}"
        try:
            containers = self.docker_client.containers.list(
                all=True, filters={"label": label_selector}
            )
        except DockerException as exc:
            logger.warning("sandbox=%s | cleanup listing failed containers: %s", sandbox_id, exc)
            self._cleanup_egress_sidecar(sandbox_id)
            return

        for container in containers:
            labels = container.attrs.get("Config", {}).get("Labels") or {}
            mount_keys_raw = labels.get(SANDBOX_OSSFS_MOUNTS_LABEL, "[]")
            try:
                decoded = json.loads(mount_keys_raw)
            except (TypeError, json.JSONDecodeError):
                decoded = []
            # Accept only a list of non-empty strings, as the other label parsers
            # in this class do.  A malformed label (null, object, number, ...)
            # would otherwise reach the mount release inside ``finally`` and could
            # raise there, aborting cleanup of the remaining containers.
            mount_keys: list[str] = (
                [key for key in decoded if isinstance(key, str) and key]
                if isinstance(decoded, list)
                else []
            )
            try:
                with self._docker_operation("cleanup failed sandbox container", sandbox_id):
                    container.remove(force=True)
            except DockerException as exc:
                logger.warning(
                    "sandbox=%s | failed to remove leftover container %s: %s",
                    sandbox_id,
                    container.id,
                    exc,
                )
            finally:
                # Mounts are released even when the container could not be removed.
                self._release_ossfs_mounts(mount_keys)
        # Always attempt to cleanup sidecar as well
        self._cleanup_egress_sidecar(sandbox_id)

    def _remove_pending_sandbox(self, sandbox_id: str) -> None:
        with self._pending_lock:
            timer = self._pending_cleanup_timers.pop(sandbox_id, None)
            if timer:
                timer.cancel()
            self._pending_sandboxes.pop(sandbox_id, None)

    def _get_pending_sandbox(self, sandbox_id: str) -> Optional[PendingSandbox]:
        """Look up the pending entry for ``sandbox_id`` under the pending lock (``None`` if absent)."""
        with self._pending_lock:
            return self._pending_sandboxes.get(sandbox_id)

    def _iter_pending_sandboxes(self) -> list[tuple[str, PendingSandbox]]:
        """Return a point-in-time copy of all ``(sandbox_id, pending)`` pairs."""
        with self._pending_lock:
            # Copy while holding the lock so callers can iterate without it.
            snapshot = [*self._pending_sandboxes.items()]
        return snapshot

    @staticmethod
    def _pending_to_sandbox(sandbox_id: str, pending: PendingSandbox) -> Sandbox:
        """Build the API ``Sandbox`` view of a sandbox that is still pending."""
        original_request = pending.request
        return Sandbox(
            id=sandbox_id,
            image=original_request.image,
            status=pending.status,
            metadata=original_request.metadata,
            entrypoint=original_request.entrypoint,
            expiresAt=pending.expires_at,
            createdAt=pending.created_at,
        )

    def _update_container_labels(self, container, labels: Dict[str, str]) -> None:
        """
        Update container labels, falling back to raw API if docker-py lacks support.
        """
        try:
            container.update(labels=labels)
        except TypeError:
            # Older docker-py versions do not accept labels; call low-level API directly.
            url = self.docker_client.api._url(f"/containers/{container.id}/update")  # noqa: SLF001
            data = {"Labels": labels}
            self.docker_client.api._post_json(url, data=data)  # noqa: SLF001
        container.reload()

    def _schedule_pending_cleanup(self, sandbox_id: str) -> None:
        """
        Arrange for a pending entry to be dropped after ``PENDING_FAILURE_TTL_SECONDS``.

        Any timer previously registered for the same sandbox is cancelled and
        replaced.  The new timer runs as a daemon thread.
        """
        cleanup_timer = Timer(
            PENDING_FAILURE_TTL_SECONDS,
            lambda: self._remove_pending_sandbox(sandbox_id),
        )
        cleanup_timer.daemon = True

        with self._pending_lock:
            superseded = self._pending_cleanup_timers.pop(sandbox_id, None)
            if superseded:
                superseded.cancel()
            self._pending_cleanup_timers[sandbox_id] = cleanup_timer

        # Started outside the lock, after registration.
        cleanup_timer.start()

    def _pull_image(
        self,
        image_uri: str,
        auth_config: Optional[dict],
        sandbox_id: str,
    ) -> None:
        try:
            with self._docker_operation(f"pull image {image_uri}", sandbox_id):
                self.docker_client.images.pull(image_uri, auth_config=auth_config)
        except DockerException as exc:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.IMAGE_PULL_FAILED,
                    "message": f"Failed to pull image {image_uri}: {str(exc)}",
                },
            ) from exc

    def _ensure_image_available(
        self,
        image_uri: str,
        auth_config: Optional[dict],
        sandbox_id: str,
    ) -> None:
        """
        Make sure ``image_uri`` is present locally, pulling it only when missing.

        Raises:
            HTTPException: 500 (``IMAGE_PULL_FAILED``) when the local lookup fails
                for a reason other than the image being absent, or when the
                subsequent pull fails.
        """
        operation = f"inspect image {image_uri}"
        try:
            with self._docker_operation(operation, sandbox_id):
                self.docker_client.images.get(image_uri)
                logger.debug("Sandbox %s using cached image %s", sandbox_id, image_uri)
        except ImageNotFound:
            # Not cached locally: fetch it from the registry.
            self._pull_image(image_uri, auth_config, sandbox_id)
        except DockerException as docker_err:
            failure = {
                "code": SandboxErrorCodes.IMAGE_PULL_FAILED,
                "message": f"Failed to inspect image {image_uri}: {str(docker_err)}",
            }
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=failure,
            ) from docker_err

    def _provision_sandbox(
        self,
        sandbox_id: str,
        request: CreateSandboxRequest,
        created_at: datetime,
        expires_at: Optional[datetime],
        pvc_inspect_cache: Optional[dict[str, dict]] = None,
    ) -> CreateSandboxResponse:
        """
        Create and start the sandbox container (plus egress sidecar when requested).

        Steps, in order: build labels/environment, resolve image and resource
        limits, prepare OSSFS mounts, build volume binds, configure networking
        (egress sidecar or direct port bindings), create and start the container,
        and finally schedule expiration when ``expires_at`` is set.

        Args:
            sandbox_id: ID assigned to the new sandbox.
            request: The validated creation request.
            created_at: Creation timestamp reported in the response.
            expires_at: Expiration time, or ``None`` for no scheduled expiration.
            pvc_inspect_cache: Volume inspect data from validation, forwarded to
                ``_build_volume_binds``.

        Returns:
            CreateSandboxResponse describing the running sandbox.

        Raises:
            Exception: Any error from bind building, sidecar start or container
                start is re-raised after the sidecar is removed (best effort) and
                the OSSFS mounts are released.
        """
        labels, environment = self._build_labels_and_env(sandbox_id, request, expires_at)
        # NOTE(review): auth_config is not referenced again in this method —
        # confirm registry credentials reach the image pull by another route.
        image_uri, auth_config = self._resolve_image_auth(request, sandbox_id)
        mem_limit, nano_cpus = self._resolve_resource_limits(request)
        egress_token: Optional[str] = None

        # Prepare OSSFS mounts first so binds can reference mounted host paths.
        ossfs_mount_keys = self._prepare_ossfs_mounts(request.volumes)
        if ossfs_mount_keys:
            # Persist the mount keys on the container so later cleanup can
            # release them from the label alone.
            labels[SANDBOX_OSSFS_MOUNTS_LABEL] = json.dumps(
                ossfs_mount_keys,
                separators=(",", ":"),
            )

        sidecar_container = None
        try:
            # Build volume bind mounts from request volumes.
            # pvc_inspect_cache carries Docker volume inspect data from the
            # validation phase, avoiding a redundant API call.
            volume_binds = self._build_volume_binds(request.volumes, pvc_inspect_cache)

            host_config_kwargs: Dict[str, Any]
            exposed_ports: Optional[list[str]] = None

            if request.network_policy:
                # Network policy path: the sandbox joins the sidecar's network
                # namespace, and the host ports are handed to the sidecar.
                egress_token = generate_egress_token()
                labels[SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY] = egress_token
                host_execd_port, host_http_port = self._allocate_distinct_host_ports()
                sidecar_container = self._start_egress_sidecar(
                    sandbox_id=sandbox_id,
                    network_policy=request.network_policy,
                    egress_token=egress_token,
                    host_execd_port=host_execd_port,
                    host_http_port=host_http_port,
                )
                labels[SANDBOX_EMBEDDING_PROXY_PORT_LABEL] = str(host_execd_port)
                labels[SANDBOX_HTTP_PORT_LABEL] = str(host_http_port)
                host_config_kwargs = self._base_host_config_kwargs(
                    mem_limit, nano_cpus, f"container:{sidecar_container.id}"
                )
                # Drop NET_ADMIN for the main container; only the sidecar should keep it
                cap_drop = set(host_config_kwargs.get("cap_drop") or [])
                cap_drop.add("NET_ADMIN")
                if cap_drop:
                    host_config_kwargs["cap_drop"] = list(cap_drop)
            else:
                host_config_kwargs = self._base_host_config_kwargs(
                    mem_limit, nano_cpus, self.network_mode
                )
                if self.network_mode != HOST_NETWORK_MODE:
                    # Outside host networking, publish container ports 44772 and
                    # 8080 on freshly allocated host ports and record them as labels.
                    host_execd_port, host_http_port = self._allocate_distinct_host_ports()
                    port_bindings = {
                        "44772": ("0.0.0.0", host_execd_port),
                        "8080": ("0.0.0.0", host_http_port),
                    }
                    host_config_kwargs["port_bindings"] = port_bindings
                    exposed_ports = list(port_bindings.keys())
                    labels[SANDBOX_EMBEDDING_PROXY_PORT_LABEL] = str(host_execd_port)
                    labels[SANDBOX_HTTP_PORT_LABEL] = str(host_http_port)

            # Inject volume bind mounts into Docker host config
            if volume_binds:
                host_config_kwargs["binds"] = volume_binds

            self._create_and_start_container(
                sandbox_id,
                image_uri,
                request.entrypoint,
                labels,
                environment,
                host_config_kwargs,
                exposed_ports,
            )
        except Exception:
            # Roll back what this method created: the sidecar (if started) and
            # the OSSFS mounts, then surface the original error.
            if sidecar_container is not None:
                try:
                    sidecar_container.remove(force=True)
                except DockerException as cleanup_exc:
                    logger.warning(
                        "Failed to cleanup egress sidecar for sandbox %s: %s",
                        sandbox_id,
                        cleanup_exc,
                    )
            self._release_ossfs_mounts(ossfs_mount_keys)
            raise

        status_info = SandboxStatus(
            state="Running",
            reason="CONTAINER_RUNNING",
            message="Sandbox container started successfully.",
            last_transition_at=created_at,
        )

        # Sandboxes without an expiration get no timer.
        if expires_at is not None:
            self._schedule_expiration(sandbox_id, expires_at)

        return CreateSandboxResponse(
            id=sandbox_id,
            status=status_info,
            metadata=request.metadata,
            expiresAt=expires_at,
            createdAt=created_at,
            entrypoint=request.entrypoint,
        )

    def _is_user_defined_network(self) -> bool:
        """Tell whether ``network_mode`` names a user-defined network.

        Host, bridge, ``none`` and ``container:*`` modes are not user-defined.
        """
        mode = self.network_mode
        if mode in {HOST_NETWORK_MODE, BRIDGE_NETWORK_MODE, "none"}:
            return False
        return not mode.startswith("container:")

    def _validate_network_exists(self) -> None:
        """
        Verify the configured user-defined Docker network exists before creating a sandbox.

        Built-in modes (host, bridge, none, ``container:*``) are not checked.

        Raises:
            HTTPException: 400 (``INVALID_PARAMETER``) when the network does not
                exist; 500 (``CONTAINER_START_FAILED``) when the lookup itself fails.
        """
        if not self._is_user_defined_network():
            return
        try:
            self.docker_client.networks.get(self.network_mode)
        except DockerNotFound as exc:
            # Chain explicitly so the traceback shows the Docker error as the
            # direct cause, matching the handler below.
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={
                    "code": SandboxErrorCodes.INVALID_PARAMETER,
                    "message": (
                        f"Docker network '{self.network_mode}' does not exist. "
                        "Create it first with 'docker network create <name>'."
                    ),
                },
            ) from exc
        except DockerException as exc:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.CONTAINER_START_FAILED,
                    "message": f"Failed to inspect Docker network '{self.network_mode}': {exc}",
                },
            ) from exc

    def _ensure_network_policy_support(self, request: CreateSandboxRequest) -> None:
        """
        Reject a network policy that the current runtime configuration cannot enforce.

        Requests without a network policy pass unchecked.  Otherwise the Docker
        ``network_mode`` must be neither host nor a user-defined network, and the
        egress configuration must satisfy ``ensure_egress_configured``.

        Raises:
            HTTPException: 400 (``INVALID_PARAMETER``) for an unsupported network mode.
        """
        if not request.network_policy:
            return

        def _unsupported(message: str):
            # Both network-mode rejections share the same status and error code.
            return HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={
                    "code": SandboxErrorCodes.INVALID_PARAMETER,
                    "message": message,
                },
            )

        # Docker-specific validation: host networking cannot carry a policy.
        if self.network_mode == HOST_NETWORK_MODE:
            raise _unsupported("networkPolicy is not supported when docker network_mode=host.")

        # User-defined networks cannot be combined with networkPolicy: the egress sidecar
        # always runs on the default bridge, which would silently discard the configured network.
        if self._is_user_defined_network():
            raise _unsupported(
                f"networkPolicy is not supported when docker network_mode='{self.network_mode}' "
                "(user-defined network). Use network_mode='bridge' to enable network policy enforcement."
            )

        # Common validation: egress.image must be configured
        ensure_egress_configured(request.network_policy, self.app_config.egress)

    def _validate_volumes(self, request: CreateSandboxRequest) -> dict[str, dict]:
        """
        Validate the request's volume definitions for the Docker runtime.

        Shared validation (``ensure_volumes_valid``) runs first over the whole
        list; each volume is then handed to the validator for its backend
        (host, PVC or OSSFS).

        Args:
            request: Sandbox creation request.

        Returns:
            A dict keyed by ``pvc.claim_name`` holding the value returned by
            ``_validate_pvc_volume`` for each PVC volume.  Empty when the request
            has no volumes or no PVC volumes.  The result is passed on to
            ``_build_volume_binds``.

        Raises:
            HTTPException: When any validation fails.
        """
        volumes = request.volumes
        if not volumes:
            return {}

        # An empty allowlist is normalised to None before being passed on.
        host_allowlist = self.app_config.storage.allowed_host_paths or None
        ensure_volumes_valid(volumes, allowed_host_prefixes=host_allowlist)

        inspected: dict[str, dict] = {}
        for vol in volumes:
            if vol.host is not None:
                self._validate_host_volume(vol, host_allowlist)
                continue
            if vol.pvc is not None:
                inspected[vol.pvc.claim_name] = self._validate_pvc_volume(vol)
                continue
            if vol.ossfs is not None:
                self._validate_ossfs_volume(vol)
        return inspected

    @staticmethod
    def _validate_host_volume(volume, allowed_prefixes: Optional[list[str]]) -> None:
        """
        Docker-specific validation for host bind mount volumes.

        Validates that the resolved host path (host.path + optional subPath)
        remains within allowed prefixes, then ensures the directory exists on
        the filesystem — creating it automatically if it does not.

        Args:
            volume: Volume with host backend.
            allowed_prefixes: Optional allowlist of host path prefixes.

        Raises:
            HTTPException: When the resolved path is invalid or cannot be created.
        """
        resolved_path = volume.host.path
        if volume.sub_path:
            resolved_path = os.path.normpath(os.path.join(resolved_path, volume.sub_path))

        # Defense in depth: re-validate the resolved path against the
        # allowlist.  Even though sub_path traversal (../) is blocked by
        # ensure_valid_sub_path(), normalizing and re-checking prevents
        # any edge-case bypass.
        if allowed_prefixes and resolved_path != volume.host.path:
            ensure_valid_host_path(resolved_path, allowed_prefixes)

        try:
            os.makedirs(resolved_path, exist_ok=True)
        except OSError as e:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.HOST_PATH_CREATE_FAILED,
                    "message": (
                        f"Volume '{volume.name}': could not ensure host path "
                        f"directory exists at '{resolved_path}': {type(e).__name__}"
                    ),
                },
            )

    def _validate_pvc_volume(self, volume) -> dict:
        """
        Docker-specific validation for PVC (named volume) backend.

        In Docker runtime, the ``pvc`` backend maps to a Docker named volume.
        ``pvc.claimName`` is used as the Docker volume name.  The volume must
        already exist (created via ``docker volume create``).

        When ``subPath`` is specified, the volume must use the ``local`` driver
        so that the host-side ``Mountpoint`` is a real filesystem path.  The
        resolved path (``Mountpoint + subPath``) is validated for path-traversal
        safety but *not* for existence, because the Mountpoint directory is
        typically owned by root and may not be stat-able by the server process.

        Args:
            volume: Volume with pvc backend.

        Returns:
            The ``docker volume inspect`` result dict for the named volume.

        Raises:
            HTTPException: When the named volume does not exist, inspection
                fails, or subPath constraints are violated.
        """
        volume_name = volume.pvc.claim_name
        try:
            vol_info = self.docker_client.api.inspect_volume(volume_name)
        except DockerNotFound:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={
                    "code": SandboxErrorCodes.PVC_VOLUME_NOT_FOUND,
                    "message": (
                        f"Volume '{volume.name}': Docker named volume '{volume_name}' "
                        "does not exist. Named volumes must be created before sandbox "
                        "creation (e.g., 'docker volume create <name>')."
                    ),
                },
            )
        except DockerException as exc:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.PVC_VOLUME_INSPECT_FAILED,
                    "message": (
                        f"Volume '{volume.name}': failed to inspect Docker named volume "
                        f"'{volume_name}': {exc}"
                    ),
                },
            ) from exc

        # --- subPath validation for Docker named volumes ---
        if volume.sub_path:
            driver = vol_info.get("Driver", "")
            if driver != "local":
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail={
                        "code": SandboxErrorCodes.PVC_SUBPATH_UNSUPPORTED_DRIVER,
                        "message": (
                            f"Volume '{volume.name}': subPath is only supported for "
                            f"Docker named volumes using the 'local' driver, but "
                            f"volume '{volume_name}' uses driver '{driver}'."
                        ),
                    },
                )

            mountpoint = vol_info.get("Mountpoint", "")
            if not mountpoint:
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail={
                        "code": SandboxErrorCodes.PVC_SUBPATH_UNSUPPORTED_DRIVER,
                        "message": (
                            f"Volume '{volume.name}': cannot resolve subPath because "
                            f"Docker named volume '{volume_name}' has no Mountpoint."
                        ),
                    },
                )

            resolved_path = posixpath.normpath(
                posixpath.join(mountpoint, volume.sub_path)
            )

            # ── Path-escape check (lexical + symlink) ──
            #
            # 1. Lexical check via normpath + path-boundary-aware startswith.
            #    Use mountpoint + "/" to avoid false positives when one
            #    mountpoint is a prefix of another (e.g., …/_data vs …/_data2).
            #    Docker Mountpoint paths are always POSIX, so use "/" directly.
            mountpoint_prefix = (
                mountpoint if mountpoint.endswith("/") else mountpoint + "/"
            )
            if resolved_path != mountpoint and not resolved_path.startswith(
                mountpoint_prefix
            ):
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail={
                        "code": SandboxErrorCodes.INVALID_SUB_PATH,
                        "message": (
                            f"Volume '{volume.name}': resolved subPath escapes the "
                            f"volume mountpoint."
                        ),
                    },
                )

            # 2. Symlink-aware check (best-effort).
            #    Docker volume Mountpoint dirs are typically root-owned and not
            #    readable by the server process.  Using strict=True so that
            #    realpath raises OSError when it cannot traverse a directory
            #    instead of silently returning the unresolved lexical path
            #    (which would make this check a no-op).  When the path IS
            #    accessible, this detects symlink-escape attacks (e.g., a
            #    malicious symlink datasets -> /).
            try:
                canonical_mountpoint = os.path.realpath(
                    mountpoint, strict=True
                )
                canonical_resolved = os.path.realpath(
                    resolved_path, strict=True
                )
                # os.path.realpath returns OS-native separators, so use
                # os.sep here (unlike the lexical check above which operates
                # on POSIX-normalised Docker Mountpoint strings).
                canonical_prefix = (
                    canonical_mountpoint
                    if canonical_mountpoint.endswith(os.sep)
                    else canonical_mountpoint + os.sep
                )
                if (
                    canonical_resolved != canonical_mountpoint
                    and not canonical_resolved.startswith(canonical_prefix)
                ):
                    raise HTTPException(
                        status_code=status.HTTP_400_BAD_REQUEST,
                        detail={
                            "code": SandboxErrorCodes.INVALID_SUB_PATH,
                            "message": (
                                f"Volume '{volume.name}': resolved subPath escapes "
                                f"the volume mountpoint after symlink resolution."
                            ),
                        },
                    )
            except OSError:
                # Cannot access volume paths (expected for non-root server).
                # Lexical validation above is still enforced; the symlink
                # check is skipped because we cannot resolve the real paths.
                pass

            # NOTE: We intentionally do NOT check os.path.exists(resolved_path)
            # here.  Docker volume Mountpoint directories (e.g.,
            # /var/lib/docker/volumes/…/_data) are typically owned by root and
            # not readable by the server process.  os.path.exists() returns
            # False when the process lacks permission to stat the path, causing
            # false-negative rejections.  If the subPath does not actually
            # exist, Docker will report the error at container creation time.

        return vol_info

    def _build_volume_binds(
        self,
        volumes: Optional[list],
        pvc_inspect_cache: Optional[dict[str, dict]] = None,
    ) -> list[str]:
        """
        Convert Volume definitions into Docker bind/volume mount specs.

        Supported backends:
        - ``host``: host path bind mount.
          Format: ``/host/path:/container/path:ro|rw``
        - ``pvc``: Docker named volume mount.
          Format (no subPath): ``volume-name:/container/path:ro|rw``
          Docker recognises non-absolute-path sources as named volume references.
          Format (with subPath): ``/var/lib/docker/volumes/…/subdir:/container/path:ro|rw``
          When subPath is specified, the volume's host Mountpoint (obtained from
          ``pvc_inspect_cache``) is used to produce a standard bind mount.
        - ``ossfs``: host bind mount to runtime-mounted OSSFS path.
          Format: ``/mnt/ossfs/<bucket>/<subPath?>:/container/path:ro|rw``

        Each mount string uses ``:ro`` for read-only and ``:rw`` for read-write
        (default).

        Args:
            volumes: List of Volume objects from the creation request.
            pvc_inspect_cache: Dict mapping PVC claimNames to their
                ``docker volume inspect`` results, populated by
                ``_validate_volumes``.  Avoids a redundant API call and
                eliminates the race window between validation and bind
                generation.

        Returns:
            List of Docker bind/volume mount strings.
        """
        if not volumes:
            return []

        cache = pvc_inspect_cache or {}
        binds: list[str] = []
        for volume in volumes:
            container_path = volume.mount_path
            mode = "ro" if volume.read_only else "rw"

            if volume.host is not None:
                # Resolve the concrete host path (host.path + optional subPath)
                host_path = volume.host.path
                if volume.sub_path:
                    host_path = os.path.normpath(
                        os.path.join(host_path, volume.sub_path)
                    )
                binds.append(f"{host_path}:{container_path}:{mode}")

            elif volume.pvc is not None:
                if volume.sub_path:
                    # Resolve the named volume's host-side Mountpoint and append
                    # the subPath to produce a regular bind mount.  Validation
                    # has already ensured the driver is "local" and the resolved
                    # path is safe.  Reuse cached inspect data to avoid a
                    # redundant Docker API call and potential race condition.
                    vol_info = cache.get(volume.pvc.claim_name, {})
                    mountpoint = vol_info.get("Mountpoint", "")
                    resolved = posixpath.normpath(
                        posixpath.join(mountpoint, volume.sub_path)
                    )
                    binds.append(f"{resolved}:{container_path}:{mode}")
                else:
                    # No subPath: use claimName directly as Docker volume ref.
                    binds.append(
                        f"{volume.pvc.claim_name}:{container_path}:{mode}"
                    )
            elif volume.ossfs is not None:
                _, host_path = self._resolve_ossfs_paths(volume)
                binds.append(f"{host_path}:{container_path}:{mode}")

        return binds

    def list_sandboxes(self, request: ListSandboxesRequest) -> ListSandboxesResponse:
        """
        List sandboxes with optional filtering and pagination.

        Containers carrying ``SANDBOX_ID_LABEL`` (running or not) are queried
        from Docker and merged with the pending sandboxes yielded by
        ``_iter_pending_sandboxes``.  A pending record is ignored whenever a
        container with the same sandbox id exists, even if that container was
        excluded by the filter.  Results are ordered by ``created_at``
        descending, with entries lacking a ``created_at`` placed last.

        Args:
            request: Listing request carrying the optional ``filter`` and
                ``pagination``.  Without pagination, page 1 with a page size of
                20 is used.

        Returns:
            ListSandboxesResponse: The requested page plus pagination metadata.

        Raises:
            HTTPException: 500 with ``CONTAINER_QUERY_FAILED`` when the Docker
                query fails.
        """
        try:
            containers = self.docker_client.containers.list(
                all=True,
                filters={"label": [SANDBOX_ID_LABEL]},
            )
        except DockerException as exc:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.CONTAINER_QUERY_FAILED,
                    "message": f"Failed to query sandbox containers: {str(exc)}",
                },
            ) from exc

        sandboxes_by_id: dict[str, Sandbox] = {}
        container_ids: set[str] = set()
        for container in containers:
            labels = container.attrs.get("Config", {}).get("Labels") or {}
            sandbox_id = labels.get(SANDBOX_ID_LABEL)
            if not sandbox_id:
                continue
            sandbox_obj = self._container_to_sandbox(container, sandbox_id)
            container_ids.add(sandbox_id)
            if matches_filter(sandbox_obj, request.filter):
                sandboxes_by_id[sandbox_id] = sandbox_obj

        for sandbox_id, pending in self._iter_pending_sandboxes():
            if sandbox_id in container_ids:
                # If a real container exists, prefer its state regardless of filter outcome.
                continue
            sandbox_obj = self._pending_to_sandbox(sandbox_id, pending)
            if matches_filter(sandbox_obj, request.filter):
                sandboxes_by_id[sandbox_id] = sandbox_obj

        sandboxes: list[Sandbox] = list(sandboxes_by_id.values())

        # Newest first, entries without created_at last.  The leading boolean
        # decides the order between "has timestamp" and "no timestamp" so the
        # naive ``datetime.min`` placeholder is never compared against a
        # timezone-aware ``created_at`` (which would raise TypeError).
        sandboxes.sort(
            key=lambda s: (s.created_at is not None, s.created_at or datetime.min),
            reverse=True,
        )

        if request.pagination:
            page = request.pagination.page
            page_size = request.pagination.page_size
        else:
            page = 1
            page_size = 20

        total_items = len(sandboxes)
        total_pages = math.ceil(total_items / page_size) if total_items else 0
        start_index = (page - 1) * page_size
        end_index = start_index + page_size
        items = sandboxes[start_index:end_index]
        has_next_page = page < total_pages

        pagination_info = PaginationInfo(
            page=page,
            page_size=page_size,
            total_items=total_items,
            total_pages=total_pages,
            has_next_page=has_next_page,
        )

        return ListSandboxesResponse(items=items, pagination=pagination_info)

    def get_sandbox(self, sandbox_id: str) -> Sandbox:
        """
        Look up a single sandbox by its id.

        The real container state wins whenever a container exists.  Only when
        the container lookup fails with a 404 is the pending record consulted;
        any other lookup failure is re-raised untouched.

        Args:
            sandbox_id: Unique sandbox identifier

        Returns:
            Sandbox: Sandbox built from the container, or from the pending
            record when no container exists yet.

        Raises:
            HTTPException: If neither a container nor a pending record exists,
                or the container lookup failed for another reason.
        """
        try:
            container = self._get_container_by_sandbox_id(sandbox_id)
        except HTTPException as lookup_error:
            if lookup_error.status_code == status.HTTP_404_NOT_FOUND:
                pending_record = self._get_pending_sandbox(sandbox_id)
                if pending_record:
                    return self._pending_to_sandbox(sandbox_id, pending_record)
            # Not a 404, or a 404 without a pending record: surface the original error.
            raise
        else:
            return self._container_to_sandbox(container, sandbox_id)

    def delete_sandbox(self, sandbox_id: str) -> None:
        """
        Kill and remove the sandbox container, then release associated state.

        The OSSFS mount keys are read from the container label
        ``SANDBOX_OSSFS_MOUNTS_LABEL`` (a JSON list; an undecodable value is
        treated as empty) before the container is touched.  Expiration
        tracking, the egress sidecar and the OSSFS mounts are cleaned up in a
        ``finally`` block, i.e. also when killing or removing fails.

        Args:
            sandbox_id: Unique sandbox identifier

        Raises:
            HTTPException: If sandbox not found or deletion fails
        """
        container = self._get_container_by_sandbox_id(sandbox_id)
        container_labels = container.attrs.get("Config", {}).get("Labels") or {}
        raw_mount_keys = container_labels.get(SANDBOX_OSSFS_MOUNTS_LABEL, "[]")
        try:
            ossfs_keys: list[str] = json.loads(raw_mount_keys)
        except (TypeError, json.JSONDecodeError):
            ossfs_keys = []

        try:
            try:
                with self._docker_operation("kill sandbox container", sandbox_id):
                    container.kill()
            except DockerException as kill_error:
                # A container that is already stopped is not an error here.
                already_stopped = "is not running" in str(kill_error).lower()
                if not already_stopped:
                    raise
            with self._docker_operation("remove sandbox container", sandbox_id):
                container.remove(force=True)
        except DockerException as exc:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.SANDBOX_DELETE_FAILED,
                    "message": f"Failed to delete sandbox container: {str(exc)}",
                },
            ) from exc
        finally:
            self._remove_expiration_tracking(sandbox_id)
            self._cleanup_egress_sidecar(sandbox_id)
            self._release_ossfs_mounts(ossfs_keys)

    def pause_sandbox(self, sandbox_id: str) -> None:
        """
        Pause the container backing a running sandbox.

        Args:
            sandbox_id: Unique sandbox identifier

        Raises:
            HTTPException: 409 when the container's ``State.Running`` flag is
                not set, 500 when Docker fails to pause it, or whatever the
                container lookup raises when the sandbox is unknown.
        """
        container = self._get_container_by_sandbox_id(sandbox_id)

        is_running = container.attrs.get("State", {}).get("Running", False)
        if not is_running:
            raise HTTPException(
                status_code=status.HTTP_409_CONFLICT,
                detail={
                    "code": SandboxErrorCodes.SANDBOX_NOT_RUNNING,
                    "message": "Sandbox is not in a running state.",
                },
            )

        try:
            with self._docker_operation("pause sandbox container", sandbox_id):
                container.pause()
        except DockerException as docker_error:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.SANDBOX_PAUSE_FAILED,
                    "message": f"Failed to pause sandbox container: {str(docker_error)}",
                },
            ) from docker_error

    def resume_sandbox(self, sandbox_id: str) -> None:
        """
        Unpause the container backing a paused sandbox.

        Args:
            sandbox_id: Unique sandbox identifier

        Raises:
            HTTPException: 409 when the container's ``State.Paused`` flag is
                not set, 500 when Docker fails to unpause it, or whatever the
                container lookup raises when the sandbox is unknown.
        """
        container = self._get_container_by_sandbox_id(sandbox_id)

        is_paused = container.attrs.get("State", {}).get("Paused", False)
        if not is_paused:
            raise HTTPException(
                status_code=status.HTTP_409_CONFLICT,
                detail={
                    "code": SandboxErrorCodes.SANDBOX_NOT_PAUSED,
                    "message": "Sandbox is not in a paused state.",
                },
            )

        try:
            with self._docker_operation("resume sandbox container", sandbox_id):
                container.unpause()
        except DockerException as docker_error:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.SANDBOX_RESUME_FAILED,
                    "message": f"Failed to resume sandbox container: {str(docker_error)}",
                },
            ) from docker_error

    def renew_expiration(
        self,
        sandbox_id: str,
        request: RenewSandboxExpirationRequest,
    ) -> RenewSandboxExpirationResponse:
        """
        Move a sandbox's expiration to a new point in time.

        The sandbox must have automatic expiration enabled and must still have
        expiration metadata available; otherwise the request is rejected with
        409.  The new expiration is scheduled first and then written to the
        container label ``SANDBOX_EXPIRES_AT_LABEL``.  A failure to update the
        label is only logged as a warning and does not fail the request.

        Args:
            sandbox_id: Unique sandbox identifier
            request: Renewal request with new expiration time

        Returns:
            RenewSandboxExpirationResponse: Updated expiration time

        Raises:
            HTTPException: If sandbox not found or renewal fails
        """

        def _conflict(message: str) -> HTTPException:
            # Both rejection paths share status and error code.
            return HTTPException(
                status_code=status.HTTP_409_CONFLICT,
                detail={
                    "code": SandboxErrorCodes.INVALID_EXPIRATION,
                    "message": message,
                },
            )

        container = self._get_container_by_sandbox_id(sandbox_id)
        renewed_until = ensure_future_expiration(request.expires_at)

        labels = container.attrs.get("Config", {}).get("Labels") or {}
        if self._has_manual_cleanup(labels):
            raise _conflict(
                f"Sandbox {sandbox_id} does not have automatic expiration enabled."
            )
        if self._get_tracked_expiration(sandbox_id, labels) is None:
            raise _conflict(
                f"Sandbox {sandbox_id} is missing expiration metadata and cannot be renewed safely."
            )

        # Persist the new timeout in memory; it will also be respected on restart via _restore_existing_sandboxes
        self._schedule_expiration(sandbox_id, renewed_until)
        labels[SANDBOX_EXPIRES_AT_LABEL] = renewed_until.isoformat()
        try:
            with self._docker_operation("update sandbox labels", sandbox_id):
                self._update_container_labels(container, labels)
        except (DockerException, TypeError) as label_error:
            logger.warning("Failed to refresh labels for sandbox %s: %s", sandbox_id, label_error)

        return RenewSandboxExpirationResponse(expires_at=renewed_until)

    def get_endpoint(self, sandbox_id: str, port: int, resolve_internal: bool = False) -> Endpoint:
        """
        Resolve the endpoint through which a sandbox port can be reached.

        Args:
            sandbox_id: Unique sandbox identifier
            port: Port number where the service is listening inside the sandbox
            resolve_internal: If True, return the endpoint the server itself
                should proxy to (internal container address, or the
                server-local host-mapped endpoint for sandboxes with an egress
                sidecar), ignoring the public host configuration.

        Returns:
            Endpoint: Endpoint for the requested port

        Raises:
            HTTPException: 400 with ``INVALID_PORT`` when the port is rejected
                by ``validate_port``; otherwise whatever the container lookup
                or endpoint resolution raises.
        """
        try:
            self.validate_port(port)
        except ValueError as port_error:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={
                    "code": SandboxErrorCodes.INVALID_PORT,
                    "message": str(port_error),
                },
            ) from port_error

        if resolve_internal:
            container = self._get_container_by_sandbox_id(sandbox_id)
            container_labels = container.attrs.get("Config", {}).get("Labels") or {}
            if not container_labels.get(SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY):
                return self._resolve_internal_endpoint(container, port)
            # Sandboxes created with egress sidecar share the sidecar network namespace, so the
            # main container's private IP is not a stable proxy target. In that case, treat the
            # server-proxy target as the server-local host-mapped endpoint instead of a container IP.
            return self._resolve_host_mapped_endpoint(
                self._resolve_proxy_host(),
                container_labels,
                port,
            )

        external_host = self._resolve_public_host()

        if self.network_mode != HOST_NETWORK_MODE:
            # non-host mode (bridge / user-defined network)
            container = self._get_container_by_sandbox_id(sandbox_id)
            container_labels = container.attrs.get("Config", {}).get("Labels") or {}
            return self._resolve_host_mapped_endpoint(external_host, container_labels, port)

        # Host networking: the sandbox port is reachable on the host directly.
        direct = Endpoint(endpoint=f"{external_host}:{port}")
        container = self._get_container_by_sandbox_id(sandbox_id)
        self._attach_egress_auth_headers(
            direct,
            (container.attrs.get("Config", {}).get("Labels") or {}),
        )
        return direct

    def _resolve_host_mapped_endpoint(
        self,
        public_host: str,
        labels: dict[str, str],
        port: int,
    ) -> Endpoint:
        """
        Build an endpoint from the host port mappings stored in container labels.

        Container port 8080 maps straight to the host port recorded under
        ``SANDBOX_HTTP_PORT_LABEL``.  Every other port is reached through the
        host port recorded under ``SANDBOX_EMBEDDING_PROXY_PORT_LABEL`` with a
        ``/proxy/<port>`` path suffix; egress auth headers are attached to
        that endpoint when the labels carry a token.

        Args:
            public_host: Host part to use in the endpoint.
            labels: Container labels holding the host port mappings.
            port: Port inside the sandbox.

        Returns:
            Endpoint: The host-mapped endpoint.

        Raises:
            HTTPException: 500 with ``NETWORK_MODE_ENDPOINT_UNAVAILABLE`` when
                the required host port mapping is missing.
        """

        def _unavailable(message: str) -> HTTPException:
            return HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.NETWORK_MODE_ENDPOINT_UNAVAILABLE,
                    "message": message,
                },
            )

        # Both labels are parsed up front, regardless of which one is needed.
        proxy_host_port = self._parse_host_port_label(
            labels.get(SANDBOX_EMBEDDING_PROXY_PORT_LABEL),
            SANDBOX_EMBEDDING_PROXY_PORT_LABEL,
        )
        direct_host_port = self._parse_host_port_label(
            labels.get(SANDBOX_HTTP_PORT_LABEL),
            SANDBOX_HTTP_PORT_LABEL,
        )

        if port != 8080:
            if proxy_host_port is None:
                raise _unavailable("Missing host port mapping for execd proxy port 44772.")
            proxied = Endpoint(endpoint=f"{public_host}:{proxy_host_port}/proxy/{port}")
            self._attach_egress_auth_headers(proxied, labels)
            return proxied

        if direct_host_port is None:
            raise _unavailable("Missing host port mapping for container port 8080.")
        return Endpoint(endpoint=f"{public_host}:{direct_host_port}")

    def _attach_egress_auth_headers(
        self,
        endpoint: Endpoint,
        labels: dict[str, str],
    ) -> None:
        """
        Merge egress auth headers into ``endpoint.headers`` (in place).

        Nothing happens when ``labels`` has no value under
        ``SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY``.
        """
        auth_token = labels.get(SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY)
        if auth_token:
            existing_headers = endpoint.headers
            auth_headers = build_egress_auth_headers(auth_token)
            endpoint.headers = merge_endpoint_headers(existing_headers, auth_headers)

    def _get_docker_host_ip(self) -> Optional[str]:
        """When running inside a container, return [docker].host_ip for endpoint URLs (if set)."""
        ip = (self.app_config.docker.host_ip or "").strip()
        return ip or None

    def _resolve_public_host(self) -> str:
        """Resolve the host used in endpoint URLs. If [server].eip is set, use it directly without resolving host."""
        eip_cfg = (self.app_config.server.eip or "").strip()
        if eip_cfg:
            return eip_cfg
        host_cfg = (self.app_config.server.host or "").strip()
        host_key = host_cfg.lower()
        if host_key in {"", "0.0.0.0", "::"}:
            if _running_inside_docker_container():
                host_ip = self._get_docker_host_ip()
                if host_ip:
                    return host_ip
            return self._resolve_bind_ip(socket.AF_INET)
        return host_cfg

    def _resolve_proxy_host(self) -> str:
        """Resolve the server-local host used for proxying to host-mapped Docker endpoints.

        This intentionally does not use ``server.eip`` because the proxy target must be reachable
        from the server process itself, even in deployments without hairpin access to the public EIP.
        """
        host_cfg = (self.app_config.server.host or "").strip()
        host_key = host_cfg.lower()
        if host_key in {"", "0.0.0.0", "::"}:
            if _running_inside_docker_container():
                host_ip = self._get_docker_host_ip()
                if host_ip:
                    return host_ip
            return "127.0.0.1"
        return host_cfg

    def _resolve_internal_endpoint(self, container, port: int) -> Endpoint:
        """
        Build the endpoint used when host port mapping is bypassed.

        In host network mode this is ``127.0.0.1:<port>``; otherwise the
        address comes from ``_extract_bridge_ip`` for the given container.
        """
        if self.network_mode != HOST_NETWORK_MODE:
            bridge_ip = self._extract_bridge_ip(container)
            return Endpoint(endpoint=f"{bridge_ip}:{port}")
        return Endpoint(endpoint=f"127.0.0.1:{port}")

    # ---------------------------
    # Common helpers for creation
    # ---------------------------
    def _build_labels_and_env(
        self,
        sandbox_id: str,
        request: CreateSandboxRequest,
        expires_at: Optional[datetime],
    ) -> tuple[dict[str, str], list[str]]:
        """
        Derive container labels and the environment list from a creation request.

        Labels start from the request metadata (values stringified) and are
        extended with the sandbox id plus either the ISO-formatted expiration
        or, when ``expires_at`` is None, the manual-cleanup marker.
        Environment entries are rendered as ``KEY=value``; variables whose
        value is None are dropped.

        Returns:
            A ``(labels, environment)`` tuple.
        """
        labels = {name: str(val) for name, val in (request.metadata or {}).items()}
        labels[SANDBOX_ID_LABEL] = sandbox_id
        if expires_at is not None:
            labels[SANDBOX_EXPIRES_AT_LABEL] = expires_at.isoformat()
        else:
            labels[SANDBOX_MANUAL_CLEANUP_LABEL] = "true"

        environment = [
            f"{name}={val}"
            for name, val in (request.env or {}).items()
            if val is not None
        ]
        return labels, environment

    def _resolve_image_auth(
        self, request: CreateSandboxRequest, sandbox_id: str
    ) -> tuple[str, Optional[dict]]:
        """
        Extract the image URI and optional registry credentials, then ensure the image is available.

        Returns:
            ``(image_uri, auth_config)`` where ``auth_config`` is a
            ``{"username", "password"}`` dict, or None when the request
            carries no image auth.
        """
        image = request.image
        uri = image.uri
        credentials = image.auth
        if credentials:
            auth_config = {
                "username": credentials.username,
                "password": credentials.password,
            }
        else:
            auth_config = None
        self._ensure_image_available(uri, auth_config, sandbox_id)
        return uri, auth_config

    def _resolve_resource_limits(
        self, request: CreateSandboxRequest
    ) -> tuple[Optional[int], Optional[int]]:
        """
        Translate the request's ``memory`` and ``cpu`` resource limits.

        Returns:
            ``(mem_limit, nano_cpus)`` as produced by ``parse_memory_limit``
            and ``parse_nano_cpus`` respectively.
        """
        limits = request.resource_limits.root or {}
        return (
            parse_memory_limit(limits.get("memory")),
            parse_nano_cpus(limits.get("cpu")),
        )

    def _base_host_config_kwargs(
        self,
        mem_limit: Optional[int],
        nano_cpus: Optional[int],
        network_mode: str,
    ) -> Dict[str, Any]:
        host_config_kwargs: Dict[str, Any] = {"network_mode": network_mode}
        security_opts: list[str] = []
        docker_cfg = self.app_config.docker
        if docker_cfg.no_new_privileges:
            security_opts.append("no-new-privileges:true")
        if docker_cfg.apparmor_profile:
            security_opts.append(f"apparmor={docker_cfg.apparmor_profile}")
        if docker_cfg.seccomp_profile:
            security_opts.append(f"seccomp={docker_cfg.seccomp_profile}")
        if security_opts:
            host_config_kwargs["security_opt"] = security_opts
        if docker_cfg.drop_capabilities:
            host_config_kwargs["cap_drop"] = docker_cfg.drop_capabilities
        if docker_cfg.pids_limit is not None:
            host_config_kwargs["pids_limit"] = docker_cfg.pids_limit
        if mem_limit:
            host_config_kwargs["mem_limit"] = mem_limit
        if nano_cpus:
            host_config_kwargs["nano_cpus"] = nano_cpus
        # Inject secure runtime into host_config
        if self.docker_runtime:
            logger.info(
                "Using Docker runtime '%s' for container creation",
                self.docker_runtime,
            )
            host_config_kwargs["runtime"] = self.docker_runtime
        return host_config_kwargs

    def _allocate_distinct_host_ports(self) -> tuple[int, int]:
        host_execd_port = self._allocate_host_port()
        host_http_port = self._allocate_host_port()
        if host_execd_port is None or host_http_port is None:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.CONTAINER_START_FAILED,
                    "message": "Failed to allocate host ports for sandbox container.",
                },
            )
        while host_http_port == host_execd_port:
            host_http_port = self._allocate_host_port()
            if host_http_port is None:
                raise HTTPException(
                    status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                    detail={
                        "code": SandboxErrorCodes.CONTAINER_START_FAILED,
                        "message": "Failed to allocate distinct host ports for sandbox container.",
                    },
                )
        return host_execd_port, host_http_port

    def _cleanup_egress_sidecar(self, sandbox_id: str) -> None:
        """
        Best-effort removal of every egress sidecar labelled with sandbox_id.

        Docker errors while listing or removing are logged as warnings and
        not re-raised from here.
        """
        label_filter = {"label": f"{EGRESS_SIDECAR_LABEL}={sandbox_id}"}
        try:
            sidecars = self.docker_client.containers.list(all=True, filters=label_filter)
        except DockerException as list_exc:
            logger.warning("sandbox=%s | failed to list egress sidecar: %s", sandbox_id, list_exc)
            return

        for sidecar in sidecars:
            try:
                with self._docker_operation("cleanup egress sidecar", sandbox_id):
                    sidecar.remove(force=True)
            except DockerException as remove_exc:
                # Keep going so one stuck sidecar does not block the others.
                logger.warning(
                    "sandbox=%s | failed to remove egress sidecar %s: %s",
                    sandbox_id,
                    sidecar.id,
                    remove_exc,
                )

    def _start_egress_sidecar(
        self,
        sandbox_id: str,
        network_policy: NetworkPolicy,
        egress_token: str,
        host_execd_port: int,
        host_http_port: int,
    ):
        """
        Create and start the egress sidecar container for a sandbox.

        The sidecar is named ``sandbox-egress-<sandbox_id>``, labelled with
        ``EGRESS_SIDECAR_LABEL`` and receives the serialized network policy,
        the configured egress mode and the egress token through environment
        variables. Container ports 44772 and 8080 are published on the given
        host ports.

        Args:
            sandbox_id: Sandbox the sidecar belongs to.
            network_policy: Policy serialized to JSON and passed via ``EGRESS_RULES_ENV``.
            egress_token: Token passed via ``OPENSANDBOX_EGRESS_TOKEN``.
            host_execd_port: Host port bound to container port 44772.
            host_http_port: Host port bound to container port 8080.

        Returns:
            The started sidecar container object.

        Raises:
            ValueError: If no egress image is configured.
            HTTPException: 500 / CONTAINER_START_FAILED when creation or start
                fails; an HTTPException raised inside the try block is
                re-raised unchanged.
        """
        sidecar_name = f"sandbox-egress-{sandbox_id}"
        sidecar_labels = {
            EGRESS_SIDECAR_LABEL: sandbox_id,
        }

        # Ensure sidecar image is available before create/start.
        egress_image = self.app_config.egress.image if self.app_config.egress else None
        if not egress_image:
            raise ValueError("egress.image must be configured when networkPolicy is provided.")
        self._ensure_image_available(egress_image, None, sandbox_id)

        # Pydantic dump with aliases, dropping unset fields, then JSON-encode for the env var.
        policy_payload = json.dumps(network_policy.model_dump(by_alias=True, exclude_none=True))
        assert self.app_config.egress is not None  # validated by ensure_egress_configured with networkPolicy
        egress_mode = self.app_config.egress.mode
        sidecar_env = [
            f"{EGRESS_RULES_ENV}={policy_payload}",
            f"{EGRESS_MODE_ENV}={egress_mode}",
            f"{OPENSANDBOX_EGRESS_TOKEN}={egress_token}",
        ]

        sidecar_host_config_kwargs: dict[str, Any] = {
            "network_mode": BRIDGE_NETWORK_MODE,
            "cap_add": ["NET_ADMIN"],
            # Publish the two container ports on all host interfaces.
            "port_bindings": {
                "44772": ("0.0.0.0", host_execd_port),
                "8080": ("0.0.0.0", host_http_port),
            },
            # FIXME(Pangjiping): Disable IPv6 in the shared namespace to keep policy enforcement consistent.
            "sysctls": {
                "net.ipv6.conf.all.disable_ipv6": 1,
                "net.ipv6.conf.default.disable_ipv6": 1,
                "net.ipv6.conf.lo.disable_ipv6": 1,
            },
        }

        sidecar_host_config = self.docker_client.api.create_host_config(
            **sidecar_host_config_kwargs
        )

        # Track both the high-level object and the raw ID so the except block
        # can clean up regardless of how far creation got.
        sidecar_container = None
        sidecar_container_id: Optional[str] = None
        try:
            with self._docker_operation("create egress sidecar", sandbox_id):
                sidecar_resp = self.docker_client.api.create_container(
                    image=egress_image,
                    name=sidecar_name,
                    host_config=sidecar_host_config,
                    labels=sidecar_labels,
                    environment=sidecar_env,
                    # Expose the ports that have host bindings so Docker publishes them in bridge mode.
                    ports=["44772", "8080"],
                )
            sidecar_container_id = sidecar_resp.get("Id")
            if not sidecar_container_id:
                raise HTTPException(
                    status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                    detail={
                        "code": SandboxErrorCodes.CONTAINER_START_FAILED,
                        "message": "Docker did not return an egress sidecar container ID.",
                    },
                )
            sidecar_container = self.docker_client.containers.get(sidecar_container_id)
            with self._docker_operation("start egress sidecar", sandbox_id):
                sidecar_container.start()
            return sidecar_container
        except Exception as exc:
            # Remove whatever was created: via the container object when we
            # have one, otherwise via the low-level API using the raw ID.
            if sidecar_container is not None:
                try:
                    with self._docker_operation("cleanup egress sidecar", sandbox_id):
                        sidecar_container.remove(force=True)
                except DockerException as cleanup_exc:
                    logger.warning(
                        "Failed to cleanup egress sidecar for sandbox %s: %s",
                        sandbox_id,
                        cleanup_exc,
                    )
            elif sidecar_container_id:
                try:
                    with self._docker_operation("cleanup egress sidecar (API)", sandbox_id):
                        self.docker_client.api.remove_container(sidecar_container_id, force=True)
                except DockerException as cleanup_exc:
                    logger.warning(
                        "Failed to cleanup egress sidecar for sandbox %s: %s",
                        sandbox_id,
                        cleanup_exc,
                    )
            # HTTPExceptions already carry a structured error; pass them through.
            if isinstance(exc, HTTPException):
                raise exc
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.CONTAINER_START_FAILED,
                    "message": "Egress sidecar container failed to start.",
                },
            ) from exc

    def _create_and_start_container(
        self,
        sandbox_id: str,
        image_uri: str,
        bootstrap_command: list[str],
        labels: dict[str, str],
        environment: list[str],
        host_config_kwargs: Dict[str, Any],
        exposed_ports: Optional[list[str]],
    ):
        """
        Create the sandbox container, prepare its runtime and start it.

        The container is named ``sandbox-<sandbox_id>``, uses ``BOOTSTRAP_PATH``
        as its entrypoint and receives ``bootstrap_command`` as its command.
        ``_prepare_sandbox_runtime`` runs after creation and before start.

        Args:
            sandbox_id: Sandbox identifier, used for the container name and logging.
            image_uri: Image to create the container from.
            bootstrap_command: Command passed to the bootstrap entrypoint; a
                single element containing spaces is split with ``shlex.split``.
            labels: Labels applied to the container.
            environment: ``KEY=value`` environment entries.
            host_config_kwargs: Keyword arguments for ``create_host_config``.
            exposed_ports: Ports passed as the container's ``ports`` argument.

        Returns:
            The started container object.

        Raises:
            HTTPException: 500 / CONTAINER_START_FAILED on any failure; an
                HTTPException raised inside the try block is re-raised unchanged.
        """
        # Normalize single-string entrypoint containing spaces to avoid shell path issues in bootstrap.
        if len(bootstrap_command) == 1 and " " in bootstrap_command[0]:
            import shlex

            bootstrap_command = shlex.split(bootstrap_command[0])

        host_config = self.docker_client.api.create_host_config(**host_config_kwargs)
        # Track both the high-level object and the raw ID so the except block
        # can clean up regardless of how far creation got.
        container = None
        container_id: Optional[str] = None
        try:
            with self._docker_operation("create sandbox container", sandbox_id):
                container_kwargs = {
                    "image": image_uri,
                    "entrypoint": [BOOTSTRAP_PATH],
                    "command": bootstrap_command,
                    "ports": exposed_ports,
                    "name": f"sandbox-{sandbox_id}",
                    "environment": environment,
                    "labels": labels,
                    "host_config": host_config,
                }

                response = self.docker_client.api.create_container(**container_kwargs)
            container_id = response.get("Id")
            if not container_id:
                raise HTTPException(
                    status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                    detail={
                        "code": SandboxErrorCodes.CONTAINER_START_FAILED,
                        "message": "Docker did not return a container ID.",
                    },
                )
            container = self.docker_client.containers.get(container_id)
            # Runtime preparation happens on the created-but-not-started container.
            self._prepare_sandbox_runtime(container, sandbox_id)
            with self._docker_operation("start sandbox container", sandbox_id):
                container.start()
            return container
        except Exception as exc:
            # Remove whatever was created: via the container object when we
            # have one, otherwise via the low-level API using the raw ID.
            if container is not None:
                try:
                    with self._docker_operation("cleanup sandbox container", sandbox_id):
                        container.remove(force=True)
                except DockerException as cleanup_exc:
                    logger.warning(
                        "Failed to cleanup container for sandbox %s: %s",
                        sandbox_id,
                        cleanup_exc,
                    )
            elif container_id:
                try:
                    with self._docker_operation("cleanup sandbox container (API)", sandbox_id):
                        self.docker_client.api.remove_container(container_id, force=True)
                except DockerException as cleanup_exc:
                    logger.warning(
                        "Failed to cleanup container for sandbox %s: %s",
                        sandbox_id,
                        cleanup_exc,
                    )

            # HTTPExceptions already carry a structured error; pass them through.
            if isinstance(exc, HTTPException):
                raise exc

            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.CONTAINER_START_FAILED,
                    "message": f"Failed to create or start container: {str(exc)}",
                },
            ) from exc

    @staticmethod
    def _parse_host_port_label(value: Optional[str], label_name: str) -> Optional[int]:
        if not value:
            return None
        try:
            port = int(value)
            if port <= 0 or port > 65535:
                raise ValueError
            return port
        except ValueError:
            logger.warning("Invalid port label %s=%s", label_name, value)
            return None

    def _extract_bridge_ip(self, container) -> str:
        """Extract the IP address assigned to a container on a bridge or user-defined network.

        For user-defined networks, the top-level ``NetworkSettings.IPAddress`` is empty;
        the IP lives under ``NetworkSettings.Networks[<network-name>].IPAddress``.
        This method prefers the configured ``network_mode`` entry when it is a user-defined
        network, then falls back to any non-empty entry for robustness.
        """
        network_settings = container.attrs.get("NetworkSettings", {}) or {}
        networks = network_settings.get("Networks", {}) or {}
        ip_address: Optional[str] = None

        if self._is_user_defined_network():
            # Prefer the explicit network entry for the configured named network.
            net_conf = networks.get(self.network_mode) or {}
            ip_address = net_conf.get("IPAddress") or None

        if not ip_address:
            # Default bridge path (or fallback): check the top-level IPAddress first.
            ip_address = network_settings.get("IPAddress") or None

        if not ip_address:
            # Last resort: iterate all network entries and take the first populated IP.
            for net_conf in networks.values():
                if net_conf and net_conf.get("IPAddress"):
                    ip_address = net_conf.get("IPAddress")
                    break

        if not ip_address:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.NETWORK_MODE_ENDPOINT_UNAVAILABLE,
                    "message": "Container is running but has no assigned IP address.",
                },
            )
        return ip_address


================================================
FILE: server/src/services/endpoint_auth.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Helpers for sandbox endpoint authentication."""

from __future__ import annotations

import secrets

from src.services.constants import OPEN_SANDBOX_EGRESS_AUTH_HEADER

# Number of random bytes requested from ``secrets.token_urlsafe`` per token.
EGRESS_AUTH_TOKEN_BYTES = 24


def generate_egress_token() -> str:
    """Create a fresh random URL-safe token used for egress endpoint auth."""
    token = secrets.token_urlsafe(nbytes=EGRESS_AUTH_TOKEN_BYTES)
    return token


def build_egress_auth_headers(token: str) -> dict[str, str]:
    """Return a one-entry header mapping that carries the egress auth token."""
    headers: dict[str, str] = {}
    headers[OPEN_SANDBOX_EGRESS_AUTH_HEADER] = token
    return headers


def merge_endpoint_headers(
    existing: dict[str, str] | None,
    extra: dict[str, str],
) -> dict[str, str]:
    """Combine two header mappings into a new dict, leaving both inputs untouched.

    Entries from ``extra`` win when a key is present in both. ``existing`` may
    be None, in which case the result is a copy of ``extra``.
    """
    combined: dict[str, str] = {}
    for source in (existing or {}, extra):
        combined.update(source)
    return combined


# Names exported by ``from <this module> import *``.
__all__ = [
    "EGRESS_AUTH_TOKEN_BYTES",
    "build_egress_auth_headers",
    "generate_egress_token",
    "merge_endpoint_headers",
]


================================================
FILE: server/src/services/factory.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Factory for creating sandbox service instances.

This module provides a factory function to create sandbox service implementations
based on application configuration loaded from sandbox_server.config.
"""

import logging
from typing import Optional

from src.config import AppConfig, get_config
from src.services.docker import DockerSandboxService
from src.services.k8s import KubernetesSandboxService
from src.services.sandbox_service import SandboxService

logger = logging.getLogger(__name__)


def create_sandbox_service(
    service_type: Optional[str] = None,
    config: Optional[AppConfig] = None,
) -> SandboxService:
    """
    Instantiate the sandbox service implementation selected by configuration.

    Args:
        service_type: Explicit implementation name; when falsy, the name is
            taken from ``config.runtime.type``. Matching is case-insensitive.
        config: Application configuration; when falsy, ``get_config()`` is used.

    Returns:
        SandboxService: The chosen implementation, constructed with the
        active configuration.

    Raises:
        ValueError: If the selected type has no registered implementation.
    """
    active_config = config or get_config()
    requested_type = service_type or active_config.runtime.type
    selected_type = requested_type.lower()

    logger.info("Creating sandbox service with type: %s", selected_type)

    # Registry of known implementations, keyed by lower-case type name.
    # Register new runtimes by adding an entry here.
    registry: dict[str, type[SandboxService]] = {
        "docker": DockerSandboxService,
        "kubernetes": KubernetesSandboxService,
        # Future implementations can be added here:
        # "containerd": ContainerdSandboxService,
    }

    implementation_class = registry.get(selected_type)
    if implementation_class is None:
        supported_types = ", ".join(registry)
        raise ValueError(
            f"Unsupported sandbox service type: {selected_type}. "
            f"Supported types: {supported_types}"
        )
    return implementation_class(config=active_config)


================================================
FILE: server/src/services/helpers.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Shared helpers for container-based sandbox services.

These utilities centralize common parsing, filtering, and transformation logic
so multiple container runtimes (docker, kubernetes, etc.) can reuse them.
"""

from __future__ import annotations

import logging
import re
from datetime import datetime, timezone
from typing import Dict, Optional

from src.api.schema import Endpoint, Sandbox, SandboxFilter
from src.services.constants import OPEN_SANDBOX_INGRESS_HEADER
from src.config import (
    GATEWAY_ROUTE_MODE_HEADER,
    GATEWAY_ROUTE_MODE_URI,
    GATEWAY_ROUTE_MODE_WILDCARD,
    INGRESS_MODE_GATEWAY,
    IngressConfig,
)

logger = logging.getLogger(__name__)

# Group 1: amount (integer or decimal). Group 2: optional unit, one of
#   - an SI/IEC prefix with optional "i" (k, Ki, m, Mi, ...), or
#   - "b" with an optional SI prefix (b, kb, mb, ...).
# Matching is case-insensitive and tolerates surrounding whitespace.
MEMORY_PATTERN = re.compile(
    r"^\s*(\d+(?:\.\d+)?)([kmgt]i?|[kmgt]?b)?\s*$", re.IGNORECASE
)
# Lower-cased unit -> bytes per unit. Bare and "b"-suffixed prefixes are
# decimal (powers of 1000); "i"-suffixed prefixes are binary (powers of 1024).
MEMORY_MULTIPLIERS: Dict[str, int] = {
    "": 1,
    "b": 1,
    "k": 1_000,
    "kb": 1_000,
    "ki": 1024,
    "m": 1_000_000,
    "mb": 1_000_000,
    "mi": 1024**2,
    "g": 1_000_000_000,
    "gb": 1_000_000_000,
    "gi": 1024**3,
    "t": 1_000_000_000_000,
    "tb": 1_000_000_000_000,
    "ti": 1024**4,
}


def parse_memory_limit(value: Optional[str]) -> Optional[int]:
    """
    Convert a memory string (e.g. ``512Mi``, ``2g``, ``1.5Gi``) to bytes.

    Args:
        value: Amount with an optional unit suffix; see ``MEMORY_MULTIPLIERS``
            for the accepted units. Fractional amounts are accepted and the
            resulting byte count is truncated to an integer.

    Returns:
        The number of bytes, or None when ``value`` is falsy or cannot be
        parsed (a warning is logged in the latter case).
    """
    if not value:
        return None
    match = MEMORY_PATTERN.match(value)
    if not match:
        logger.warning("Invalid memory limit format '%s'; ignoring.", value)
        return None
    amount_text = match.group(1)
    unit = (match.group(2) or "").lower()
    multiplier = MEMORY_MULTIPLIERS.get(unit)
    if not multiplier:
        # Defensive: the pattern and the table should agree on the unit set.
        logger.warning("Unsupported memory unit '%s'; ignoring.", unit)
        return None
    if "." in amount_text:
        # Only fractional amounts go through float; integers stay exact.
        return int(float(amount_text) * multiplier)
    return int(amount_text) * multiplier


def parse_nano_cpus(value: Optional[str]) -> Optional[int]:
    """
    Convert a CPU string (e.g. ``500m``, ``2``, ``0.5``) to nano-CPUs.

    A trailing ``m`` means milli-CPUs; otherwise the value is a CPU count.

    Args:
        value: CPU quantity string; surrounding whitespace and case are ignored.

    Returns:
        The quantity multiplied by 1e9 and truncated to an int, or None when
        ``value`` is falsy, not a number, not finite, or not positive (a
        warning is logged for every invalid non-empty input).
    """
    import math

    if not value:
        return None
    cpu_str = value.strip().lower()
    try:
        if cpu_str.endswith("m"):
            cpus = float(cpu_str[:-1]) / 1000
        else:
            cpus = float(cpu_str)
    except ValueError:
        logger.warning("Invalid CPU limit format '%s'; ignoring.", value)
        return None
    # float() accepts "nan" and "inf"; int() on those raises, so reject them
    # here the same way as any other malformed value.
    if not math.isfinite(cpus):
        logger.warning("Invalid CPU limit format '%s'; ignoring.", value)
        return None
    if cpus <= 0:
        logger.warning("CPU limit must be positive. Got '%s'. Ignoring.", value)
        return None
    return int(cpus * 1_000_000_000)


def parse_timestamp(timestamp: Optional[str]) -> datetime:
    """
    Parse an RFC3339 timestamp into a timezone-aware datetime.

    Docker often returns RFC3339Nano (up to 9 fractional digits, trailing
    zeros trimmed). ``datetime.fromisoformat`` only handles microseconds, and
    before Python 3.11 it only accepts exactly 3 or 6 fractional digits, so
    the fractional part is normalized to exactly 6 digits before parsing.

    Args:
        timestamp: RFC3339 string. A falsy value or the zero timestamp
            ``0001-01-01T00:00:00Z`` is treated as "unknown".

    Returns:
        The parsed datetime. Values without a UTC offset are assumed to be
        UTC. Unknown or unparseable input yields the current UTC time (a
        warning is logged for unparseable input).
    """
    if not timestamp or timestamp == "0001-01-01T00:00:00Z":
        return datetime.now(timezone.utc)

    normalized = timestamp
    if normalized.endswith("Z"):
        normalized = normalized[:-1] + "+00:00"

    if "." in normalized:
        main, rest = normalized.split(".", 1)
        # Locate where the fraction ends and the UTC offset begins.
        tz_sep = None
        for sep in ("+", "-"):
            pos = rest.find(sep)
            if pos != -1:
                tz_sep = pos
                break
        if tz_sep is None:
            frac = rest
            tz = ""
        else:
            frac = rest[:tz_sep]
            tz = rest[tz_sep:]
        frac = frac[:6]  # truncate to microseconds precision
        if frac:
            # Pad to 6 digits so every supported Python version accepts it.
            frac = frac.ljust(6, "0")
        normalized = f"{main}.{frac}{tz}" if frac else f"{main}{tz}"

    try:
        parsed = datetime.fromisoformat(normalized)
    except ValueError:
        logger.warning("Invalid timestamp '%s'; defaulting to current time.", timestamp)
        return datetime.now(timezone.utc)
    if parsed.tzinfo is None:
        # Keep the "timezone-aware" contract for offset-less input.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def normalize_external_endpoint_url(endpoint: str, default_scheme: str = "https") -> str:
    """
    Normalize a host or URL to a full URL with an explicit scheme.

    Args:
        endpoint: Host (``example.com:8080``) or URL; surrounding whitespace
            is stripped.
        default_scheme: Scheme prepended when ``endpoint`` has no ``http://``
            or ``https://`` prefix.

    Returns:
        ``endpoint`` unchanged (apart from stripping) when it already starts
        with an HTTP(S) scheme in any letter case, otherwise
        ``<default_scheme>://<endpoint>``.
    """
    endpoint = endpoint.strip()
    # URI schemes are case-insensitive, so "HTTPS://host" already has one.
    if endpoint.lower().startswith(("http://", "https://")):
        return endpoint
    return f"{default_scheme}://{endpoint}"


def matches_filter(sandbox: Sandbox, filter_: SandboxFilter) -> bool:
    """
    Decide whether a sandbox satisfies the state and metadata filters.

    A falsy filter matches everything. When ``filter_.state`` is set, the
    sandbox state must equal one of the listed states, compared
    case-insensitively. When ``filter_.metadata`` is set, every listed
    key/value pair must be present in the sandbox metadata.
    """
    if not filter_:
        return True

    wanted_states = filter_.state
    if wanted_states:
        lowered_states = {name.lower() for name in wanted_states}
        sandbox_state = (sandbox.status.state or "").lower()
        if sandbox_state not in lowered_states:
            return False

    required_metadata = filter_.metadata
    if not required_metadata:
        return True

    actual_metadata = sandbox.metadata or {}
    mismatch = any(
        actual_metadata.get(key) != expected
        for key, expected in required_metadata.items()
    )
    return not mismatch


# ============================================================================
# Ingress helpers
# ============================================================================
def format_ingress_endpoint(
    ingress_config: Optional[IngressConfig],
    sandbox_id: str,
    port: int,
) -> Optional[Endpoint]:
    """
    Build the gateway-ingress ``Endpoint`` for a sandbox port.

    The shape of the result depends on the gateway route mode:

    - wildcard: ``<sandbox_id>-<port>.<address>`` (a leading ``*.`` on the
      address is dropped);
    - uri: ``<address>/<sandbox_id>/<port>``;
    - header: the bare address plus an ``OPEN_SANDBOX_INGRESS_HEADER`` header
      set to ``<sandbox_id>-<port>``.

    Returns:
        The endpoint, or None when ingress is missing, not in gateway mode, or
        has no gateway section.

    Raises:
        RuntimeError: If the gateway route mode is none of the above.
    """
    if not ingress_config:
        return None
    if ingress_config.mode != INGRESS_MODE_GATEWAY:
        return None

    gateway_cfg = ingress_config.gateway
    if gateway_cfg is None:
        return None

    address = gateway_cfg.address
    route_mode = gateway_cfg.route.mode

    if route_mode == GATEWAY_ROUTE_MODE_WILDCARD:
        if address.startswith("*."):
            domain = address[2:]
        else:
            domain = address
        return Endpoint(endpoint=f"{sandbox_id}-{port}.{domain}")

    if route_mode == GATEWAY_ROUTE_MODE_URI:
        uri_endpoint = f"{address}/{sandbox_id}/{port}"
        return Endpoint(endpoint=uri_endpoint)

    if route_mode == GATEWAY_ROUTE_MODE_HEADER:
        routing_headers = {OPEN_SANDBOX_INGRESS_HEADER: f"{sandbox_id}-{port}"}
        return Endpoint(endpoint=address, headers=routing_headers)

    raise RuntimeError(f"Unsupported route mode: {route_mode}")


# Names exported by ``from <this module> import *``.
__all__ = [
    "parse_memory_limit",
    "parse_nano_cpus",
    "parse_timestamp",
    "normalize_external_endpoint_url",
    "format_ingress_endpoint",
    "matches_filter",
]


================================================
FILE: server/src/services/k8s/__init__.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Kubernetes runtime implementation for OpenSandbox.
"""

from src.services.k8s.kubernetes_service import KubernetesSandboxService
from src.services.k8s.provider_factory import (
    create_workload_provider,
    register_provider,
    list_available_providers,
    PROVIDER_TYPE_BATCHSANDBOX,
    PROVIDER_TYPE_AGENT_SANDBOX,
)

# Names re-exported by this package; all of them are imported above.
__all__ = [
    "KubernetesSandboxService",
    "create_workload_provider",
    "register_provider",
    "list_available_providers",
    "PROVIDER_TYPE_BATCHSANDBOX",
    "PROVIDER_TYPE_AGENT_SANDBOX",
]


================================================
FILE: server/src/services/k8s/agent_sandbox_provider.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Agent-sandbox workload provider implementation.
"""

import hashlib
import logging
import re
from datetime import datetime
from typing import Dict, List, Any, Optional

from kubernetes.client import (
    V1Container,
    V1EnvVar,
    V1ResourceRequirements,
    V1VolumeMount,
)

from src.config import AppConfig, EGRESS_MODE_DNS
from src.services.helpers import format_ingress_endpoint
from src.api.schema import Endpoint, ImageSpec, NetworkPolicy, Volume
from src.services.k8s.agent_sandbox_template import AgentSandboxTemplateManager
from src.services.k8s.client import K8sClient
from src.services.k8s.egress_helper import (
    apply_egress_to_spec,
    build_security_context_for_sandbox_container,
    prep_execd_init_for_egress,
)
from src.services.k8s.security_context import (
    build_security_context_from_dict,
    serialize_security_context_to_dict,
)
from src.services.k8s.volume_helper import apply_volumes_to_pod_spec
from src.services.k8s.workload_provider import WorkloadProvider
from src.services.runtime_resolver import SecureRuntimeResolver

logger = logging.getLogger(__name__)

DNS1035_LABEL_MAX_LENGTH = 63
DNS1035_INVALID_CHARS = re.compile(r"[^a-z0-9-]+")
DNS1035_DUPLICATE_HYPHENS = re.compile(r"-+")


def _to_dns1035_label(value: str, prefix: str = "sandbox") -> str:
    normalized = DNS1035_INVALID_CHARS.sub("-", value.strip().lower())
    normalized = DNS1035_DUPLICATE_HYPHENS.sub("-", normalized).strip("-")

    hash_suffix = hashlib.sha256(value.encode("utf-8")).hexdigest()[:8]

    if not normalized:
        normalized = f"{prefix}-{hash_suffix}"
    elif not normalized[0].isalpha():
        normalized = f"{prefix}-{normalized}"

    if len(normalized) > DNS1035_LABEL_MAX_LENGTH:
        max_base = DNS1035_LABEL_MAX_LENGTH - len(hash_suffix) - 1
        base = normalized[:max_base].rstrip("-")
        if not base or not base[0].isalpha():
            base = prefix
        normalized = f"{base}-{hash_suffix}"

    return normalized.strip("-")


class AgentSandboxProvider(WorkloadProvider):
    """
    Workload provider using kubernetes-sigs/agent-sandbox Sandbox CRD.
    """

    def __init__(
        self,
        k8s_client: K8sClient,
        app_config: Optional[AppConfig] = None,
    ):
        """
        Set up the provider from a Kubernetes client and optional app config.

        Args:
            k8s_client: Kubernetes client wrapper used for every API call.
            app_config: Application config. When it is missing, the shutdown
                policy falls back to "Delete" and the service account,
                ingress config, execd init resources, resolver and runtime
                class are all left as None.
        """
        self.k8s_client = k8s_client

        # Coordinates of the Sandbox custom resource.
        self.group = "agents.x-k8s.io"
        self.version = "v1alpha1"
        self.plural = "sandboxes"

        kube_cfg = None
        sandbox_cfg = None
        if app_config:
            kube_cfg = app_config.kubernetes
            sandbox_cfg = app_config.agent_sandbox

        if sandbox_cfg:
            self.shutdown_policy = sandbox_cfg.shutdown_policy
        else:
            self.shutdown_policy = "Delete"

        if kube_cfg:
            self.service_account = kube_cfg.service_account
        else:
            self.service_account = None

        template_path = sandbox_cfg.template_file if sandbox_cfg else None
        self.template_manager = AgentSandboxTemplateManager(template_path)

        self.ingress_config = app_config.ingress if app_config else None

        if kube_cfg:
            self.execd_init_resources = kube_cfg.execd_init_resources
        else:
            self.execd_init_resources = None

        # The secure runtime resolver only exists when a config was supplied;
        # without it no RuntimeClass is injected into pod specs.
        if app_config:
            self.resolver = SecureRuntimeResolver(app_config)
        else:
            self.resolver = None

        if self.resolver:
            self.runtime_class = self.resolver.get_k8s_runtime_class()
        else:
            self.runtime_class = None

    def _resource_name(self, sandbox_id: str) -> str:
        """Convert a sandbox ID into a label via ``_to_dns1035_label`` with the "sandbox" prefix."""
        label = _to_dns1035_label(sandbox_id, prefix="sandbox")
        return label

    def _resource_name_candidates(self, sandbox_id: str) -> List[str]:
        candidates = []
        primary = self._resource_name(sandbox_id)
        candidates.append(primary)
        if sandbox_id not in candidates:
            candidates.append(sandbox_id)
        legacy = self.legacy_resource_name(sandbox_id)
        if legacy not in candidates:
            candidates.append(legacy)
        return candidates

    def create_workload(
        self,
        sandbox_id: str,
        namespace: str,
        image_spec: ImageSpec,
        entrypoint: List[str],
        env: Dict[str, str],
        resource_limits: Dict[str, str],
        labels: Dict[str, str],
        expires_at: Optional[datetime],
        execd_image: str,
        extensions: Optional[Dict[str, str]] = None,
        network_policy: Optional[NetworkPolicy] = None,
        egress_image: Optional[str] = None,
        volumes: Optional[List[Volume]] = None,
        annotations: Optional[Dict[str, str]] = None,
        egress_auth_token: Optional[str] = None,
        egress_mode: str = EGRESS_MODE_DNS,
    ) -> Dict[str, Any]:
        """
        Build a Sandbox custom resource and submit it to the cluster.

        The pod spec is assembled from the arguments, merged with the
        configured template, and the resulting object is created through the
        Kubernetes client. ``extensions`` is accepted but not used by this
        provider.

        Args:
            sandbox_id: Sandbox identifier; the resource name is derived from it.
            namespace: Namespace the Sandbox is created in.
            image_spec: Image for the main container.
            entrypoint: Command appended after the bootstrap script.
            env: Environment variables for the main container.
            resource_limits: Used as both limits and requests.
            labels: Applied to the Sandbox and to its pod template.
            expires_at: When set, written to ``spec.shutdownTime``; when None,
                any ``shutdownTime`` coming from the template is removed.
            execd_image: Image of the execd init container.
            extensions: Unused here.
            network_policy: Forwarded to pod spec construction.
            egress_image: Forwarded to pod spec construction.
            volumes: Extra volumes applied to the pod spec when provided.
            annotations: Added to the Sandbox metadata when non-empty.
            egress_auth_token: Forwarded to pod spec construction.
            egress_mode: Forwarded to pod spec construction.

        Returns:
            Dict with the ``name`` and ``uid`` of the created resource.
        """
        if self.runtime_class:
            logger.info(
                "Using Kubernetes RuntimeClass '%s' for sandbox %s",
                self.runtime_class,
                sandbox_id,
            )

        pod_spec = self._build_pod_spec(
            image_spec=image_spec,
            entrypoint=entrypoint,
            env=env,
            resource_limits=resource_limits,
            execd_image=execd_image,
            network_policy=network_policy,
            egress_image=egress_image,
            egress_auth_token=egress_auth_token,
            egress_mode=egress_mode,
        )

        # User-requested volumes go on top of the base pod spec.
        if volumes:
            apply_volumes_to_pod_spec(pod_spec, volumes)

        if self.service_account:
            pod_spec["serviceAccountName"] = self.service_account

        metadata: Dict[str, Any] = {
            "name": self._resource_name(sandbox_id),
            "namespace": namespace,
            "labels": labels,
        }
        if annotations:
            metadata["annotations"] = annotations

        runtime_manifest = {
            "apiVersion": f"{self.group}/{self.version}",
            "kind": "Sandbox",
            "metadata": metadata,
            "spec": {
                "replicas": 1,
                "shutdownPolicy": self.shutdown_policy,
                "podTemplate": {
                    "metadata": {
                        "labels": labels,
                    },
                    "spec": pod_spec,
                },
            },
        }

        sandbox = self.template_manager.merge_with_runtime_values(runtime_manifest)

        # Decide shutdownTime only after the merge so the request always wins
        # over whatever the template carried.
        if expires_at is not None:
            sandbox["spec"]["shutdownTime"] = expires_at.isoformat()
        else:
            sandbox["spec"].pop("shutdownTime", None)

        created = self.k8s_client.create_custom_object(
            group=self.group,
            version=self.version,
            namespace=namespace,
            plural=self.plural,
            body=sandbox,
        )

        created_meta = created["metadata"]
        return {
            "name": created_meta["name"],
            "uid": created_meta["uid"],
        }

    def _build_pod_spec(
        self,
        image_spec: ImageSpec,
        entrypoint: List[str],
        env: Dict[str, str],
        resource_limits: Dict[str, str],
        execd_image: str,
        network_policy: Optional[NetworkPolicy] = None,
        egress_image: Optional[str] = None,
        egress_auth_token: Optional[str] = None,
        egress_mode: str = EGRESS_MODE_DNS,
    ) -> Dict[str, Any]:
        """
        Assemble the pod spec dict embedded in the Sandbox resource.

        The spec holds the execd init container, the main sandbox container,
        the shared ``opensandbox-bin`` emptyDir volume and, when configured,
        ``runtimeClassName``. The container list is then handed to
        ``apply_egress_to_spec`` together with the egress settings.
        """
        has_policy = network_policy is not None
        execd_init = self._build_execd_init_container(
            execd_image,
            disable_ipv6_for_egress=has_policy and egress_image is not None,
        )
        sandbox_container = self._build_main_container(
            image_spec=image_spec,
            entrypoint=entrypoint,
            env=env,
            resource_limits=resource_limits,
            include_execd_volume=True,
            has_network_policy=has_policy,
        )

        # This exact list object is stored in the pod spec and also passed to
        # apply_egress_to_spec below, so both refer to the same containers.
        containers = [self._container_to_dict(sandbox_container)]

        pod_spec: Dict[str, Any] = {
            "initContainers": [self._container_to_dict(execd_init)],
            "containers": containers,
            "volumes": [{"name": "opensandbox-bin", "emptyDir": {}}],
        }

        # Only set runtimeClassName when a secure runtime is configured.
        if self.runtime_class:
            pod_spec["runtimeClassName"] = self.runtime_class

        apply_egress_to_spec(
            containers=containers,
            network_policy=network_policy,
            egress_image=egress_image,
            egress_auth_token=egress_auth_token,
            egress_mode=egress_mode,
        )

        return pod_spec

    def _build_execd_init_container(
        self,
        execd_image: str,
        *,
        disable_ipv6_for_egress: bool = False,
    ) -> V1Container:
        """
        Build the ``execd-installer`` init container.

        Its shell script copies ``execd`` and ``bootstrap.sh`` into the shared
        ``/opt/opensandbox/bin`` volume and marks both executable.

        Args:
            execd_image: Image that ships the execd binary and bootstrap script.
            disable_ipv6_for_egress: When True, the script and security
                context are taken from ``prep_execd_init_for_egress``.

        Returns:
            The init container object.
        """
        install_steps = [
            "cp ./execd /opt/opensandbox/bin/execd",
            "cp ./bootstrap.sh /opt/opensandbox/bin/bootstrap.sh",
            "chmod +x /opt/opensandbox/bin/execd",
            "chmod +x /opt/opensandbox/bin/bootstrap.sh",
        ]
        script = " && ".join(install_steps)

        security_context = None
        if disable_ipv6_for_egress:
            script, egress_sc = prep_execd_init_for_egress(script)
            security_context = build_security_context_from_dict(egress_sc)

        init_resources = self.execd_init_resources
        resources = (
            V1ResourceRequirements(
                limits=init_resources.limits,
                requests=init_resources.requests,
            )
            if init_resources
            else None
        )

        shared_bin_mount = V1VolumeMount(
            name="opensandbox-bin",
            mount_path="/opt/opensandbox/bin",
        )
        return V1Container(
            name="execd-installer",
            image=execd_image,
            command=["/bin/sh", "-c"],
            args=[script],
            volume_mounts=[shared_bin_mount],
            resources=resources,
            security_context=security_context,
        )

    def _build_main_container(
        self,
        image_spec: ImageSpec,
        entrypoint: List[str],
        env: Dict[str, str],
        resource_limits: Dict[str, str],
        include_execd_volume: bool,
        has_network_policy: bool = False,
    ) -> V1Container:
        """
        Build the main ``sandbox`` container.

        The command is the bootstrap script followed by ``entrypoint``. The
        environment is ``env`` plus an ``EXECD`` variable pointing at the
        installed binary.

        Args:
            image_spec: Supplies the container image URI.
            entrypoint: User command run through the bootstrap script.
            env: User environment variables.
            resource_limits: Used for both limits and requests when non-empty.
            include_execd_volume: Whether to mount the shared bin volume.
            has_network_policy: Whether to attach the sandbox security context
                built by ``build_security_context_for_sandbox_container``.

        Returns:
            The main container object.
        """
        env_vars = [V1EnvVar(name=key, value=val) for key, val in env.items()]
        # Appended last so it is always present; the list is never empty.
        env_vars.append(V1EnvVar(name="EXECD", value="/opt/opensandbox/bin/execd"))

        resources = (
            V1ResourceRequirements(limits=resource_limits, requests=resource_limits)
            if resource_limits
            else None
        )

        wrapped_command = ["/opt/opensandbox/bin/bootstrap.sh"] + entrypoint

        if include_execd_volume:
            volume_mounts = [
                V1VolumeMount(
                    name="opensandbox-bin",
                    mount_path="/opt/opensandbox/bin",
                )
            ]
        else:
            volume_mounts = None

        if has_network_policy:
            security_context = build_security_context_from_dict(
                build_security_context_for_sandbox_container(True)
            )
        else:
            security_context = None

        return V1Container(
            name="sandbox",
            image=image_spec.uri,
            command=wrapped_command,
            env=env_vars,
            resources=resources,
            volume_mounts=volume_mounts,
            security_context=security_context,
        )

    def _container_to_dict(self, container: V1Container) -> Dict[str, Any]:
        """Convert a V1Container object to a plain dict for CRD body."""
        result: Dict[str, Any] = {
            "name": container.name,
            "image": container.image,
        }

        if container.command:
            result["command"] = container.command
        if container.args:
            result["args"] = container.args
        if container.env:
            result["env"] = [{"name": e.name, "value": e.value} for e in container.env]
        if container.resources:
            result["resources"] = {}
            if container.resources.limits:
                result["resources"]["limits"] = container.resources.limits
            if container.resources.requests:
                result["resources"]["requests"] = container.resources.requests
        if container.volume_mounts:
            result["volumeMounts"] = [
                {"name": vm.name, "mountPath": vm.mount_path}
                for vm in container.volume_mounts
            ]
        if container.security_context:
            security_context_dict = serialize_security_context_to_dict(container.security_context)
            if security_context_dict:
                result["securityContext"] = security_context_dict

        return result

    def get_workload(self, sandbox_id: str, namespace: str) -> Optional[Dict[str, Any]]:
        """Get Sandbox CRD by sandbox ID, trying all candidate resource names."""
        candidates = self._resource_name_candidates(sandbox_id)

        for name in candidates:
            workload = self.k8s_client.get_custom_object(
                group=self.group,
                version=self.version,
                namespace=namespace,
                plural=self.plural,
                name=name,
            )
            if workload:
                return workload

        return None

    def delete_workload(self, sandbox_id: str, namespace: str) -> None:
        """Delete the Sandbox CRD for the given sandbox ID."""
        sandbox = self.get_workload(sandbox_id, namespace)
        if not sandbox:
            raise Exception(f"Sandbox for sandbox {sandbox_id} not found")

        self.k8s_client.delete_custom_object(
            group=self.group,
            version=self.version,
            namespace=namespace,
            plural=self.plural,
            name=sandbox["metadata"]["name"],
            grace_period_seconds=0,
        )

    def list_workloads(self, namespace: str, label_selector: str) -> List[Dict[str, Any]]:
        """List Sandbox CRDs matching the given label selector."""
        return self.k8s_client.list_custom_objects(
            group=self.group,
            version=self.version,
            namespace=namespace,
            plural=self.plural,
            label_selector=label_selector,
        )

    def update_expiration(self, sandbox_id: str, namespace: str, expires_at: datetime) -> None:
        """Patch the Sandbox CRD shutdownTime field."""
        sandbox = self.get_workload(sandbox_id, namespace)
        if not sandbox:
            raise Exception(f"Sandbox for sandbox {sandbox_id} not found")

        body = {
            "spec": {
                "shutdownTime": expires_at.isoformat(),
            }
        }

        self.k8s_client.patch_custom_object(
            group=self.group,
            version=self.version,
            namespace=namespace,
            plural=self.plural,
            name=sandbox["metadata"]["name"],
            body=body,
        )

    def get_expiration(self, workload: Dict[str, Any]) -> Optional[datetime]:
        """Parse shutdownTime from Sandbox CRD spec."""
        spec = workload.get("spec", {})
        shutdown_time_str = spec.get("shutdownTime")

        if not shutdown_time_str:
            return None

        try:
            return datetime.fromisoformat(shutdown_time_str.replace("Z", "+00:00"))
        except (ValueError, TypeError) as e:
            logger.warning("Invalid shutdownTime format: %s, error: %s", shutdown_time_str, e)
            return None

    def get_status(self, workload: Dict[str, Any]) -> Dict[str, Any]:
        """Derive sandbox state from the Sandbox CRD status conditions."""
        status = workload.get("status", {})
        conditions = status.get("conditions", [])

        ready_condition = None
        for condition in conditions:
            if condition.get("type") == "Ready":
                ready_condition = condition
                break

        creation_timestamp = workload.get("metadata", {}).get("creationTimestamp")

        if not ready_condition:
            pod_state = self._pod_state_from_selector(workload)
            if pod_state:
                state, reason, message = pod_state
                return {
                    "state": state,
                    "reason": reason,
                    "message": message,
                    "last_transition_at": creation_timestamp,
                }
            return {
                "state": "Pending",
                "reason": "SANDBOX_PENDING",
                "message": "Sandbox is pending scheduling",
                "last_transition_at": creation_timestamp,
            }

        cond_status = ready_condition.get("status")
        reason = ready_condition.get("reason")
        message = ready_condition.get("message")
        last_transition_at = ready_condition.get("lastTransitionTime") or creation_timestamp

        if cond_status == "True":
            state = "Running"
        elif reason == "SandboxExpired":
            state = "Terminated"
        elif cond_status == "False":
            state = "Pending"
        else:
            state = "Pending"

        return {
            "state": state,
            "reason": reason,
            "message": message,
            "last_transition_at": last_transition_at,
        }

    def _pod_state_from_selector(self, workload: Dict[str, Any]) -> Optional[tuple[str, str, str]]:
        """Resolve state from Pod list via label selector.

        Returns three-state tuple (state, reason, message):
        - Running: Pod phase Running and has IP
        - Allocated: Pod has IP assigned but not Running yet
        - Pending: Pod scheduled but no IP yet
        Returns None if selector/namespace missing or API call fails.
        """
        status = workload.get("status", {})
        selector = status.get("selector")
        namespace = workload.get("metadata", {}).get("namespace")
        if not selector or not namespace:
            return None

        try:
            pods = self.k8s_client.list_pods(
                namespace=namespace,
                label_selector=selector,
            )
        except Exception:
            return None

        for pod in pods:
            if pod.status:
                if pod.status.pod_ip and pod.status.phase == "Running":
                    return (
                        "Running",
                        "POD_READY",
                        "Pod is running with IP assigned",
                    )
                if pod.status.pod_ip:
                    return (
                        "Allocated",
                        "IP_ASSIGNED",
                        "Pod has IP assigned but not running yet",
                    )
                return (
                    "Pending",
                    "POD_SCHEDULED",
                    "Pod is scheduled but waiting for IP assignment",
                )

        if pods:
            return ("Pending", "POD_PENDING", "Pod is pending")

        return None

    def get_endpoint_info(self, workload: Dict[str, Any], port: int, sandbox_id: str) -> Optional[Endpoint]:
        """
        Resolve the endpoint for ``port`` of a sandbox.

        Sources are tried in this order: the ingress endpoint built by
        ``format_ingress_endpoint``, the IP of the first Running pod matched
        by ``status.selector``, then ``status.serviceFQDN``.

        Returns:
            The first endpoint that resolves, or None when none does.
        """
        # A configured ingress (gateway) takes precedence over direct access.
        via_ingress = format_ingress_endpoint(self.ingress_config, sandbox_id, port)
        if via_ingress:
            return via_ingress

        status = workload.get("status", {})
        selector = status.get("selector")
        namespace = workload.get("metadata", {}).get("namespace")
        if selector and namespace:
            try:
                matched_pods = self.k8s_client.list_pods(
                    namespace=namespace,
                    label_selector=selector,
                )
                running_ip = next(
                    (
                        pod.status.pod_ip
                        for pod in matched_pods
                        if pod.status and pod.status.pod_ip and pod.status.phase == "Running"
                    ),
                    None,
                )
                if running_ip:
                    return Endpoint(endpoint=f"{running_ip}:{port}")
            except Exception as e:
                # Pod lookup is best effort; fall through to the service FQDN.
                logger.warning("Failed to resolve pod endpoint: %s", e)

        service_fqdn = status.get("serviceFQDN")
        return Endpoint(endpoint=f"{service_fqdn}:{port}") if service_fqdn else None


================================================
FILE: server/src/services/k8s/agent_sandbox_template.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Agent-sandbox template loader and merger.
"""

from typing import Optional

from src.services.k8s.template_manager import BaseSandboxTemplateManager


class AgentSandboxTemplateManager(BaseSandboxTemplateManager):
    """
    Template manager specialised for agent-sandbox Sandbox custom resources.

    All behaviour comes from ``BaseSandboxTemplateManager``; this subclass only
    fixes the template kind to "Agent-sandbox".
    """

    def __init__(self, template_file_path: Optional[str] = None):
        """
        Args:
            template_file_path: Optional path of the template file, passed to
                the base class unchanged.
        """
        super().__init__(
            template_file_path,
            template_kind="Agent-sandbox",
        )


================================================
FILE: server/src/services/k8s/batchsandbox_provider.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
BatchSandbox-based workload provider implementation.
"""

import logging
import json
import shlex
from datetime import datetime
from typing import Dict, List, Any, Optional

from kubernetes.client import (
    V1Container,
    V1EnvVar,
    V1ResourceRequirements,
    V1VolumeMount,
)

from src.config import AppConfig, EGRESS_MODE_DNS, INGRESS_MODE_GATEWAY
from src.services.helpers import format_ingress_endpoint
from src.api.schema import Endpoint, ImageSpec, NetworkPolicy, Volume
from src.services.k8s.image_pull_secret_helper import (
    build_image_pull_secret,
    build_image_pull_secret_name,
)
from src.services.k8s.batchsandbox_template import BatchSandboxTemplateManager
from src.services.k8s.client import K8sClient
from src.services.k8s.egress_helper import (
    apply_egress_to_spec,
    build_security_context_for_sandbox_container,
    prep_execd_init_for_egress,
)
from src.services.k8s.security_context import (
    build_security_context_from_dict,
    serialize_security_context_to_dict,
)
from src.services.k8s.volume_helper import apply_volumes_to_pod_spec
from src.services.k8s.workload_provider import WorkloadProvider
from src.services.runtime_resolver import SecureRuntimeResolver

logger = logging.getLogger(__name__)


class BatchSandboxProvider(WorkloadProvider):
    """
    Workload provider using BatchSandbox CRD.
    
    BatchSandbox is a custom resource that manages Pod lifecycle
    and provides additional features like task management.
    """
    
    def __init__(
        self,
        k8s_client: K8sClient,
        app_config: Optional[AppConfig] = None,
    ):
        """
        Set up the BatchSandbox provider.

        Args:
            k8s_client: Kubernetes client wrapper.
            app_config: Application config; the kubernetes and ingress
                sub-configs are read from it. When omitted, the ingress
                config, execd init resources, resolver and runtime class are
                None and no template file is used.
        """
        self.k8s_client = k8s_client

        if app_config:
            self.ingress_config = app_config.ingress
            kube_cfg = app_config.kubernetes
        else:
            self.ingress_config = None
            kube_cfg = None

        template_path = kube_cfg.batchsandbox_template_file if kube_cfg else None
        if template_path:
            logger.info("Using BatchSandbox template file: %s", template_path)

        if kube_cfg:
            self.execd_init_resources = kube_cfg.execd_init_resources
        else:
            self.execd_init_resources = None

        # The secure runtime resolver only exists when a config was supplied.
        if app_config:
            self.resolver = SecureRuntimeResolver(app_config)
        else:
            self.resolver = None

        if self.resolver:
            self.runtime_class = self.resolver.get_k8s_runtime_class()
        else:
            self.runtime_class = None

        # Coordinates of the BatchSandbox custom resource.
        self.group = "sandbox.opensandbox.io"
        self.version = "v1alpha1"
        self.plural = "batchsandboxes"

        self.template_manager = BatchSandboxTemplateManager(template_path)

    def supports_image_auth(self) -> bool:
        """Report that this provider accepts image pull credentials (always True)."""
        image_auth_supported = True
        return image_auth_supported

    def create_workload(
        self,
        sandbox_id: str,
        namespace: str,
        image_spec: ImageSpec,
        entrypoint: List[str],
        env: Dict[str, str],
        resource_limits: Dict[str, str],
        labels: Dict[str, str],
        expires_at: Optional[datetime],
        execd_image: str,
        extensions: Optional[Dict[str, str]] = None,
        network_policy: Optional[NetworkPolicy] = None,
        egress_image: Optional[str] = None,
        volumes: Optional[List[Volume]] = None,
        annotations: Optional[Dict[str, str]] = None,
        egress_auth_token: Optional[str] = None,
        egress_mode: str = EGRESS_MODE_DNS,
    ) -> Dict[str, Any]:
        """
        Create a BatchSandbox workload.

        Supports both template-based and pool-based creation:
        - Template mode (default): Creates workload with user-specified image, resources, and env
        - Pool mode (when extensions contains a non-empty 'poolRef'): Creates workload from a
          pre-warmed pool; only entrypoint and env can be customized. Labels, annotations and
          the expiration time are still applied to the BatchSandbox resource itself.

        Args:
            sandbox_id: Unique sandbox identifier; also used as the resource name
            namespace: Kubernetes namespace
            image_spec: Container image specification (not used in pool mode)
            entrypoint: Container entrypoint command
            env: Environment variables
            resource_limits: Resource limits (not used in pool mode)
            labels: Labels to apply
            expires_at: Expiration time; when None no expireTime is set
            execd_image: execd daemon image (not used in pool mode)
            extensions: General extension field for additional configuration.
                When contains 'poolRef', enables pool-based creation.
            network_policy: Optional network policy for egress traffic control
                (not used in pool mode).
            egress_image: Container image for the egress sidecar (not used in pool mode).
            volumes: Optional list of volume mounts for the sandbox (rejected in pool mode).
            annotations: Optional annotations added to the BatchSandbox metadata.
            egress_auth_token: Forwarded to the egress helper (not used in pool mode).
            egress_mode: Forwarded to the egress helper (not used in pool mode).

        Returns:
            Dict with 'name' and 'uid' of created BatchSandbox

        Raises:
            ValueError: If pool mode is used with volumes (not supported).
        """
        extensions = extensions or {}

        # Log runtime class usage for debugging
        if self.runtime_class:
            logger.info(
                "Using Kubernetes RuntimeClass '%s' for sandbox %s",
                self.runtime_class,
                sandbox_id,
            )

        # If poolRef is provided and not empty, create workload from pool
        if extensions.get("poolRef"):
            # Pool mode does not support volumes
            if volumes:
                raise ValueError(
                    "Pool mode does not support volumes. "
                    "Remove 'volumes' from request or use template mode."
                )
            # When using pool, only entrypoint and env can be customized.
            # Annotations are resource metadata, so they are forwarded too
            # instead of being silently dropped.
            return self._create_workload_from_pool(
                batchsandbox_name=sandbox_id,
                namespace=namespace,
                labels=labels,
                pool_ref=extensions["poolRef"],
                expires_at=expires_at,
                entrypoint=entrypoint,
                env=env,
                annotations=annotations,
            )

        # Extract extra pod spec fragments from template (volumes/volumeMounts only).
        extra_volumes, extra_mounts = self._extract_template_pod_extras()

        # Build init container for execd installation
        disable_ipv6_for_egress = network_policy is not None and egress_image is not None
        init_container = self._build_execd_init_container(
            execd_image, disable_ipv6_for_egress=disable_ipv6_for_egress
        )

        # Build main container with execd support
        main_container = self._build_main_container(
            image_spec=image_spec,
            entrypoint=entrypoint,
            env=env,
            resource_limits=resource_limits,
            has_network_policy=network_policy is not None,
        )

        # Build containers list; the same list object is stored in the pod
        # spec and handed to apply_egress_to_spec below.
        containers = [self._container_to_dict(main_container)]

        # Build base pod spec
        pod_spec: Dict[str, Any] = {
            "initContainers": [self._container_to_dict(init_container)],
            "containers": containers,
            "volumes": [
                {
                    "name": "opensandbox-bin",
                    "emptyDir": {}
                }
            ],
        }

        # Inject runtimeClassName if secure runtime is configured
        if self.runtime_class:
            pod_spec["runtimeClassName"] = self.runtime_class

        # Inject imagePullSecrets if image auth is provided
        # secret_name is deterministic so it can be embedded before the Secret is created
        if image_spec.auth:
            secret_name = build_image_pull_secret_name(sandbox_id)
            pod_spec["imagePullSecrets"] = [{"name": secret_name}]

        # Add egress sidecar if network policy is provided
        apply_egress_to_spec(
            containers=containers,
            network_policy=network_policy,
            egress_image=egress_image,
            egress_auth_token=egress_auth_token,
            egress_mode=egress_mode,
        )

        # Add user-specified volumes if provided
        if volumes:
            apply_volumes_to_pod_spec(pod_spec, volumes)

        spec: Dict[str, Any] = {
            "replicas": 1,
            "template": {
                "spec": pod_spec,
            },
        }
        runtime_manifest = {
            "apiVersion": f"{self.group}/{self.version}",
            "kind": "BatchSandbox",
            "metadata": {
                "name": sandbox_id,
                "namespace": namespace,
                "labels": labels,
            },
            "spec": spec,
        }
        if annotations:
            runtime_manifest["metadata"]["annotations"] = annotations

        # Merge with template to get final manifest
        batchsandbox = self.template_manager.merge_with_runtime_values(runtime_manifest)
        # Set or strip expireTime after merge so we override any template value
        if expires_at is None:
            batchsandbox["spec"].pop("expireTime", None)
        else:
            batchsandbox["spec"]["expireTime"] = expires_at.isoformat()
        self._merge_pod_spec_extras(batchsandbox, extra_volumes, extra_mounts)

        # Create BatchSandbox
        created = self.k8s_client.create_custom_object(
            group=self.group,
            version=self.version,
            namespace=namespace,
            plural=self.plural,
            body=batchsandbox,
        )

        # Create imagePullSecret with ownerReference pointing to the BatchSandbox
        if image_spec.auth:
            secret = build_image_pull_secret(
                sandbox_id=sandbox_id,
                image_uri=image_spec.uri,
                auth=image_spec.auth,
                owner_uid=created["metadata"]["uid"],
                owner_api_version=f"{self.group}/{self.version}",
                owner_kind="BatchSandbox",
            )
            try:
                self.k8s_client.create_secret(namespace=namespace, body=secret)
                logger.info("Created imagePullSecret for sandbox %s", sandbox_id)
            except Exception:
                # Without the secret the pod cannot pull its image, so remove
                # the BatchSandbox again and surface the original error.
                logger.warning("Failed to create imagePullSecret for sandbox %s, rolling back BatchSandbox", sandbox_id)
                try:
                    self.k8s_client.delete_custom_object(
                        group=self.group,
                        version=self.version,
                        namespace=namespace,
                        plural=self.plural,
                        name=sandbox_id,
                        grace_period_seconds=0,
                    )
                except Exception as del_exc:
                    logger.warning("Failed to rollback BatchSandbox %s: %s", sandbox_id, del_exc)
                raise

        return {
            "name": created["metadata"]["name"],
            "uid": created["metadata"]["uid"],
        }

    def _create_workload_from_pool(
        self,
        batchsandbox_name: str,
        namespace: str,
        labels: Dict[str, str],
        pool_ref: str,
        expires_at: Optional[datetime],
        entrypoint: List[str],
        env: Dict[str, str],
        annotations: Optional[Dict[str, str]] = None,
    ) -> Dict[str, Any]:
        """
        Create BatchSandbox workload from a pre-warmed resource pool.

        Pool-based creation uses poolRef to reference an existing pool.
        The manifest is sent as-is, without template merging; the task
        template is built from entrypoint and env.

        Args:
            batchsandbox_name: Name of the BatchSandbox resource
            namespace: Kubernetes namespace
            labels: Labels to apply
            pool_ref: Reference to the resource pool
            expires_at: Expiration time; when None no expireTime is set
            entrypoint: Container entrypoint command (can be customized)
            env: Environment variables (can be customized)
            annotations: Optional annotations added to the resource metadata
                when non-empty

        Returns:
            Dict with 'name' and 'uid' of created BatchSandbox
        """
        spec: Dict[str, Any] = {
            "replicas": 1,
            "poolRef": pool_ref,
            "taskTemplate": self._build_task_template(entrypoint, env),
        }
        if expires_at is not None:
            spec["expireTime"] = expires_at.isoformat()

        metadata: Dict[str, Any] = {
            "name": batchsandbox_name,
            "namespace": namespace,
            "labels": labels,
        }
        if annotations:
            metadata["annotations"] = annotations

        runtime_manifest = {
            "apiVersion": f"{self.group}/{self.version}",
            "kind": "BatchSandbox",
            "metadata": metadata,
            "spec": spec,
        }

        # Pool-based creation does not need template merging
        # Create BatchSandbox directly
        created = self.k8s_client.create_custom_object(
            group=self.group,
            version=self.version,
            namespace=namespace,
            plural=self.plural,
            body=runtime_manifest,
        )

        return {
            "name": created["metadata"]["name"],
            "uid": created["metadata"]["uid"],
        }

    def _extract_template_pod_extras(self) -> tuple[list[Dict[str, Any]], list[Dict[str, Any]]]:
        """
        Extract extra volumes and volume mounts from the BatchSandbox template.

        Only these fields are supported here because runtime manifests must
        always inject execd init container, main container, and volumes.
        """
        template = self.template_manager.get_base_template()
        spec = template.get("spec", {}) if isinstance(template, dict) else {}
        template_spec = spec.get("template", {}).get("spec", {})
        extra_volumes = template_spec.get("volumes", []) or []

        extra_mounts: list[Dict[str, Any]] = []
        containers = template_spec.get("containers", []) or []
        if containers:
            # Prefer container named "sandbox" if present, otherwise first container.
            target = None
            for container in containers:
                if container.get("name") == "sandbox":
                    target = container
                    break
            if target is None:
                target = containers[0]
            extra_mounts = target.get("volumeMounts", []) or []

        if not isinstance(extra_volumes, list):
            extra_volumes = []
        if not isinstance(extra_mounts, list):
            extra_mounts = []
        return extra_volumes, extra_mounts

    def _merge_pod_spec_extras(
        self,
        batchsandbox: Dict[str, Any],
        extra_volumes: list[Dict[str, Any]],
        extra_mounts: list[Dict[str, Any]],
    ) -> None:
        """
        Merge extra volumes/volumeMounts into the runtime-generated pod spec.

        This keeps execd injections intact while allowing user templates to
        provide additional read-only mounts (e.g., shared skills directory).
        """
        try:
            spec = batchsandbox["spec"]["template"]["spec"]
        except KeyError:
            return

        # Merge volumes by name (do not overwrite existing runtime volumes).
        volumes = spec.get("volumes", []) or []
        if isinstance(volumes, list) and extra_volumes:
            existing = {v.get("name") for v in volumes if isinstance(v, dict)}
            for vol in extra_volumes:
                if not isinstance(vol, dict):
                    continue
                name = vol.get("name")
                if not name or name in existing:
                    continue
                volumes.append(vol)
                existing.add(name)
            spec["volumes"] = volumes

        # Merge volumeMounts into the main container (index 0).
        containers = spec.get("containers", []) or []
        if not containers or not isinstance(containers, list):
            return
        main_container = containers[0]
        mounts = main_container.get("volumeMounts", []) or []
        if isinstance(mounts, list) and extra_mounts:
            existing = {m.get("name") for m in mounts if isinstance(m, dict)}
            for mnt in extra_mounts:
                if not isinstance(mnt, dict):
                    continue
                name = mnt.get("name")
                if not name or name in existing:
                    continue
                mounts.append(mnt)
                existing.add(name)
            main_container["volumeMounts"] = mounts

    # TODO: support empty cmd or env
    def _build_task_template(
        self,
        entrypoint: List[str],
        env: Dict[str, str],
    ) -> Dict[str, Any]:
        """
        Build taskTemplate for pool-based BatchSandbox.
        
        In pool mode, task should use bootstrap.sh to start execd and business process.
        
        Generated command example:
            /bin/sh -c "/opt/opensandbox/bin/bootstrap.sh python app.py &"
        
        Note: All entrypoint arguments are properly shell-escaped using shlex.quote
        to prevent shell injection and preserve arguments with spaces or special characters.
        
        Args:
            entrypoint: Container entrypoint command
            env: Environment variables
            
        Returns:
            Dict: taskTemplate specification with TaskSpec structure
        """
        # Build command: execute bootstrap.sh with entrypoint in background
        # Use shlex.quote to safely escape each entrypoint argument to prevent shell injection
        escaped_entrypoint = ' '.join(shlex.quote(arg) for arg in entrypoint)
        user_process_cmd = f"/opt/opensandbox/bin/bootstrap.sh {escaped_entrypoint} &"
        
        wrapped_command = ["/bin/sh", "-c", user_process_cmd]
        
        # Convert env dict to k8s EnvVar format
        env_list = [{"name": k, "value": v} for k, v in env.items()] if env else []
        
        # Return TaskTemplateSpec structure
        return {
            "spec": {
                "process": {
                    "command": wrapped_command,
                    "env": env_list,
                }
            }
        }
    
    def _build_execd_init_container(
        self,
        execd_image: str,
        *,
        disable_ipv6_for_egress: bool = False,
    ) -> V1Container:
        """
        Build the init container that installs execd into the shared volume.

        The container runs from ``execd_image`` and copies ``./execd`` and
        ``./bootstrap.sh`` into ``/opt/opensandbox/bin`` (the
        ``opensandbox-bin`` volume), marking both executable so the main
        container can launch them.

        Args:
            execd_image: Image that ships the execd binary and bootstrap.sh
            disable_ipv6_for_egress: When True, the install script is wrapped by
                ``prep_execd_init_for_egress`` and the security context it
                returns is applied to this container; used with the egress
                sidecar.

        Returns:
            V1Container: Init container spec named "execd-installer"
        """
        install_steps = [
            "cp ./execd /opt/opensandbox/bin/execd",
            "cp ./bootstrap.sh /opt/opensandbox/bin/bootstrap.sh",
            "chmod +x /opt/opensandbox/bin/execd",
            "chmod +x /opt/opensandbox/bin/bootstrap.sh",
        ]
        install_script = " && ".join(install_steps)

        init_security_context = None
        if disable_ipv6_for_egress:
            install_script, sc_spec = prep_execd_init_for_egress(install_script)
            init_security_context = build_security_context_from_dict(sc_spec)

        # Resource requirements are optional and come from provider configuration.
        init_resources = None
        if self.execd_init_resources:
            init_resources = V1ResourceRequirements(
                limits=self.execd_init_resources.limits,
                requests=self.execd_init_resources.requests,
            )

        shared_bin_mount = V1VolumeMount(
            name="opensandbox-bin",
            mount_path="/opt/opensandbox/bin"
        )
        return V1Container(
            name="execd-installer",
            image=execd_image,
            command=["/bin/sh", "-c"],
            args=[install_script],
            volume_mounts=[shared_bin_mount],
            resources=init_resources,
            security_context=init_security_context,
        )
    
    def _build_main_container(
        self,
        image_spec: ImageSpec,
        entrypoint: List[str],
        env: Dict[str, str],
        resource_limits: Dict[str, str],
        has_network_policy: bool = False,
    ) -> V1Container:
        """
        Build the "sandbox" container that runs the user's workload.

        The entrypoint is prefixed with ``/opt/opensandbox/bin/bootstrap.sh``
        and an ``EXECD`` environment variable pointing at the installed execd
        binary is appended to the user's environment.

        Args:
            image_spec: Container image specification (its ``uri`` is used)
            entrypoint: Command and arguments supplied by the user
            env: User environment variables
            resource_limits: Resource limits; also used as requests
            has_network_policy: When True, the security context from
                ``build_security_context_for_sandbox_container`` is applied

        Returns:
            V1Container: Main container spec
        """
        container_env = [V1EnvVar(name=key, value=val) for key, val in env.items()]
        # Tell the bootstrap script where the execd binary lives.
        container_env.append(V1EnvVar(name="EXECD", value="/opt/opensandbox/bin/execd"))

        # requests == limits gives the pod Guaranteed QoS.
        container_resources = (
            V1ResourceRequirements(limits=resource_limits, requests=resource_limits)
            if resource_limits
            else None
        )

        bootstrap_command = ["/opt/opensandbox/bin/bootstrap.sh"] + entrypoint

        container_security_context = None
        if has_network_policy:
            container_security_context = build_security_context_from_dict(
                build_security_context_for_sandbox_container(True)
            )

        shared_bin_mount = V1VolumeMount(
            name="opensandbox-bin",
            mount_path="/opt/opensandbox/bin"
        )
        return V1Container(
            name="sandbox",
            image=image_spec.uri,
            command=bootstrap_command,
            env=container_env or None,
            resources=container_resources,
            volume_mounts=[shared_bin_mount],
            security_context=container_security_context,
        )
    
    def _container_to_dict(self, container: V1Container) -> Dict[str, Any]:
        """
        Serialize a V1Container into the plain dict form used in the CRD body.

        Only the fields this provider sets are emitted: name, image, command,
        args, env (name/value pairs), resources (limits/requests),
        volumeMounts (name/mountPath) and securityContext. Empty or unset
        fields are omitted.

        Args:
            container: V1Container object

        Returns:
            Dict representation of the container
        """
        payload: Dict[str, Any] = {"name": container.name, "image": container.image}

        if container.command:
            payload["command"] = container.command
        if container.args:
            payload["args"] = container.args

        if container.env:
            env_entries = []
            for env_var in container.env:
                env_entries.append({"name": env_var.name, "value": env_var.value})
            payload["env"] = env_entries

        requirements = container.resources
        if requirements:
            resources_payload: Dict[str, Any] = {}
            if requirements.limits:
                resources_payload["limits"] = requirements.limits
            if requirements.requests:
                resources_payload["requests"] = requirements.requests
            payload["resources"] = resources_payload

        if container.volume_mounts:
            mount_entries = []
            for mount in container.volume_mounts:
                mount_entries.append({"name": mount.name, "mountPath": mount.mount_path})
            payload["volumeMounts"] = mount_entries

        if container.security_context:
            serialized_context = serialize_security_context_to_dict(container.security_context)
            # An empty serialization result is dropped rather than emitted.
            if serialized_context:
                payload["securityContext"] = serialized_context

        return payload

    def get_workload(self, sandbox_id: str, namespace: str) -> Optional[Dict[str, Any]]:
        """
        Look up the BatchSandbox for a sandbox ID.

        The resource is first fetched under the sandbox ID itself. If that
        yields nothing and ``legacy_resource_name`` maps the ID to a different
        name (sandboxes created before the naming change), a second lookup is
        made under the legacy name.

        Returns:
            The BatchSandbox dict, or None when neither lookup finds it.
        """
        def _fetch(resource_name: str) -> Optional[Dict[str, Any]]:
            return self.k8s_client.get_custom_object(
                group=self.group,
                version=self.version,
                namespace=namespace,
                plural=self.plural,
                name=resource_name,
            )

        current = _fetch(sandbox_id)
        if current:
            return current

        # Only retry when the legacy name actually differs from the ID.
        fallback_name = self.legacy_resource_name(sandbox_id)
        if fallback_name == sandbox_id:
            return None
        return _fetch(fallback_name)
    
    def delete_workload(self, sandbox_id: str, namespace: str) -> None:
        """
        Delete the BatchSandbox belonging to a sandbox.

        The resource is resolved through ``get_workload`` first so that the
        actual resource name (which may be a legacy name) is used for the
        delete call. Deletion is requested with a grace period of 0 seconds.

        Raises:
            Exception: If no BatchSandbox exists for ``sandbox_id``
        """
        existing = self.get_workload(sandbox_id, namespace)
        if not existing:
            raise Exception(f"BatchSandbox for sandbox {sandbox_id} not found")

        resource_name = existing["metadata"]["name"]
        self.k8s_client.delete_custom_object(
            group=self.group,
            version=self.version,
            namespace=namespace,
            plural=self.plural,
            name=resource_name,
            grace_period_seconds=0,
        )
    
    def list_workloads(self, namespace: str, label_selector: str) -> List[Dict[str, Any]]:
        """
        Return the BatchSandboxes in ``namespace`` that match ``label_selector``.

        This delegates directly to the K8s client's custom-object listing for
        this provider's group/version/plural.
        """
        query = {
            "group": self.group,
            "version": self.version,
            "namespace": namespace,
            "plural": self.plural,
            "label_selector": label_selector,
        }
        return self.k8s_client.list_custom_objects(**query)
    
    def update_expiration(self, sandbox_id: str, namespace: str, expires_at: datetime) -> None:
        """
        Set a new expiration time on an existing BatchSandbox.

        The resource is resolved through ``get_workload`` and then patched so
        that ``spec.expireTime`` holds ``expires_at.isoformat()``.

        Args:
            sandbox_id: Sandbox ID
            namespace: Kubernetes namespace
            expires_at: New expiration time

        Raises:
            Exception: If the BatchSandbox is not found; errors from the patch
                call propagate unchanged
        """
        existing = self.get_workload(sandbox_id, namespace)
        if not existing:
            raise Exception(f"BatchSandbox for sandbox {sandbox_id} not found")

        # Only spec.expireTime is sent in the patch body.
        expire_patch = {"spec": {"expireTime": expires_at.isoformat()}}
        resource_name = existing["metadata"]["name"]

        self.k8s_client.patch_custom_object(
            group=self.group,
            version=self.version,
            namespace=namespace,
            plural=self.plural,
            name=resource_name,
            body=expire_patch,
        )
    
    def get_expiration(self, workload: Dict[str, Any]) -> Optional[datetime]:
        """Get expiration time from BatchSandbox.

        Reads ``spec.expireTime`` and parses it as an ISO-8601 timestamp. A
        trailing ``Z`` is rewritten to ``+00:00`` so that
        ``datetime.fromisoformat`` accepts it on older Python versions.

        Args:
            workload: BatchSandbox dict

        Returns:
            Expiration datetime, or None if the field is not set, ``spec`` is
            missing/null, or the value is not a parseable timestamp string.
        """
        # `spec` may be present but null; treat that the same as missing.
        spec = workload.get("spec") or {}
        expire_time_str = spec.get("expireTime") if isinstance(spec, dict) else None

        if not expire_time_str:
            return None

        try:
            # Parse ISO format datetime
            return datetime.fromisoformat(expire_time_str.replace('Z', '+00:00'))
        except (ValueError, TypeError, AttributeError) as e:
            # AttributeError covers non-string values (no .replace), which are
            # just another form of "invalid" and must not crash the caller.
            logger.warning("Invalid expireTime format: %s, error: %s", expire_time_str, e)
            return None

    def _parse_pod_ip(self, workload: Dict[str, Any]) -> Optional[str]:
        """Parse the first Pod IP from the endpoints annotation.

        Returns the IP string if the annotation exists and contains a non-empty
        JSON array, otherwise returns None.
        """
        annotations = workload.get("metadata", {}).get("annotations", {})
        endpoints_str = annotations.get("sandbox.opensandbox.io/endpoints")
        if not endpoints_str:
            return None
        try:
            endpoints = json.loads(endpoints_str)
            if endpoints and len(endpoints) > 0:
                return endpoints[0]
        except (json.JSONDecodeError, IndexError, TypeError):
            pass
        return None

    def get_status(self, workload: Dict[str, Any]) -> Dict[str, Any]:
        """
        Derive a sandbox state from a BatchSandbox resource.

        Inputs are the ``status.replicas``, ``status.allocated`` and
        ``status.ready`` counters (each defaulting to 0) plus the Pod IP parsed
        from the endpoints annotation. The resulting state is:

        - "Running": ``ready == 1`` and a Pod IP is known
        - "Allocated": a Pod IP is known but the pod is not ready
        - "Pending": no Pod IP yet (reason distinguishes scheduled vs. not)

        Returns:
            Dict with "state", "reason", "message" and "last_transition_at"
            (the resource's ``metadata.creationTimestamp``).
        """
        counters = workload.get("status", {})
        replicas = counters.get("replicas", 0)
        ready = counters.get("ready", 0)
        allocated = counters.get("allocated", 0)

        pod_ip = self._parse_pod_ip(workload)

        if pod_ip and ready == 1:
            state, reason = "Running", "POD_READY_WITH_IP"
            message = f"Pod is ready with IP ({ready}/{replicas} ready)"
        elif pod_ip:
            state, reason = "Allocated", "IP_ASSIGNED"
            message = f"Pod has IP assigned but not ready ({allocated}/{replicas} allocated, {ready} ready)"
        else:
            # No IP yet: either scheduled and waiting, or not allocated at all.
            state = "Pending"
            if allocated > 0:
                reason = "POD_SCHEDULED"
                message = f"Pod is scheduled but waiting for IP ({allocated}/{replicas} allocated, {ready} ready)"
            else:
                reason = "BATCHSANDBOX_PENDING"
                message = "BatchSandbox is pending allocation"

        return {
            "state": state,
            "reason": reason,
            "message": message,
            "last_transition_at": workload.get("metadata", {}).get("creationTimestamp"),
        }
    
    def get_endpoint_info(self, workload: Dict[str, Any], port: int, sandbox_id: str) -> Optional[Endpoint]:
        """
        Resolve the endpoint for a port of a BatchSandbox.

        - When an ingress config is set and its mode is gateway, the endpoint
          is produced by ``format_ingress_endpoint``.
        - Otherwise the Pod IP from the endpoints annotation is combined with
          ``port`` as ``"ip:port"``; None is returned when no IP is available.
        """
        ingress = self.ingress_config
        if ingress and ingress.mode == INGRESS_MODE_GATEWAY:
            return format_ingress_endpoint(ingress, sandbox_id, port)

        resolved_ip = self._parse_pod_ip(workload)
        if resolved_ip:
            return Endpoint(endpoint=f"{resolved_ip}:{port}")
        return None


================================================
FILE: server/src/services/k8s/batchsandbox_template.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
BatchSandbox template loader and merger.
"""

from typing import Optional

from src.services.k8s.template_manager import BaseSandboxTemplateManager


class BatchSandboxTemplateManager(BaseSandboxTemplateManager):
    """
    Template manager specialised for the ``BatchSandbox`` custom resource.

    All behavior comes from ``BaseSandboxTemplateManager``; this subclass only
    fixes the template kind to "BatchSandbox".
    """

    def __init__(self, template_file_path: Optional[str] = None):
        """
        Args:
            template_file_path: Optional path of the template file, forwarded
                unchanged to the base class.
        """
        super().__init__(
            template_file_path,
            template_kind="BatchSandbox",
        )


================================================
FILE: server/src/services/k8s/client.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Kubernetes client wrapper that provides a unified interface for all K8s resource
operations. All API access goes through this class.
"""

import logging
import threading
from functools import partial
from typing import Any, Dict, List, Optional, Tuple

from kubernetes import client, config
from kubernetes.client import ApiException, CoreV1Api, CustomObjectsApi, NodeV1Api

from src.config import KubernetesRuntimeConfig
from src.services.k8s.informer import WorkloadInformer
from src.services.k8s.rate_limiter import TokenBucketRateLimiter

logger = logging.getLogger(__name__)

# Type alias for informer cache key
_InformerKey = Tuple[str, str, str, str]  # (group, version, plural, namespace)


class K8sClient:
    """
    Single entry point for Kubernetes API access.

    Wraps the CustomObject, Secret, Pod and RuntimeClass operations used by the
    server so that callers do not hold raw API handles. Read and write calls
    pass through optional token-bucket rate limiters, and custom-object reads
    can be served from a per-resource informer cache when informers are
    enabled in the configuration.
    """

    def __init__(self, k8s_config: KubernetesRuntimeConfig):
        """
        Load the Kubernetes configuration and prepare lazy state.

        Args:
            k8s_config: Runtime configuration (kubeconfig path, informer
                settings, read/write QPS and burst).

        Raises:
            Exception: If the Kubernetes configuration cannot be loaded
        """
        self.config = k8s_config
        self._load_config()

        # API handles are created on first use by the get_*_api accessors.
        self._core_v1_api: Optional[CoreV1Api] = None
        self._custom_objects_api: Optional[CustomObjectsApi] = None
        self._node_v1_api: Optional[NodeV1Api] = None

        # One informer per (group, version, plural, namespace), guarded by a lock.
        self._informers: Dict[_InformerKey, WorkloadInformer] = {}
        self._informers_lock = threading.Lock()

        # A limiter of None means the corresponding calls are not throttled.
        self._read_limiter: Optional[TokenBucketRateLimiter] = None
        if k8s_config.read_qps > 0:
            self._read_limiter = TokenBucketRateLimiter(
                qps=k8s_config.read_qps, burst=k8s_config.read_burst
            )
        self._write_limiter: Optional[TokenBucketRateLimiter] = None
        if k8s_config.write_qps > 0:
            self._write_limiter = TokenBucketRateLimiter(
                qps=k8s_config.write_qps, burst=k8s_config.write_burst
            )

    # ------------------------------------------------------------------
    # Configuration, rate limiting and lazy API handles
    # ------------------------------------------------------------------

    def _load_config(self) -> None:
        """Load the kubeconfig file when a path is configured, else in-cluster config."""
        try:
            if not self.config.kubeconfig_path:
                config.load_incluster_config()
            else:
                config.load_kube_config(config_file=self.config.kubeconfig_path)
        except Exception as e:
            raise Exception(f"Failed to load Kubernetes configuration: {e}") from e

    def _throttle_read(self) -> None:
        """Acquire from the read limiter if one is configured."""
        if self._read_limiter:
            self._read_limiter.acquire()

    def _throttle_write(self) -> None:
        """Acquire from the write limiter if one is configured."""
        if self._write_limiter:
            self._write_limiter.acquire()

    def get_core_v1_api(self) -> CoreV1Api:
        """Return the CoreV1Api handle, creating it on first call."""
        if self._core_v1_api is None:
            self._core_v1_api = client.CoreV1Api()
        return self._core_v1_api

    def get_custom_objects_api(self) -> CustomObjectsApi:
        """Return the CustomObjectsApi handle, creating it on first call."""
        if self._custom_objects_api is None:
            self._custom_objects_api = client.CustomObjectsApi()
        return self._custom_objects_api

    def get_node_v1_api(self) -> NodeV1Api:
        """Return the NodeV1Api handle, creating it on first call."""
        if self._node_v1_api is None:
            self._node_v1_api = client.NodeV1Api()
        return self._node_v1_api

    # ------------------------------------------------------------------
    # Informer pool
    # ------------------------------------------------------------------

    def _get_informer(self, group: str, version: str, plural: str, namespace: str) -> Optional[WorkloadInformer]:
        """
        Return the informer for this resource and namespace, creating and
        starting it on first request.

        Returns None when informers are disabled in the configuration or when
        a newly created informer fails to start (in which case it is removed
        from the pool again).
        """
        if not self.config.informer_enabled:
            return None

        pool_key: _InformerKey = (group, version, plural, namespace)
        with self._informers_lock:
            pooled = self._informers.get(pool_key)
            if pooled is not None:
                return pooled

            list_fn = partial(
                self.get_custom_objects_api().list_namespaced_custom_object,
                group=group,
                version=version,
                namespace=namespace,
                plural=plural,
            )
            started = WorkloadInformer(
                list_fn=list_fn,
                resync_period_seconds=self.config.informer_resync_seconds,
                watch_timeout_seconds=self.config.informer_watch_timeout_seconds,
                thread_name=f"workload-informer-{plural}-{namespace}",
            )
            self._informers[pool_key] = started
            try:
                started.start()
            except Exception as exc:  # pragma: no cover - defensive
                logger.warning("Failed to start informer for %s/%s: %s", plural, namespace, exc)
                self._informers.pop(pool_key, None)
                return None
            return started

    # ------------------------------------------------------------------
    # CustomObject operations
    # ------------------------------------------------------------------

    def create_custom_object(
        self,
        group: str,
        version: str,
        namespace: str,
        plural: str,
        body: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Create a namespaced custom resource and return the API response."""
        self._throttle_write()
        api = self.get_custom_objects_api()
        return api.create_namespaced_custom_object(
            group=group,
            version=version,
            namespace=namespace,
            plural=plural,
            body=body,
        )

    def get_custom_object(
        self,
        group: str,
        version: str,
        namespace: str,
        plural: str,
        name: str,
    ) -> Optional[Dict[str, Any]]:
        """
        Fetch a namespaced custom resource by name.

        A synced informer cache is consulted first; on a cache miss (or when no
        synced informer exists) the API is queried and, if an informer exists,
        its cache is updated with the result.

        Returns:
            The resource dict, or None when the API answers 404.
        """
        informer = self._get_informer(group, version, plural, namespace)
        if informer and informer.has_synced:
            cache_hit = informer.get(name)
            if cache_hit is not None:
                return cache_hit

        self._throttle_read()
        try:
            fetched = self.get_custom_objects_api().get_namespaced_custom_object(
                group=group,
                version=version,
                namespace=namespace,
                plural=plural,
                name=name,
            )
            if informer:
                informer.update_cache(fetched)
            return fetched
        except ApiException as e:
            if e.status != 404:
                raise
            return None

    def list_custom_objects(
        self,
        group: str,
        version: str,
        namespace: str,
        plural: str,
        label_selector: str = "",
    ) -> List[Dict[str, Any]]:
        """
        List namespaced custom resources.

        Returns:
            The ``items`` list of the API response, or an empty list when the
            response has no items or the API answers 404.
        """
        self._throttle_read()
        try:
            listing = self.get_custom_objects_api().list_namespaced_custom_object(
                group=group,
                version=version,
                namespace=namespace,
                plural=plural,
                label_selector=label_selector,
            )
            return listing.get("items", [])
        except ApiException as e:
            if e.status != 404:
                raise
            return []

    def delete_custom_object(
        self,
        group: str,
        version: str,
        namespace: str,
        plural: str,
        name: str,
        grace_period_seconds: int = 0,
    ) -> None:
        """Delete a namespaced custom resource with the given grace period."""
        self._throttle_write()
        api = self.get_custom_objects_api()
        api.delete_namespaced_custom_object(
            group=group,
            version=version,
            namespace=namespace,
            plural=plural,
            name=name,
            grace_period_seconds=grace_period_seconds,
        )

    def patch_custom_object(
        self,
        group: str,
        version: str,
        namespace: str,
        plural: str,
        name: str,
        body: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Patch a namespaced custom resource and return the API response."""
        self._throttle_write()
        api = self.get_custom_objects_api()
        return api.patch_namespaced_custom_object(
            group=group,
            version=version,
            namespace=namespace,
            plural=plural,
            name=name,
            body=body,
        )

    # ------------------------------------------------------------------
    # Secret operations
    # ------------------------------------------------------------------

    def create_secret(self, namespace: str, body: Any) -> Any:
        """Create a namespaced Secret and return the API response."""
        self._throttle_write()
        return self.get_core_v1_api().create_namespaced_secret(
            namespace=namespace,
            body=body,
        )

    # ------------------------------------------------------------------
    # Pod operations
    # ------------------------------------------------------------------

    def list_pods(
        self,
        namespace: str,
        label_selector: str = "",
    ) -> List[Any]:
        """List pods in a namespace and return the response's ``items``."""
        self._throttle_read()
        pod_list = self.get_core_v1_api().list_namespaced_pod(
            namespace=namespace,
            label_selector=label_selector,
        )
        return pod_list.items

    # ------------------------------------------------------------------
    # RuntimeClass operations
    # ------------------------------------------------------------------

    def read_runtime_class(self, name: str) -> Any:
        """Read a RuntimeClass by name from the cluster."""
        self._throttle_read()
        return self.get_node_v1_api().read_runtime_class(name)


================================================
FILE: server/src/services/k8s/egress_helper.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Egress sidecar helpers for Kubernetes pod specs.

Public entry points: ``prep_execd_init_for_egress``, ``build_security_context_for_sandbox_container``,
``apply_egress_to_spec``. SecurityContext dict ↔ V1 conversion lives in ``security_context``.
"""

import json
from typing import Any, Dict, List, Optional

from src.api.schema import NetworkPolicy
from src.config import EGRESS_MODE_DNS
from src.services.constants import (
    EGRESS_MODE_ENV,
    EGRESS_RULES_ENV,
    OPENSANDBOX_EGRESS_TOKEN,
)


def prep_execd_init_for_egress(exec_install_script: str) -> tuple[str, Dict[str, Any]]:
    """
    Build the execd init script and security context used alongside an egress sidecar.

    The returned script first writes ``1`` to
    ``/proc/sys/net/ipv6/conf/all/disable_ipv6`` (a plain ``echo`` redirect, so no
    ``sysctl`` binary is needed) and then chains ``exec_install_script`` with ``&&``.
    The accompanying security context dict is meant to be applied to the execd init
    container (typically via ``build_security_context_from_dict`` in
    ``security_context``).

    Args:
        exec_install_script: Shell snippet that installs execd.

    Returns:
        ``(prefixed_shell_script, {"privileged": True})``
    """
    disable_ipv6_cmd = "echo 1 > /proc/sys/net/ipv6/conf/all/disable_ipv6"
    # ``set -e`` first, then the IPv6 switch, then the caller's install step.
    prefixed_script = f"set -e; {disable_ipv6_cmd} && {exec_install_script}"
    init_security_context: Dict[str, Any] = {"privileged": True}
    return prefixed_script, init_security_context


def build_security_context_for_sandbox_container(
    has_network_policy: bool,
) -> Dict[str, Any]:
    """
    Build the security context dict for the main sandbox container.

    Args:
        has_network_policy: Whether a network policy applies to the sandbox.

    Returns:
        An empty dict when no network policy applies; otherwise a dict that
        drops the ``NET_ADMIN`` capability, so that only the egress sidecar
        can mutate network stack state.
    """
    security_context: Dict[str, Any] = {}
    if has_network_policy:
        security_context["capabilities"] = {"drop": ["NET_ADMIN"]}
    return security_context


def apply_egress_to_spec(
    containers: List[Dict[str, Any]],
    network_policy: Optional[NetworkPolicy],
    egress_image: Optional[str],
    egress_auth_token: Optional[str] = None,
    egress_mode: str = EGRESS_MODE_DNS,
) -> None:
    """
    Append the egress sidecar container to ``containers`` (mutated in place).

    Nothing is appended unless both ``network_policy`` and ``egress_image`` are
    truthy. IPv6 is handled in execd init (``prep_execd_init_for_egress``);
    Pod-level sysctls are not modified here.

    Args:
        containers: Container dicts of the pod spec; the sidecar is appended.
        network_policy: Policy serialized to JSON into the sidecar's rules env var.
        egress_image: Image used for the sidecar container.
        egress_auth_token: Optional token; when truthy it is exposed to the
            sidecar through an additional env var.
        egress_mode: Value of the sidecar's mode env var.
    """
    if not (network_policy and egress_image):
        return

    rules_json = json.dumps(
        network_policy.model_dump(by_alias=True, exclude_none=True)
    )

    sidecar_env: List[Dict[str, str]] = [
        {"name": EGRESS_RULES_ENV, "value": rules_json},
        {"name": EGRESS_MODE_ENV, "value": egress_mode},
    ]
    if egress_auth_token:
        sidecar_env.append(
            {"name": OPENSANDBOX_EGRESS_TOKEN, "value": egress_auth_token}
        )

    # The sidecar is the only container granted NET_ADMIN.
    sidecar: Dict[str, Any] = {
        "name": "egress",
        "image": egress_image,
        "env": sidecar_env,
        "securityContext": {
            "capabilities": {"add": ["NET_ADMIN"]},
        },
    }
    containers.append(sidecar)


================================================
FILE: server/src/services/k8s/image_pull_secret_helper.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Helpers for creating Kubernetes imagePullSecrets.
"""

import base64
import json

from kubernetes.client import V1ObjectMeta, V1OwnerReference, V1Secret

from src.api.schema import ImageAuth

# Common prefix of every per-sandbox image-auth Secret name.
IMAGE_AUTH_SECRET_PREFIX = "opensandbox-image-auth"


def build_image_pull_secret_name(sandbox_id: str) -> str:
    """Return the imagePullSecret name for ``sandbox_id``: ``<prefix>-<sandbox_id>``.

    The name depends only on ``sandbox_id``, so repeated calls yield the same name.
    """
    return "{}-{}".format(IMAGE_AUTH_SECRET_PREFIX, sandbox_id)


# Key under which Docker Hub credentials are stored in a dockerconfigjson.
_DOCKER_HUB_REGISTRY_KEY = "https://index.docker.io/v1/"


def _resolve_registry_key(image_uri: str) -> str:
    """Return the dockerconfigjson ``auths`` key for ``image_uri``.

    The first path component is treated as a registry host when the URI has at
    least one ``/`` and that component contains ``.`` or ``:`` or is exactly
    ``localhost`` (the Docker image-reference convention). Anything else is
    treated as a Docker Hub image.

    Examples:
        ``registry.example.com/ns/image:tag`` -> ``registry.example.com``
        ``localhost/image:tag``               -> ``localhost``
        ``python:3.11``                       -> ``https://index.docker.io/v1/``
    """
    first_component, slash, _remainder = image_uri.partition("/")
    looks_like_host = (
        "." in first_component
        or ":" in first_component
        or first_component == "localhost"
    )
    if slash and looks_like_host:
        return first_component
    return _DOCKER_HUB_REGISTRY_KEY


def build_image_pull_secret(
    sandbox_id: str,
    image_uri: str,
    auth: ImageAuth,
    owner_uid: str,
    owner_api_version: str,
    owner_kind: str,
) -> V1Secret:
    """
    Build a kubernetes.io/dockerconfigjson Secret for image pull auth.

    The Secret's ownerReference points to the owning CR so it is
    garbage-collected automatically when the owner is deleted.

    Args:
        sandbox_id: Sandbox identifier (used to derive Secret name and as the
            owner reference name)
        image_uri: Container image URI (used to determine registry hostname)
        auth: ImageAuth credentials
        owner_uid: UID of the owning CR
        owner_api_version: apiVersion of the owning CR (e.g. "sandbox.opensandbox.io/v1alpha1")
        owner_kind: Kind of the owning CR (e.g. "BatchSandbox")

    Returns:
        V1Secret ready to be created via CoreV1Api
    """
    secret_name = build_image_pull_secret_name(sandbox_id)

    # Registry key for the credentials; ``localhost/...`` images are recognized
    # as a registry host instead of silently falling back to Docker Hub.
    registry = _resolve_registry_key(image_uri)

    # ``auth`` field of a docker config entry: base64("username:password").
    auth_str = base64.b64encode(
        f"{auth.username}:{auth.password}".encode()
    ).decode()
    docker_config = {
        "auths": {
            registry: {
                "username": auth.username,
                "password": auth.password,
                "auth": auth_str,
            }
        }
    }
    # Secret ``data`` values must be base64-encoded.
    docker_config_b64 = base64.b64encode(
        json.dumps(docker_config).encode()
    ).decode()

    return V1Secret(
        api_version="v1",
        kind="Secret",
        metadata=V1ObjectMeta(
            name=secret_name,
            owner_references=[
                V1OwnerReference(
                    api_version=owner_api_version,
                    kind=owner_kind,
                    name=sandbox_id,
                    uid=owner_uid,
                    controller=False,
                )
            ],
        ),
        type="kubernetes.io/dockerconfigjson",
        data={".dockerconfigjson": docker_config_b64},
    )


================================================
FILE: server/src/services/k8s/informer.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Lightweight informer-style cache for namespaced custom resources."""

import logging
import threading
from typing import Any, Callable, Dict, Optional

from kubernetes import watch
from kubernetes.client import ApiException

logger = logging.getLogger(__name__)


class WorkloadInformer:
    """Maintain an in-memory cache of a namespaced custom resource via watch.

    A daemon thread lists the resource once, then either streams watch events
    (``enable_watch=True``) or re-lists every ``resync_period_seconds``
    (``enable_watch=False``). Cached objects are keyed by ``metadata.name``.
    """

    def __init__(
        self,
        list_fn: Callable[..., Any],
        resync_period_seconds: int = 300,
        watch_timeout_seconds: int = 60,
        enable_watch: bool = True,
        thread_name: str = "workload-informer",
    ):
        """
        Args:
            list_fn: Callable that lists the custom resource, with signature
                     ``list_fn(**kwargs) -> dict``.  Typically a bound method
                     like ``custom_api.list_namespaced_custom_object``.
            resync_period_seconds: Full-resync interval when watch is disabled.
            watch_timeout_seconds: Per-stream watch timeout before restart.
            enable_watch: When False no watch stream is opened; the cache is
                          refreshed by a full list every
                          ``resync_period_seconds`` instead.
            thread_name: Name for the background thread, used in stack traces
                         and debuggers.  Should be unique per informer instance.
        """
        self.list_fn = list_fn
        self.resync_period_seconds = resync_period_seconds
        self.watch_timeout_seconds = watch_timeout_seconds
        self.enable_watch = enable_watch
        self._thread_name = thread_name

        self._cache: Dict[str, Dict[str, Any]] = {}
        self._lock = threading.RLock()
        self._resource_version: Optional[str] = None
        self._has_synced = False
        self._stop_event = threading.Event()
        self._thread: Optional[threading.Thread] = None

    @property
    def has_synced(self) -> bool:
        """Return True while the cache reflects a completed full list.

        The flag is set by a successful list and cleared again whenever the
        background loop decides a fresh list is required (after an error, a
        410 response, or each resync period when watch is disabled).
        """
        return self._has_synced

    def start(self) -> None:
        """Start the background thread if it is not already running.

        Safe to call after ``stop()`` once the previous thread has exited: the
        stop flag is cleared so the new thread's loop actually runs. If the
        previous thread is still alive the call is a no-op.
        """
        if self._thread and self._thread.is_alive():
            return

        # stop() leaves the event set; without clearing it the new thread
        # would leave its loop immediately and the informer could never be
        # restarted.
        self._stop_event.clear()
        self._thread = threading.Thread(
            target=self._run,
            name=self._thread_name,
            daemon=True,
        )
        self._thread.start()

    def stop(self) -> None:
        """Signal the background thread to stop (does not wait for it to exit)."""
        self._stop_event.set()

    def get(self, name: str) -> Optional[Dict[str, Any]]:
        """Return cached object by name, if present."""
        with self._lock:
            return self._cache.get(name)

    def update_cache(self, obj: Dict[str, Any]) -> None:
        """Upsert a single object into the cache.

        Objects without ``metadata.name`` are ignored.

        Only advances ``_resource_version`` if the incoming version is strictly
        newer, preventing a stale API response from rolling back the watch cursor.
        """
        metadata = obj.get("metadata", {})
        name = metadata.get("name")
        if not name:
            return

        with self._lock:
            self._cache[name] = obj
            self._advance_resource_version(metadata.get("resourceVersion"))

    def _advance_resource_version(self, rv: Optional[str]) -> None:
        """Advance ``_resource_version`` only when *rv* is strictly newer.

        K8s resourceVersions are opaque strings but etcd encodes them as
        monotonically increasing integers.  If the conversion fails we skip the
        update (conservative: keep the current, newer cursor).

        Must be called with ``self._lock`` already held.
        """
        if not rv:
            return
        if self._resource_version is None:
            self._resource_version = rv
            return
        try:
            if int(rv) > int(self._resource_version):
                self._resource_version = rv
        except ValueError:
            # Non-integer resourceVersion — skip to avoid downgrade.
            pass

    def _run(self) -> None:
        """Background loop: list, then watch or periodically re-list, until stopped.

        Errors are retried with exponential backoff capped at 30 seconds; a
        410 response skips the backoff and forces an immediate fresh list.
        """
        backoff = 1.0
        while not self._stop_event.is_set():
            try:
                if not self._has_synced:
                    self._full_resync()
                    backoff = 1.0

                if not self.enable_watch:
                    self._stop_event.wait(self.resync_period_seconds)
                    self._has_synced = False  # trigger a fresh list on next loop
                    continue

                self._run_watch_loop()
                backoff = 1.0
            except ApiException as exc:
                if exc.status == 410:
                    # Resource version too old; force a fresh list on next loop.
                    # Reset under the lock: update_cache() may be advancing the
                    # cursor from another thread.
                    with self._lock:
                        self._resource_version = None
                        self._has_synced = False
                else:
                    logger.warning("Informer watch error: %s", exc, exc_info=True)
                    self._has_synced = False
                    self._stop_event.wait(min(backoff, 30.0))
                    backoff = min(backoff * 2, 30.0)
            except Exception as exc:  # pragma: no cover - defensive
                logger.warning("Unexpected informer error: %s", exc, exc_info=True)
                self._has_synced = False
                self._stop_event.wait(min(backoff, 30.0))
                backoff = min(backoff * 2, 30.0)

    def _full_resync(self) -> None:
        """Perform a full list to refresh the cache."""
        resp = self.list_fn()

        # list response is a dict for CustomObjectsApi
        items = resp.get("items", []) if isinstance(resp, dict) else []
        metadata = resp.get("metadata", {}) if isinstance(resp, dict) else {}
        resource_version = metadata.get("resourceVersion")

        # Build new cache outside the lock to avoid blocking readers
        new_cache: Dict[str, Dict[str, Any]] = {}
        for item in items:
            name = item.get("metadata", {}).get("name")
            if name:
                new_cache[name] = item

        with self._lock:
            self._cache = new_cache
            self._advance_resource_version(resource_version)
            self._has_synced = True

    def _run_watch_loop(self) -> None:
        """Stream watch events to keep the cache fresh.

        Returns when the stream ends (e.g. after ``watch_timeout_seconds``) or
        when a stop was requested; the watch is always stopped on exit.
        """
        w = watch.Watch()
        try:
            for event in w.stream(
                self.list_fn,
                resource_version=self._resource_version,
                timeout_seconds=self.watch_timeout_seconds,
            ):
                if self._stop_event.is_set():
                    break
                self._handle_event(event)
        finally:
            w.stop()

    def _handle_event(self, event: Dict[str, Any]) -> None:
        """Apply one watch event to the cache.

        ``DELETED`` removes the object; every other event type upserts it.
        Events without an object, or whose object has no ``metadata.name``,
        are ignored.
        """
        obj = event.get("object")
        if obj is None:
            return

        if not isinstance(obj, dict):
            # Typed client models are converted to plain dicts; anything that
            # cannot be converted is dropped.
            try:
                obj = obj.to_dict()
            except Exception:
                return

        metadata = obj.get("metadata", {})
        name = metadata.get("name")
        if not name:
            return

        event_type = event.get("type")
        with self._lock:
            if event_type == "DELETED":
                self._cache.pop(name, None)
            else:
                self._cache[name] = obj
            self._advance_resource_version(metadata.get("resourceVersion"))


================================================
FILE: server/src/services/k8s/kubernetes_service.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Kubernetes-based implementation of SandboxService.

This module provides a Kubernetes implementation of the sandbox service interface,
using Kubernetes resources for sandbox lifecycle management.
"""

import asyncio
import logging
import time
from datetime import datetime, timezone
from typing import Optional, Dict, Any

from fastapi import HTTPException, status

from src.api.schema import (
    CreateSandboxRequest,
    CreateSandboxResponse,
    Endpoint,
    ImageSpec,
    ListSandboxesRequest,
    ListSandboxesResponse,
    PaginationInfo,
    RenewSandboxExpirationRequest,
    RenewSandboxExpirationResponse,
    Sandbox,
    SandboxStatus,
)
from src.config import AppConfig, EGRESS_MODE_DNS, get_config
from src.services.constants import (
    SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY,
    SANDBOX_ID_LABEL,
    SANDBOX_MANUAL_CLEANUP_LABEL,
    SandboxErrorCodes,
)
from src.services.endpoint_auth import generate_egress_token
from src.services.endpoint_auth import build_egress_auth_headers, merge_endpoint_headers
from src.services.helpers import matches_filter
from src.services.sandbox_service import SandboxService
from src.services.validators import (
    calculate_expiration_or_raise,
    ensure_entrypoint,
    ensure_egress_configured,
    ensure_future_expiration,
    ensure_metadata_labels,
    ensure_timeout_within_limit,
    ensure_volumes_valid,
)
from src.services.k8s.client import K8sClient
from src.services.k8s.provider_factory import create_workload_provider

logger = logging.getLogger(__name__)


class KubernetesSandboxService(SandboxService):
    """
    Kubernetes-based implementation of SandboxService.
    
    This class implements sandbox lifecycle operations using Kubernetes resources.
    """
    
    def __init__(self, config: Optional[AppConfig] = None):
        """
        Set up the Kubernetes client and workload provider for this service.

        Args:
            config: Application configuration; when omitted (or falsy) the
                result of ``get_config()`` is used.

        Raises:
            ValueError: If ``runtime.type`` is not ``"kubernetes"`` or the
                Kubernetes configuration section is missing.
            HTTPException: 503 if the Kubernetes client cannot be created or
                the workload provider configuration is rejected.
        """
        self.app_config = config or get_config()
        runtime_config = self.app_config.runtime

        if runtime_config.type != "kubernetes":
            raise ValueError("KubernetesSandboxService requires runtime.type = 'kubernetes'")

        k8s_config = self.app_config.kubernetes
        if not k8s_config:
            raise ValueError("Kubernetes configuration is required")

        # Ingress configuration (direct/gateway) if provided
        self.ingress_config = self.app_config.ingress

        self.namespace = k8s_config.namespace
        self.execd_image = runtime_config.execd_image

        # Kubernetes client: any failure is surfaced as a 503.
        try:
            self.k8s_client = K8sClient(k8s_config)
            logger.info("Kubernetes client initialized successfully")
        except Exception as exc:
            logger.error(f"Failed to initialize Kubernetes client: {exc}")
            client_error = {
                "code": SandboxErrorCodes.K8S_INITIALIZATION_ERROR,
                "message": f"Failed to initialize Kubernetes client: {str(exc)}",
            }
            raise HTTPException(
                status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
                detail=client_error,
            ) from exc

        # Workload provider: only ValueError is translated into a 503.
        provider_type = k8s_config.workload_provider
        try:
            self.workload_provider = create_workload_provider(
                provider_type=provider_type,
                k8s_client=self.k8s_client,
                app_config=self.app_config,
            )
            logger.info(
                f"Initialized workload provider: {self.workload_provider.__class__.__name__}"
            )
        except ValueError as exc:
            logger.error(f"Failed to create workload provider: {exc}")
            provider_error = {
                "code": SandboxErrorCodes.K8S_INITIALIZATION_ERROR,
                "message": f"Invalid workload provider configuration: {str(exc)}",
            }
            raise HTTPException(
                status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
                detail=provider_error,
            ) from exc

        logger.info(
            "KubernetesSandboxService initialized: namespace=%s, execd_image=%s",
            self.namespace,
            self.execd_image,
        )
    
    async def _wait_for_sandbox_ready(
        self,
        sandbox_id: str,
        timeout_seconds: int = 60,
        poll_interval_seconds: float = 1.0,
    ) -> Dict[str, Any]:
        """
        Poll the workload until it is ready or the timeout elapses.

        The workload counts as ready as soon as the provider reports state
        ``Running`` or ``Allocated``. Between polls the coroutine yields to the
        event loop, so other requests keep being served while waiting.

        Args:
            sandbox_id: Sandbox ID
            timeout_seconds: Maximum time to wait in seconds
            poll_interval_seconds: Time between polling attempts

        Returns:
            Workload dict as returned by the provider once the state is ready

        Raises:
            HTTPException: 504 on timeout; HTTPExceptions raised by the
                provider while polling are propagated unchanged. Other
                provider errors are logged and polling continues.
        """
        logger.info(
            f"Waiting for sandbox {sandbox_id} to be Running with IP (timeout: {timeout_seconds}s)"
        )

        start_time = time.time()
        last_state = None
        last_message = None

        while time.time() - start_time < timeout_seconds:
            try:
                # Get current workload status
                workload = self.workload_provider.get_workload(
                    sandbox_id=sandbox_id,
                    namespace=self.namespace,
                )

                if not workload:
                    # Not visible yet: fall through to the shared non-blocking
                    # sleep below. A blocking time.sleep() here would stall the
                    # whole event loop for the poll interval.
                    logger.debug(f"Workload not found yet for sandbox {sandbox_id}")
                else:
                    # Get status
                    status_info = self.workload_provider.get_status(workload)
                    current_state = status_info["state"]
                    current_message = status_info["message"]

                    # Log state changes
                    if current_state != last_state or current_message != last_message:
                        logger.info(
                            f"Sandbox {sandbox_id} state: {current_state} - {current_message}"
                        )
                        last_state = current_state
                        last_message = current_message

                    # Check if Running or Allocated (IP assigned)
                    if current_state in ("Running", "Allocated"):
                        return workload

            except HTTPException:
                raise
            except Exception as e:
                # Transient provider errors must not abort the wait.
                logger.warning(
                    f"Error checking sandbox {sandbox_id} status: {e}",
                    exc_info=True
                )

            # Wait before next poll
            await asyncio.sleep(poll_interval_seconds)

        # Timeout
        elapsed = time.time() - start_time
        raise HTTPException(
            status_code=status.HTTP_504_GATEWAY_TIMEOUT,
            detail={
                "code": SandboxErrorCodes.K8S_POD_READY_TIMEOUT,
                "message": (
                    f"Timeout waiting for sandbox {sandbox_id} to be Running with IP. "
                    f"Elapsed: {elapsed:.1f}s, Last state: {last_state}"
                ),
            },
        )
    
    def _ensure_network_policy_support(self, request: CreateSandboxRequest) -> None:
        """
        Check that the request's network policy can be honored by this deployment.

        Delegates to ``ensure_egress_configured`` with the request's
        ``network_policy`` and the application's ``egress`` configuration, which
        requires egress.image to be configured when a network policy is given.
        """
        requested_policy = request.network_policy
        egress_config = self.app_config.egress
        ensure_egress_configured(requested_policy, egress_config)

    def _ensure_image_auth_support(self, request: CreateSandboxRequest) -> None:
        """
        Reject per-request image credentials the workload provider cannot handle.

        The provider is only consulted when ``request.image.auth`` is set.

        Raises:
            HTTPException: 400 when image auth is requested but the provider
                reports that it does not support it.
        """
        auth_requested = request.image.auth is not None
        if auth_requested and not self.workload_provider.supports_image_auth():
            unsupported_detail = {
                "code": SandboxErrorCodes.INVALID_PARAMETER,
                "message": (
                    "image.auth is not supported by the current workload provider. "
                    "Use imagePullSecrets via Kubernetes ServiceAccount or sandbox template."
                ),
            }
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=unsupported_detail,
            )

    async def create_sandbox(self, request: CreateSandboxRequest) -> CreateSandboxResponse:
        """
        Create a new sandbox workload and wait until it is ready.

        The request is validated, a workload is created through the workload
        provider, and the call then waits for the workload to reach a ready
        state (``Running`` or ``Allocated``) before returning. If anything
        fails after the workload has been created, a best-effort delete of the
        workload is attempted before the error is reported.

        Args:
            request: Sandbox creation request.

        Returns:
            CreateSandboxResponse: Created sandbox information with the state
            reported by the workload provider.

        Raises:
            HTTPException: If creation fails, timeout, or invalid parameters
        """
        # Validate request
        ensure_entrypoint(request.entrypoint)
        ensure_metadata_labels(request.metadata)
        ensure_timeout_within_limit(
            request.timeout,
            self.app_config.server.max_sandbox_timeout_seconds,
        )
        self._ensure_network_policy_support(request)
        self._ensure_image_auth_support(request)

        # Generate sandbox ID
        sandbox_id = self.generate_sandbox_id()

        # Calculate expiration time (None = no TTL, manual cleanup only; same as Docker)
        created_at = datetime.now(timezone.utc)
        expires_at = None
        if request.timeout is not None:
            expires_at = calculate_expiration_or_raise(created_at, request.timeout)

        # Build labels
        labels = {
            SANDBOX_ID_LABEL: sandbox_id,
        }
        annotations: Dict[str, str] = {}
        if expires_at is None:
            labels[SANDBOX_MANUAL_CLEANUP_LABEL] = "true"

        # Add user metadata as labels
        if request.metadata:
            labels.update(request.metadata)

        # Extract resource limits
        resource_limits = {}
        if request.resource_limits and request.resource_limits.root:
            resource_limits = request.resource_limits.root

        try:
            egress_mode = (
                self.app_config.egress.mode
                if self.app_config.egress
                else EGRESS_MODE_DNS
            )
            # Get egress image if network policy is provided
            egress_image = None
            egress_auth_token = None
            if request.network_policy:
                egress_image = self.app_config.egress.image if self.app_config.egress else None
                egress_auth_token = generate_egress_token()
                # The token is stored as an annotation on the workload.
                annotations[SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY] = egress_auth_token

            # Validate volumes before creating workload
            ensure_volumes_valid(
                request.volumes,
                self.app_config.storage.allowed_host_paths or None,
            )

            # Create workload
            workload_info = self.workload_provider.create_workload(
                sandbox_id=sandbox_id,
                namespace=self.namespace,
                image_spec=request.image,
                entrypoint=request.entrypoint,
                env=request.env or {},
                resource_limits=resource_limits,
                labels=labels,
                annotations=annotations or None,
                expires_at=expires_at,
                execd_image=self.execd_image,
                extensions=request.extensions,
                network_policy=request.network_policy,
                egress_image=egress_image,
                egress_auth_token=egress_auth_token,
                egress_mode=egress_mode,
                volumes=request.volumes,
            )

            logger.info(
                "Created sandbox: id=%s, workload=%s",
                sandbox_id,
                workload_info.get("name"),
            )

            # Wait for the workload to become ready
            try:
                workload = await self._wait_for_sandbox_ready(
                    sandbox_id=sandbox_id,
                    timeout_seconds=self.app_config.kubernetes.sandbox_create_timeout_seconds,
                    poll_interval_seconds=self.app_config.kubernetes.sandbox_create_poll_interval_seconds,
                )

                # Get final status
                status_info = self.workload_provider.get_status(workload)

                # Build and return response with the reported state
                return CreateSandboxResponse(
                    id=sandbox_id,
                    status=SandboxStatus(
                        state=status_info["state"],
                        reason=status_info["reason"],
                        message=status_info["message"],
                        last_transition_at=status_info["last_transition_at"],
                    ),
                    created_at=created_at,
                    expires_at=expires_at,
                    metadata=request.metadata,
                    image=request.image,
                    entrypoint=request.entrypoint,
                )

            except Exception:
                # Clean up on ANY failure after the workload exists, not only
                # on HTTPException: a status lookup or response validation
                # error would otherwise leave an orphaned workload behind that
                # the caller has no ID for. The original exception is re-raised
                # and mapped to an HTTP error by the outer handlers.
                try:
                    logger.warning(f"Creation failed, cleaning up sandbox: {sandbox_id}")
                    self.workload_provider.delete_workload(sandbox_id, self.namespace)
                except Exception as cleanup_ex:
                    logger.error(f"Failed to cleanup sandbox {sandbox_id}", exc_info=cleanup_ex)
                raise

        except HTTPException:
            raise
        except ValueError as e:
            # Handle parameter validation errors from provider
            logger.error(f"Invalid parameters for sandbox creation: {e}")
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={
                    "code": SandboxErrorCodes.INVALID_PARAMETER,
                    "message": str(e),
                },
            ) from e
        except Exception as e:
            logger.error(f"Error creating sandbox: {e}")
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.K8S_API_ERROR,
                    "message": f"Failed to create sandbox: {str(e)}",
                },
            ) from e
    
    def get_sandbox(self, sandbox_id: str) -> Sandbox:
        """
        Look up a single sandbox by its ID.

        Args:
            sandbox_id: Unique sandbox identifier

        Returns:
            Sandbox: Sandbox built from the workload found for ``sandbox_id``

        Raises:
            HTTPException: 404 if no workload exists for the ID; 500 for any
                other failure during lookup or conversion
        """
        try:
            found = self.workload_provider.get_workload(
                sandbox_id=sandbox_id,
                namespace=self.namespace,
            )
            if found:
                return self._build_sandbox_from_workload(found)

            not_found_detail = {
                "code": SandboxErrorCodes.K8S_SANDBOX_NOT_FOUND,
                "message": f"Sandbox '{sandbox_id}' not found",
            }
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=not_found_detail,
            )

        except HTTPException:
            # Already an HTTP error (e.g. the 404 above): pass through untouched.
            raise
        except Exception as exc:
            logger.error(f"Error getting sandbox {sandbox_id}: {exc}")
            api_error_detail = {
                "code": SandboxErrorCodes.K8S_API_ERROR,
                "message": f"Failed to get sandbox: {str(exc)}",
            }
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=api_error_detail,
            ) from exc
    
    def list_sandboxes(self, request: ListSandboxesRequest) -> ListSandboxesResponse:
        """
        List sandboxes with filtering and pagination.

        All workloads carrying the sandbox ID label are fetched, converted to
        ``Sandbox`` objects, filtered, sorted newest first and then sliced to
        the requested page.

        Args:
            request: List request with filters and pagination

        Returns:
            ListSandboxesResponse: Paginated list of sandboxes

        Raises:
            HTTPException: HTTP errors raised while listing are propagated
                unchanged; any other failure is reported as a 500.
        """
        # Sandboxes without a creation time sort last. The sentinel is made
        # timezone-aware and naive timestamps are read as UTC, because Python
        # refuses to compare naive and aware datetimes (TypeError).
        oldest = datetime.min.replace(tzinfo=timezone.utc)

        def _created_at_key(sandbox: Sandbox) -> datetime:
            created = sandbox.created_at
            if created is None:
                return oldest
            if created.tzinfo is None:
                return created.replace(tzinfo=timezone.utc)
            return created

        try:
            # Build label selector
            label_selector = SANDBOX_ID_LABEL

            # List all workloads
            workloads = self.workload_provider.list_workloads(
                namespace=self.namespace,
                label_selector=label_selector,
            )

            # Convert to Sandbox objects
            sandboxes = [
                self._build_sandbox_from_workload(w) for w in workloads
            ]

            # Apply filters
            filtered = self._apply_filters(sandboxes, request.filter)

            # Sort by creation time (newest first)
            filtered.sort(key=_created_at_key, reverse=True)

            # Apply pagination
            total_items = len(filtered)
            page = request.pagination.page
            page_size = request.pagination.page_size

            start_idx = (page - 1) * page_size
            end_idx = start_idx + page_size
            paginated_items = filtered[start_idx:end_idx]

            # Ceiling division: a partially filled last page still counts.
            total_pages = (total_items + page_size - 1) // page_size
            has_next = page < total_pages

            return ListSandboxesResponse(
                items=paginated_items,
                pagination=PaginationInfo(
                    page=page,
                    page_size=page_size,
                    total_items=total_items,
                    total_pages=total_pages,
                    has_next_page=has_next,
                ),
            )

        except HTTPException:
            # Keep the status code of HTTP errors raised while listing,
            # consistent with get_sandbox and create_sandbox.
            raise
        except Exception as e:
            logger.error(f"Error listing sandboxes: {e}")
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.K8S_API_ERROR,
                    "message": f"Failed to list sandboxes: {str(e)}",
                },
            ) from e
    
    def delete_sandbox(self, sandbox_id: str) -> None:
        """
        Remove the workload that backs a sandbox.

        Args:
            sandbox_id: Unique sandbox identifier

        Raises:
            HTTPException: 404 when the provider's error text contains
                "not found" (case-insensitive); 500 for any other failure
        """
        try:
            self.workload_provider.delete_workload(
                sandbox_id=sandbox_id,
                namespace=self.namespace,
            )
            logger.info(f"Deleted sandbox: {sandbox_id}")
        except Exception as e:
            # The provider does not expose a typed "missing" error here, so
            # the message text is what separates a 404 from a 500.
            if "not found" not in str(e).lower():
                logger.error(f"Error deleting sandbox {sandbox_id}: {e}")
                raise HTTPException(
                    status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                    detail={
                        "code": SandboxErrorCodes.K8S_API_ERROR,
                        "message": f"Failed to delete sandbox: {str(e)}",
                    },
                ) from e

            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail={
                    "code": SandboxErrorCodes.K8S_SANDBOX_NOT_FOUND,
                    "message": f"Sandbox '{sandbox_id}' not found",
                },
            ) from e
    
    def pause_sandbox(self, sandbox_id: str) -> None:
        """
        Reject a pause request; this runtime has no pause operation.

        Args:
            sandbox_id: Unique sandbox identifier (not inspected)

        Raises:
            HTTPException: Unconditionally, with status 501 Not Implemented
        """
        unsupported = {
            "code": SandboxErrorCodes.API_NOT_SUPPORTED,
            "message": "Pause operation is not supported in Kubernetes runtime",
        }
        raise HTTPException(
            status_code=status.HTTP_501_NOT_IMPLEMENTED,
            detail=unsupported,
        )
    
    def resume_sandbox(self, sandbox_id: str) -> None:
        """
        Reject a resume request; this runtime has no resume operation.

        Args:
            sandbox_id: Unique sandbox identifier (not inspected)

        Raises:
            HTTPException: Unconditionally, with status 501 Not Implemented
        """
        unsupported = {
            "code": SandboxErrorCodes.API_NOT_SUPPORTED,
            "message": "Resume operation is not supported in Kubernetes runtime",
        }
        raise HTTPException(
            status_code=status.HTTP_501_NOT_IMPLEMENTED,
            detail=unsupported,
        )
    
    def renew_expiration(
        self,
        sandbox_id: str,
        request: RenewSandboxExpirationRequest,
    ) -> RenewSandboxExpirationResponse:
        """
        Move a sandbox's expiration to the time given in the request.

        The requested time is first passed through ``ensure_future_expiration``;
        the workload is then looked up, checked for an existing expiration, and
        updated through the workload provider.

        Args:
            sandbox_id: Unique sandbox identifier
            request: Renewal request with new expiration time

        Returns:
            RenewSandboxExpirationResponse: The expiration that was applied

        Raises:
            HTTPException: 404 if the sandbox does not exist, 409 if it has no
                expiration to renew, 500 if the provider call fails
        """
        # Runs outside the try block, so whatever it raises propagates as-is.
        target_expiry = ensure_future_expiration(request.expires_at)

        try:
            existing = self.workload_provider.get_workload(
                sandbox_id=sandbox_id,
                namespace=self.namespace,
            )
            if not existing:
                raise HTTPException(
                    status_code=status.HTTP_404_NOT_FOUND,
                    detail={
                        "code": SandboxErrorCodes.K8S_SANDBOX_NOT_FOUND,
                        "message": f"Sandbox '{sandbox_id}' not found",
                    },
                )

            # A workload without an expiration cannot be renewed.
            if self.workload_provider.get_expiration(existing) is None:
                raise HTTPException(
                    status_code=status.HTTP_409_CONFLICT,
                    detail={
                        "code": SandboxErrorCodes.INVALID_EXPIRATION,
                        "message": f"Sandbox {sandbox_id} does not have automatic expiration enabled.",
                    },
                )

            self.workload_provider.update_expiration(
                sandbox_id=sandbox_id,
                namespace=self.namespace,
                expires_at=target_expiry,
            )
            logger.info(
                f"Renewed sandbox {sandbox_id} expiration to {target_expiry}"
            )
            return RenewSandboxExpirationResponse(expires_at=target_expiry)

        except HTTPException:
            # Already shaped for the API layer; pass through untouched.
            raise
        except Exception as e:
            logger.error(f"Error renewing expiration for {sandbox_id}: {e}")
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.K8S_API_ERROR,
                    "message": f"Failed to renew expiration: {str(e)}",
                },
            ) from e
    
    def get_endpoint(
        self,
        sandbox_id: str,
        port: int,
        resolve_internal: bool = False,
    ) -> Endpoint:
        """
        Resolve the access endpoint for one port of a sandbox.

        Args:
            sandbox_id: Unique sandbox identifier
            port: Port number (checked with ``validate_port`` before lookup)
            resolve_internal: Not used by this implementation

        Returns:
            Endpoint: Endpoint reported by the workload provider, with egress
                auth headers merged in when the workload carries a token

        Raises:
            HTTPException: 404 if the sandbox or its endpoint is unavailable,
                500 if the provider call fails
        """
        self.validate_port(port)

        try:
            workload = self.workload_provider.get_workload(
                sandbox_id=sandbox_id,
                namespace=self.namespace,
            )
            if not workload:
                raise HTTPException(
                    status_code=status.HTTP_404_NOT_FOUND,
                    detail={
                        "code": SandboxErrorCodes.K8S_SANDBOX_NOT_FOUND,
                        "message": f"Sandbox '{sandbox_id}' not found",
                    },
                )

            resolved = self.workload_provider.get_endpoint_info(workload, port, sandbox_id)
            if resolved:
                self._attach_egress_auth_headers(resolved, workload)
                return resolved

            # The workload exists but the provider has no endpoint for it yet.
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail={
                    "code": SandboxErrorCodes.K8S_POD_IP_NOT_AVAILABLE,
                    "message": "Pod IP is not yet available. The Pod may still be starting.",
                },
            )

        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"Error getting endpoint for {sandbox_id}:{port}: {e}")
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.K8S_API_ERROR,
                    "message": f"Failed to get endpoint: {str(e)}",
                },
            ) from e

    def _attach_egress_auth_headers(self, endpoint: Endpoint, workload: Any) -> None:
        """
        Merge egress auth headers into ``endpoint.headers`` in place.

        Nothing happens when the workload carries no egress auth token.

        Args:
            endpoint: Endpoint whose ``headers`` attribute is reassigned
            workload: Workload the token is read from
        """
        auth_token = self._get_egress_auth_token(workload)
        if auth_token:
            endpoint.headers = merge_endpoint_headers(
                endpoint.headers,
                build_egress_auth_headers(auth_token),
            )

    def _get_egress_auth_token(self, workload: Any) -> Optional[str]:
        """
        Read the egress auth token from a workload's annotations.

        Args:
            workload: Either a dict with a ``metadata`` mapping or an object
                with a ``metadata.annotations`` attribute

        Returns:
            The annotation value stored under
            ``SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY``, or None when absent
        """
        token_key = SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY

        if isinstance(workload, dict):
            dict_annotations = workload.get("metadata", {}).get("annotations", {}) or {}
            return dict_annotations.get(token_key)

        # Object form: tolerate a missing metadata or annotations attribute.
        obj_annotations = (
            getattr(getattr(workload, "metadata", None), "annotations", None) or {}
        )
        return obj_annotations.get(token_key) if isinstance(obj_annotations, dict) else None

    def _build_sandbox_from_workload(self, workload: Any) -> Sandbox:
        """
        Translate a Kubernetes workload into the API ``Sandbox`` model.

        Args:
            workload: Kubernetes workload, either a plain dict or an object
                exposing ``metadata`` and ``spec`` attributes

        Returns:
            Sandbox: Sandbox populated from the workload's labels, creation
                timestamp, first container, and the provider's status and
                expiration
        """
        is_dict_workload = isinstance(workload, dict)

        if is_dict_workload:
            meta = workload.get("metadata", {})
            spec = workload.get("spec", {})
            labels = meta.get("labels", {})
            created_at = meta.get("creationTimestamp")
        else:
            meta = workload.metadata
            spec = workload.spec
            labels = meta.labels or {}
            created_at = meta.creation_timestamp

        sandbox_id = labels.get(SANDBOX_ID_LABEL, "")

        # Expiration and status come from the provider.
        expires_at = self.workload_provider.get_expiration(workload)
        status_info = self.workload_provider.get_status(workload)

        # Labels under the "opensandbox.io/" prefix are not user metadata.
        user_metadata = {}
        for label_key, label_value in labels.items():
            if label_key.startswith("opensandbox.io/"):
                continue
            user_metadata[label_key] = label_value

        # Image and entrypoint are taken from the first container only.
        image_uri = ""
        entrypoint = []
        if is_dict_workload:
            template = spec.get("template") or spec.get("podTemplate") or {}
            containers = template.get("spec", {}).get("containers", [])
            if containers:
                first = containers[0]
                image_uri = first.get("image", "")
                entrypoint = first.get("command", [])
        elif hasattr(spec, 'containers') and spec.containers:
            first = spec.containers[0]
            image_uri = first.image or ""
            entrypoint = first.command or []

        # "unknown" stands in when no image could be determined.
        image_spec = ImageSpec(uri=image_uri or "unknown")

        sandbox_status = SandboxStatus(
            state=status_info["state"],
            reason=status_info["reason"],
            message=status_info["message"],
            last_transition_at=status_info["last_transition_at"],
        )
        return Sandbox(
            id=sandbox_id,
            status=sandbox_status,
            created_at=created_at,
            expires_at=expires_at,
            metadata=user_metadata or None,
            image=image_spec,
            entrypoint=entrypoint,
        )
    
    def _apply_filters(self, sandboxes: list[Sandbox], filter_spec: Any) -> list[Sandbox]:
        """
        Keep only the sandboxes accepted by ``matches_filter``.

        Args:
            sandboxes: List of sandboxes
            filter_spec: Filter specification; a falsy value disables filtering

        Returns:
            The input list itself when no filter is given, otherwise a new
            list holding the matching sandboxes in their original order
        """
        if not filter_spec:
            return sandboxes

        return [
            candidate
            for candidate in sandboxes
            if matches_filter(candidate, filter_spec)
        ]


================================================
FILE: server/src/services/k8s/provider_factory.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Factory for creating WorkloadProvider instances.
"""

import logging
from typing import Dict, Type, Optional

from src.config import AppConfig
from src.services.k8s.workload_provider import WorkloadProvider
from src.services.k8s.batchsandbox_provider import BatchSandboxProvider
from src.services.k8s.agent_sandbox_provider import AgentSandboxProvider
from src.services.k8s.client import K8sClient

logger = logging.getLogger(__name__)

# Provider type constants (lowercase: lookups in this module lowercase the
# requested provider type before consulting the registry)
PROVIDER_TYPE_BATCHSANDBOX = "batchsandbox"
PROVIDER_TYPE_AGENT_SANDBOX = "agent-sandbox"

# Registry of available workload providers, keyed by provider type name.
# Insertion order matters: create_workload_provider() falls back to the first
# key when no provider type is given.
_PROVIDER_REGISTRY: Dict[str, Type[WorkloadProvider]] = {
    PROVIDER_TYPE_BATCHSANDBOX: BatchSandboxProvider,
    PROVIDER_TYPE_AGENT_SANDBOX: AgentSandboxProvider,
    # Future providers can be registered here:
    # "pod": PodProvider
}


def create_workload_provider(
    provider_type: str | None,
    k8s_client: K8sClient,
    app_config: Optional[AppConfig] = None,
) -> WorkloadProvider:
    """
    Instantiate the WorkloadProvider registered under ``provider_type``.

    Args:
        provider_type: Registered provider name, matched case-insensitively
                      (e.g., 'batchsandbox', 'agent-sandbox').
                      If None, the first registered provider is used.
        k8s_client: Kubernetes client instance, passed to the provider
        app_config: Application config; forwarded as ``app_config`` to the
                    batchsandbox and agent-sandbox providers only.

    Returns:
        WorkloadProvider instance

    Raises:
        ValueError: If provider_type is not supported or no providers are registered
    """
    if provider_type is None:
        # Fall back to whichever provider was registered first.
        if not _PROVIDER_REGISTRY:
            raise ValueError(
                "No workload providers are registered. "
                "Cannot create a default provider."
            )
        provider_type = next(iter(_PROVIDER_REGISTRY))
        logger.info(f"No provider specified, using default: {provider_type}")

    registry_key = provider_type.lower()
    if registry_key not in _PROVIDER_REGISTRY:
        known = ", ".join(_PROVIDER_REGISTRY)
        raise ValueError(
            f"Unsupported workload provider type '{provider_type}'. "
            f"Available providers: {known}"
        )

    provider_class = _PROVIDER_REGISTRY[registry_key]
    logger.info(f"Creating workload provider: {provider_class.__name__}")

    # Only the two built-in provider types are handed app_config; any other
    # registered provider is constructed from the client alone.
    extra_kwargs = {}
    if registry_key in (PROVIDER_TYPE_BATCHSANDBOX, PROVIDER_TYPE_AGENT_SANDBOX):
        extra_kwargs["app_config"] = app_config

    return provider_class(k8s_client, **extra_kwargs)


def register_provider(name: str, provider_class: Type[WorkloadProvider]) -> None:
    """
    Add a WorkloadProvider implementation to the registry.

    The name is stored lowercased. Registering a name that already exists
    replaces the previous entry and logs a warning.

    Args:
        name: Provider name (used in configuration)
        provider_class: Provider class that implements WorkloadProvider

    Raises:
        TypeError: If provider_class is not a WorkloadProvider subclass

    Example:
        from my_module import CustomProvider
        register_provider("custom", CustomProvider)
    """
    if not issubclass(provider_class, WorkloadProvider):
        raise TypeError(
            f"Provider class must inherit from WorkloadProvider, got {provider_class.__name__}"
        )

    registry_key = name.lower()
    replacing = registry_key in _PROVIDER_REGISTRY
    if replacing:
        logger.warning(
            f"Overwriting existing provider registration: {registry_key}"
        )

    _PROVIDER_REGISTRY[registry_key] = provider_class
    logger.info(f"Registered workload provider: {registry_key} -> {provider_class.__name__}")


def list_available_providers() -> list[str]:
    """
    Return the names of all registered providers in sorted order.

    Returns:
        A new list of provider type names
    """
    names = list(_PROVIDER_REGISTRY)
    names.sort()
    return names


================================================
FILE: server/src/services/k8s/rate_limiter.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Generic token-bucket rate limiter.

Usage example::

    limiter = TokenBucketRateLimiter(qps=10.0, burst=20)
    limiter.acquire()   # blocks until a token is available
    do_something()
"""

import threading
import time


class TokenBucketRateLimiter:
    """Token-bucket rate limiter whose state is guarded by a lock.

    The bucket starts full, gains ``qps`` tokens per second of elapsed
    monotonic time, and never holds more than ``burst`` tokens. Each
    successful acquisition removes exactly one token.

    Args:
        qps: Sustained request rate in requests per second. Must be > 0.
        burst: Bucket capacity. A value <= 0 means "use ``qps``". The
               effective capacity is never below 1, so a full bucket always
               allows at least one acquisition.

    Raises:
        ValueError: If ``qps`` is not greater than zero.
    """

    def __init__(self, qps: float, burst: float = 0.0) -> None:
        if qps <= 0:
            raise ValueError(f"qps must be > 0, got {qps}")

        requested_capacity = burst if burst > 0 else qps

        self._qps = qps
        self._burst = max(requested_capacity, 1.0)
        self._tokens = self._burst  # start with a full bucket
        self._last_refill = time.monotonic()
        self._lock = threading.Lock()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def acquire(self) -> None:
        """Take one token, sleeping and retrying until one is available."""
        while True:
            delay = self._try_acquire()
            if delay <= 0.0:
                break
            # Never sleep for less than 1 ms: a near-zero deficit produced by
            # floating-point rounding would otherwise turn this into a spin.
            time.sleep(max(delay, 0.001))

    def try_acquire(self) -> bool:
        """Take one token if the bucket has one; never blocks.

        Returns:
            ``True`` if a token was consumed, ``False`` if the bucket is empty.
        """
        delay = self._try_acquire()
        return delay <= 0.0

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _try_acquire(self) -> float:
        """Refill the bucket and try to remove one token.

        Returns:
            0.0 when a token was consumed; otherwise the approximate number
            of seconds until the bucket will hold one full token.
        """
        with self._lock:
            self._refill()
            if self._tokens >= 1.0:
                self._tokens -= 1.0
                return 0.0
            deficit = 1.0 - self._tokens
            return deficit / self._qps

    def _refill(self) -> None:
        """Credit tokens for the time since the last refill (lock must be held)."""
        now = time.monotonic()
        earned = (now - self._last_refill) * self._qps
        self._tokens = min(self._burst, self._tokens + earned)
        self._last_refill = now


================================================
FILE: server/src/services/k8s/security_context.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Kubernetes V1SecurityContext ↔ plain dict helpers for CRD pod specs."""

from typing import Any, Dict, Optional


def build_security_context_from_dict(
    security_context_dict: Dict[str, Any],
) -> Optional[Any]:
    """
    Build a ``V1SecurityContext`` from a plain dict.

    Only the ``capabilities`` (``add`` / ``drop``) and ``privileged`` keys are
    read. Returns None for an empty dict, and also when neither of those two
    keys yields a value.
    """
    if not security_context_dict:
        return None

    # Imported here rather than at module level, so an empty input never
    # touches the kubernetes package.
    from kubernetes.client import V1SecurityContext, V1Capabilities

    caps_model = None
    if "capabilities" in security_context_dict:
        raw_caps = security_context_dict["capabilities"]
        # Empty add/drop lists are normalised to None.
        caps_model = V1Capabilities(
            add=raw_caps.get("add", []) or None,
            drop=raw_caps.get("drop", []) or None,
        )

    privileged_flag = security_context_dict.get("privileged")

    if caps_model is not None or privileged_flag is not None:
        return V1SecurityContext(
            capabilities=caps_model,
            privileged=privileged_flag,
        )
    return None


def serialize_security_context_to_dict(
    security_context: Optional[Any],
) -> Optional[Dict[str, Any]]:
    """
    Turn a ``V1SecurityContext``-like object into a plain dict.

    Only non-empty ``capabilities.add`` / ``capabilities.drop`` and a
    non-None ``privileged`` are emitted. Returns None when the input is falsy
    or when nothing would be emitted.
    """
    if not security_context:
        return None

    serialized: Dict[str, Any] = {}

    capabilities = security_context.capabilities
    if capabilities:
        caps_out: Dict[str, Any] = {
            field: values
            for field, values in (("add", capabilities.add), ("drop", capabilities.drop))
            if values
        }
        if caps_out:
            serialized["capabilities"] = caps_out

    privileged = security_context.privileged
    if privileged is not None:
        serialized["privileged"] = privileged

    return serialized or None


================================================
FILE: server/src/services/k8s/template_manager.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Shared template loader and merger for Kubernetes Sandbox CR manifests.
"""

import logging
from pathlib import Path
from typing import Any, Dict, Optional

import yaml

logger = logging.getLogger(__name__)


class BaseSandboxTemplateManager:
    """
    Loads an optional YAML template and overlays runtime manifests on it.

    When no template path is given the manager holds no template, and
    merging returns the runtime manifest unchanged.
    """

    def __init__(self, template_file_path: Optional[str], template_kind: str):
        """
        Args:
            template_file_path: Path to the YAML template, or a falsy value
                for "no template". ``~`` is expanded when loading.
            template_kind: Label used in log and error messages.

        Raises:
            FileNotFoundError: If a path is given but does not exist.
            ValueError: If the file's top-level YAML value is not a mapping.
            RuntimeError: If loading fails for another reason.
        """
        self.template_file_path = template_file_path
        self._template_kind = template_kind
        self._template: Optional[Dict[str, Any]] = None

        if template_file_path:
            self._load_template()

    def _load_template(self) -> None:
        """Read ``template_file_path`` into ``_template``; no-op without a path."""
        configured_path = self.template_file_path
        if not configured_path:
            return

        resolved = Path(configured_path).expanduser()
        if not resolved.exists():
            raise FileNotFoundError(
                f"{self._template_kind} template file not found: {resolved}"
            )

        try:
            with resolved.open("r") as stream:
                self._template = yaml.safe_load(stream)

            loaded = self._template
            if not isinstance(loaded, dict):
                raise ValueError(
                    f"Invalid template file {resolved}: must be a YAML object, "
                    f"got {type(loaded).__name__}"
                )

            logger.info("Loaded %s template from %s", self._template_kind, resolved)
        except (FileNotFoundError, ValueError):
            # These two are part of the documented contract; do not wrap them.
            raise
        except Exception as e:
            raise RuntimeError(
                f"Failed to load {self._template_kind} template from {resolved}: {e}"
            ) from e

    def get_base_template(self) -> Dict[str, Any]:
        """Return an independent copy of the template, or ``{}`` if there is none."""
        return self._deep_copy(self._template) if self._template else {}

    def merge_with_runtime_values(self, runtime_manifest: Dict[str, Any]) -> Dict[str, Any]:
        """
        Overlay ``runtime_manifest`` on the template.

        Without a template the runtime manifest object itself is returned.
        """
        template_copy = self.get_base_template()
        if template_copy:
            return self._deep_merge(template_copy, runtime_manifest)
        return runtime_manifest

    @staticmethod
    def _deep_copy(obj: Any) -> Any:
        """Recursively copy dicts and lists; every other value is returned as-is."""
        clone = BaseSandboxTemplateManager._deep_copy
        if isinstance(obj, dict):
            return {key: clone(value) for key, value in obj.items()}
        if isinstance(obj, list):
            return [clone(element) for element in obj]
        return obj

    @staticmethod
    def _deep_merge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
        """
        Return a copy of ``base`` with ``override`` applied on top.

        Override values of None are skipped, dict-on-dict pairs are merged
        recursively, and anything else (lists included) replaces the base
        value with a copy of the override value.
        """
        merged = base.copy()

        for key, incoming in override.items():
            if incoming is None:
                continue

            # A missing key yields None from .get(), which is not a dict, so
            # new keys fall through to the plain-copy branch.
            both_dicts = isinstance(merged.get(key), dict) and isinstance(incoming, dict)
            if both_dicts:
                merged[key] = BaseSandboxTemplateManager._deep_merge(merged[key], incoming)
            else:
                merged[key] = BaseSandboxTemplateManager._deep_copy(incoming)

        return merged


================================================
FILE: server/src/services/k8s/volume_helper.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Volume helper utilities for Kubernetes pod specs.
"""

import logging
from typing import Any, Dict, List

from src.api.schema import Volume

logger = logging.getLogger(__name__)


def apply_volumes_to_pod_spec(
    pod_spec: Dict[str, Any],
    volumes: List[Volume],
) -> None:
    """
    Apply user-specified volumes to a Kubernetes pod spec.

    This function converts Volume API objects to Kubernetes volume and volumeMount
    definitions and adds them to the pod spec in-place. Mounts are added to the
    first container only.

    Currently supported backends:
    - pvc: Maps to Kubernetes PersistentVolumeClaim
    - host: Maps to Kubernetes hostPath volume

    Every pod-level volume name created here is recorded, for both backends, so
    a later volume reusing that name is rejected instead of producing a
    duplicate entry in ``pod_spec["volumes"]``.

    Args:
        pod_spec: The pod spec dictionary to modify in-place
        volumes: List of Volume API objects

    Raises:
        ValueError: If a volume name is already taken in the pod spec, or if a
            volume specifies no supported backend
    """
    containers = pod_spec.get("containers", [])
    if not containers:
        logger.warning("No containers in pod spec, skipping volume mounts")
        return

    main_container = containers[0]
    mounts = main_container.get("volumeMounts", [])
    pod_volumes = pod_spec.get("volumes", [])

    # Names already taken in the pod spec; grows as volumes are added below.
    existing_volume_names = {v.get("name") for v in pod_volumes if isinstance(v, dict)}
    # One Kubernetes volume per unique PVC; multiple volumeMounts can reference it
    pvc_to_volume_name: Dict[str, str] = {}

    for vol in volumes:
        vol_name = vol.name

        # Check for collision with internal volumes and with volumes added
        # earlier in this same call.
        if vol_name in existing_volume_names:
            raise ValueError(
                f"Volume name '{vol_name}' conflicts with an internal volume. "
                "Please use a different volume name."
            )

        if vol.pvc is not None:
            # PVC backend: maps to Kubernetes PersistentVolumeClaim.
            # Multiple Volume API objects sharing the same claim_name must produce
            # a single Kubernetes volume and multiple volumeMounts (CSI drivers
            # can fail when the same PVC is defined in multiple volume entries).
            pvc_claim_name = vol.pvc.claim_name

            if pvc_claim_name not in pvc_to_volume_name:
                # First use of this PVC: create one volume, use current vol.name as volume name
                pod_volumes.append({
                    "name": vol_name,
                    "persistentVolumeClaim": {
                        "claimName": pvc_claim_name,
                    },
                })
                pvc_to_volume_name[pvc_claim_name] = vol_name
                existing_volume_names.add(vol_name)

            mount_volume_name = pvc_to_volume_name[pvc_claim_name]
            description = f"PVC volume '{vol_name}' (claim: {pvc_claim_name})"
        elif vol.host is not None:
            # Host backend: maps to hostPath volume
            # Note: hostPath is node-local and not recommended for production
            host_path = vol.host.path

            pod_volumes.append({
                "name": vol_name,
                "hostPath": {
                    "path": host_path,
                    "type": "DirectoryOrCreate",
                },
            })
            # Record the name so a repeated host volume name (or a later PVC
            # volume with the same name) is caught by the collision check.
            existing_volume_names.add(vol_name)

            mount_volume_name = vol_name
            description = f"hostPath volume '{vol_name}' (path: {host_path})"
        else:
            raise ValueError(
                f"Volume '{vol_name}' has no supported backend specified. "
                "Supported backends: pvc, host"
            )

        mount = {
            "name": mount_volume_name,
            "mountPath": vol.mount_path,
            "readOnly": vol.read_only,
        }
        if vol.sub_path:
            mount["subPath"] = vol.sub_path
        mounts.append(mount)

        logger.info(
            f"Added {description} mounted at '{vol.mount_path}' for sandbox"
        )

    # Update pod spec with modified volumes and mounts
    pod_spec["volumes"] = pod_volumes
    main_container["volumeMounts"] = mounts


================================================
FILE: server/src/services/k8s/workload_provider.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Abstract workload provider interface for Kubernetes resources.
"""

from abc import ABC, abstractmethod
from datetime import datetime
from typing import Dict, List, Any, Optional

from src.api.schema import Endpoint, ImageSpec, NetworkPolicy, Volume
from src.config import EGRESS_MODE_DNS


class WorkloadProvider(ABC):
    """
    Contract that every Kubernetes workload backend has to fulfil.

    Concrete providers map the operations declared here onto one kind of
    Kubernetes resource (Pod, Job, StatefulSet, etc.), so callers can work
    against a single, uniform interface.
    """

    @abstractmethod
    def create_workload(
        self,
        sandbox_id: str,
        namespace: str,
        image_spec: ImageSpec,
        entrypoint: List[str],
        env: Dict[str, str],
        resource_limits: Dict[str, str],
        labels: Dict[str, str],
        expires_at: Optional[datetime],
        execd_image: str,
        extensions: Optional[Dict[str, str]] = None,
        network_policy: Optional[NetworkPolicy] = None,
        egress_image: Optional[str] = None,
        volumes: Optional[List[Volume]] = None,
        annotations: Optional[Dict[str, str]] = None,
        egress_auth_token: Optional[str] = None,
        egress_mode: str = EGRESS_MODE_DNS,
    ) -> Dict[str, Any]:
        """
        Create the workload resource backing a new sandbox.

        Args:
            sandbox_id: Unique sandbox identifier.
            namespace: Kubernetes namespace to create the workload in.
            image_spec: Container image specification.
            entrypoint: Container entrypoint command.
            env: Environment variables for the container.
            resource_limits: Resource limits (cpu, memory).
            labels: Labels to apply to the workload.
            expires_at: Expiration time, or None for manual cleanup (no TTL).
            execd_image: Image of the execd daemon.
            extensions: General-purpose extension mapping for additional
                configuration (e.g., ``poolRef`` for pool-based creation).
            network_policy: Optional network policy for egress traffic control.
                When provided, an egress sidecar container is added to the Pod.
            egress_image: Optional egress sidecar image. Required when
                ``network_policy`` is provided.
            volumes: Optional list of volume mounts for the sandbox.
            annotations: Optional annotations mapping (presumably applied to
                the created workload -- confirm against the implementations).
            egress_auth_token: Optional token related to the egress sidecar
                (exact use is defined by the implementations -- TODO confirm).
            egress_mode: Sidecar ``OPENSANDBOX_EGRESS_MODE`` (from app
                ``[egress].mode`` when using network policy).

        Returns:
            Dict containing workload metadata (name, uid, etc.)

        Raises:
            ApiException: If creation fails
        """

    @abstractmethod
    def get_workload(self, sandbox_id: str, namespace: str) -> Optional[Any]:
        """
        Look up the workload that belongs to a sandbox.

        Args:
            sandbox_id: Unique sandbox identifier
            namespace: Kubernetes namespace

        Returns:
            The workload object, or None when no such workload exists
        """

    @abstractmethod
    def delete_workload(self, sandbox_id: str, namespace: str) -> None:
        """
        Remove the workload that belongs to a sandbox.

        Args:
            sandbox_id: Unique sandbox identifier
            namespace: Kubernetes namespace

        Raises:
            ApiException: If deletion fails
        """

    @abstractmethod
    def list_workloads(self, namespace: str, label_selector: str) -> List[Any]:
        """
        Enumerate the workloads selected by a label query.

        Args:
            namespace: Kubernetes namespace
            label_selector: Label selector query

        Returns:
            List of matching workload objects
        """

    @abstractmethod
    def update_expiration(self, sandbox_id: str, namespace: str, expires_at: datetime) -> None:
        """
        Set a new expiration time on an existing workload.

        Args:
            sandbox_id: Unique sandbox identifier
            namespace: Kubernetes namespace
            expires_at: New expiration time

        Raises:
            Exception: If update fails
        """

    @abstractmethod
    def get_expiration(self, workload: Any) -> Optional[datetime]:
        """
        Read the expiration time recorded on a workload.

        Args:
            workload: Workload object

        Returns:
            Expiration datetime, or None if none is set
        """

    @abstractmethod
    def get_status(self, workload: Any) -> Dict[str, Any]:
        """
        Derive the sandbox status from a workload object.

        Args:
            workload: Workload object

        Returns:
            Dict with state, reason, message, last_transition_at
        """

    @abstractmethod
    def get_endpoint_info(self, workload: Any, port: int, sandbox_id: str) -> Optional[Endpoint]:
        """
        Resolve how a given port of the workload can be reached.

        Args:
            workload: Workload object
            port: Port number
            sandbox_id: Sandbox identifier for ingress-based endpoints

        Returns:
            Endpoint object (including optional headers) or None if not available
        """

    def supports_image_auth(self) -> bool:
        """
        Report whether per-request image pull authentication is supported.

        The base implementation answers False; providers that implement
        imagePullSecrets injection should override this to return True.
        """
        return False

    def legacy_resource_name(self, sandbox_id: str) -> str:
        """
        Map a sandbox_id onto the legacy, prefixed resource name.

        Pre-upgrade sandboxes were named ``sandbox-<id>``. IDs that already
        carry the prefix are returned untouched; plain IDs get it prepended,
        which keeps old resources reachable while new ones use plain IDs.
        """
        already_prefixed = sandbox_id.startswith("sandbox-")
        return sandbox_id if already_prefixed else f"sandbox-{sandbox_id}"


================================================
FILE: server/src/services/ossfs_mixin.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""OSSFS-specific Docker runtime behaviors."""

from __future__ import annotations

import logging
import os
import posixpath
import re
import subprocess
import tempfile
from typing import Any, Optional
from uuid import uuid4

from fastapi import HTTPException, status

from src.services.constants import SandboxErrorCodes
from src.services.helpers import normalize_external_endpoint_url

logger = logging.getLogger(__name__)


class OSSFSMixin:
    @staticmethod
    def _validate_bucket_name(bucket: str) -> None:
        """
        Validate OSS bucket name to prevent command injection.
        
        Bucket names must follow OSS naming rules: lowercase letters, numbers, hyphens.
        Length: 3-63 characters. Cannot start/end with hyphen.
        """
        if not bucket or not isinstance(bucket, str):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={
                    "code": SandboxErrorCodes.INVALID_PARAMETER,
                    "message": "OSSFS bucket name cannot be empty.",
                },
            )
        
        # OSS bucket naming: 3-63 chars, lowercase alphanumeric and hyphens only
        # Must start and end with lowercase letter or digit
        if not re.match(r'^[a-z0-9]([a-z0-9-]{1,61}[a-z0-9])?$', bucket):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={
                    "code": SandboxErrorCodes.INVALID_PARAMETER,
                    "message": (
                        f"Invalid bucket name '{bucket}'. Bucket names must be 3-63 characters, "
                        "contain only lowercase letters, numbers, and hyphens, "
                        "and start/end with a letter or number."
                    ),
                },
            )

    @staticmethod
    def _validate_ossfs_option(option: str) -> None:
        """
        Validate OSSFS option to prevent command injection.
        
        Options should not contain shell metacharacters or command separators.
        """
        # Check for dangerous characters that could be used for command injection
        dangerous_chars = [';', '&', '|', '`', '$', '(', ')', '<', '>', '\n', '\r']
        for char in dangerous_chars:
            if char in option:
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail={
                        "code": SandboxErrorCodes.INVALID_PARAMETER,
                        "message": (
                            f"Invalid OSSFS option: contains forbidden character '{char}'. "
                            "Options must not contain shell metacharacters."
                        ),
                    },
                )

    @staticmethod
    def _validate_mount_path(path: str) -> None:
        """
        Validate mount path to prevent command injection in unmount operations.
        
        Path must be absolute and not contain dangerous characters.
        """
        if not path or not isinstance(path, str):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={
                    "code": SandboxErrorCodes.INVALID_PARAMETER,
                    "message": "Mount path cannot be empty.",
                },
            )
        
        # Path must be absolute
        if not path.startswith('/'):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={
                    "code": SandboxErrorCodes.INVALID_PARAMETER,
                    "message": f"Mount path must be absolute: '{path}'",
                },
            )
        
        # Check for dangerous characters that could be used for command injection
        dangerous_chars = [';', '&', '|', '`', '$', '(', ')', '<', '>', '\n', '\r']
        for char in dangerous_chars:
            if char in path:
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail={
                        "code": SandboxErrorCodes.INVALID_PARAMETER,
                        "message": (
                            f"Invalid mount path: contains forbidden character '{char}'. "
                            "Paths must not contain shell metacharacters."
                        ),
                    },
                )

    @staticmethod
    def _validate_endpoint_url(endpoint_url: str) -> None:
        """
        Validate endpoint URL to prevent command injection.
        
        URL should not contain dangerous shell metacharacters.
        """
        if not endpoint_url or not isinstance(endpoint_url, str):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={
                    "code": SandboxErrorCodes.INVALID_PARAMETER,
                    "message": "Endpoint URL cannot be empty.",
                },
            )
        
        # Check for dangerous characters
        dangerous_chars = [';', '&', '|', '`', '$', '(', ')', '<', '>', '\n', '\r', ' ']
        for char in dangerous_chars:
            if char in endpoint_url:
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail={
                        "code": SandboxErrorCodes.INVALID_PARAMETER,
                        "message": (
                            f"Invalid endpoint URL: contains forbidden character '{char}'. "
                            "URLs must not contain shell metacharacters or spaces."
                        ),
                    },
                )

    @staticmethod
    def _normalize_ossfs_option(raw_option: str) -> str:
        option = str(raw_option).strip()
        if not option:
            return ""
        return option

    def _resolve_ossfs_paths(self, volume) -> tuple[str, str]:
        """
        Resolve OSSFS base mount path and bind path.

        For OSSFS, ``volume.subPath`` represents the bucket prefix.
        The backend mount path and bind path are identical:
        - path = ossfs_mount_root/<bucket>/<subPath?>
        """
        mount_root = (self.app_config.storage.ossfs_mount_root or "").strip()
        if not mount_root.startswith("/"):
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.INVALID_OSSFS_MOUNT_ROOT,
                    "message": (
                        "storage.ossfs_mount_root must be configured as an absolute path."
                    ),
                },
            )

        mount_root = posixpath.normpath(mount_root)
        bucket_root = posixpath.normpath(posixpath.join(mount_root, volume.ossfs.bucket))
        prefix = (volume.sub_path or "").lstrip("/")
        backend_path = posixpath.normpath(posixpath.join(bucket_root, prefix))

        bucket_prefix = bucket_root if bucket_root.endswith("/") else bucket_root + "/"
        if backend_path != bucket_root and not backend_path.startswith(bucket_prefix):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={
                    "code": SandboxErrorCodes.INVALID_SUB_PATH,
                    "message": (
                        f"Volume '{volume.name}': resolved OSSFS prefix escapes bucket root."
                    ),
                },
            )

        return backend_path, backend_path

    def _build_ossfs_v1_command(
        self,
        volume,
        source: str,
        backend_path: str,
        endpoint_url: str,
        passwd_file: str,
    ) -> list[str]:
        # Validate inputs for security
        self._validate_bucket_name(volume.ossfs.bucket)
        self._validate_endpoint_url(endpoint_url)
        self._validate_mount_path(backend_path)
        
        cmd: list[str] = [
            "ossfs",
            source,
            backend_path,
            "-o",
            f"url={endpoint_url}",
            "-o",
            f"passwd_file={passwd_file}",
        ]
        if volume.ossfs.options:
            for raw_opt in volume.ossfs.options:
                opt = self._normalize_ossfs_option(raw_opt)
                if opt:
                    # Validate each option for dangerous characters
                    self._validate_ossfs_option(opt)
                    cmd.extend(["-o", opt])
        return cmd

    def _build_ossfs_v2_config_lines(
        self,
        volume,
        endpoint_url: str,
        prefix: str,
    ) -> list[str]:
        # Validate inputs for security
        self._validate_bucket_name(volume.ossfs.bucket)
        self._validate_endpoint_url(endpoint_url)
        
        conf_lines: list[str] = [
            f"--oss_endpoint={endpoint_url}",
            f"--oss_bucket={volume.ossfs.bucket}",
            f"--oss_access_key_id={volume.ossfs.access_key_id}",
            f"--oss_access_key_secret={volume.ossfs.access_key_secret}",
        ]
        if prefix:
            normalized_prefix = prefix if prefix.endswith("/") else f"{prefix}/"
            conf_lines.append(f"--oss_bucket_prefix={normalized_prefix}")
        if volume.ossfs.options:
            for raw_opt in volume.ossfs.options:
                opt = self._normalize_ossfs_option(raw_opt)
                if opt:
                    # Validate each option for dangerous characters
                    self._validate_ossfs_option(opt)
                    conf_lines.append(f"--{opt}")
        return conf_lines

    def _build_ossfs_v2_mount_command(self, backend_path: str, conf_file: str) -> list[str]:
        # Validate backend path for security
        self._validate_mount_path(backend_path)
        return ["ossfs2", "mount", backend_path, "-c", conf_file]

    @staticmethod
    def _run_ossfs_mount_command(cmd: list[str], volume_name: str) -> None:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=30,
            check=False,
        )
        if result.returncode != 0:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.OSSFS_MOUNT_FAILED,
                    "message": (
                        f"Volume '{volume_name}': failed to mount OSSFS backend. "
                        f"stderr={result.stderr.strip() or 'unknown error'}"
                    ),
                },
            )

    def _mount_ossfs_backend_path(self, volume, backend_path: str) -> None:
        """
        Mount OSS bucket/path to backend_path with version-specific OSSFS arguments.

        Steps: check inline credentials, validate bucket name and mount path,
        create the mount directory, write a temporary credentials/config file,
        run the mount command, and always remove the temporary file afterwards.

        Args:
            volume: Volume object; ``volume.ossfs`` supplies bucket, endpoint,
                credentials, options and version (``"2.0"`` when unset),
                ``volume.sub_path`` the bucket prefix.
            backend_path: Absolute host path to mount onto.

        Raises:
            HTTPException: 400 for missing credentials, invalid inputs or an
                unsupported version; 500 ``OSSFS_MOUNT_FAILED`` when the mount
                command fails, cannot be executed, or times out.
        """
        access_key_id = volume.ossfs.access_key_id
        access_key_secret = volume.ossfs.access_key_secret
        if not access_key_id or not access_key_secret:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={
                    "code": SandboxErrorCodes.INVALID_OSSFS_CREDENTIALS,
                    "message": (
                        "OSSFS inline credentials are required: "
                        "accessKeyId and accessKeySecret must be provided."
                    ),
                },
            )

        bucket = volume.ossfs.bucket
        # Both command builders run these validators anyway; running them here
        # as well makes sure nothing is created on disk for a rejected request.
        self._validate_bucket_name(bucket)
        self._validate_mount_path(backend_path)
        os.makedirs(backend_path, exist_ok=True)

        prefix = (volume.sub_path or "").strip("/")
        source = f"{bucket}:/{prefix}" if prefix else bucket
        endpoint = volume.ossfs.endpoint
        endpoint_url = normalize_external_endpoint_url(endpoint)

        def _private_opener(path: str, flags: int) -> int:
            # Create the file owner-only from the very first moment (no window
            # in which the secret is readable under the default umask) and
            # refuse to reuse a pre-existing file.
            return os.open(path, flags | os.O_EXCL, 0o600)

        def _write_secret_file(path: str, content: str) -> None:
            with open(path, "w", encoding="utf-8", opener=_private_opener) as handle:
                handle.write(content)
            # Pin the mode explicitly in case the umask stripped more bits.
            os.chmod(path, 0o600)

        passwd_file: Optional[str] = None
        conf_file: Optional[str] = None
        version = volume.ossfs.version or "2.0"
        try:
            if version == "1.0":
                passwd_file = os.path.join(
                    tempfile.gettempdir(),
                    f"opensandbox-ossfs-inline-{uuid4().hex}",
                )
                # ossfs passwd_file format: bucket:accessKeyId:accessKeySecret
                _write_secret_file(
                    passwd_file,
                    f"{bucket}:{access_key_id}:{access_key_secret}",
                )
                cmd = self._build_ossfs_v1_command(
                    volume=volume,
                    source=source,
                    backend_path=backend_path,
                    endpoint_url=endpoint_url,
                    passwd_file=passwd_file,
                )
            elif version == "2.0":
                conf_lines = self._build_ossfs_v2_config_lines(
                    volume=volume,
                    endpoint_url=endpoint_url,
                    prefix=prefix,
                )
                conf_file = os.path.join(
                    tempfile.gettempdir(),
                    f"opensandbox-ossfs2-{uuid4().hex}.conf",
                )
                _write_secret_file(conf_file, "\n".join(conf_lines) + "\n")
                cmd = self._build_ossfs_v2_mount_command(backend_path, conf_file)
            else:
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail={
                        "code": SandboxErrorCodes.INVALID_OSSFS_VERSION,
                        "message": (
                            f"Volume '{volume.name}': unsupported OSSFS version '{version}'."
                        ),
                    },
                )
            self._run_ossfs_mount_command(cmd, volume.name)
        except (OSError, subprocess.TimeoutExpired) as exc:
            # OSError: the binary is missing or a temp file could not be
            # written. TimeoutExpired: the mount command hung.
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "code": SandboxErrorCodes.OSSFS_MOUNT_FAILED,
                    "message": (
                        f"Volume '{volume.name}': failed to execute ossfs command: {exc}"
                    ),
                },
            ) from exc
        finally:
            # Secrets never outlive the mount attempt; cleanup is best-effort.
            for leftover in (passwd_file, conf_file):
                if not leftover:
                    continue
                try:
                    os.remove(leftover)
                except OSError:
                    pass

    def _ensure_ossfs_mounted(self, volume_or_mount_key) -> str:
        """Ensure OSSFS backend path is mounted and return mount key."""
        if isinstance(volume_or_mount_key, str):
            mount_key = volume_or_mount_key
            backend_path = volume_or_mount_key
            volume = None
        else:
            volume = volume_or_mount_key
            backend_path, _ = self._resolve_ossfs_paths(volume)
            mount_key = backend_path

        with self._ossfs_mount_lock:
            current = self._ossfs_mount_ref_counts.get(mount_key, 0)
            if current > 0:
                self._ossfs_mount_ref_counts[mount_key] = current + 1
                return mount_key

            if not os.path.ismount(backend_path):
                if volume is None:
                    raise HTTPException(
                        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                        detail={
                            "code": SandboxErrorCodes.OSSFS_MOUNT_FAILED,
                            "message": (
                                f"Failed to mount OSSFS path '{mount_key}': "
                                "missing volume context."
                            ),
                        },
                    )
                self._mount_ossfs_backend_path(volume, backend_path)

            self._ossfs_mount_ref_counts[mount_key] = 1
            return mount_key

    def _release_ossfs_mount(self, mount_key: str) -> None:
        """Release one reference and unmount when ref count reaches zero."""
        # Validate mount path before using in unmount commands
        self._validate_mount_path(mount_key)
        
        with self._ossfs_mount_lock:
            current = self._ossfs_mount_ref_counts.get(mount_key, 0)
            if current <= 0:
                logger.warning(
                    "Skipping OSSFS unmount for untracked mount key '%s'.",
                    mount_key,
                )
                return
            if current == 1:
                self._ossfs_mount_ref_counts.pop(mount_key, None)
                should_unmount = True
            else:
                self._ossfs_mount_ref_counts[mount_key] = current - 1
                should_unmount = False

        if not should_unmount or not os.path.ismount(mount_key):
            return

        errors: list[str] = []
        for cmd in (["fusermount", "-u", mount_key], ["umount", mount_key]):
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=20,
                check=False,
            )
            if result.returncode == 0:
                return
            errors.append(result.stderr.strip() or "unknown error")

        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail={
                "code": SandboxErrorCodes.OSSFS_UNMOUNT_FAILED,
                "message": f"Failed to unmount OSSFS path '{mount_key}': {'; '.join(errors)}",
            },
        )

    def _release_ossfs_mounts(self, mount_keys: list[str]) -> None:
        for key in mount_keys:
            try:
                self._release_ossfs_mount(key)
            except HTTPException as exc:
                logger.warning("Failed to release OSSFS mount %s: %s", key, exc.detail)

    def _prepare_ossfs_mounts(self, volumes: Optional[list]) -> list[str]:
        if not volumes:
            return []
        key_to_volume: dict[str, Any] = {}
        prepared_mount_keys: list[str] = []
        for volume in volumes:
            if volume.ossfs is not None:
                mount_key, _ = self._resolve_ossfs_paths(volume)
                if mount_key not in key_to_volume:
                    key_to_volume[mount_key] = volume
        try:
            for mount_key, volume in key_to_volume.items():
                self._ensure_ossfs_mounted(volume)
                prepared_mount_keys.append(mount_key)
            return list(key_to_volume.keys())
        except Exception:
            # Roll back mounts already prepared in this batch.
            self._release_ossfs_mounts(prepared_mount_keys)
            raise

    def _validate_ossfs_volume(self, volume) -> None:
        """
        Docker-specific validation for OSSFS backend.

        Ensures inline credentials and path semantics are valid.
        """
        if os.name == "nt":
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={
                    "code": SandboxErrorCodes.INVALID_PARAMETER,
                    "message": (
                        "OSSFS backend on Docker runtime requires a Linux host with FUSE support. "
                        "Running OpenSandbox Server on Windows is not supported for OSSFS mounts."
                    ),
                },
            )

        if not volume.ossfs.access_key_id or not volume.ossfs.access_key_secret:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={
                    "code": SandboxErrorCodes.INVALID_OSSFS_CREDENTIALS,
                    "message": (
                        "OSSFS inline credentials are required: "
                        "accessKeyId and accessKeySecret must be provided."
                    ),
                },
            )

        self._resolve_ossfs_paths(volume)


================================================
FILE: server/src/services/runtime_resolver.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Secure runtime resolver for translating secure runtime configuration
to backend-specific parameters (Docker --runtime, Kubernetes RuntimeClass).

This module provides:
- SecureRuntimeResolver: Translates AppConfig to runtime parameters
- validate_secure_runtime_on_startup: Validates runtime availability at server startup
"""

from __future__ import annotations

import asyncio
import logging
from typing import TYPE_CHECKING, Optional

from kubernetes.client.exceptions import ApiException

logger = logging.getLogger(__name__)

if TYPE_CHECKING:
    from docker import DockerClient
    from src.config import AppConfig, SecureRuntimeConfig
    from src.services.k8s.client import K8sClient


class SecureRuntimeResolver:
    """
    Turns the server-level ``secure_runtime`` settings into backend parameters.

    - Docker: OCI runtime name (e.g., "runsc", "kata-runtime")
    - Kubernetes: RuntimeClass name (e.g., "gvisor", "kata-qemu")

    An explicitly configured name always wins; otherwise the name is looked
    up in the default tables below by secure runtime type.
    """

    # Secure runtime type -> Docker OCI runtime used when none is configured.
    DEFAULT_DOCKER_RUNTIMES = {
        "gvisor": "runsc",
        "kata": "kata-runtime",
    }

    # Secure runtime type -> Kubernetes RuntimeClass used when none is configured.
    DEFAULT_K8S_RUNTIME_CLASSES = {
        "gvisor": "gvisor",
        "kata": "kata-qemu",
        "firecracker": "kata-fc",
    }

    def __init__(self, config: AppConfig):
        """
        Capture the pieces of the application configuration the resolver needs.

        Args:
            config: Application configuration; ``secure_runtime`` is optional
                (treated as None when the attribute is absent) and
                ``runtime.type`` is required.
        """
        secure_runtime: Optional[SecureRuntimeConfig] = getattr(
            config, "secure_runtime", None
        )
        self.secure_runtime = secure_runtime
        self.runtime_mode = config.runtime.type  # "docker" or "kubernetes"

    def is_enabled(self) -> bool:
        """Return True when a secure runtime section exists and its type is not ""."""
        if self.secure_runtime is None:
            return False
        return self.secure_runtime.type != ""

    def get_docker_runtime(self) -> Optional[str]:
        """
        Resolve the Docker OCI runtime name for secure containers.

        Returns:
            The configured ``docker_runtime`` when set, else the default for
            the secure runtime type (e.g., "runsc", "kata-runtime"), or None
            when secure runtime is disabled or the type has no default.
        """
        if not self.is_enabled():
            return None

        settings = self.secure_runtime
        if settings is None:
            return None

        # An explicit override wins over the built-in mapping.
        return settings.docker_runtime or self.DEFAULT_DOCKER_RUNTIMES.get(settings.type)

    def get_k8s_runtime_class(self) -> Optional[str]:
        """
        Resolve the Kubernetes RuntimeClass name for secure containers.

        Returns:
            The configured ``k8s_runtime_class`` when set, else the default
            for the secure runtime type (e.g., "gvisor", "kata-qemu"), or None
            when secure runtime is disabled or the type has no default.
        """
        if not self.is_enabled():
            return None

        settings = self.secure_runtime
        if settings is None:
            return None

        # An explicit override wins over the built-in mapping.
        return settings.k8s_runtime_class or self.DEFAULT_K8S_RUNTIME_CLASSES.get(
            settings.type
        )


async def validate_secure_runtime_on_startup(
    config: AppConfig,
    docker_client: Optional["DockerClient"] = None,
    k8s_client: Optional["K8sClient"] = None,
) -> None:
    """
    Fail-fast check that the configured secure runtime is usable.

    Nothing is validated when secure runtime is not enabled. Otherwise the
    check is delegated by ``config.runtime.type``:
    - "docker": the Docker OCI runtime check
    - "kubernetes": the Kubernetes RuntimeClass check
    Any other runtime type only produces a warning.

    Args:
        config: Application configuration
        docker_client: Optional Docker client for runtime validation
        k8s_client: Optional K8s client wrapper for RuntimeClass validation

    Raises:
        ValueError: If a configured secure runtime is not available
        Exception: For other validation errors
    """
    resolver = SecureRuntimeResolver(config)
    if not resolver.is_enabled():
        logger.info("Secure runtime is not configured.")
        return

    backend = config.runtime.type
    if backend == "docker":
        await _validate_docker_runtime(resolver, docker_client)
        return
    if backend == "kubernetes":
        await _validate_k8s_runtime_class(resolver, k8s_client, config)
        return

    logger.warning(
        "Secure runtime validation skipped for unknown runtime type: %s",
        backend,
    )


async def _validate_docker_runtime(
    resolver: SecureRuntimeResolver,
    docker_client: Optional["DockerClient"],
) -> None:
    """
    Validate that the Docker OCI runtime exists.

    Args:
        resolver: Resolver that yields the configured Docker runtime name.
        docker_client: Docker client used to query the daemon, or None to
            skip validation (a warning is logged in that case).

    Raises:
        ValueError: If the configured runtime is not listed by the daemon.
        Exception: Any error raised while querying the daemon is logged and
            re-raised unchanged.
    """
    runtime_name = resolver.get_docker_runtime()

    if not runtime_name:
        logger.info("No Docker runtime configured for secure containers.")
        return

    logger.info("Validating Docker OCI runtime: %s", runtime_name)

    if docker_client is None:
        logger.warning(
            "Docker client not available; skipping runtime validation. "
            "Runtime '%s' will be used but not validated.",
            runtime_name,
        )
        return

    try:
        # docker_client.info() is a synchronous call; run it in the default
        # executor so the event loop is not blocked while the daemon answers
        # (same approach as the Kubernetes RuntimeClass validator).
        loop = asyncio.get_running_loop()
        info = await loop.run_in_executor(None, docker_client.info)

        # Treat a missing *or* null "Runtimes" entry as "no runtimes" so the
        # membership test below cannot fail with a TypeError.
        runtimes = info.get("Runtimes") or {}

        if runtime_name not in runtimes:
            available = ", ".join(runtimes.keys()) if runtimes else "none"
            raise ValueError(
                f"Configured Docker runtime '{runtime_name}' is not available. "
                f"Available runtimes: {available}. "
                f"Please install and configure the runtime before starting the server."
            )

        logger.info(
            "Docker OCI runtime '%s' is available: %s",
            runtime_name,
            runtimes.get(runtime_name, {}),
        )
    except Exception as exc:
        # Includes the ValueError raised above: log once, then propagate.
        logger.error("Failed to validate Docker runtime: %s", exc)
        raise


async def _validate_k8s_runtime_class(
    resolver: SecureRuntimeResolver,
    k8s_client: Optional["K8sClient"],
    config: AppConfig,
) -> None:
    """
    Validate that the Kubernetes RuntimeClass exists.

    Args:
        resolver: Resolver that yields the RuntimeClass name to check.
        k8s_client: Client wrapper exposing ``read_runtime_class``, or None to
            skip validation (a warning is logged in that case).
        config: Application configuration (not read by this function; kept
            for signature compatibility with the caller).

    Raises:
        ValueError: If the API reports 404 for the RuntimeClass.
        ApiException: Any other API error, logged and re-raised.
        Exception: Any other failure, logged and re-raised.
    """
    runtime_class_name = resolver.get_k8s_runtime_class()

    if not runtime_class_name:
        logger.info("No Kubernetes RuntimeClass configured for secure containers.")
        return

    logger.info("Validating Kubernetes RuntimeClass: %s", runtime_class_name)

    if k8s_client is None:
        logger.warning(
            "Kubernetes client not available; skipping RuntimeClass validation. "
            "RuntimeClass '%s' will be used but not validated.",
            runtime_class_name,
        )
        return

    try:
        # Inside a coroutine the running loop is the one to use;
        # get_running_loop() is the documented replacement for
        # get_event_loop() in this context.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, k8s_client.read_runtime_class, runtime_class_name)
    except ApiException as exc:
        if exc.status == 404:
            # Translate "not found" into a configuration error for the operator.
            raise ValueError(
                f"Configured Kubernetes RuntimeClass '{runtime_class_name}' does not exist. "
                f"Please create the RuntimeClass before starting the server."
            ) from exc
        logger.error("Failed to validate RuntimeClass: %s", exc)
        raise
    except Exception as exc:
        logger.error("Failed to validate RuntimeClass: %s", exc)
        raise
    else:
        logger.info("Kubernetes RuntimeClass '%s' is available.", runtime_class_name)


# Names exported by ``from <module> import *``. The ``_validate_*`` helpers
# defined above are intentionally not listed here.
__all__ = [
    "SecureRuntimeResolver",
    "validate_secure_runtime_on_startup",
]


================================================
FILE: server/src/services/sandbox_service.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Sandbox service layer for business logic.

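This module defines the abstract interface that all sandbox service
implementations share, together with a few small static helpers (ID
generation, bind-IP resolution, port validation). The lifecycle operations
themselves are declared abstract here and implemented by subclasses.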
"""

from abc import ABC, abstractmethod
import socket
from uuid import uuid4

from src.api.schema import (
    CreateSandboxRequest,
    CreateSandboxResponse,
    Endpoint,
    ListSandboxesRequest,
    ListSandboxesResponse,
    RenewSandboxExpirationRequest,
    RenewSandboxExpirationResponse,
    Sandbox,
)
from src.services.validators import ensure_valid_port


class SandboxService(ABC):
    """
    Contract that every sandbox service implementation must fulfil.

    Subclasses supply the lifecycle operations (create, list, get, delete,
    pause, resume, renew, endpoint lookup). A handful of static helpers that
    do not depend on a particular backend are provided here.
    """

    @staticmethod
    def generate_sandbox_id() -> str:
        """
        Produce a fresh sandbox identifier.

        Returns:
            str: A random UUID4 rendered in its canonical hyphenated form
        """
        new_id = uuid4()
        return str(new_id)

    @staticmethod
    def _resolve_bind_ip(family: int = socket.AF_INET) -> str:
        """
        Work out which local address outbound traffic would use.

        Strategy, in order:

        1. "Connect" a UDP socket of the requested family to a well-known
           public resolver address and read back the local socket name.
           For non-IPv4 families an address starting with ``fe80`` is
           ignored. If this step raises ``OSError`` for IPv6, the whole
           lookup is retried for IPv4.
        2. Resolve the machine's hostname for the requested family and take
           the first address returned.
        3. Fall back to loopback.

        Args:
            family: ``socket.AF_INET`` (default) or ``socket.AF_INET6``.

        Returns:
            str: Detected local IP address, or the loopback address
            (``127.0.0.1``, or ``::1`` when IPv6 was requested).
        """
        wants_v6 = family == socket.AF_INET6

        try:
            if wants_v6:
                probe_target = ("2001:4860:4860::8888", 80, 0, 0)
            else:
                probe_target = ("8.8.8.8", 80)
            with socket.socket(family, socket.SOCK_DGRAM) as probe:
                probe.connect(probe_target)
                local_ip = probe.getsockname()[0]
                # Skip fe80-prefixed addresses unless the caller asked for IPv4.
                rejected = family != socket.AF_INET and local_ip.startswith("fe80")
                if local_ip and not rejected:
                    return local_ip
        except OSError:
            if wants_v6:
                # The IPv6 probe failed; retry the whole lookup for IPv4.
                return SandboxService._resolve_bind_ip(socket.AF_INET)

        try:
            lookup_family = socket.AF_INET6 if wants_v6 else socket.AF_INET
            candidates = socket.getaddrinfo(
                socket.gethostname(), None, lookup_family, socket.SOCK_DGRAM
            )
            if candidates:
                first_address = candidates[0][4][0]
                if first_address:
                    return first_address
        except OSError:
            # Hostname resolution is best-effort; fall through to loopback.
            pass

        if wants_v6:
            return "::1"
        return "127.0.0.1"

    @staticmethod
    def validate_port(port: int) -> None:
        """
        Check that ``port`` is usable; delegates to ``ensure_valid_port``.

        Args:
            port: Port to validate

        Raises:
            HTTPException: Raised by ``ensure_valid_port`` when the port is
                outside 1-65535
        """
        ensure_valid_port(port)

    @abstractmethod
    async def create_sandbox(self, request: CreateSandboxRequest) -> CreateSandboxResponse:
        """
        Provision a new sandbox from a container image.

        Args:
            request: Parameters describing the sandbox to create

        Returns:
            CreateSandboxResponse: Details of the sandbox that was created

        Raises:
            HTTPException: If the sandbox cannot be created
        """
        ...

    @abstractmethod
    def list_sandboxes(self, request: ListSandboxesRequest) -> ListSandboxesResponse:
        """
        Enumerate sandboxes, honouring the request's filters and paging.

        Args:
            request: Filters and pagination settings

        Returns:
            ListSandboxesResponse: One page of matching sandboxes
        """
        ...

    @abstractmethod
    def get_sandbox(self, sandbox_id: str) -> Sandbox:
        """
        Look up a single sandbox.

        Args:
            sandbox_id: Unique sandbox identifier

        Returns:
            Sandbox: Full description of the sandbox

        Raises:
            HTTPException: If no such sandbox exists
        """
        ...

    @abstractmethod
    def delete_sandbox(self, sandbox_id: str) -> None:
        """
        Remove a sandbox.

        Args:
            sandbox_id: Unique sandbox identifier

        Raises:
            HTTPException: If the sandbox is missing or cannot be deleted
        """
        ...

    @abstractmethod
    def pause_sandbox(self, sandbox_id: str) -> None:
        """
        Suspend a running sandbox.

        Args:
            sandbox_id: Unique sandbox identifier

        Raises:
            HTTPException: If the sandbox is missing or cannot be paused
        """
        ...

    @abstractmethod
    def resume_sandbox(self, sandbox_id: str) -> None:
        """
        Bring a paused sandbox back to the running state.

        Args:
            sandbox_id: Unique sandbox identifier

        Raises:
            HTTPException: If the sandbox is missing or cannot be resumed
        """
        ...

    @abstractmethod
    def renew_expiration(
        self,
        sandbox_id: str,
        request: RenewSandboxExpirationRequest,
    ) -> RenewSandboxExpirationResponse:
        """
        Push back the time at which a sandbox expires.

        Args:
            sandbox_id: Unique sandbox identifier
            request: Carries the requested new expiration time

        Returns:
            RenewSandboxExpirationResponse: The expiration time now in effect

        Raises:
            HTTPException: If the sandbox is missing or renewal fails
        """
        ...

    @abstractmethod
    def get_endpoint(self, sandbox_id: str, port: int, resolve_internal: bool = False) -> Endpoint:
        """
        Resolve the address through which a sandbox port can be reached.

        Args:
            sandbox_id: Unique sandbox identifier
            port: Port the service listens on inside the sandbox
            resolve_internal: If True, return the internal container IP (for proxy), ignoring router config.

        Returns:
            Endpoint: Endpoint for the requested port

        Raises:
            HTTPException: If the sandbox is missing or no endpoint is available
        """
        ...


================================================
FILE: server/src/services/validators.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Shared validation helpers for container-based sandbox services.

These helpers centralize request validation so all container runtimes
enforce the same preconditions before performing runtime-specific work.
"""

from __future__ import annotations

import os
from datetime import datetime, timedelta, timezone
from typing import TYPE_CHECKING, Dict, List, Optional, Sequence

from fastapi import HTTPException, status
import re

from src.services.constants import RESERVED_LABEL_PREFIX, SandboxErrorCodes

if TYPE_CHECKING:
    from src.api.schema import NetworkPolicy, OSSFS, Volume
    from src.config import EgressConfig


def ensure_entrypoint(entrypoint: Sequence[str]) -> None:
    """
    Reject requests that carry no entrypoint command.

    Args:
        entrypoint: Command and arguments to run in the sandbox.

    Raises:
        HTTPException: 400 with ``INVALID_ENTRYPOINT`` when ``entrypoint`` is
            empty (or otherwise falsy).
    """
    if entrypoint:
        return

    error_detail = {
        "code": SandboxErrorCodes.INVALID_ENTRYPOINT,
        "message": "Entrypoint must contain at least one command.",
    }
    raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=error_detail)


DNS_LABEL_PATTERN = r"[a-z0-9]([-a-z0-9]*[a-z0-9])?"
DNS_SUBDOMAIN_RE = re.compile(rf"^(?:{DNS_LABEL_PATTERN}\.)*{DNS_LABEL_PATTERN}$")
LABEL_NAME_RE = re.compile(r"^[A-Za-z0-9]([-A-Za-z0-9_.]*[A-Za-z0-9])?$")
LABEL_VALUE_RE = re.compile(r"^([A-Za-z0-9]([-A-Za-z0-9_.]*[A-Za-z0-9])?)?$")


def _is_valid_label_key(key: str) -> bool:
    if "/" in key:
        prefix, name = key.split("/", 1)
        if not prefix or not name:
            return False
        # Kubernetes requires the prefix to be a DNS subdomain <= 253 chars.
        # The name portion is validated separately below (max 63 chars).
        # Note: the total key length (prefix + "/" + name) may exceed 253 chars
        # when the prefix uses its full 253-character allowance; this is valid.
        if len(prefix) > 253:
            return False
        if not DNS_SUBDOMAIN_RE.match(prefix):
            return False
    else:
        name = key
    if len(name) > 63 or not LABEL_NAME_RE.match(name):
        return False
    return True


def _is_valid_label_value(value: str) -> bool:
    if len(value) > 63:
        return False
    return bool(LABEL_VALUE_RE.match(value))


def ensure_metadata_labels(metadata: Optional[Dict[str, str]]) -> None:
    """
    Check every metadata entry against the Kubernetes label rules.

    For each key/value pair, in this order: both must be strings, the key
    must not start with the reserved system prefix, the key must be a valid
    label key, and the value must be a valid label value. The first failing
    check aborts validation.

    Args:
        metadata: User-supplied metadata; None or empty means nothing to do.

    Raises:
        HTTPException: 400 with ``INVALID_METADATA_LABEL`` for the first
            entry that fails a check.
    """
    if not metadata:
        return

    def _invalid(message):
        # All failures share the same status and error code; only the text differs.
        return HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "code": SandboxErrorCodes.INVALID_METADATA_LABEL,
                "message": message,
            },
        )

    for key, value in metadata.items():
        both_strings = isinstance(key, str) and isinstance(value, str)
        if not both_strings:
            raise _invalid("Metadata keys and values must be strings.")

        if key.startswith(RESERVED_LABEL_PREFIX):
            raise _invalid(
                f"Metadata key '{key}' uses the reserved prefix '{RESERVED_LABEL_PREFIX}'. "
                "Keys under this prefix are managed by the system and cannot be set via metadata."
            )

        if not _is_valid_label_key(key):
            raise _invalid(f"Metadata key '{key}' is not a valid Kubernetes label key.")

        if not _is_valid_label_value(value):
            raise _invalid(f"Metadata value '{value}' is not a valid Kubernetes label value.")


def ensure_future_expiration(expires_at: datetime) -> datetime:
    """
    Convert an expiration timestamp to UTC and require it to be in the future.

    A naive timestamp is taken to already be in UTC (the UTC tzinfo is
    attached without shifting the clock time); an aware timestamp is
    converted to UTC.

    Args:
        expires_at: Requested expiration time (timezone aware or naive).

    Returns:
        datetime: The same instant expressed as an aware UTC datetime.

    Raises:
        HTTPException: 400 with ``INVALID_EXPIRATION`` if the instant is now
            or in the past.
    """
    is_aware = expires_at.tzinfo is not None
    as_utc = (
        expires_at.astimezone(timezone.utc)
        if is_aware
        else expires_at.replace(tzinfo=timezone.utc)
    )

    current_time = datetime.now(timezone.utc)
    if current_time >= as_utc:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "code": SandboxErrorCodes.INVALID_EXPIRATION,
                "message": "New expiration time must be in the future.",
            },
        )

    return as_utc


def ensure_valid_port(port: int) -> None:
    """
    Require ``port`` to lie in the inclusive range 1-65535.

    Args:
        port: Port number to check.

    Raises:
        HTTPException: 400 with ``INVALID_PORT`` when the port is out of range.
    """
    below_minimum = port < 1
    above_maximum = port > 65535
    if below_minimum or above_maximum:
        error_detail = {
            "code": SandboxErrorCodes.INVALID_PORT,
            "message": "Port must be between 1 and 65535.",
        }
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=error_detail)


def ensure_timeout_within_limit(timeout_seconds: Optional[int], max_timeout_seconds: Optional[int]) -> None:
    """
    Check a requested sandbox TTL against the configured ceiling.

    When a timeout is given, it is first checked for representability by
    computing ``now + timeout`` (which raises a 400 on overflow), and then
    compared with the maximum, if one is configured.

    Args:
        timeout_seconds: Requested sandbox TTL in seconds, or None for manual cleanup.
        max_timeout_seconds: Configured maximum TTL in seconds, or None to disable the limit.

    Raises:
        HTTPException: When the timeout cannot be represented or exceeds the
            configured maximum.
    """
    if timeout_seconds is None:
        return

    # Result is discarded; the call is made only for its overflow check.
    calculate_expiration_or_raise(datetime.now(timezone.utc), timeout_seconds)

    limit_enabled = max_timeout_seconds is not None
    if limit_enabled and timeout_seconds > max_timeout_seconds:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "code": SandboxErrorCodes.INVALID_PARAMETER,
                "message": (
                    f"Sandbox timeout {timeout_seconds}s exceeds configured maximum "
                    f"of {max_timeout_seconds}s."
                ),
            },
        )


def calculate_expiration_or_raise(created_at: datetime, timeout_seconds: int) -> datetime:
    """
    Add a TTL to a creation time, mapping arithmetic overflow to a 400 error.

    Args:
        created_at: Starting timestamp.
        timeout_seconds: Lifetime in seconds to add.

    Returns:
        datetime: ``created_at`` shifted forward by ``timeout_seconds``.

    Raises:
        HTTPException: 400 with ``INVALID_PARAMETER`` when building the
            interval or adding it raises ``OverflowError`` or ``ValueError``.
    """
    try:
        lifetime = timedelta(seconds=timeout_seconds)
        expires_at = created_at + lifetime
    except (OverflowError, ValueError) as exc:
        error_detail = {
            "code": SandboxErrorCodes.INVALID_PARAMETER,
            "message": (
                f"Sandbox timeout {timeout_seconds}s is too large to represent safely."
            ),
        }
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=error_detail,
        ) from exc
    return expires_at


# Volume name must be a valid DNS label.
# Anchored with ``\Z`` instead of ``$``: ``$`` also matches just before a
# trailing newline, which would let names such as "data\n" through.
VOLUME_NAME_RE = re.compile(r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?\Z")
# Kubernetes resource name pattern
K8S_RESOURCE_NAME_RE = re.compile(r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?\Z")


def ensure_valid_volume_name(name: str) -> None:
    """
    Validate that a volume name is a valid DNS label.

    The name must be non-empty, at most 63 characters, and consist solely of
    lowercase alphanumerics and hyphens, starting and ending with an
    alphanumeric character.

    Args:
        name: Volume name to validate.

    Raises:
        HTTPException: 400 with ``INVALID_VOLUME_NAME`` when the name is invalid.
    """
    if not name:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "code": SandboxErrorCodes.INVALID_VOLUME_NAME,
                "message": "Volume name cannot be empty.",
            },
        )
    if len(name) > 63:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "code": SandboxErrorCodes.INVALID_VOLUME_NAME,
                "message": f"Volume name '{name}' exceeds maximum length of 63 characters.",
            },
        )
    # fullmatch: the entire name must be consumed by the pattern.
    if not VOLUME_NAME_RE.fullmatch(name):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "code": SandboxErrorCodes.INVALID_VOLUME_NAME,
                "message": f"Volume name '{name}' is not a valid DNS label. Must be lowercase alphanumeric with optional hyphens.",
            },
        )


def ensure_valid_mount_path(mount_path: str) -> None:
    """
    Require a mount path to be non-empty and to start with ``/``.

    Args:
        mount_path: Mount path to validate.

    Raises:
        HTTPException: 400 with ``INVALID_MOUNT_PATH`` when the path is empty
            or does not start with ``/``.
    """
    if not mount_path:
        problem = "Mount path cannot be empty."
    elif not mount_path.startswith("/"):
        problem = f"Mount path '{mount_path}' must be an absolute path starting with '/'."
    else:
        return

    raise HTTPException(
        status_code=status.HTTP_400_BAD_REQUEST,
        detail={
            "code": SandboxErrorCodes.INVALID_MOUNT_PATH,
            "message": problem,
        },
    )


def ensure_valid_sub_path(sub_path: Optional[str]) -> None:
    """
    Reject subPaths that are absolute or contain a ``..`` component.

    ``None`` and the empty string both mean "no subPath" and are accepted.
    Components are obtained by splitting on ``/`` only.

    Args:
        sub_path: SubPath to validate (optional).

    Raises:
        HTTPException: 400 with ``INVALID_SUB_PATH`` when the subPath starts
            with ``/`` or has a ``..`` component.
    """
    if not sub_path:
        # Covers both None and "" (no subpath requested).
        return

    def _invalid(message):
        return HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "code": SandboxErrorCodes.INVALID_SUB_PATH,
                "message": message,
            },
        )

    # Absolute paths are never acceptable as a subPath.
    if sub_path.startswith("/"):
        raise _invalid(f"SubPath '{sub_path}' must be a relative path, not absolute.")

    # A ".." component anywhere would allow escaping the volume root.
    components = sub_path.split("/")
    if ".." in components:
        raise _invalid(
            f"SubPath '{sub_path}' contains path traversal '..' which is not allowed."
        )


def ensure_valid_host_path(
    path: str,
    allowed_prefixes: Optional[List[str]] = None,
) -> None:
    """
    Validate that a host path is absolute and optionally within allowed prefixes.

    Checks, in order: non-empty, absolute (per ``os.path.isabs``), free of
    ``/..`` sequences, free of doubled or trailing slashes, and - when
    ``allowed_prefixes`` is not None - equal to or located under one of the
    (normalized) allowed prefixes.

    Args:
        path: Host path to validate.
        allowed_prefixes: Optional list of allowed path prefixes. ``None``
            disables the allowlist check; an empty list rejects every path.

    Raises:
        HTTPException: 400 with ``INVALID_HOST_PATH`` when the path is
            malformed, or ``HOST_PATH_NOT_ALLOWED`` when it is outside every
            allowed prefix.
    """
    if not path:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "code": SandboxErrorCodes.INVALID_HOST_PATH,
                "message": "Host path cannot be empty.",
            },
        )

    if not os.path.isabs(path):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "code": SandboxErrorCodes.INVALID_HOST_PATH,
                "message": f"Host path '{path}' must be an absolute path.",
            },
        )

    # Normalize separators to forward slashes for consistent security checks.
    # Strip the drive prefix (e.g. "C:") so that "C:/" is not mis-detected as
    # containing "//".
    _tail_fwd = os.path.splitdrive(path)[1].replace("\\", "/")

    # Reject path traversal components. This is a substring test, so it also
    # rejects component names that merely begin with ".." (e.g. "/a/..b").
    if "/.." in _tail_fwd:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "code": SandboxErrorCodes.INVALID_HOST_PATH,
                "message": f"Host path '{path}' contains path traversal component '..'.",
            },
        )

    # Reject non-normalized paths (double slashes, trailing slashes except root)
    if "//" in _tail_fwd or (len(_tail_fwd) > 1 and _tail_fwd.endswith("/")):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "code": SandboxErrorCodes.INVALID_HOST_PATH,
                "message": f"Host path '{path}' is not normalized. Remove redundant slashes.",
            },
        )

    # Check against allowed prefixes if provided
    if allowed_prefixes is None:
        return

    norm_path = os.path.normpath(path)

    def _is_within(prefix: str) -> bool:
        """True when norm_path equals the prefix or lies beneath it."""
        norm_prefix = os.path.normpath(prefix)
        if norm_path == norm_prefix:
            return True
        # A root prefix ("/" or "C:\\") already ends with the separator;
        # appending another one would yield "//" and match nothing.
        if norm_prefix.endswith(os.sep):
            base = norm_prefix
        else:
            base = norm_prefix + os.sep
        return norm_path.startswith(base)

    if not any(_is_within(prefix) for prefix in allowed_prefixes):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "code": SandboxErrorCodes.HOST_PATH_NOT_ALLOWED,
                "message": f"Host path '{path}' is not under any allowed prefix. Allowed prefixes: {allowed_prefixes}",
            },
        )


def ensure_valid_pvc_name(claim_name: str) -> None:
    """
    Validate that a PVC claim name is a valid Kubernetes resource name.

    The name must be non-empty, at most 253 characters, and match
    ``K8S_RESOURCE_NAME_RE`` in its entirety.

    NOTE(review): the pattern contains no ``.``, so dotted names are
    rejected here - confirm this is intended for PVC names.

    Args:
        claim_name: PVC claim name to validate.

    Raises:
        HTTPException: 400 with ``INVALID_PVC_NAME`` when the claim name is invalid.
    """
    if not claim_name:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "code": SandboxErrorCodes.INVALID_PVC_NAME,
                "message": "PVC claim name cannot be empty.",
            },
        )
    if len(claim_name) > 253:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "code": SandboxErrorCodes.INVALID_PVC_NAME,
                "message": f"PVC claim name '{claim_name}' exceeds maximum length of 253 characters.",
            },
        )
    # fullmatch instead of match: with a "$"-anchored pattern, match() would
    # accept a name followed by a trailing newline (e.g. "claim\n").
    if not K8S_RESOURCE_NAME_RE.fullmatch(claim_name):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "code": SandboxErrorCodes.INVALID_PVC_NAME,
                "message": f"PVC claim name '{claim_name}' is not a valid Kubernetes resource name.",
            },
        )


def ensure_valid_ossfs_volume(ossfs: "OSSFS") -> None:
    """
    Validate OSSFS backend fields.

    Checks, in order: ``bucket`` and ``endpoint`` are non-blank strings,
    every entry of ``options`` (when given) is a non-blank string that does
    not start with ``-``, and both inline credentials are present.

    Args:
        ossfs: OSSFS backend model.

    Raises:
        HTTPException: 400 with the matching ``INVALID_OSSFS_*`` code when
            any OSSFS field is invalid.
    """
    if not isinstance(ossfs.bucket, str) or not ossfs.bucket.strip():
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "code": SandboxErrorCodes.INVALID_OSSFS_BUCKET,
                "message": "OSSFS bucket cannot be empty.",
            },
        )

    # Guard the type as for bucket: a missing (None) endpoint must produce a
    # 400, not an AttributeError from calling .strip() on None.
    if not isinstance(ossfs.endpoint, str) or not ossfs.endpoint.strip():
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "code": SandboxErrorCodes.INVALID_OSSFS_ENDPOINT,
                "message": "OSSFS endpoint cannot be empty.",
            },
        )

    if ossfs.options is not None:
        for opt in ossfs.options:
            if not isinstance(opt, str) or not opt.strip():
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail={
                        "code": SandboxErrorCodes.INVALID_OSSFS_OPTION,
                        "message": "OSSFS options must be non-empty strings.",
                    },
                )
            # Options are expected as bare payloads; a leading "-" is refused
            # even when preceded by whitespace.
            if opt.strip().startswith("-"):
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail={
                        "code": SandboxErrorCodes.INVALID_OSSFS_OPTION,
                        "message": (
                            "OSSFS options must be raw option payloads without '-' prefix "
                            "(e.g. 'allow_other', 'uid=1000')."
                        ),
                    },
                )

    if not ossfs.access_key_id or not ossfs.access_key_secret:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "code": SandboxErrorCodes.INVALID_OSSFS_CREDENTIALS,
                "message": (
                    "OSSFS inline credentials are required: "
                    "accessKeyId and accessKeySecret must be provided."
                ),
            },
        )


def ensure_egress_configured(
    network_policy: Optional["NetworkPolicy"],
    egress_config: Optional["EgressConfig"],
) -> None:
    """
    Require ``egress.image`` to be set whenever a network policy is requested.

    Args:
        network_policy: Optional network policy from the request.
        egress_config: Optional egress configuration from app config.

    Raises:
        HTTPException: 400 with ``INVALID_PARAMETER`` when a network policy is
            given but no egress image is configured.
    """
    if not network_policy:
        # No policy requested, so no egress image is required.
        return

    if egress_config and egress_config.image:
        return

    raise HTTPException(
        status_code=status.HTTP_400_BAD_REQUEST,
        detail={
            "code": SandboxErrorCodes.INVALID_PARAMETER,
            "message": "egress.image must be configured when networkPolicy is provided.",
        },
    )


def ensure_volumes_valid(
    volumes: Optional[List["Volume"]],
    allowed_host_prefixes: Optional[List[str]] = None,
) -> None:
    """
    Run every volume-level check over a list of volume definitions.

    Each volume is checked, in order, for: a name not seen earlier in the
    list, a valid name, a valid mount path, a valid subPath, exactly one
    backend among host / pvc / ossfs, and finally the backend-specific rules.
    Validation stops at the first failure.

    Args:
        volumes: List of volumes to validate (optional).
        allowed_host_prefixes: Optional list of allowed host path prefixes.

    Raises:
        HTTPException: When any validation fails.
    """
    if not volumes:
        return

    names_so_far: set[str] = set()

    for vol in volumes:
        # Uniqueness is checked before the name's own syntax.
        if vol.name in names_so_far:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={
                    "code": SandboxErrorCodes.DUPLICATE_VOLUME_NAME,
                    "message": f"Duplicate volume name '{vol.name}'. Each volume must have a unique name.",
                },
            )
        names_so_far.add(vol.name)

        ensure_valid_volume_name(vol.name)
        ensure_valid_mount_path(vol.mount_path)
        ensure_valid_sub_path(vol.sub_path)

        # Exactly one of the three backends must be set.
        configured = [
            backend
            for backend in (vol.host, vol.pvc, vol.ossfs)
            if backend is not None
        ]
        backend_count = len(configured)

        if backend_count == 0:
            backend_problem = (
                f"Volume '{vol.name}' must specify exactly one backend "
                "(host, pvc, ossfs), but none was provided."
            )
        elif backend_count > 1:
            backend_problem = (
                f"Volume '{vol.name}' must specify exactly one backend "
                "(host, pvc, ossfs), but multiple were provided."
            )
        else:
            backend_problem = None

        if backend_problem is not None:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={
                    "code": SandboxErrorCodes.INVALID_VOLUME_BACKEND,
                    "message": backend_problem,
                },
            )

        # Only one branch can apply here because exactly one backend is set.
        if vol.host is not None:
            ensure_valid_host_path(vol.host.path, allowed_host_prefixes)
        elif vol.pvc is not None:
            ensure_valid_pvc_name(vol.pvc.claim_name)
        elif vol.ossfs is not None:
            ensure_valid_ossfs_volume(vol.ossfs)


# Public validation helpers exported by ``from ... import *``.
# Every non-underscore function defined in this module is listed, including
# the timeout helpers that were previously missing from the export list.
__all__ = [
    "ensure_entrypoint",
    "ensure_future_expiration",
    "ensure_valid_port",
    "ensure_timeout_within_limit",
    "calculate_expiration_or_raise",
    "ensure_metadata_labels",
    "ensure_egress_configured",
    "ensure_valid_volume_name",
    "ensure_valid_mount_path",
    "ensure_valid_sub_path",
    "ensure_valid_host_path",
    "ensure_valid_pvc_name",
    "ensure_valid_ossfs_volume",
    "ensure_volumes_valid",
]


================================================
FILE: server/tests/__init__.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: server/tests/conftest.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Pytest configuration and fixtures for sandbox server tests.

This module provides shared fixtures and configuration for all test modules.
"""

import os
from pathlib import Path

import pytest
from fastapi.testclient import TestClient
from unittest.mock import MagicMock

# Point the server at the checked-in test configuration. ``setdefault`` keeps
# any SANDBOX_CONFIG_PATH the caller already exported, so an explicit override
# from the environment still wins.
TEST_CONFIG_PATH = Path(__file__).resolve().parent / "testdata" / "config.toml"
os.environ.setdefault("SANDBOX_CONFIG_PATH", str(TEST_CONFIG_PATH))

# Prevent real Docker connections during tests by mocking docker.from_env.
# The patch is installed before the application module is imported further
# down in this file (presumably because the app may build a Docker client
# while it is being imported or started -- confirm against src.main).
import docker  # noqa: E402

_mock_docker_client = MagicMock()
_mock_docker_client.containers.list.return_value = []
# Accept and ignore any arguments: the real docker.from_env takes keyword
# options (e.g. ``timeout``, ``version``), so a zero-argument stub would raise
# TypeError as soon as the code under test passes one of them.
docker.from_env = lambda *args, **kwargs: _mock_docker_client  # type: ignore

from src.main import app  # noqa: E402


@pytest.fixture(scope="session")
def test_api_key() -> str:
    """
    Session-wide API key for authenticated test requests.

    The value is expected to match the key in the test configuration file.
    """
    api_key = "test-api-key-12345"
    return api_key


@pytest.fixture(scope="function")
def client() -> TestClient:
    """
    Build a fresh FastAPI ``TestClient`` around the server app for each test.
    """
    test_client = TestClient(app)
    return test_client


@pytest.fixture(scope="function")
def auth_headers(test_api_key: str) -> dict:
    """
    Request headers that carry the test API key.
    """
    headers = dict()
    headers["OPEN-SANDBOX-API-KEY"] = test_api_key
    return headers


@pytest.fixture(scope="session")
def sample_sandbox_request() -> dict:
    """
    Sample JSON payload for a sandbox creation request.

    The fixture is session-scoped, so the same dict object is handed to every
    test that requests it; tests should treat it as read-only.
    """
    image = {"uri": "python:3.11"}
    limits = {"cpu": "500m", "memory": "512Mi"}
    environment = {"DEBUG": "true", "LOG_LEVEL": "info"}
    metadata = {"name": "Test Sandbox", "project": "test-project"}
    entrypoint = ["python", "-c", "print('Hello from sandbox')"]

    # Keyword order fixes the key order of the resulting payload.
    return dict(
        image=image,
        timeout=3600,
        resourceLimits=limits,
        env=environment,
        metadata=metadata,
        entrypoint=entrypoint,
    )


================================================
FILE: server/tests/k8s/__init__.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Kubernetes runtime unit tests.
"""


================================================
FILE: server/tests/k8s/conftest.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Pytest configuration for K8s runtime tests.
"""

# Import fixtures directly to avoid using pytest_plugins
import pytest
from tests.k8s.fixtures.k8s_fixtures import *  # noqa: F401, F403


@pytest.fixture(autouse=True)
def stub_workload_informer(monkeypatch):
    """
    Keep unit tests from starting real informer threads.

    Replaces the WorkloadInformer referenced by the K8sClient module with an
    inert stand-in, so no watch threads are started. The stand-in always
    reports has_synced=False and returns nothing from its cache, so
    get_custom_object falls through to the mocked API call.
    """

    class _FakeInformer:
        """Inert informer: accepts any constructor arguments and does nothing."""

        def __init__(self, *args, **kwargs):
            # Never report a synced cache.
            self.has_synced = False

        def start(self):
            """No-op; no thread is started."""

        def stop(self):
            """No-op; there is nothing to stop."""

        def get(self, name):
            """Always a cache miss."""

        def update_cache(self, obj):
            """No-op; nothing is cached."""

    informer_path = "src.services.k8s.client.WorkloadInformer"
    monkeypatch.setattr(informer_path, _FakeInformer)


================================================
FILE: server/tests/k8s/fixtures/__init__.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Fixtures package for K8s tests.
"""


================================================
FILE: server/tests/k8s/fixtures/k8s_fixtures.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Shared fixtures for Kubernetes runtime tests.
"""
from datetime import datetime, timezone, timedelta
from unittest.mock import MagicMock
from typing import Dict, Any

import pytest

from src.api.schema import CreateSandboxRequest, ImageSpec, ResourceLimits
from src.config import KubernetesRuntimeConfig
from src.services.k8s.client import K8sClient
from src.services.k8s.provider_factory import PROVIDER_TYPE_BATCHSANDBOX


@pytest.fixture
def mock_k8s_client():
    """Provide a K8sClient-spec'd mock with its API accessors and resource helpers stubbed"""
    k8s = MagicMock(spec=K8sClient)

    # Low-level API handles, reachable both through accessors and attributes.
    custom_api = MagicMock()
    core_api = MagicMock()
    k8s.get_custom_objects_api.return_value = custom_api
    k8s.get_core_v1_api.return_value = core_api
    k8s.custom_api = custom_api
    k8s.core_api = core_api

    # Unified resource operation methods, each replaced by an independent mock.
    resource_stubs = {
        "create_custom_object": MagicMock(return_value={"metadata": {"name": "test", "uid": "uid"}}),
        "get_custom_object": MagicMock(return_value=None),
        "list_custom_objects": MagicMock(return_value=[]),
        "delete_custom_object": MagicMock(),
        "patch_custom_object": MagicMock(),
        "create_secret": MagicMock(),
        "list_pods": MagicMock(return_value=[]),
    }
    for method_name, stub in resource_stubs.items():
        setattr(k8s, method_name, stub)

    return k8s


@pytest.fixture
def k8s_runtime_config():
    """Provide Kubernetes runtime settings that select the BatchSandbox provider"""
    settings = {
        "kubeconfig_path": "/tmp/test-kubeconfig",
        "namespace": "test-namespace",
        "service_account": "test-sa",
        "workload_provider": PROVIDER_TYPE_BATCHSANDBOX,
    }
    return KubernetesRuntimeConfig(**settings)


@pytest.fixture
def agent_sandbox_runtime_config():
    """Provide Kubernetes runtime settings that select the agent-sandbox provider"""
    settings = {
        "kubeconfig_path": "/tmp/test-kubeconfig",
        "namespace": "test-namespace",
        "service_account": "test-sa",
        "workload_provider": "agent-sandbox",
    }
    return KubernetesRuntimeConfig(**settings)


@pytest.fixture
def k8s_runtime_config_with_template(tmp_path):
    """Provide BatchSandbox Kubernetes settings that reference a template file written under tmp_path"""
    template_yaml = """
metadata:
  annotations:
    managed-by: opensandbox
spec:
  template:
    spec:
      nodeSelector:
        workload: sandbox
      tolerations:
        - operator: Exists
"""
    # Materialise the template on disk so the config can point at a real path.
    template_path = tmp_path / "template.yaml"
    template_path.write_text(template_yaml)

    settings = {
        "kubeconfig_path": "/tmp/test-kubeconfig",
        "namespace": "test-namespace",
        "service_account": "test-sa",
        "workload_provider": PROVIDER_TYPE_BATCHSANDBOX,
        "batchsandbox_template_file": str(template_path),
    }
    return KubernetesRuntimeConfig(**settings)


@pytest.fixture
def valid_batchsandbox_template() -> Dict[str, Any]:
    """Provide a well-formed BatchSandbox template as a nested dict"""
    annotations = {
        "managed-by": "opensandbox",
        "template-source": "test-template",
    }
    sandbox_toleration = {
        "key": "sandbox",
        "operator": "Equal",
        "value": "true",
        "effect": "NoSchedule",
    }
    pod_spec = {
        "restartPolicy": "Never",
        "nodeSelector": {
            "workload": "sandbox",
            "environment": "test",
        },
        "tolerations": [sandbox_toleration],
        "priorityClassName": "sandbox-default",
    }
    # Assemble the pieces into the metadata/spec layout of the template.
    return {
        "metadata": {"annotations": annotations},
        "spec": {"template": {"spec": pod_spec}},
    }


@pytest.fixture
def sample_create_request():
    """Provide a fully populated CreateSandboxRequest"""
    image = ImageSpec(uri="python:3.11")
    limits = ResourceLimits(root={"cpu": "1", "memory": "1Gi"})
    return CreateSandboxRequest(
        image=image,
        entrypoint=["/bin/bash", "-c", "sleep 3600"],
        timeout=3600,
        resourceLimits=limits,
        env={"ENV": "test", "DEBUG": "true"},
        metadata={"team": "platform", "project": "test"},
    )


@pytest.fixture
def mock_batchsandbox_response():
    """Provide a canned BatchSandbox custom-object payload"""
    metadata = {
        "name": "test-id",
        "namespace": "test-namespace",
        "creationTimestamp": "2025-12-24T10:00:00Z",
        "uid": "test-uid-12345",
        "annotations": {"sandbox.opensandbox.io/endpoints": '["10.0.0.1"]'},
        "labels": {"opensandbox.io/id": "test-id"},
    }

    sandbox_container = {"name": "sandbox", "image": "python:3.11"}
    spec = {
        "replicas": 1,
        "expireTime": "2025-12-24T11:00:00+00:00",
        "template": {"spec": {"containers": [sandbox_container]}},
    }

    # One replica, allocated and ready; every task counter is zero.
    status = {"replicas": 1, "allocated": 1, "ready": 1}
    for counter in ("taskFailed", "taskPending", "taskRunning", "taskSucceed", "taskUnknown"):
        status[counter] = 0

    return {
        "apiVersion": "sandbox.opensandbox.io/v1alpha1",
        "kind": "BatchSandbox",
        "metadata": metadata,
        "spec": spec,
        "status": status,
    }


@pytest.fixture
def mock_batchsandbox_list_response(mock_batchsandbox_response):
    """Provide a BatchSandboxList payload wrapping the single mocked BatchSandbox"""
    items = [mock_batchsandbox_response]
    return dict(
        apiVersion="sandbox.opensandbox.io/v1alpha1",
        kind="BatchSandboxList",
        items=items,
    )


@pytest.fixture
def fixed_datetime():
    """Provide a fixed, timezone-aware (UTC) instant: 2025-12-24 10:00:00"""
    return datetime(
        year=2025,
        month=12,
        day=24,
        hour=10,
        minute=0,
        second=0,
        tzinfo=timezone.utc,
    )


@pytest.fixture
def k8s_app_config(k8s_runtime_config):
    """Provide a complete AppConfig for the kubernetes runtime type"""
    from src.config import AppConfig, RuntimeConfig, ServerConfig

    server_settings = ServerConfig(
        host="0.0.0.0",
        port=8080,
        log_level="DEBUG",
        api_key="test-api-key",
    )
    runtime_settings = RuntimeConfig(
        type="kubernetes",
        execd_image="ghcr.io/opensandbox/execd:test",
    )
    return AppConfig(
        server=server_settings,
        runtime=runtime_settings,
        kubernetes=k8s_runtime_config,
    )


@pytest.fixture
def agent_sandbox_app_config(agent_sandbox_runtime_config):
    """Provide a complete AppConfig for the kubernetes runtime with the agent-sandbox provider"""
    from src.config import AppConfig, RuntimeConfig, ServerConfig, AgentSandboxRuntimeConfig

    server_settings = ServerConfig(
        host="0.0.0.0",
        port=8080,
        log_level="DEBUG",
        api_key="test-api-key",
    )
    runtime_settings = RuntimeConfig(
        type="kubernetes",
        execd_image="ghcr.io/opensandbox/execd:test",
    )
    agent_sandbox_settings = AgentSandboxRuntimeConfig(
        template_file=None,
        shutdown_policy="Delete",
        ingress_enabled=True,
    )
    return AppConfig(
        server=server_settings,
        runtime=runtime_settings,
        kubernetes=agent_sandbox_runtime_config,
        agent_sandbox=agent_sandbox_settings,
    )


@pytest.fixture
def app_config_no_k8s():
    """Provide an AppConfig whose runtime type is kubernetes but whose kubernetes section is missing"""
    from src.config import AppConfig, RuntimeConfig, ServerConfig

    server_settings = ServerConfig(
        host="0.0.0.0",
        port=8080,
        log_level="DEBUG",
        api_key="test-api-key",
    )
    runtime_settings = RuntimeConfig(
        type="kubernetes",
        execd_image="ghcr.io/opensandbox/execd:test",
    )
    # The kubernetes section is deliberately left unset.
    return AppConfig(
        server=server_settings,
        runtime=runtime_settings,
        kubernetes=None,
    )


@pytest.fixture
def app_config_docker():
    """Provide an AppConfig for the docker runtime type (no kubernetes section)"""
    from src.config import AppConfig, RuntimeConfig, ServerConfig

    server_settings = ServerConfig(
        host="0.0.0.0",
        port=8080,
        log_level="DEBUG",
        api_key="test-api-key",
    )
    # The runtime type is the only difference from the kubernetes configs.
    runtime_settings = RuntimeConfig(
        type="docker",
        execd_image="ghcr.io/opensandbox/execd:test",
    )
    return AppConfig(
        server=server_settings,
        runtime=runtime_settings,
        kubernetes=None,
    )


@pytest.fixture
def k8s_service(k8s_app_config):
    """
    Provide a KubernetesSandboxService whose K8sClient and workload provider are mocks.

    Both collaborators are patched in the service module while the service is
    constructed, and the patches stay active until the test finishes.
    """
    from unittest.mock import patch, MagicMock

    client_target = 'src.services.k8s.kubernetes_service.K8sClient'
    provider_factory_target = 'src.services.k8s.kubernetes_service.create_workload_provider'

    with patch(client_target) as client_cls, \
         patch(provider_factory_target) as provider_factory:

        # Instantiating the patched K8sClient class yields this mock.
        fake_client = MagicMock()
        client_cls.return_value = fake_client

        # The patched provider factory hands back this mock.
        fake_provider = MagicMock()
        provider_factory.return_value = fake_provider

        from src.services.k8s.kubernetes_service import KubernetesSandboxService
        sandbox_service = KubernetesSandboxService(k8s_app_config)

        # Expose the mocks on the service so tests can configure and inspect them.
        sandbox_service.k8s_client = fake_client
        sandbox_service.workload_provider = fake_provider

        yield sandbox_service


@pytest.fixture
def create_sandbox_request():
    """Provide a standard CreateSandboxRequest for service-level tests"""
    from src.api.schema import ResourceLimits

    request_fields = dict(
        image=ImageSpec(uri="python:3.9"),
        entrypoint=["/bin/bash", "-c", "sleep infinity"],
        timeout=3600,
        env={"ENV": "test"},
        metadata={"team": "test"},
        resourceLimits=ResourceLimits(root={"cpu": "1", "memory": "1Gi"}),
    )
    return CreateSandboxRequest(**request_fields)


@pytest.fixture
def mock_workload():
    """
    Provide a mocked workload object in the "Running" state.

    The clock is sampled once so that the created-at annotation and
    ``creationTimestamp`` carry the same instant and the expires-at annotation
    is exactly one hour later. Sampling it separately for each field would
    make the three values drift apart by however long the calls take.
    """
    now = datetime.now(timezone.utc)
    created_at = now.isoformat()
    expires_at = (now + timedelta(hours=1)).isoformat()

    return {
        "metadata": {
            "name": "test-sandbox-123",
            "uid": "abc-123",
            "labels": {
                "opensandbox.io/id": "test-sandbox-123",
            },
            "annotations": {
                "opensandbox.io/created-at": created_at,
                "opensandbox.io/expires-at": expires_at,
                # Image and entrypoint are stored as JSON-encoded strings.
                "opensandbox.io/image": '{"uri": "python:3.9"}',
                "opensandbox.io/entrypoint": '["/bin/bash", "-c", "sleep infinity"]',
            },
            "creationTimestamp": created_at,
        },
        "spec": {},
        "status": {
            "state": "Running",
        },
    }


@pytest.fixture
def isolated_registry():
    """
    Fixture to isolate provider registry for each test.

    Takes a snapshot of ``provider_factory._PROVIDER_REGISTRY`` before the
    test and restores it afterwards, preventing global state pollution.

    The restore is performed in place on the original registry object, and the
    module attribute is then pointed back at that object. This keeps the
    registry's identity stable: any code holding a direct reference to it sees
    the restored contents, and a test that rebound the module attribute is
    undone as well. Rebinding the attribute to a copy would achieve neither.
    """
    from src.services.k8s import provider_factory

    # Keep the original object and a shallow snapshot of its contents.
    # NOTE(review): assumes the registry is a plain dict (clear/update).
    registry = provider_factory._PROVIDER_REGISTRY
    snapshot = registry.copy()

    yield

    # Restore the contents in place, then undo any rebinding done by the test.
    registry.clear()
    registry.update(snapshot)
    provider_factory._PROVIDER_REGISTRY = registry


================================================
FILE: server/tests/k8s/test_agent_sandbox_provider.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Unit tests for AgentSandboxProvider.
"""

from datetime import datetime, timezone
from types import SimpleNamespace
from unittest.mock import MagicMock

import pytest
from kubernetes.client import ApiException

from src.api.schema import ImageSpec, NetworkPolicy, NetworkRule
from src.config import (
    AppConfig,
    AgentSandboxRuntimeConfig,
    EGRESS_MODE_DNS,
    EGRESS_MODE_DNS_NFT,
    ExecdInitResources,
    KubernetesRuntimeConfig,
    RuntimeConfig,
)
from src.services.constants import SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY
from src.services.k8s.agent_sandbox_provider import AgentSandboxProvider
from src.services.constants import OPENSANDBOX_EGRESS_TOKEN


def _app_config(
    shutdown_policy: str = "Delete",
    service_account: str | None = None,
    execd_init_resources: ExecdInitResources | None = None,
) -> AppConfig:
    """
    Assemble the AppConfig handed to AgentSandboxProvider in these tests.

    Args:
        shutdown_policy: Value for the agent-sandbox ``shutdown_policy`` setting.
        service_account: Value for the Kubernetes ``service_account`` setting.
        execd_init_resources: Value for the Kubernetes ``execd_init_resources`` setting.

    Returns:
        AppConfig for the kubernetes runtime with the agent-sandbox provider.
    """
    runtime_cfg = RuntimeConfig(type="kubernetes", execd_image="execd:test")
    kubernetes_cfg = KubernetesRuntimeConfig(
        namespace="test-ns",
        service_account=service_account,
        workload_provider="agent-sandbox",
        execd_init_resources=execd_init_resources,
    )
    agent_sandbox_cfg = AgentSandboxRuntimeConfig(shutdown_policy=shutdown_policy)
    return AppConfig(
        runtime=runtime_cfg,
        kubernetes=kubernetes_cfg,
        agent_sandbox=agent_sandbox_cfg,
    )


class TestAgentSandboxProvider:
    """AgentSandboxProvider unit tests"""

    # ------------------------------------------------------------------
    # Private helpers (underscore-prefixed, so pytest does not collect them)
    # ------------------------------------------------------------------

    @staticmethod
    def _pod(phase, pod_ip):
        """Build a minimal pod stand-in exposing ``status.phase`` and ``status.pod_ip``."""
        return SimpleNamespace(status=SimpleNamespace(phase=phase, pod_ip=pod_ip))

    @staticmethod
    def _workload_with_ready_condition(status, reason, message):
        """Build a workload whose only condition is a ``Ready`` condition with the given fields."""
        ready_condition = {
            "type": "Ready",
            "status": status,
            "reason": reason,
            "message": message,
            "lastTransitionTime": "2025-12-31T10:00:00Z",
        }
        return {
            "status": {"conditions": [ready_condition]},
            "metadata": {"creationTimestamp": "2025-12-31T09:00:00Z"},
        }

    @staticmethod
    def _workload_without_conditions():
        """Build a workload with no conditions but with a pod selector and namespace."""
        return {
            "status": {"conditions": [], "selector": "app=sandbox"},
            "metadata": {"creationTimestamp": "2025-12-31T09:00:00Z", "namespace": "test-ns"},
        }

    @staticmethod
    def _create_standard_workload(provider, sandbox_id):
        """Call ``create_workload`` with the arguments shared by the creation tests."""
        return provider.create_workload(
            sandbox_id=sandbox_id,
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={"FOO": "bar"},
            resource_limits={"cpu": "1", "memory": "1Gi"},
            labels={"opensandbox.io/id": sandbox_id},
            expires_at=datetime(2025, 12, 31, 10, 0, 0, tzinfo=timezone.utc),
            execd_image="execd:latest",
        )

    # ------------------------------------------------------------------
    # Construction and workload creation
    # ------------------------------------------------------------------

    def test_init_sets_crd_constants_correctly(self, mock_k8s_client):
        """
        Test case: the provider exposes the expected CRD group, version and plural
        """
        provider = AgentSandboxProvider(mock_k8s_client)

        crd_identity = (provider.group, provider.version, provider.plural)

        assert crd_identity == ("agents.x-k8s.io", "v1alpha1", "sandboxes")

    def test_create_workload_builds_correct_manifest_init_mode(self, mock_k8s_client):
        """
        Test case: the manifest sent to the API has the expected structure in init mode
        """
        provider = AgentSandboxProvider(
            mock_k8s_client,
            _app_config(shutdown_policy="Delete", service_account="agent-sa"),
        )
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }

        outcome = self._create_standard_workload(provider, "test-id")

        assert outcome == {"name": "test-id", "uid": "test-uid"}

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        assert manifest["apiVersion"] == "agents.x-k8s.io/v1alpha1"
        assert manifest["kind"] == "Sandbox"

        manifest_meta = manifest["metadata"]
        assert manifest_meta["name"] == "test-id"
        assert manifest_meta["namespace"] == "test-ns"

        manifest_spec = manifest["spec"]
        assert manifest_spec["replicas"] == 1
        assert manifest_spec["shutdownTime"] == "2025-12-31T10:00:00+00:00"
        assert manifest_spec["shutdownPolicy"] == "Delete"

        pod_spec = manifest_spec["podTemplate"]["spec"]
        assert pod_spec["serviceAccountName"] == "agent-sa"
        for section in ("initContainers", "containers", "volumes"):
            assert section in pod_spec

    def test_create_workload_sanitizes_resource_name(self, mock_k8s_client):
        """
        Test case: sandbox ids starting with a digit get a DNS-1035 compliant resource name
        """
        provider = AgentSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "sandbox-1234", "uid": "test-uid"}
        }

        outcome = self._create_standard_workload(provider, "1234")

        assert outcome == {"name": "sandbox-1234", "uid": "test-uid"}
        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        assert manifest["metadata"]["name"] == "sandbox-1234"

    def test_resource_name_uses_hash_when_id_has_no_alnum(self, mock_k8s_client):
        """
        Test case: symbol-only sandbox ids map to distinct resource names
        """
        provider = AgentSandboxProvider(mock_k8s_client)

        names = [provider._resource_name(raw_id) for raw_id in ("!!!", "???")]

        assert all(name.startswith("sandbox-") for name in names)
        assert names[0] != names[1]

    # ------------------------------------------------------------------
    # Workload lookup
    # ------------------------------------------------------------------

    def test_get_workload_returns_none_on_404(self, mock_k8s_client):
        """
        Test case: None is returned when the workload cannot be found
        """
        provider = AgentSandboxProvider(mock_k8s_client)
        mock_k8s_client.get_custom_object.return_value = None

        found = provider.get_workload("test-id", "test-ns")

        assert found is None

    def test_get_workload_prefers_sanitized_name(self, mock_k8s_client):
        """
        Test case: the DNS-1035 resource name is looked up before the raw id
        """
        provider = AgentSandboxProvider(mock_k8s_client)
        # First lookup misses, second lookup hits.
        mock_k8s_client.get_custom_object.side_effect = [
            None,
            {"metadata": {"name": "1234"}},
        ]

        found = provider.get_workload("1234", "test-ns")

        assert found["metadata"]["name"] == "1234"
        lookups = mock_k8s_client.get_custom_object.call_args_list
        looked_up_names = [lookup.kwargs["name"] for lookup in lookups[:2]]
        assert looked_up_names == ["sandbox-1234", "1234"]

    def test_get_workload_falls_back_to_legacy_name(self, mock_k8s_client):
        """
        Test case: the legacy sandbox-<id> name is tried when the primary lookup returns None
        """
        provider = AgentSandboxProvider(mock_k8s_client)
        # First lookup misses, second lookup hits.
        mock_k8s_client.get_custom_object.side_effect = [
            None,
            {"metadata": {"name": "sandbox-test-id"}},
        ]

        found = provider.get_workload("test-id", "test-ns")

        assert found["metadata"]["name"] == "sandbox-test-id"
        lookups = mock_k8s_client.get_custom_object.call_args_list
        looked_up_names = [lookup.kwargs["name"] for lookup in lookups[:2]]
        assert looked_up_names == ["test-id", "sandbox-test-id"]

    def test_get_workload_reraises_non_404_exceptions(self, mock_k8s_client):
        """
        Test case: API errors other than 404 propagate to the caller
        """
        provider = AgentSandboxProvider(mock_k8s_client)
        mock_k8s_client.get_custom_object.side_effect = ApiException(status=500)

        with pytest.raises(ApiException) as raised:
            provider.get_workload("test-id", "test-ns")

        assert raised.value.status == 500

    def test_get_workload_prefers_informer_cache(self, mock_k8s_client):
        """
        Test case: get_workload calls k8s_client.get_custom_object and returns its result
        """
        cached_workload = {"metadata": {"name": "test-id"}}
        mock_k8s_client.get_custom_object.return_value = cached_workload

        provider = AgentSandboxProvider(mock_k8s_client)

        found = provider.get_workload("test-id", "test-ns")

        assert found == cached_workload
        mock_k8s_client.get_custom_object.assert_called()

    def test_create_workload_updates_informer_cache(self, mock_k8s_client):
        """
        Test case: create_workload returns the name and uid of the created resource
        """
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }

        provider = AgentSandboxProvider(mock_k8s_client)

        outcome = self._create_standard_workload(provider, "test-id")

        assert outcome == {"name": "test-id", "uid": "test-uid"}

    # ------------------------------------------------------------------
    # Expiration
    # ------------------------------------------------------------------

    def test_update_expiration_patches_spec(self, mock_k8s_client):
        """
        Test case: updating the expiration patches spec.shutdownTime
        """
        provider = AgentSandboxProvider(mock_k8s_client)
        mock_k8s_client.get_custom_object.return_value = {"metadata": {"name": "sandbox-test-id"}}

        new_expiry = datetime(2025, 12, 31, 0, 0, 0, tzinfo=timezone.utc)
        provider.update_expiration("test-id", "test-ns", new_expiry)

        patch_kwargs = mock_k8s_client.patch_custom_object.call_args.kwargs
        expected_patch = {"spec": {"shutdownTime": "2025-12-31T00:00:00+00:00"}}
        assert patch_kwargs["body"] == expected_patch

    def test_get_expiration_parses_z_suffix(self):
        """
        Test case: a shutdownTime with a Z suffix is parsed as UTC
        """
        provider = AgentSandboxProvider(MagicMock())

        parsed = provider.get_expiration({"spec": {"shutdownTime": "2025-12-31T10:00:00Z"}})

        assert parsed == datetime(2025, 12, 31, 10, 0, 0, tzinfo=timezone.utc)

    # ------------------------------------------------------------------
    # Status
    # ------------------------------------------------------------------

    def test_get_status_ready_condition_true(self):
        """
        Test case: a Ready condition with status True maps to Running
        """
        provider = AgentSandboxProvider(MagicMock())
        workload = self._workload_with_ready_condition("True", "SandboxReady", "Ready")

        status = provider.get_status(workload)

        assert status["state"] == "Running"
        assert status["reason"] == "SandboxReady"
        assert status["message"] == "Ready"

    def test_get_status_expired_condition(self):
        """
        Test case: the SandboxExpired reason maps to Terminated
        """
        provider = AgentSandboxProvider(MagicMock())
        workload = self._workload_with_ready_condition("False", "SandboxExpired", "Expired")

        status = provider.get_status(workload)

        assert status["state"] == "Terminated"
        assert status["reason"] == "SandboxExpired"

    def test_get_status_falls_back_to_pod_state(self, mock_k8s_client):
        """
        Test case: without conditions, a Running pod with an IP yields Running
        """
        provider = AgentSandboxProvider(mock_k8s_client)
        mock_k8s_client.list_pods.return_value = [self._pod("Running", "10.0.0.2")]

        status = provider.get_status(self._workload_without_conditions())

        assert status["state"] == "Running"
        assert status["reason"] == "POD_READY"

    def test_get_status_falls_back_to_allocated_when_ip_assigned_not_running(self, mock_k8s_client):
        """
        Test case: a pod that has an IP but is not Running yet yields Allocated
        """
        provider = AgentSandboxProvider(mock_k8s_client)
        mock_k8s_client.list_pods.return_value = [self._pod("Pending", "10.0.0.2")]

        status = provider.get_status(self._workload_without_conditions())

        assert status["state"] == "Allocated"
        assert status["reason"] == "IP_ASSIGNED"

    # ------------------------------------------------------------------
    # Endpoint resolution
    # ------------------------------------------------------------------

    def test_get_endpoint_info_prefers_running_pod(self, mock_k8s_client):
        """
        Test case: the endpoint is built from the running pod's IP
        """
        provider = AgentSandboxProvider(mock_k8s_client)
        mock_k8s_client.list_pods.return_value = [self._pod("Running", "10.0.0.9")]
        workload = {
            "status": {"selector": "app=sandbox"},
            "metadata": {"namespace": "test-ns"},
        }

        resolved = provider.get_endpoint_info(workload, 8080, "sandbox-123")

        assert resolved.endpoint == "10.0.0.9:8080"
        assert resolved.headers is None

    def test_get_endpoint_info_falls_back_to_service_fqdn(self, mock_k8s_client):
        """
        Test case: the endpoint falls back to serviceFQDN when the pod lookup fails
        """
        provider = AgentSandboxProvider(mock_k8s_client)
        mock_k8s_client.list_pods.side_effect = Exception("boom")
        workload = {
            "status": {"selector": "app=sandbox", "serviceFQDN": "svc.example.com"},
            "metadata": {"namespace": "test-ns"},
        }

        resolved = provider.get_endpoint_info(workload, 9000, "sandbox-123")

        assert resolved.endpoint == "svc.example.com:9000"
        assert resolved.headers is None


class TestAgentSandboxProviderExecdInit:
    """Tests for the resources section of the execd init container built by AgentSandboxProvider"""

    @staticmethod
    def _create_and_get_init_containers(provider, mock_k8s_client):
        """
        Create a workload with a fixed set of arguments and return the
        ``initContainers`` list from the manifest passed to ``create_custom_object``.
        """
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }

        provider.create_workload(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, 10, 0, 0, tzinfo=timezone.utc),
            execd_image="execd:latest",
        )

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        return manifest["spec"]["podTemplate"]["spec"]["initContainers"]

    def test_init_container_has_no_resources_when_not_configured(self, mock_k8s_client):
        """
        Test case: Without execd_init_resources, the single init container carries no "resources" key
        """
        provider = AgentSandboxProvider(mock_k8s_client)

        inits = self._create_and_get_init_containers(provider, mock_k8s_client)

        assert len(inits) == 1
        (only_init,) = inits
        assert "resources" not in only_init

    def test_init_container_has_resources_when_configured(self, mock_k8s_client):
        """
        Test case: Configured execd_init_resources limits/requests are copied onto the init container
        """
        configured = ExecdInitResources(
            limits={"cpu": "100m", "memory": "128Mi"},
            requests={"cpu": "50m", "memory": "64Mi"},
        )
        provider = AgentSandboxProvider(
            mock_k8s_client,
            _app_config(execd_init_resources=configured),
        )

        inits = self._create_and_get_init_containers(provider, mock_k8s_client)

        first_init = inits[0]
        assert first_init["resources"]["limits"] == {"cpu": "100m", "memory": "128Mi"}
        assert first_init["resources"]["requests"] == {"cpu": "50m", "memory": "64Mi"}


class TestAgentSandboxProviderEgress:
    """Tests for how AgentSandboxProvider wires the egress sidecar into the pod template"""

    # Literals shared by the tests below.
    _EXPIRES_AT = datetime(2025, 12, 31, 10, 0, 0, tzinfo=timezone.utc)
    _EGRESS_IMAGE = "opensandbox/egress:v1.0.3"
    _IPV6_SYSCTL_PATH = "/proc/sys/net/ipv6/conf/all/disable_ipv6"

    @staticmethod
    def _submit(mock_k8s_client, **extra):
        """
        Create a workload through a fresh AgentSandboxProvider and return the
        manifest body that was handed to ``create_custom_object``.

        ``extra`` carries the per-test keyword arguments (expires_at,
        network_policy, egress_image, ...) and is forwarded unchanged.
        """
        provider = AgentSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }
        provider.create_workload(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            execd_image="execd:latest",
            **extra,
        )
        return mock_k8s_client.create_custom_object.call_args.kwargs["body"]

    @staticmethod
    def _pod_spec(body):
        """Return the pod spec nested inside a created manifest body."""
        return body["spec"]["podTemplate"]["spec"]

    @staticmethod
    def _container_named(containers, name):
        """Return the first container dict with the given name, or None."""
        for candidate in containers:
            if candidate["name"] == name:
                return candidate
        return None

    @staticmethod
    def _env_of(container):
        """Flatten a container's env list into a name -> value dict."""
        return {entry["name"]: entry["value"] for entry in container.get("env", [])}

    def test_create_workload_without_network_policy_no_sidecar(self, mock_k8s_client):
        """
        Test case: With network_policy=None only the main container is rendered
        """
        body = self._submit(
            mock_k8s_client,
            expires_at=self._EXPIRES_AT,
            network_policy=None,
            egress_image=None,
        )
        pod_spec = self._pod_spec(body)

        # Exactly one container, and it is the main one.
        assert [c["name"] for c in pod_spec["containers"]] == ["sandbox"]
        # No pod-level sysctls should have been injected.
        assert "sysctls" not in pod_spec.get("securityContext", {})

    def test_create_workload_with_network_policy_adds_sidecar(self, mock_k8s_client):
        """
        Test case: Supplying a network_policy plus egress image adds the egress sidecar
        """
        policy = NetworkPolicy(
            default_action="deny",
            egress=[NetworkRule(action="allow", target="pypi.org")],
        )

        body = self._submit(
            mock_k8s_client,
            expires_at=self._EXPIRES_AT,
            network_policy=policy,
            egress_image=self._EGRESS_IMAGE,
        )
        pod_spec = self._pod_spec(body)
        containers = pod_spec["containers"]

        # Main container + sidecar.
        assert len(containers) == 2

        sidecar = self._container_named(containers, "egress")
        assert sidecar is not None
        assert sidecar["image"] == "opensandbox/egress:v1.0.3"

        # The sidecar receives the rules and the (default) egress mode via env.
        sidecar_env = self._env_of(sidecar)
        assert "OPENSANDBOX_EGRESS_RULES" in sidecar_env
        assert sidecar_env["OPENSANDBOX_EGRESS_MODE"] == EGRESS_MODE_DNS

        # NET_ADMIN is added, but the sidecar is not privileged and has no command override.
        sidecar_sc = sidecar.get("securityContext", {})
        added_caps = sidecar_sc.get("capabilities", {}).get("add", [])
        assert "NET_ADMIN" in added_caps
        assert sidecar_sc.get("privileged") is not True
        assert "command" not in sidecar

        # The single init container is the privileged execd installer that touches the IPv6 sysctl.
        init_containers = pod_spec.get("initContainers", [])
        assert len(init_containers) == 1
        installer = init_containers[0]
        assert installer["name"] == "execd-installer"
        assert installer["image"] == "execd:latest"
        assert installer.get("securityContext", {}).get("privileged") is True
        assert self._IPV6_SYSCTL_PATH in installer["args"][0]

    def test_create_workload_with_network_policy_persists_annotation_and_sidecar_token(self, mock_k8s_client):
        """
        Test case: The egress auth token lands both in the manifest annotations and in the sidecar env
        """
        body = self._submit(
            mock_k8s_client,
            expires_at=None,
            network_policy=NetworkPolicy(default_action="deny", egress=[]),
            egress_image=self._EGRESS_IMAGE,
            annotations={SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY: "egress-token"},
            egress_auth_token="egress-token",
        )

        annotations = body["metadata"]["annotations"]
        assert annotations[SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY] == "egress-token"

        sidecar = self._container_named(self._pod_spec(body)["containers"], "egress")
        assert sidecar is not None
        sidecar_env = self._env_of(sidecar)
        assert sidecar_env[OPENSANDBOX_EGRESS_TOKEN] == "egress-token"
        assert sidecar_env["OPENSANDBOX_EGRESS_MODE"] == EGRESS_MODE_DNS

    def test_create_workload_with_egress_mode_dns_nft(self, mock_k8s_client):
        """
        Test case: An explicit egress_mode is forwarded to the sidecar env
        """
        body = self._submit(
            mock_k8s_client,
            expires_at=None,
            network_policy=NetworkPolicy(default_action="deny", egress=[]),
            egress_image=self._EGRESS_IMAGE,
            egress_mode=EGRESS_MODE_DNS_NFT,
        )

        sidecar = self._container_named(self._pod_spec(body)["containers"], "egress")
        assert sidecar is not None
        assert self._env_of(sidecar)["OPENSANDBOX_EGRESS_MODE"] == EGRESS_MODE_DNS_NFT

    def test_create_workload_with_network_policy_does_not_add_pod_ipv6_sysctls(self, mock_k8s_client):
        """
        Test case: IPv6 is handled in the init container args, not via pod-level sysctls
        """
        policy = NetworkPolicy(
            default_action="deny",
            egress=[NetworkRule(action="allow", target="example.com")],
        )

        body = self._submit(
            mock_k8s_client,
            expires_at=self._EXPIRES_AT,
            network_policy=policy,
            egress_image=self._EGRESS_IMAGE,
        )
        pod_spec = self._pod_spec(body)

        assert "sysctls" not in pod_spec.get("securityContext", {})

        # Deliberately no default here: a missing sidecar should error out immediately.
        sidecar = next(c for c in pod_spec["containers"] if c["name"] == "egress")
        assert "command" not in sidecar
        installer = pod_spec["initContainers"][0]
        assert installer["name"] == "execd-installer"
        assert self._IPV6_SYSCTL_PATH in installer["args"][0]

    def test_create_workload_with_network_policy_drops_net_admin_from_main_container(self, mock_k8s_client):
        """
        Test case: With a network_policy the main container drops the NET_ADMIN capability
        """
        policy = NetworkPolicy(
            default_action="deny",
            egress=[NetworkRule(action="allow", target="example.com")],
        )

        body = self._submit(
            mock_k8s_client,
            expires_at=self._EXPIRES_AT,
            network_policy=policy,
            egress_image=self._EGRESS_IMAGE,
        )

        main_container = self._container_named(self._pod_spec(body)["containers"], "sandbox")
        assert main_container is not None

        # Walk securityContext -> capabilities -> drop, requiring each level to exist.
        node = main_container
        for key in ("securityContext", "capabilities", "drop"):
            assert key in node
            node = node[key]
        assert "NET_ADMIN" in node

    def test_create_workload_without_egress_image_no_sidecar(self, mock_k8s_client):
        """
        Test case: A network_policy without an egress image does not produce a sidecar
        """
        policy = NetworkPolicy(
            default_action="deny",
            egress=[NetworkRule(action="allow", target="example.com")],
        )

        body = self._submit(
            mock_k8s_client,
            expires_at=self._EXPIRES_AT,
            network_policy=policy,
            egress_image=None,
        )

        # Exactly one container, and it is the main one.
        rendered_names = [c["name"] for c in self._pod_spec(body)["containers"]]
        assert rendered_names == ["sandbox"]

    def test_egress_sidecar_contains_network_policy_in_env(self, mock_k8s_client):
        """
        Test case: OPENSANDBOX_EGRESS_RULES on the sidecar holds the policy serialized as JSON
        """
        import json

        policy = NetworkPolicy(
            default_action="deny",
            egress=[
                NetworkRule(action="allow", target="pypi.org"),
                NetworkRule(action="deny", target="*.malicious.com"),
            ],
        )

        body = self._submit(
            mock_k8s_client,
            expires_at=self._EXPIRES_AT,
            network_policy=policy,
            egress_image=self._EGRESS_IMAGE,
        )

        sidecar = self._container_named(self._pod_spec(body)["containers"], "egress")
        assert sidecar is not None

        sidecar_env = self._env_of(sidecar)
        assert "OPENSANDBOX_EGRESS_RULES" in sidecar_env

        # The env value must parse as JSON and mirror the policy passed in.
        decoded = json.loads(sidecar_env["OPENSANDBOX_EGRESS_RULES"])
        assert decoded["defaultAction"] == "deny"
        assert len(decoded["egress"]) == 2
        first_rule = decoded["egress"][0]
        assert first_rule["action"] == "allow"
        assert first_rule["target"] == "pypi.org"

    def test_main_container_no_security_context_without_network_policy(self, mock_k8s_client):
        """
        Test case: Without a network_policy the main container gets no securityContext
        """
        body = self._submit(
            mock_k8s_client,
            expires_at=self._EXPIRES_AT,
            network_policy=None,
            egress_image=None,
        )

        first_container = self._pod_spec(body)["containers"][0]
        assert "securityContext" not in first_container


================================================
FILE: server/tests/k8s/test_agent_sandbox_template.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Unit tests for AgentSandboxTemplateManager.
"""

import pytest
import yaml

from src.services.k8s.agent_sandbox_template import AgentSandboxTemplateManager


class TestAgentSandboxTemplateManager:
    """Unit tests covering template loading, deep merge and copy helpers of AgentSandboxTemplateManager"""

    def test_load_valid_yaml_template_successfully(self, tmp_path):
        """
        Test case: A well-formed YAML mapping is loaded and the source path is remembered
        """
        expected = {
            "metadata": {"annotations": {"test": "value"}},
            "spec": {"podTemplate": {"spec": {"nodeSelector": {"env": "test"}}}},
        }
        path = tmp_path / "valid_template.yaml"
        path.write_text(yaml.dump(expected))

        manager = AgentSandboxTemplateManager(str(path))

        assert manager.template_file_path == str(path)
        assert manager._template == expected

    def test_load_nonexistent_file_raises_error(self):
        """
        Test case: A missing template path surfaces as FileNotFoundError mentioning "not found"
        """
        with pytest.raises(FileNotFoundError, match="not found"):
            AgentSandboxTemplateManager("/path/to/nonexistent.yaml")

    def test_load_invalid_yaml_raises_error(self, tmp_path):
        """
        Test case: Unparseable YAML surfaces as RuntimeError mentioning "Failed to load"
        """
        broken = tmp_path / "invalid.yaml"
        broken.write_text("invalid: yaml: [missing: bracket")

        with pytest.raises(RuntimeError, match="Failed to load"):
            AgentSandboxTemplateManager(str(broken))

    def test_load_non_dict_yaml_raises_error(self, tmp_path):
        """
        Test case: A YAML document whose top level is a list is rejected with ValueError
        """
        list_doc = tmp_path / "list.yaml"
        list_doc.write_text("- item1\n- item2")

        with pytest.raises(ValueError) as caught:
            AgentSandboxTemplateManager(str(list_doc))

        message = str(caught.value)
        assert "must be a YAML object" in message
        assert "got list" in message

    def test_init_without_template_file_creates_empty_manager(self):
        """
        Test case: Passing None yields a manager with neither template nor path
        """
        manager = AgentSandboxTemplateManager(None)

        assert manager.template_file_path is None
        assert manager._template is None

    def test_deep_merge_runtime_overrides_template(self):
        """
        Test case: A key present on both sides takes the override's value; untouched keys survive
        """
        merged = AgentSandboxTemplateManager._deep_merge(
            {"spec": {"replicas": 1, "shutdownTime": "old"}},
            {"spec": {"shutdownTime": "new"}},
        )

        assert merged == {"spec": {"replicas": 1, "shutdownTime": "new"}}

    def test_deep_merge_preserves_template_only_fields(self):
        """
        Test case: Fields that exist only in the base template are kept after merging
        """
        pod_level = {
            "nodeSelector": {"env": "prod"},
            "tolerations": [{"key": "test"}],
        }
        base = {"spec": {"podTemplate": {"spec": pod_level}}}

        merged = AgentSandboxTemplateManager._deep_merge(base, {"spec": {"replicas": 1}})

        assert merged["spec"]["replicas"] == 1
        merged_pod = merged["spec"]["podTemplate"]["spec"]
        assert merged_pod["nodeSelector"] == {"env": "prod"}
        assert merged_pod["tolerations"] == [{"key": "test"}]

    def test_deep_merge_nested_dicts_recursively(self):
        """
        Test case: Nested mappings are merged key by key rather than replaced wholesale
        """
        merged = AgentSandboxTemplateManager._deep_merge(
            {"metadata": {"annotations": {"a": "1", "b": "2"}}},
            {"metadata": {"annotations": {"b": "3", "c": "4"}}},
        )

        assert merged == {"metadata": {"annotations": {"a": "1", "b": "3", "c": "4"}}}

    def test_deep_merge_replaces_lists_not_merges(self):
        """
        Test case: A list in the override replaces the base list outright
        """
        merged = AgentSandboxTemplateManager._deep_merge(
            {"spec": {"tolerations": [{"key": "a"}]}},
            {"spec": {"tolerations": [{"key": "b"}]}},
        )

        assert merged == {"spec": {"tolerations": [{"key": "b"}]}}

    def test_deep_merge_none_values_do_not_override(self):
        """
        Test case: A None in the override leaves the base value in place
        """
        merged = AgentSandboxTemplateManager._deep_merge(
            {"spec": {"shutdownTime": "2024-12-31"}},
            {"spec": {"shutdownTime": None}},
        )

        assert merged == {"spec": {"shutdownTime": "2024-12-31"}}

    def test_deep_copy_creates_independent_copies(self):
        """
        Test case: Mutating the result of _deep_copy does not leak back into the source
        """
        source = {
            "nested": {"list": [1, 2, 3], "dict": {"key": "value"}},
        }

        cloned = AgentSandboxTemplateManager._deep_copy(source)

        # Mutate both a nested list and a nested dict on the clone.
        cloned["nested"]["list"].append(4)
        cloned["nested"]["dict"]["key"] = "new_value"

        assert source["nested"]["list"] == [1, 2, 3]
        assert source["nested"]["dict"]["key"] == "value"

    def test_get_base_template_returns_copy(self, tmp_path):
        """
        Test case: Each get_base_template call hands back an equal but distinct object
        """
        path = tmp_path / "template.yaml"
        path.write_text(yaml.dump({"spec": {"replicas": 1}}))

        manager = AgentSandboxTemplateManager(str(path))

        first = manager.get_base_template()
        second = manager.get_base_template()

        assert first == second
        assert first is not second

    def test_get_base_template_returns_empty_dict_when_no_template(self):
        """
        Test case: With no template configured, get_base_template returns {}
        """
        assert AgentSandboxTemplateManager(None).get_base_template() == {}


================================================
FILE: server/tests/k8s/test_batchsandbox_provider.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Unit tests for BatchSandboxProvider.
"""

import pytest
from datetime import datetime, timezone
from unittest.mock import MagicMock
from kubernetes.client import ApiException

from src.api.schema import ImageSpec, ImageAuth, NetworkPolicy, NetworkRule
from src.config import (
    AppConfig,
    EGRESS_MODE_DNS,
    EGRESS_MODE_DNS_NFT,
    ExecdInitResources,
    KubernetesRuntimeConfig,
    RuntimeConfig,
)
from src.services.constants import SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY
from src.services.k8s.batchsandbox_provider import BatchSandboxProvider
from src.services.constants import OPENSANDBOX_EGRESS_TOKEN
from src.services.k8s.image_pull_secret_helper import IMAGE_AUTH_SECRET_PREFIX
from src.services.k8s.volume_helper import apply_volumes_to_pod_spec


def _app_config_with_template(template_file_path: str) -> AppConfig:
    """Return a kubernetes-runtime AppConfig whose batchsandbox_template_file is the given path."""
    runtime_cfg = RuntimeConfig(type="kubernetes", execd_image="execd:test")
    k8s_cfg = KubernetesRuntimeConfig(
        namespace="test-ns",
        batchsandbox_template_file=template_file_path,
    )
    return AppConfig(runtime=runtime_cfg, kubernetes=k8s_cfg)


def _app_config_with_execd_resources(execd_init_resources: ExecdInitResources) -> AppConfig:
    """Return a kubernetes-runtime AppConfig carrying the given execd_init_resources."""
    runtime_cfg = RuntimeConfig(type="kubernetes", execd_image="execd:test")
    k8s_cfg = KubernetesRuntimeConfig(
        namespace="test-ns",
        execd_init_resources=execd_init_resources,
    )
    return AppConfig(runtime=runtime_cfg, kubernetes=k8s_cfg)


class TestBatchSandboxProvider:
    """BatchSandboxProvider unit tests"""
    
    # ===== Initialization Tests =====
    
    def test_init_without_template_creates_provider(self, mock_k8s_client):
        """
        Test case: A provider built from only a client keeps that client,
        has no template loaded, and exposes the BatchSandbox CRD coordinates.
        """
        sut = BatchSandboxProvider(mock_k8s_client)

        assert sut.k8s_client == mock_k8s_client
        assert sut.template_manager._template is None

        crd_coordinates = (sut.group, sut.version, sut.plural)
        assert crd_coordinates == ("sandbox.opensandbox.io", "v1alpha1", "batchsandboxes")
    
    def test_init_with_template_loads_template(self, mock_k8s_client, tmp_path):
        """
        Test case: When the config names a template file, the provider's
        template manager ends up holding a non-None template.
        """
        template_path = tmp_path / "template.yaml"
        template_path.write_text("spec:\n  replicas: 1")
        app_config = _app_config_with_template(str(template_path))

        sut = BatchSandboxProvider(mock_k8s_client, app_config)

        assert sut.template_manager._template is not None
    
    def test_init_sets_crd_constants_correctly(self, mock_k8s_client):
        """
        Test case: group / version / plural match the BatchSandbox CRD
        """
        sut = BatchSandboxProvider(mock_k8s_client)

        expected_constants = {
            "group": "sandbox.opensandbox.io",
            "version": "v1alpha1",
            "plural": "batchsandboxes",
        }
        for attr_name, expected_value in expected_constants.items():
            assert getattr(sut, attr_name) == expected_value
    
    # ===== Workload Creation Tests =====
    
    def test_create_workload_builds_correct_manifest(self, mock_k8s_client):
        """
        Test case: create_workload returns the created name/uid and submits a
        BatchSandbox manifest with the expected top-level and pod-spec fields.
        """
        sut = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }

        created = sut.create_workload(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={"FOO": "bar"},
            resource_limits={"cpu": "1", "memory": "1Gi"},
            labels={"opensandbox.io/id": "test-id"},
            expires_at=datetime(2025, 12, 31, 10, 0, 0, tzinfo=timezone.utc),
            execd_image="execd:latest",
        )

        assert created == {"name": "test-id", "uid": "test-uid"}

        # Inspect the manifest handed to the Kubernetes client.
        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]

        assert manifest["apiVersion"] == "sandbox.opensandbox.io/v1alpha1"
        assert manifest["kind"] == "BatchSandbox"

        metadata = manifest["metadata"]
        assert metadata["name"] == "test-id"
        assert metadata["namespace"] == "test-ns"

        spec = manifest["spec"]
        assert spec["replicas"] == 1
        assert spec["expireTime"] == "2025-12-31T10:00:00+00:00"
        assert "template" in spec

        pod_spec = spec["template"]["spec"]
        for section in ("initContainers", "containers", "volumes"):
            assert section in pod_spec
    
    def test_create_workload_builds_execd_init_container(self, mock_k8s_client):
        """
        Test case: The first init container is the execd installer, and it
        carries no "resources" key when no init resources are configured.
        """
        sut = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test", "uid": "uid"}
        }

        request = dict(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            execd_image="execd:test",
        )
        sut.create_workload(**request)

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        installer = manifest["spec"]["template"]["spec"]["initContainers"][0]

        assert installer["name"] == "execd-installer"
        assert installer["image"] == "execd:test"
        assert installer["command"] == ["/bin/sh", "-c"]
        first_arg = installer["args"][0]
        assert "bootstrap.sh" in first_arg
        first_mount = installer["volumeMounts"][0]
        assert first_mount["name"] == "opensandbox-bin"
        # Without configured init resources the key must be absent entirely.
        assert "resources" not in installer

    def test_create_workload_init_container_with_configured_resources(self, mock_k8s_client):
        """
        Test case: Configured execd_init_resources show up as limits/requests
        on the first init container.
        """
        init_resources = ExecdInitResources(
            limits={"cpu": "100m", "memory": "128Mi"},
            requests={"cpu": "50m", "memory": "64Mi"},
        )
        app_config = _app_config_with_execd_resources(init_resources)
        sut = BatchSandboxProvider(mock_k8s_client, app_config)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test", "uid": "uid"}
        }

        sut.create_workload(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            execd_image="execd:test",
        )

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        installer = manifest["spec"]["template"]["spec"]["initContainers"][0]
        assert installer["resources"]["limits"] == {"cpu": "100m", "memory": "128Mi"}
        assert installer["resources"]["requests"] == {"cpu": "50m", "memory": "64Mi"}
    
    def test_create_workload_wraps_entrypoint_with_bootstrap(self, mock_k8s_client):
        """
        Test case: The main container command is the bootstrap script
        followed by the user-supplied entrypoint arguments.
        """
        sut = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "sandbox-test", "uid": "uid"}
        }

        sut.create_workload(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/usr/bin/python", "app.py"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            execd_image="execd:latest",
        )

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        sandbox_container = manifest["spec"]["template"]["spec"]["containers"][0]

        expected_command = ["/opt/opensandbox/bin/bootstrap.sh", "/usr/bin/python", "app.py"]
        assert sandbox_container["command"] == expected_command
    
    def test_create_workload_converts_env_to_list(self, mock_k8s_client):
        """
        Test case: The env dict becomes a list of name/value entries on the
        main container, with an extra EXECD entry added alongside user values.
        """
        sut = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "sandbox-test", "uid": "uid"}
        }

        request = dict(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={"FOO": "bar", "BAZ": "qux"},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            execd_image="execd:latest",
        )
        sut.create_workload(**request)

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        env_entries = manifest["spec"]["template"]["spec"]["containers"][0]["env"]

        # Two user-supplied variables plus the injected EXECD entry.
        assert len(env_entries) == 3
        values_by_name = {}
        for entry in env_entries:
            values_by_name[entry["name"]] = entry["value"]
        assert values_by_name["FOO"] == "bar"
        assert values_by_name["BAZ"] == "qux"
        # EXECD is not in the request; the provider adds it.
        assert values_by_name["EXECD"] == "/opt/opensandbox/bin/execd"

    def test_create_workload_merges_template_volumes_and_mounts(self, mock_k8s_client, tmp_path):
        """
        Test case: Volumes and volumeMounts declared in the template file
        appear in the submitted manifest next to the runtime-provided ones,
        while the container keeps the requested image.
        """
        template_yaml = """
spec:
  template:
    spec:
      volumes:
        - name: sandbox-shared-data
          emptyDir: {}
      containers:
        - name: sandbox
          image: ubuntu:latest
          volumeMounts:
            - name: sandbox-shared-data
              mountPath: /data
"""
        template_path = tmp_path / "template.yaml"
        template_path.write_text(template_yaml)
        app_config = _app_config_with_template(str(template_path))
        sut = BatchSandboxProvider(mock_k8s_client, app_config)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "sandbox-test", "uid": "uid"}
        }

        sut.create_workload(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            execd_image="execd:latest",
        )

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        pod_spec = manifest["spec"]["template"]["spec"]

        declared_volumes = {volume["name"] for volume in pod_spec["volumes"]}
        assert "sandbox-shared-data" in declared_volumes
        assert "opensandbox-bin" in declared_volumes

        # The template's image (ubuntu:latest) must not replace the requested one.
        sandbox_container = pod_spec["containers"][0]
        assert sandbox_container["name"] == "sandbox"
        assert sandbox_container["image"] == "python:3.11"

        mounted_volumes = {mount["name"] for mount in sandbox_container["volumeMounts"]}
        assert "sandbox-shared-data" in mounted_volumes
        assert "opensandbox-bin" in mounted_volumes

    def test_create_workload_dedupes_template_volume_and_mount_names(self, mock_k8s_client, tmp_path):
        """
        Test case: A template that re-declares the runtime "opensandbox-bin"
        volume and mount does not produce duplicate entries in the manifest.
        """
        template_yaml = """
spec:
  template:
    spec:
      volumes:
        - name: opensandbox-bin
          emptyDir: {}
        - name: sandbox-shared-data
          emptyDir: {}
      containers:
        - name: sandbox
          volumeMounts:
            - name: opensandbox-bin
              mountPath: /opt/opensandbox/bin
            - name: sandbox-shared-data
              mountPath: /data
"""
        template_path = tmp_path / "template.yaml"
        template_path.write_text(template_yaml)
        app_config = _app_config_with_template(str(template_path))
        sut = BatchSandboxProvider(mock_k8s_client, app_config)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "sandbox-test", "uid": "uid"}
        }

        sut.create_workload(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            execd_image="execd:latest",
        )

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        pod_spec = manifest["spec"]["template"]["spec"]

        declared_volumes = [volume["name"] for volume in pod_spec["volumes"]]
        assert declared_volumes.count("opensandbox-bin") == 1
        assert "sandbox-shared-data" in declared_volumes

        sandbox_mounts = pod_spec["containers"][0]["volumeMounts"]
        mounted_volumes = [mount["name"] for mount in sandbox_mounts]
        assert mounted_volumes.count("opensandbox-bin") == 1
        assert "sandbox-shared-data" in mounted_volumes
    
    def test_create_workload_sets_resource_limits_and_requests(self, mock_k8s_client):
        """
        Test case: The requested resource_limits are written to both
        "limits" and "requests" of the main container.
        """
        sut = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "sandbox-test", "uid": "uid"}
        }

        sut.create_workload(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={"cpu": "1", "memory": "1Gi"},
            labels={},
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            execd_image="execd:latest",
        )

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        sandbox_container = manifest["spec"]["template"]["spec"]["containers"][0]
        container_resources = sandbox_container["resources"]

        assert container_resources["limits"] == {"cpu": "1", "memory": "1Gi"}
        assert container_resources["requests"] == {"cpu": "1", "memory": "1Gi"}
    
    def test_create_workload_handles_empty_resource_limits(self, mock_k8s_client):
        """
        Test case: With empty resource_limits the main container has no
        "resources" key at all.
        """
        sut = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "sandbox-test", "uid": "uid"}
        }

        request = dict(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            execd_image="execd:latest",
        )
        sut.create_workload(**request)

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        sandbox_container = manifest["spec"]["template"]["spec"]["containers"][0]

        assert "resources" not in sandbox_container
    
    # ===== Workload Query Tests =====
    
    def test_get_workload_finds_existing_sandbox(
        self, mock_k8s_client, mock_batchsandbox_list_response
    ):
        """
        Test case: get_workload returns the object the client yields for
        an existing sandbox.
        """
        sut = BatchSandboxProvider(mock_k8s_client)
        existing = mock_batchsandbox_list_response["items"][0]
        mock_k8s_client.get_custom_object.return_value = existing

        workload = sut.get_workload("test-id", "test-ns")

        assert workload is not None
        assert workload["metadata"]["name"] == "test-id"
    
    def test_get_workload_returns_none_when_not_found(self, mock_k8s_client):
        """
        Test case: get_workload yields None when the client lookup yields None
        """
        sut = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.get_custom_object.return_value = None

        workload = sut.get_workload("test-id", "test-ns")

        assert workload is None

    def test_get_workload_falls_back_to_legacy_name(self, mock_k8s_client):
        """
        Test case: When the lookup by plain id returns None, a second lookup
        is made under the legacy "sandbox-<id>" name and its result returned.
        """
        sut = BatchSandboxProvider(mock_k8s_client)
        legacy_object = {"metadata": {"name": "sandbox-test-id"}}
        # First lookup misses, second lookup hits.
        mock_k8s_client.get_custom_object.side_effect = [None, legacy_object]

        workload = sut.get_workload("test-id", "test-ns")

        assert workload["metadata"]["name"] == "sandbox-test-id"
        lookups = mock_k8s_client.get_custom_object.call_args_list
        assert lookups[0].kwargs["name"] == "test-id"
        assert lookups[1].kwargs["name"] == "sandbox-test-id"
    
    def test_get_workload_handles_404_gracefully(self, mock_k8s_client):
        """
        Test case: get_workload yields None when the client lookup yields None

        NOTE(review): despite the name, no 404 ApiException is raised here;
        presumably the client wrapper maps 404 to None - confirm.
        """
        sut = BatchSandboxProvider(mock_k8s_client)

        mock_k8s_client.get_custom_object.return_value = None

        workload = sut.get_workload("test-id", "test-ns")

        assert workload is None
    
    def test_get_workload_reraises_non_404_exceptions(self, mock_k8s_client):
        """
        Test case: An ApiException with status 500 from the client propagates
        out of get_workload unchanged in status.
        """
        sut = BatchSandboxProvider(mock_k8s_client)

        # Simulate a server-side failure on lookup.
        mock_k8s_client.get_custom_object.side_effect = ApiException(status=500)

        with pytest.raises(ApiException) as raised:
            sut.get_workload("test-id", "test-ns")

        assert raised.value.status == 500

    def test_get_workload_prefers_informer_cache(self, mock_k8s_client):
        """
        Test case: get_workload invokes k8s_client.get_custom_object and
        returns the object it yields.
        """
        expected_object = {"metadata": {"name": "test-id"}}
        mock_k8s_client.get_custom_object.return_value = expected_object

        sut = BatchSandboxProvider(mock_k8s_client)

        workload = sut.get_workload("test-id", "test-ns")

        assert workload == expected_object
        mock_k8s_client.get_custom_object.assert_called()
    
    def test_get_workload_logs_unexpected_errors(self, mock_k8s_client):
        """
        Test case: A RuntimeError raised by the client lookup propagates
        out of get_workload.
        """
        sut = BatchSandboxProvider(mock_k8s_client)
        failure = RuntimeError("Unexpected")
        mock_k8s_client.get_custom_object.side_effect = failure

        with pytest.raises(RuntimeError, match="Unexpected"):
            sut.get_workload("test-id", "test-ns")

    def test_create_workload_updates_informer_cache(self, mock_k8s_client):
        """
        Test case: create_workload returns the name and uid taken from the
        created resource's metadata.
        """
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }

        sut = BatchSandboxProvider(mock_k8s_client)

        request = dict(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={"FOO": "bar"},
            resource_limits={"cpu": "1", "memory": "1Gi"},
            labels={"opensandbox.io/id": "test-id"},
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            execd_image="execd:latest",
        )
        created = sut.create_workload(**request)

        assert created == {"name": "test-id", "uid": "test-uid"}
    
    # ===== Workload List Tests =====
    
    def test_list_workloads_returns_items(
        self, mock_k8s_client, mock_batchsandbox_list_response
    ):
        """
        Test case: list_workloads returns the items yielded by the client
        """
        sut = BatchSandboxProvider(mock_k8s_client)
        listed_items = mock_batchsandbox_list_response["items"]
        mock_k8s_client.list_custom_objects.return_value = listed_items

        workloads = sut.list_workloads("test-ns", "opensandbox.io/id")

        assert len(workloads) == 1
        first_workload = workloads[0]
        assert first_workload["metadata"]["name"] == "test-id"
    
    def test_list_workloads_returns_empty_on_404(self, mock_k8s_client):
        """
        Test case: list_workloads returns an empty list when the client
        yields no items.
        """
        sut = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.list_custom_objects.return_value = []

        workloads = sut.list_workloads("test-ns", "opensandbox.io/id")

        assert workloads == []
    
    # ===== Workload Deletion Tests =====
    
    def test_delete_workload_deletes_existing_sandbox(
        self, mock_k8s_client, mock_batchsandbox_list_response
    ):
        """
        Test case: Deleting an existing sandbox issues exactly one delete
        call with the CRD coordinates, object name and zero grace period.
        """
        sut = BatchSandboxProvider(mock_k8s_client)
        existing = mock_batchsandbox_list_response["items"][0]
        mock_k8s_client.get_custom_object.return_value = existing

        sut.delete_workload("test-id", "test-ns")

        expected_call = {
            "group": "sandbox.opensandbox.io",
            "version": "v1alpha1",
            "namespace": "test-ns",
            "plural": "batchsandboxes",
            "name": "test-id",
            "grace_period_seconds": 0,
        }
        mock_k8s_client.delete_custom_object.assert_called_once_with(**expected_call)
    
    def test_delete_workload_raises_when_not_found(self, mock_k8s_client):
        """
        Test case: delete_workload raises with a "not found" message when
        the client lookup yields None.
        """
        sut = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.get_custom_object.return_value = None

        with pytest.raises(Exception) as raised:
            sut.delete_workload("test-id", "test-ns")

        error_text = str(raised.value)
        assert "not found" in error_text
    
    def test_delete_workload_sets_grace_period_zero(
        self, mock_k8s_client, mock_batchsandbox_list_response
    ):
        """
        Test case: The delete call is issued with grace_period_seconds=0
        """
        sut = BatchSandboxProvider(mock_k8s_client)
        existing = mock_batchsandbox_list_response["items"][0]
        mock_k8s_client.get_custom_object.return_value = existing

        sut.delete_workload("test-id", "test-ns")

        delete_kwargs = mock_k8s_client.delete_custom_object.call_args.kwargs
        assert delete_kwargs["grace_period_seconds"] == 0
    
    # ===== Expiration Time Management Tests =====
    
    def test_update_expiration_patches_spec(
        self, mock_k8s_client, mock_batchsandbox_list_response
    ):
        """
        Test case: update_expiration patches spec.expireTime with the
        ISO-formatted expiry.
        """
        sut = BatchSandboxProvider(mock_k8s_client)
        existing = mock_batchsandbox_list_response["items"][0]
        mock_k8s_client.get_custom_object.return_value = existing

        new_expiry = datetime(2025, 12, 31, 0, 0, 0, tzinfo=timezone.utc)
        sut.update_expiration("test-id", "test-ns", new_expiry)

        patch_kwargs = mock_k8s_client.patch_custom_object.call_args.kwargs
        expected_patch = {"spec": {"expireTime": "2025-12-31T00:00:00+00:00"}}
        assert patch_kwargs["body"] == expected_patch
    
    def test_get_expiration_parses_iso_format(self):
        """
        Test case: An ISO timestamp with an explicit +00:00 offset is parsed
        into the matching UTC datetime.
        """
        sut = BatchSandboxProvider(MagicMock())
        spec = {"expireTime": "2025-12-31T10:00:00+00:00"}
        workload = {"spec": spec}

        parsed = sut.get_expiration(workload)

        assert parsed == datetime(2025, 12, 31, 10, 0, 0, tzinfo=timezone.utc)
    
    def test_get_expiration_handles_z_suffix(self):
        """
        Test case: An ISO timestamp ending in "Z" is parsed into the
        matching UTC datetime.
        """
        sut = BatchSandboxProvider(MagicMock())
        spec = {"expireTime": "2025-12-31T10:00:00Z"}
        workload = {"spec": spec}

        parsed = sut.get_expiration(workload)

        assert parsed == datetime(2025, 12, 31, 10, 0, 0, tzinfo=timezone.utc)
    
    def test_get_expiration_returns_none_on_invalid_format(self):
        """
        Test case: An unparseable expireTime yields None instead of raising
        """
        sut = BatchSandboxProvider(MagicMock())
        spec = {"expireTime": "invalid-date"}
        workload = {"spec": spec}

        # The call itself must not raise; a None result signals the bad value.
        parsed = sut.get_expiration(workload)

        assert parsed is None
    
    def test_get_expiration_returns_none_when_missing(self):
        """
        Test case: A spec without expireTime yields None
        """
        sut = BatchSandboxProvider(MagicMock())
        empty_spec = {}
        workload = {"spec": empty_spec}

        parsed = sut.get_expiration(workload)

        assert parsed is None
    
    # ===== Status Retrieval Tests =====
    
    def test_get_status_running_with_ip(self):
        """
        Test case: ready == 1 together with an endpoint IP reports Running
        with reason POD_READY_WITH_IP and a message mentioning "IP".
        """
        sut = BatchSandboxProvider(MagicMock())
        replica_counts = {"replicas": 1, "ready": 1, "allocated": 1}
        metadata = {
            "annotations": {"sandbox.opensandbox.io/endpoints": '["10.0.0.1"]'},
            "creationTimestamp": "2025-12-24T10:00:00Z",
        }
        workload = {"status": replica_counts, "metadata": metadata}

        status = sut.get_status(workload)

        assert status["state"] == "Running"
        assert status["reason"] == "POD_READY_WITH_IP"
        assert "IP" in status["message"]
    
    def test_get_status_allocated_with_ip_not_ready(self):
        """
        Test case: An endpoint IP with ready == 0 reports Allocated with
        reason IP_ASSIGNED.
        """
        sut = BatchSandboxProvider(MagicMock())
        replica_counts = {"replicas": 1, "ready": 0, "allocated": 1}
        metadata = {
            "annotations": {"sandbox.opensandbox.io/endpoints": '["10.0.0.1"]'},
            "creationTimestamp": "2025-12-24T10:00:00Z",
        }
        workload = {"status": replica_counts, "metadata": metadata}

        status = sut.get_status(workload)

        assert status["state"] == "Allocated"
        assert status["reason"] == "IP_ASSIGNED"
    
    def test_get_status_pending_scheduled(self):
        """
        Test case: allocated == 1, ready == 0 and no endpoints annotation
        reports Pending with reason POD_SCHEDULED.
        """
        sut = BatchSandboxProvider(MagicMock())
        replica_counts = {"replicas": 1, "ready": 0, "allocated": 1}
        metadata = {"creationTimestamp": "2025-12-24T10:00:00Z"}
        workload = {"status": replica_counts, "metadata": metadata}

        status = sut.get_status(workload)

        assert status["state"] == "Pending"
        assert status["reason"] == "POD_SCHEDULED"
    
    def test_get_status_pending_when_endpoints_invalid_json(self):
        """
        Test case: An endpoints annotation that is not valid JSON still
        reports Pending with reason POD_SCHEDULED.
        """
        sut = BatchSandboxProvider(MagicMock())
        replica_counts = {"replicas": 1, "ready": 0, "allocated": 1}
        metadata = {
            "annotations": {"sandbox.opensandbox.io/endpoints": "invalid-json"},
            "creationTimestamp": "2025-12-24T10:00:00Z",
        }
        workload = {"status": replica_counts, "metadata": metadata}

        status = sut.get_status(workload)

        assert status["state"] == "Pending"
        assert status["reason"] == "POD_SCHEDULED"

    def test_get_status_pending_when_endpoints_empty_array(self):
        """
        Test case: An endpoints annotation holding an empty JSON array
        reports Pending with reason POD_SCHEDULED.
        """
        sut = BatchSandboxProvider(MagicMock())
        replica_counts = {"replicas": 1, "ready": 0, "allocated": 1}
        metadata = {
            "annotations": {"sandbox.opensandbox.io/endpoints": "[]"},
            "creationTimestamp": "2025-12-24T10:00:00Z",
        }
        workload = {"status": replica_counts, "metadata": metadata}

        status = sut.get_status(workload)

        assert status["state"] == "Pending"
        assert status["reason"] == "POD_SCHEDULED"
    
    def test_get_status_pending_unallocated(self):
        """
        Test case: allocated == 0 reports Pending with reason
        BATCHSANDBOX_PENDING.
        """
        sut = BatchSandboxProvider(MagicMock())
        replica_counts = {"replicas": 1, "ready": 0, "allocated": 0}
        metadata = {"creationTimestamp": "2025-12-24T10:00:00Z"}
        workload = {"status": replica_counts, "metadata": metadata}

        status = sut.get_status(workload)

        assert status["state"] == "Pending"
        assert status["reason"] == "BATCHSANDBOX_PENDING"
    
    # ===== Endpoint Information Tests =====
    
    def test_get_endpoint_info_parses_json_annotation(self):
        """
        Test case: The IP in the endpoints annotation is combined with the
        requested port into "ip:port", with no headers set.
        """
        sut = BatchSandboxProvider(MagicMock())
        annotations = {"sandbox.opensandbox.io/endpoints": '["10.0.0.1"]'}
        workload = {"metadata": {"annotations": annotations}}

        endpoint_info = sut.get_endpoint_info(workload, 8080, "sandbox-123")

        assert endpoint_info.endpoint == "10.0.0.1:8080"
        assert endpoint_info.headers is None
    
    def test_get_endpoint_info_uses_first_ip(self):
        """
        Test case: With several IPs in the annotation, the first one is
        used for the endpoint and no headers are set.
        """
        sut = BatchSandboxProvider(MagicMock())
        annotations = {"sandbox.opensandbox.io/endpoints": '["10.0.0.1", "10.0.0.2"]'}
        workload = {"metadata": {"annotations": annotations}}

        endpoint_info = sut.get_endpoint_info(workload, 8080, "sandbox-123")

        assert endpoint_info.endpoint == "10.0.0.1:8080"
        assert endpoint_info.headers is None
    
    def test_get_endpoint_info_returns_none_when_missing(self):
        """
        Test case: No endpoints annotation means no endpoint info (None)
        """
        sut = BatchSandboxProvider(MagicMock())
        no_annotations = {}
        workload = {"metadata": {"annotations": no_annotations}}

        endpoint_info = sut.get_endpoint_info(workload, 8080, "sandbox-123")

        assert endpoint_info is None
    
    def test_get_endpoint_info_returns_none_on_invalid_json(self):
        """
        Test case: An endpoints annotation that is not valid JSON yields None
        """
        sut = BatchSandboxProvider(MagicMock())
        annotations = {"sandbox.opensandbox.io/endpoints": "invalid-json"}
        workload = {"metadata": {"annotations": annotations}}

        endpoint_info = sut.get_endpoint_info(workload, 8080, "sandbox-123")

        assert endpoint_info is None
    
    def test_get_endpoint_info_returns_none_on_empty_array(self):
        """
        Test case: An endpoints annotation holding an empty JSON array
        yields None.
        """
        sut = BatchSandboxProvider(MagicMock())
        annotations = {"sandbox.opensandbox.io/endpoints": "[]"}
        workload = {"metadata": {"annotations": annotations}}

        endpoint_info = sut.get_endpoint_info(workload, 8080, "sandbox-123")

        assert endpoint_info is None

    # ===== Pool-based Creation Tests =====
    
    def test_create_workload_poolref_ignores_image_spec(self, mock_k8s_client):
        """
        Pool-based creation accepts an image_spec without raising.

        With a poolRef extension and an image_spec both supplied, the call
        succeeds and the submitted manifest carries the poolRef.
        """
        sut = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "sandbox-test-id", "uid": "test-uid"}
        }

        request = dict(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["python", "app.py"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            execd_image="execd:latest",
            extensions={"poolRef": "my-pool"},
        )
        created = sut.create_workload(**request)

        # The call completes and reports the created workload.
        assert created == {"name": "sandbox-test-id", "uid": "test-uid"}

        # The manifest references the pool.
        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        assert manifest["spec"]["poolRef"] == "my-pool"
    
    def test_create_workload_poolref_ignores_resource_limits(self, mock_k8s_client):
        """
        Pool-based creation accepts resource_limits without raising.

        With a poolRef extension and resource_limits both supplied, the call
        succeeds and the submitted manifest carries the poolRef.
        """
        sut = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "sandbox-test-id", "uid": "test-uid"}
        }

        request = dict(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri=""),
            entrypoint=["python", "app.py"],
            env={},
            resource_limits={"cpu": "1", "memory": "1Gi"},
            labels={},
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            execd_image="execd:latest",
            extensions={"poolRef": "my-pool"},
        )
        created = sut.create_workload(**request)

        # The call completes and reports the created workload.
        assert created == {"name": "sandbox-test-id", "uid": "test-uid"}

        # The manifest references the pool.
        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        assert manifest["spec"]["poolRef"] == "my-pool"
    
    def test_create_workload_poolref_allows_entrypoint_and_env(self, mock_k8s_client):
        """
        Pool mode carries the caller's entrypoint and env into ``taskTemplate``.

        Checks that the submitted manifest has a ``taskTemplate`` whose process
        command is a ``/bin/sh -c`` wrapper around the bootstrap script plus the
        entrypoint, ending with `` &``, and whose env list mirrors the env dict.
        """
        provider = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "sandbox-test-id", "uid": "test-uid"}
        }
        request = dict(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri=""),
            entrypoint=["python", "app.py"],
            env={"FOO": "bar"},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            execd_image="execd:latest",
            extensions={"poolRef": "my-pool"},
        )

        workload_info = provider.create_workload(**request)

        assert workload_info == {"name": "sandbox-test-id", "uid": "test-uid"}

        # Inspect the manifest that was sent to the API client.
        spec = mock_k8s_client.create_custom_object.call_args.kwargs["body"]["spec"]
        assert spec["poolRef"] == "my-pool"
        assert "taskTemplate" in spec

        # Walk down the taskTemplate: spec -> process -> command/env.
        task_template = spec["taskTemplate"]
        assert "spec" in task_template
        assert "process" in task_template["spec"]
        process = task_template["spec"]["process"]
        command = process["command"]
        assert command[0] == "/bin/sh"
        assert command[1] == "-c"
        # The shell script runs the entrypoint through bootstrap.sh, e.g.
        #   /opt/opensandbox/bin/bootstrap.sh python app.py &
        shell_script = command[2]
        assert "/opt/opensandbox/bin/bootstrap.sh python app.py" in shell_script
        assert shell_script.endswith(" &")
        assert process["env"] == [{"name": "FOO", "value": "bar"}]
    
    def test_build_task_template_with_env(self, mock_k8s_client):
        """
        Test _build_task_template with environment variables.

        Verifies:
        - Command uses shell wrapper: /bin/sh -c "..."
        - Shell script mentions bootstrap.sh and every entrypoint token and
          ends with " &" (background execution)
        - Env dict is rendered as a list of {"name", "value"} entries

        Generated command example:
        /bin/sh -c "/opt/opensandbox/bin/bootstrap.sh /usr/bin/python app.py &"
        """
        provider = BatchSandboxProvider(mock_k8s_client)

        result = provider._build_task_template(
            entrypoint=["/usr/bin/python", "app.py"],
            env={"KEY1": "value1", "KEY2": "value2"}
        )

        assert "spec" in result
        assert "process" in result["spec"]
        process_task = result["spec"]["process"]

        # Verify command structure
        command = process_task["command"]
        assert command[0] == "/bin/sh"
        assert command[1] == "-c"
        # Should execute via bootstrap.sh in background (&)
        assert "/opt/opensandbox/bin/bootstrap.sh" in command[2]
        assert "/usr/bin/python" in command[2]
        assert "app.py" in command[2]
        # Should end with " &" (run in background). The leading space is
        # asserted too, matching the other _build_task_template tests.
        assert command[2].endswith(" &")

        # Verify env list
        assert process_task["env"] == [
            {"name": "KEY1", "value": "value1"},
            {"name": "KEY2", "value": "value2"}
        ]
    
    def test_build_task_template_without_env(self, mock_k8s_client):
        """
        ``_build_task_template`` with an empty env dict.

        Expects an empty env list and a ``/bin/sh -c`` command whose script
        mentions bootstrap.sh and each entrypoint token and ends with `` &``.

        Generated command example:
        /bin/sh -c "/opt/opensandbox/bin/bootstrap.sh /usr/bin/python app.py &"
        """
        provider = BatchSandboxProvider(mock_k8s_client)

        template = provider._build_task_template(
            entrypoint=["/usr/bin/python", "app.py"],
            env={},
        )

        assert "spec" in template
        assert "process" in template["spec"]
        process = template["spec"]["process"]
        assert process["env"] == []

        # The command is still a shell wrapper even when no env is supplied.
        command = process["command"]
        assert command[0] == "/bin/sh"
        assert command[1] == "-c"

        # The script must reference bootstrap.sh and both entrypoint tokens.
        shell_script = command[2]
        for fragment in ("/opt/opensandbox/bin/bootstrap.sh", "/usr/bin/python", "app.py"):
            assert fragment in shell_script
        assert shell_script.endswith(" &")
    
    def test_build_task_template_uses_default_env_path(self, mock_k8s_client):
        """
        The generated shell script runs the entrypoint through bootstrap.sh.

        Asserts that the third command element mentions bootstrap.sh and each
        entrypoint token and ends with `` &``.

        NOTE(review): despite the test name, nothing here inspects an env
        path; only the command string is checked.
        """
        provider = BatchSandboxProvider(mock_k8s_client)

        template = provider._build_task_template(
            entrypoint=["python", "app.py"],
            env={"TEST_VAR": "test_value"},
        )

        shell_script = template["spec"]["process"]["command"][2]
        # bootstrap.sh plus both entrypoint tokens must appear in the script.
        for fragment in ("/opt/opensandbox/bin/bootstrap.sh", "python", "app.py"):
            assert fragment in shell_script
        # Trailing " &" sends the process to the background.
        assert shell_script.endswith(" &")
    
    def test_build_task_template_escapes_special_characters(self, mock_k8s_client):
        """
        Entrypoint arguments with spaces and quotes are quoted in the script.

        Uses ``['python', '-c', 'print("hello world")']`` as entrypoint and env
        values containing a space and an apostrophe. The script must contain
        the ``print(`` argument preceded by a quote character, and the env
        values must appear unmodified in the process env list.
        """
        provider = BatchSandboxProvider(mock_k8s_client)

        template = provider._build_task_template(
            entrypoint=["python", "-c", 'print("hello world")'],
            env={"KEY": "value with spaces", "QUOTE": "it's fine"},
        )
        process = template["spec"]["process"]
        shell_script = process["command"][2]

        # Plain tokens appear in the script.
        assert "python" in shell_script
        assert "-c" in shell_script
        # The argument holding spaces/quotes must be wrapped in some quote.
        quoted_with_single = "'print(" in shell_script
        assert quoted_with_single or '"print(' in shell_script  # Escaped

        # Env values travel through the env list rather than the command.
        rendered_env = process["env"]
        assert {"name": "KEY", "value": "value with spaces"} in rendered_env
        assert {"name": "QUOTE", "value": "it's fine"} in rendered_env
    
    def test_create_workload_poolref_builds_correct_manifest(self, mock_k8s_client):
        """
        Full shape of the BatchSandbox manifest produced in pool mode.

        Checks:
        - apiVersion, kind, metadata name and labels
        - spec.replicas, spec.poolRef, spec.expireTime and presence of taskTemplate
        - absence of spec.template
        """
        provider = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }
        request = dict(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri=""),
            entrypoint=["python", "app.py"],
            env={"FOO": "bar"},
            resource_limits={},
            labels={"test": "label"},
            expires_at=datetime(2025, 12, 31, 10, 0, 0, tzinfo=timezone.utc),
            execd_image="execd:latest",
            extensions={"poolRef": "test-pool"},
        )

        provider.create_workload(**request)

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]

        # Top-level identity of the custom resource.
        assert manifest["apiVersion"] == "sandbox.opensandbox.io/v1alpha1"
        assert manifest["kind"] == "BatchSandbox"
        metadata = manifest["metadata"]
        assert metadata["name"] == "test-id"
        assert metadata["labels"] == {"test": "label"}

        # Pool-specific spec fields.
        spec = manifest["spec"]
        assert spec["replicas"] == 1
        assert spec["poolRef"] == "test-pool"
        assert spec["expireTime"] == "2025-12-31T10:00:00+00:00"
        assert "taskTemplate" in spec

        # A pool-based manifest carries no pod template.
        assert "template" not in spec


class TestBatchSandboxProviderEgress:
    """BatchSandboxProvider egress sidecar tests"""

    def test_create_workload_without_network_policy_no_sidecar(self, mock_k8s_client):
        """
        Test case: with ``network_policy=None`` the pod has a single container
        named "sandbox" and no pod-level sysctls.
        """
        provider = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }
        request = dict(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, 10, 0, 0, tzinfo=timezone.utc),
            execd_image="execd:latest",
            network_policy=None,
            egress_image=None,
        )

        provider.create_workload(**request)

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        pod_spec = manifest["spec"]["template"]["spec"]
        containers = pod_spec["containers"]

        # Only the main container is expected.
        assert len(containers) == 1
        assert containers[0]["name"] == "sandbox"
        # No sysctls under a pod-level securityContext (absent context is fine).
        assert "sysctls" not in pod_spec.get("securityContext", {})

    def test_create_workload_with_network_policy_adds_sidecar(self, mock_k8s_client):
        """
        Test case: a network policy plus an egress image yields an "egress"
        sidecar container.

        Also checks the sidecar's image, env (rules and mode), NET_ADMIN
        capability, lack of ``privileged``/``command``, and the single
        privileged "execd-installer" init container whose first arg touches
        the IPv6 disable sysctl path.
        """
        provider = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }

        policy = NetworkPolicy(
            default_action="deny",
            egress=[NetworkRule(action="allow", target="pypi.org")],
        )
        request = dict(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, 10, 0, 0, tzinfo=timezone.utc),
            execd_image="execd:latest",
            network_policy=policy,
            egress_image="opensandbox/egress:v1.0.3",
        )

        provider.create_workload(**request)

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        pod_spec = manifest["spec"]["template"]["spec"]
        containers = pod_spec["containers"]

        # Main container + sidecar.
        assert len(containers) == 2

        # Locate the sidecar by name.
        egress_sidecar = next((c for c in containers if c["name"] == "egress"), None)
        assert egress_sidecar is not None
        assert egress_sidecar["image"] == "opensandbox/egress:v1.0.3"

        # Sidecar env carries the rules and the default DNS mode.
        sidecar_env = {}
        for entry in egress_sidecar.get("env", []):
            sidecar_env[entry["name"]] = entry["value"]
        assert "OPENSANDBOX_EGRESS_RULES" in sidecar_env
        assert sidecar_env["OPENSANDBOX_EGRESS_MODE"] == EGRESS_MODE_DNS

        # Sidecar gets NET_ADMIN but is not privileged and keeps the image's command.
        sidecar_ctx = egress_sidecar.get("securityContext", {})
        added_caps = sidecar_ctx.get("capabilities", {}).get("add", [])
        assert "NET_ADMIN" in added_caps
        assert sidecar_ctx.get("privileged") is not True
        assert "command" not in egress_sidecar

        # Exactly one init container: the privileged execd installer.
        init_containers = pod_spec.get("initContainers", [])
        assert len(init_containers) == 1
        installer = init_containers[0]
        assert installer["name"] == "execd-installer"
        assert installer["image"] == "execd:latest"
        assert installer.get("securityContext", {}).get("privileged") is True
        assert "/proc/sys/net/ipv6/conf/all/disable_ipv6" in installer["args"][0]

    def test_create_workload_with_network_policy_persists_annotation_and_sidecar_token(self, mock_k8s_client):
        """
        Test case: the egress auth token reaches both the manifest annotations
        and the egress sidecar's environment.
        """
        provider = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }
        request = dict(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=None,
            execd_image="execd:latest",
            network_policy=NetworkPolicy(default_action="deny", egress=[]),
            egress_image="opensandbox/egress:v1.0.3",
            annotations={SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY: "egress-token"},
            egress_auth_token="egress-token",
        )

        provider.create_workload(**request)

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        # The annotation passed in is present on the created object.
        annotations = manifest["metadata"]["annotations"]
        assert annotations[SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY] == "egress-token"

        # The sidecar receives the same token through its env, in DNS mode.
        containers = manifest["spec"]["template"]["spec"]["containers"]
        egress_sidecar = next((c for c in containers if c["name"] == "egress"), None)
        assert egress_sidecar is not None
        sidecar_env = {}
        for entry in egress_sidecar.get("env", []):
            sidecar_env[entry["name"]] = entry["value"]
        assert sidecar_env[OPENSANDBOX_EGRESS_TOKEN] == "egress-token"
        assert sidecar_env["OPENSANDBOX_EGRESS_MODE"] == EGRESS_MODE_DNS

    def test_create_workload_with_egress_mode_dns_nft(self, mock_k8s_client):
        """
        Test case: an explicit ``egress_mode`` is forwarded to the sidecar's
        OPENSANDBOX_EGRESS_MODE environment variable.
        """
        provider = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }
        request = dict(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=None,
            execd_image="execd:latest",
            network_policy=NetworkPolicy(default_action="deny", egress=[]),
            egress_image="opensandbox/egress:v1.0.3",
            egress_mode=EGRESS_MODE_DNS_NFT,
        )

        provider.create_workload(**request)

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        containers = manifest["spec"]["template"]["spec"]["containers"]
        egress_sidecar = next((c for c in containers if c["name"] == "egress"), None)
        assert egress_sidecar is not None

        sidecar_env = {}
        for entry in egress_sidecar.get("env", []):
            sidecar_env[entry["name"]] = entry["value"]
        assert sidecar_env["OPENSANDBOX_EGRESS_MODE"] == EGRESS_MODE_DNS_NFT

    def test_create_workload_with_network_policy_does_not_add_pod_ipv6_sysctls(self, mock_k8s_client):
        """
        With egress enabled, no pod-level sysctls are set; the IPv6 disable
        path shows up in the execd-installer init container's args instead.
        """
        provider = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }

        policy = NetworkPolicy(
            default_action="deny",
            egress=[NetworkRule(action="allow", target="example.com")],
        )
        request = dict(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, 10, 0, 0, tzinfo=timezone.utc),
            execd_image="execd:latest",
            network_policy=policy,
            egress_image="opensandbox/egress:v1.0.3",
        )

        provider.create_workload(**request)

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        pod_spec = manifest["spec"]["template"]["spec"]

        # No sysctls under a pod-level securityContext (absent context is fine).
        assert "sysctls" not in pod_spec.get("securityContext", {})

        # The sidecar must exist (next() raises otherwise) and keep its image command.
        egress_sidecar = next(c for c in pod_spec["containers"] if c["name"] == "egress")
        assert "command" not in egress_sidecar

        # The first init container is the execd installer handling IPv6.
        installer = pod_spec["initContainers"][0]
        assert installer["name"] == "execd-installer"
        assert "/proc/sys/net/ipv6/conf/all/disable_ipv6" in installer["args"][0]

    def test_create_workload_with_network_policy_drops_net_admin_from_main_container(self, mock_k8s_client):
        """
        Test case: with a network policy and egress image, the "sandbox"
        container's securityContext drops the NET_ADMIN capability.
        """
        provider = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }

        policy = NetworkPolicy(
            default_action="deny",
            egress=[NetworkRule(action="allow", target="example.com")],
        )
        request = dict(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, 10, 0, 0, tzinfo=timezone.utc),
            execd_image="execd:latest",
            network_policy=policy,
            egress_image="opensandbox/egress:v1.0.3",
        )

        provider.create_workload(**request)

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        containers = manifest["spec"]["template"]["spec"]["containers"]

        # Locate the main container by name.
        sandbox_container = next((c for c in containers if c["name"] == "sandbox"), None)
        assert sandbox_container is not None

        # Step down securityContext -> capabilities -> drop, asserting each level.
        assert "securityContext" in sandbox_container
        security_ctx = sandbox_container["securityContext"]
        assert "capabilities" in security_ctx
        capabilities = security_ctx["capabilities"]
        assert "drop" in capabilities
        assert "NET_ADMIN" in capabilities["drop"]

    def test_create_workload_without_egress_image_no_sidecar(self, mock_k8s_client):
        """
        Test case: a network policy without an egress image produces no
        sidecar; the pod has a single container named "sandbox".
        """
        provider = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }

        policy = NetworkPolicy(
            default_action="deny",
            egress=[NetworkRule(action="allow", target="example.com")],
        )
        request = dict(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, 10, 0, 0, tzinfo=timezone.utc),
            execd_image="execd:latest",
            network_policy=policy,
            egress_image=None,
        )

        provider.create_workload(**request)

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        containers = manifest["spec"]["template"]["spec"]["containers"]

        # Only the main container is expected.
        assert len(containers) == 1
        assert containers[0]["name"] == "sandbox"

    def test_egress_sidecar_contains_network_policy_in_env(self, mock_k8s_client):
        """
        Test case: Verify sidecar environment variable contains serialized network policy

        The policy is built with two rules (one allow, one deny). The
        OPENSANDBOX_EGRESS_RULES value must parse as JSON and reproduce the
        default action and both rules, in order.
        """
        provider = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }

        expires_at = datetime(2025, 12, 31, 10, 0, 0, tzinfo=timezone.utc)
        network_policy = NetworkPolicy(
            default_action="deny",
            egress=[
                NetworkRule(action="allow", target="pypi.org"),
                NetworkRule(action="deny", target="*.malicious.com"),
            ],
        )

        provider.create_workload(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=expires_at,
            execd_image="execd:latest",
            network_policy=network_policy,
            egress_image="opensandbox/egress:v1.0.3",
        )

        body = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        pod_spec = body["spec"]["template"]["spec"]
        containers = pod_spec["containers"]

        sidecar = next((c for c in containers if c["name"] == "egress"), None)
        assert sidecar is not None

        env_vars = {e["name"]: e["value"] for e in sidecar.get("env", [])}
        assert "OPENSANDBOX_EGRESS_RULES" in env_vars

        # Verify the environment variable contains valid JSON with network policy
        import json
        policy_json = json.loads(env_vars["OPENSANDBOX_EGRESS_RULES"])
        assert policy_json["defaultAction"] == "deny"
        assert len(policy_json["egress"]) == 2
        assert policy_json["egress"][0]["action"] == "allow"
        assert policy_json["egress"][0]["target"] == "pypi.org"
        # The second (deny) rule must survive serialization as well; checking
        # only the first rule would miss a dropped or altered trailing entry.
        assert policy_json["egress"][1]["action"] == "deny"
        assert policy_json["egress"][1]["target"] == "*.malicious.com"

    def test_main_container_no_security_context_without_network_policy(self, mock_k8s_client):
        """
        Test case: with ``network_policy=None`` the first container carries no
        ``securityContext`` key.
        """
        provider = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }
        request = dict(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, 10, 0, 0, tzinfo=timezone.utc),
            execd_image="execd:latest",
            network_policy=None,
            egress_image=None,
        )

        provider.create_workload(**request)

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        first_container = manifest["spec"]["template"]["spec"]["containers"][0]

        # Without a network policy no securityContext is attached.
        assert "securityContext" not in first_container

    def test_create_workload_with_network_policy_works_with_template(self, mock_k8s_client, tmp_path):
        """
        Test case: Verify egress sidecar works correctly when template is provided
        """
        template_file = tmp_path / "template.yaml"
        template_file.write_text(
            """
spec:
  template:
    spec:
      volumes:
        - name: sandbox-shared-data
          emptyDir: {}
"""
        )
        provider = BatchSandboxProvider(mock_k8s_client, _app_config_with_template(str(template_file)))
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }

        expires_at = datetime(2025, 12, 31, 10, 0, 0, tzinfo=timezone.utc)
        network_policy = NetworkPolicy(
            default_action="deny",
            egress=[NetworkRule(action="allow", target="example.com")],
        )

        provider.create_workload(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=expires_at,
            execd_image="execd:latest",
            network_policy=network_policy,
            egress_image="opensandbox/egress:v1.0.3",
        )

        body = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        pod_spec = body["spec"]["template"]["spec"]
        containers = pod_spec["containers"]
        
        # Should have both main container and sidecar
        assert len(containers) == 2
        
        # Verify sidecar exists
        sidecar = next((c for c in containers if c["name"] == "egress"), None)
        assert sidecar is not None
        
        # Pod-level IPv6 sysctls are not injected for egress (sidecar startup handles all.disable)
        assert "securityContext" not in pod_spec or "sysctls" not in pod_spec.get("securityContext", {})

        # Verify template volumes are still merged
        volume_names = [v["name"] for v in pod_spec["volumes"]]
        assert "sandbox-shared-data" in volume_names
        assert "opensandbox-bin" in volume_names

    # ===== Image Auth Tests =====

    def test_supports_image_auth_returns_true(self, mock_k8s_client):
        """
        Test case: ``supports_image_auth()`` on BatchSandboxProvider returns exactly True
        """
        supported = BatchSandboxProvider(mock_k8s_client).supports_image_auth()
        assert supported is True

    def test_create_workload_with_image_auth_injects_image_pull_secrets(self, mock_k8s_client):
        """
        Test case: when the image spec carries auth, the pod spec lists one
        imagePullSecret named ``<IMAGE_AUTH_SECRET_PREFIX>-<sandbox id>``
        """
        provider = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "uid-123"}
        }
        private_image = ImageSpec(
            uri="registry.example.com/img:tag",
            auth=ImageAuth(username="user", password="pass"),
        )

        provider.create_workload(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=private_image,
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            execd_image="execd:latest",
        )

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        pod_spec = manifest["spec"]["template"]["spec"]
        expected_secret_refs = [{"name": f"{IMAGE_AUTH_SECRET_PREFIX}-test-id"}]
        assert pod_spec.get("imagePullSecrets") == expected_secret_refs

    def test_create_workload_with_image_auth_creates_secret(self, mock_k8s_client):
        """
        Test case: exactly one Secret of type kubernetes.io/dockerconfigjson is
        created in the sandbox namespace, owned by the created BatchSandbox
        """
        provider = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "uid-abc"}
        }
        private_image = ImageSpec(
            uri="registry.example.com/img:tag",
            auth=ImageAuth(username="user", password="pass"),
        )

        provider.create_workload(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=private_image,
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            execd_image="execd:latest",
        )

        create_secret = mock_k8s_client.create_secret
        create_secret.assert_called_once()
        secret_call = create_secret.call_args.kwargs
        assert secret_call["namespace"] == "test-ns"

        created_secret = secret_call["body"]
        assert created_secret.type == "kubernetes.io/dockerconfigjson"

        # The first owner reference points back at the BatchSandbox returned above.
        owner = created_secret.metadata.owner_references[0]
        assert owner.uid == "uid-abc"
        assert owner.kind == "BatchSandbox"
        assert owner.name == "test-id"

    def test_create_workload_without_image_auth_skips_secret(self, mock_k8s_client):
        """
        Test case: without image auth, ``create_secret`` is never called and
        the pod spec has no imagePullSecrets key
        """
        provider = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "uid-123"}
        }
        request = dict(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            execd_image="execd:latest",
        )

        provider.create_workload(**request)

        mock_k8s_client.create_secret.assert_not_called()
        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        pod_spec = manifest["spec"]["template"]["spec"]
        assert "imagePullSecrets" not in pod_spec

    def test_create_workload_with_image_auth_secret_failure_rolls_back_batchsandbox(self, mock_k8s_client):
        """
        Test case: when ``create_secret`` raises ApiException, the exception
        propagates and the BatchSandbox is deleted with zero grace period
        """
        provider = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "uid-123"}
        }
        # Simulate the API server rejecting the Secret creation.
        mock_k8s_client.create_secret.side_effect = ApiException(status=403)

        private_image = ImageSpec(
            uri="registry.example.com/img:tag",
            auth=ImageAuth(username="user", password="pass"),
        )
        with pytest.raises(ApiException):
            provider.create_workload(
                sandbox_id="test-id",
                namespace="test-ns",
                image_spec=private_image,
                entrypoint=["/bin/bash"],
                env={},
                resource_limits={},
                labels={},
                expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
                execd_image="execd:latest",
            )

        # The rollback is one delete call addressed at the created object.
        expected_delete = dict(
            group=provider.group,
            version=provider.version,
            namespace="test-ns",
            plural=provider.plural,
            name="test-id",
            grace_period_seconds=0,
        )
        mock_k8s_client.delete_custom_object.assert_called_once_with(**expected_delete)

    # ===== Volume Support Tests =====

    def test_create_workload_with_pvc_volume(self, mock_k8s_client):
        """
        Creating a workload with a PVC-backed volume.

        Checks:
        - the pod spec gains a volume named after the request volume
        - that volume's persistentVolumeClaim.claimName matches the PVC
        - the first container mounts it at the requested path, read-write
        """
        from src.api.schema import Volume, PVC

        provider = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }

        data_volume = Volume(
            name="data-volume",
            pvc=PVC(claim_name="my-pvc"),
            mount_path="/mnt/data",
            read_only=False,
        )
        request = dict(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, 10, 0, 0, tzinfo=timezone.utc),
            execd_image="execd:latest",
            volumes=[data_volume],
        )

        workload_info = provider.create_workload(**request)

        assert workload_info == {"name": "test-id", "uid": "test-uid"}

        # Inspect the manifest that was sent to the API client.
        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        pod_spec = manifest["spec"]["template"]["spec"]

        # Volume definition on the pod.
        pod_volumes = pod_spec.get("volumes", [])
        pvc_entry = next((v for v in pod_volumes if v["name"] == "data-volume"), None)
        assert pvc_entry is not None
        assert pvc_entry["persistentVolumeClaim"]["claimName"] == "my-pvc"

        # Matching mount on the first (main) container.
        container_mounts = pod_spec["containers"][0].get("volumeMounts", [])
        pvc_mount = next((m for m in container_mounts if m["name"] == "data-volume"), None)
        assert pvc_mount is not None
        assert pvc_mount["mountPath"] == "/mnt/data"
        assert pvc_mount["readOnly"] is False

    def test_create_workload_with_pvc_volume_readonly(self, mock_k8s_client):
        """
        Test that a PVC volume declared read-only yields a mount with readOnly True.
        """
        from src.api.schema import Volume, PVC

        provider = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }

        readonly_models = Volume(
            name="models-volume",
            pvc=PVC(claim_name="models-pvc"),
            mount_path="/mnt/models",
            read_only=True,
        )

        provider.create_workload(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            execd_image="execd:latest",
            volumes=[readonly_models],
        )

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        first_container = manifest["spec"]["template"]["spec"]["containers"][0]

        # Pick the first mount that carries the requested volume name.
        models_mount = None
        for mount in first_container.get("volumeMounts", []):
            if mount["name"] == "models-volume":
                models_mount = mount
                break
        assert models_mount is not None
        assert models_mount["readOnly"] is True

    def test_create_workload_with_pvc_volume_subpath(self, mock_k8s_client):
        """
        Test that a PVC volume's sub_path is emitted as subPath on the mount.
        """
        from src.api.schema import Volume, PVC

        provider = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }

        task_scoped_volume = Volume(
            name="data-volume",
            pvc=PVC(claim_name="shared-pvc"),
            mount_path="/mnt/data",
            sub_path="task-001",
            read_only=False,
        )
        workload_args = dict(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            execd_image="execd:latest",
            volumes=[task_scoped_volume],
        )

        provider.create_workload(**workload_args)

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        rendered_pod = manifest["spec"]["template"]["spec"]

        container_mounts = rendered_pod["containers"][0].get("volumeMounts", [])
        data_mount = next(
            (mount for mount in container_mounts if mount["name"] == "data-volume"),
            None,
        )
        assert data_mount is not None
        assert data_mount.get("subPath") == "task-001"

    def test_create_workload_with_host_volume(self, mock_k8s_client):
        """
        Test creating a workload that mounts a host directory.

        Verifies:
        - the pod spec has a "host-volume" volume with hostPath.path
          "/data/shared" and hostPath.type "DirectoryOrCreate"
        - the first container mounts it read-only at /mnt/host
        """
        from src.api.schema import Volume, Host

        provider = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }

        def first_named(entries, wanted_name):
            # First entry whose "name" matches, or None when there is none.
            for entry in entries:
                if entry["name"] == wanted_name:
                    return entry
            return None

        shared_host_dir = Volume(
            name="host-volume",
            host=Host(path="/data/shared"),
            mount_path="/mnt/host",
            read_only=True,
        )

        provider.create_workload(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            execd_image="execd:latest",
            volumes=[shared_host_dir],
        )

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        rendered_pod = manifest["spec"]["template"]["spec"]

        # Volume definition
        host_volume = first_named(rendered_pod.get("volumes", []), "host-volume")
        assert host_volume is not None
        assert host_volume["hostPath"]["path"] == "/data/shared"
        assert host_volume["hostPath"]["type"] == "DirectoryOrCreate"

        # Volume mount on the first container
        sandbox_container = rendered_pod["containers"][0]
        host_mount = first_named(sandbox_container.get("volumeMounts", []), "host-volume")
        assert host_mount is not None
        assert host_mount["mountPath"] == "/mnt/host"
        assert host_mount["readOnly"] is True

    def test_create_workload_with_multiple_volumes(self, mock_k8s_client):
        """
        Test creating a workload with one PVC volume and one hostPath volume.

        Verifies that exactly two pod volumes carry the requested names and that
        the first container has a mount for each of them.
        """
        from src.api.schema import Volume, PVC, Host

        provider = BatchSandboxProvider(mock_k8s_client)
        mock_k8s_client.create_custom_object.return_value = {
            "metadata": {"name": "test-id", "uid": "test-uid"}
        }

        pvc_backed = Volume(
            name="pvc-volume",
            pvc=PVC(claim_name="data-pvc"),
            mount_path="/mnt/data",
            read_only=False,
        )
        host_backed = Volume(
            name="host-volume",
            host=Host(path="/tmp/cache"),
            mount_path="/mnt/cache",
            read_only=True,
        )

        provider.create_workload(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            execd_image="execd:latest",
            volumes=[pvc_backed, host_backed],
        )

        manifest = mock_k8s_client.create_custom_object.call_args.kwargs["body"]
        rendered_pod = manifest["spec"]["template"]["spec"]

        # Both requested volumes are defined (other volumes may also be present).
        requested_names = ("pvc-volume", "host-volume")
        defined_count = sum(
            1 for volume in rendered_pod.get("volumes", []) if volume["name"] in requested_names
        )
        assert defined_count == 2

        # Both requested volumes are mounted in the first container.
        sandbox_container = rendered_pod["containers"][0]
        mounted_names = set()
        for mount in sandbox_container.get("volumeMounts", []):
            mounted_names.add(mount["name"])
        assert "pvc-volume" in mounted_names
        assert "host-volume" in mounted_names

    def test_create_workload_pool_mode_rejects_volumes(self, mock_k8s_client):
        """
        Test that passing volumes together with a ``poolRef`` extension raises
        ``ValueError`` whose message matches "Pool mode does not support volumes".
        """
        from src.api.schema import Volume, PVC

        provider = BatchSandboxProvider(mock_k8s_client)

        requested_volume = Volume(
            name="data-volume",
            pvc=PVC(claim_name="my-pvc"),
            mount_path="/mnt/data",
        )
        pool_workload_args = dict(
            sandbox_id="test-id",
            namespace="test-ns",
            image_spec=ImageSpec(uri="python:3.11"),
            entrypoint=["/bin/bash"],
            env={},
            resource_limits={},
            labels={},
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            execd_image="execd:latest",
            extensions={"poolRef": "my-pool"},
            volumes=[requested_volume],
        )

        with pytest.raises(ValueError, match="Pool mode does not support volumes"):
            provider.create_workload(**pool_workload_args)

    def test_apply_volumes_to_pod_spec_empty_volumes(self, mock_k8s_client):
        """
        Test that apply_volumes_to_pod_spec with no volumes leaves the pod
        spec's volumes and the container's volumeMounts empty.
        """
        sole_container = {"name": "main", "volumeMounts": []}
        pod_spec = {"containers": [sole_container], "volumes": []}

        apply_volumes_to_pod_spec(pod_spec, [])

        # Nothing was requested, so nothing should have been added.
        assert pod_spec["volumes"] == []
        assert pod_spec["containers"][0]["volumeMounts"] == []

    def test_apply_volumes_to_pod_spec_no_containers(self, mock_k8s_client):
        """
        Test that apply_volumes_to_pod_spec does not raise when the pod spec has
        no "containers" key, and that the spec's volumes list stays empty.
        """
        from src.api.schema import Volume, PVC

        containerless_spec = {"volumes": []}
        requested = [
            Volume(name="test", pvc=PVC(claim_name="pvc"), mount_path="/mnt"),
        ]

        # The call itself must complete without an exception.
        apply_volumes_to_pod_spec(containerless_spec, requested)

        # With no container to mount into, no volume is expected either.
        assert containerless_spec["volumes"] == []

    def test_apply_volumes_to_pod_spec_duplicate_internal_volume(self, mock_k8s_client):
        """
        Test that a requested volume whose name equals one already present in
        the pod spec ("opensandbox-bin") is rejected with a ValueError whose
        message mentions the conflict with an internal volume.
        """
        from src.api.schema import Volume, PVC

        preexisting_volume = {"name": "opensandbox-bin", "emptyDir": {}}
        pod_spec = {
            "containers": [{"name": "sandbox", "volumeMounts": []}],
            "volumes": [preexisting_volume],
        }
        colliding = Volume(name="opensandbox-bin", pvc=PVC(claim_name="pvc"), mount_path="/mnt")

        # The pattern is a plain substring; pytest searches it in str(exception).
        with pytest.raises(ValueError, match="conflicts with an internal volume"):
            apply_volumes_to_pod_spec(pod_spec, [colliding])

    def test_apply_volumes_to_pod_spec_same_pvc_multiple_mounts(self, mock_k8s_client):
        """
        Two Volume objects that reference the same claim_name must produce a
        single Kubernetes volume (named after the first Volume) and two
        volumeMounts that both reference that volume, each keeping its own
        mountPath and subPath.

        The stated motivation is to avoid CSI driver issues caused by duplicate
        PVC volume definitions.
        """
        from src.api.schema import Volume, PVC

        shared_claim = "oss-pvc-r"
        pod_spec = {
            "containers": [{"name": "main", "volumeMounts": []}],
            "volumes": [],
        }
        published = Volume(
            name="skills",
            pvc=PVC(claim_name=shared_claim),
            mount_path="/path/to/skills",
            sub_path="skill-hub/publish",
            read_only=True,
        )
        drafts = Volume(
            name="draft",
            pvc=PVC(claim_name=shared_claim),
            mount_path="/path/to/draft",
            sub_path="skill-hub/draft",
            read_only=True,
        )

        apply_volumes_to_pod_spec(pod_spec, [published, drafts])

        # Exactly one volume definition, named after the first Volume.
        assert len(pod_spec["volumes"]) == 1
        only_volume = pod_spec["volumes"][0]
        assert only_volume["name"] == "skills"
        assert only_volume["persistentVolumeClaim"]["claimName"] == "oss-pvc-r"

        # Two mounts, indexed here by their mount path.
        container_mounts = pod_spec["containers"][0]["volumeMounts"]
        assert len(container_mounts) == 2
        mount_at = {}
        for mount in container_mounts:
            mount_at[mount["mountPath"]] = mount

        expected_sub_paths = (
            ("/path/to/skills", "skill-hub/publish"),
            ("/path/to/draft", "skill-hub/draft"),
        )
        for mount_path, sub_path in expected_sub_paths:
            assert mount_at[mount_path]["name"] == "skills"
            assert mount_at[mount_path].get("subPath") == sub_path


================================================
FILE: server/tests/k8s/test_batchsandbox_template.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Unit tests for BatchSandboxTemplateManager.
"""

import pytest
import yaml

from src.services.k8s.batchsandbox_template import BatchSandboxTemplateManager


class TestBatchSandboxTemplateManager:
    """Unit tests for BatchSandboxTemplateManager: loading, copying and merging."""

    def test_load_valid_yaml_template_successfully(self, tmp_path):
        """
        Test case: a well-formed YAML mapping is loaded and kept on the manager
        """
        expected_template = {
            "metadata": {"annotations": {"test": "value"}},
            "spec": {"template": {"spec": {"nodeSelector": {"env": "test"}}}},
        }
        yaml_path = tmp_path / "valid_template.yaml"
        yaml_path.write_text(yaml.dump(expected_template))

        loaded = BatchSandboxTemplateManager(str(yaml_path))

        assert loaded._template == expected_template
        assert loaded.template_file_path == str(yaml_path)

    def test_load_nonexistent_file_raises_error(self):
        """
        Test case: a missing template path raises FileNotFoundError mentioning "not found"
        """
        with pytest.raises(FileNotFoundError) as raised:
            BatchSandboxTemplateManager("/path/to/nonexistent.yaml")

        message = str(raised.value)
        assert "not found" in message

    def test_load_invalid_yaml_raises_error(self, tmp_path):
        """
        Test case: malformed YAML raises RuntimeError mentioning "Failed to load"
        """
        broken_path = tmp_path / "invalid.yaml"
        # Unterminated flow sequence, so the YAML parser cannot accept it.
        broken_path.write_text("invalid: yaml: [missing: bracket")

        with pytest.raises(RuntimeError) as raised:
            BatchSandboxTemplateManager(str(broken_path))

        message = str(raised.value)
        assert "Failed to load" in message

    def test_load_non_dict_yaml_raises_error(self, tmp_path):
        """
        Test case: YAML whose top level is a list raises ValueError naming the actual type
        """
        list_path = tmp_path / "list.yaml"
        list_path.write_text("- item1\n- item2")

        with pytest.raises(ValueError) as raised:
            BatchSandboxTemplateManager(str(list_path))

        message = str(raised.value)
        assert "must be a YAML object" in message
        assert "got list" in message

    def test_init_without_template_file_creates_empty_manager(self):
        """
        Test case: passing None yields a manager with no template and no path
        """
        empty_manager = BatchSandboxTemplateManager(None)

        assert empty_manager._template is None
        assert empty_manager.template_file_path is None

    def test_deep_merge_runtime_overrides_template(self):
        """
        Test case: a key present on both sides takes the override's value
        """
        template_side = {"spec": {"replicas": 1, "expireTime": "old"}}
        runtime_side = {"spec": {"expireTime": "new"}}

        merged = BatchSandboxTemplateManager._deep_merge(template_side, runtime_side)

        assert merged == {"spec": {"replicas": 1, "expireTime": "new"}}

    def test_deep_merge_preserves_template_only_fields(self):
        """
        Test case: keys that exist only in the base survive the merge
        """
        scheduling_hints = {
            "nodeSelector": {"env": "prod"},
            "tolerations": [{"key": "test"}],
        }
        template_side = {"spec": {"template": {"spec": scheduling_hints}}}
        runtime_side = {"spec": {"replicas": 1}}

        merged = BatchSandboxTemplateManager._deep_merge(template_side, runtime_side)

        merged_pod_spec = merged["spec"]["template"]["spec"]
        assert merged["spec"]["replicas"] == 1
        assert merged_pod_spec["nodeSelector"] == {"env": "prod"}
        assert merged_pod_spec["tolerations"] == [{"key": "test"}]

    def test_deep_merge_nested_dicts_recursively(self):
        """
        Test case: nested dicts are combined key by key, override winning on clashes
        """
        template_side = {"metadata": {"annotations": {"a": "1", "b": "2"}}}
        runtime_side = {"metadata": {"annotations": {"b": "3", "c": "4"}}}

        merged = BatchSandboxTemplateManager._deep_merge(template_side, runtime_side)

        assert merged == {"metadata": {"annotations": {"a": "1", "b": "3", "c": "4"}}}

    def test_deep_merge_replaces_lists_not_merges(self):
        """
        Test case: a list in the override replaces the base list wholesale
        """
        template_side = {"spec": {"tolerations": [{"key": "a"}]}}
        runtime_side = {"spec": {"tolerations": [{"key": "b"}]}}

        merged = BatchSandboxTemplateManager._deep_merge(template_side, runtime_side)

        assert merged == {"spec": {"tolerations": [{"key": "b"}]}}

    def test_deep_merge_none_values_do_not_override(self):
        """
        Test case: a None in the override leaves the base value in place
        """
        template_side = {"spec": {"expireTime": "2024-12-31"}}
        runtime_side = {"spec": {"expireTime": None}}

        merged = BatchSandboxTemplateManager._deep_merge(template_side, runtime_side)

        assert merged == {"spec": {"expireTime": "2024-12-31"}}

    def test_deep_copy_creates_independent_copies(self):
        """
        Test case: mutating a deep copy's nested list and dict does not touch the source
        """
        source = {
            "nested": {"list": [1, 2, 3], "dict": {"key": "value"}}
        }

        clone = BatchSandboxTemplateManager._deep_copy(source)

        # Mutate the clone at both nested containers.
        clone["nested"]["list"].append(4)
        clone["nested"]["dict"]["key"] = "new_value"

        # The source must still hold its initial contents.
        assert source["nested"]["list"] == [1, 2, 3]
        assert source["nested"]["dict"]["key"] == "value"

    def test_get_base_template_returns_copy(self, tmp_path):
        """
        Test case: successive get_base_template calls return equal but distinct objects
        """
        yaml_path = tmp_path / "template.yaml"
        yaml_path.write_text(yaml.dump({"spec": {"replicas": 1}}))

        manager = BatchSandboxTemplateManager(str(yaml_path))

        first_call = manager.get_base_template()
        second_call = manager.get_base_template()

        # Equal by value, different by identity.
        assert first_call == second_call
        assert first_call is not second_call

    def test_get_base_template_returns_empty_dict_when_no_template(self):
        """
        Test case: with no template configured, get_base_template returns {}
        """
        base = BatchSandboxTemplateManager(None).get_base_template()

        assert base == {}

    def test_merge_with_runtime_values_without_template(self):
        """
        Test case: with no template configured, the merge result equals the runtime manifest
        """
        manager = BatchSandboxTemplateManager(None)
        runtime_manifest = {"spec": {"replicas": 1}}

        merged = manager.merge_with_runtime_values(runtime_manifest)

        assert merged == runtime_manifest

    def test_merge_with_runtime_values_with_template(self, tmp_path):
        """
        Test case: template-only fields and runtime-only fields both appear in the merge
        """
        # Template contributes scheduling fields only.
        template_pod_spec = {
            "nodeSelector": {"workload": "sandbox"},
            "tolerations": [{"operator": "Exists"}],
        }
        yaml_path = tmp_path / "template.yaml"
        yaml_path.write_text(yaml.dump({"spec": {"template": {"spec": template_pod_spec}}}))

        manager = BatchSandboxTemplateManager(str(yaml_path))

        # Runtime manifest contributes replicas, containers and volumes.
        runtime_pod_spec = {
            "containers": [{"name": "test"}],
            "volumes": [{"name": "vol"}],
        }
        runtime_manifest = {
            "spec": {
                "replicas": 1,
                "template": {"spec": runtime_pod_spec},
            }
        }

        merged = manager.merge_with_runtime_values(runtime_manifest)
        merged_pod_spec = merged["spec"]["template"]["spec"]

        # Fields that came from the template
        assert merged_pod_spec["nodeSelector"] == {"workload": "sandbox"}
        assert merged_pod_spec["tolerations"] == [{"operator": "Exists"}]
        # Fields that came from the runtime manifest
        assert merged["spec"]["replicas"] == 1
        assert merged_pod_spec["containers"] == [{"name": "test"}]
        assert merged_pod_spec["volumes"] == [{"name": "vol"}]


================================================
FILE: server/tests/k8s/test_egress_helper.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Unit tests for egress helper functions.
"""

import json
from typing import Optional

from src.api.schema import NetworkPolicy, NetworkRule
from src.config import EGRESS_MODE_DNS, EGRESS_MODE_DNS_NFT
from src.services.constants import EGRESS_MODE_ENV, EGRESS_RULES_ENV, OPENSANDBOX_EGRESS_TOKEN
from src.services.k8s.egress_helper import (
    apply_egress_to_spec,
    build_security_context_for_sandbox_container,
    prep_execd_init_for_egress,
)


def _egress_container(
    egress_image: str,
    network_policy: NetworkPolicy,
    *,
    egress_auth_token: Optional[str] = None,
    egress_mode: str = EGRESS_MODE_DNS,
) -> dict:
    """Return the first container ``apply_egress_to_spec`` appends to an empty list.

    Args:
        egress_image: Image reference forwarded to ``apply_egress_to_spec``.
        network_policy: Policy forwarded to ``apply_egress_to_spec``.
        egress_auth_token: Optional token forwarded as ``egress_auth_token``.
        egress_mode: Mode forwarded as ``egress_mode``; defaults to ``EGRESS_MODE_DNS``.

    Returns:
        The element at index 0 of the container list after the call.
    """
    sidecars: list = []
    forwarded_options = {
        "egress_auth_token": egress_auth_token,
        "egress_mode": egress_mode,
    }
    apply_egress_to_spec(sidecars, network_policy, egress_image, **forwarded_options)
    return sidecars[0]


class TestEgressSidecarViaApply:
    """Shape of the egress sidecar container, obtained through ``apply_egress_to_spec``."""

    def test_builds_container_with_basic_config(self):
        """The sidecar is named "egress", uses the given image, and has env and securityContext."""
        image = "opensandbox/egress:v1.0.3"
        policy = NetworkPolicy(
            default_action="deny",
            egress=[NetworkRule(action="allow", target="pypi.org")],
        )

        sidecar = _egress_container(image, policy)

        assert sidecar["name"] == "egress"
        assert sidecar["image"] == image
        for expected_key in ("env", "securityContext"):
            assert expected_key in sidecar

    def test_contains_egress_rules_environment_variable(self):
        """Without a token the sidecar has exactly two env vars: rules first, then mode."""
        image = "opensandbox/egress:v1.0.3"
        policy = NetworkPolicy(
            default_action="deny",
            egress=[NetworkRule(action="allow", target="example.com")],
        )

        sidecar_env = _egress_container(image, policy)["env"]

        assert len(sidecar_env) == 2
        rules_entry = sidecar_env[0]
        assert rules_entry["name"] == EGRESS_RULES_ENV
        assert rules_entry["value"] is not None
        mode_entry = sidecar_env[1]
        assert mode_entry["name"] == EGRESS_MODE_ENV
        assert mode_entry["value"] == EGRESS_MODE_DNS

    def test_contains_egress_token_when_provided(self):
        """A supplied egress_auth_token appears under OPENSANDBOX_EGRESS_TOKEN; mode stays DNS."""
        policy = NetworkPolicy(
            default_action="deny",
            egress=[NetworkRule(action="allow", target="example.com")],
        )

        sidecar = _egress_container(
            "opensandbox/egress:v1.0.3",
            policy,
            egress_auth_token="egress-token",
        )

        env_by_name = dict((entry["name"], entry["value"]) for entry in sidecar["env"])
        assert env_by_name[OPENSANDBOX_EGRESS_TOKEN] == "egress-token"
        assert env_by_name[EGRESS_MODE_ENV] == EGRESS_MODE_DNS

    def test_egress_mode_dns_nft(self):
        """Requesting EGRESS_MODE_DNS_NFT is reflected in the mode env var."""
        policy = NetworkPolicy(
            default_action="deny",
            egress=[NetworkRule(action="allow", target="example.com")],
        )

        sidecar = _egress_container(
            "opensandbox/egress:v1.0.3",
            policy,
            egress_mode=EGRESS_MODE_DNS_NFT,
        )

        env_by_name = dict((entry["name"], entry["value"]) for entry in sidecar["env"])
        assert env_by_name[EGRESS_MODE_ENV] == EGRESS_MODE_DNS_NFT

    def test_serializes_network_policy_correctly(self):
        """The first env var holds the policy as JSON with defaultAction and ordered egress rules."""
        allow_pypi = NetworkRule(action="allow", target="pypi.org")
        deny_malicious = NetworkRule(action="deny", target="*.malicious.com")
        policy = NetworkPolicy(default_action="deny", egress=[allow_pypi, deny_malicious])

        sidecar = _egress_container("opensandbox/egress:v1.0.3", policy)

        decoded = json.loads(sidecar["env"][0]["value"])

        assert "defaultAction" in decoded
        assert decoded["defaultAction"] == "deny"
        assert "egress" in decoded
        assert len(decoded["egress"]) == 2
        first_rule = decoded["egress"][0]
        assert first_rule["action"] == "allow"
        assert first_rule["target"] == "pypi.org"
        second_rule = decoded["egress"][1]
        assert second_rule["action"] == "deny"
        assert second_rule["target"] == "*.malicious.com"

    def test_handles_empty_egress_rules(self):
        """An empty rule list serializes to an empty "egress" array."""
        policy = NetworkPolicy(default_action="allow", egress=[])

        sidecar = _egress_container("opensandbox/egress:v1.0.3", policy)

        decoded = json.loads(sidecar["env"][0]["value"])

        assert decoded["defaultAction"] == "allow"
        assert decoded["egress"] == []

    def test_handles_missing_default_action(self):
        """Without default_action, the JSON either omits defaultAction or sets it to null."""
        policy = NetworkPolicy(
            egress=[NetworkRule(action="allow", target="example.com")],
        )

        sidecar = _egress_container("opensandbox/egress:v1.0.3", policy)

        decoded = json.loads(sidecar["env"][0]["value"])

        # Fails only when the key is present AND carries a non-null value.
        assert not ("defaultAction" in decoded and decoded.get("defaultAction") is not None)
        assert "egress" in decoded

    def test_security_context_adds_net_admin_not_privileged(self):
        """The sidecar adds the NET_ADMIN capability and is not marked privileged.

        The original note says IPv6 is disabled in execd init when egress is
        on; this test does not check that.
        """
        policy = NetworkPolicy(default_action="deny", egress=[])

        sidecar = _egress_container("opensandbox/egress:v1.0.3", policy)

        sidecar_security = sidecar["securityContext"]
        assert sidecar_security.get("privileged") is not True
        added_capabilities = sidecar_security.get("capabilities", {}).get("add", [])
        assert "NET_ADMIN" in added_capabilities

    def test_no_command_uses_image_entrypoint(self):
        """The sidecar dict carries no "command" key."""
        policy = NetworkPolicy(default_action="deny", egress=[])
        sidecar = _egress_container("opensandbox/egress:v1.0.3", policy)
        assert "command" not in sidecar

    def test_container_spec_is_valid_kubernetes_format(self):
        """The sidecar has the expected top-level keys and a non-empty list of name/value env entries."""
        policy = NetworkPolicy(
            default_action="deny",
            egress=[NetworkRule(action="allow", target="example.com")],
        )

        sidecar = _egress_container("opensandbox/egress:v1.0.3", policy)

        for required_key in ("name", "image", "env", "securityContext"):
            assert required_key in sidecar

        assert isinstance(sidecar["env"], list)
        assert len(sidecar["env"]) > 0
        leading_env = sidecar["env"][0]
        assert "name" in leading_env
        assert "value" in leading_env
        assert "command" not in sidecar

    def test_handles_wildcard_domains(self):
        """Wildcard targets pass through serialization unchanged and in order."""
        wildcard_rule = NetworkRule(action="allow", target="*.python.org")
        exact_rule = NetworkRule(action="allow", target="pypi.org")
        policy = NetworkPolicy(default_action="deny", egress=[wildcard_rule, exact_rule])

        sidecar = _egress_container("opensandbox/egress:v1.0.3", policy)

        decoded = json.loads(sidecar["env"][0]["value"])
        serialized_rules = decoded["egress"]

        assert len(serialized_rules) == 2
        assert serialized_rules[0]["target"] == "*.python.org"
        assert serialized_rules[1]["target"] == "pypi.org"


class TestBuildSecurityContextForMainContainer:
    """Tests for ``build_security_context_for_sandbox_container``."""

    def test_returns_empty_dict_when_no_network_policy(self):
        """With has_network_policy=False the function returns an empty dict."""
        context = build_security_context_for_sandbox_container(has_network_policy=False)
        assert context == {}

    def test_drops_net_admin_when_network_policy_enabled(self):
        """With has_network_policy=True the result drops the NET_ADMIN capability."""
        context = build_security_context_for_sandbox_container(has_network_policy=True)

        assert "capabilities" in context
        capabilities = context["capabilities"]
        assert "drop" in capabilities
        assert "NET_ADMIN" in capabilities["drop"]


class TestApplyEgressToSpec:
    """Tests for apply_egress_to_spec function."""

    # Image reference shared by every test in this class.
    _EGRESS_IMAGE = "opensandbox/egress:v1.0.3"

    @staticmethod
    def _example_policy():
        """Build a deny-by-default policy whose only allow rule targets example.com."""
        return NetworkPolicy(
            default_action="deny",
            egress=[NetworkRule(action="allow", target="example.com")],
        )

    def test_adds_egress_sidecar_container(self):
        """A container named "egress" using the given image is appended."""
        sidecars: list = []

        apply_egress_to_spec(sidecars, self._example_policy(), self._EGRESS_IMAGE)

        assert len(sidecars) == 1
        added = sidecars[0]
        assert added["name"] == "egress"
        assert added["image"] == self._EGRESS_IMAGE

    def test_does_not_touch_unrelated_pod_state(self):
        """apply_egress_to_spec only appends to containers (no pod_spec parameter)."""
        sidecars: list = []

        apply_egress_to_spec(sidecars, self._example_policy(), self._EGRESS_IMAGE)

        assert len(sidecars) == 1

    def test_preserves_existing_pod_sysctls_when_not_passed_in(self):
        """A pod spec dict that is never handed to the function stays untouched."""
        existing_sysctls = [
            {"name": "net.core.somaxconn", "value": "1024"},
            {"name": "net.ipv6.conf.all.disable_ipv6", "value": "0"},
        ]
        pod_spec: dict = {"securityContext": {"sysctls": existing_sysctls}}
        sidecars: list = []

        # pod_spec is deliberately not an argument here.
        apply_egress_to_spec(sidecars, self._example_policy(), self._EGRESS_IMAGE)

        sysctls = pod_spec["securityContext"]["sysctls"]
        value_by_name = dict((entry["name"], entry["value"]) for entry in sysctls)

        assert value_by_name["net.core.somaxconn"] == "1024"
        assert value_by_name["net.ipv6.conf.all.disable_ipv6"] == "0"
        assert len(sysctls) == 2

    def test_no_op_when_no_network_policy(self):
        """Nothing is appended when network_policy is None."""
        sidecars: list = []

        apply_egress_to_spec(sidecars, None, "opensandbox/egress:v1.0.3")

        assert len(sidecars) == 0

    def test_no_op_when_no_egress_image(self):
        """Nothing is appended when egress_image is None."""
        sidecars: list = []

        apply_egress_to_spec(sidecars, self._example_policy(), None)

        assert len(sidecars) == 0


class TestPrepExecdInitForEgress:
    """Tests for prep_execd_init_for_egress."""

    def test_returns_privileged_security_dict_and_prefixed_script(self):
        """Returns a privileged security dict and a script that ends with the base command."""
        base_command = "cp ./execd /opt/opensandbox/bin/execd"

        wrapped_script, security_context = prep_execd_init_for_egress(base_command)

        assert security_context == {"privileged": True}
        # The returned script mentions the IPv6 sysctl path and ends with the base command.
        assert "/proc/sys/net/ipv6/conf/all/disable_ipv6" in wrapped_script
        assert wrapped_script.endswith(base_command)


================================================
FILE: server/tests/k8s/test_image_pull_secret_helper.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Unit tests for image_pull_secret_helper.
"""

import base64
import json

from src.api.schema import ImageAuth
from src.services.k8s.image_pull_secret_helper import (
    IMAGE_AUTH_SECRET_PREFIX,
    build_image_pull_secret,
    build_image_pull_secret_name,
)


class TestBuildImagePullSecretName:
    """Tests for build_image_pull_secret_name."""

    def test_returns_deterministic_name(self):
        """The name is the shared prefix, a hyphen, then the sandbox id."""
        actual = build_image_pull_secret_name("abc123")
        expected = f"{IMAGE_AUTH_SECRET_PREFIX}-abc123"
        assert actual == expected

    def test_different_ids_produce_different_names(self):
        """Distinct sandbox ids produce distinct secret names."""
        first = build_image_pull_secret_name("id-1")
        second = build_image_pull_secret_name("id-2")
        assert first != second


class TestBuildImagePullSecret:
    """Tests for build_image_pull_secret."""

    def _auth(self, username="user", password="pass") -> ImageAuth:
        """Create an ImageAuth, defaulting to placeholder credentials."""
        return ImageAuth(username=username, password=password)

    def _decode_docker_config(self, secret) -> dict:
        """Base64-decode and JSON-parse the secret's ``.dockerconfigjson`` entry."""
        encoded = secret.data[".dockerconfigjson"]
        return json.loads(base64.b64decode(encoded))

    def _build(
        self,
        image_uri,
        auth=None,
        owner_uid="uid",
        owner_api_version="sandbox.opensandbox.io/v1alpha1",
    ):
        """Call build_image_pull_secret with the values every test here shares.

        The sandbox id is always "sid" and the owner kind "BatchSandbox";
        ``auth`` falls back to ``self._auth()`` when omitted.
        """
        return build_image_pull_secret(
            sandbox_id="sid",
            image_uri=image_uri,
            auth=self._auth() if auth is None else auth,
            owner_uid=owner_uid,
            owner_api_version=owner_api_version,
            owner_kind="BatchSandbox",
        )

    def test_secret_metadata(self):
        """Name, type, apiVersion and kind identify a docker-config Secret."""
        secret = self._build("registry.example.com/ns/img:tag", owner_uid="uid-1")

        assert secret.metadata.name == f"{IMAGE_AUTH_SECRET_PREFIX}-sid"
        assert secret.type == "kubernetes.io/dockerconfigjson"
        assert secret.api_version == "v1"
        assert secret.kind == "Secret"

    def test_owner_reference(self):
        """Exactly one non-controller owner reference points at the owning workload."""
        secret = self._build("registry.example.com/img:tag", owner_uid="uid-abc")

        owner_refs = secret.metadata.owner_references
        assert len(owner_refs) == 1
        owner = owner_refs[0]
        assert owner.uid == "uid-abc"
        assert owner.api_version == "sandbox.opensandbox.io/v1alpha1"
        assert owner.kind == "BatchSandbox"
        assert owner.name == "sid"
        assert owner.controller is False

    def test_private_registry_extracted_from_image_uri(self):
        """The registry host of the image URI becomes the auths key."""
        secret = self._build("registry.example.com/ns/img:tag", auth=self._auth("u", "p"))

        docker_config = self._decode_docker_config(secret)
        assert "registry.example.com" in docker_config["auths"]

    def test_docker_hub_image_uses_default_registry(self):
        """An image URI without a registry host maps to the Docker Hub index URL."""
        secret = self._build("python:3.11", auth=self._auth("u", "p"))

        docker_config = self._decode_docker_config(secret)
        assert "https://index.docker.io/v1/" in docker_config["auths"]

    def test_auth_credentials_encoded_correctly(self):
        """Username, password and the base64 ``user:pass`` token are all stored."""
        secret = self._build(
            "registry.example.com/img:tag", auth=self._auth("myuser", "mypass")
        )

        docker_config = self._decode_docker_config(secret)
        entry = docker_config["auths"]["registry.example.com"]
        assert entry["username"] == "myuser"
        assert entry["password"] == "mypass"
        assert entry["auth"] == base64.b64encode(b"myuser:mypass").decode()

    def test_image_with_port_uses_host_port_as_registry(self):
        """A ``host:port`` registry prefix is kept whole as the auths key."""
        secret = self._build(
            "localhost:5000/myimage:latest", owner_api_version="v1alpha1"
        )

        docker_config = self._decode_docker_config(secret)
        assert "localhost:5000" in docker_config["auths"]


================================================
FILE: server/tests/k8s/test_informer.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Unit tests for WorkloadInformer."""

import time
from unittest.mock import MagicMock

from src.services.k8s.informer import WorkloadInformer


def _make_informer(**kwargs) -> WorkloadInformer:
    """Build a WorkloadInformer with watch disabled.

    A ``list_fn`` keyword overrides the default mock, which returns an empty
    list response; all other keywords are forwarded to the constructor.
    """
    empty_list_fn = MagicMock(return_value={"items": [], "metadata": {}})
    if "list_fn" in kwargs:
        chosen_list_fn = kwargs.pop("list_fn")
    else:
        chosen_list_fn = empty_list_fn
    return WorkloadInformer(list_fn=chosen_list_fn, enable_watch=False, **kwargs)


def _list_response(*names: str) -> dict:
    """Build a fake CustomObjects list API response."""
    return {
        "metadata": {"resourceVersion": "42"},
        "items": [{"metadata": {"name": n, "resourceVersion": "1"}} for n in names],
    }


class TestWorkloadInformerInit:
    """Construction and property defaults."""

    def test_has_synced_is_false_before_start(self):
        """A freshly constructed informer reports has_synced as False."""
        fresh = _make_informer()
        assert fresh.has_synced is False

    def test_get_returns_none_before_sync(self):
        """get() on a freshly constructed informer returns None."""
        fresh = _make_informer()
        assert fresh.get("anything") is None

    def test_resync_and_watch_params_stored(self):
        """Resync period and watch timeout passed in are readable as attributes."""
        timing = {"resync_period_seconds": 120, "watch_timeout_seconds": 30}
        informer = _make_informer(**timing)
        assert informer.resync_period_seconds == 120
        assert informer.watch_timeout_seconds == 30

    def test_custom_thread_name_is_stored(self):
        """A thread_name passed to the constructor is kept on _thread_name."""
        named = _make_informer(thread_name="informer-foos-default")
        assert named._thread_name == "informer-foos-default"

    def test_default_thread_name(self):
        """Without thread_name, _thread_name is 'workload-informer'."""
        unnamed = _make_informer()
        assert unnamed._thread_name == "workload-informer"


class TestWorkloadInformerFullResync:
    """_full_resync populates the cache correctly."""

    def test_full_resync_populates_cache(self):
        """Listed objects become retrievable via get(); unlisted names stay None."""
        informer = _make_informer(
            list_fn=MagicMock(return_value=_list_response("alpha", "beta"))
        )
        informer._full_resync()

        for listed_name in ("alpha", "beta"):
            assert informer.get(listed_name) is not None
        assert informer.get("gamma") is None

    def test_full_resync_sets_has_synced(self):
        """has_synced is True once _full_resync has run."""
        informer = _make_informer(list_fn=MagicMock(return_value=_list_response("x")))
        informer._full_resync()
        assert informer.has_synced is True

    def test_full_resync_stores_resource_version(self):
        """The list metadata's resourceVersion is kept on _resource_version."""
        informer = _make_informer(list_fn=MagicMock(return_value=_list_response("x")))
        informer._full_resync()
        assert informer._resource_version == "42"

    def test_full_resync_replaces_stale_cache(self):
        """A later resync drops objects that are absent from the new list."""
        lister = MagicMock(return_value=_list_response("old"))
        informer = _make_informer(list_fn=lister)

        informer._full_resync()
        assert informer.get("old") is not None

        # Swap the listing so the next resync sees a different set of objects.
        lister.return_value = _list_response("new")
        informer._full_resync()
        assert informer.get("old") is None
        assert informer.get("new") is not None


class TestWorkloadInformerUpdateCache:
    """update_cache upserts objects into the cache."""

    def test_update_cache_adds_new_object(self):
        """An object pushed through update_cache is returned by get()."""
        informer = _make_informer()
        fresh_obj = {"metadata": {"name": "foo", "resourceVersion": "5"}}
        informer.update_cache(fresh_obj)
        assert informer.get("foo") == fresh_obj

    def test_update_cache_overwrites_existing_object(self):
        """A second update_cache for the same name replaces the first entry."""
        informer = _make_informer()
        first_version = {"metadata": {"name": "foo", "resourceVersion": "1"}}
        second_version = {"metadata": {"name": "foo", "resourceVersion": "2"}}
        informer.update_cache(first_version)
        informer.update_cache(second_version)
        assert informer.get("foo") == second_version

    def test_update_cache_ignores_object_without_name(self):
        """An object with no metadata.name leaves the cache empty and raises nothing."""
        informer = _make_informer()
        nameless = {"metadata": {}}
        informer.update_cache(nameless)
        # Nothing was stored for the nameless object.
        assert informer._cache == {}

    def test_update_cache_updates_resource_version(self):
        """_resource_version picks up the object's resourceVersion."""
        informer = _make_informer()
        incoming = {"metadata": {"name": "foo", "resourceVersion": "99"}}
        informer.update_cache(incoming)
        assert informer._resource_version == "99"

    def test_update_cache_does_not_downgrade_resource_version(self):
        """An older incoming resourceVersion leaves _resource_version unchanged."""
        informer = _make_informer()
        informer._resource_version = "200"
        older = {"metadata": {"name": "foo", "resourceVersion": "100"}}
        informer.update_cache(older)
        assert informer._resource_version == "200"

    def test_update_cache_advances_resource_version_when_newer(self):
        """A strictly newer incoming resourceVersion replaces _resource_version."""
        informer = _make_informer()
        informer._resource_version = "50"
        newer = {"metadata": {"name": "foo", "resourceVersion": "99"}}
        informer.update_cache(newer)
        assert informer._resource_version == "99"


class TestWorkloadInformerHandleEvent:
    """_handle_event applies watch events to the cache."""

    def test_handle_added_event_inserts_object(self):
        """An ADDED event makes its object retrievable via get()."""
        informer = _make_informer()
        added = {"metadata": {"name": "bar", "resourceVersion": "10"}}
        event = {"type": "ADDED", "object": added}
        informer._handle_event(event)
        assert informer.get("bar") == added

    def test_handle_modified_event_replaces_object(self):
        """A MODIFIED event swaps the cached entry for the new object."""
        informer = _make_informer()
        informer._cache["bar"] = {"metadata": {"name": "bar", "resourceVersion": "1"}}
        modified = {"metadata": {"name": "bar", "resourceVersion": "2"}}
        event = {"type": "MODIFIED", "object": modified}
        informer._handle_event(event)
        assert informer.get("bar") == modified

    def test_handle_deleted_event_removes_object(self):
        """A DELETED event evicts the named object from the cache."""
        informer = _make_informer()
        informer._cache["bar"] = {"metadata": {"name": "bar"}}
        event = {"type": "DELETED", "object": {"metadata": {"name": "bar"}}}
        informer._handle_event(event)
        assert informer.get("bar") is None

    def test_handle_event_ignores_none_object(self):
        """An event whose object is None leaves the cache empty."""
        informer = _make_informer()
        event = {"type": "ADDED", "object": None}
        informer._handle_event(event)
        assert informer._cache == {}

    def test_handle_event_ignores_object_without_name(self):
        """An event whose object has no metadata.name leaves the cache empty."""
        informer = _make_informer()
        event = {"type": "ADDED", "object": {"metadata": {}}}
        informer._handle_event(event)
        assert informer._cache == {}

    def test_handle_event_converts_non_dict_object(self):
        """A non-dict object is cached under the name reported by its to_dict()."""
        informer = _make_informer()
        sdk_object = MagicMock()
        sdk_object.to_dict.return_value = {
            "metadata": {"name": "sdk-obj", "resourceVersion": "3"}
        }
        event = {"type": "ADDED", "object": sdk_object}
        informer._handle_event(event)
        assert informer.get("sdk-obj") is not None

    def test_handle_event_updates_resource_version(self):
        """_resource_version picks up the event object's resourceVersion."""
        informer = _make_informer()
        payload = {"metadata": {"name": "foo", "resourceVersion": "77"}}
        informer._handle_event({"type": "ADDED", "object": payload})
        assert informer._resource_version == "77"

    def test_handle_event_does_not_downgrade_resource_version(self):
        """An older event resourceVersion leaves _resource_version unchanged."""
        informer = _make_informer()
        informer._resource_version = "200"
        payload = {"metadata": {"name": "foo", "resourceVersion": "50"}}
        informer._handle_event({"type": "MODIFIED", "object": payload})
        assert informer._resource_version == "200"


class TestWorkloadInformerStartStop:
    """start/stop thread lifecycle.

    Every test that starts an informer stops it in a ``finally`` block, so a
    failing assertion cannot leave the informer thread running into later tests.
    """

    def test_start_launches_daemon_thread(self):
        """start() creates a thread that is alive (the daemon flag is not asserted)."""
        list_fn = MagicMock(return_value={"items": [], "metadata": {}})
        informer = WorkloadInformer(list_fn=list_fn, enable_watch=False,
                                    resync_period_seconds=9999)
        informer.start()
        try:
            assert informer._thread is not None
            assert informer._thread.is_alive()
        finally:
            # Stop even when an assertion above fails.
            informer.stop()

    def test_start_is_idempotent(self):
        """Calling start() twice keeps the same thread object."""
        list_fn = MagicMock(return_value={"items": [], "metadata": {}})
        informer = WorkloadInformer(list_fn=list_fn, enable_watch=False,
                                    resync_period_seconds=9999)
        informer.start()
        try:
            first_thread = informer._thread
            informer.start()
            assert informer._thread is first_thread
        finally:
            # Stop even when the identity check fails.
            informer.stop()

    def test_stop_signals_stop_event(self):
        """stop() sets the internal stop event, even on an informer never started."""
        informer = _make_informer()
        informer.stop()
        assert informer._stop_event.is_set()

    def test_poll_mode_resets_has_synced_after_wait(self):
        """With enable_watch=False the informer calls list_fn again on later loops.

        The test only observes repeated list_fn calls; it does not inspect
        _has_synced directly.
        """
        call_count = 0

        def list_fn():
            nonlocal call_count
            call_count += 1
            return {"items": [], "metadata": {"resourceVersion": str(call_count)}}

        informer = WorkloadInformer(
            list_fn=list_fn,
            enable_watch=False,
            resync_period_seconds=0,  # no wait, loop immediately
        )
        informer.start()
        try:
            # Wait up to two seconds for at least two list calls.
            deadline = time.monotonic() + 2.0
            while call_count < 2 and time.monotonic() < deadline:
                time.sleep(0.01)
        finally:
            informer.stop()

        assert call_count >= 2, "list_fn should be called more than once in poll mode"


================================================
FILE: server/tests/k8s/test_k8s_client.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Unit tests for K8sClient.
"""

import pytest
from unittest.mock import MagicMock, patch

from kubernetes.client import ApiException

from src.config import KubernetesRuntimeConfig
from src.services.k8s.client import K8sClient


class TestK8sClient:
    """K8sClient unit tests"""
    
    def test_init_with_kubeconfig_loads_successfully(self, k8s_runtime_config):
        """The configured kubeconfig path is passed to load_kube_config exactly once."""
        with patch('kubernetes.config.load_kube_config') as load_kube_config:
            k8s = K8sClient(k8s_runtime_config)

        # The mock keeps its call record after the patch context exits.
        assert k8s.config == k8s_runtime_config
        load_kube_config.assert_called_once_with(
            config_file=k8s_runtime_config.kubeconfig_path
        )

    def test_init_with_incluster_config_loads_successfully(self):
        """With kubeconfig_path=None the in-cluster loader is called exactly once."""
        in_cluster_config = KubernetesRuntimeConfig(kubeconfig_path=None, namespace="test-ns")

        with patch('kubernetes.config.load_incluster_config') as load_incluster:
            k8s = K8sClient(in_cluster_config)

        # The mock keeps its call record after the patch context exits.
        assert k8s.config == in_cluster_config
        load_incluster.assert_called_once()

    def test_init_with_invalid_kubeconfig_raises_exception(self):
        """A loader failure surfaces as an error mentioning the configuration load."""
        bad_config = KubernetesRuntimeConfig(kubeconfig_path="/invalid/path", namespace="test-ns")
        loader_failure = Exception("Config file not found")

        # Passing side_effect to patch configures the replacement mock to raise.
        with patch('kubernetes.config.load_kube_config', side_effect=loader_failure):
            with pytest.raises(Exception) as raised:
                K8sClient(bad_config)

        assert "Failed to load Kubernetes configuration" in str(raised.value)

    def test_get_core_v1_api_returns_singleton(self, k8s_runtime_config):
        """Repeated get_core_v1_api() calls return one instance, built once."""
        with patch('kubernetes.config.load_kube_config'):
            with patch('kubernetes.client.CoreV1Api', return_value=MagicMock()) as core_api_cls:
                k8s = K8sClient(k8s_runtime_config)

                first_handle = k8s.get_core_v1_api()
                second_handle = k8s.get_core_v1_api()

                assert first_handle is second_handle
                assert core_api_cls.call_count == 1

    def test_get_custom_objects_api_returns_singleton(self, k8s_runtime_config):
        """Repeated get_custom_objects_api() calls return one instance, built once."""
        with patch('kubernetes.config.load_kube_config'):
            with patch('kubernetes.client.CustomObjectsApi', return_value=MagicMock()) as custom_api_cls:
                k8s = K8sClient(k8s_runtime_config)

                first_handle = k8s.get_custom_objects_api()
                second_handle = k8s.get_custom_objects_api()

                assert first_handle is second_handle
                assert custom_api_cls.call_count == 1
    
    def test_get_core_v1_api_creates_on_first_call(self, k8s_runtime_config):
        """CoreV1Api is not built by the constructor, only by the first getter call."""
        with patch('kubernetes.config.load_kube_config'):
            with patch('kubernetes.client.CoreV1Api') as core_api_cls:
                k8s = K8sClient(k8s_runtime_config)

                # No API object has been built by construction alone.
                assert core_api_cls.call_count == 0
                k8s.get_core_v1_api()
                assert core_api_cls.call_count == 1

    # ------------------------------------------------------------------
    # Rate limiter initialization
    # ------------------------------------------------------------------

    def test_no_rate_limiters_when_qps_is_zero(self, k8s_runtime_config):
        """The fixture config yields no read limiter and no write limiter."""
        with patch('kubernetes.config.load_kube_config'):
            k8s = K8sClient(k8s_runtime_config)

        limiters = (k8s._read_limiter, k8s._write_limiter)
        assert limiters[0] is None
        assert limiters[1] is None

    def test_read_limiter_created_when_read_qps_set(self):
        """Setting only read_qps creates a read limiter and no write limiter."""
        read_limited = KubernetesRuntimeConfig(read_qps=10.0, read_burst=20)

        with patch('kubernetes.config.load_incluster_config'):
            k8s = K8sClient(read_limited)

        assert k8s._read_limiter is not None
        assert k8s._write_limiter is None

    def test_write_limiter_created_when_write_qps_set(self):
        """Setting only write_qps creates a write limiter and no read limiter."""
        write_limited = KubernetesRuntimeConfig(write_qps=5.0, write_burst=10)

        with patch('kubernetes.config.load_incluster_config'):
            k8s = K8sClient(write_limited)

        assert k8s._read_limiter is None
        assert k8s._write_limiter is not None

    # ------------------------------------------------------------------
    # CustomObject CRUD
    # ------------------------------------------------------------------

    def _make_client(self, k8s_runtime_config):
        """Build a K8sClient with kubeconfig loading patched out and API handles mocked.

        The three private API handle attributes each get their own MagicMock.
        """
        with patch('kubernetes.config.load_kube_config'):
            client = K8sClient(k8s_runtime_config)
        for handle_attr in ("_custom_objects_api", "_core_v1_api", "_node_v1_api"):
            setattr(client, handle_attr, MagicMock())
        return client

    def test_create_custom_object_delegates_to_api(self, k8s_runtime_config):
        """Positional arguments reach the raw create call as keyword arguments."""
        client = self._make_client(k8s_runtime_config)
        manifest = {"metadata": {"name": "foo"}}

        client.create_custom_object("g", "v1", "ns", "foos", manifest)

        expected_kwargs = {
            "group": "g",
            "version": "v1",
            "namespace": "ns",
            "plural": "foos",
            "body": manifest,
        }
        raw_create = client._custom_objects_api.create_namespaced_custom_object
        raw_create.assert_called_once_with(**expected_kwargs)

    def test_get_custom_object_returns_none_on_404(self, k8s_runtime_config):
        """A 404 from the raw API is translated into a None result."""
        client = self._make_client(k8s_runtime_config)
        raw_get = client._custom_objects_api.get_namespaced_custom_object
        raw_get.side_effect = ApiException(status=404)

        assert client.get_custom_object("g", "v1", "ns", "foos", "foo-1") is None

    def test_get_custom_object_returns_object(self, k8s_runtime_config):
        """A successful raw API call's object is returned to the caller."""
        client = self._make_client(k8s_runtime_config)
        stored = {"metadata": {"name": "foo-1"}}
        raw_get = client._custom_objects_api.get_namespaced_custom_object
        raw_get.return_value = stored

        fetched = client.get_custom_object("g", "v1", "ns", "foos", "foo-1")

        assert fetched == stored

    def test_get_custom_object_updates_informer_cache_on_api_hit(self, k8s_runtime_config):
        """An object fetched from the API is pushed into the informer via update_cache."""
        client = self._make_client(k8s_runtime_config)
        api_object = {"metadata": {"name": "foo-1", "resourceVersion": "10"}}
        client._custom_objects_api.get_namespaced_custom_object.return_value = api_object

        # Register an informer that has not synced yet for this resource key.
        unsynced_informer = MagicMock(has_synced=False)
        client._informers[("g", "v1", "foos", "ns")] = unsynced_informer
        client.config = MagicMock(
            informer_enabled=True,
            informer_resync_seconds=300,
            informer_watch_timeout_seconds=60,
            read_qps=0.0,
            write_qps=0.0,
        )

        client.get_custom_object("g", "v1", "ns", "foos", "foo-1")

        unsynced_informer.update_cache.assert_called_once_with(api_object)

    def test_get_custom_object_reraises_non_404(self, k8s_runtime_config):
        """An ApiException with a status other than 404 propagates to the caller."""
        client = self._make_client(k8s_runtime_config)
        server_error = ApiException(status=500)
        client._custom_objects_api.get_namespaced_custom_object.side_effect = server_error

        with pytest.raises(ApiException):
            client.get_custom_object("g", "v1", "ns", "foos", "foo-1")

    def test_get_custom_object_returns_cached_when_synced(self, k8s_runtime_config):
        """A synced informer's cached object is returned and the raw API is not called."""
        client = self._make_client(k8s_runtime_config)
        cached = {"metadata": {"name": "foo-1"}}

        synced_informer = MagicMock(has_synced=True)
        synced_informer.get.return_value = cached
        client._informers[("g", "v1", "foos", "ns")] = synced_informer

        # A mocked config turns the informer path on for this lookup.
        client.config = MagicMock(
            informer_enabled=True,
            informer_resync_seconds=300,
            informer_watch_timeout_seconds=60,
            read_qps=0.0,
            write_qps=0.0,
        )

        fetched = client.get_custom_object("g", "v1", "ns", "foos", "foo-1")

        assert fetched is cached
        client._custom_objects_api.get_namespaced_custom_object.assert_not_called()

    def test_get_custom_object_skips_informer_when_disabled(self, k8s_runtime_config):
        """With informer_enabled=False the raw API is called exactly once."""
        client = self._make_client(k8s_runtime_config)
        client.config = MagicMock(informer_enabled=False, read_qps=0.0)
        api_object = {"metadata": {"name": "foo-1"}}
        raw_get = client._custom_objects_api.get_namespaced_custom_object
        raw_get.return_value = api_object

        fetched = client.get_custom_object("g", "v1", "ns", "foos", "foo-1")

        assert fetched == api_object
        raw_get.assert_called_once()

    def test_list_custom_objects_returns_items(self, k8s_runtime_config):
        """Two items in the raw list response produce a result of length two."""
        client = self._make_client(k8s_runtime_config)
        listed_items = [{"metadata": {"name": "a"}}, {"metadata": {"name": "b"}}]
        raw_list = client._custom_objects_api.list_namespaced_custom_object
        raw_list.return_value = {"items": listed_items}

        listed = client.list_custom_objects("g", "v1", "ns", "foos")

        assert len(listed) == 2

    def test_list_custom_objects_returns_empty_on_404(self, k8s_runtime_config):
        """A 404 from the raw list call is translated into an empty list."""
        client = self._make_client(k8s_runtime_config)
        raw_list = client._custom_objects_api.list_namespaced_custom_object
        raw_list.side_effect = ApiException(status=404)

        assert client.list_custom_objects("g", "v1", "ns", "foos") == []

    def test_list_custom_objects_reraises_non_404(self, k8s_runtime_config):
        """An ApiException with a status other than 404 propagates from list."""
        client = self._make_client(k8s_runtime_config)
        server_error = ApiException(status=500)
        client._custom_objects_api.list_namespaced_custom_object.side_effect = server_error

        with pytest.raises(ApiException):
            client.list_custom_objects("g", "v1", "ns", "foos")

    def test_delete_custom_object_delegates_to_api(self, k8s_runtime_config):
        """Delete arguments, including the grace period, reach the raw API as keywords."""
        client = self._make_client(k8s_runtime_config)

        client.delete_custom_object("g", "v1", "ns", "foos", "foo-1", grace_period_seconds=0)

        expected_kwargs = {
            "group": "g",
            "version": "v1",
            "namespace": "ns",
            "plural": "foos",
            "name": "foo-1",
            "grace_period_seconds": 0,
        }
        raw_delete = client._custom_objects_api.delete_namespaced_custom_object
        raw_delete.assert_called_once_with(**expected_kwargs)

    def test_patch_custom_object_delegates_to_api(self, k8s_runtime_config):
        """patch_custom_object hands its arguments and body to the raw API as keywords."""
        client = self._make_client(k8s_runtime_config)
        patch_body = {"spec": {"replicas": 2}}

        client.patch_custom_object("g", "v1", "ns", "foos", "foo-1", patch_body)

        # The raw API must have been invoked exactly once with these keywords.
        expected_kwargs = {
            "group": "g",
            "version": "v1",
            "namespace": "ns",
            "plural": "foos",
            "name": "foo-1",
            "body": patch_body,
        }
        raw_patch = client._custom_objects_api.patch_namespaced_custom_object
        raw_patch.assert_called_once_with(**expected_kwargs)

    # ------------------------------------------------------------------
    # Secret / Pod / RuntimeClass
    # ------------------------------------------------------------------

    def test_create_secret_delegates_to_api(self, k8s_runtime_config):
        """create_secret calls CoreV1Api.create_namespaced_secret with namespace and body."""
        client = self._make_client(k8s_runtime_config)
        secret_body = {"metadata": {"name": "my-secret"}}

        client.create_secret("ns", secret_body)

        raw_create = client._core_v1_api.create_namespaced_secret
        raw_create.assert_called_once_with(namespace="ns", body=secret_body)

    def test_list_pods_returns_items(self, k8s_runtime_config):
        """list_pods hands back the ``items`` of the API response and forwards the selector."""
        client = self._make_client(k8s_runtime_config)
        pod = MagicMock()
        raw_list = client._core_v1_api.list_namespaced_pod
        raw_list.return_value = MagicMock(items=[pod])

        pods = client.list_pods("ns", label_selector="app=foo")

        assert pods == [pod]
        raw_list.assert_called_once_with(namespace="ns", label_selector="app=foo")

    def test_list_pods_returns_empty_list_on_exception(self, k8s_runtime_config):
        """
        list_pods lets an exception raised by the API propagate.

        NOTE(review): the test name mentions an empty list, but the assertion
        below expects the "network error" exception to be re-raised.
        """
        client = self._make_client(k8s_runtime_config)
        failure = Exception("network error")
        client._core_v1_api.list_namespaced_pod.side_effect = failure

        with pytest.raises(Exception, match="network error"):
            client.list_pods("ns")

    def test_read_runtime_class_delegates_to_api(self, k8s_runtime_config):
        """read_runtime_class forwards to NodeV1Api.read_runtime_class."""
        c = self._make_client(k8s_runtime_config)
        # ``MagicMock(name="gvisor")`` would only set the mock's own repr name,
        # not a ``.name`` attribute, so the attribute is assigned explicitly to
        # make the stubbed RuntimeClass really carry ``metadata.name == "gvisor"``.
        runtime_class = MagicMock()
        runtime_class.metadata.name = "gvisor"
        c._node_v1_api.read_runtime_class.return_value = runtime_class

        result = c.read_runtime_class("gvisor")

        # The name is passed positionally, exactly once.
        c._node_v1_api.read_runtime_class.assert_called_once_with("gvisor")
        assert result is not None

    # ------------------------------------------------------------------
    # Write limiter integration
    # ------------------------------------------------------------------

    def test_write_limiter_called_on_create(self, k8s_runtime_config):
        """create_custom_object acquires the write limiter exactly once."""
        client = self._make_client(k8s_runtime_config)
        limiter = client._write_limiter = MagicMock()

        client.create_custom_object("g", "v1", "ns", "foos", {})

        limiter.acquire.assert_called_once()

    def test_write_limiter_called_on_delete(self, k8s_runtime_config):
        """delete_custom_object acquires the write limiter exactly once."""
        client = self._make_client(k8s_runtime_config)
        limiter = client._write_limiter = MagicMock()

        client.delete_custom_object("g", "v1", "ns", "foos", "foo-1")

        limiter.acquire.assert_called_once()

    def test_write_limiter_called_on_patch(self, k8s_runtime_config):
        """patch_custom_object acquires the write limiter exactly once."""
        client = self._make_client(k8s_runtime_config)
        limiter = client._write_limiter = MagicMock()

        client.patch_custom_object("g", "v1", "ns", "foos", "foo-1", {})

        limiter.acquire.assert_called_once()

    def test_write_limiter_called_on_create_secret(self, k8s_runtime_config):
        """create_secret acquires the write limiter exactly once."""
        client = self._make_client(k8s_runtime_config)
        limiter = client._write_limiter = MagicMock()

        client.create_secret("ns", {})

        limiter.acquire.assert_called_once()

    def test_read_limiter_called_on_get(self, k8s_runtime_config):
        """get_custom_object acquires the read limiter exactly once."""
        client = self._make_client(k8s_runtime_config)
        # Replace the config with a stub that has the informer switched off.
        client.config = MagicMock(informer_enabled=False, read_qps=0.0)
        client._custom_objects_api.get_namespaced_custom_object.return_value = {}
        limiter = client._read_limiter = MagicMock()

        client.get_custom_object("g", "v1", "ns", "foos", "foo-1")

        limiter.acquire.assert_called_once()

    def test_read_limiter_called_on_list(self, k8s_runtime_config):
        """list_custom_objects acquires the read limiter exactly once."""
        client = self._make_client(k8s_runtime_config)
        empty_payload = {"items": []}
        client._custom_objects_api.list_namespaced_custom_object.return_value = empty_payload
        limiter = client._read_limiter = MagicMock()

        client.list_custom_objects("g", "v1", "ns", "foos")

        limiter.acquire.assert_called_once()

    def test_read_limiter_called_on_list_pods(self, k8s_runtime_config):
        """list_pods acquires the read limiter exactly once."""
        client = self._make_client(k8s_runtime_config)
        empty_response = MagicMock(items=[])
        client._core_v1_api.list_namespaced_pod.return_value = empty_response
        limiter = client._read_limiter = MagicMock()

        client.list_pods("ns")

        limiter.acquire.assert_called_once()

    def test_read_limiter_called_on_read_runtime_class(self, k8s_runtime_config):
        """read_runtime_class acquires the read limiter exactly once."""
        client = self._make_client(k8s_runtime_config)
        limiter = client._read_limiter = MagicMock()

        client.read_runtime_class("gvisor")

        limiter.acquire.assert_called_once()


================================================
FILE: server/tests/k8s/test_kubernetes_service.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Unit tests for KubernetesSandboxService.
"""

import pytest
from datetime import datetime, timedelta, timezone
from unittest.mock import MagicMock, patch
from fastapi import HTTPException

from src.services.k8s.kubernetes_service import KubernetesSandboxService
from src.services.constants import (
    OPEN_SANDBOX_EGRESS_AUTH_HEADER,
    SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY,
    SANDBOX_MANUAL_CLEANUP_LABEL,
    SandboxErrorCodes,
)
from src.api.schema import ImageAuth, ListSandboxesRequest, NetworkPolicy
from src.config import EGRESS_MODE_DNS, EGRESS_MODE_DNS_NFT, EgressConfig
from src.api.schema import Endpoint


class TestKubernetesSandboxServiceInit:
    """KubernetesSandboxService initialization tests"""

    def test_init_with_valid_config_succeeds(self, k8s_app_config):
        """
        Test case: Successful initialization with valid config

        Purpose: With K8sClient and create_workload_provider patched out, the
        service exposes the configured namespace and execd image, builds the
        client from the kubernetes config section, and creates one provider.
        """
        client_patch = patch('src.services.k8s.kubernetes_service.K8sClient')
        provider_patch = patch(
            'src.services.k8s.kubernetes_service.create_workload_provider',
            return_value=MagicMock(),
        )

        with client_patch as client_cls:
            with provider_patch as provider_factory:
                service = KubernetesSandboxService(k8s_app_config)

                assert service.namespace == k8s_app_config.kubernetes.namespace
                assert service.execd_image == k8s_app_config.runtime.execd_image
                client_cls.assert_called_once_with(k8s_app_config.kubernetes)
                provider_factory.assert_called_once()

    def test_init_without_kubernetes_config_raises_error(self, app_config_no_k8s):
        """
        Test case: Raises exception when the Kubernetes config is not usable

        Purpose: Verify that an HTTPException with status 503 and the
        K8S_INITIALIZATION_ERROR code is raised for the app_config_no_k8s fixture
        """
        # app_config_no_k8s still has kubernetes config, just without kubeconfig
        # This will cause K8sClient initialization to fail and raise HTTPException
        with pytest.raises(HTTPException) as exc_info:
            KubernetesSandboxService(app_config_no_k8s)

        error = exc_info.value
        assert error.status_code == 503
        assert error.detail["code"] == SandboxErrorCodes.K8S_INITIALIZATION_ERROR

    def test_init_with_wrong_runtime_type_raises_error(self, app_config_docker):
        """
        Test case: Raises exception with wrong runtime type

        Purpose: Verify that a ValueError mentioning the required runtime type
        is raised when the service is built from the docker app config fixture
        """
        expected_message = "requires runtime.type = 'kubernetes'"
        with pytest.raises(ValueError, match=expected_message):
            KubernetesSandboxService(app_config_docker)

    def test_init_with_k8s_client_failure_raises_http_exception(self, k8s_app_config):
        """
        Test case: Raises HTTPException when K8sClient initialization fails

        Purpose: Verify that a failure while constructing K8sClient surfaces as
        an HTTPException with status 503 and the K8S_INITIALIZATION_ERROR code
        """
        failing_client = patch(
            'src.services.k8s.kubernetes_service.K8sClient',
            side_effect=Exception("Failed to load kubeconfig"),
        )

        with failing_client:
            with pytest.raises(HTTPException) as exc_info:
                KubernetesSandboxService(k8s_app_config)

            error = exc_info.value
            assert error.status_code == 503
            assert "code" in error.detail
            assert error.detail["code"] == SandboxErrorCodes.K8S_INITIALIZATION_ERROR


class TestKubernetesSandboxServiceCreate:
    """KubernetesSandboxService create_sandbox tests"""

    @staticmethod
    def _stub_running_workload(service, workload=None, *, name="test-id", uid="uid-1", message=""):
        """
        Program the mocked workload provider for a successful creation.

        Sets the return values of create_workload, get_workload and get_status
        on ``service.workload_provider`` so that the status reports "Running".

        Args:
            service: Service fixture whose ``workload_provider`` is a mock.
            workload: Object returned by get_workload; a fresh MagicMock when None.
            name: ``name`` entry of the create_workload result.
            uid: ``uid`` entry of the create_workload result.
            message: ``message`` entry of the status dict.
        """
        provider = service.workload_provider
        provider.create_workload.return_value = {"name": name, "uid": uid}
        provider.get_workload.return_value = MagicMock() if workload is None else workload
        provider.get_status.return_value = {
            "state": "Running",
            "reason": "",
            "message": message,
            "last_transition_at": datetime.now(timezone.utc),
        }

    @pytest.mark.asyncio
    async def test_create_sandbox_with_valid_request_succeeds(
        self, k8s_service, create_sandbox_request, mock_workload
    ):
        """
        Test case: Successfully create sandbox with valid request

        Purpose: Verify that sandbox can be successfully created with valid CreateSandboxRequest
        """
        self._stub_running_workload(
            k8s_service,
            mock_workload,
            name="test-sandbox-123",
            uid="abc-123",
            message="Pod is running",
        )
        provider = k8s_service.workload_provider
        provider.get_endpoint_info.return_value = "10.244.0.5:8080"
        provider.get_expiration.return_value = datetime.now(timezone.utc) + timedelta(hours=1)

        response = await k8s_service.create_sandbox(create_sandbox_request)

        # CreateSandboxResponse uses 'id' field
        assert response.id is not None
        assert response.status.state == "Running"
        provider.create_workload.assert_called_once()

    @pytest.mark.asyncio
    async def test_create_sandbox_uses_configured_timeout_and_poll_interval(
        self, k8s_service, create_sandbox_request, mock_workload
    ):
        """
        Test case: create_sandbox uses timeout and poll_interval from config

        Purpose: Verify that sandbox_create_timeout_seconds and
        sandbox_create_poll_interval_seconds are read from KubernetesRuntimeConfig
        and forwarded to _wait_for_sandbox_ready.
        """
        self._stub_running_workload(
            k8s_service,
            mock_workload,
            name="test-sandbox-123",
            uid="abc-123",
            message="Pod is running",
        )

        # Override config values
        k8s_service.app_config.kubernetes.sandbox_create_timeout_seconds = 120
        k8s_service.app_config.kubernetes.sandbox_create_poll_interval_seconds = 0.5

        # ``wraps`` keeps the real wait logic running while recording the call.
        with patch.object(k8s_service, "_wait_for_sandbox_ready", wraps=k8s_service._wait_for_sandbox_ready) as mock_wait:
            await k8s_service.create_sandbox(create_sandbox_request)

        mock_wait.assert_called_once()
        _, kwargs = mock_wait.call_args
        assert kwargs["timeout_seconds"] == 120
        assert kwargs["poll_interval_seconds"] == 0.5

    @pytest.mark.asyncio
    async def test_create_sandbox_rejects_image_auth_when_provider_not_supported(
        self, k8s_service, create_sandbox_request
    ):
        """Image auth on a provider without image-auth support yields 400 and no workload creation."""
        k8s_service.workload_provider.supports_image_auth.return_value = False
        create_sandbox_request.image.auth = ImageAuth(
            username="registry-user",
            password="registry-pass",
        )

        with pytest.raises(HTTPException) as exc_info:
            await k8s_service.create_sandbox(create_sandbox_request)

        assert exc_info.value.status_code == 400
        assert exc_info.value.detail["code"] == SandboxErrorCodes.INVALID_PARAMETER
        k8s_service.workload_provider.create_workload.assert_not_called()

    @pytest.mark.asyncio
    async def test_create_sandbox_allows_image_auth_when_provider_supported(
        self, k8s_service, create_sandbox_request
    ):
        """Image auth on a provider with image-auth support proceeds to create_workload."""
        k8s_service.workload_provider.supports_image_auth.return_value = True
        create_sandbox_request.image.auth = ImageAuth(
            username="registry-user",
            password="registry-pass",
        )
        self._stub_running_workload(k8s_service)

        # Should not raise
        await k8s_service.create_sandbox(create_sandbox_request)
        k8s_service.workload_provider.create_workload.assert_called_once()

    @pytest.mark.asyncio
    async def test_create_sandbox_with_no_timeout_calls_provider_with_expires_at_none_and_manual_cleanup_label(
        self, k8s_service, create_sandbox_request
    ):
        """When timeout is None (manual cleanup), provider receives expires_at=None and manual-cleanup label."""
        create_sandbox_request.timeout = None
        self._stub_running_workload(k8s_service)

        await k8s_service.create_sandbox(create_sandbox_request)

        k8s_service.workload_provider.create_workload.assert_called_once()
        _, kwargs = k8s_service.workload_provider.create_workload.call_args
        assert kwargs["expires_at"] is None
        assert kwargs["labels"].get(SANDBOX_MANUAL_CLEANUP_LABEL) == "true"

    @pytest.mark.asyncio
    async def test_create_sandbox_with_network_policy_passes_egress_token_and_annotations(
        self, k8s_service, create_sandbox_request
    ):
        """With a network policy, the generated egress token reaches the provider as kwarg and annotation."""
        create_sandbox_request.network_policy = NetworkPolicy(default_action="deny", egress=[])
        k8s_service.app_config.egress = EgressConfig(image="opensandbox/egress:v1.0.3")
        self._stub_running_workload(k8s_service)

        # Pin the token so the assertions below can compare against a known value.
        with patch(
            "src.services.k8s.kubernetes_service.generate_egress_token",
            return_value="egress-token",
        ):
            await k8s_service.create_sandbox(create_sandbox_request)

        _, kwargs = k8s_service.workload_provider.create_workload.call_args
        assert kwargs["egress_auth_token"] == "egress-token"
        assert kwargs["egress_mode"] == EGRESS_MODE_DNS
        assert kwargs["annotations"][SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY] == "egress-token"

    @pytest.mark.asyncio
    async def test_create_sandbox_with_network_policy_passes_egress_mode_dns_nft_from_config(
        self, k8s_service, create_sandbox_request
    ):
        """An egress config with mode EGRESS_MODE_DNS_NFT is forwarded to the provider as egress_mode."""
        create_sandbox_request.network_policy = NetworkPolicy(default_action="deny", egress=[])
        k8s_service.app_config.egress = EgressConfig(
            image="opensandbox/egress:v1.0.3",
            mode=EGRESS_MODE_DNS_NFT,
        )
        self._stub_running_workload(k8s_service)

        with patch(
            "src.services.k8s.kubernetes_service.generate_egress_token",
            return_value="egress-token",
        ):
            await k8s_service.create_sandbox(create_sandbox_request)

        _, kwargs = k8s_service.workload_provider.create_workload.call_args
        assert kwargs["egress_mode"] == EGRESS_MODE_DNS_NFT

    def test_get_endpoint_merges_egress_auth_header_from_instance_metadata(
        self, k8s_service
    ):
        """get_endpoint adds the egress auth header from the workload annotation to the provider's headers."""
        k8s_service.workload_provider.get_workload.return_value = {
            "metadata": {
                "annotations": {
                    SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY: "egress-token",
                }
            }
        }
        k8s_service.workload_provider.get_endpoint_info.return_value = Endpoint(
            endpoint="gateway.example.com",
            headers={"OpenSandbox-Ingress-To": "sbx-123-44772"},
        )

        endpoint = k8s_service.get_endpoint("sbx-123", 44772)

        assert endpoint.endpoint == "gateway.example.com"
        assert endpoint.headers == {
            "OpenSandbox-Ingress-To": "sbx-123-44772",
            OPEN_SANDBOX_EGRESS_AUTH_HEADER: "egress-token",
        }

    @pytest.mark.asyncio
    async def test_create_sandbox_rejects_timeout_above_configured_maximum(
        self, k8s_service, create_sandbox_request
    ):
        """A timeout above max_sandbox_timeout_seconds yields 400 and no workload creation."""
        k8s_service.app_config.server.max_sandbox_timeout_seconds = 3600
        create_sandbox_request.timeout = 7200

        with pytest.raises(HTTPException) as exc_info:
            await k8s_service.create_sandbox(create_sandbox_request)

        assert exc_info.value.status_code == 400
        assert exc_info.value.detail["code"] == SandboxErrorCodes.INVALID_PARAMETER
        assert "configured maximum of 3600s" in exc_info.value.detail["message"]
        k8s_service.workload_provider.create_workload.assert_not_called()


class TestWaitForSandboxReady:
    """_wait_for_sandbox_ready method tests"""

    @staticmethod
    def _status(state, reason="", message=""):
        """Build a provider status dict for ``state`` stamped with the current UTC time."""
        return {
            "state": state,
            "reason": reason,
            "message": message,
            "last_transition_at": datetime.now(timezone.utc),
        }

    @pytest.mark.asyncio
    async def test_wait_for_running_pod_succeeds(self, k8s_service, mock_workload):
        """
        Test case: Successfully wait for Running Pod

        Purpose: Verify that the workload is returned when the status is Running
        """
        provider = k8s_service.workload_provider
        provider.get_workload.return_value = mock_workload
        provider.get_status.return_value = self._status("Running", message="Pod is running")

        result = await k8s_service._wait_for_sandbox_ready("test-sandbox-id", timeout_seconds=10)

        assert result == mock_workload

    @pytest.mark.asyncio
    async def test_wait_for_pending_then_running_succeeds(self, k8s_service, mock_workload):
        """
        Test case: Successfully wait from Pending to Running

        Purpose: Verify that a Pending status triggers another poll and that
        the following Running status ends the wait after exactly two polls
        """
        # Every entry is consumed: one Pending poll, then the Running poll that
        # ends the wait. The Allocated state is covered by its own test below.
        status_sequence = [
            self._status("Pending", message="Pending"),
            self._status("Running", message="Running"),
        ]

        provider = k8s_service.workload_provider
        provider.get_workload.return_value = mock_workload
        provider.get_status.side_effect = status_sequence

        result = await k8s_service._wait_for_sandbox_ready(
            "test-sandbox-id", timeout_seconds=10, poll_interval_seconds=0.1
        )

        assert result == mock_workload
        assert provider.get_status.call_count == 2

    @pytest.mark.asyncio
    async def test_wait_for_allocated_pod_returns_immediately(self, k8s_service, mock_workload):
        """
        Test case: Returns when Pod reaches Allocated state (IP assigned)

        Purpose: Verify that Allocated state (IP assigned) is treated as ready
        """
        provider = k8s_service.workload_provider
        provider.get_workload.return_value = mock_workload
        provider.get_status.return_value = self._status(
            "Allocated", reason="IP_ASSIGNED", message="Pod has IP assigned"
        )

        result = await k8s_service._wait_for_sandbox_ready("test-sandbox-id", timeout_seconds=10)

        assert result == mock_workload

    @pytest.mark.asyncio
    async def test_wait_timeout_raises_exception(self, k8s_service, mock_workload):
        """
        Test case: Raises exception on wait timeout

        Purpose: Verify that HTTPException 504 is raised when the status stays Pending
        """
        provider = k8s_service.workload_provider
        provider.get_workload.return_value = mock_workload
        provider.get_status.return_value = self._status("Pending", message="Still pending")

        with pytest.raises(HTTPException) as exc_info:
            await k8s_service._wait_for_sandbox_ready(
                "test-sandbox-id", timeout_seconds=1, poll_interval_seconds=0.5
            )

        assert exc_info.value.status_code == 504  # Gateway Timeout
        assert "timeout" in exc_info.value.detail["message"].lower()


class TestKubernetesSandboxServiceRenew:
    """renew_expiration tests for sandboxes without an expiration"""

    def test_renew_expiration_rejects_manual_cleanup_sandbox(self, k8s_service):
        """Renewal is refused with 409 INVALID_EXPIRATION when the provider reports no expiration."""
        provider = k8s_service.workload_provider
        provider.get_workload.return_value = MagicMock()
        provider.get_expiration.return_value = None
        requested_expiry = datetime.now(timezone.utc) + timedelta(hours=1)
        request = MagicMock(expires_at=requested_expiry)

        with pytest.raises(HTTPException) as exc_info:
            k8s_service.renew_expiration("test-sandbox-id", request)

        error = exc_info.value
        expected_message = "Sandbox test-sandbox-id does not have automatic expiration enabled."
        assert error.status_code == 409
        assert error.detail["code"] == SandboxErrorCodes.INVALID_EXPIRATION
        assert error.detail["message"] == expected_message


class TestGetSandbox:
    """get_sandbox method tests"""

    def test_get_existing_sandbox_succeeds(self, k8s_service, mock_workload):
        """
        Test case: Successfully get existing sandbox

        Purpose: Verify that the returned sandbox carries the expected id and Running state
        """
        now = datetime.now(timezone.utc)
        running_status = {
            "state": "Running",
            "reason": "",
            "message": "Running",
            "last_transition_at": now,
        }
        provider = k8s_service.workload_provider
        provider.get_workload.return_value = mock_workload
        provider.get_status.return_value = running_status
        provider.get_endpoint_info.return_value = "10.0.0.1:8080"
        provider.get_expiration.return_value = datetime.now(timezone.utc) + timedelta(hours=1)

        # Use sandbox_id from mock_workload
        sandbox = k8s_service.get_sandbox("test-sandbox-123")

        # Sandbox uses 'id' field
        assert sandbox.id == "test-sandbox-123"
        assert sandbox.status.state == "Running"

    def test_get_nonexistent_sandbox_raises_404(self, k8s_service):
        """
        Test case: Raises 404 for nonexistent sandbox

        Purpose: Verify that a 404 HTTPException is raised when the provider returns no workload
        """
        k8s_service.workload_provider.get_workload.return_value = None

        with pytest.raises(HTTPException) as exc_info:
            k8s_service.get_sandbox("nonexistent-id")

        error = exc_info.value
        assert error.status_code == 404
        assert "not found" in error.detail["message"].lower()


class TestDeleteSandbox:
    """delete_sandbox method tests"""

    def test_delete_existing_sandbox_succeeds(self, k8s_service, mock_workload):
        """
        Test case: Successfully delete existing sandbox

        Purpose: Verify that delete_workload is called once with the sandbox id and service namespace
        """
        provider = k8s_service.workload_provider
        provider.get_workload.return_value = mock_workload
        provider.delete_workload.return_value = None

        k8s_service.delete_sandbox("test-sandbox-id")

        expected_kwargs = {
            "sandbox_id": "test-sandbox-id",
            "namespace": k8s_service.namespace,
        }
        provider.delete_workload.assert_called_once_with(**expected_kwargs)

    def test_delete_nonexistent_sandbox_raises_404(self, k8s_service):
        """
        Test case: Raises 404 when deleting nonexistent sandbox

        Purpose: Verify that a 404 HTTPException is raised when delete_workload fails with "not found"
        """
        # Mock delete_workload to raise exception containing "not found"
        missing = Exception("Sandbox not found")
        k8s_service.workload_provider.delete_workload.side_effect = missing

        with pytest.raises(HTTPException) as exc_info:
            k8s_service.delete_sandbox("nonexistent-id")

        assert exc_info.value.status_code == 404


class TestListSandboxes:
    """list_sandboxes method tests"""

    @staticmethod
    def _build_workload(index, annotations, created_at):
        """Build a workload dict named ``sandbox-<index>`` with the given creation time."""
        sandbox_id = f"sandbox-{index}"
        return {
            "metadata": {
                "name": sandbox_id,
                "uid": f"uid-{index}",
                "labels": {
                    "opensandbox.io/id": sandbox_id,
                },
                # Copy so the workloads do not share one annotations dict.
                "annotations": annotations.copy(),
                "creationTimestamp": created_at.isoformat(),
            },
            "spec": {},
            "status": {},
        }

    @staticmethod
    def _stub_provider(service, workloads):
        """Make the mocked provider list ``workloads`` and report each one as Running."""
        provider = service.workload_provider
        provider.list_workloads.return_value = workloads
        provider.get_status.return_value = {
            "state": "Running",
            "reason": "",
            "message": "Running",
            "last_transition_at": datetime.now(timezone.utc),
        }
        provider.get_endpoint_info.return_value = "10.0.0.1:8080"
        provider.get_expiration.return_value = datetime.now(timezone.utc) + timedelta(hours=1)

    @staticmethod
    def _list_page(service, page, page_size):
        """Call list_sandboxes with a request for the given page and page size."""
        from src.api.schema import PaginationRequest
        request = ListSandboxesRequest(pagination=PaginationRequest(page=page, page_size=page_size))
        return service.list_sandboxes(request)

    def test_list_all_sandboxes_succeeds(self, k8s_service, mock_workload):
        """
        Test case: Successfully list all sandboxes

        Purpose: Verify that a single workload is listed with its id and a total of 1
        """
        self._stub_provider(k8s_service, [mock_workload])

        response = self._list_page(k8s_service, page=1, page_size=20)

        # Sandbox in items uses 'id' field
        assert len(response.items) == 1
        assert response.items[0].id == "test-sandbox-123"
        assert response.pagination.total_items == 1

    def test_list_sandboxes_with_pagination(self, k8s_service, mock_workload):
        """
        Test case: List sandboxes with pagination

        Purpose: Verify that 10 workloads with page_size 5 yield 5 items on page 1 of 2
        """
        # Create multiple mock workloads using mock_workload as template
        annotations = mock_workload["metadata"]["annotations"]
        workloads = [
            self._build_workload(i, annotations, datetime.now(timezone.utc))
            for i in range(10)
        ]
        self._stub_provider(k8s_service, workloads)

        response = self._list_page(k8s_service, page=1, page_size=5)

        assert len(response.items) == 5
        assert response.pagination.page == 1
        assert response.pagination.page_size == 5
        assert response.pagination.total_items == 10
        assert response.pagination.total_pages == 2

    def test_list_sandboxes_sorted_by_creation_time(self, k8s_service, mock_workload):
        """
        Test case: Verify sandboxes are sorted by creation time (newest first)

        Purpose: Verify that list_sandboxes returns sandboxes sorted by created_at in descending order
        """
        base_time = datetime.now(timezone.utc)
        annotations = mock_workload["metadata"]["annotations"]

        # Index 0 is the oldest sandbox, index 4 the newest.
        ages = [
            timedelta(hours=5),  # Oldest
            timedelta(hours=2),
            timedelta(hours=1),
            timedelta(minutes=30),
            timedelta(0),  # Newest
        ]
        by_age = [
            self._build_workload(i, annotations, base_time - age)
            for i, age in enumerate(ages)
        ]

        # Hand the workloads over in a scrambled order, so that neither keeping
        # nor merely reversing the input order can produce the expected result.
        scrambled = [by_age[i] for i in (2, 0, 4, 1, 3)]
        self._stub_provider(k8s_service, scrambled)

        response = self._list_page(k8s_service, page=1, page_size=10)

        # Verify all items are returned
        assert len(response.items) == 5

        # Verify they are sorted by creation time (newest first)
        # The order should be: index 4 (newest), 3, 2, 1, 0 (oldest)
        assert response.items[0].id == "sandbox-4"  # Newest
        assert response.items[1].id == "sandbox-3"
        assert response.items[2].id == "sandbox-2"
        assert response.items[3].id == "sandbox-1"
        assert response.items[4].id == "sandbox-0"  # Oldest

        # Also verify the creation times are in descending order
        for i in range(len(response.items) - 1):
            assert response.items[i].created_at >= response.items[i + 1].created_at


class TestRenewExpiration:
    """Tests for the sandbox service's renew_expiration method."""

    def test_renew_expiration_succeeds(self, k8s_service, mock_workload):
        """
        Scenario: renewal is requested with a timestamp two hours ahead.

        Expectation: the response carries the requested expiration and the
        provider's update_expiration is called exactly once with it.
        """
        from src.api.schema import RenewSandboxExpirationRequest

        target_expiry = datetime.now(timezone.utc) + timedelta(hours=2)

        provider = k8s_service.workload_provider
        provider.get_workload.return_value = mock_workload
        provider.update_expiration.return_value = None
        provider.get_expiration.return_value = target_expiry

        renewed = k8s_service.renew_expiration(
            "test-sandbox-id",
            RenewSandboxExpirationRequest(expires_at=target_expiry),
        )

        assert renewed.expires_at == target_expiry
        provider.update_expiration.assert_called_once_with(
            sandbox_id="test-sandbox-id",
            namespace=k8s_service.namespace,
            expires_at=target_expiry,
        )

    def test_renew_with_past_time_raises_error(self, k8s_service, mock_workload):
        """
        Scenario: renewal is requested with a timestamp one hour in the past.

        Expectation: an HTTPException with status code 400 is raised.
        """
        from src.api.schema import RenewSandboxExpirationRequest

        k8s_service.workload_provider.get_workload.return_value = mock_workload

        stale_request = RenewSandboxExpirationRequest(
            expires_at=datetime.now(timezone.utc) - timedelta(hours=1)
        )

        with pytest.raises(HTTPException) as raised:
            k8s_service.renew_expiration("test-sandbox-id", stale_request)

        assert raised.value.status_code == 400

    def test_renew_returns_409_when_sandbox_has_no_expiration(self, k8s_service):
        """A sandbox whose provider reports no expiration cannot be renewed (409)."""
        from src.api.schema import RenewSandboxExpirationRequest

        provider = k8s_service.workload_provider
        provider.get_workload.return_value = MagicMock()
        # The provider reports no expiration for this sandbox.
        provider.get_expiration.return_value = None

        one_hour_ahead = datetime.now(timezone.utc) + timedelta(hours=1)
        request = RenewSandboxExpirationRequest(expires_at=one_hour_ahead)

        with pytest.raises(HTTPException) as raised:
            k8s_service.renew_expiration("no-ttl-sandbox", request)

        failure = raised.value
        assert failure.status_code == 409
        assert "does not have automatic expiration" in failure.detail["message"]
        provider.update_expiration.assert_not_called()


================================================
FILE: server/tests/k8s/test_provider_factory.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Unit tests for provider_factory.
"""

import pytest
from unittest.mock import patch

from src.config import AgentSandboxRuntimeConfig
from src.services.k8s.provider_factory import (
    register_provider,
    create_workload_provider,
    list_available_providers,
    PROVIDER_TYPE_BATCHSANDBOX,
    PROVIDER_TYPE_AGENT_SANDBOX,
)
from src.services.k8s.workload_provider import WorkloadProvider
from src.services.k8s.batchsandbox_provider import BatchSandboxProvider
from src.services.k8s.agent_sandbox_provider import AgentSandboxProvider


class TestProviderFactory:
    """Unit tests exercising the workload provider factory functions."""

    def test_register_and_create_batchsandbox_provider(self, mock_k8s_client, k8s_app_config):
        """The factory builds a BatchSandboxProvider bound to the given client."""
        created = create_workload_provider(
            PROVIDER_TYPE_BATCHSANDBOX, mock_k8s_client, k8s_app_config
        )

        assert isinstance(created, BatchSandboxProvider)
        assert created.k8s_client == mock_k8s_client

    def test_register_and_create_agent_sandbox_provider(
        self,
        mock_k8s_client,
        agent_sandbox_app_config,
        tmp_path,
    ):
        """The factory builds an AgentSandboxProvider from the agent-sandbox config.

        A template file is written to tmp_path and referenced from the
        runtime config; the created provider must reflect the configured
        shutdown policy and the kubernetes service account.
        """
        template_yaml = """
metadata:
  annotations:
    managed-by: opensandbox
spec:
  podTemplate:
    spec:
      nodeSelector:
        workload: sandbox
"""
        template_file = tmp_path / "agent_sandbox_template.yaml"
        template_file.write_text(template_yaml)

        agent_sandbox_app_config.agent_sandbox = AgentSandboxRuntimeConfig(
            template_file=str(template_file),
            shutdown_policy="Retain",
            ingress_enabled=True,
        )

        created = create_workload_provider(
            PROVIDER_TYPE_AGENT_SANDBOX,
            mock_k8s_client,
            agent_sandbox_app_config,
        )

        expected_account = agent_sandbox_app_config.kubernetes.service_account
        assert isinstance(created, AgentSandboxProvider)
        assert created.k8s_client == mock_k8s_client
        assert created.shutdown_policy == "Retain"
        assert created.service_account == expected_account

    def test_create_provider_case_insensitive(self, mock_k8s_client, k8s_app_config):
        """Provider type names are matched regardless of letter case."""
        spellings = ("BatchSandbox", PROVIDER_TYPE_BATCHSANDBOX, "BATCHSANDBOX")
        created = [
            create_workload_provider(spelling, mock_k8s_client, k8s_app_config)
            for spelling in spellings
        ]

        for provider in created:
            assert isinstance(provider, BatchSandboxProvider)

    def test_create_provider_with_none_type_uses_default(self, mock_k8s_client, k8s_app_config):
        """Passing None as the type falls back to the default provider."""
        fallback = create_workload_provider(None, mock_k8s_client, k8s_app_config)

        # The default is expected to be the batchsandbox provider.
        assert isinstance(fallback, BatchSandboxProvider)

    def test_create_provider_with_invalid_type_raises_error(self, mock_k8s_client):
        """An unregistered provider type name results in ValueError."""
        expected_message = "Unsupported workload provider type"
        with pytest.raises(ValueError, match=expected_message):
            create_workload_provider("invalid", mock_k8s_client)

    def test_create_batchsandbox_with_template_file(self, mock_k8s_client, k8s_app_config, tmp_path):
        """The app_config handed to BatchSandboxProvider carries the template path.

        BatchSandboxProvider.__init__ is patched out so only the arguments
        forwarded by the factory are inspected.
        """
        template_yaml = """apiVersion: execution.alibaba-inc.com/v1alpha1
kind: BatchSandbox
metadata:
  name: test-template
spec:
  template:
    spec:
      nodeSelector:
        gpu: "true"
"""
        template_file = tmp_path / "test_template.yaml"
        template_file.write_text(template_yaml)
        template_path = str(template_file)

        k8s_app_config.kubernetes.batchsandbox_template_file = template_path

        with patch.object(BatchSandboxProvider, '__init__', return_value=None) as init_spy:
            create_workload_provider(PROVIDER_TYPE_BATCHSANDBOX, mock_k8s_client, k8s_app_config)

            # The constructor must have been invoked once, receiving app_config by keyword.
            init_spy.assert_called_once()
            forwarded_config = init_spy.call_args.kwargs['app_config']
            assert forwarded_config.kubernetes.batchsandbox_template_file == template_path

    def test_list_available_providers(self):
        """list_available_providers returns a list containing both built-in types."""
        available = list_available_providers()

        assert isinstance(available, list)
        assert PROVIDER_TYPE_BATCHSANDBOX in available
        assert PROVIDER_TYPE_AGENT_SANDBOX in available

    def test_register_custom_provider(self, mock_k8s_client, isolated_registry):
        """A provider class registered at runtime can be created and listed."""

        class CustomProvider(WorkloadProvider):
            """Minimal WorkloadProvider whose operations all return None."""

            def __init__(self, k8s_client):
                self.k8s_client = k8s_client

            def create_workload(self, *args, **kwargs):
                return None

            def get_workload(self, *args, **kwargs):
                return None

            def delete_workload(self, *args, **kwargs):
                return None

            def list_workloads(self, *args, **kwargs):
                return None

            def update_expiration(self, *args, **kwargs):
                return None

            def get_expiration(self, *args, **kwargs):
                return None

            def get_status(self, *args, **kwargs):
                return None

            def get_endpoint_info(self, *args, **kwargs):
                return None

        register_provider("custom", CustomProvider)

        # Creation through the factory yields the custom class...
        created = create_workload_provider("custom", mock_k8s_client)
        assert isinstance(created, CustomProvider)

        # ...and the new type name shows up in the listing.
        assert "custom" in list_available_providers()

    def test_create_batchsandbox_with_config(self, mock_k8s_client, k8s_app_config):
        """Creating the BatchSandbox provider with an explicit app config works."""
        created = create_workload_provider(PROVIDER_TYPE_BATCHSANDBOX, mock_k8s_client, k8s_app_config)

        assert isinstance(created, BatchSandboxProvider)
        assert created.k8s_client == mock_k8s_client

    def test_create_provider_with_empty_registry_raises_error(self, mock_k8s_client, isolated_registry):
        """With no registered providers and type None, creation raises ValueError."""
        from src.services.k8s import provider_factory

        # Empty the registry so that there is no default provider to fall back to.
        provider_factory._PROVIDER_REGISTRY.clear()

        expected_message = "No workload providers are registered"
        with pytest.raises(ValueError, match=expected_message):
            create_workload_provider(None, mock_k8s_client)


================================================
FILE: server/tests/k8s/test_rate_limiter.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Unit tests for TokenBucketRateLimiter."""

import time
import threading
from unittest.mock import patch

import pytest

from src.services.k8s.rate_limiter import TokenBucketRateLimiter


class TestTokenBucketRateLimiter:
    """Tests for the token-bucket rate limiter.

    Several tests reach into the limiter's private attributes (``_burst``,
    ``_tokens``, ``_lock``, ``_last_refill``, ``_refill``) to set up or
    inspect state deterministically.
    """

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------

    def test_invalid_qps_raises_value_error(self):
        """qps <= 0 must raise ValueError."""
        with pytest.raises(ValueError, match="qps must be > 0"):
            TokenBucketRateLimiter(qps=0)

    def test_negative_qps_raises_value_error(self):
        """Negative qps must raise ValueError."""
        with pytest.raises(ValueError):
            TokenBucketRateLimiter(qps=-1.0)

    def test_burst_defaults_to_qps_when_zero(self):
        """burst=0 means the bucket capacity equals qps (minimum 1)."""
        limiter = TokenBucketRateLimiter(qps=5.0, burst=0)
        assert limiter._burst == 5.0

    def test_explicit_burst_is_respected(self):
        """Explicit burst value sets bucket capacity independently from qps."""
        limiter = TokenBucketRateLimiter(qps=5.0, burst=20)
        assert limiter._burst == 20.0

    def test_burst_minimum_is_one_when_qps_below_one(self):
        """burst is clamped to 1 when qps < 1 and burst is not set."""
        limiter = TokenBucketRateLimiter(qps=0.5)
        assert limiter._burst == 1.0

    def test_low_qps_limiter_can_acquire(self):
        """A limiter with qps < 1 and default burst must be able to issue a token."""
        limiter = TokenBucketRateLimiter(qps=0.5)
        assert limiter.try_acquire() is True

    # ------------------------------------------------------------------
    # try_acquire
    # ------------------------------------------------------------------

    def test_try_acquire_succeeds_when_bucket_full(self):
        """try_acquire returns True when tokens are available."""
        limiter = TokenBucketRateLimiter(qps=10.0, burst=10)
        assert limiter.try_acquire() is True

    def test_try_acquire_fails_when_bucket_empty(self):
        """try_acquire returns False after exhausting all tokens."""
        limiter = TokenBucketRateLimiter(qps=1.0, burst=1)
        limiter.try_acquire()  # consume the only token
        assert limiter.try_acquire() is False

    def test_try_acquire_consumes_token(self):
        """Each successful try_acquire reduces available tokens by one."""
        limiter = TokenBucketRateLimiter(qps=10.0, burst=3)
        assert limiter.try_acquire() is True
        assert limiter.try_acquire() is True
        assert limiter.try_acquire() is True
        assert limiter.try_acquire() is False

    # ------------------------------------------------------------------
    # acquire (blocking)
    # ------------------------------------------------------------------

    def test_acquire_succeeds_immediately_when_tokens_available(self):
        """acquire completes without sleeping when the bucket has tokens."""
        limiter = TokenBucketRateLimiter(qps=100.0, burst=10)
        start = time.monotonic()
        limiter.acquire()
        elapsed = time.monotonic() - start
        assert elapsed < 0.1  # should be essentially instant

    def test_acquire_blocks_until_token_available(self):
        """acquire blocks and returns only after a token refills."""
        limiter = TokenBucketRateLimiter(qps=10.0, burst=1)
        limiter.try_acquire()  # drain the bucket

        start = time.monotonic()
        limiter.acquire()  # should wait ~0.1s for next token
        elapsed = time.monotonic() - start

        assert elapsed >= 0.05  # some delay occurred

    def test_acquire_minimum_sleep_prevents_busy_loop(self):
        """acquire sleeps at least 1 ms even when wait is near-zero."""
        limiter = TokenBucketRateLimiter(qps=1.0, burst=1)
        # Manually set tokens to just below 1 to produce a near-zero wait
        with limiter._lock:
            limiter._tokens = 1.0 - 1e-10

        with patch("src.services.k8s.rate_limiter.time.sleep") as mock_sleep:
            # _try_acquire will succeed on first or second call; we only care
            # that if sleep is called, the argument is >= 0.001.
            limiter.acquire()
            for call in mock_sleep.call_args_list:
                assert call.args[0] >= 0.001

    # ------------------------------------------------------------------
    # Token refill
    # ------------------------------------------------------------------

    def test_tokens_refill_over_time(self):
        """Tokens are replenished proportional to elapsed time."""
        limiter = TokenBucketRateLimiter(qps=100.0, burst=10)
        # Drain all tokens
        for _ in range(10):
            limiter.try_acquire()
        assert limiter.try_acquire() is False

        time.sleep(0.05)  # wait for ~5 tokens to refill at 100 qps

        assert limiter.try_acquire() is True

    def test_tokens_capped_at_burst(self):
        """Token count never exceeds burst capacity."""
        limiter = TokenBucketRateLimiter(qps=10.0, burst=5)
        time.sleep(0.5)  # wait long enough to overflow if cap not applied
        # Force a refill by calling _try_acquire internals
        with limiter._lock:
            limiter._refill()
        assert limiter._tokens <= 5.0

    # ------------------------------------------------------------------
    # Thread safety
    # ------------------------------------------------------------------

    def test_concurrent_acquires_do_not_exceed_burst(self):
        """Concurrent threads must not collectively acquire more than burst tokens.

        Twenty threads race on try_acquire against a bucket of five tokens
        while the limiter's clock is frozen, so no refill can add tokens.
        """
        burst = 5
        limiter = TokenBucketRateLimiter(qps=1000.0, burst=burst)
        successes = []
        lock = threading.Lock()

        # Freeze time so _refill() never adds extra tokens during the test
        fixed_time = limiter._last_refill

        def worker():
            if limiter.try_acquire():
                with lock:
                    successes.append(1)

        threads = [threading.Thread(target=worker) for _ in range(20)]

        # Apply the patch exactly once, from the main thread, for the whole
        # lifetime of the workers. unittest.mock.patch is not thread-safe:
        # entering/exiting it concurrently inside each worker can restore a
        # sibling thread's mock instead of the real function and leave
        # time.monotonic patched after the test finishes.
        with patch("src.services.k8s.rate_limiter.time.monotonic", return_value=fixed_time):
            for t in threads:
                t.start()
            for t in threads:
                t.join()

        assert len(successes) <= burst


================================================
FILE: server/tests/smoke.sh
================================================
#!/bin/bash
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Fail fast: exit on any command error (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

color() {
  # Emit the tput sequence selecting foreground color number $1.
  # Prints nothing when stdout is not a terminal or tput is unavailable.
  [[ -t 1 ]] || return 0
  command -v tput >/dev/null 2>&1 || return 0
  tput setaf "$1"
}

reset_color() {
  # Emit the tput sequence that resets text attributes.
  # Prints nothing when stdout is not a terminal or tput is unavailable.
  [[ -t 1 ]] || return 0
  command -v tput >/dev/null 2>&1 || return 0
  tput sgr0
}

# Color numbers handed to `tput setaf` (via color) by the step/info/warn/error helpers.
STEP_COLOR=6   # cyan
INFO_COLOR=2   # green
WARN_COLOR=3   # yellow
ERR_COLOR=1    # red

step() {
  # Print a blank line, then "==== <title> ====" wrapped in the step color.
  printf '\n%s==== %s ====%s\n' \
    "$(color "${STEP_COLOR}")" \
    "$1" \
    "$(reset_color)"
}

info() {
  # Print an informational message to stdout wrapped in the info color.
  printf '%s%s%s\n' \
    "$(color "${INFO_COLOR}")" \
    "$1" \
    "$(reset_color)"
}

warn() {
  # Print a warning message to stderr wrapped in the warning color.
  printf '%s%s%s\n' \
    "$(color "${WARN_COLOR}")" \
    "$1" \
    "$(reset_color)" >&2
}

error() {
  # Print an error message to stderr wrapped in the error color.
  printf '%s%s%s\n' \
    "$(color "${ERR_COLOR}")" \
    "$1" \
    "$(reset_color)" >&2
}

# Server base URL; override by exporting BASE_URL before running the script.
# Every request in this script is issued under the /v1 prefix.
BASE_URL="${BASE_URL:-http://localhost:32888}"
BASE_API_URL="${BASE_URL}/v1"
curl_json() {
  # Run curl quietly, following redirects, and fail (non-zero exit) on HTTP
  # error responses while still showing curl's own error message.
  # All arguments are forwarded to curl unchanged.
  curl --silent --fail --show-error --location "$@"
}

curl_json_status() {
  # Like a quiet, redirect-following curl, but without --fail: prints the
  # response body followed by a newline and the HTTP status code on the last
  # line, so callers can handle non-2xx responses themselves.
  curl --silent --show-error --location --write-out "\n%{http_code}" "$@"
}

wait_for_running() {
  # Poll GET /sandboxes/<id> once per second until the sandbox is Running.
  #
  # Arguments:
  #   $1  sandbox id (defaults to the global SANDBOX_ID)
  #   $2  timeout in seconds (defaults to 30)
  # Output:
  #   On success, the JSON body of the last GET response is written to stdout.
  # Returns:
  #   0 when state is "Running"; 1 on HTTP 404, on state "Failed" or
  #   "Terminated", or when the timeout elapses.
  local sandbox_id="${1:-${SANDBOX_ID}}"
  local timeout="${2:-30}"
  local deadline=$((SECONDS + timeout))
  local result tag detail state body last
  while true; do
    # The Python filter classifies the response into a single tagged line:
    #   ERROR:<msg>   unrecoverable (404)
    #   RETRY:<msg>   transient (non-200, empty body, invalid JSON)
    #   STATE:<state> followed by the raw body on the following lines
    # `|| true` keeps a failing curl/python pipeline from aborting the script.
    result=$(curl -sSL -w "\n%{http_code}" \
      "${BASE_API_URL}/sandboxes/${sandbox_id}" \
      | python3 -c '
import json, sys
raw = sys.stdin.read()
lines = raw.rsplit("\n", 1)
http_code = lines[-1].strip() if len(lines) > 1 else "000"
body_text = lines[0] if len(lines) > 1 else ""
if http_code == "404":
    print("ERROR:not found (404) — may have failed during provisioning")
elif http_code != "200":
    print(f"RETRY:HTTP {http_code}")
elif not body_text.strip():
    print("RETRY:empty body")
else:
    try:
        data = json.loads(body_text)
        state = data.get("status", {}).get("state", "")
        print(f"STATE:{state}")
        print(body_text)
    except json.JSONDecodeError as exc:
        print(f"RETRY:invalid JSON: {exc}")
') || true

    # Everything before the first ':' is the tag, everything after is detail.
    tag="${result%%:*}"
    detail="${result#*:}"
    # Suffix appended to the timeout message; only STATE responses set it.
    last=""

    case "${tag}" in
      ERROR)
        error "Sandbox ${sandbox_id}: ${detail}"
        return 1
        ;;
      RETRY)
        warn "GET sandbox ${sandbox_id}: ${detail}, retrying..."
        ;;
      STATE)
        # First line of detail is the state, the remainder is the JSON body.
        state="${detail%%$'\n'*}"
        body="${detail#*$'\n'}"
        if [[ "${state}" == "Running" ]]; then
          printf '%s' "${body}"
          return 0
        fi
        if [[ "${state}" == "Failed" || "${state}" == "Terminated" ]]; then
          error "Sandbox ${sandbox_id} entered terminal state '${state}'."
          return 1
        fi
        last=" (last: ${state})"
        ;;
      *)
        warn "GET sandbox ${sandbox_id}: unexpected output, retrying..."
        ;;
    esac

    if (( SECONDS >= deadline )); then
      error "Sandbox ${sandbox_id} did not reach Running within ${timeout}s${last}."
      return 1
    fi
    sleep 1
  done
}

wait_for_expired() {
  # Poll GET /sandboxes/<id> every 2 seconds until it answers 404.
  #
  # Arguments:
  #   $1  sandbox id
  #   $2  timeout in seconds (defaults to 90)
  # Returns:
  #   0 once the server responds with 404; 1 if the timeout elapses first
  #   (the last response body is echoed to stdout for diagnosis).
  local sandbox_id=$1
  local timeout="${2:-90}"
  local deadline=$((SECONDS + timeout))
  local resp body status
  while true; do
    # A transient curl failure (e.g. connection refused) must not abort the
    # whole script under `set -e`; keep polling until the deadline instead.
    resp=$(curl_json_status "${BASE_API_URL}/sandboxes/${sandbox_id}") || true
    # curl_json_status prints the body, a newline, then the HTTP status code.
    status="${resp##*$'\n'}"
    body="${resp%$'\n'*}"
    if [[ "${status}" == "404" ]]; then
      info "Sandbox ${sandbox_id} expired as expected."
      return 0
    fi
    if (( SECONDS >= deadline )); then
      error "Sandbox ${sandbox_id} did not expire within expected window (last status ${status})."
      echo "${body}"
      return 1
    fi
    sleep 2
  done
}

wait_for_sidecar_gone() {
  # Poll `docker ps -a` every 2 seconds until no container carries the
  # egress-sidecar label for the given sandbox.
  #
  # Arguments:
  #   $1  sandbox id
  #   $2  timeout in seconds (defaults to 20)
  # Returns:
  #   0 once no matching container is listed; 1 if the timeout elapses first.
  local sandbox_id=$1
  local timeout="${2:-20}"
  local deadline=$((SECONDS + timeout))
  local filter="label=opensandbox.io/egress-sidecar-for=${sandbox_id}"
  local remaining
  while true; do
    # Capture the listing instead of piping into `grep -q`: with pipefail a
    # failing (or SIGPIPE'd) `docker ps` would make the negated pipeline
    # succeed and falsely report that the sidecar is gone.
    if remaining=$(docker ps -a --filter "${filter}" -q); then
      if [[ -z "${remaining}" ]]; then
        info "No sidecar remaining for sandbox ${sandbox_id}"
        return 0
      fi
    else
      warn "docker ps failed while checking sidecar for sandbox ${sandbox_id}, retrying..."
    fi
    if (( SECONDS >= deadline )); then
      error "Sidecar for sandbox ${sandbox_id} still present after timeout"
      # Best-effort diagnostic listing; its failure must not mask the result.
      docker ps -a --filter "${filter}" || true
      return 1
    fi
    sleep 2
  done
}

# Pre-pull the image used by every sandbox created in this script.
docker pull ubuntu:latest

# Request body for the first sandbox: idle entrypoint, 60-second timeout,
# and a metadata label that the list step filters on.
create_payload='{
  "image": { "uri": "ubuntu" },
  "env": { "HELLO": "WORLD" },
  "metadata": { "hello": "world" },
  "entrypoint": ["tail", "-f", "/dev/null"],
  "resourceLimits": { "cpu": "500m", "memory": "512Mi" },
  "timeout": 60
}'

step "Create sandbox (60s TTL)"
# curl_json fails the script on a non-2xx response (curl --fail + set -e).
create_resp=$(curl_json \
  -H 'Content-Type: application/json' \
  -d "${create_payload}" \
  "${BASE_API_URL}/sandboxes")

# The Python script is read from stdin (heredoc); the response JSON is
# passed as argv[1]. Exits non-zero if the response carries no "id".
SANDBOX_ID=$(python3 - <<'PY' "${create_resp}"
import json,sys
data=json.loads(sys.argv[1])
sid=str(data.get("id","")).strip()
if not sid:
    raise SystemExit("Failed to parse sandbox id from response")
print(sid,end="")
PY
)

echo "Sandbox created: id=${SANDBOX_ID}"

step "Wait for sandbox to reach Running"
# wait_for_running defaults to ${SANDBOX_ID} and prints the GET body on success.
get_resp=$(wait_for_running)
state=$(python3 - <<'PY' "${get_resp}"
import json,sys
body=json.loads(sys.argv[1])
print(body.get("status",{}).get("state"))
PY
)
echo "Sandbox state: ${state}"

# Sanity-check the GET response: id matches and state is a known value.
python3 - <<'PY' "${get_resp}" "${SANDBOX_ID}"
import json,sys
body=json.loads(sys.argv[1])
expected=sys.argv[2]
assert str(body.get("id"))==expected, "Sandbox ID mismatch in GET response"
assert body.get("status",{}).get("state") in {"Pending","Running","Unknown","Paused","Terminated","Failed"}, "Unexpected state"
PY

step "List sandboxes (metadata filter)"
# -G turns the url-encoded data pairs into query-string parameters of a GET.
list_resp=$(curl_json \
  -G \
  --data-urlencode "metadata=hello=world" \
  --data-urlencode "page=1" \
  --data-urlencode "pageSize=10" \
  "${BASE_API_URL}/sandboxes")

# The created sandbox must be among the returned items, and the response
# must report page 1.
python3 - <<'PY' "${list_resp}" "${SANDBOX_ID}"
import json,sys
body=json.loads(sys.argv[1])
sid=sys.argv[2]
ids=[item.get("id") for item in body.get("items",[])]
assert sid in ids, "Sandbox ID not found in list response"
assert body.get("pagination",{}).get("page") == 1, "Unexpected pagination page"
PY
echo "List check passed (found sandbox, pagination ok)"

step "Renew sandbox expiration (+10m)"
# Compute "now + 10 minutes" in UTC as an ISO-8601 string.
new_expiration=$(python3 - <<'PY'
from datetime import datetime, timedelta, timezone
print((datetime.now(timezone.utc) + timedelta(minutes=10)).isoformat())
PY
)

# Unquoted heredoc delimiter: ${new_expiration} is expanded by the shell.
renew_payload=$(cat <<JSON
{
  "expiresAt": "${new_expiration}"
}
JSON
)

renew_resp=$(curl_json \
  -X POST \
  -H 'Content-Type: application/json' \
  -d "${renew_payload}" \
  "${BASE_API_URL}/sandboxes/${SANDBOX_ID}/renew-expiration")
# Only prints the returned value; a missing "expiresAt" shows up as "None".
renewed=$(python3 - <<'PY' "${renew_resp}"
import json,sys
body=json.loads(sys.argv[1])
print(body.get("expiresAt"))
PY
)
echo "Expiration renewed to: ${renewed}"

step "Request endpoint on port 8080"
endpoint_resp=$(curl_json "${BASE_API_URL}/sandboxes/${SANDBOX_ID}/endpoints/8080")
# Only prints the returned value; a missing "endpoint" shows up as "None".
endpoint=$(python3 - <<'PY' "${endpoint_resp}"
import json,sys
body=json.loads(sys.argv[1])
print(body.get("endpoint"))
PY
)
echo "Endpoint: ${endpoint}"

step "Delete sandbox"
# curl_json aborts the script (curl --fail + set -e) if the DELETE is rejected.
curl_json -X DELETE "${BASE_API_URL}/sandboxes/${SANDBOX_ID}"
echo "Sandbox ${SANDBOX_ID} deleted."

step "Create sandbox with networkPolicy (egress sidecar)"
# Same idle sandbox as before plus a deny-by-default network policy that
# allows a single egress target.
egress_payload='{
  "image": { "uri": "ubuntu" },
  "env": {},
  "metadata": { "egress": "on" },
  "entrypoint": ["tail", "-f", "/dev/null"],
  "resourceLimits": { "cpu": "500m", "memory": "512Mi" },
  "timeout": 60,
  "networkPolicy": {
    "defaultAction": "deny",
    "egress": [
      { "action": "allow", "target": "pypi.org" }
    ]
  }
}'

# Use the status-reporting variant so a rejected request can be turned into
# a skip instead of aborting the script.
create_resp_with_status=$(curl_json_status \
  -H 'Content-Type: application/json' \
  -d "${egress_payload}" \
  "${BASE_API_URL}/sandboxes")

# Last line is the HTTP status code; everything before it is the body.
status_code="${create_resp_with_status##*$'\n'}"
create_resp_body="${create_resp_with_status%$'\n'*}"

# Anything other than 202 skips the egress checks with a warning.
if [[ "${status_code}" != "202" ]]; then
  warn "Skip egress sidecar smoke (status ${status_code}). Body: ${create_resp_body}"
  warn "Likely network_mode=host or egress.image unset."
else
  SANDBOX_ID=$(python3 - <<'PY' "${create_resp_body}"
import json,sys
data=json.loads(sys.argv[1])
sid=str(data.get("id","")).strip()
if not sid:
    raise SystemExit("Failed to parse sandbox id from response")
print(sid,end="")
PY
)
  echo "Egress sandbox created: id=${SANDBOX_ID}"

  step "Wait for egress sandbox to reach Running"
  # The response body is not needed here, only the success/failure status.
  wait_for_running "${SANDBOX_ID}" >/dev/null

  step "Verify egress sidecar is running"
  # Look up the sidecar container by its label; `|| true` tolerates a
  # failing pipeline so the explicit check below reports the problem.
  SIDECAR_ID=$(docker ps -a --filter "label=opensandbox.io/egress-sidecar-for=${SANDBOX_ID}" -q | head -n1 || true)
  if [[ -z "${SIDECAR_ID}" ]]; then
    error "Expected egress sidecar for sandbox ${SANDBOX_ID}, but none found."
    exit 1
  fi
  info "Sidecar ${SIDECAR_ID} detected for sandbox ${SANDBOX_ID}"

  step "Delete egress sandbox and ensure sidecar cleanup"
  curl_json -X DELETE "${BASE_API_URL}/sandboxes/${SANDBOX_ID}"
  wait_for_sidecar_gone "${SANDBOX_ID}"
fi

step "Create sandbox with host volume mount"
# Prepare the host volume test directory
mkdir -p /tmp/opensandbox-e2e/host-volume-test
# Remove the output file a previous run may have left behind; otherwise the
# "written from sandbox" check below could pass on stale data.
rm -f /tmp/opensandbox-e2e/host-volume-test/sandbox-output.txt \
  || warn "Could not remove stale sandbox-output.txt; the write check may be unreliable."
echo "opensandbox-e2e-marker" > /tmp/opensandbox-e2e/host-volume-test/marker.txt
chmod -R 755 /tmp/opensandbox-e2e

# Idle sandbox with the prepared host directory mounted read-write at
# /mnt/host-data.
volume_payload='{
  "image": { "uri": "ubuntu" },
  "env": {},
  "metadata": { "volume": "host-test" },
  "entrypoint": ["tail", "-f", "/dev/null"],
  "resourceLimits": { "cpu": "500m", "memory": "512Mi" },
  "timeout": 60,
  "volumes": [
    {
      "name": "test-host-vol",
      "host": { "path": "/tmp/opensandbox-e2e/host-volume-test" },
      "mountPath": "/mnt/host-data",
      "readOnly": false
    }
  ]
}'

# Use the status-reporting variant so a rejected request becomes a skip.
volume_resp_with_status=$(curl_json_status \
  -H 'Content-Type: application/json' \
  -d "${volume_payload}" \
  "${BASE_API_URL}/sandboxes")

# Last line is the HTTP status code; everything before it is the body.
volume_status="${volume_resp_with_status##*$'\n'}"
volume_body="${volume_resp_with_status%$'\n'*}"

if [[ "${volume_status}" != "202" ]]; then
  warn "Skip host volume smoke (status ${volume_status}). Body: ${volume_body}"
  warn "Likely host path validation or storage config issue."
else
  VOLUME_SANDBOX_ID=$(python3 - <<'PY' "${volume_body}"
import json,sys
data=json.loads(sys.argv[1])
sid=str(data.get("id","")).strip()
if not sid:
    raise SystemExit("Failed to parse sandbox id from response")
print(sid,end="")
PY
)
  echo "Volume sandbox created: id=${VOLUME_SANDBOX_ID}"

  step "Wait for volume sandbox to reach Running"
  wait_for_running "${VOLUME_SANDBOX_ID}" >/dev/null

  # --- Verify the bind mount is actually effective ---
  # Resolve the Docker container ID from the sandbox API response.
  volume_sandbox_resp=$(curl_json "${BASE_API_URL}/sandboxes/${VOLUME_SANDBOX_ID}")
  container_id=$(python3 -c '
import json, sys
body = json.loads(sys.argv[1])
print(body.get("containerId", body.get("container_id", "")), end="")
' "${volume_sandbox_resp}")
  # Fallback: if the API doesn't expose container_id, search by label.
  if [[ -z "${container_id}" ]]; then
    container_id=$(docker ps -qf "label=sandbox_id=${VOLUME_SANDBOX_ID}" | head -1)
  fi

  if [[ -n "${container_id}" ]]; then
    step "Verify host volume bind mount content inside container"
    # 1. Read the marker file written on the host
    marker_content=$(docker exec "${container_id}" cat /mnt/host-data/marker.txt 2>&1) || true
    if [[ "${marker_content}" == "opensandbox-e2e-marker" ]]; then
      info "PASS: marker.txt content matches expected value."
    else
      error "FAIL: marker.txt content='${marker_content}', expected='opensandbox-e2e-marker'"
      exit 1
    fi

    # 2. Write a file from inside the container and verify it on the host.
    # A failing write must not abort silently under `set -e`; the host-side
    # comparison below reports the failure explicitly.
    docker exec "${container_id}" sh -c 'echo "written-from-sandbox" > /mnt/host-data/sandbox-output.txt' \
      || warn "docker exec write into /mnt/host-data failed; checking host side anyway."
    host_content=$(cat /tmp/opensandbox-e2e/host-volume-test/sandbox-output.txt 2>&1) || true
    if [[ "${host_content}" == "written-from-sandbox" ]]; then
      info "PASS: file written inside container is visible on host."
    else
      error "FAIL: sandbox-output.txt on host='${host_content}', expected='written-from-sandbox'"
      exit 1
    fi
  else
    warn "Skip bind-mount verification: could not resolve container ID for sandbox ${VOLUME_SANDBOX_ID}."
  fi

  step "Delete volume sandbox"
  curl_json -X DELETE "${BASE_API_URL}/sandboxes/${VOLUME_SANDBOX_ID}"
  echo "Volume sandbox ${VOLUME_SANDBOX_ID} deleted."
fi

step "Create short-lived sandbox (60s TTL) for auto-expiration"
# Idle sandbox with a 60-second timeout that is left to expire on its own.
create_payload_short='{
  "image": { "uri": "ubuntu" },
  "env": {},
  "metadata": { "lifecycle": "short" },
  "entrypoint": ["tail", "-f", "/dev/null"],
  "resourceLimits": { "cpu": "1", "memory": "2Gi" },
  "timeout": 60
}'

# curl_json fails the script on a non-2xx response (curl --fail + set -e).
create_resp_short=$(curl_json \
  -H 'Content-Type: application/json' \
  -d "${create_payload_short}" \
  "${BASE_API_URL}/sandboxes")

# Script comes from the heredoc on stdin; the response JSON is argv[1].
SANDBOX_ID=$(python3 - <<'PY' "${create_resp_short}"
import json,sys
data=json.loads(sys.argv[1])
sid=str(data.get("id","")).strip()
if not sid:
    raise SystemExit("Failed to parse sandbox id from response")
print(sid,end="")
PY
)

echo "Short-lived sandbox created: id=${SANDBOX_ID}"

step "Wait for short-lived sandbox to reach Running"
get_resp_short=$(wait_for_running "${SANDBOX_ID}")
state_short=$(python3 - <<'PY' "${get_resp_short}"
import json,sys
body=json.loads(sys.argv[1])
print(body.get("status",{}).get("state"))
PY
)
echo "Sandbox state: ${state_short}"

step "Wait for sandbox ${SANDBOX_ID} to auto-expire (expect 404)"
# No DELETE is issued: the sandbox must disappear by itself (GET returns 404).
wait_for_expired "${SANDBOX_ID}"

step "server Lifecycle API smoke test completed successfully"


================================================
FILE: server/tests/test_agent_sandbox_service.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Unit tests for KubernetesSandboxService with agent-sandbox provider.
"""

from datetime import datetime, timezone
from unittest.mock import MagicMock, patch

import pytest
from fastapi import HTTPException
from pydantic import ValidationError

from src.api.schema import SandboxStatus
from src.config import (
    AppConfig,
    RuntimeConfig,
    ServerConfig,
    KubernetesRuntimeConfig,
    AgentSandboxRuntimeConfig,
)
from src.services.k8s.kubernetes_service import KubernetesSandboxService
from src.services.constants import SandboxErrorCodes


@pytest.fixture
def agent_sandbox_runtime_config():
    """Kubernetes runtime settings that select the agent-sandbox workload provider."""
    settings = dict(
        kubeconfig_path="/tmp/test-kubeconfig",
        namespace="test-namespace",
        service_account="test-sa",
        workload_provider="agent-sandbox",
    )
    return KubernetesRuntimeConfig(**settings)


@pytest.fixture
def agent_sandbox_app_config(agent_sandbox_runtime_config):
    """Full AppConfig: kubernetes runtime plus an agent_sandbox section (no template, Delete policy)."""
    server_section = ServerConfig(
        host="0.0.0.0",
        port=8080,
        log_level="DEBUG",
        api_key="test-api-key",
    )
    runtime_section = RuntimeConfig(
        type="kubernetes",
        execd_image="ghcr.io/opensandbox/execd:test",
    )
    agent_section = AgentSandboxRuntimeConfig(
        template_file=None,
        shutdown_policy="Delete",
        ingress_enabled=True,
    )
    return AppConfig(
        server=server_section,
        runtime=runtime_section,
        kubernetes=agent_sandbox_runtime_config,
        agent_sandbox=agent_section,
    )


@pytest.fixture
def app_config_docker():
    """AppConfig whose runtime type is "docker" and which carries no kubernetes section."""
    server_section = ServerConfig(
        host="0.0.0.0",
        port=8080,
        log_level="DEBUG",
        api_key="test-api-key",
    )
    runtime_section = RuntimeConfig(
        type="docker",
        execd_image="ghcr.io/opensandbox/execd:test",
    )
    return AppConfig(server=server_section, runtime=runtime_section, kubernetes=None)


class TestAgentSandboxServiceInit:
    """Constructor behaviour of KubernetesSandboxService with the agent-sandbox provider."""

    def test_init_with_valid_config_succeeds(self, agent_sandbox_runtime_config):
        """
        Scenario: a valid kubernetes + agent_sandbox configuration builds the service
        and the configuration is handed to the workload-provider factory.
        """
        server_section = ServerConfig(
            host="0.0.0.0",
            port=8080,
            log_level="DEBUG",
            api_key="test-api-key",
        )
        runtime_section = RuntimeConfig(
            type="kubernetes",
            execd_image="ghcr.io/opensandbox/execd:test",
        )
        agent_section = AgentSandboxRuntimeConfig(
            template_file="/tmp/template.yaml",
            shutdown_policy="Retain",
            ingress_enabled=True,
        )
        config = AppConfig(
            server=server_section,
            runtime=runtime_section,
            kubernetes=agent_sandbox_runtime_config,
            agent_sandbox=agent_section,
        )

        # Both collaborators are replaced so no real cluster access happens.
        with patch("src.services.k8s.kubernetes_service.K8sClient") as mock_k8s_client:
            with patch(
                "src.services.k8s.kubernetes_service.create_workload_provider"
            ) as mock_provider_factory:
                mock_provider_factory.return_value = MagicMock()

                service = KubernetesSandboxService(config)

                # Values copied from the configuration onto the service.
                assert service.namespace == agent_sandbox_runtime_config.namespace
                assert service.execd_image == config.runtime.execd_image

                # Collaborators were each invoked exactly once.
                mock_k8s_client.assert_called_once_with(agent_sandbox_runtime_config)
                mock_provider_factory.assert_called_once()

                # The factory received the provider type and the full app config.
                factory_kwargs = mock_provider_factory.call_args.kwargs
                assert factory_kwargs["provider_type"] == "agent-sandbox"
                forwarded_config = factory_kwargs["app_config"]
                assert forwarded_config.agent_sandbox.template_file == "/tmp/template.yaml"
                assert forwarded_config.agent_sandbox.shutdown_policy == "Retain"
                assert forwarded_config.kubernetes == agent_sandbox_runtime_config

    def test_init_without_kubernetes_config_raises_error(self):
        """
        Scenario: an agent_sandbox section combined with kubernetes=None is rejected
        while the AppConfig is being constructed.
        """
        expected_message = "agent_sandbox block requires kubernetes.workload_provider"
        with pytest.raises(ValidationError, match=expected_message):
            # Every sub-config is built inside the context, as the failure is
            # expected from the AppConfig construction expression as a whole.
            AppConfig(
                server=ServerConfig(
                    host="0.0.0.0",
                    port=8080,
                    log_level="DEBUG",
                    api_key="test-api-key",
                ),
                runtime=RuntimeConfig(
                    type="kubernetes",
                    execd_image="ghcr.io/opensandbox/execd:test",
                ),
                kubernetes=None,
                agent_sandbox=AgentSandboxRuntimeConfig(),
            )

    def test_init_with_wrong_runtime_type_raises_error(self, app_config_docker):
        """
        Scenario: constructing the service from a docker-typed config raises ValueError.
        """
        expected_message = "requires runtime.type = 'kubernetes'"
        with pytest.raises(ValueError, match=expected_message):
            KubernetesSandboxService(app_config_docker)

    def test_init_with_k8s_client_failure_raises_http_exception(self, agent_sandbox_app_config):
        """
        Scenario: a failing K8sClient constructor surfaces as an HTTPException (503)
        carrying the K8S_INITIALIZATION_ERROR code.
        """
        client_failure = Exception("Failed to load kubeconfig")
        with patch(
            "src.services.k8s.kubernetes_service.K8sClient",
            side_effect=client_failure,
        ):
            with pytest.raises(HTTPException) as exc_info:
                KubernetesSandboxService(agent_sandbox_app_config)

            raised = exc_info.value
            assert raised.status_code == 503
            detail = raised.detail
            assert "code" in detail
            assert detail["code"] == SandboxErrorCodes.K8S_INITIALIZATION_ERROR


class TestAgentSandboxServiceBuildSandbox:
    """Checks _build_sandbox_from_workload against a dict-shaped agent-sandbox workload."""

    def test_build_sandbox_from_workload_dict(self):
        """
        Scenario: id, image, entrypoint, metadata and status are derived from a dict workload
        """
        # Bypass __init__ so no cluster client is needed; only the provider is stubbed.
        service = object.__new__(KubernetesSandboxService)

        provider = MagicMock()
        provider.get_expiration.return_value = datetime(2025, 12, 31, tzinfo=timezone.utc)
        provider.get_status.return_value = {
            "state": "Running",
            "reason": "Ready",
            "message": "Ready",
            "last_transition_at": datetime(2025, 12, 31, tzinfo=timezone.utc),
        }
        service.workload_provider = provider

        labels = {
            "opensandbox.io/id": "sandbox-id",
            "team": "platform",
        }
        container = {
            "image": "python:3.11",
            "command": ["/bin/bash"],
        }
        workload = {
            "metadata": {
                "labels": labels,
                "creationTimestamp": "2025-12-31T09:00:00Z",
            },
            "spec": {
                "podTemplate": {
                    "spec": {
                        "containers": [container],
                    }
                }
            },
        }

        result = service._build_sandbox_from_workload(workload)

        assert result.id == "sandbox-id"
        assert result.image.uri == "python:3.11"
        assert result.entrypoint == ["/bin/bash"]
        assert result.metadata == {"team": "platform"}
        status = result.status
        assert isinstance(status, SandboxStatus)
        assert status.state == "Running"


================================================
FILE: server/tests/test_auth_middleware.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from fastapi import FastAPI
from fastapi.testclient import TestClient

from src.config import AppConfig, IngressConfig, RuntimeConfig, ServerConfig
from src.middleware.auth import AuthMiddleware


def _app_config_with_api_key() -> AppConfig:
    """Build a docker-runtime, direct-ingress AppConfig whose server API key is "secret-key"."""
    server_section = ServerConfig(api_key="secret-key")
    runtime_section = RuntimeConfig(type="docker", execd_image="opensandbox/execd:latest")
    ingress_section = IngressConfig(mode="direct")
    return AppConfig(
        server=server_section,
        runtime=runtime_section,
        ingress=ingress_section,
    )


def _build_test_app():
    """Return a FastAPI app wrapped in AuthMiddleware that exposes a single GET /secured route."""
    application = FastAPI()
    auth_config = _app_config_with_api_key()
    application.add_middleware(AuthMiddleware, config=auth_config)

    def secured_endpoint():
        return {"ok": True}

    # Equivalent to decorating the handler with @application.get("/secured").
    application.get("/secured")(secured_endpoint)

    return application


def test_auth_middleware_rejects_missing_key():
    """A request without the API key header is answered with 401 / MISSING_API_KEY."""
    http = TestClient(_build_test_app())
    reply = http.get("/secured")
    assert reply.status_code == 401
    assert reply.json()["code"] == "MISSING_API_KEY"


def test_auth_middleware_accepts_valid_key():
    """A request carrying the configured key in OPEN-SANDBOX-API-KEY reaches the handler."""
    auth_headers = {"OPEN-SANDBOX-API-KEY": "secret-key"}
    http = TestClient(_build_test_app())
    reply = http.get("/secured", headers=auth_headers)
    assert reply.status_code == 200
    assert reply.json() == {"ok": True}


def test_auth_middleware_skips_validation_for_proxy_to_sandbox():
    """Requests on the sandbox proxy route succeed without an API key header."""
    application = _build_test_app()

    @application.get("/sandboxes/{sandbox_id}/proxy/{port}/{full_path:path}")
    def proxy_echo(sandbox_id: str, port: int, full_path: str):
        return {"proxied": True, "sandbox_id": sandbox_id, "port": port, "path": full_path}

    # Deliberately sent without OPEN-SANDBOX-API-KEY.
    reply = TestClient(application).get("/sandboxes/abc-123/proxy/8080/foo/bar")
    assert reply.status_code == 200

    payload = reply.json()
    assert payload["proxied"] is True
    assert payload["sandbox_id"] == "abc-123"
    assert payload["port"] == 8080
    assert payload["path"] == "foo/bar"


def test_auth_middleware_v1_proxy_path_exempt():
    """The /v1-prefixed proxy route is reachable without an API key as well."""
    application = _build_test_app()

    @application.get("/v1/sandboxes/{sandbox_id}/proxy/{port}/{full_path:path}")
    def proxy_echo(sandbox_id: str, port: int, full_path: str):
        return {"proxied": True}

    reply = TestClient(application).get("/v1/sandboxes/sid/proxy/443/")
    assert reply.status_code == 200
    assert reply.json()["proxied"] is True


def test_auth_middleware_requires_key_for_non_proxy_paths_containing_proxy_and_sandboxes():
    """A path that merely contains 'proxy' and 'sandboxes', in the wrong shape, still needs a key."""
    application = _build_test_app()

    @application.get("/proxy/sandboxes/anything")
    def fake_proxy():
        return {"reached": True}

    reply = TestClient(application).get("/proxy/sandboxes/anything")
    assert reply.status_code == 401
    assert reply.json()["code"] == "MISSING_API_KEY"


def test_auth_middleware_requires_key_for_malformed_proxy_port():
    """A non-numeric port segment yields 401 from the middleware rather than 422 from routing."""
    application = _build_test_app()

    @application.get("/sandboxes/{sandbox_id}/proxy/{port}/{full_path:path}")
    def proxy_echo(sandbox_id: str, port: int, full_path: str):
        return {"proxied": True}

    reply = TestClient(application).get("/sandboxes/s1/proxy/not-a-port/x")
    assert reply.status_code == 401
    assert reply.json()["code"] == "MISSING_API_KEY"


def test_auth_middleware_is_proxy_path_rejects_traversal():
    """Paths with a '..' segment are never classified as proxy paths."""
    traversal_paths = (
        "/sandboxes/abc/proxy/8080/../other",
        "/sandboxes/../admin/proxy/8080",
    )
    for candidate in traversal_paths:
        assert AuthMiddleware._is_proxy_path(candidate) is False


def test_auth_middleware_is_proxy_path_accepts_valid_shapes():
    """_is_proxy_path is True only for the exact proxy route shape with a numeric port."""
    expectations = (
        ("/sandboxes/id/proxy/8080", True),
        ("/sandboxes/id/proxy/8080/", True),
        ("/v1/sandboxes/id/proxy/443/path", True),
        ("/proxy/sandboxes/x", False),
        ("/foo/sandboxes/id/proxy/8080", False),
        # A non-numeric port must not bypass auth (the request gets 401, not 422).
        ("/sandboxes/s1/proxy/not-a-port/x", False),
        ("/sandboxes/s1/proxy/8080x/", False),
    )
    for candidate, expected in expectations:
        assert AuthMiddleware._is_proxy_path(candidate) is expected


================================================
FILE: server/tests/test_config.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import textwrap

import pytest

from src import config as config_module
from src.config import (
    AppConfig,
    EGRESS_MODE_DNS,
    EGRESS_MODE_DNS_NFT,
    EgressConfig,
    GatewayConfig,
    GatewayRouteModeConfig,
    IngressConfig,
    RuntimeConfig,
    ServerConfig,
    StorageConfig,
)


def _reset_config(monkeypatch):
    """Set the config module's `_config` and `_config_path` globals to None for this test."""
    for global_name in ("_config", "_config_path"):
        monkeypatch.setattr(config_module, global_name, None, raising=False)


def test_load_config_from_file(tmp_path, monkeypatch):
    """load_config reads server, runtime and gateway-ingress values from a TOML file."""
    _reset_config(monkeypatch)
    toml_text = textwrap.dedent(
        """
        [server]
        host = "127.0.0.1"
        port = 9000
        log_level = "DEBUG"
        api_key = "secret"
        max_sandbox_timeout_seconds = 172800

        [runtime]
        type = "kubernetes"
        execd_image = "opensandbox/execd:test"

        [ingress]
        mode = "gateway"
        gateway.address = "*.opensandbox.io"
        gateway.route.mode = "wildcard"
        """
    )
    target = tmp_path / "config.toml"
    target.write_text(toml_text)

    loaded = config_module.load_config(target)

    # [server]
    server = loaded.server
    assert server.host == "127.0.0.1"
    assert server.port == 9000
    assert server.log_level == "DEBUG"
    assert server.api_key == "secret"
    assert server.max_sandbox_timeout_seconds == 172800

    # [runtime]
    runtime = loaded.runtime
    assert runtime.type == "kubernetes"
    assert runtime.execd_image == "opensandbox/execd:test"

    # [ingress]
    ingress = loaded.ingress
    assert ingress is not None
    assert ingress.mode == "gateway"
    gateway = ingress.gateway
    assert gateway is not None
    assert gateway.address == "*.opensandbox.io"
    assert gateway.route.mode == "wildcard"

    # No [kubernetes] table in the file, yet the section is present after loading.
    assert loaded.kubernetes is not None


def test_docker_runtime_disallows_kubernetes_block():
    """Pairing a docker runtime with a kubernetes section makes AppConfig raise ValueError."""
    sections = dict(
        server=ServerConfig(),
        runtime=RuntimeConfig(type="docker", execd_image="busybox:latest"),
        kubernetes=config_module.KubernetesRuntimeConfig(namespace="sandbox"),
    )
    with pytest.raises(ValueError):
        AppConfig(**sections)


def test_server_config_defaults_include_max_sandbox_timeout():
    """A default ServerConfig leaves max_sandbox_timeout_seconds as None."""
    assert ServerConfig().max_sandbox_timeout_seconds is None


def test_kubernetes_runtime_fills_missing_block():
    """With a kubernetes runtime and no kubernetes section given, AppConfig.kubernetes is not None."""
    server_section = ServerConfig()
    runtime_section = RuntimeConfig(type="kubernetes", execd_image="opensandbox/execd:latest")
    built = AppConfig(server=server_section, runtime=runtime_section)
    assert built.kubernetes is not None


def test_ingress_gateway_requires_gateway_block():
    """Gateway mode without a gateway section is rejected; with one it is accepted."""
    with pytest.raises(ValueError):
        IngressConfig(mode="gateway")

    gateway_section = GatewayConfig(
        address="gateway.opensandbox.io",
        route=GatewayRouteModeConfig(mode="uri"),
    )
    accepted = IngressConfig(mode="gateway", gateway=gateway_section)
    assert accepted.gateway.route.mode == "uri"


def test_gateway_address_validation_for_wildcard_mode():
    """In wildcard route mode "*.opensandbox.io" is accepted and the other listed addresses are rejected."""

    def wildcard_ingress(address):
        # One gateway-mode IngressConfig using the wildcard route for `address`.
        return IngressConfig(
            mode="gateway",
            gateway=GatewayConfig(
                address=address,
                route=GatewayRouteModeConfig(mode="wildcard"),
            ),
        )

    # A plain hostname is not acceptable for wildcard routing.
    with pytest.raises(ValueError):
        wildcard_ingress("gateway.opensandbox.io")

    accepted = wildcard_ingress("*.opensandbox.io")
    assert accepted.gateway.address == "*.opensandbox.io"

    rejected_addresses = (
        "10.0.0.1",
        "http://10.0.0.1:8080",
        "10.0.0.1:8080",
        "https://*.opensandbox.io",
    )
    for address in rejected_addresses:
        with pytest.raises(ValueError):
            wildcard_ingress(address)


def test_gateway_route_mode_allows_wildcard_alias():
    """The route mode value "wildcard" is accepted and stored unchanged."""
    route_section = GatewayRouteModeConfig(mode="wildcard")
    gateway_section = GatewayConfig(address="*.opensandbox.io", route=route_section)
    ingress = IngressConfig(mode="gateway", gateway=gateway_section)
    assert ingress.gateway.route.mode == "wildcard"


def test_gateway_address_validation_for_non_wildcard_mode():
    """For the header and uri route modes, check which gateway addresses are rejected and which are kept."""

    def gateway_ingress(address, route_mode):
        # One gateway-mode IngressConfig for the given address / route mode pair.
        return IngressConfig(
            mode="gateway",
            gateway=GatewayConfig(
                address=address,
                route=GatewayRouteModeConfig(mode=route_mode),
            ),
        )

    # (address, route mode) pairs that must fail validation.
    rejected_cases = (
        ("*.opensandbox.io", "header"),
        ("not a host", "uri"),
        ("gateway.opensandbox.io:8080", "header"),
        ("10.0.0.1:70000", "header"),
        ("ftp://gateway.opensandbox.io", "header"),
        ("http://", "header"),
        ("http://user:pass@gateway.opensandbox.io", "header"),
        ("http://gateway.opensandbox.io:8080", "header"),
        ("10.0.0.1:0", "uri"),
        ("10.0.0.1:abc", "uri"),
        ("http://[::1]", "header"),
    )
    for address, route_mode in rejected_cases:
        with pytest.raises(ValueError):
            gateway_ingress(address, route_mode)

    # (address, route mode) pairs that must be accepted with the address preserved.
    accepted_cases = (
        ("gateway.opensandbox.io", "uri"),
        ("10.0.0.1", "header"),
        ("10.0.0.1:8080", "header"),
    )
    for address, route_mode in accepted_cases:
        ingress = gateway_ingress(address, route_mode)
        assert ingress.gateway.address == address


def test_gateway_address_allows_scheme_less_defaults():
    """A scheme-less wildcard address is accepted; the same address with https:// is rejected."""

    def wildcard_ingress(address):
        return IngressConfig(
            mode="gateway",
            gateway=GatewayConfig(
                address=address,
                route=GatewayRouteModeConfig(mode="wildcard"),
            ),
        )

    accepted = wildcard_ingress("*.example.com")
    assert accepted.gateway.address == "*.example.com"

    with pytest.raises(ValueError):
        wildcard_ingress("https://*.example.com")


def test_direct_mode_rejects_gateway_block():
    """Supplying a gateway section together with mode="direct" raises ValueError."""
    with pytest.raises(ValueError):
        route_section = GatewayRouteModeConfig(mode="header")
        gateway_section = GatewayConfig(address="gateway.opensandbox.io", route=route_section)
        IngressConfig(mode="direct", gateway=gateway_section)


def test_docker_runtime_rejects_gateway_ingress():
    """A docker runtime refuses gateway ingress but accepts direct ingress."""
    base_sections = dict(
        server=ServerConfig(),
        runtime=RuntimeConfig(type="docker", execd_image="busybox:latest"),
    )

    with pytest.raises(ValueError):
        gateway_ingress = IngressConfig(
            mode="gateway",
            gateway=GatewayConfig(
                address="gateway.opensandbox.io",
                route=GatewayRouteModeConfig(mode="header"),
            ),
        )
        AppConfig(ingress=gateway_ingress, **base_sections)

    # Direct ingress is still allowed with the same server/runtime sections.
    direct_cfg = AppConfig(ingress=IngressConfig(mode="direct"), **base_sections)
    assert direct_cfg.ingress.mode == "direct"


# ============================================================================
# StorageConfig Tests
# ============================================================================


def test_storage_config_defaults():
    """A StorageConfig built with no arguments has an empty allowed_host_paths list."""
    assert StorageConfig().allowed_host_paths == []


def test_storage_config_with_paths():
    """Explicit allowed_host_paths are stored as given."""
    storage = StorageConfig(allowed_host_paths=["/data/opensandbox", "/tmp/sandbox"])
    expected_paths = ["/data/opensandbox", "/tmp/sandbox"]
    assert storage.allowed_host_paths == expected_paths


def test_app_config_default_storage():
    """An AppConfig built without a storage section still exposes an empty StorageConfig."""
    built = AppConfig(
        server=ServerConfig(),
        runtime=RuntimeConfig(type="docker", execd_image="busybox:latest"),
    )
    storage = built.storage
    assert storage is not None
    assert storage.allowed_host_paths == []


def test_load_config_with_storage_block(tmp_path, monkeypatch):
    """The [storage] TOML table populates AppConfig.storage.allowed_host_paths."""
    _reset_config(monkeypatch)
    toml_text = textwrap.dedent(
        """
        [server]
        host = "127.0.0.1"
        port = 9000

        [runtime]
        type = "docker"
        execd_image = "ghcr.io/opensandbox/platform:test"

        [router]
        domain = "opensandbox.io"

        [storage]
        allowed_host_paths = ["/data/opensandbox", "/tmp/sandbox"]
        """
    )
    target = tmp_path / "config.toml"
    target.write_text(toml_text)

    storage = config_module.load_config(target).storage
    assert storage is not None
    assert storage.allowed_host_paths == ["/data/opensandbox", "/tmp/sandbox"]


def test_load_config_without_storage_block_uses_defaults(tmp_path, monkeypatch):
    """Without a [storage] table, the loaded config has a StorageConfig with no allowed paths."""
    _reset_config(monkeypatch)
    toml_text = textwrap.dedent(
        """
        [server]
        host = "127.0.0.1"
        port = 9000

        [runtime]
        type = "docker"
        execd_image = "ghcr.io/opensandbox/platform:test"

        [router]
        domain = "opensandbox.io"
        """
    )
    target = tmp_path / "config.toml"
    target.write_text(toml_text)

    storage = config_module.load_config(target).storage
    assert storage is not None
    assert storage.allowed_host_paths == []


# ============================================================================
# SecureRuntimeConfig Tests
# ============================================================================


def test_secure_runtime_empty_type_is_valid():
    """An empty type is accepted and both runtime fields stay None."""
    secure = config_module.SecureRuntimeConfig(type="")
    assert secure.type == ""
    for unset_field in (secure.docker_runtime, secure.k8s_runtime_class):
        assert unset_field is None


def test_secure_runtime_gvisor_with_docker_runtime_is_valid():
    """gVisor configured with a docker runtime and a k8s runtime class is accepted as given."""
    fields = dict(type="gvisor", docker_runtime="runsc", k8s_runtime_class="gvisor")
    secure = config_module.SecureRuntimeConfig(**fields)
    assert secure.type == "gvisor"
    assert secure.docker_runtime == "runsc"
    assert secure.k8s_runtime_class == "gvisor"


def test_secure_runtime_gvisor_with_k8s_runtime_class_is_valid():
    """gVisor with only a k8s runtime class (docker_runtime=None) is accepted."""
    fields = dict(type="gvisor", docker_runtime=None, k8s_runtime_class="gvisor")
    secure = config_module.SecureRuntimeConfig(**fields)
    assert secure.type == "gvisor"
    assert secure.docker_runtime is None
    assert secure.k8s_runtime_class == "gvisor"


def test_secure_runtime_kata_with_runtimes_is_valid():
    """Kata configured with both a docker runtime and a k8s runtime class is accepted."""
    fields = dict(type="kata", docker_runtime="kata-runtime", k8s_runtime_class="kata-qemu")
    secure = config_module.SecureRuntimeConfig(**fields)
    assert secure.type == "kata"
    assert secure.docker_runtime == "kata-runtime"
    assert secure.k8s_runtime_class == "kata-qemu"


def test_secure_runtime_firecracker_with_k8s_runtime_is_valid():
    """Firecracker with an empty docker_runtime and a k8s runtime class is accepted."""
    fields = dict(type="firecracker", docker_runtime="", k8s_runtime_class="kata-fc")
    secure = config_module.SecureRuntimeConfig(**fields)
    assert secure.type == "firecracker"
    assert secure.docker_runtime == ""
    assert secure.k8s_runtime_class == "kata-fc"


def test_secure_runtime_firecracker_without_k8s_runtime_raises_error():
    """Firecracker with k8s_runtime_class=None is rejected, and the error names that field."""
    fields = dict(type="firecracker", docker_runtime="", k8s_runtime_class=None)
    with pytest.raises(ValueError) as exc:
        config_module.SecureRuntimeConfig(**fields)
    message = str(exc.value).lower()
    assert "k8s_runtime_class" in message


def test_secure_runtime_gvisor_without_any_runtime_raises_error():
    """gVisor with neither runtime set is rejected; the error mentions at least one runtime field."""
    fields = dict(type="gvisor", docker_runtime=None, k8s_runtime_class=None)
    with pytest.raises(ValueError) as exc:
        config_module.SecureRuntimeConfig(**fields)
    message = str(exc.value).lower()
    assert any(field in message for field in ("docker_runtime", "k8s_runtime_class"))


def test_secure_runtime_kata_without_any_runtime_raises_error():
    """Kata with neither runtime set is rejected; the error mentions at least one runtime field."""
    fields = dict(type="kata", docker_runtime=None, k8s_runtime_class=None)
    with pytest.raises(ValueError) as exc:
        config_module.SecureRuntimeConfig(**fields)
    message = str(exc.value).lower()
    assert any(field in message for field in ("docker_runtime", "k8s_runtime_class"))


def test_secure_runtime_invalid_type_raises_error():
    """An unsupported secure runtime type must be rejected with a validation error.

    The expectation is ValueError rather than bare Exception: that is the
    exception type the other SecureRuntimeConfig validation tests in this file
    assert on, and pydantic's ValidationError is a ValueError subclass.
    Catching Exception would let an unrelated failure (TypeError,
    AttributeError, NameError, ...) make this test pass.
    """
    with pytest.raises(ValueError):
        config_module.SecureRuntimeConfig(type="invalid_runtime")


def test_app_config_with_secure_runtime():
    """AppConfig accepts a secure_runtime mapping and exposes it as a parsed section."""
    runtime_mapping = {"type": "docker", "execd_image": "execd:v1"}
    secure_mapping = {
        "type": "gvisor",
        "docker_runtime": "runsc",
        "k8s_runtime_class": "gvisor",
    }
    built = AppConfig(runtime=runtime_mapping, secure_runtime=secure_mapping)

    secure = built.secure_runtime
    assert secure is not None
    assert secure.type == "gvisor"
    assert secure.docker_runtime == "runsc"


def test_app_config_without_secure_runtime():
    """When no secure_runtime is supplied, AppConfig.secure_runtime is None."""
    runtime_mapping = {"type": "docker", "execd_image": "execd:v1"}
    built = AppConfig(runtime=runtime_mapping)
    assert built.secure_runtime is None


def test_load_config_with_secure_runtime(tmp_path, monkeypatch):
    """load_config should populate secure_runtime from a [secure_runtime] TOML table."""
    _reset_config(monkeypatch)
    toml_text = textwrap.dedent(
        """
        [server]
        host = "127.0.0.1"
        port = 9000

        [runtime]
        type = "docker"
        execd_image = "ghcr.io/opensandbox/platform:test"

        [secure_runtime]
        type = "gvisor"
        docker_runtime = "runsc"
        k8s_runtime_class = "gvisor"
        """
    )
    # Write the document to a temporary file so load_config reads it from disk.
    config_file = tmp_path / "config.toml"
    config_file.write_text(toml_text)

    secure = config_module.load_config(config_file).secure_runtime
    assert secure is not None
    assert secure.type == "gvisor"
    assert secure.docker_runtime == "runsc"
    assert secure.k8s_runtime_class == "gvisor"


def test_docker_runtime_with_firecracker_raises_error():
    """Combining the Docker runtime with a Firecracker secure runtime must fail.

    Firecracker (kata-fc) is only offered as a Kubernetes RuntimeClass and has
    no Docker OCI runtime equivalent. Rejecting the combination guards against
    a silent fallback to runc, which would skip the intended microVM isolation.
    """
    docker_runtime = {"type": "docker", "execd_image": "execd:v1"}
    firecracker = {"type": "firecracker", "k8s_runtime_class": "kata-fc"}

    with pytest.raises(ValueError) as exc:
        AppConfig(runtime=docker_runtime, secure_runtime=firecracker)

    # The message should explain both what was rejected and where it is supported.
    message = str(exc.value).lower()
    assert "firecracker" in message
    assert "kubernetes" in message


def test_kubernetes_runtime_with_firecracker_is_valid():
    """Firecracker is accepted when the main runtime is Kubernetes."""
    sections = {
        "runtime": {"type": "kubernetes", "execd_image": "execd:v1"},
        "kubernetes": {"namespace": "default"},
        "secure_runtime": {"type": "firecracker", "k8s_runtime_class": "kata-fc"},
    }
    cfg = AppConfig(**sections)

    assert cfg.runtime.type == "kubernetes"
    secure = cfg.secure_runtime
    assert secure is not None
    assert secure.type == "firecracker"
    assert secure.k8s_runtime_class == "kata-fc"


def test_egress_config_mode_literal():
    """EgressConfig defaults ``mode`` to EGRESS_MODE_DNS and accepts EGRESS_MODE_DNS_NFT."""
    default_cfg = EgressConfig(image="opensandbox/egress:v1")
    assert default_cfg.mode == EGRESS_MODE_DNS

    explicit_cfg = EgressConfig(image="opensandbox/egress:v1", mode=EGRESS_MODE_DNS_NFT)
    assert explicit_cfg.mode == EGRESS_MODE_DNS_NFT


================================================
FILE: server/tests/test_docker_endpoint.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
from unittest.mock import MagicMock, patch

from src.services.constants import (
    OPEN_SANDBOX_EGRESS_AUTH_HEADER,
    SANDBOX_EMBEDDING_PROXY_PORT_LABEL,
    SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY,
)
from src.services.docker import DockerSandboxService
from src.config import AppConfig, RuntimeConfig, DockerConfig, ServerConfig

@pytest.fixture
def mock_docker_service():
    """Yield a ``(service, client)`` pair: a DockerSandboxService backed by a mocked Docker client."""
    # Baseline configuration: bridge networking, no router.
    base_config = AppConfig(
        server=ServerConfig(port=8080, host="0.0.0.0"),
        runtime=RuntimeConfig(type="docker", execd_image="test/execd:latest"),
        router=None,
        docker=DockerConfig(network_mode="bridge"),
    )
    fake_client = MagicMock()

    with patch("docker.from_env", return_value=fake_client):
        sandbox_service = DockerSandboxService(config=base_config)
        # Assign the mock explicitly so tests control the client no matter how
        # the constructor obtained its own.
        sandbox_service.docker_client = fake_client

        yield sandbox_service, fake_client

def test_get_endpoint_host_mode(mock_docker_service):
    """Host mode: external endpoint uses the resolved bind IP, internal one uses 127.0.0.1."""
    svc, docker_client = mock_docker_service
    svc.app_config.docker.network_mode = "host"
    svc.network_mode = "host"

    running = MagicMock()
    running.attrs = {"State": {"Running": True}}
    docker_client.containers.list.return_value = [running]

    bind_ip_patch = patch(
        "src.services.sandbox_service.SandboxService._resolve_bind_ip",
        return_value="10.0.0.1",
    )
    with bind_ip_patch:
        external = svc.get_endpoint("sbx-123", 8080, resolve_internal=False)
        assert external.endpoint == "10.0.0.1:8080"

    # Internal resolution is exercised outside the bind-IP patch.
    internal = svc.get_endpoint("sbx-123", 8080, resolve_internal=True)
    assert internal.endpoint == "127.0.0.1:8080"


def test_get_endpoint_bridge_http_port(mock_docker_service):
    """Bridge mode: port 8080 resolves to the host port stored in the http-port label."""
    svc, docker_client = mock_docker_service
    svc.app_config.docker.network_mode = "bridge"
    svc.network_mode = "bridge"

    container = MagicMock()
    container.attrs = {
        "State": {"Running": True},
        "Config": {
            "Labels": {
                "opensandbox.io/embedding-proxy-port": "50002",
                "opensandbox.io/http-port": "50001",
            }
        },
        "NetworkSettings": {"IPAddress": "172.17.0.5"},
    }
    docker_client.containers.list.return_value = [container]

    bind_ip_patch = patch(
        "src.services.sandbox_service.SandboxService._resolve_bind_ip",
        return_value="192.168.1.100",
    )
    with bind_ip_patch:
        resolved = svc.get_endpoint("sbx-123", 8080, resolve_internal=False)

    assert resolved.endpoint == "192.168.1.100:50001"


def test_get_endpoint_bridge_other_port_via_execd(mock_docker_service):
    """Bridge mode: a non-HTTP port is reached through the proxy port as ``/proxy/<port>``."""
    svc, docker_client = mock_docker_service
    svc.app_config.docker.network_mode = "bridge"
    svc.network_mode = "bridge"

    container = MagicMock()
    container.attrs = {
        "State": {"Running": True},
        "Config": {
            "Labels": {
                "opensandbox.io/embedding-proxy-port": "50002",
                "opensandbox.io/http-port": "50001",
            }
        },
        "NetworkSettings": {"IPAddress": "172.17.0.5"},
    }
    docker_client.containers.list.return_value = [container]

    bind_ip_patch = patch(
        "src.services.sandbox_service.SandboxService._resolve_bind_ip",
        return_value="192.168.1.100",
    )
    with bind_ip_patch:
        resolved = svc.get_endpoint("sbx-123", 6000, resolve_internal=False)

    assert resolved.endpoint == "192.168.1.100:50002/proxy/6000"


def test_get_endpoint_bridge_egress_port_includes_auth_header(mock_docker_service):
    """Bridge mode: resolving port 18080 returns the proxy endpoint plus the egress auth header."""
    svc, docker_client = mock_docker_service
    svc.app_config.docker.network_mode = "bridge"
    svc.network_mode = "bridge"

    container = MagicMock()
    container.attrs = {
        "State": {"Running": True},
        "Config": {
            "Labels": {
                "opensandbox.io/embedding-proxy-port": "50002",
                "opensandbox.io/http-port": "50001",
                "opensandbox.io/egress-auth-token": "egress-token",
            }
        },
        "NetworkSettings": {"IPAddress": "172.17.0.5"},
    }
    docker_client.containers.list.return_value = [container]

    bind_ip_patch = patch(
        "src.services.sandbox_service.SandboxService._resolve_bind_ip",
        return_value="192.168.1.100",
    )
    with bind_ip_patch:
        resolved = svc.get_endpoint("sbx-123", 18080, resolve_internal=False)

    assert resolved.endpoint == "192.168.1.100:50002/proxy/18080"
    # The token stored in the container label is surfaced as a request header.
    assert resolved.headers == {OPEN_SANDBOX_EGRESS_AUTH_HEADER: "egress-token"}


def test_get_endpoint_bridge_non_egress_port_still_includes_instance_auth_header(
    mock_docker_service,
):
    """Bridge mode: the auth header is also returned for port 44772 when a token label exists."""
    svc, docker_client = mock_docker_service
    svc.app_config.docker.network_mode = "bridge"
    svc.network_mode = "bridge"

    container = MagicMock()
    container.attrs = {
        "State": {"Running": True},
        "Config": {
            "Labels": {
                SANDBOX_EMBEDDING_PROXY_PORT_LABEL: "50002",
                SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY: "egress-token",
            }
        },
        "NetworkSettings": {"IPAddress": "172.17.0.5"},
    }
    docker_client.containers.list.return_value = [container]

    bind_ip_patch = patch(
        "src.services.sandbox_service.SandboxService._resolve_bind_ip",
        return_value="192.168.1.100",
    )
    with bind_ip_patch:
        resolved = svc.get_endpoint("sbx-123", 44772, resolve_internal=False)

    assert resolved.endpoint == "192.168.1.100:50002/proxy/44772"
    assert resolved.headers == {OPEN_SANDBOX_EGRESS_AUTH_HEADER: "egress-token"}

def test_get_endpoint_bridge_internal_resolution(mock_docker_service):
    """Bridge mode with resolve_internal=True returns ``<container IP>:<port>``."""
    svc, docker_client = mock_docker_service
    svc.app_config.docker.network_mode = "bridge"
    svc.network_mode = "bridge"

    container = MagicMock()
    container.attrs = {
        "State": {"Running": True},
        "NetworkSettings": {"IPAddress": "10.0.0.5"},
    }
    docker_client.containers.list.return_value = [container]

    resolved = svc.get_endpoint("sbx-123", 8080, resolve_internal=True)

    assert resolved.endpoint == "10.0.0.5:8080"


def test_get_endpoint_bridge_internal_resolution_with_egress_sidecar_falls_back_to_host_mapped_endpoint(
    mock_docker_service,
):
    """Internal resolution with a token label and no container IP yields the loopback proxy endpoint."""
    svc, docker_client = mock_docker_service
    svc.app_config.docker.network_mode = "bridge"
    svc.network_mode = "bridge"

    container = MagicMock()
    container.attrs = {
        "State": {"Running": True},
        "Config": {
            "Labels": {
                SANDBOX_EMBEDDING_PROXY_PORT_LABEL: "50002",
                SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY: "egress-token",
            }
        },
        # No container IP is reported in this scenario.
        "NetworkSettings": {"IPAddress": ""},
    }
    docker_client.containers.list.return_value = [container]

    resolved = svc.get_endpoint("sbx-123", 18080, resolve_internal=True)

    assert resolved.endpoint == "127.0.0.1:50002/proxy/18080"
    assert resolved.headers == {OPEN_SANDBOX_EGRESS_AUTH_HEADER: "egress-token"}


def test_get_endpoint_bridge_internal_resolution_with_egress_sidecar_ignores_container_ip(
    mock_docker_service,
):
    """Internal resolution with a token label uses the loopback proxy even if a container IP exists."""
    svc, docker_client = mock_docker_service
    svc.app_config.docker.network_mode = "bridge"
    svc.network_mode = "bridge"

    container = MagicMock()
    container.attrs = {
        "State": {"Running": True},
        "Config": {
            "Labels": {
                SANDBOX_EMBEDDING_PROXY_PORT_LABEL: "50002",
                SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY: "egress-token",
            }
        },
        # An IP is present here, but the expected endpoint does not use it.
        "NetworkSettings": {"IPAddress": "10.0.0.5"},
    }
    docker_client.containers.list.return_value = [container]

    resolved = svc.get_endpoint("sbx-123", 18080, resolve_internal=True)

    assert resolved.endpoint == "127.0.0.1:50002/proxy/18080"
    assert resolved.headers == {OPEN_SANDBOX_EGRESS_AUTH_HEADER: "egress-token"}


def test_get_endpoint_bridge_internal_resolution_with_egress_sidecar_uses_proxy_host_not_eip(
    mock_docker_service,
):
    """Internal resolution with a token label stays on 127.0.0.1 even when ``server.eip`` is set."""
    svc, docker_client = mock_docker_service
    server_cfg = svc.app_config.server
    server_cfg.host = "0.0.0.0"
    server_cfg.eip = "203.0.113.10"
    svc.app_config.docker.network_mode = "bridge"
    svc.network_mode = "bridge"

    container = MagicMock()
    container.attrs = {
        "State": {"Running": True},
        "Config": {
            "Labels": {
                SANDBOX_EMBEDDING_PROXY_PORT_LABEL: "50002",
                SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY: "egress-token",
            }
        },
        "NetworkSettings": {"IPAddress": ""},
    }
    docker_client.containers.list.return_value = [container]

    resolved = svc.get_endpoint("sbx-123", 18080, resolve_internal=True)

    # The configured EIP must not appear in the internally resolved endpoint.
    assert resolved.endpoint == "127.0.0.1:50002/proxy/18080"
    assert resolved.headers == {OPEN_SANDBOX_EGRESS_AUTH_HEADER: "egress-token"}


def test_get_endpoint_bridge_uses_docker_host_ip_when_server_in_container():
    """With the server reported as running inside a container, the endpoint uses ``[docker].host_ip``."""
    containerized_config = AppConfig(
        server=ServerConfig(port=8080, host="0.0.0.0"),
        runtime=RuntimeConfig(type="docker", execd_image="test/execd:latest"),
        router=None,
        docker=DockerConfig(network_mode="bridge", host_ip="10.57.1.91"),
    )
    docker_client = MagicMock()
    with patch("docker.from_env", return_value=docker_client):
        svc = DockerSandboxService(config=containerized_config)
        svc.docker_client = docker_client

    container = MagicMock()
    container.attrs = {
        "State": {"Running": True},
        "Config": {
            "Labels": {
                "opensandbox.io/embedding-proxy-port": "40109",
                "opensandbox.io/http-port": "50001",
            }
        },
        "NetworkSettings": {"IPAddress": "172.17.0.5"},
    }
    docker_client.containers.list.return_value = [container]

    # Force the "running inside Docker" detection to report True.
    in_container_patch = patch(
        "src.services.docker._running_inside_docker_container",
        return_value=True,
    )
    with in_container_patch:
        resolved = svc.get_endpoint("sbx-123", 44772, resolve_internal=False)

    assert resolved.endpoint == "10.57.1.91:40109/proxy/44772"


# ---------------------------------------------------------------------------
# User-defined network endpoint tests
# ---------------------------------------------------------------------------

def test_get_endpoint_user_defined_network_external(mock_docker_service):
    """On a user-defined network, external endpoints come from the host port labels, as in bridge mode."""
    svc, docker_client = mock_docker_service
    svc.app_config.docker.network_mode = "my-app-net"
    svc.network_mode = "my-app-net"

    container = MagicMock()
    container.attrs = {
        "State": {"Running": True},
        "Config": {
            "Labels": {
                "opensandbox.io/embedding-proxy-port": "51000",
                "opensandbox.io/http-port": "51001",
            }
        },
        "NetworkSettings": {
            "IPAddress": "",
            "Networks": {"my-app-net": {"IPAddress": "192.168.100.5"}},
        },
    }
    docker_client.containers.list.return_value = [container]

    bind_ip_patch = patch(
        "src.services.sandbox_service.SandboxService._resolve_bind_ip",
        return_value="10.0.1.1",
    )
    with bind_ip_patch:
        http_endpoint = svc.get_endpoint("sbx-123", 8080, resolve_internal=False)
        proxied_endpoint = svc.get_endpoint("sbx-123", 5000, resolve_internal=False)

    assert http_endpoint.endpoint == "10.0.1.1:51001"
    assert proxied_endpoint.endpoint == "10.0.1.1:51000/proxy/5000"


def test_get_endpoint_user_defined_network_internal_prefers_configured_network(mock_docker_service):
    """Internal resolution on a user-defined network picks the IP attached to that network."""
    svc, docker_client = mock_docker_service
    svc.app_config.docker.network_mode = "my-app-net"
    svc.network_mode = "my-app-net"

    attached_networks = {
        "bridge": {"IPAddress": "172.17.0.3"},
        "my-app-net": {"IPAddress": "192.168.100.5"},
    }
    container = MagicMock()
    container.attrs = {
        "State": {"Running": True},
        # The top-level IPAddress is empty for user-defined networks.
        "NetworkSettings": {"IPAddress": "", "Networks": attached_networks},
    }
    docker_client.containers.list.return_value = [container]

    resolved = svc.get_endpoint("sbx-123", 8080, resolve_internal=True)

    # The configured network's IP wins over the default bridge entry.
    assert resolved.endpoint == "192.168.100.5:8080"


def test_extract_bridge_ip_falls_back_when_named_network_ip_missing(mock_docker_service):
    """_extract_bridge_ip returns another network's IP when the configured network's entry is empty."""
    svc, _unused_client = mock_docker_service
    svc.network_mode = "my-app-net"

    attached_networks = {
        # Empty entry simulates a container that is still attaching.
        "my-app-net": {"IPAddress": ""},
        "bridge": {"IPAddress": "172.17.0.9"},
    }
    container = MagicMock()
    container.attrs = {
        "NetworkSettings": {"IPAddress": "", "Networks": attached_networks},
    }

    assert svc._extract_bridge_ip(container) == "172.17.0.9"


================================================
FILE: server/tests/test_docker_path_fix.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import posixpath
from unittest.mock import MagicMock, patch
from src.services.docker import DockerSandboxService, EXECED_INSTALL_PATH, BOOTSTRAP_PATH
from src.config import AppConfig, RuntimeConfig, ServerConfig

def _app_config() -> AppConfig:
    """Build a minimal Docker-runtime AppConfig with default server settings."""
    docker_runtime = RuntimeConfig(
        type="docker",
        execd_image="ghcr.io/opensandbox/platform:latest",
    )
    return AppConfig(server=ServerConfig(), runtime=docker_runtime)

def test_container_internal_paths_use_posix_style():
    """Container-internal paths must be forward-slash paths with the expected values."""
    for container_path in (EXECED_INSTALL_PATH, BOOTSTRAP_PATH):
        # No Windows-style separators, and at least one POSIX separator.
        assert "\\" not in container_path
        assert "/" in container_path

    assert EXECED_INSTALL_PATH == "/opt/opensandbox/execd"
    assert BOOTSTRAP_PATH == "/opt/opensandbox/bootstrap.sh"

@patch("src.services.docker.docker")
def test_copy_execd_to_container_uses_posix_dirname(mock_docker):
    """_copy_execd_to_container should ensure the POSIX parent directory of the execd path."""
    svc = DockerSandboxService(config=_app_config())
    container = MagicMock()

    # Stub out archive fetching, directory creation and the Docker call wrapper
    # so only the directory argument is observed.
    with patch.object(svc, "_fetch_execd_archive", return_value=b"fake-archive"):
        with patch.object(svc, "_ensure_directory") as ensure_dir:
            with patch.object(svc, "_docker_operation"):
                svc._copy_execd_to_container(container, "test-sandbox")

                # Parent computed with posixpath so the result is independent of the host OS.
                parent_dir = posixpath.dirname(EXECED_INSTALL_PATH.rstrip("/")) or "/"
                ensure_dir.assert_called_once_with(container, parent_dir, "test-sandbox")

@patch("src.services.docker.docker")
def test_install_bootstrap_script_uses_posix_dirname(mock_docker):
    """_install_bootstrap_script should ensure the POSIX directory that holds the bootstrap script."""
    svc = DockerSandboxService(config=_app_config())
    container = MagicMock()

    with patch.object(svc, "_ensure_directory") as ensure_dir:
        with patch.object(svc, "_docker_operation"):
            svc._install_bootstrap_script(container, "test-sandbox")

            # Directory computed with posixpath so the result is independent of the host OS.
            script_dir = posixpath.dirname(BOOTSTRAP_PATH)
            ensure_dir.assert_called_once_with(container, script_dir, "test-sandbox")


================================================
FILE: server/tests/test_docker_service.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from datetime import datetime, timedelta, timezone
from typing import Any, cast
from unittest.mock import AsyncMock, MagicMock, patch

from docker.errors import DockerException, NotFound as DockerNotFound
import pytest
from fastapi import HTTPException, status
from pydantic import ValidationError

from src.config import (
    AppConfig,
    EGRESS_MODE_DNS,
    EgressConfig,
    RuntimeConfig,
    ServerConfig,
    StorageConfig,
    IngressConfig,
)
from src.services.constants import EGRESS_MODE_ENV, OPENSANDBOX_EGRESS_TOKEN
from src.services.constants import (
    SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY,
    SANDBOX_EXPIRES_AT_LABEL,
    SANDBOX_ID_LABEL,
    SANDBOX_MANUAL_CLEANUP_LABEL,
    SANDBOX_OSSFS_MOUNTS_LABEL,
    SandboxErrorCodes,
)
from src.services.docker import DockerSandboxService, PendingSandbox
from src.services.helpers import parse_memory_limit, parse_nano_cpus, parse_timestamp
from src.api.schema import (
    CreateSandboxRequest,
    CreateSandboxResponse,
    Host,
    ImageSpec,
    NetworkPolicy,
    ListSandboxesRequest,
    OSSFS,
    PVC,
    ResourceLimits,
    Sandbox,
    SandboxFilter,
    SandboxStatus,
    Volume,
)


def _app_config() -> AppConfig:
    """Build a Docker-runtime AppConfig with default server settings and direct ingress."""
    docker_runtime = RuntimeConfig(
        type="docker",
        execd_image="ghcr.io/opensandbox/platform:latest",
    )
    direct_ingress = IngressConfig(mode="direct")
    return AppConfig(server=ServerConfig(), runtime=docker_runtime, ingress=direct_ingress)


def test_parse_memory_limit_handles_units():
    """parse_memory_limit converts known unit suffixes to bytes and returns None for garbage."""
    expected_bytes = (
        ("512Mi", 512 * 1024 * 1024),
        ("1G", 1_000_000_000),
        ("2gi", 2 * 1024**3),
    )
    for raw_limit, byte_count in expected_bytes:
        assert parse_memory_limit(raw_limit) == byte_count

    assert parse_memory_limit("invalid") is None


def test_parse_nano_cpus():
    """parse_nano_cpus converts millicore and whole-core strings and returns None for garbage."""
    for raw_cpu, nano_cpus in (("500m", 500_000_000), ("2", 2_000_000_000)):
        assert parse_nano_cpus(raw_cpu) == nano_cpus

    assert parse_nano_cpus("bad") is None


def test_parse_timestamp_defaults_on_invalid():
    """parse_timestamp returns a tz-aware value for the zero timestamp and parses a regular one."""
    zero_value = parse_timestamp("0001-01-01T00:00:00Z")
    assert zero_value.tzinfo is not None

    regular_value = parse_timestamp("2024-01-01T00:00:00Z")
    assert regular_value.year == 2024


def test_env_allows_empty_string_and_skips_none():
    """Env entries with empty strings are kept as ``KEY=`` while None values are dropped."""
    # Instantiate the service with the shared base config.
    DockerSandboxService(config=_app_config())

    # Request carrying a regular value, an empty string and a None value.
    request = CreateSandboxRequest(
        image=ImageSpec(uri="python:3.11"),
        timeout=120,
        resourceLimits=ResourceLimits(root={}),
        env={"FOO": "bar", "EMPTY": "", "NONE": None},
        metadata={},
        entrypoint=["python"],
    )

    # Render KEY=VALUE pairs, leaving out entries whose value is None.
    rendered = [
        f"{name}={value}"
        for name, value in (request.env or {}).items()
        if value is not None
    ]

    assert "FOO=bar" in rendered
    assert "EMPTY=" in rendered  # empty string preserved
    # No entry may have been produced for the None-valued key.
    assert not any(entry.startswith("NONE=") for entry in rendered)


@pytest.mark.asyncio
@patch("src.services.docker.docker")
async def test_create_sandbox_applies_security_defaults(mock_docker):
    """The host config passed to create_container carries the configured security settings."""
    docker_defaults = _app_config().docker

    docker_client = MagicMock()
    docker_client.containers.list.return_value = []
    docker_client.api.create_host_config.return_value = {
        "security_opt": ["no-new-privileges:true"],
        "cap_drop": docker_defaults.drop_capabilities,
        "pids_limit": docker_defaults.pids_limit,
    }
    docker_client.api.create_container.return_value = {"Id": "cid"}
    docker_client.containers.get.return_value = MagicMock()
    mock_docker.from_env.return_value = docker_client

    svc = DockerSandboxService(config=_app_config())
    create_request = CreateSandboxRequest(
        image=ImageSpec(uri="python:3.11"),
        timeout=120,
        resourceLimits=ResourceLimits(root={}),
        env={},
        metadata={},
        entrypoint=["python"],
    )

    # Image pulling and runtime preparation are stubbed out.
    with patch.object(svc, "_ensure_image_available"):
        with patch.object(svc, "_prepare_sandbox_runtime"):
            await svc.create_sandbox(create_request)

    passed_host_config = docker_client.api.create_container.call_args.kwargs["host_config"]
    configured = svc.app_config.docker
    assert "no-new-privileges:true" in passed_host_config.get("security_opt", [])
    assert passed_host_config.get("cap_drop") == configured.drop_capabilities
    assert passed_host_config.get("pids_limit") == configured.pids_limit


@pytest.mark.parametrize(
    "runtime_exc, expected_status, expect_wrapped_error",
    [
        (
            RuntimeError("tarfile error"),
            status.HTTP_500_INTERNAL_SERVER_ERROR,
            True,
        ),
        (
            HTTPException(
                status_code=status.HTTP_409_CONFLICT,
                detail={"code": "CONFLICT", "message": "conflict error"},
            ),
            status.HTTP_409_CONFLICT,
            False,
        ),
    ],
)
@pytest.mark.asyncio
@patch("src.services.docker.docker")
async def test_prepare_runtime_failure_triggers_cleanup(
    mock_docker, runtime_exc, expected_status, expect_wrapped_error
):
    """A failure in _prepare_sandbox_runtime force-removes the container and surfaces an HTTPException.

    Two cases are covered: a generic error, whose text must appear in the
    resulting message, and an HTTPException, whose message must come through
    unchanged.
    """
    created_container = MagicMock()
    docker_client = MagicMock()
    docker_client.containers.list.return_value = []
    docker_client.api.create_container.return_value = {"Id": "cid"}
    docker_client.containers.get.return_value = created_container
    mock_docker.from_env.return_value = docker_client

    svc = DockerSandboxService(config=_app_config())
    create_request = CreateSandboxRequest(
        image=ImageSpec(uri="python:3.11"),
        timeout=120,
        resourceLimits=ResourceLimits(root={}),
        env={},
        metadata={},
        entrypoint=["python"],
    )

    with patch.object(svc, "_ensure_image_available"):
        with patch.object(svc, "_prepare_sandbox_runtime", side_effect=runtime_exc):
            with pytest.raises(HTTPException) as exc:
                await svc.create_sandbox(create_request)

    # Cleanup must have force-removed the container that was created.
    created_container.remove.assert_called_with(force=True)

    raised = exc.value
    assert raised.status_code == expected_status

    reported_message = raised.detail["message"]
    if expect_wrapped_error:
        assert str(runtime_exc) in str(reported_message)
    else:
        assert reported_message == runtime_exc.detail["message"]


@pytest.mark.asyncio
@patch("src.services.docker.docker")
async def test_create_sandbox_rejects_invalid_metadata(mock_docker):
    """An invalid metadata label key is rejected with 400 before any container is created."""
    mock_client = MagicMock()
    mock_client.containers.list.return_value = []
    mock_docker.from_env.return_value = mock_client

    service = DockerSandboxService(config=_app_config())

    request = CreateSandboxRequest(
        image=ImageSpec(uri="python:3.11"),
        timeout=120,
        resourceLimits=ResourceLimits(root={}),
        env={},
        metadata={"Bad Key": "ok"},  # space is invalid for label key
        entrypoint=["python"],
    )

    with pytest.raises(HTTPException) as exc:
        await service.create_sandbox(request)

    assert exc.value.status_code == status.HTTP_400_BAD_REQUEST
    assert exc.value.detail["code"] == SandboxErrorCodes.INVALID_METADATA_LABEL
    mock_client.containers.create.assert_not_called()
    # The other tests in this module observe container creation through the
    # low-level client (client.api.create_container), so that is the call that
    # must be checked for the "nothing was created" guarantee to mean anything.
    mock_client.api.create_container.assert_not_called()


@pytest.mark.asyncio
@patch("src.services.docker.docker")
async def test_create_sandbox_rejects_timeout_above_configured_maximum(mock_docker):
    """A requested timeout above ``max_sandbox_timeout_seconds`` is rejected with 400."""
    docker_client = MagicMock()
    docker_client.containers.list.return_value = []
    mock_docker.from_env.return_value = docker_client

    capped_config = _app_config()
    capped_config.server.max_sandbox_timeout_seconds = 3600
    svc = DockerSandboxService(config=capped_config)

    # 7200s is twice the configured ceiling.
    oversized_request = CreateSandboxRequest(
        image=ImageSpec(uri="python:3.11"),
        timeout=7200,
        resourceLimits=ResourceLimits(root={}),
        env={},
        metadata={},
        entrypoint=["python"],
    )

    with pytest.raises(HTTPException) as exc:
        await svc.create_sandbox(oversized_request)

    rejection = exc.value
    assert rejection.status_code == status.HTTP_400_BAD_REQUEST
    assert rejection.detail["code"] == SandboxErrorCodes.INVALID_PARAMETER
    assert "configured maximum of 3600s" in rejection.detail["message"]


@pytest.mark.asyncio
@patch("src.services.docker.docker")
async def test_create_sandbox_requires_entrypoint(mock_docker):
    """An empty entrypoint is rejected with 400 before any container is created."""
    mock_client = MagicMock()
    mock_client.containers.list.return_value = []
    mock_docker.from_env.return_value = mock_client

    service = DockerSandboxService(config=_app_config())

    request = CreateSandboxRequest(
        image=ImageSpec(uri="python:3.11"),
        timeout=120,
        resourceLimits=ResourceLimits(root={}),
        env={},
        metadata={},
        entrypoint=["python"],
    )
    # Cleared after construction so the service-level check is what gets exercised.
    request.entrypoint = []

    with pytest.raises(HTTPException) as exc:
        await service.create_sandbox(request)

    assert exc.value.status_code == status.HTTP_400_BAD_REQUEST
    assert exc.value.detail["code"] == SandboxErrorCodes.INVALID_ENTRYPOINT
    mock_client.containers.create.assert_not_called()
    # The other tests in this module observe container creation through the
    # low-level client (client.api.create_container), so that is the call that
    # must be checked for the "nothing was created" guarantee to mean anything.
    mock_client.api.create_container.assert_not_called()


@pytest.mark.asyncio
@patch("src.services.docker.docker")
async def test_network_policy_rejected_on_host_mode(mock_docker):
    """A request carrying a network policy is rejected with 400 under host networking."""
    docker_client = MagicMock()
    docker_client.containers.list.return_value = []
    mock_docker.from_env.return_value = docker_client

    host_mode_config = _app_config()
    host_mode_config.docker.network_mode = "host"
    host_mode_config.egress = EgressConfig(image="egress:latest")
    svc = DockerSandboxService(config=host_mode_config)

    deny_all = NetworkPolicy(default_action="deny", egress=[])
    policy_request = CreateSandboxRequest(
        image=ImageSpec(uri="python:3.11"),
        timeout=120,
        resourceLimits=ResourceLimits(root={}),
        env={},
        metadata={},
        entrypoint=["python"],
        networkPolicy=deny_all,
    )

    with pytest.raises(HTTPException) as exc:
        await svc.create_sandbox(policy_request)

    rejection = exc.value
    assert rejection.status_code == status.HTTP_400_BAD_REQUEST
    assert rejection.detail["code"] == SandboxErrorCodes.INVALID_PARAMETER


@pytest.mark.asyncio
@patch("src.services.docker.docker")
async def test_network_policy_requires_egress_image(mock_docker):
    """A request carrying a network policy is rejected with 400 when no egress config exists."""
    docker_client = MagicMock()
    docker_client.containers.list.return_value = []
    mock_docker.from_env.return_value = docker_client

    no_egress_config = _app_config()
    no_egress_config.docker.network_mode = "bridge"
    no_egress_config.egress = None
    svc = DockerSandboxService(config=no_egress_config)

    deny_all = NetworkPolicy(default_action="deny", egress=[])
    policy_request = CreateSandboxRequest(
        image=ImageSpec(uri="python:3.11"),
        timeout=120,
        resourceLimits=ResourceLimits(root={}),
        env={},
        metadata={},
        entrypoint=["python"],
        networkPolicy=deny_all,
    )

    with pytest.raises(HTTPException) as exc:
        await svc.create_sandbox(policy_request)

    rejection = exc.value
    assert rejection.status_code == status.HTTP_400_BAD_REQUEST
    assert rejection.detail["code"] == SandboxErrorCodes.INVALID_PARAMETER


@pytest.mark.asyncio
@patch("src.services.docker.docker")
async def test_egress_sidecar_injection_and_capabilities(mock_docker):
    """With a networkPolicy in bridge mode, two containers are created: sidecar first, then main.

    Checks the host config, labels and environment passed to each
    ``create_container`` call.
    """
    docker_client = MagicMock()
    docker_client.containers.list.return_value = []
    # Echo the host-config kwargs back so they can be inspected as a plain dict below.
    docker_client.api.create_host_config.side_effect = lambda **kwargs: kwargs
    docker_client.api.create_container.side_effect = [
        {"Id": "sidecar-id"},
        {"Id": "main-id"},
    ]
    docker_client.containers.get.side_effect = [MagicMock(id="sidecar-id"), MagicMock(id="main-id")]
    mock_docker.from_env.return_value = docker_client

    app_cfg = _app_config()
    app_cfg.docker.network_mode = "bridge"
    app_cfg.egress = EgressConfig(image="egress:latest")
    service = DockerSandboxService(config=app_cfg)

    create_req = CreateSandboxRequest(
        image=ImageSpec(uri="python:3.11"),
        timeout=120,
        resourceLimits=ResourceLimits(root={}),
        env={},
        metadata={},
        entrypoint=["python"],
        networkPolicy=NetworkPolicy(default_action="deny", egress=[]),
    )

    with (
        patch("src.services.docker.generate_egress_token", return_value="egress-token"),
        patch.object(service, "_allocate_distinct_host_ports", return_value=(44772, 8080)),
        patch.object(service, "_ensure_image_available"),
        patch.object(service, "_prepare_sandbox_runtime"),
    ):
        await service.create_sandbox(create_req)

    create_calls = docker_client.api.create_container.call_args_list
    assert len(create_calls) == 2
    sidecar_kwargs, main_kwargs = (recorded.kwargs for recorded in create_calls)

    # Sidecar: holds NET_ADMIN and owns both published host ports.
    sidecar_host_cfg = sidecar_kwargs["host_config"]
    assert "NET_ADMIN" in sidecar_host_cfg["cap_add"]
    for published_port in ("44772", "8080"):
        assert published_port in sidecar_host_cfg["port_bindings"]

    # Main container: joins the sidecar's network namespace, drops NET_ADMIN, publishes nothing.
    main_host_cfg = main_kwargs["host_config"]
    assert main_host_cfg["network_mode"] == "container:sidecar-id"
    assert "NET_ADMIN" in set(main_host_cfg.get("cap_drop") or [])
    assert "port_bindings" not in main_host_cfg

    # Main container labels carry the host port info and the egress auth token.
    main_labels = main_kwargs["labels"]
    assert main_labels.get("opensandbox.io/embedding-proxy-port")
    assert main_labels.get("opensandbox.io/http-port")
    assert main_labels[SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY] == "egress-token"

    # Sidecar environment receives the token and the DNS egress mode.
    sidecar_environment = sidecar_kwargs["environment"]
    assert f"{OPENSANDBOX_EGRESS_TOKEN}=egress-token" in sidecar_environment
    assert f"{EGRESS_MODE_ENV}={EGRESS_MODE_DNS}" in sidecar_environment


# ---------------------------------------------------------------------------
# User-defined network tests
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
@patch("src.services.docker.docker")
async def test_network_policy_rejected_on_user_defined_network(mock_docker):
    """A networkPolicy request on a user-defined named network yields a 400 naming that network."""
    docker_client = MagicMock()
    docker_client.containers.list.return_value = []
    mock_docker.from_env.return_value = docker_client

    app_cfg = _app_config()
    app_cfg.docker.network_mode = "my-custom-net"
    app_cfg.egress = EgressConfig(image="egress:latest")
    service = DockerSandboxService(config=app_cfg)

    deny_all = NetworkPolicy(default_action="deny", egress=[])
    create_req = CreateSandboxRequest(
        image=ImageSpec(uri="python:3.11"),
        timeout=120,
        resourceLimits=ResourceLimits(root={}),
        env={},
        metadata={},
        entrypoint=["python"],
        networkPolicy=deny_all,
    )

    with pytest.raises(HTTPException) as raised:
        await service.create_sandbox(create_req)

    error = raised.value
    assert error.status_code == status.HTTP_400_BAD_REQUEST
    assert error.detail["code"] == SandboxErrorCodes.INVALID_PARAMETER
    # The error message must name the offending network.
    assert "my-custom-net" in error.detail["message"]


@pytest.mark.asyncio
@patch("src.services.docker.docker")
async def test_create_sandbox_fails_when_user_defined_network_not_found(mock_docker):
    """A missing named network makes create_sandbox raise a 400 with an actionable message."""
    from docker.errors import NotFound as DockerNotFound

    docker_client = MagicMock()
    docker_client.containers.list.return_value = []
    # The network lookup fails the same way the Docker SDK reports an unknown network.
    docker_client.networks.get.side_effect = DockerNotFound("network not found")
    mock_docker.from_env.return_value = docker_client

    app_cfg = _app_config()
    app_cfg.docker.network_mode = "missing-net"
    service = DockerSandboxService(config=app_cfg)

    create_req = CreateSandboxRequest(
        image=ImageSpec(uri="python:3.11"),
        timeout=120,
        resourceLimits=ResourceLimits(root={}),
        env={},
        metadata={},
        entrypoint=["python"],
    )

    with pytest.raises(HTTPException) as raised:
        await service.create_sandbox(create_req)

    error = raised.value
    assert error.status_code == status.HTTP_400_BAD_REQUEST
    assert error.detail["code"] == SandboxErrorCodes.INVALID_PARAMETER

    # The message names the network and includes the command that creates it.
    error_message = error.detail["message"]
    assert "missing-net" in error_message
    assert "docker network create" in error_message


@pytest.mark.asyncio
@patch("src.services.docker.docker")
async def test_create_sandbox_user_defined_network_uses_correct_network_mode(mock_docker):
    """The configured network name is passed through as the container's network_mode."""
    docker_client = MagicMock()
    docker_client.containers.list.return_value = []
    docker_client.networks.get.return_value = MagicMock()  # the named network exists
    # Echo the host-config kwargs back so network_mode can be read from a plain dict.
    docker_client.api.create_host_config.side_effect = lambda **kwargs: kwargs
    docker_client.api.create_container.return_value = {"Id": "main-id"}
    docker_client.containers.get.return_value = MagicMock(id="main-id")
    mock_docker.from_env.return_value = docker_client

    app_cfg = _app_config()
    app_cfg.docker.network_mode = "my-app-net"
    service = DockerSandboxService(config=app_cfg)

    create_req = CreateSandboxRequest(
        image=ImageSpec(uri="python:3.11"),
        timeout=120,
        resourceLimits=ResourceLimits(root={}),
        env={},
        metadata={},
        entrypoint=["python"],
    )

    with (
        patch.object(service, "_ensure_image_available"),
        patch.object(service, "_prepare_sandbox_runtime"),
    ):
        await service.create_sandbox(create_req)

    host_cfg = docker_client.api.create_container.call_args.kwargs["host_config"]
    assert host_cfg["network_mode"] == "my-app-net"


@patch("src.services.docker.docker")
def test_validate_network_skipped_for_builtin_modes(mock_docker):
    """_validate_network_exists never calls networks.get for the host, bridge or none modes."""
    docker_client = MagicMock()
    docker_client.containers.list.return_value = []
    mock_docker.from_env.return_value = docker_client

    builtin_modes = ("host", "bridge", "none")
    for builtin_mode in builtin_modes:
        # Start each mode from a clean call history on the shared client mock.
        docker_client.networks.get.reset_mock()

        app_cfg = _app_config()
        app_cfg.docker.network_mode = builtin_mode
        service = DockerSandboxService(config=app_cfg)

        service._validate_network_exists()

        docker_client.networks.get.assert_not_called()


@patch("src.services.docker.docker")
def test_egress_sidecar_cleanup_uses_api_remove_when_lookup_fails(mock_docker):
    """If looking up the created sidecar raises DockerException, it is force-removed by ID.

    The failure must surface as a 500 with the generic sidecar start message.
    """
    docker_client = MagicMock()
    docker_client.containers.list.return_value = []
    docker_client.api.create_host_config.side_effect = lambda **kwargs: kwargs
    # Creation succeeds, but fetching the container object afterwards fails.
    docker_client.api.create_container.return_value = {"Id": "sidecar-id"}
    docker_client.containers.get.side_effect = DockerException("lookup failed")
    mock_docker.from_env.return_value = docker_client

    app_cfg = _app_config()
    app_cfg.docker.network_mode = "bridge"
    app_cfg.egress = EgressConfig(image="egress:latest")
    service = DockerSandboxService(config=app_cfg)

    with (
        patch.object(service, "_ensure_image_available"),
        patch.object(service, "_docker_operation") as docker_op,
    ):
        # Make the patched _docker_operation usable as a context manager that swallows nothing.
        operation_ctx = docker_op.return_value
        operation_ctx.__enter__.return_value = None
        operation_ctx.__exit__.return_value = None

        with pytest.raises(HTTPException) as raised:
            service._start_egress_sidecar(
                "sandbox-id",
                NetworkPolicy(defaultAction="deny", egress=[]),
                egress_token="egress-token",
                host_execd_port=44772,
                host_http_port=8080,
            )

    error = raised.value
    raw_detail = error.detail
    assert isinstance(raw_detail, dict)
    error_detail = cast(dict[str, Any], raw_detail)
    assert error.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR
    assert error_detail["message"] == "Egress sidecar container failed to start."
    docker_client.api.remove_container.assert_called_once_with("sidecar-id", force=True)


@patch("src.services.docker.docker")
def test_egress_sidecar_missing_id_preserves_specific_error(mock_docker):
    """When create_container returns no "Id", the dedicated missing-ID message is reported.

    No container lookup or removal may be attempted in that case.
    """
    docker_client = MagicMock()
    docker_client.containers.list.return_value = []
    docker_client.api.create_host_config.side_effect = lambda **kwargs: kwargs
    # An empty creation response: there is no container ID to work with.
    docker_client.api.create_container.return_value = {}
    mock_docker.from_env.return_value = docker_client

    app_cfg = _app_config()
    app_cfg.docker.network_mode = "bridge"
    app_cfg.egress = EgressConfig(image="egress:latest")
    service = DockerSandboxService(config=app_cfg)

    with (
        patch.object(service, "_ensure_image_available"),
        patch.object(service, "_docker_operation") as docker_op,
    ):
        # Make the patched _docker_operation usable as a context manager that swallows nothing.
        operation_ctx = docker_op.return_value
        operation_ctx.__enter__.return_value = None
        operation_ctx.__exit__.return_value = None

        with pytest.raises(HTTPException) as raised:
            service._start_egress_sidecar(
                "sandbox-id",
                NetworkPolicy(defaultAction="deny", egress=[]),
                egress_token="egress-token",
                host_execd_port=44772,
                host_http_port=8080,
            )

    error = raised.value
    raw_detail = error.detail
    assert isinstance(raw_detail, dict)
    error_detail = cast(dict[str, Any], raw_detail)
    assert error.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR
    assert error_detail["message"] == "Docker did not return an egress sidecar container ID."
    docker_client.containers.get.assert_not_called()
    docker_client.api.remove_container.assert_not_called()


@patch("src.services.docker.docker")
def test_egress_sidecar_cleanup_wraps_unexpected_lookup_error(mock_docker):
    """A non-Docker error during sidecar lookup is wrapped in the standard start-failure 500.

    The already-created sidecar must still be force-removed by ID.
    """
    docker_client = MagicMock()
    docker_client.containers.list.return_value = []
    docker_client.api.create_host_config.side_effect = lambda **kwargs: kwargs
    docker_client.api.create_container.return_value = {"Id": "sidecar-id"}
    # RuntimeError rather than DockerException: an exception type outside the Docker SDK.
    docker_client.containers.get.side_effect = RuntimeError("lookup failed")
    mock_docker.from_env.return_value = docker_client

    app_cfg = _app_config()
    app_cfg.docker.network_mode = "bridge"
    app_cfg.egress = EgressConfig(image="egress:latest")
    service = DockerSandboxService(config=app_cfg)

    with (
        patch.object(service, "_ensure_image_available"),
        patch.object(service, "_docker_operation") as docker_op,
    ):
        # Make the patched _docker_operation usable as a context manager that swallows nothing.
        operation_ctx = docker_op.return_value
        operation_ctx.__enter__.return_value = None
        operation_ctx.__exit__.return_value = None

        with pytest.raises(HTTPException) as raised:
            service._start_egress_sidecar(
                "sandbox-id",
                NetworkPolicy(defaultAction="deny", egress=[]),
                egress_token="egress-token",
                host_execd_port=44772,
                host_http_port=8080,
            )

    error = raised.value
    raw_detail = error.detail
    assert isinstance(raw_detail, dict)
    error_detail = cast(dict[str, Any], raw_detail)
    assert error.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR
    assert error_detail["code"] == SandboxErrorCodes.CONTAINER_START_FAILED
    assert error_detail["message"] == "Egress sidecar container failed to start."
    docker_client.api.remove_container.assert_called_once_with("sidecar-id", force=True)


def test_expire_cleans_sidecar():
    """_expire_sandbox cleans up the egress sidecar and drops expiration tracking."""
    service = DockerSandboxService(config=_app_config())

    # A stopped container that carries no labels.
    stopped_container = MagicMock()
    stopped_container.attrs = {"State": {"Running": False}, "Config": {"Labels": {}}}
    stopped_container.kill = MagicMock()
    stopped_container.remove = MagicMock()

    with (
        patch.object(service, "_get_container_by_sandbox_id", return_value=stopped_container),
        patch.object(service, "_remove_expiration_tracking") as untrack,
        patch.object(service, "_cleanup_egress_sidecar") as sidecar_cleanup,
        patch.object(service, "_docker_operation") as docker_op,
    ):
        # Make the patched _docker_operation usable as a context manager that swallows nothing.
        operation_ctx = docker_op.return_value
        operation_ctx.__enter__.return_value = None
        operation_ctx.__exit__.return_value = None

        service._expire_sandbox("sandbox-id")

    sidecar_cleanup.assert_called_once_with("sandbox-id")
    untrack.assert_called_once()


def test_restore_cleans_orphan_sidecar():
    """On restore, a sidecar whose sandbox lookup returns 404 is cleaned up by sandbox ID."""
    service = DockerSandboxService(config=_app_config())

    # A container labelled as the egress sidecar of sandbox "orphan-id".
    sidecar_labels = {"opensandbox.io/egress-sidecar-for": "orphan-id"}
    orphan_sidecar = MagicMock()
    orphan_sidecar.attrs = {"Config": {"Labels": sidecar_labels}}

    # Looking up the owning sandbox fails with 404.
    sandbox_missing = HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail={})

    with (
        patch.object(service.docker_client.containers, "list", return_value=[orphan_sidecar]),
        patch.object(service, "_get_container_by_sandbox_id", side_effect=sandbox_missing),
        patch.object(service, "_cleanup_egress_sidecar") as sidecar_cleanup,
    ):
        service._restore_existing_sandboxes()

    sidecar_cleanup.assert_called_once_with("orphan-id")


def test_prepare_creation_context_allows_manual_cleanup():
    """A request without a timeout yields no expiration from _prepare_creation_context."""
    service = DockerSandboxService(config=_app_config())

    # No ``timeout`` is supplied here.
    no_timeout_request = CreateSandboxRequest(
        image=ImageSpec(uri="python:3.11"),
        resourceLimits=ResourceLimits(root={}),
        env={},
        metadata={},
        entrypoint=["python"],
    )

    creation_context = service._prepare_creation_context(no_timeout_request)
    _, _, expires_at = creation_context

    assert expires_at is None


def test_build_labels_marks_manual_cleanup_without_expiration():
    """With expiration passed as None, labels flag manual cleanup and omit expires-at."""
    service = DockerSandboxService(config=_app_config())
    no_timeout_request = CreateSandboxRequest(
        image=ImageSpec(uri="python:3.11"),
        resourceLimits=ResourceLimits(root={}),
        env={},
        metadata={"team": "manual"},
        entrypoint=["python"],
    )

    # Third argument is the expiration; None stands for "no automatic expiry".
    built = service._build_labels_and_env("sandbox-manual", no_timeout_request, None)
    container_labels, _ = built

    assert container_labels[SANDBOX_ID_LABEL] == "sandbox-manual"
    assert container_labels[SANDBOX_MANUAL_CLEANUP_LABEL] == "true"
    assert "opensandbox.io/expires-at" not in container_labels


@pytest.mark.asyncio
@patch("src.services.docker.docker")
async def test_create_sandbox_with_manual_cleanup_completes_full_create_path(mock_docker):
    """create_sandbox without a timeout creates the container but schedules no expiration."""
    docker_client = MagicMock()
    docker_client.containers.list.return_value = []
    mock_docker.from_env.return_value = docker_client

    service = DockerSandboxService(config=_app_config())
    # No ``timeout`` is supplied here.
    no_timeout_request = CreateSandboxRequest(
        image=ImageSpec(uri="python:3.11"),
        resourceLimits=ResourceLimits(root={}),
        env={"DEBUG": "1"},
        metadata={"team": "manual"},
        entrypoint=["python"],
    )

    with (
        patch.object(service, "_create_and_start_container") as create_container,
        patch.object(service, "_schedule_expiration") as schedule_expiration,
    ):
        created = await service.create_sandbox(no_timeout_request)

    # The response echoes the request and carries no expiry.
    assert created.expires_at is None
    assert created.metadata == {"team": "manual"}
    assert created.entrypoint == ["python"]

    create_container.assert_called_once()
    schedule_expiration.assert_not_called()


def test_restore_existing_sandboxes_ignores_manual_cleanup_without_warning():
    """Restoring a manual-cleanup sandbox schedules no expiration and logs no warning."""
    service = DockerSandboxService(config=_app_config())

    # A sandbox container flagged for manual cleanup, with no expires-at label.
    manual_labels = {
        SANDBOX_ID_LABEL: "manual-id",
        SANDBOX_MANUAL_CLEANUP_LABEL: "true",
    }
    manual_container = MagicMock()
    manual_container.attrs = {"Config": {"Labels": manual_labels}}

    with (
        patch.object(service.docker_client.containers, "list", return_value=[manual_container]),
        patch("src.services.docker.logger.warning") as log_warning,
        patch.object(service, "_schedule_expiration") as schedule_expiration,
    ):
        service._restore_existing_sandboxes()

    schedule_expiration.assert_not_called()
    log_warning.assert_not_called()


def test_renew_expiration_rejects_manual_cleanup_sandbox():
    """Renewing a manual-cleanup sandbox fails with 409 and an explanatory message."""
    service = DockerSandboxService(config=_app_config())

    manual_labels = {
        SANDBOX_ID_LABEL: "manual-id",
        SANDBOX_MANUAL_CLEANUP_LABEL: "true",
    }
    manual_container = MagicMock()
    manual_container.attrs = {"Config": {"Labels": manual_labels}}

    # Ask for an expiry one hour from now.
    one_hour_ahead = datetime.now(timezone.utc) + timedelta(hours=1)
    renew_request = MagicMock(expires_at=one_hour_ahead)

    with (
        patch.object(service, "_get_container_by_sandbox_id", return_value=manual_container),
        pytest.raises(HTTPException) as exc_info,
    ):
        service.renew_expiration("manual-id", renew_request)

    error = exc_info.value
    assert error.status_code == status.HTTP_409_CONFLICT
    assert error.detail["message"] == "Sandbox manual-id does not have automatic expiration enabled."


@pytest.mark.asyncio
@patch("src.services.docker.docker")
async def test_create_sandbox_async_returns_provisioning(mock_docker):
    """Awaiting the patched create_sandbox returns the canned response once.

    NOTE(review): ``create_sandbox`` itself is replaced by an ``AsyncMock`` here, so the
    assertions only see the canned response, not the real creation path. The test name
    mentions "provisioning" while the canned state is "Running" - confirm the intent.
    """
    docker_client = MagicMock()
    docker_client.containers.list.return_value = []
    mock_docker.from_env.return_value = docker_client

    service = DockerSandboxService(config=_app_config())

    create_req = CreateSandboxRequest(
        image=ImageSpec(uri="python:3.11"),
        timeout=120,
        resourceLimits=ResourceLimits(root={}),
        env={},
        metadata={"team": "async"},
        entrypoint=["python", "app.py"],
    )

    running_status = SandboxStatus(
        state="Running",
        reason="CONTAINER_RUNNING",
        message="started",
        last_transition_at=datetime.now(timezone.utc),
    )
    canned_response = CreateSandboxResponse(
        id="sandbox-sync",
        status=running_status,
        metadata={"team": "async"},
        expiresAt=datetime.now(timezone.utc),
        createdAt=datetime.now(timezone.utc),
        entrypoint=["python", "app.py"],
    )

    with patch.object(
        service,
        "create_sandbox",
        new_callable=AsyncMock,
        return_value=canned_response,
    ) as patched_create:
        result = await service.create_sandbox(create_req)

    assert result.status.state == "Running"
    assert result.metadata == {"team": "async"}
    patched_create.assert_called_once()


@pytest.mark.asyncio
@patch("src.services.docker.docker")
async def test_get_sandbox_returns_pending_state(mock_docker):
    """Awaiting the patched create_sandbox returns the canned state and entrypoint.

    NOTE(review): ``create_sandbox`` itself is replaced by an ``AsyncMock`` here and
    ``get_sandbox`` is never called, so only the canned response is checked. The test name
    mentions a pending state while the canned state is "Running" - confirm the intent.
    """
    docker_client = MagicMock()
    docker_client.containers.list.return_value = []
    mock_docker.from_env.return_value = docker_client

    service = DockerSandboxService(config=_app_config())

    create_req = CreateSandboxRequest(
        image=ImageSpec(uri="python:3.11"),
        timeout=120,
        resourceLimits=ResourceLimits(root={}),
        env={},
        metadata={},
        entrypoint=["python", "app.py"],
    )

    running_status = SandboxStatus(
        state="Running",
        reason="CONTAINER_RUNNING",
        message="started",
        last_transition_at=datetime.now(timezone.utc),
    )
    canned_response = CreateSandboxResponse(
        id="sandbox-sync",
        status=running_status,
        metadata={},
        expiresAt=datetime.now(timezone.utc),
        createdAt=datetime.now(timezone.utc),
        entrypoint=["python", "app.py"],
    )

    with patch.object(
        service,
        "create_sandbox",
        new_callable=AsyncMock,
        return_value=canned_response,
    ):
        result = await service.create_sandbox(create_req)

    assert result.status.state == "Running"
    assert result.entrypoint == ["python", "app.py"]


@patch("src.services.docker.docker")
def test_list_sandboxes_deduplicates_container_and_pending(mock_docker):
    """list_sandboxes returns exactly one item for a single listed container."""
    sandbox_id = "sandbox-123"

    # Realistic attrs (string timestamps, state fields) to avoid parse_timestamp errors.
    listed_container = MagicMock()
    listed_container.attrs = {
        "Config": {"Labels": {SANDBOX_ID_LABEL: sandbox_id}},
        "Created": "2025-01-01T00:00:00Z",
        "State": {
            "Status": "running",
            "Running": True,
            "FinishedAt": "0001-01-01T00:00:00Z",
            "ExitCode": 0,
        },
    }
    listed_container.image = MagicMock(tags=["image:latest"], short_id="sha-image")

    docker_client = MagicMock()
    docker_client.containers.list.return_value = [listed_container]
    mock_docker.from_env.return_value = docker_client

    service = DockerSandboxService(config=_app_config())

    # The sandbox representation the container conversion is forced to return.
    running_status = SandboxStatus(
        state="Running",
        reason="CONTAINER_RUNNING",
        message="running",
        last_transition_at=datetime.now(timezone.utc),
    )
    converted_sandbox = Sandbox(
        id=sandbox_id,
        image=ImageSpec(uri="image:latest"),
        status=running_status,
        metadata={"team": "c"},
        entrypoint=["/bin/sh"],
        expiresAt=datetime.now(timezone.utc),
        createdAt=datetime.now(timezone.utc),
    )
    service._container_to_sandbox = MagicMock(return_value=converted_sandbox)

    list_request = ListSandboxesRequest(filter=SandboxFilter(), pagination=None)
    listing = service.list_sandboxes(list_request)

    assert len(listing.items) == 1
    only_item = listing.items[0]
    assert only_item.status.state == "Running"
    assert only_item.metadata == {"team": "c"}


@patch("src.services.docker.docker")
def test_get_sandbox_prefers_container_over_pending(mock_docker):
    """get_sandbox reports the container's state even when a pending entry has the same ID."""
    docker_client = MagicMock()
    docker_client.containers.list.return_value = []
    mock_docker.from_env.return_value = docker_client

    service = DockerSandboxService(config=_app_config())
    sandbox_id = "sandbox-abc"

    # Register a pending record for the sandbox...
    pending_request = MagicMock(
        metadata={},
        entrypoint=["/bin/sh"],
        image=ImageSpec(uri="image:latest"),
    )
    service._pending_sandboxes[sandbox_id] = PendingSandbox(
        request=pending_request,
        created_at=datetime.now(timezone.utc),
        expires_at=datetime.now(timezone.utc),
        status=SandboxStatus(
            state="Pending",
            reason="SANDBOX_SCHEDULED",
            message="pending",
            last_transition_at=datetime.now(timezone.utc),
        ),
    )

    # ...and make the container lookup succeed with a Running sandbox for the same ID.
    running_sandbox = Sandbox(
        id=sandbox_id,
        image=ImageSpec(uri="image:latest"),
        status=SandboxStatus(
            state="Running",
            reason="CONTAINER_RUNNING",
            message="running",
            last_transition_at=datetime.now(timezone.utc),
        ),
        metadata={},
        entrypoint=["/bin/sh"],
        expiresAt=datetime.now(timezone.utc),
        createdAt=datetime.now(timezone.utc),
    )
    service._get_container_by_sandbox_id = MagicMock(return_value=MagicMock())
    service._container_to_sandbox = MagicMock(return_value=running_sandbox)

    fetched = service.get_sandbox(sandbox_id)

    assert fetched.status.state == "Running"
    assert fetched.entrypoint == ["/bin/sh"]


@patch("src.services.docker.docker")
def test_async_worker_cleans_up_leftover_container_on_failure(mock_docker):
    """A provisioning failure triggers container cleanup and marks the pending entry Failed."""
    docker_client = MagicMock()
    docker_client.containers.list.return_value = []
    mock_docker.from_env.return_value = docker_client

    service = DockerSandboxService(config=_app_config())
    sandbox_id = "sandbox-fail"
    created_at = datetime.now(timezone.utc)
    expires_at = created_at

    # Seed the pending record the worker is expected to update.
    pending_request = MagicMock(
        metadata={},
        entrypoint=["/bin/sh"],
        image=ImageSpec(uri="image:latest"),
    )
    service._pending_sandboxes[sandbox_id] = PendingSandbox(
        request=pending_request,
        created_at=created_at,
        expires_at=expires_at,
        status=SandboxStatus(
            state="Pending",
            reason="SANDBOX_SCHEDULED",
            message="pending",
            last_transition_at=created_at,
        ),
    )

    # Provisioning always blows up with a 500.
    provisioning_error = HTTPException(
        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        detail={"message": "boom"},
    )
    service._provision_sandbox = MagicMock(side_effect=provisioning_error)
    service._cleanup_failed_containers = MagicMock()

    service._async_provision_worker(sandbox_id, MagicMock(), created_at, expires_at)

    service._cleanup_failed_containers.assert_called_once_with(sandbox_id)
    assert service._pending_sandboxes[sandbox_id].status.state == "Failed"


# ============================================================================
# Volume Support Tests
# ============================================================================


@patch("src.services.docker.docker")
class TestBuildVolumeBinds:
    """Tests for the bind strings produced by DockerSandboxService._build_volume_binds."""

    @staticmethod
    def _new_service(mock_docker):
        """Return a service backed by a fresh MagicMock Docker client.

        The name is deliberately not ``test``-prefixed so that neither the class-level
        ``patch`` nor pytest collection picks it up.
        """
        mock_docker.from_env.return_value = MagicMock()
        return DockerSandboxService(config=_app_config())

    def test_none_volumes_returns_empty(self, mock_docker):
        """Passing None for volumes gives an empty bind list."""
        service = self._new_service(mock_docker)
        assert service._build_volume_binds(None) == []

    def test_empty_volumes_returns_empty(self, mock_docker):
        """Passing an empty volume list gives an empty bind list."""
        service = self._new_service(mock_docker)
        assert service._build_volume_binds([]) == []

    def test_single_host_volume_rw(self, mock_docker):
        """A writable host volume maps to 'host_path:mount_path:rw'."""
        service = self._new_service(mock_docker)
        host_volume = Volume(
            name="workdir",
            host=Host(path="/data/opensandbox/user-a"),
            mount_path="/mnt/work",
            read_only=False,
        )
        result = service._build_volume_binds([host_volume])
        assert result == ["/data/opensandbox/user-a:/mnt/work:rw"]

    def test_single_host_volume_ro(self, mock_docker):
        """A read-only host volume maps to 'host_path:mount_path:ro'."""
        service = self._new_service(mock_docker)
        host_volume = Volume(
            name="workdir",
            host=Host(path="/data/opensandbox/user-a"),
            mount_path="/mnt/work",
            read_only=True,
        )
        result = service._build_volume_binds([host_volume])
        assert result == ["/data/opensandbox/user-a:/mnt/work:ro"]

    def test_host_volume_with_subpath(self, mock_docker):
        """A host volume with sub_path binds the normalized host path joined with it."""
        service = self._new_service(mock_docker)
        host_volume = Volume(
            name="workdir",
            host=Host(path="/data/opensandbox/user-a"),
            mount_path="/mnt/work",
            read_only=False,
            sub_path="task-001",
        )
        result = service._build_volume_binds([host_volume])
        resolved_host_path = os.path.normpath("/data/opensandbox/user-a/task-001")
        assert result == [f"{resolved_host_path}:/mnt/work:rw"]

    def test_multiple_host_volumes(self, mock_docker):
        """Two host volumes give two bind strings, one per volume."""
        service = self._new_service(mock_docker)
        writable = Volume(
            name="workdir",
            host=Host(path="/data/work"),
            mount_path="/mnt/work",
            read_only=False,
        )
        read_only = Volume(
            name="data",
            host=Host(path="/data/shared"),
            mount_path="/mnt/data",
            read_only=True,
        )
        result = service._build_volume_binds([writable, read_only])
        assert len(result) == 2
        assert "/data/work:/mnt/work:rw" in result
        assert "/data/shared:/mnt/data:ro" in result

    def test_single_pvc_volume_rw(self, mock_docker):
        """A writable PVC volume without sub_path binds by claim name as a named volume."""
        service = self._new_service(mock_docker)
        pvc_volume = Volume(
            name="shared-data",
            pvc=PVC(claim_name="my-shared-volume"),
            mount_path="/mnt/data",
            read_only=False,
        )
        result = service._build_volume_binds([pvc_volume])
        assert result == ["my-shared-volume:/mnt/data:rw"]

    def test_single_pvc_volume_ro(self, mock_docker):
        """A read-only PVC volume without sub_path binds by claim name with ':ro'."""
        service = self._new_service(mock_docker)
        pvc_volume = Volume(
            name="models",
            pvc=PVC(claim_name="shared-models-pvc"),
            mount_path="/mnt/models",
            read_only=True,
        )
        result = service._build_volume_binds([pvc_volume])
        assert result == ["shared-models-pvc:/mnt/models:ro"]

    def test_pvc_volume_with_subpath(self, mock_docker):
        """A PVC volume with sub_path binds the cached Mountpoint joined with the sub path."""
        service = self._new_service(mock_docker)
        pvc_volume = Volume(
            name="datasets",
            pvc=PVC(claim_name="my-vol"),
            mount_path="/mnt/train",
            read_only=False,
            sub_path="datasets/train",
        )
        # Pre-populated inspect result keyed by claim name.
        inspect_cache = {
            "my-vol": {
                "Name": "my-vol",
                "Driver": "local",
                "Mountpoint": "/var/lib/docker/volumes/my-vol/_data",
            }
        }
        result = service._build_volume_binds([pvc_volume], pvc_inspect_cache=inspect_cache)
        assert result == ["/var/lib/docker/volumes/my-vol/_data/datasets/train:/mnt/train:rw"]

    def test_pvc_volume_with_subpath_readonly(self, mock_docker):
        """A read-only PVC volume with sub_path gives a ':ro' bind on the resolved path."""
        service = self._new_service(mock_docker)
        pvc_volume = Volume(
            name="datasets",
            pvc=PVC(claim_name="my-vol"),
            mount_path="/mnt/eval",
            read_only=True,
            sub_path="datasets/eval",
        )
        # Pre-populated inspect result keyed by claim name.
        inspect_cache = {
            "my-vol": {
                "Name": "my-vol",
                "Driver": "local",
                "Mountpoint": "/var/lib/docker/volumes/my-vol/_data",
            }
        }
        result = service._build_volume_binds([pvc_volume], pvc_inspect_cache=inspect_cache)
        assert result == ["/var/lib/docker/volumes/my-vol/_data/datasets/eval:/mnt/eval:ro"]

    def test_mixed_host_and_pvc_volumes(self, mock_docker):
        """A host volume and a PVC volume in one request each give their own bind string."""
        service = self._new_service(mock_docker)
        host_volume = Volume(
            name="workdir",
            host=Host(path="/data/work"),
            mount_path="/mnt/work",
            read_only=False,
        )
        pvc_volume = Volume(
            name="shared-data",
            pvc=PVC(claim_name="my-shared-volume"),
            mount_path="/mnt/data",
            read_only=True,
        )
        result = service._build_volume_binds([host_volume, pvc_volume])
        assert len(result) == 2
        assert "/data/work:/mnt/work:rw" in result
        assert "my-shared-volume:/mnt/data:ro" in result

    def test_ossfs_volume_with_subpath(self, mock_docker):
        """An OSSFS volume with sub_path binds '/mnt/ossfs/<bucket>/<sub_path>' on the host side."""
        service = self._new_service(mock_docker)
        oss_backend = OSSFS(
            bucket="bucket-test-3",
            endpoint="oss-cn-hangzhou.aliyuncs.com",
            access_key_id="AKIDEXAMPLE",
            access_key_secret="SECRETEXAMPLE",
        )
        oss_volume = Volume(
            name="oss-data",
            ossfs=oss_backend,
            mount_path="/mnt/data",
            read_only=False,
            sub_path="task-001",
        )
        result = service._build_volume_binds([oss_volume])
        assert result == ["/mnt/ossfs/bucket-test-3/task-001:/mnt/data:rw"]


@patch("src.services.docker.docker")
class TestDockerVolumeValidation:
    """Tests for volume validation in DockerSandboxService.create_sandbox."""

    @pytest.mark.asyncio
    async def test_pvc_volume_not_found_rejected(self, mock_docker):
        """create_sandbox answers HTTP 400 PVC_VOLUME_NOT_FOUND when volume inspection raises NotFound."""
        docker_client = MagicMock()
        docker_client.containers.list.return_value = []
        # Inspecting the named volume reports that it does not exist.
        docker_client.api.inspect_volume.side_effect = DockerNotFound("volume not found")
        mock_docker.from_env.return_value = docker_client

        svc = DockerSandboxService(config=_app_config())

        missing_volume = Volume(
            name="models",
            pvc=PVC(claim_name="nonexistent-volume"),
            mount_path="/mnt/models",
            read_only=True,
        )
        create_request = CreateSandboxRequest(
            image=ImageSpec(uri="python:3.11"),
            timeout=120,
            resourceLimits=ResourceLimits(root={}),
            env={},
            metadata={},
            entrypoint=["python"],
            volumes=[missing_volume],
        )

        with pytest.raises(HTTPException) as exc_info:
            await svc.create_sandbox(create_request)

        err = exc_info.value
        assert err.status_code == status.HTTP_400_BAD_REQUEST
        assert err.detail["code"] == SandboxErrorCodes.PVC_VOLUME_NOT_FOUND

    def test_ossfs_inline_credentials_missing_rejected(self, mock_docker):
        """Constructing OSSFS with both inline credentials set to None raises ValidationError."""
        docker_client = MagicMock()
        docker_client.containers.list.return_value = []
        mock_docker.from_env.return_value = docker_client

        ossfs_fields = dict(
            bucket="bucket-test-3",
            endpoint="oss-cn-hangzhou.aliyuncs.com",
            access_key_id=None,
            access_key_secret=None,
        )
        with pytest.raises(ValidationError):
            OSSFS(**ossfs_fields)

    @pytest.mark.asyncio
    async def test_ossfs_mount_failure_rejected(self, mock_docker):
        """A non-zero exit from the patched mount subprocess surfaces as HTTP 500 OSSFS_MOUNT_FAILED."""
        docker_client = MagicMock()
        docker_client.containers.list.return_value = []
        mock_docker.from_env.return_value = docker_client
        svc = DockerSandboxService(config=_app_config())

        oss_volume = Volume(
            name="oss-data",
            ossfs=OSSFS(
                bucket="bucket-test-3",
                endpoint="oss-cn-hangzhou.aliyuncs.com",
                access_key_id="AKIDEXAMPLE",
                access_key_secret="SECRETEXAMPLE",
            ),
            mount_path="/mnt/data",
            sub_path="task-001",
        )
        create_request = CreateSandboxRequest(
            image=ImageSpec(uri="python:3.11"),
            timeout=120,
            resourceLimits=ResourceLimits(root={}),
            env={},
            metadata={},
            entrypoint=["python"],
            volumes=[oss_volume],
        )

        with (
            patch("src.services.ossfs_mixin.os.name", "posix"),
            patch("src.services.ossfs_mixin.os.path.ismount", return_value=False),
            patch("src.services.ossfs_mixin.os.makedirs"),
            patch("src.services.ossfs_mixin.subprocess.run") as run_mock,
        ):
            # The patched subprocess call reports a failed mount.
            run_mock.return_value = MagicMock(returncode=1, stderr="mount failed")
            with pytest.raises(HTTPException) as exc_info:
                await svc.create_sandbox(create_request)

        err = exc_info.value
        assert err.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR
        assert err.detail["code"] == SandboxErrorCodes.OSSFS_MOUNT_FAILED

    def test_ossfs_windows_host_not_supported(self, mock_docker):
        """With os.name patched to 'nt', OSSFS volume validation raises HTTP 400 INVALID_PARAMETER."""
        docker_client = MagicMock()
        docker_client.containers.list.return_value = []
        mock_docker.from_env.return_value = docker_client
        svc = DockerSandboxService(config=_app_config())

        oss_backend = OSSFS(
            bucket="bucket-test-3",
            endpoint="oss-cn-hangzhou.aliyuncs.com",
            access_key_id="AKIDEXAMPLE",
            access_key_secret="SECRETEXAMPLE",
        )
        oss_volume = Volume(name="oss-data", ossfs=oss_backend, mount_path="/mnt/data")

        with (
            patch("src.services.ossfs_mixin.os.name", "nt"),
            pytest.raises(HTTPException) as exc_info,
        ):
            svc._validate_ossfs_volume(oss_volume)

        err = exc_info.value
        assert err.status_code == status.HTTP_400_BAD_REQUEST
        assert err.detail["code"] == SandboxErrorCodes.INVALID_PARAMETER

    def test_ossfs_v1_mount_command_uses_o_options(self, mock_docker):
        """For version 1.0 the mount command carries options via -o and no '--'-prefixed flags."""
        docker_client = MagicMock()
        docker_client.containers.list.return_value = []
        mock_docker.from_env.return_value = docker_client
        svc = DockerSandboxService(config=_app_config())

        oss_volume = Volume(
            name="oss-data",
            ossfs=OSSFS(
                bucket="bucket-test-3",
                endpoint="oss-cn-hangzhou.aliyuncs.com",
                version="1.0",
                options=["allow_other", "umask=0022"],
                access_key_id="AKIDEXAMPLE",
                access_key_secret="SECRETEXAMPLE",
            ),
            mount_path="/mnt/data",
            sub_path="task-001",
        )
        backend_path = "/mnt/ossfs/bucket-test-3/task-001"

        with (
            patch("src.services.ossfs_mixin.os.makedirs"),
            patch("src.services.ossfs_mixin.subprocess.run") as run_mock,
        ):
            run_mock.return_value = MagicMock(returncode=0, stderr="")
            svc._mount_ossfs_backend_path(oss_volume, backend_path)

        # First positional argument handed to the patched subprocess.run.
        argv = run_mock.call_args.args[0]
        assert "bucket-test-3:/task-001" in argv
        for expected in ("-o", "allow_other", "umask=0022"):
            assert expected in argv
        for forbidden in ("--allow_other", "sigv4"):
            assert forbidden not in argv
        assert all(not str(part).startswith("region=") for part in argv)

    def test_ossfs_v2_mount_command_uses_config_file(self, mock_docker):
        """For version 2.0 the command is 'ossfs2 mount <path> -c <file ending in .conf>'."""
        docker_client = MagicMock()
        docker_client.containers.list.return_value = []
        mock_docker.from_env.return_value = docker_client
        svc = DockerSandboxService(config=_app_config())

        oss_volume = Volume(
            name="oss-data",
            ossfs=OSSFS(
                bucket="bucket-test-3",
                endpoint="oss-cn-hangzhou.aliyuncs.com",
                version="2.0",
                options=["allow_other", "umask=0022"],
                access_key_id="AKIDEXAMPLE",
                access_key_secret="SECRETEXAMPLE",
            ),
            mount_path="/mnt/data",
            sub_path="task-001",
        )
        backend_path = "/mnt/ossfs/bucket-test-3/task-001"

        with (
            patch("src.services.ossfs_mixin.os.makedirs"),
            patch("src.services.ossfs_mixin.subprocess.run") as run_mock,
        ):
            run_mock.return_value = MagicMock(returncode=0, stderr="")
            svc._mount_ossfs_backend_path(oss_volume, backend_path)

        # Only the first five command elements are checked.
        program, action, target, flag, conf_path = run_mock.call_args.args[0][:5]
        assert program == "ossfs2"
        assert action == "mount"
        assert target == backend_path
        assert flag == "-c"
        assert conf_path.endswith(".conf")

    def test_ossfs_v2_config_contains_required_lines(self, mock_docker):
        """The 2.0 config lines include endpoint, bucket, credentials, prefix and each option."""
        docker_client = MagicMock()
        docker_client.containers.list.return_value = []
        mock_docker.from_env.return_value = docker_client
        svc = DockerSandboxService(config=_app_config())

        oss_volume = Volume(
            name="oss-data",
            ossfs=OSSFS(
                bucket="bucket-test-3",
                endpoint="oss-cn-hangzhou.aliyuncs.com",
                version="2.0",
                options=["allow_other", "umask=0022"],
                access_key_id="AKIDEXAMPLE",
                access_key_secret="SECRETEXAMPLE",
            ),
            mount_path="/mnt/data",
            sub_path="task-001",
        )

        conf_lines = svc._build_ossfs_v2_config_lines(
            volume=oss_volume,
            endpoint_url="http://oss-cn-hangzhou.aliyuncs.com",
            prefix="task-001",
        )

        required_lines = (
            "--oss_endpoint=http://oss-cn-hangzhou.aliyuncs.com",
            "--oss_bucket=bucket-test-3",
            "--oss_access_key_id=AKIDEXAMPLE",
            "--oss_access_key_secret=SECRETEXAMPLE",
            "--oss_bucket_prefix=task-001/",
            "--allow_other",
            "--umask=0022",
        )
        for line in required_lines:
            assert line in conf_lines

    @pytest.mark.asyncio
    async def test_ossfs_volume_binds_passed_to_docker(self, mock_docker):
        """create_sandbox passes the OSSFS host path as a bind and records it in the mounts label."""
        docker_client = MagicMock()
        docker_client.containers.list.return_value = []
        docker_client.api.create_host_config.return_value = {}
        docker_client.api.create_container.return_value = {"Id": "cid"}
        docker_client.containers.get.return_value = MagicMock()
        mock_docker.from_env.return_value = docker_client
        svc = DockerSandboxService(config=_app_config())

        oss_volume = Volume(
            name="oss-data",
            ossfs=OSSFS(
                bucket="bucket-test-3",
                endpoint="oss-cn-hangzhou.aliyuncs.com",
                access_key_id="AKIDEXAMPLE",
                access_key_secret="SECRETEXAMPLE",
            ),
            mount_path="/mnt/data",
            read_only=True,
            sub_path="task-001",
        )
        create_request = CreateSandboxRequest(
            image=ImageSpec(uri="python:3.11"),
            timeout=120,
            resourceLimits=ResourceLimits(root={}),
            env={},
            metadata={},
            entrypoint=["python"],
            volumes=[oss_volume],
        )

        with (
            patch("src.services.ossfs_mixin.os.name", "posix"),
            patch("src.services.ossfs_mixin.os.path.ismount", return_value=False),
            patch("src.services.ossfs_mixin.os.makedirs"),
            patch("src.services.ossfs_mixin.subprocess.run") as run_mock,
            patch.object(svc, "_ensure_image_available"),
            patch.object(svc, "_prepare_sandbox_runtime"),
        ):
            # The patched mount subprocess reports success.
            run_mock.return_value = MagicMock(returncode=0, stderr="")
            response = await svc.create_sandbox(create_request)

        assert response.status.state == "Running"
        assert run_mock.called

        host_kwargs = docker_client.api.create_host_config.call_args.kwargs
        assert host_kwargs["binds"][0] == "/mnt/ossfs/bucket-test-3/task-001:/mnt/data:ro"

        container_labels = docker_client.api.create_container.call_args.kwargs["labels"]
        assert SANDBOX_OSSFS_MOUNTS_LABEL in container_labels
        assert container_labels[SANDBOX_OSSFS_MOUNTS_LABEL] == '["/mnt/ossfs/bucket-test-3/task-001"]'

    def test_prepare_ossfs_mounts_reuses_mount_key(self, mock_docker):
        """Two OSSFS volumes with the same bucket and sub_path yield one mount key and one mount call."""
        mock_docker.from_env.return_value = MagicMock()
        svc = DockerSandboxService(config=_app_config())

        # Both volumes share bucket and sub_path; only name and mount_path differ.
        volumes = [
            Volume(
                name=volume_name,
                ossfs=OSSFS(
                    bucket="bucket-test-3",
                    endpoint="oss-cn-hangzhou.aliyuncs.com",
                    access_key_id="AKIDEXAMPLE",
                    access_key_secret="SECRETEXAMPLE",
                ),
                mount_path=target,
                sub_path="task-001",
            )
            for volume_name, target in (
                ("oss-data-a", "/mnt/data-a"),
                ("oss-data-b", "/mnt/data-b"),
            )
        ]

        with (
            patch("src.services.ossfs_mixin.os.path.ismount", return_value=False),
            patch("src.services.ossfs_mixin.os.makedirs"),
            patch("src.services.ossfs_mixin.subprocess.run") as run_mock,
        ):
            run_mock.return_value = MagicMock(returncode=0, stderr="")
            prepared_keys = svc._prepare_ossfs_mounts(volumes)

        shared_key = "/mnt/ossfs/bucket-test-3/task-001"
        assert prepared_keys == [shared_key]
        assert svc._ossfs_mount_ref_counts[shared_key] == 1
        assert run_mock.call_count == 1

    def test_prepare_ossfs_mounts_rolls_back_on_partial_failure(self, mock_docker):
        """When the second mount raises, only the first (successful) mount key is released."""
        mock_docker.from_env.return_value = MagicMock()
        svc = DockerSandboxService(config=_app_config())

        volumes = [
            Volume(
                name=volume_name,
                ossfs=OSSFS(
                    bucket=bucket,
                    endpoint="oss-cn-hangzhou.aliyuncs.com",
                    access_key_id="AKIDEXAMPLE",
                    access_key_secret="SECRETEXAMPLE",
                ),
                mount_path=target,
            )
            for volume_name, bucket, target in (
                ("oss-data-a", "bucket-a", "/mnt/data-a"),
                ("oss-data-b", "bucket-b", "/mnt/data-b"),
            )
        ]

        mount_key_a = "/mnt/ossfs/bucket-a"
        mount_key_b = "/mnt/ossfs/bucket-b"

        # First call succeeds with key A, second call raises.
        mount_error = HTTPException(status_code=500, detail={"code": "E", "message": "boom"})
        with (
            patch.object(svc, "_ensure_ossfs_mounted", side_effect=[mount_key_a, mount_error]) as ensure_mock,
            patch.object(svc, "_release_ossfs_mounts") as release_mock,
            pytest.raises(HTTPException),
        ):
            svc._prepare_ossfs_mounts(volumes)

        assert ensure_mock.call_count == 2
        release_mock.assert_called_once_with([mount_key_a])
        released_keys = release_mock.call_args.args[0]
        assert mount_key_b not in released_keys

    def test_delete_sandbox_releases_ossfs_mount(self, mock_docker):
        """delete_sandbox drops the tracked mount key's ref count entry and invokes a subprocess."""
        mount_key = "/mnt/ossfs/bucket-test-3/task-001"
        container_labels = {
            SANDBOX_ID_LABEL: "sandbox-1",
            SANDBOX_OSSFS_MOUNTS_LABEL: f'["{mount_key}"]',
        }
        tracked_container = MagicMock()
        tracked_container.attrs = {
            "Config": {"Labels": container_labels},
            "State": {"Running": True},
        }

        docker_client = MagicMock()
        docker_client.containers.list.return_value = [tracked_container]
        mock_docker.from_env.return_value = docker_client
        svc = DockerSandboxService(config=_app_config())
        # Force a single outstanding reference for the mount key.
        svc._ossfs_mount_ref_counts[mount_key] = 1

        with (
            patch("src.services.ossfs_mixin.os.path.ismount", return_value=True),
            patch("src.services.ossfs_mixin.subprocess.run") as run_mock,
        ):
            run_mock.return_value = MagicMock(returncode=0, stderr="")
            svc.delete_sandbox("sandbox-1")

        assert mount_key not in svc._ossfs_mount_ref_counts
        assert run_mock.called

    def test_release_ossfs_mount_untracked_key_does_not_unmount(self, mock_docker):
        """Releasing a key with no ref count entry runs no subprocess and leaves it untracked."""
        mock_docker.from_env.return_value = MagicMock()
        svc = DockerSandboxService(config=_app_config())
        untracked_key = "/mnt/ossfs/bucket-test-3/task-001"

        with (
            patch("src.services.ossfs_mixin.os.path.ismount", return_value=True),
            patch("src.services.ossfs_mixin.subprocess.run") as run_mock,
        ):
            svc._release_ossfs_mount(untracked_key)

        run_mock.assert_not_called()
        assert untracked_key not in svc._ossfs_mount_ref_counts

    def test_restore_existing_sandboxes_rebuilds_ossfs_refs(self, mock_docker):
        """Constructing the service with one labelled container yields a ref count of 1 for its mount key."""
        mount_key = "/mnt/ossfs/bucket-test-3/task-001"
        # Expiry ten minutes in the future.
        expires_at = (datetime.now(timezone.utc) + timedelta(minutes=10)).isoformat()

        container_labels = {
            SANDBOX_ID_LABEL: "sandbox-1",
            SANDBOX_EXPIRES_AT_LABEL: expires_at,
            SANDBOX_OSSFS_MOUNTS_LABEL: f'["{mount_key}"]',
        }
        existing_container = MagicMock()
        existing_container.attrs = {
            "Config": {"Labels": container_labels},
            "State": {"Running": True},
        }

        docker_client = MagicMock()
        docker_client.containers.list.return_value = [existing_container]
        mock_docker.from_env.return_value = docker_client

        svc = DockerSandboxService(config=_app_config())

        assert svc._ossfs_mount_ref_counts[mount_key] == 1

    def test_delete_one_sandbox_after_restart_keeps_shared_mount(self, mock_docker):
        """With two containers sharing a mount key, deleting one leaves count 1 and runs no subprocess."""
        mount_key = "/mnt/ossfs/bucket-test-3/task-001"
        expires_at = (datetime.now(timezone.utc) + timedelta(minutes=10)).isoformat()

        def labelled_container(sandbox_id):
            # Running container whose labels reference the shared mount key.
            container = MagicMock()
            container.attrs = {
                "Config": {
                    "Labels": {
                        SANDBOX_ID_LABEL: sandbox_id,
                        SANDBOX_EXPIRES_AT_LABEL: expires_at,
                        SANDBOX_OSSFS_MOUNTS_LABEL: f'["{mount_key}"]',
                    }
                },
                "State": {"Running": True},
            }
            return container

        container_a = labelled_container("sandbox-a")
        container_b = labelled_container("sandbox-b")

        docker_client = MagicMock()
        docker_client.containers.list.return_value = [container_a, container_b]
        mock_docker.from_env.return_value = docker_client

        svc = DockerSandboxService(config=_app_config())
        assert svc._ossfs_mount_ref_counts[mount_key] == 2

        with (
            patch("src.services.ossfs_mixin.os.path.ismount", return_value=True),
            patch("src.services.ossfs_mixin.subprocess.run") as run_mock,
        ):
            svc.delete_sandbox("sandbox-a")

        assert svc._ossfs_mount_ref_counts[mount_key] == 1
        run_mock.assert_not_called()

    def test_restore_manual_cleanup_sandbox_rebuilds_ossfs_refs(self, mock_docker):
        """A container labelled for manual cleanup still contributes one ref for its mount key at startup."""
        mount_key = "/mnt/ossfs/bucket-manual/data"
        container_labels = {
            SANDBOX_ID_LABEL: "sandbox-manual",
            SANDBOX_MANUAL_CLEANUP_LABEL: "true",
            SANDBOX_OSSFS_MOUNTS_LABEL: f'["{mount_key}"]',
        }
        manual_container = MagicMock()
        manual_container.attrs = {
            "Config": {"Labels": container_labels},
            "State": {"Running": True},
        }

        docker_client = MagicMock()
        docker_client.containers.list.return_value = [manual_container]
        mock_docker.from_env.return_value = docker_client

        svc = DockerSandboxService(config=_app_config())

        assert svc._ossfs_mount_ref_counts.get(mount_key) == 1

    @pytest.mark.asyncio
    async def test_pvc_volume_inspect_failure_returns_500(self, mock_docker):
        """A DockerException from volume inspection maps to HTTP 500 PVC_VOLUME_INSPECT_FAILED."""
        docker_client = MagicMock()
        docker_client.containers.list.return_value = []
        docker_client.api.inspect_volume.side_effect = DockerException("connection error")
        mock_docker.from_env.return_value = docker_client

        svc = DockerSandboxService(config=_app_config())

        pvc_volume = Volume(
            name="shared-data",
            pvc=PVC(claim_name="my-volume"),
            mount_path="/mnt/data",
        )
        create_request = CreateSandboxRequest(
            image=ImageSpec(uri="python:3.11"),
            timeout=120,
            resourceLimits=ResourceLimits(root={}),
            env={},
            metadata={},
            entrypoint=["python"],
            volumes=[pvc_volume],
        )

        with pytest.raises(HTTPException) as exc_info:
            await svc.create_sandbox(create_request)

        err = exc_info.value
        assert err.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR
        assert err.detail["code"] == SandboxErrorCodes.PVC_VOLUME_INSPECT_FAILED

    @pytest.mark.asyncio
    async def test_pvc_volume_binds_passed_to_docker(self, mock_docker):
        """A writable PVC volume reaches create_host_config as a single '<claim>:<path>:rw' bind."""
        docker_client = MagicMock()
        docker_client.containers.list.return_value = []
        docker_client.api.inspect_volume.return_value = {"Name": "my-shared-volume"}
        docker_client.api.create_host_config.return_value = {}
        docker_client.api.create_container.return_value = {"Id": "cid"}
        docker_client.containers.get.return_value = MagicMock()
        mock_docker.from_env.return_value = docker_client

        svc = DockerSandboxService(config=_app_config())

        pvc_volume = Volume(
            name="shared-data",
            pvc=PVC(claim_name="my-shared-volume"),
            mount_path="/mnt/data",
            read_only=False,
        )
        create_request = CreateSandboxRequest(
            image=ImageSpec(uri="python:3.11"),
            timeout=120,
            resourceLimits=ResourceLimits(root={}),
            env={},
            metadata={},
            entrypoint=["python"],
            volumes=[pvc_volume],
        )

        with patch.object(svc, "_ensure_image_available"):
            with patch.object(svc, "_prepare_sandbox_runtime"):
                response = await svc.create_sandbox(create_request)

        assert response.status.state == "Running"

        # The named-volume bind must appear in the keyword arguments of create_host_config.
        host_kwargs = docker_client.api.create_host_config.call_args.kwargs
        assert "binds" in host_kwargs
        bind_specs = host_kwargs["binds"]
        assert len(bind_specs) == 1
        assert bind_specs[0] == "my-shared-volume:/mnt/data:rw"

    @pytest.mark.asyncio
    async def test_pvc_volume_readonly_binds_passed_to_docker(self, mock_docker):
        """A read-only PVC volume is passed to create_host_config with a ':ro' suffix."""
        docker_client = MagicMock()
        docker_client.containers.list.return_value = []
        docker_client.api.inspect_volume.return_value = {"Name": "shared-models"}
        docker_client.api.create_host_config.return_value = {}
        docker_client.api.create_container.return_value = {"Id": "cid"}
        docker_client.containers.get.return_value = MagicMock()
        mock_docker.from_env.return_value = docker_client

        svc = DockerSandboxService(config=_app_config())

        readonly_volume = Volume(
            name="models",
            pvc=PVC(claim_name="shared-models"),
            mount_path="/mnt/models",
            read_only=True,
        )
        create_request = CreateSandboxRequest(
            image=ImageSpec(uri="python:3.11"),
            timeout=120,
            resourceLimits=ResourceLimits(root={}),
            env={},
            metadata={},
            entrypoint=["python"],
            volumes=[readonly_volume],
        )

        with patch.object(svc, "_ensure_image_available"):
            with patch.object(svc, "_prepare_sandbox_runtime"):
                await svc.create_sandbox(create_request)

        bind_specs = docker_client.api.create_host_config.call_args.kwargs["binds"]
        assert bind_specs[0] == "shared-models:/mnt/models:ro"

    @pytest.mark.asyncio
    async def test_pvc_subpath_non_local_driver_rejected(self, mock_docker):
        """A sub_path on a volume whose inspected Driver is 'nfs' yields HTTP 400 PVC_SUBPATH_UNSUPPORTED_DRIVER."""
        inspected_volume = {
            "Name": "cloud-vol",
            "Driver": "nfs",
            "Mountpoint": "",
        }
        docker_client = MagicMock()
        docker_client.containers.list.return_value = []
        docker_client.api.inspect_volume.return_value = inspected_volume
        mock_docker.from_env.return_value = docker_client

        svc = DockerSandboxService(config=_app_config())

        subpath_volume = Volume(
            name="data",
            pvc=PVC(claim_name="cloud-vol"),
            mount_path="/mnt/data",
            sub_path="subdir",
        )
        create_request = CreateSandboxRequest(
            image=ImageSpec(uri="python:3.11"),
            timeout=120,
            resourceLimits=ResourceLimits(root={}),
            env={},
            metadata={},
            entrypoint=["python"],
            volumes=[subpath_volume],
        )

        with pytest.raises(HTTPException) as exc_info:
            await svc.create_sandbox(create_request)

        err = exc_info.value
        assert err.status_code == status.HTTP_400_BAD_REQUEST
        assert err.detail["code"] == SandboxErrorCodes.PVC_SUBPATH_UNSUPPORTED_DRIVER

    @pytest.mark.asyncio
    async def test_pvc_subpath_symlink_escape_rejected(self, mock_docker):
        """A sub_path whose patched realpath lands outside the mountpoint yields HTTP 400 INVALID_SUB_PATH."""
        inspected_volume = {
            "Name": "my-vol",
            "Driver": "local",
            "Mountpoint": "/var/lib/docker/volumes/my-vol/_data",
        }
        docker_client = MagicMock()
        docker_client.containers.list.return_value = []
        docker_client.api.inspect_volume.return_value = inspected_volume
        mock_docker.from_env.return_value = docker_client

        svc = DockerSandboxService(config=_app_config())

        subpath_volume = Volume(
            name="data",
            pvc=PVC(claim_name="my-vol"),
            mount_path="/mnt/data",
            sub_path="datasets",
        )
        create_request = CreateSandboxRequest(
            image=ImageSpec(uri="python:3.11"),
            timeout=120,
            resourceLimits=ResourceLimits(root={}),
            env={},
            metadata={},
            entrypoint=["python"],
            volumes=[subpath_volume],
        )

        def fake_realpath(p, **kwargs):
            # Pretend "datasets" is a symlink to "/" inside the volume, so any
            # path ending in "datasets" resolves to the filesystem root.
            if p.endswith("datasets"):
                return "/"
            return p

        with (
            patch("src.services.docker.os.path.realpath", side_effect=fake_realpath),
            pytest.raises(HTTPException) as exc_info,
        ):
            await svc.create_sandbox(create_request)

        err = exc_info.value
        assert err.status_code == status.HTTP_400_BAD_REQUEST
        assert err.detail["code"] == SandboxErrorCodes.INVALID_SUB_PATH
        assert "symlink" in err.detail["message"]

    @pytest.mark.asyncio
    async def test_pvc_subpath_binds_resolved_to_mountpoint(self, mock_docker):
        """A PVC sub_path on a local-driver volume is bound as '<Mountpoint>/<sub_path>'."""
        inspected_volume = {
            "Name": "my-vol",
            "Driver": "local",
            "Mountpoint": "/var/lib/docker/volumes/my-vol/_data",
        }
        docker_client = MagicMock()
        docker_client.containers.list.return_value = []
        docker_client.api.inspect_volume.return_value = inspected_volume
        docker_client.api.create_host_config.return_value = {}
        docker_client.api.create_container.return_value = {"Id": "cid"}
        docker_client.containers.get.return_value = MagicMock()
        mock_docker.from_env.return_value = docker_client

        svc = DockerSandboxService(config=_app_config())

        train_volume = Volume(
            name="train-data",
            pvc=PVC(claim_name="my-vol"),
            mount_path="/mnt/train",
            read_only=True,
            sub_path="datasets/train",
        )
        create_request = CreateSandboxRequest(
            image=ImageSpec(uri="python:3.11"),
            timeout=120,
            resourceLimits=ResourceLimits(root={}),
            env={},
            metadata={},
            entrypoint=["python"],
            volumes=[train_volume],
        )

        with patch.object(svc, "_ensure_image_available"):
            with patch.object(svc, "_prepare_sandbox_runtime"):
                await svc.create_sandbox(create_request)

        bind_specs = docker_client.api.create_host_config.call_args.kwargs["binds"]
        assert len(bind_specs) == 1
        assert bind_specs[0] == "/var/lib/docker/volumes/my-vol/_data/datasets/train:/mnt/train:ro"

    @pytest.mark.asyncio
    async def test_host_path_not_found_rejected(self, mock_docker):
        """When os.makedirs raises PermissionError, create_sandbox fails with HTTP 500 HOST_PATH_CREATE_FAILED."""
        docker_client = MagicMock()
        docker_client.containers.list.return_value = []
        mock_docker.from_env.return_value = docker_client

        svc = DockerSandboxService(config=_app_config())

        host_volume = Volume(
            name="workdir",
            host=Host(path="/nonexistent/path/that/does/not/exist"),
            mount_path="/mnt/work",
            read_only=False,
        )
        create_request = CreateSandboxRequest(
            image=ImageSpec(uri="python:3.11"),
            timeout=120,
            resourceLimits=ResourceLimits(root={}),
            env={},
            metadata={},
            entrypoint=["python"],
            volumes=[host_volume],
        )

        # Directory creation in the docker service module is forced to fail.
        with (
            patch("src.services.docker.os.makedirs", side_effect=PermissionError("denied")),
            pytest.raises(HTTPException) as exc_info,
        ):
            await svc.create_sandbox(create_request)

        err = exc_info.value
        assert err.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR
        assert err.detail["code"] == SandboxErrorCodes.HOST_PATH_CREATE_FAILED

    @pytest.mark.asyncio
    async def test_host_path_not_in_allowlist_rejected(self, mock_docker):
        """A host path outside storage.allowed_host_paths yields HTTP 400 HOST_PATH_NOT_ALLOWED."""
        docker_client = MagicMock()
        docker_client.containers.list.return_value = []
        mock_docker.from_env.return_value = docker_client

        # Only /data/opensandbox is allowed for host mounts.
        restricted_config = _app_config()
        restricted_config.storage = StorageConfig(allowed_host_paths=["/data/opensandbox"])
        svc = DockerSandboxService(config=restricted_config)

        disallowed_volume = Volume(
            name="workdir",
            host=Host(path="/etc/passwd"),
            mount_path="/mnt/work",
            read_only=False,
        )
        create_request = CreateSandboxRequest(
            image=ImageSpec(uri="python:3.11"),
            timeout=120,
            resourceLimits=ResourceLimits(root={}),
            env={},
            metadata={},
            entrypoint=["python"],
            volumes=[disallowed_volume],
        )

        with pytest.raises(HTTPException) as exc_info:
            await svc.create_sandbox(create_request)

        err = exc_info.value
        assert err.status_code == status.HTTP_400_BAD_REQUEST
        assert err.detail["code"] == SandboxErrorCodes.HOST_PATH_NOT_ALLOWED

    @pytest.mark.asyncio
    async def test_no_volumes_passes_validation(self, mock_docker):
        """A request that declares no volumes is created and reported as Running."""
        docker_client = MagicMock()
        docker_client.configure_mock(
            **{
                "containers.list.return_value": [],
                "api.create_host_config.return_value": {},
                "api.create_container.return_value": {"Id": "cid"},
                "containers.get.return_value": MagicMock(),
            }
        )
        mock_docker.from_env.return_value = docker_client

        service = DockerSandboxService(config=_app_config())

        # No ``volumes`` argument is supplied at all.
        request = CreateSandboxRequest(
            image=ImageSpec(uri="python:3.11"),
            timeout=120,
            resourceLimits=ResourceLimits(root={}),
            env={},
            metadata={},
            entrypoint=["python"],
        )

        # Image availability and runtime preparation are replaced by mocks.
        with patch.object(service, "_ensure_image_available"):
            with patch.object(service, "_prepare_sandbox_runtime"):
                response = await service.create_sandbox(request)

        assert response.status.state == "Running"

    @pytest.mark.asyncio
    async def test_host_volume_binds_passed_to_docker(self, mock_docker):
        """A writable host volume reaches ``create_host_config`` as one ``<path>:/mnt/work:rw`` bind."""
        import tempfile

        docker_client = MagicMock()
        docker_client.containers.list.return_value = []
        docker_client.api.create_host_config.return_value = {}
        docker_client.api.create_container.return_value = {"Id": "cid"}
        docker_client.containers.get.return_value = MagicMock()
        mock_docker.from_env.return_value = docker_client

        service = DockerSandboxService(config=_app_config())

        with tempfile.TemporaryDirectory() as host_dir:
            workdir = Volume(
                name="workdir",
                host=Host(path=host_dir),
                mount_path="/mnt/work",
                read_only=False,
            )
            request = CreateSandboxRequest(
                image=ImageSpec(uri="python:3.11"),
                timeout=120,
                resourceLimits=ResourceLimits(root={}),
                env={},
                metadata={},
                entrypoint=["python"],
                volumes=[workdir],
            )

            # Image availability and runtime preparation are replaced by mocks.
            with patch.object(service, "_ensure_image_available"):
                with patch.object(service, "_prepare_sandbox_runtime"):
                    await service.create_sandbox(request)

            # Inspect the keyword arguments of the most recent create_host_config call.
            host_config_kwargs = docker_client.api.create_host_config.call_args.kwargs
            assert "binds" in host_config_kwargs
            binds = host_config_kwargs["binds"]
            assert len(binds) == 1
            assert binds[0] == f"{host_dir}:/mnt/work:rw"

    @pytest.mark.asyncio
    async def test_host_volume_with_subpath_resolved_correctly(self, mock_docker):
        """With ``sub_path`` set, the bind source is ``<host path>/<sub_path>`` (read-only here)."""
        import tempfile

        docker_client = MagicMock()
        docker_client.containers.list.return_value = []
        docker_client.api.create_host_config.return_value = {}
        docker_client.api.create_container.return_value = {"Id": "cid"}
        docker_client.containers.get.return_value = MagicMock()
        mock_docker.from_env.return_value = docker_client

        service = DockerSandboxService(config=_app_config())

        with tempfile.TemporaryDirectory() as host_root:
            # The sub-directory exists up front, so only path resolution is exercised.
            task_dir = os.path.join(host_root, "task-001")
            os.makedirs(task_dir)

            workdir = Volume(
                name="workdir",
                host=Host(path=host_root),
                mount_path="/mnt/work",
                read_only=True,
                sub_path="task-001",
            )
            request = CreateSandboxRequest(
                image=ImageSpec(uri="python:3.11"),
                timeout=120,
                resourceLimits=ResourceLimits(root={}),
                env={},
                metadata={},
                entrypoint=["python"],
                volumes=[workdir],
            )

            # Image availability and runtime preparation are replaced by mocks.
            with patch.object(service, "_ensure_image_available"):
                with patch.object(service, "_prepare_sandbox_runtime"):
                    await service.create_sandbox(request)

            binds = docker_client.api.create_host_config.call_args.kwargs["binds"]
            assert len(binds) == 1
            assert binds[0] == f"{task_dir}:/mnt/work:ro"

    @pytest.mark.asyncio
    async def test_host_subpath_auto_created(self, mock_docker):
        """A host volume whose subPath does not exist yet gets that directory created.

        Only the directory creation is verified. ``create_sandbox`` may still
        fail afterwards for unrelated provisioning reasons; such failures are
        tolerated, but a ``HOST_PATH_CREATE_FAILED`` error is re-raised because
        it means the auto-creation itself did not work.
        """
        import os
        import tempfile

        mock_client = MagicMock()
        mock_client.containers.list.return_value = []
        mock_client.api.create_host_config.return_value = {}
        mock_client.api.create_container.return_value = {"Id": "cid"}
        mock_client.containers.get.return_value = MagicMock()
        mock_docker.from_env.return_value = mock_client

        service = DockerSandboxService(config=_app_config())

        with tempfile.TemporaryDirectory() as tmpdir:
            sub = "auto-created-sub"
            request = CreateSandboxRequest(
                image=ImageSpec(uri="python:3.11"),
                timeout=120,
                resourceLimits=ResourceLimits(root={}),
                env={},
                metadata={},
                entrypoint=["python"],
                volumes=[
                    Volume(
                        name="workdir",
                        host=Host(path=tmpdir),
                        mount_path="/mnt/work",
                        read_only=False,
                        sub_path=sub,
                    )
                ],
            )

            # Precondition: the sub-directory must not exist before the call.
            resolved = os.path.join(tmpdir, sub)
            assert not os.path.exists(resolved)

            # Unlike sibling tests, image/runtime helpers are not patched here,
            # so create_sandbox may fail after volume validation.  We only care
            # that the directory was created, NOT that provisioning succeeded.
            try:
                await service.create_sandbox(request)
            except HTTPException as e:
                # ``detail`` is not guaranteed to be a dict; calling ``.get`` on
                # a plain string would raise AttributeError here and hide the
                # real error.  Non-dict details are treated as "some other
                # provisioning error".
                detail = e.detail if isinstance(e.detail, dict) else {}
                # If it's our own create-failed error, the auto-create didn't
                # work; let the test fail explicitly.
                if detail.get("code") == SandboxErrorCodes.HOST_PATH_CREATE_FAILED:
                    raise
            except Exception:
                pass  # other provisioning errors are expected

            assert os.path.isdir(resolved)

    @pytest.mark.asyncio
    async def test_empty_allowlist_permits_any_host_path(self, mock_docker):
        """With an empty ``allowed_host_paths`` list, a temp-dir host volume is accepted."""
        import tempfile

        docker_client = MagicMock()
        docker_client.containers.list.return_value = []
        docker_client.api.create_host_config.return_value = {}
        docker_client.api.create_container.return_value = {"Id": "cid"}
        docker_client.containers.get.return_value = MagicMock()
        mock_docker.from_env.return_value = docker_client

        # Guard the premise of the test: the config from _app_config() has no allowlist entries.
        config = _app_config()
        assert config.storage.allowed_host_paths == []
        service = DockerSandboxService(config=config)

        with tempfile.TemporaryDirectory() as host_dir:
            workdir = Volume(
                name="workdir",
                host=Host(path=host_dir),
                mount_path="/mnt/work",
                read_only=False,
            )
            request = CreateSandboxRequest(
                image=ImageSpec(uri="python:3.11"),
                timeout=120,
                resourceLimits=ResourceLimits(root={}),
                env={},
                metadata={},
                entrypoint=["python"],
                volumes=[workdir],
            )

            # Image availability and runtime preparation are replaced by mocks.
            with patch.object(service, "_ensure_image_available"):
                with patch.object(service, "_prepare_sandbox_runtime"):
                    response = await service.create_sandbox(request)

            assert response.status.state == "Running"

    @pytest.mark.asyncio
    async def test_no_volumes_omits_binds_from_host_config(self, mock_docker):
        """Without volumes, ``create_host_config`` is called with no ``binds`` keyword."""
        docker_client = MagicMock()
        docker_client.configure_mock(
            **{
                "containers.list.return_value": [],
                "api.create_host_config.return_value": {},
                "api.create_container.return_value": {"Id": "cid"},
                "containers.get.return_value": MagicMock(),
            }
        )
        mock_docker.from_env.return_value = docker_client

        service = DockerSandboxService(config=_app_config())

        # No ``volumes`` argument is supplied at all.
        request = CreateSandboxRequest(
            image=ImageSpec(uri="python:3.11"),
            timeout=120,
            resourceLimits=ResourceLimits(root={}),
            env={},
            metadata={},
            entrypoint=["python"],
        )

        # Image availability and runtime preparation are replaced by mocks.
        with patch.object(service, "_ensure_image_available"):
            with patch.object(service, "_prepare_sandbox_runtime"):
                await service.create_sandbox(request)

        host_config_kwargs = docker_client.api.create_host_config.call_args.kwargs
        assert "binds" not in host_config_kwargs


================================================
FILE: server/tests/test_endpoint.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from src.services.helpers import normalize_external_endpoint_url


def test_normalize_external_endpoint_url_defaults_to_https() -> None:
    """A bare host name comes back prefixed with ``https://``."""
    normalized = normalize_external_endpoint_url("oss-cn-hangzhou.aliyuncs.com")
    assert normalized == "https://oss-cn-hangzhou.aliyuncs.com"


def test_normalize_external_endpoint_url_keeps_existing_scheme() -> None:
    """A URL that already carries ``http://`` is returned unchanged."""
    url_with_scheme = "http://oss-cn-hangzhou.aliyuncs.com"
    normalized = normalize_external_endpoint_url(url_with_scheme)
    assert normalized == "http://oss-cn-hangzhou.aliyuncs.com"


================================================
FILE: server/tests/test_endpoint_auth.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from src.services.constants import OPEN_SANDBOX_EGRESS_AUTH_HEADER
from src.services.endpoint_auth import (
    build_egress_auth_headers,
    generate_egress_token,
    merge_endpoint_headers,
)


def test_generate_egress_token_returns_random_urlsafe_strings() -> None:
    """Two consecutively generated tokens are both non-empty and differ from each other."""
    token_a, token_b = generate_egress_token(), generate_egress_token()

    assert token_a
    assert token_b
    assert token_a != token_b


def test_build_egress_auth_headers_uses_expected_header_name() -> None:
    """The token is placed under ``OPEN_SANDBOX_EGRESS_AUTH_HEADER`` and nothing else is added."""
    expected_headers = {OPEN_SANDBOX_EGRESS_AUTH_HEADER: "egress-token"}

    built = build_egress_auth_headers("egress-token")

    assert built == expected_headers


def test_merge_endpoint_headers_preserves_existing_headers() -> None:
    """Merging yields the union of both mappings and leaves the first argument unmodified."""
    ingress_headers = {"OpenSandbox-Ingress-To": "sbx-1-18080"}
    egress_headers = {OPEN_SANDBOX_EGRESS_AUTH_HEADER: "egress-token"}

    result = merge_endpoint_headers(ingress_headers, egress_headers)

    expected = {
        "OpenSandbox-Ingress-To": "sbx-1-18080",
        OPEN_SANDBOX_EGRESS_AUTH_HEADER: "egress-token",
    }
    assert result == expected
    # The caller's dict must not have been mutated by the merge.
    assert ingress_headers == {"OpenSandbox-Ingress-To": "sbx-1-18080"}


def test_merge_endpoint_headers_handles_missing_existing_headers() -> None:
    """Passing ``None`` as the existing headers returns just the extra headers."""
    egress_headers = {OPEN_SANDBOX_EGRESS_AUTH_HEADER: "egress-token"}

    result = merge_endpoint_headers(None, egress_headers)

    assert result == {OPEN_SANDBOX_EGRESS_AUTH_HEADER: "egress-token"}


================================================
FILE: server/tests/test_helpers.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from datetime import datetime, timezone

from src.services.helpers import parse_timestamp


def test_parse_timestamp_truncates_nanoseconds():
    """A nanosecond-precision ``Z`` timestamp parses to UTC with the fraction cut to microseconds."""
    ts = "2025-12-10T05:29:56.359015208Z"

    result = parse_timestamp(ts)

    assert result.tzinfo is not None
    # Aware datetimes compare by instant, so `result.astimezone(utc) == result`
    # is true for every offset and proves nothing.  Check the offset itself.
    assert result.utcoffset().total_seconds() == 0
    assert result.year == 2025
    assert result.month == 12
    assert result.day == 10
    # 359015208 ns is truncated (not rounded) to 359015 us.
    assert result.microsecond == 359015


def test_parse_timestamp_parses_valid_rfc3339():
    """A microsecond-precision ``Z`` timestamp parses to the matching aware datetime."""
    expected = datetime(2024, 1, 1, 12, 34, 56, 123456, tzinfo=timezone.utc)

    parsed = parse_timestamp("2024-01-01T12:34:56.123456Z")

    assert parsed.tzinfo is not None
    assert parsed == expected


def test_parse_timestamp_invalid_falls_back_to_now():
    """Unparseable input yields an aware datetime lying between two ``now()`` samples."""
    lower_bound = datetime.now(timezone.utc)
    fallback = parse_timestamp("not-a-time")
    upper_bound = datetime.now(timezone.utc)

    assert fallback.tzinfo is not None
    assert lower_bound <= fallback
    assert fallback <= upper_bound


================================================
FILE: server/tests/test_ingress.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from src.config import (
    GatewayConfig,
    GatewayRouteModeConfig,
    IngressConfig,
    INGRESS_MODE_DIRECT,
    INGRESS_MODE_GATEWAY,
)
from src.services.constants import OPEN_SANDBOX_INGRESS_HEADER
from src.services.helpers import format_ingress_endpoint


def test_format_ingress_endpoint_returns_none_when_not_gateway():
    """Both a direct-mode config and a missing config produce no ingress endpoint."""
    for ingress_cfg in (IngressConfig(mode=INGRESS_MODE_DIRECT), None):
        assert format_ingress_endpoint(ingress_cfg, "sid", 8080) is None


def test_format_ingress_endpoint_wildcard():
    """Wildcard routing substitutes ``<sandbox>-<port>`` for the ``*`` label; no headers are set."""
    gateway = GatewayConfig(
        address="*.example.com",
        route=GatewayRouteModeConfig(mode="wildcard"),
    )
    ingress_cfg = IngressConfig(mode=INGRESS_MODE_GATEWAY, gateway=gateway)

    result = format_ingress_endpoint(ingress_cfg, "sid", 8080)

    assert result is not None
    assert result.endpoint == "sid-8080.example.com"
    assert result.headers is None


def test_format_ingress_endpoint_uri():
    """URI routing appends ``/<sandbox>/<port>`` to the gateway address; no headers are set."""
    gateway = GatewayConfig(
        address="gateway.example.com",
        route=GatewayRouteModeConfig(mode="uri"),
    )
    ingress_cfg = IngressConfig(mode=INGRESS_MODE_GATEWAY, gateway=gateway)

    result = format_ingress_endpoint(ingress_cfg, "sid", 9000)

    assert result is not None
    assert result.endpoint == "gateway.example.com/sid/9000"
    assert result.headers is None


def test_format_ingress_endpoint_header():
    """Header routing keeps the bare gateway address and carries ``<sandbox>-<port>`` in a header."""
    gateway = GatewayConfig(
        address="gateway.example.com",
        route=GatewayRouteModeConfig(mode="header"),
    )
    ingress_cfg = IngressConfig(mode=INGRESS_MODE_GATEWAY, gateway=gateway)

    result = format_ingress_endpoint(ingress_cfg, "sid", 8080)

    assert result is not None
    assert result.endpoint == "gateway.example.com"
    assert result.headers == {OPEN_SANDBOX_INGRESS_HEADER: "sid-8080"}

================================================
FILE: server/tests/test_routes.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
API route tests for OpenSandbox Lifecycle API.

This module contains test cases for all API endpoints.
Most test bodies are placeholders that will be implemented as features mature.
"""

from datetime import datetime, timezone

from fastapi.testclient import TestClient

from src.api import lifecycle
from src.api.schema import ImageSpec, Sandbox, SandboxStatus


class TestHealthCheck:
    """Tests for the health check endpoint."""

    def test_health_check(self, client: TestClient):
        """GET /health answers 200 with a ``{"status": "healthy"}`` body."""
        health = client.get("/health")
        assert health.status_code == 200
        body = health.json()
        assert body == {"status": "healthy"}


class TestAuthentication:
    """Tests for the authentication middleware."""

    def test_missing_api_key(self, client: TestClient):
        """A sandbox lookup without an API key is answered with 401 and MISSING_API_KEY."""
        unauthenticated = client.get("/sandboxes/123e4567-e89b-12d3-a456-426614174000")
        assert unauthenticated.status_code == 401
        error_code = unauthenticated.json()["code"]
        assert "MISSING_API_KEY" in error_code

    def test_missing_api_key_v1_prefix(self, client: TestClient):
        """The same lookup under the ``/v1`` prefix is also answered with 401 and MISSING_API_KEY."""
        unauthenticated = client.get("/v1/sandboxes/123e4567-e89b-12d3-a456-426614174000")
        assert unauthenticated.status_code == 401
        error_code = unauthenticated.json()["code"]
        assert "MISSING_API_KEY" in error_code

    def test_invalid_api_key(self, client: TestClient):
        """
        Send a sandbox lookup carrying an unrecognised API key.

        Nothing is asserted yet; the request is only issued.
        """
        client.get(
            "/sandboxes/123e4567-e89b-12d3-a456-426614174000",
            headers={"OPEN-SANDBOX-API-KEY": "invalid-key"},
        )
        # Note carried over from the original author: the current implementation
        # accepts any non-empty key if no keys are configured.  Add a status
        # assertion here once proper key validation is implemented.


class TestCreateSandbox:
    """Tests for the sandbox creation endpoint (all placeholders for now)."""

    def test_create_sandbox_success(
        self,
        client: TestClient,
        auth_headers: dict,
        sample_sandbox_request: dict,
    ):
        """Placeholder for the successful creation case; no behavior is exercised yet."""

    def test_create_sandbox_invalid_request(self, client: TestClient, auth_headers: dict):
        """Placeholder for creation with an invalid request; no behavior is exercised yet."""

    def test_create_sandbox_unauthorized(self, client: TestClient, sample_sandbox_request: dict):
        """Placeholder for creation without authentication; no behavior is exercised yet."""


class TestListSandboxes:
    """Tests for the sandbox listing endpoint."""

    def test_list_sandboxes_success(self, client: TestClient, auth_headers: dict):
        """Issue an authenticated GET /sandboxes; the response is not asserted yet."""
        client.get("/sandboxes", headers=auth_headers)
        # No assertion yet: the outcome depends on how the service is mocked
        # (the original note anticipates 200 with a proper mock, 500 otherwise).
        # Once the mock returns a valid list response, enable:
        # assert response.status_code == 200

    def test_list_sandboxes_with_filters(self, client: TestClient, auth_headers: dict):
        """Issue GET /sandboxes with state and metadata filters; the response is not asserted yet."""
        filters = {"state": ["Running"], "metadata": "project=test"}
        client.get("/sandboxes", headers=auth_headers, params=filters)

    def test_list_sandboxes_with_pagination(self, client: TestClient, auth_headers: dict):
        """Issue GET /sandboxes with page/pageSize parameters; the response is not asserted yet."""
        paging = {"page": 2, "pageSize": 10}
        client.get("/sandboxes", headers=auth_headers, params=paging)


class TestGetSandbox:
    """Tests for fetching a single sandbox."""

    def test_get_sandbox_success(self, client: TestClient, auth_headers: dict):
        """Placeholder for the successful retrieval case; no behavior is exercised yet."""

    def test_get_sandbox_preserves_nullable_expires_at(
        self,
        client: TestClient,
        auth_headers: dict,
        monkeypatch,
    ):
        """
        A sandbox with ``expires_at=None`` serializes ``expiresAt`` as an explicit null.

        The other unset optional fields (metadata and the status details) must
        likewise appear as null in the JSON payload.
        """
        stored = Sandbox(
            id="sandbox-123",
            image=ImageSpec(uri="python:3.11"),
            status=SandboxStatus(state="Running"),
            metadata=None,
            entrypoint=["python"],
            expires_at=None,
            created_at=datetime.now(timezone.utc),
        )

        class StubService:
            @staticmethod
            def get_sandbox(sandbox_id: str) -> Sandbox:
                return stored

        # Swap the route module's service for the stub for the duration of this test.
        monkeypatch.setattr(lifecycle, "sandbox_service", StubService())

        response = client.get("/sandboxes/sandbox-123", headers=auth_headers)
        assert response.status_code == 200

        payload = response.json()
        assert payload["metadata"] is None
        assert payload["id"] == "sandbox-123"
        assert payload["entrypoint"] == ["python"]
        # The key must be present AND null, not merely absent.
        assert "expiresAt" in payload
        assert payload["expiresAt"] is None
        assert "createdAt" in payload

        status_payload = payload["status"]
        assert status_payload["state"] == "Running"
        for optional_field in ("reason", "message", "lastTransitionAt"):
            assert status_payload[optional_field] is None

    def test_get_sandbox_not_found(self, client: TestClient, auth_headers: dict):
        """Placeholder for retrieval of a non-existent ID; no behavior is exercised yet."""


class TestDeleteSandbox:
    """Tests for the sandbox deletion endpoint (all placeholders for now)."""

    def test_delete_sandbox_success(self, client: TestClient, auth_headers: dict):
        """Placeholder for the successful deletion case; no behavior is exercised yet."""

    def test_delete_sandbox_not_found(self, client: TestClient, auth_headers: dict):
        """Placeholder for deletion of a non-existent ID; no behavior is exercised yet."""


class TestPauseResumeSandbox:
    """Tests for the pause and resume endpoints (all placeholders for now)."""

    def test_pause_sandbox_success(self, client: TestClient, auth_headers: dict):
        """Placeholder for the successful pause case; no behavior is exercised yet."""

    def test_resume_sandbox_success(self, client: TestClient, auth_headers: dict):
        """Placeholder for the successful resume case; no behavior is exercised yet."""

    def test_pause_sandbox_invalid_state(self, client: TestClient, auth_headers: dict):
        """Placeholder for pausing a sandbox in an invalid state; no behavior is exercised yet."""


class TestRenewExpiration:
    """Tests for the renew-expiration endpoint (all placeholders for now)."""

    def test_renew_expiration_success(self, client: TestClient, auth_headers: dict):
        """Placeholder for the successful renewal case; no behavior is exercised yet."""

    def test_renew_expiration_invalid_time(self, client: TestClient, auth_headers: dict):
        """Placeholder for renewal with an invalid time; no behavior is exercised yet."""


class TestGetEndpoint:
    """Tests for the sandbox endpoint-lookup route (all placeholders for now)."""

    def test_get_endpoint_success(self, client: TestClient, auth_headers: dict):
        """Placeholder for the successful endpoint retrieval case; no behavior is exercised yet."""

    def test_get_endpoint_invalid_port(self, client: TestClient, auth_headers: dict):
        """Placeholder for endpoint retrieval with an invalid port; no behavior is exercised yet."""


================================================
FILE: server/tests/test_routes_create_delete.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from datetime import datetime, timedelta, timezone

from fastapi.testclient import TestClient

from src.api import lifecycle
from src.api.schema import CreateSandboxResponse, SandboxStatus


def test_create_sandbox_returns_202_and_service_payload(
    client: TestClient,
    auth_headers: dict,
    sample_sandbox_request: dict,
    monkeypatch,
) -> None:
    """POST /v1/sandboxes answers 202 with the service's response and forwards the parsed request."""
    created_at = datetime.now(timezone.utc)
    received: list[object] = []
    canned_response = CreateSandboxResponse(
        id="sbx-001",
        status=SandboxStatus(state="Pending"),
        metadata={"project": "test-project"},
        expiresAt=created_at + timedelta(hours=1),
        createdAt=created_at,
        entrypoint=["python", "-c", "print('Hello from sandbox')"],
    )

    class StubService:
        @staticmethod
        async def create_sandbox(request) -> CreateSandboxResponse:
            # Record what the route handed over so it can be inspected below.
            received.append(request)
            return canned_response

    monkeypatch.setattr(lifecycle, "sandbox_service", StubService())

    response = client.post(
        "/v1/sandboxes",
        headers=auth_headers,
        json=sample_sandbox_request,
    )

    assert response.status_code == 202

    body = response.json()
    assert body["id"] == "sbx-001"
    assert body["status"]["state"] == "Pending"
    assert body["metadata"]["project"] == "test-project"
    assert body["entrypoint"] == ["python", "-c", "print('Hello from sandbox')"]

    # The service is invoked exactly once, with the request already parsed into a model.
    assert len(received) == 1
    forwarded = received[0]
    assert forwarded.image.uri == "python:3.11"


def test_create_sandbox_manual_cleanup_returns_null_expiration(
    client: TestClient,
    auth_headers: dict,
    sample_sandbox_request: dict,
    monkeypatch,
) -> None:
    """A create response with no expiration or metadata serializes those fields as null."""
    canned_response = CreateSandboxResponse(
        id="sbx-manual",
        status=SandboxStatus(state="Pending"),
        metadata=None,
        expiresAt=None,
        createdAt=datetime.now(timezone.utc),
        entrypoint=["python", "-c", "print('Hello from sandbox')"],
    )

    class StubService:
        @staticmethod
        async def create_sandbox(request) -> CreateSandboxResponse:
            return canned_response

    monkeypatch.setattr(lifecycle, "sandbox_service", StubService())

    # Submit the request without a timeout.
    sample_sandbox_request.pop("timeout", None)

    response = client.post(
        "/v1/sandboxes",
        headers=auth_headers,
        json=sample_sandbox_request,
    )

    assert response.status_code == 202

    body = response.json()
    assert body["expiresAt"] is None
    assert body["metadata"] is None
    for optional_field in ("reason", "message", "lastTransitionAt"):
        assert body["status"][optional_field] is None


def test_create_sandbox_rejects_invalid_request(
    client: TestClient,
    auth_headers: dict,
) -> None:
    """A create body containing only ``timeout`` is rejected with 422."""
    incomplete_body = {"timeout": 10}

    response = client.post("/v1/sandboxes", headers=auth_headers, json=incomplete_body)

    assert response.status_code == 422


def test_delete_sandbox_returns_204_and_calls_service(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """DELETE /v1/sandboxes/{id} answers 204 with an empty body and passes the id to the service."""
    deleted_ids: list[str] = []

    class StubService:
        @staticmethod
        def delete_sandbox(sandbox_id: str) -> None:
            deleted_ids.append(sandbox_id)

    monkeypatch.setattr(lifecycle, "sandbox_service", StubService())

    response = client.delete("/v1/sandboxes/sbx-001", headers=auth_headers)

    assert response.status_code == 204
    assert response.text == ""
    # Exactly one service call, carrying the id from the URL.
    assert deleted_ids == ["sbx-001"]


def test_delete_sandbox_requires_api_key(client: TestClient) -> None:
    """DELETE without an API key is answered with 401 and code MISSING_API_KEY."""
    unauthenticated = client.delete("/v1/sandboxes/sbx-001")

    assert unauthenticated.status_code == 401
    error_code = unauthenticated.json()["code"]
    assert error_code == "MISSING_API_KEY"


================================================
FILE: server/tests/test_routes_endpoint_behavior.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from fastapi.testclient import TestClient

from src.api import lifecycle
from src.api.schema import Endpoint


def test_get_endpoint_returns_service_result(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """The endpoint route returns the service's endpoint and passes id and port through."""
    seen_args: list[tuple[str, int]] = []

    class _RecordingService:
        # Records each (sandbox_id, port) pair and answers with a fixed endpoint.
        @staticmethod
        def get_endpoint(sandbox_id: str, port: int) -> Endpoint:
            seen_args.append((sandbox_id, port))
            return Endpoint(endpoint="10.57.1.91:40109/proxy/44772")

    monkeypatch.setattr(lifecycle, "sandbox_service", _RecordingService())

    resp = client.get("/v1/sandboxes/sbx-001/endpoints/44772", headers=auth_headers)

    assert resp.status_code == 200
    body = resp.json()
    assert body["endpoint"] == "10.57.1.91:40109/proxy/44772"
    # The port path segment must reach the service as an int.
    assert seen_args == [("sbx-001", 44772)]


def test_get_endpoint_use_server_proxy_rewrites_url(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """With ``use_server_proxy=true`` the returned endpoint is the server-side proxy path."""

    class _FixedEndpointService:
        # Always reports the same backend endpoint, regardless of arguments.
        @staticmethod
        def get_endpoint(sandbox_id: str, port: int) -> Endpoint:
            return Endpoint(endpoint="10.57.1.91:40109/proxy/44772")

    monkeypatch.setattr(lifecycle, "sandbox_service", _FixedEndpointService())

    query = {"use_server_proxy": "true"}
    resp = client.get(
        "/v1/sandboxes/sbx-001/endpoints/44772",
        params=query,
        headers=auth_headers,
    )

    assert resp.status_code == 200
    body = resp.json()
    assert body["endpoint"] == "testserver/sandboxes/sbx-001/proxy/44772"


def test_get_endpoint_rejects_non_numeric_port(
    client: TestClient,
    auth_headers: dict,
) -> None:
    """A non-numeric port path segment is answered with HTTP 422."""
    bad_port_url = "/v1/sandboxes/sbx-001/endpoints/not-a-port"

    resp = client.get(bad_port_url, headers=auth_headers)

    assert resp.status_code == 422


================================================
FILE: server/tests/test_routes_get_sandbox.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from datetime import datetime, timedelta, timezone

from fastapi.exceptions import HTTPException
from fastapi.testclient import TestClient

from src.api import lifecycle
from src.api.schema import ImageSpec, Sandbox, SandboxStatus


def test_get_sandbox_returns_service_payload(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """GET on a sandbox serialises the Sandbox object returned by the service."""
    created = datetime.now(timezone.utc)
    expires = created + timedelta(hours=1)

    class _SingleSandboxService:
        # Checks the requested id, then answers with a running python:3.11 sandbox.
        @staticmethod
        def get_sandbox(sandbox_id: str) -> Sandbox:
            assert sandbox_id == "sbx-001"
            return Sandbox(
                id=sandbox_id,
                image=ImageSpec(uri="python:3.11"),
                status=SandboxStatus(state="Running"),
                metadata={"team": "infra"},
                entrypoint=["python", "-V"],
                expiresAt=expires,
                createdAt=created,
            )

    monkeypatch.setattr(lifecycle, "sandbox_service", _SingleSandboxService())

    resp = client.get("/v1/sandboxes/sbx-001", headers=auth_headers)

    assert resp.status_code == 200
    body = resp.json()
    assert body["id"] == "sbx-001"
    assert body["status"]["state"] == "Running"
    assert body["image"]["uri"] == "python:3.11"


def test_get_sandbox_propagates_not_found(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """A 404 HTTPException raised by the service is returned with its detail as the body."""

    class _MissingSandboxService:
        # Every lookup fails with a structured 404.
        @staticmethod
        def get_sandbox(sandbox_id: str) -> Sandbox:
            not_found_detail = {
                "code": "SANDBOX_NOT_FOUND",
                "message": f"Sandbox {sandbox_id} not found",
            }
            raise HTTPException(status_code=404, detail=not_found_detail)

    monkeypatch.setattr(lifecycle, "sandbox_service", _MissingSandboxService())

    resp = client.get("/v1/sandboxes/missing", headers=auth_headers)

    assert resp.status_code == 404
    expected_body = {
        "code": "SANDBOX_NOT_FOUND",
        "message": "Sandbox missing not found",
    }
    assert resp.json() == expected_body


def test_get_sandbox_requires_api_key(client: TestClient) -> None:
    """A GET sent without auth headers gets 401 with code ``MISSING_API_KEY``."""
    resp = client.get("/v1/sandboxes/sbx-001")

    assert resp.status_code == 401
    error_code = resp.json()["code"]
    assert error_code == "MISSING_API_KEY"


================================================
FILE: server/tests/test_routes_list_sandboxes.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from datetime import datetime, timedelta, timezone

from fastapi.testclient import TestClient

from src.api import lifecycle
from src.api.schema import (
    ImageSpec,
    ListSandboxesResponse,
    PaginationInfo,
    Sandbox,
    SandboxStatus,
)


def test_list_sandboxes_parses_filters_and_pagination(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """Query parameters are parsed into the list request's filter and pagination fields.

    The stub captures the request object it receives so its ``filter`` and
    ``pagination`` attributes can be inspected after the HTTP call.
    """
    created = datetime.now(timezone.utc)
    expires = created + timedelta(hours=1)
    seen_requests: list[object] = []

    class _CapturingService:
        @staticmethod
        def list_sandboxes(request) -> ListSandboxesResponse:
            seen_requests.append(request)
            only_item = Sandbox(
                id="sbx-001",
                image=ImageSpec(uri="python:3.11"),
                status=SandboxStatus(state="Running"),
                metadata={"team": "infra", "project": "alpha"},
                entrypoint=["python", "-V"],
                expiresAt=expires,
                createdAt=created,
            )
            page_info = PaginationInfo(
                page=2,
                pageSize=5,
                totalItems=8,
                totalPages=2,
                hasNextPage=False,
            )
            return ListSandboxesResponse(items=[only_item], pagination=page_info)

    monkeypatch.setattr(lifecycle, "sandbox_service", _CapturingService())

    query = {
        "state": ["Running", "Paused"],
        "metadata": "team=infra&project=alpha",
        "page": 2,
        "pageSize": 5,
    }
    resp = client.get("/v1/sandboxes", params=query, headers=auth_headers)

    assert resp.status_code == 200
    body = resp.json()
    assert body["pagination"]["page"] == 2
    assert body["pagination"]["pageSize"] == 5
    assert body["items"][0]["status"]["state"] == "Running"

    # What the route handed to the service.
    list_request = seen_requests[0]
    assert list_request.filter.state == ["Running", "Paused"]
    assert list_request.filter.metadata == {"team": "infra", "project": "alpha"}
    assert list_request.pagination.page == 2
    assert list_request.pagination.page_size == 5


def test_list_sandboxes_rejects_malformed_metadata_query(
    client: TestClient,
    auth_headers: dict,
) -> None:
    """A metadata filter containing a key without ``=`` yields 400 INVALID_METADATA_FORMAT."""
    malformed_query = {"metadata": "team=infra&broken"}

    resp = client.get("/v1/sandboxes", params=malformed_query, headers=auth_headers)

    assert resp.status_code == 400
    assert resp.json()["code"] == "INVALID_METADATA_FORMAT"
    error_message = resp.json()["message"]
    assert "bad query field" in error_message


def test_list_sandboxes_keeps_blank_metadata_values(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """A metadata key with an empty value (``note=``) reaches the service as ``""``."""
    seen_requests: list[object] = []

    class _CapturingService:
        # Captures the incoming request and answers with an empty first page.
        @staticmethod
        def list_sandboxes(request) -> ListSandboxesResponse:
            seen_requests.append(request)
            empty_page = PaginationInfo(
                page=1,
                pageSize=20,
                totalItems=0,
                totalPages=0,
                hasNextPage=False,
            )
            return ListSandboxesResponse(items=[], pagination=empty_page)

    monkeypatch.setattr(lifecycle, "sandbox_service", _CapturingService())

    query = {"metadata": "team=infra&note="}
    resp = client.get("/v1/sandboxes", params=query, headers=auth_headers)

    assert resp.status_code == 200
    parsed_metadata = seen_requests[0].filter.metadata
    assert parsed_metadata == {"team": "infra", "note": ""}


def test_list_sandboxes_preserves_only_nullable_expires_at(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """Unset optional fields of a listed sandbox are serialised as JSON ``null``.

    The stub returns a sandbox with ``expiresAt`` and ``metadata`` set to
    ``None`` and a status that only carries ``state``.
    """
    created = datetime.now(timezone.utc)

    class _ManualCleanupService:
        @staticmethod
        def list_sandboxes(request) -> ListSandboxesResponse:
            manual_sandbox = Sandbox(
                id="sbx-manual",
                image=ImageSpec(uri="python:3.11"),
                status=SandboxStatus(state="Running"),
                metadata=None,
                entrypoint=["python"],
                expiresAt=None,
                createdAt=created,
            )
            single_page = PaginationInfo(
                page=1,
                pageSize=20,
                totalItems=1,
                totalPages=1,
                hasNextPage=False,
            )
            return ListSandboxesResponse(items=[manual_sandbox], pagination=single_page)

    monkeypatch.setattr(lifecycle, "sandbox_service", _ManualCleanupService())

    resp = client.get("/v1/sandboxes", headers=auth_headers)

    assert resp.status_code == 200
    listed = resp.json()["items"][0]
    assert listed["expiresAt"] is None
    assert listed["metadata"] is None
    status = listed["status"]
    assert status["reason"] is None
    assert status["message"] is None
    assert status["lastTransitionAt"] is None


def test_list_sandboxes_validates_page_bounds(
    client: TestClient,
    auth_headers: dict,
) -> None:
    """Requesting ``page=0`` is answered with HTTP 422."""
    out_of_range = {"page": 0}

    resp = client.get("/v1/sandboxes", params=out_of_range, headers=auth_headers)

    assert resp.status_code == 422


def test_list_sandboxes_validates_page_size_upper_bound(
    client: TestClient,
    auth_headers: dict,
) -> None:
    """Requesting ``pageSize=201`` is answered with HTTP 422."""
    oversized = {"pageSize": 201}

    resp = client.get("/v1/sandboxes", params=oversized, headers=auth_headers)

    assert resp.status_code == 422


def test_list_sandboxes_requires_api_key(client: TestClient) -> None:
    """Listing without auth headers gets 401 with code ``MISSING_API_KEY``."""
    resp = client.get("/v1/sandboxes")

    assert resp.status_code == 401
    error_code = resp.json()["code"]
    assert error_code == "MISSING_API_KEY"


================================================
FILE: server/tests/test_routes_pause_resume.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from fastapi.exceptions import HTTPException
from fastapi.testclient import TestClient

from src.api import lifecycle


def test_pause_route_calls_service_and_returns_202(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """POST .../pause returns 202 and passes the sandbox id to ``pause_sandbox``."""
    paused_ids: list[str] = []

    class _RecordingService:
        # Remembers which sandbox ids were asked to pause.
        @staticmethod
        def pause_sandbox(sandbox_id: str) -> None:
            paused_ids.append(sandbox_id)

    monkeypatch.setattr(lifecycle, "sandbox_service", _RecordingService())

    resp = client.post("/v1/sandboxes/sbx-001/pause", headers=auth_headers)

    assert resp.status_code == 202
    assert paused_ids == ["sbx-001"]


def test_resume_route_calls_service_and_returns_202(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """POST .../resume returns 202 and passes the sandbox id to ``resume_sandbox``."""
    resumed_ids: list[str] = []

    class _RecordingService:
        # Remembers which sandbox ids were asked to resume.
        @staticmethod
        def resume_sandbox(sandbox_id: str) -> None:
            resumed_ids.append(sandbox_id)

    monkeypatch.setattr(lifecycle, "sandbox_service", _RecordingService())

    resp = client.post("/v1/sandboxes/sbx-001/resume", headers=auth_headers)

    assert resp.status_code == 202
    assert resumed_ids == ["sbx-001"]


def test_pause_route_propagates_service_http_error(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """A 404 HTTPException raised by ``pause_sandbox`` is returned with its detail as the body."""

    class _MissingSandboxService:
        # Every pause attempt fails with a structured 404.
        @staticmethod
        def pause_sandbox(sandbox_id: str) -> None:
            not_found_detail = {
                "code": "SANDBOX_NOT_FOUND",
                "message": f"Sandbox {sandbox_id} not found",
            }
            raise HTTPException(status_code=404, detail=not_found_detail)

    monkeypatch.setattr(lifecycle, "sandbox_service", _MissingSandboxService())

    resp = client.post("/v1/sandboxes/missing/pause", headers=auth_headers)

    assert resp.status_code == 404
    expected_body = {
        "code": "SANDBOX_NOT_FOUND",
        "message": "Sandbox missing not found",
    }
    assert resp.json() == expected_body


def test_pause_route_requires_api_key(client: TestClient) -> None:
    """Pausing without auth headers gets 401 with code ``MISSING_API_KEY``."""
    resp = client.post("/v1/sandboxes/sbx-001/pause")

    assert resp.status_code == 401
    error_code = resp.json()["code"]
    assert error_code == "MISSING_API_KEY"


================================================
FILE: server/tests/test_routes_proxy.py
================================================
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import httpx
from fastapi.testclient import TestClient

from src.api import lifecycle
from src.api.schema import Endpoint
from src.services.constants import OPEN_SANDBOX_EGRESS_AUTH_HEADER


class _FakeStreamingResponse:
    def __init__(
        self, status_code: int = 200, headers: dict | None = None, chunks: list[bytes] | None = None
    ):
        self.status_code = status_code
        self.headers = httpx.Headers(headers or {})
        self._chunks = chunks or []

    async def aiter_bytes(self):
        for chunk in self._chunks:
            yield chunk


class _FakeAsyncClient:
    def __init__(self):
        self.built = None
        self.response = _FakeStreamingResponse()
        self.raise_connect_error = False
        self.raise_generic_error = False

    def build_request(
        self,
        method: str,
        url: str,
        headers: dict,
        content,
        params: str | None = None,
    ):
        self.built = {
            "method": method,
            "url": url,
            "params": params,
            "headers": headers,
            "content": content,
        }
        return self.built

    async def send(self, req, stream: bool = True):
        if self.raise_connect_error:
            raise httpx.ConnectError("connection refused")
        if self.raise_generic_error:
            raise RuntimeError("unexpected proxy error")
        return self.response


def test_proxy_forwards_filtered_headers_and_query(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """A proxied POST relays the backend reply and filters the request headers it forwards.

    The forwarded request must target the resolved backend URL, carry the query
    string through ``params``, keep ``X-Trace``, and omit every header named in
    the drop list asserted at the end.
    """

    class _EndpointService:
        @staticmethod
        def get_endpoint(sandbox_id: str, port: int, resolve_internal: bool = False) -> Endpoint:
            assert sandbox_id == "sbx-123"
            assert port == 44772
            assert resolve_internal is True
            return Endpoint(endpoint="10.57.1.91:40109")

    monkeypatch.setattr(lifecycle, "sandbox_service", _EndpointService())

    backend = _FakeAsyncClient()
    backend.response = _FakeStreamingResponse(
        status_code=201,
        headers={"x-backend": "yes"},
        chunks=[b"proxy-ok"],
    )
    client.app.state.http_client = backend

    request_headers = dict(auth_headers)
    request_headers.update(
        {
            "Authorization": "Bearer top-secret",
            "Cookie": "sid=secret",
            "Connection": "keep-alive, X-Hop-Temp",
            "Upgrade": "h2c",
            "Trailer": "X-Checksum",
            "X-Hop-Temp": "drop-me",
            "X-Trace": "trace-1",
        }
    )

    resp = client.post(
        "/v1/sandboxes/sbx-123/proxy/44772/api/run",
        params={"q": "search"},
        headers=request_headers,
        content=b'{"hello":"world"}',
    )

    # The backend's status, body and custom header come back to the caller.
    assert resp.status_code == 201
    assert resp.content == b"proxy-ok"
    assert resp.headers.get("x-backend") == "yes"

    forwarded = backend.built
    assert forwarded is not None
    assert forwarded["method"] == "POST"
    assert forwarded["url"] == "http://10.57.1.91:40109/api/run"
    assert forwarded["params"] == "q=search"

    lowered = {name.lower(): value for name, value in forwarded["headers"].items()}
    must_be_dropped = (
        "host",
        "connection",
        "upgrade",
        "trailer",
        "authorization",
        "cookie",
        "x-hop-temp",
    )
    for header_name in must_be_dropped:
        assert header_name not in lowered
    assert lowered.get("x-trace") == "trace-1"


def test_proxy_forwards_get_request_with_query_params(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """GET requests carrying a query string are forwarded with the query intact.

    Regression test for issue #484: such requests failed with 400 MISSING_QUERY
    when going through use_server_proxy. The query string has to be handed to
    httpx through ``params`` rather than being embedded in the URL.
    """

    class _EndpointService:
        @staticmethod
        def get_endpoint(sandbox_id: str, port: int, resolve_internal: bool = False) -> Endpoint:
            assert sandbox_id == "sbx-123"
            assert port == 44772
            assert resolve_internal is True
            return Endpoint(endpoint="10.57.1.91:40109")

    monkeypatch.setattr(lifecycle, "sandbox_service", _EndpointService())

    backend = _FakeAsyncClient()
    backend.response = _FakeStreamingResponse(
        status_code=200,
        headers={"content-type": "application/json"},
        chunks=[b'[{"name":"file.txt","size":100}]'],
    )
    client.app.state.http_client = backend

    resp = client.get(
        "/v1/sandboxes/sbx-123/proxy/44772/files/search",
        params={"path": "/workspace"},
        headers=auth_headers,
    )

    assert resp.status_code == 200
    forwarded = backend.built
    assert forwarded is not None
    assert forwarded["method"] == "GET"
    assert forwarded["url"] == "http://10.57.1.91:40109/files/search"
    # The query arrives percent-encoded and separate from the URL.
    assert forwarded["params"] == "path=%2Fworkspace"
    assert forwarded["content"] is None


def test_proxy_forwards_delete_request_with_body(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """Test that DELETE requests with body payload are forwarded correctly.

    This verifies that DELETE requests with JSON/body payload are not
    incorrectly stripped when proxying, that the request is sent to the
    expected backend URL, and that the backend's body is relayed to the caller.
    """
    class StubService:
        @staticmethod
        def get_endpoint(sandbox_id: str, port: int, resolve_internal: bool = False) -> Endpoint:
            return Endpoint(endpoint="10.57.1.91:40109")

    monkeypatch.setattr(lifecycle, "sandbox_service", StubService())

    fake_client = _FakeAsyncClient()
    fake_client.response = _FakeStreamingResponse(
        status_code=200,
        headers={"content-type": "application/json"},
        chunks=[b'{"deleted":true}'],
    )
    client.app.state.http_client = fake_client

    response = client.request(
        "DELETE",
        "/v1/sandboxes/sbx-123/proxy/44772/resources",
        headers=auth_headers,
        content=b'{"id": "resource-123"}',
    )

    assert response.status_code == 200
    # The backend reply must reach the caller unchanged.
    assert response.content == b'{"deleted":true}'
    assert fake_client.built is not None
    assert fake_client.built["method"] == "DELETE"
    # Same URL convention as the other proxy tests: http://<endpoint>/<remaining path>.
    assert fake_client.built["url"] == "http://10.57.1.91:40109/resources"
    # Only presence is checked: the concrete type used to forward the body
    # (bytes vs. stream) is not visible from this test.
    assert fake_client.built["content"] is not None


def test_proxy_filters_response_hop_by_hop_headers(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """Backend response headers in the drop list are not relayed; ``x-backend`` is."""

    class _EndpointService:
        @staticmethod
        def get_endpoint(sandbox_id: str, port: int, resolve_internal: bool = False) -> Endpoint:
            assert resolve_internal is True
            return Endpoint(endpoint="10.57.1.91:40109")

    monkeypatch.setattr(lifecycle, "sandbox_service", _EndpointService())

    backend_headers = {
        "x-backend": "yes",
        "Connection": "keep-alive, X-Hop-Temp",
        "Keep-Alive": "timeout=5",
        "Trailer": "X-Checksum",
        "X-Hop-Temp": "drop-me",
    }
    backend = _FakeAsyncClient()
    backend.response = _FakeStreamingResponse(
        status_code=200,
        headers=backend_headers,
        chunks=[b"proxy-ok"],
    )
    client.app.state.http_client = backend

    resp = client.get("/v1/sandboxes/sbx-123/proxy/44772/healthz", headers=auth_headers)

    assert resp.status_code == 200
    assert resp.content == b"proxy-ok"
    assert resp.headers.get("x-backend") == "yes"
    for stripped in ("connection", "keep-alive", "trailer", "x-hop-temp"):
        assert resp.headers.get(stripped) is None


def test_proxy_rejects_websocket_upgrade(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """A proxied GET with ``Upgrade: websocket`` is refused with HTTP 400."""

    class _EndpointService:
        @staticmethod
        def get_endpoint(sandbox_id: str, port: int, resolve_internal: bool = False) -> Endpoint:
            return Endpoint(endpoint="10.57.1.91:40109")

    monkeypatch.setattr(lifecycle, "sandbox_service", _EndpointService())
    client.app.state.http_client = _FakeAsyncClient()

    upgrade_headers = {**auth_headers, "Upgrade": "websocket"}
    resp = client.get("/v1/sandboxes/sbx-123/proxy/44772/ws", headers=upgrade_headers)

    assert resp.status_code == 400
    error_message = resp.json()["message"]
    assert error_message == "Websocket upgrade is not supported yet"


def test_proxy_rejects_websocket_upgrade_for_post_and_mixed_case_header(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """A proxied POST with the mixed-case value ``Upgrade: WebSocket`` is also refused with 400."""

    class _EndpointService:
        @staticmethod
        def get_endpoint(sandbox_id: str, port: int, resolve_internal: bool = False) -> Endpoint:
            return Endpoint(endpoint="10.57.1.91:40109")

    monkeypatch.setattr(lifecycle, "sandbox_service", _EndpointService())
    client.app.state.http_client = _FakeAsyncClient()

    upgrade_headers = {**auth_headers, "Upgrade": "WebSocket"}
    resp = client.post(
        "/v1/sandboxes/sbx-123/proxy/44772/ws",
        headers=upgrade_headers,
        content=b"{}",
    )

    assert resp.status_code == 400
    error_message = resp.json()["message"]
    assert error_message == "Websocket upgrade is not supported yet"


def test_proxy_maps_connect_error_to_502(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """An ``httpx.ConnectError`` raised while sending to the backend becomes HTTP 502."""

    class _EndpointService:
        @staticmethod
        def get_endpoint(sandbox_id: str, port: int, resolve_internal: bool = False) -> Endpoint:
            return Endpoint(endpoint="10.57.1.91:40109")

    monkeypatch.setattr(lifecycle, "sandbox_service", _EndpointService())

    unreachable_backend = _FakeAsyncClient()
    unreachable_backend.raise_connect_error = True
    client.app.state.http_client = unreachable_backend

    resp = client.get("/v1/sandboxes/sbx-123/proxy/44772/healthz", headers=auth_headers)

    assert resp.status_code == 502
    error_message = resp.json()["message"]
    assert "Could not connect to the backend sandbox" in error_message


def test_proxy_maps_unexpected_error_to_500(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """A ``RuntimeError`` raised while sending to the backend becomes HTTP 500."""

    class _EndpointService:
        @staticmethod
        def get_endpoint(sandbox_id: str, port: int, resolve_internal: bool = False) -> Endpoint:
            return Endpoint(endpoint="10.57.1.91:40109")

    monkeypatch.setattr(lifecycle, "sandbox_service", _EndpointService())

    failing_backend = _FakeAsyncClient()
    failing_backend.raise_generic_error = True
    client.app.state.http_client = failing_backend

    resp = client.get("/v1/sandboxes/sbx-123/proxy/44772/healthz", headers=auth_headers)

    assert resp.status_code == 500
    error_message = resp.json()["message"]
    assert "An internal error occurred in the proxy" in error_message


def test_proxy_forwards_18080_without_server_side_egress_auth_check(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """Port 18080 requests without an egress auth header are still forwarded.

    The 401 seen by the caller is the backend's canned reply relayed as-is, and
    the recorded request shows the call was sent to the backend.
    """

    class _EgressEndpointService:
        @staticmethod
        def get_endpoint(sandbox_id: str, port: int, resolve_internal: bool = False) -> Endpoint:
            assert port == 18080
            assert resolve_internal is True
            return Endpoint(endpoint="10.57.1.91:18080")

    monkeypatch.setattr(lifecycle, "sandbox_service", _EgressEndpointService())

    backend = _FakeAsyncClient()
    backend.response = _FakeStreamingResponse(
        status_code=401,
        headers={"content-type": "application/json"},
        chunks=[b'{"code":"UNAUTHORIZED"}'],
    )
    client.app.state.http_client = backend

    resp = client.get("/v1/sandboxes/sbx-123/proxy/18080/policy", headers=auth_headers)

    assert resp.status_code == 401
    assert resp.json()["code"] == "UNAUTHORIZED"
    forwarded = backend.built
    assert forwarded is not None
    assert forwarded["url"] == "http://10.57.1.91:18080/policy"


def test_proxy_forwards_egress_auth_header_for_18080(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """The egress auth header supplied by the caller is passed on to the port 18080 backend."""

    class _EgressEndpointService:
        @staticmethod
        def get_endpoint(sandbox_id: str, port: int, resolve_internal: bool = False) -> Endpoint:
            assert port == 18080
            assert resolve_internal is True
            return Endpoint(endpoint="10.57.1.91:18080")

    monkeypatch.setattr(lifecycle, "sandbox_service", _EgressEndpointService())

    backend = _FakeAsyncClient()
    backend.response = _FakeStreamingResponse(
        status_code=200,
        headers={"content-type": "application/json"},
        chunks=[b'{"status":"ok"}'],
    )
    client.app.state.http_client = backend

    request_headers = {**auth_headers, OPEN_SANDBOX_EGRESS_AUTH_HEADER: "egress-token"}
    resp = client.get("/v1/sandboxes/sbx-123/proxy/18080/policy", headers=request_headers)

    assert resp.status_code == 200
    forwarded = backend.built
    assert forwarded is not None
    # Compare case-insensitively: header names may have been re-cased in transit.
    lowered = {name.lower(): value for name, value in forwarded["headers"].items()}
    assert lowered[OPEN_SANDBOX_EGRESS_AUTH_HEADER.lower()] == "egress-token"


================================================
FILE: server/tests/test_routes_renew_expiration.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from datetime import datetime, timedelta, timezone

from fastapi.exceptions import HTTPException
from fastapi.testclient import TestClient

from src.api import lifecycle
from src.api.schema import RenewSandboxExpirationResponse


def test_renew_expiration_returns_updated_timestamp(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """Renewing returns the service's ``expiresAt`` and passes the parsed timestamp through."""
    new_expiry = datetime.now(timezone.utc) + timedelta(hours=2)
    renew_calls: list[tuple[str, datetime]] = []

    class _RecordingService:
        # Records (sandbox id, requested expiry) and echoes the target expiry back.
        @staticmethod
        def renew_expiration(sandbox_id: str, request) -> RenewSandboxExpirationResponse:
            renew_calls.append((sandbox_id, request.expires_at))
            return RenewSandboxExpirationResponse(expiresAt=new_expiry)

    monkeypatch.setattr(lifecycle, "sandbox_service", _RecordingService())

    resp = client.post(
        "/v1/sandboxes/sbx-001/renew-expiration",
        headers=auth_headers,
        json={"expiresAt": new_expiry.isoformat()},
    )

    assert resp.status_code == 200
    # Normalise a trailing "Z" so fromisoformat accepts it on older Python versions.
    raw_expiry = resp.json()["expiresAt"].replace("Z", "+00:00")
    assert datetime.fromisoformat(raw_expiry) == new_expiry
    assert renew_calls == [("sbx-001", new_expiry)]


def test_renew_expiration_rejects_invalid_payload(
    client: TestClient,
    auth_headers: dict,
) -> None:
    """An ``expiresAt`` value that is not a datetime is answered with HTTP 422."""
    bad_body = {"expiresAt": "not-a-datetime"}

    resp = client.post(
        "/v1/sandboxes/sbx-001/renew-expiration",
        headers=auth_headers,
        json=bad_body,
    )

    assert resp.status_code == 422


def test_renew_expiration_propagates_service_http_error(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """A 409 HTTPException raised by the service is returned with its detail as the body."""

    class _RejectingService:
        # Refuses every renewal with INVALID_EXPIRES_AT.
        @staticmethod
        def renew_expiration(sandbox_id: str, request) -> RenewSandboxExpirationResponse:
            conflict_detail = {
                "code": "INVALID_EXPIRES_AT",
                "message": f"Requested expiresAt is not valid for sandbox {sandbox_id}",
            }
            raise HTTPException(status_code=409, detail=conflict_detail)

    monkeypatch.setattr(lifecycle, "sandbox_service", _RejectingService())

    resp = client.post(
        "/v1/sandboxes/sbx-001/renew-expiration",
        headers=auth_headers,
        json={"expiresAt": "2030-01-01T00:00:00Z"},
    )

    assert resp.status_code == 409
    expected_body = {
        "code": "INVALID_EXPIRES_AT",
        "message": "Requested expiresAt is not valid for sandbox sbx-001",
    }
    assert resp.json() == expected_body


def test_renew_expiration_returns_409_for_manual_cleanup_sandbox(
    client: TestClient,
    auth_headers: dict,
    monkeypatch,
) -> None:
    """The service's 409 ``DOCKER::INVALID_EXPIRATION`` error is relayed unchanged."""

    class _NoExpirationService:
        # Refuses every renewal because the sandbox has no automatic expiration.
        @staticmethod
        def renew_expiration(sandbox_id: str, request) -> RenewSandboxExpirationResponse:
            conflict_detail = {
                "code": "DOCKER::INVALID_EXPIRATION",
                "message": f"Sandbox {sandbox_id} does not have automatic expiration enabled.",
            }
            raise HTTPException(status_code=409, detail=conflict_detail)

    monkeypatch.setattr(lifecycle, "sandbox_service", _NoExpirationService())

    resp = client.post(
        "/v1/sandboxes/sbx-manual/renew-expiration",
        headers=auth_headers,
        json={"expiresAt": "2030-01-01T00:00:00Z"},
    )

    assert resp.status_code == 409
    expected_body = {
        "code": "DOCKER::INVALID_EXPIRATION",
        "message": "Sandbox sbx-manual does not have automatic expiration enabled.",
    }
    assert resp.json() == expected_body


def test_renew_expiration_requires_api_key(client: TestClient) -> None:
    """Posting without auth headers yields 401 with code MISSING_API_KEY."""
    renew_payload = {"expiresAt": "2030-01-01T00:00:00Z"}
    response = client.post("/v1/sandboxes/sbx-001/renew-expiration", json=renew_payload)

    assert response.status_code == 401
    error_code = response.json()["code"]
    assert error_code == "MISSING_API_KEY"


================================================
FILE: server/tests/test_schema.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for Pydantic schema models."""

import pytest
from pydantic import ValidationError

from src.api.schema import (
    CreateSandboxRequest,
    Host,
    ImageSpec,
    OSSFS,
    PVC,
    ResourceLimits,
    Volume,
)


# ============================================================================
# Host Tests
# ============================================================================


class TestHost:
    """Checks for the Host backend model."""

    def test_valid_path(self):
        """An absolute path is stored unchanged on the model."""
        host = Host(path="/data/opensandbox")
        assert host.path == "/data/opensandbox"

    def test_path_required(self):
        """Omitting ``path`` produces a validation error located at ``path``."""
        with pytest.raises(ValidationError) as raised:
            Host()  # type: ignore
        locations = [err["loc"] for err in raised.value.errors()]
        assert ("path",) in locations

    def test_serialization(self):
        """``model_dump`` emits a dict holding only the path."""
        dumped = Host(path="/data/opensandbox").model_dump()
        expected = {"path": "/data/opensandbox"}
        assert dumped == expected

    def test_deserialization(self):
        """``model_validate`` rebuilds the model from a plain dict."""
        host = Host.model_validate({"path": "/data/opensandbox"})
        assert host.path == "/data/opensandbox"


# ============================================================================
# PVC Tests
# ============================================================================


class TestPVC:
    """Checks for the PVC backend model."""

    def test_valid_claim_name(self):
        """A claim name passed by field name is stored on the model."""
        pvc = PVC(claim_name="my-pvc")
        assert pvc.claim_name == "my-pvc"

    def test_claim_name_alias(self):
        """The camelCase ``claimName`` key populates ``claim_name``."""
        pvc = PVC.model_validate({"claimName": "my-pvc"})
        assert pvc.claim_name == "my-pvc"

    def test_serialization_uses_alias(self):
        """Dumping with ``by_alias=True`` emits the camelCase key."""
        dumped = PVC(claim_name="my-pvc").model_dump(by_alias=True)
        expected = {"claimName": "my-pvc"}
        assert dumped == expected

    def test_claim_name_required(self):
        """Omitting the claim name fails validation on that field."""
        with pytest.raises(ValidationError) as raised:
            PVC()  # type: ignore
        # The location may be reported under either the field name or its alias.
        locations = [str(err["loc"]) for err in raised.value.errors()]
        assert any("claim_name" in loc or "claimName" in loc for loc in locations)


# ============================================================================
# OSSFS Tests
# ============================================================================


class TestOSSFS:
    """Checks for the OSSFS backend model."""

    def test_valid_ossfs(self):
        """A fully specified OSSFS backend exposes the supplied values."""
        fields = {
            "bucket": "bucket-test-3",
            "endpoint": "oss-cn-hangzhou.aliyuncs.com",
            "version": "2.0",
            "options": ["allow_other"],
            "access_key_id": "AKIDEXAMPLE",
            "access_key_secret": "SECRETEXAMPLE",
        }
        oss = OSSFS(**fields)
        assert oss.bucket == "bucket-test-3"
        assert oss.version == "2.0"
        assert oss.access_key_id == "AKIDEXAMPLE"

    def test_default_ossfs_version_is_2_0(self):
        """Leaving ``version`` out results in the value "2.0"."""
        fields = {
            "bucket": "bucket-test-3",
            "endpoint": "oss-cn-hangzhou.aliyuncs.com",
            "access_key_id": "AKIDEXAMPLE",
            "access_key_secret": "SECRETEXAMPLE",
        }
        oss = OSSFS(**fields)
        assert oss.version == "2.0"

    def test_inline_credentials_required(self):
        """Constructing OSSFS without the access key pair fails validation."""
        with pytest.raises(ValidationError):
            OSSFS(bucket="bucket-test-3", endpoint="oss-cn-hangzhou.aliyuncs.com")  # type: ignore


# ============================================================================
# Volume Tests
# ============================================================================


class TestVolume:
    """Checks construction, backend validation and (de)serialization of Volume."""

    def test_valid_host_volume(self):
        """A host-backed volume keeps its fields; pvc and sub_path stay None."""
        host_backend = Host(path="/data/opensandbox")
        vol = Volume(
            name="workdir",
            host=host_backend,
            mount_path="/mnt/work",
            read_only=False,
        )
        assert vol.name == "workdir"
        assert vol.host is not None
        assert vol.host.path == "/data/opensandbox"
        assert vol.mount_path == "/mnt/work"
        assert vol.read_only is False
        assert vol.pvc is None
        assert vol.sub_path is None

    def test_valid_pvc_volume(self):
        """A PVC-backed volume keeps its fields; host stays None."""
        pvc_backend = PVC(claim_name="shared-models-pvc")
        vol = Volume(
            name="models",
            pvc=pvc_backend,
            mount_path="/mnt/models",
            read_only=True,
        )
        assert vol.name == "models"
        assert vol.pvc is not None
        assert vol.pvc.claim_name == "shared-models-pvc"
        assert vol.mount_path == "/mnt/models"
        assert vol.read_only is True
        assert vol.host is None

    def test_valid_volume_with_subpath(self):
        """A sub_path supplied at construction is stored on the volume."""
        host_backend = Host(path="/data/opensandbox")
        vol = Volume(
            name="workdir",
            host=host_backend,
            mount_path="/mnt/work",
            read_only=False,
            sub_path="task-001",
        )
        assert vol.sub_path == "task-001"

    def test_valid_ossfs_volume(self):
        """An OSSFS-backed volume is accepted and exposes backend and sub_path."""
        oss_backend = OSSFS(
            bucket="bucket-test-3",
            endpoint="oss-cn-hangzhou.aliyuncs.com",
            access_key_id="AKIDEXAMPLE",
            access_key_secret="SECRETEXAMPLE",
        )
        vol = Volume(
            name="data",
            ossfs=oss_backend,
            mount_path="/mnt/data",
            sub_path="task-001",
        )
        assert vol.ossfs is not None
        assert vol.ossfs.access_key_id == "AKIDEXAMPLE"
        assert vol.sub_path == "task-001"

    def test_no_backend_raises(self):
        """Omitting every backend fails validation with a 'backend' message."""
        with pytest.raises(ValidationError) as raised:
            Volume(
                name="workdir",
                mount_path="/mnt/work",
                read_only=False,
            )
        # The error text is expected to mention the backend requirement.
        assert "backend" in str(raised.value).lower()

    def test_multiple_backends_raises(self):
        """Supplying both host and pvc fails validation with a 'backend' message."""
        with pytest.raises(ValidationError) as raised:
            Volume(
                name="workdir",
                host=Host(path="/data/opensandbox"),
                pvc=PVC(claim_name="my-pvc"),
                mount_path="/mnt/work",
                read_only=False,
            )
        # The error text is expected to mention the backend requirement.
        assert "backend" in str(raised.value).lower()

    def test_serialization_host_volume(self):
        """A host volume dumps to camelCase keys when aliases are requested."""
        vol = Volume(
            name="workdir",
            host=Host(path="/data/opensandbox"),
            mount_path="/mnt/work",
            read_only=False,
            sub_path="task-001",
        )
        expected = {
            "name": "workdir",
            "host": {"path": "/data/opensandbox"},
            "mountPath": "/mnt/work",
            "readOnly": False,
            "subPath": "task-001",
        }
        dumped = vol.model_dump(by_alias=True, exclude_none=True)
        assert dumped == expected

    def test_serialization_pvc_volume(self):
        """A PVC volume dumps to camelCase keys when aliases are requested."""
        vol = Volume(
            name="models",
            pvc=PVC(claim_name="shared-models-pvc"),
            mount_path="/mnt/models",
            read_only=True,
        )
        expected = {
            "name": "models",
            "pvc": {"claimName": "shared-models-pvc"},
            "mountPath": "/mnt/models",
            "readOnly": True,
        }
        dumped = vol.model_dump(by_alias=True, exclude_none=True)
        assert dumped == expected

    def test_deserialization_host_volume(self):
        """A camelCase payload with a host backend validates into a Volume."""
        payload = {
            "name": "workdir",
            "host": {"path": "/data/opensandbox"},
            "mountPath": "/mnt/work",
            "readOnly": False,
            "subPath": "task-001",
        }
        vol = Volume.model_validate(payload)
        assert vol.name == "workdir"
        assert vol.host is not None
        assert vol.host.path == "/data/opensandbox"
        assert vol.mount_path == "/mnt/work"
        assert vol.read_only is False
        assert vol.sub_path == "task-001"

    def test_deserialization_pvc_volume(self):
        """A camelCase payload with a pvc backend validates into a Volume."""
        payload = {
            "name": "models",
            "pvc": {"claimName": "shared-models-pvc"},
            "mountPath": "/mnt/models",
            "readOnly": True,
        }
        vol = Volume.model_validate(payload)
        assert vol.name == "models"
        assert vol.pvc is not None
        assert vol.pvc.claim_name == "shared-models-pvc"
        assert vol.mount_path == "/mnt/models"
        assert vol.read_only is True

    def test_serialization_ossfs_volume(self):
        """An OSSFS volume dumps its backend fields under camelCase aliases."""
        oss_backend = OSSFS(
            bucket="bucket-test-3",
            endpoint="oss-cn-hangzhou.aliyuncs.com",
            access_key_id="AKIDEXAMPLE",
            access_key_secret="SECRETEXAMPLE",
        )
        vol = Volume(
            name="data",
            ossfs=oss_backend,
            mount_path="/mnt/data",
            read_only=False,
            sub_path="task-001",
        )
        dumped = vol.model_dump(by_alias=True, exclude_none=True)
        assert dumped["ossfs"]["bucket"] == "bucket-test-3"
        assert dumped["ossfs"]["accessKeyId"] == "AKIDEXAMPLE"
        assert dumped["subPath"] == "task-001"


# ============================================================================
# CreateSandboxRequest with Volumes Tests
# ============================================================================


class TestCreateSandboxRequestWithVolumes:
    """Tests for CreateSandboxRequest: the volumes field and timeout handling."""

    def test_request_without_timeout_uses_manual_cleanup(self):
        """Request without timeout should be valid and represent manual cleanup mode."""
        request = CreateSandboxRequest(
            image=ImageSpec(uri="python:3.11"),
            resource_limits=ResourceLimits({"cpu": "500m", "memory": "512Mi"}),
            entrypoint=["python", "-c", "print('hello')"],
        )
        # An omitted timeout is stored as None on the model.
        assert request.timeout is None

    def test_request_without_volumes(self):
        """Request without volumes should be valid."""
        request = CreateSandboxRequest(
            image=ImageSpec(uri="python:3.11"),
            timeout=3600,
            resource_limits=ResourceLimits({"cpu": "500m", "memory": "512Mi"}),
            entrypoint=["python", "-c", "print('hello')"],
        )
        assert request.volumes is None

    def test_request_with_empty_volumes(self):
        """Request with empty volumes list should be valid."""
        request = CreateSandboxRequest(
            image=ImageSpec(uri="python:3.11"),
            timeout=3600,
            resource_limits=ResourceLimits({"cpu": "500m", "memory": "512Mi"}),
            entrypoint=["python", "-c", "print('hello')"],
            volumes=[],
        )
        # An explicit empty list is preserved rather than collapsed to None.
        assert request.volumes == []

    def test_request_with_host_volume(self):
        """Request with host volume should be valid."""
        request = CreateSandboxRequest(
            image=ImageSpec(uri="python:3.11"),
            timeout=3600,
            resource_limits=ResourceLimits({"cpu": "500m", "memory": "512Mi"}),
            entrypoint=["python", "-c", "print('hello')"],
            volumes=[
                Volume(
                    name="workdir",
                    host=Host(path="/data/opensandbox"),
                    mount_path="/mnt/work",
                    read_only=False,
                )
            ],
        )
        assert request.volumes is not None
        assert len(request.volumes) == 1
        assert request.volumes[0].name == "workdir"

    def test_request_with_pvc_volume(self):
        """Request with PVC volume should be valid."""
        request = CreateSandboxRequest(
            image=ImageSpec(uri="python:3.11"),
            timeout=3600,
            resource_limits=ResourceLimits({"cpu": "500m", "memory": "512Mi"}),
            entrypoint=["python", "-c", "print('hello')"],
            volumes=[
                Volume(
                    name="models",
                    pvc=PVC(claim_name="shared-models-pvc"),
                    mount_path="/mnt/models",
                    read_only=True,
                )
            ],
        )
        assert request.volumes is not None
        assert len(request.volumes) == 1
        assert request.volumes[0].pvc is not None
        assert request.volumes[0].pvc.claim_name == "shared-models-pvc"

    def test_request_with_multiple_volumes(self):
        """Request with multiple volumes should be valid."""
        request = CreateSandboxRequest(
            image=ImageSpec(uri="python:3.11"),
            timeout=3600,
            resource_limits=ResourceLimits({"cpu": "500m", "memory": "512Mi"}),
            entrypoint=["python", "-c", "print('hello')"],
            volumes=[
                Volume(
                    name="workdir",
                    host=Host(path="/data/opensandbox"),
                    mount_path="/mnt/work",
                    read_only=False,
                ),
                Volume(
                    name="models",
                    pvc=PVC(claim_name="shared-models-pvc"),
                    mount_path="/mnt/models",
                    read_only=True,
                ),
            ],
        )
        assert request.volumes is not None
        assert len(request.volumes) == 2

    def test_serialization_with_volumes(self):
        """Request with volumes should serialize correctly."""
        request = CreateSandboxRequest(
            image=ImageSpec(uri="python:3.11"),
            timeout=3600,
            resource_limits=ResourceLimits({"cpu": "500m", "memory": "512Mi"}),
            entrypoint=["python", "-c", "print('hello')"],
            volumes=[
                Volume(
                    name="workdir",
                    host=Host(path="/data/opensandbox"),
                    mount_path="/mnt/work",
                    read_only=False,
                    sub_path="task-001",
                )
            ],
        )
        data = request.model_dump(by_alias=True, exclude_none=True)
        assert "volumes" in data
        assert len(data["volumes"]) == 1
        # Nested volume fields are emitted under their camelCase aliases.
        assert data["volumes"][0]["name"] == "workdir"
        assert data["volumes"][0]["mountPath"] == "/mnt/work"
        assert data["volumes"][0]["readOnly"] is False
        assert data["volumes"][0]["subPath"] == "task-001"

    def test_deserialization_with_volumes(self):
        """Request with volumes should deserialize correctly."""
        data = {
            "image": {"uri": "python:3.11"},
            "timeout": 3600,
            "resourceLimits": {"cpu": "500m", "memory": "512Mi"},
            "entrypoint": ["python", "-c", "print('hello')"],
            "volumes": [
                {
                    "name": "workdir",
                    "host": {"path": "/data/opensandbox"},
                    "mountPath": "/mnt/work",
                    "readOnly": False,
                    "subPath": "task-001",
                },
                {
                    "name": "models",
                    "pvc": {"claimName": "shared-models-pvc"},
                    "mountPath": "/mnt/models",
                    "readOnly": True,
                },
            ],
        }
        request = CreateSandboxRequest.model_validate(data)
        assert request.volumes is not None
        assert len(request.volumes) == 2

        # Check host volume
        assert request.volumes[0].name == "workdir"
        assert request.volumes[0].host is not None
        assert request.volumes[0].host.path == "/data/opensandbox"
        assert request.volumes[0].mount_path == "/mnt/work"
        assert request.volumes[0].read_only is False
        assert request.volumes[0].sub_path == "task-001"

        # Check PVC volume
        assert request.volumes[1].name == "models"
        assert request.volumes[1].pvc is not None
        assert request.volumes[1].pvc.claim_name == "shared-models-pvc"
        assert request.volumes[1].mount_path == "/mnt/models"
        assert request.volumes[1].read_only is True

    def test_request_rejects_zero_timeout(self):
        """Zero timeout should still be rejected."""
        # Build the nested models outside the raises block so that only a
        # ValidationError raised while constructing CreateSandboxRequest can
        # satisfy the expectation; a failure in a helper model must not make
        # this test pass by accident.
        image = ImageSpec(uri="python:3.11")
        limits = ResourceLimits({"cpu": "500m"})
        with pytest.raises(ValidationError):
            CreateSandboxRequest(
                image=image,
                timeout=0,
                resource_limits=limits,
                entrypoint=["python", "-c", "print('hello')"],
            )

    def test_request_allows_timeout_above_previous_hardcoded_limit(self):
        """Schema should not hardcode the server-side maximum timeout."""
        request = CreateSandboxRequest(
            image=ImageSpec(uri="python:3.11"),
            timeout=172800,
            resource_limits=ResourceLimits({"cpu": "500m", "memory": "512Mi"}),
            entrypoint=["python", "-c", "print('hello')"],
        )

        # 172800 seconds is accepted and stored unchanged by the schema.
        assert request.timeout == 172800


================================================
FILE: server/tests/test_validators.py
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
from fastapi import HTTPException

from src.api.schema import Host, OSSFS, PVC, Volume
from src.services.constants import SandboxErrorCodes
from src.services.validators import (
    ensure_metadata_labels,
    ensure_timeout_within_limit,
    ensure_valid_host_path,
    ensure_valid_mount_path,
    ensure_valid_pvc_name,
    ensure_valid_sub_path,
    ensure_valid_volume_name,
    ensure_volumes_valid,
)


def test_ensure_metadata_labels_accepts_common_k8s_forms():
    """A mix of prefixed, unprefixed and empty-valued labels passes validation."""
    # Covers keys with and without a prefix, mixed characters in values,
    # and an empty value.
    labels = {
        "app": "web",
        "k8s.io/name": "app-1",
        "example.com/label": "a.b_c-1",
        "team": "A1_b-2.c",
        "empty": "",
    }

    # Passing means the call returns without raising.
    ensure_metadata_labels(labels)


def test_ensure_metadata_labels_allows_none_or_empty():
    """Neither ``None`` nor an empty dict triggers an error."""
    for absent_metadata in (None, {}):
        ensure_metadata_labels(absent_metadata)


def test_ensure_metadata_labels_rejects_name_too_long():
    """A 64-character label name is rejected with INVALID_METADATA_LABEL."""
    oversized_key = "a" * 64
    with pytest.raises(HTTPException) as raised:
        ensure_metadata_labels({oversized_key: "value"})
    error = raised.value
    assert error.status_code == 400
    assert error.detail["code"] == SandboxErrorCodes.INVALID_METADATA_LABEL


def test_ensure_metadata_labels_rejects_prefix_too_long():
    """A label prefix longer than 253 characters is rejected."""
    # Five 62-character segments joined by dots: 62*5 + 4 = 314 characters.
    segment = "a" * 62
    oversized_prefix = ".".join(segment for _ in range(5))
    labels = {f"{oversized_prefix}/name": "value"}
    with pytest.raises(HTTPException) as raised:
        ensure_metadata_labels(labels)
    error = raised.value
    assert error.status_code == 400
    assert error.detail["code"] == SandboxErrorCodes.INVALID_METADATA_LABEL


def test_ensure_metadata_labels_accepts_key_with_max_length_prefix_and_name():
    """A key is accepted when prefix <= 253 and name <= 63 even if the total exceeds 253."""
    # Four 62-character segments joined by dots: 62*4 + 3 = 251 characters.
    segment = "a" * 62
    prefix = ".".join(segment for _ in range(4))
    assert len(prefix) == 251
    # Whole key is 251 + 1 + 10 = 262 characters; each part is within its own limit.
    full_key = f"{prefix}/valid-name"
    # Per the original note, an earlier total-length check rejected this key;
    # the call must complete without raising.
    ensure_metadata_labels({full_key: "value"})


def test_ensure_metadata_labels_rejects_invalid_prefix_format():
    """A prefix containing characters invalid for a DNS subdomain is rejected."""
    labels = {"INVALID_PREFIX.io/name": "value"}
    with pytest.raises(HTTPException) as raised:
        ensure_metadata_labels(labels)
    error = raised.value
    assert error.status_code == 400
    assert error.detail["code"] == SandboxErrorCodes.INVALID_METADATA_LABEL


def test_ensure_metadata_labels_rejects_value_too_long():
    """A 64-character label value is rejected with INVALID_METADATA_LABEL."""
    oversized_value = "a" * 64
    with pytest.raises(HTTPException) as raised:
        ensure_metadata_labels({"app": oversized_value})
    error = raised.value
    assert error.status_code == 400
    assert error.detail["code"] == SandboxErrorCodes.INVALID_METADATA_LABEL


def test_ensure_metadata_labels_rejects_non_string_key():
    """An integer key in the metadata mapping is rejected."""
    with pytest.raises(HTTPException) as raised:
        ensure_metadata_labels({1: "value"})  # type: ignore[dict-item]
    error = raised.value
    assert error.status_code == 400
    assert error.detail["code"] == SandboxErrorCodes.INVALID_METADATA_LABEL


def test_ensure_metadata_labels_rejects_key_with_empty_prefix():
    """A key beginning with '/' (empty prefix) is rejected."""
    labels = {"/name": "value"}
    with pytest.raises(HTTPException) as raised:
        ensure_metadata_labels(labels)
    error = raised.value
    assert error.status_code == 400
    assert error.detail["code"] == SandboxErrorCodes.INVALID_METADATA_LABEL


def test_ensure_metadata_labels_rejects_reserved_prefix():
    """Keys under the opensandbox.io/ prefix are refused for user metadata."""
    labels = {"opensandbox.io/expires-at": "2030-01-01T00:00:00Z"}
    with pytest.raises(HTTPException) as raised:
        ensure_metadata_labels(labels)
    error = raised.value
    assert error.status_code == 400
    assert error.detail["code"] == SandboxErrorCodes.INVALID_METADATA_LABEL
    assert "reserved prefix" in error.detail["message"]


def test_ensure_metadata_labels_rejects_manual_cleanup_key():
    """The manual-cleanup lifecycle label cannot be supplied by the user."""
    labels = {"opensandbox.io/manual-cleanup": "true"}
    with pytest.raises(HTTPException) as raised:
        ensure_metadata_labels(labels)
    error = raised.value
    assert error.status_code == 400
    assert error.detail["code"] == SandboxErrorCodes.INVALID_METADATA_LABEL
    assert "reserved prefix" in error.detail["message"]


def test_ensure_metadata_labels_rejects_arbitrary_reserved_key():
    """An arbitrary key under opensandbox.io/ is rejected too, not only known labels."""
    labels = {"opensandbox.io/custom": "value"}
    with pytest.raises(HTTPException) as raised:
        ensure_metadata_labels(labels)
    error = raised.value
    assert error.status_code == 400
    assert error.detail["code"] == SandboxErrorCodes.INVALID_METADATA_LABEL


def test_ensure_timeout_within_limit_allows_equal_boundary():
    """A timeout exactly equal to the limit does not raise."""
    boundary = 3600
    ensure_timeout_within_limit(boundary, boundary)


def test_ensure_timeout_within_limit_allows_disabled_upper_bound():
    """Passing ``None`` as the limit lets a 7200-second timeout through."""
    no_limit = None
    ensure_timeout_within_limit(7200, no_limit)


def test_ensure_timeout_within_limit_rejects_timeout_above_limit():
    """A timeout one second over the limit raises a 400 INVALID_PARAMETER error."""
    limit = 3600
    with pytest.raises(HTTPException) as raised:
        ensure_timeout_within_limit(limit + 1, limit)

    error = raised.value
    assert error.status_code == 400
    assert error.detail["code"] == SandboxErrorCodes.INVALID_PARAMETER


def test_ensure_timeout_within_limit_rejects_unrepresentable_timeout():
    """A timeout of 10**20 is rejected as "too large" even with no upper limit."""
    enormous_timeout = 10**20
    with pytest.raises(HTTPException) as raised:
        ensure_timeout_within_limit(enormous_timeout, None)

    error = raised.value
    assert error.status_code == 400
    assert error.detail["code"] == SandboxErrorCodes.INVALID_PARAMETER
    assert "too large" in error.detail["message"]


# ============================================================================
# Volume Name Validation Tests
# ============================================================================


class TestEnsureValidVolumeName:
    """Exercises ensure_valid_volume_name with accepted and rejected names."""

    @staticmethod
    def _assert_rejected(name):
        """Expect a 400 HTTPException carrying INVALID_VOLUME_NAME for ``name``."""
        with pytest.raises(HTTPException) as raised:
            ensure_valid_volume_name(name)
        assert raised.value.status_code == 400
        assert raised.value.detail["code"] == SandboxErrorCodes.INVALID_VOLUME_NAME

    def test_valid_simple_name(self):
        """Plain lowercase names are accepted."""
        for name in ("workdir", "data", "models"):
            ensure_valid_volume_name(name)

    def test_valid_name_with_numbers(self):
        """Names containing digits, or made only of digits, are accepted."""
        for name in ("data1", "vol2", "123"):
            ensure_valid_volume_name(name)

    def test_valid_name_with_hyphens(self):
        """Names with interior hyphens are accepted."""
        for name in ("my-volume", "data-cache-1", "a-b-c"):
            ensure_valid_volume_name(name)

    def test_empty_name_raises(self):
        """The empty string is rejected."""
        self._assert_rejected("")

    def test_name_too_long_raises(self):
        """A 64-character name is rejected."""
        self._assert_rejected("a" * 64)

    def test_uppercase_name_raises(self):
        """A name containing uppercase letters is rejected."""
        self._assert_rejected("MyVolume")

    def test_underscore_name_raises(self):
        """A name containing an underscore is rejected (not a valid DNS label)."""
        self._assert_rejected("my_volume")

    def test_name_starting_with_hyphen_raises(self):
        """A name with a leading hyphen is rejected."""
        self._assert_rejected("-volume")

    def test_name_ending_with_hyphen_raises(self):
        """A name with a trailing hyphen is rejected."""
        self._assert_rejected("volume-")


# ============================================================================
# Mount Path Validation Tests
# ============================================================================


class TestEnsureValidMountPath:
    """Exercises ensure_valid_mount_path with accepted and rejected paths."""

    def test_valid_absolute_path(self):
        """Absolute paths, including the root itself, are accepted."""
        for mount_path in ("/mnt/data", "/", "/home/user/work"):
            ensure_valid_mount_path(mount_path)

    def test_empty_path_raises(self):
        """The empty string is rejected with INVALID_MOUNT_PATH."""
        with pytest.raises(HTTPException) as raised:
            ensure_valid_mount_path("")
        error = raised.value
        assert error.status_code == 400
        assert error.detail["code"] == SandboxErrorCodes.INVALID_MOUNT_PATH

    def test_relative_path_raises(self):
        """A relative path is rejected with INVALID_MOUNT_PATH."""
        with pytest.raises(HTTPException) as raised:
            ensure_valid_mount_path("data/files")
        error = raised.value
        assert error.status_code == 400
        assert error.detail["code"] == SandboxErrorCodes.INVALID_MOUNT_PATH

    def test_path_not_starting_with_slash_raises(self):
        """A path lacking the leading '/' is rejected with INVALID_MOUNT_PATH."""
        with pytest.raises(HTTPException) as raised:
            ensure_valid_mount_path("mnt/data")
        error = raised.value
        assert error.status_code == 400
        assert error.detail["code"] == SandboxErrorCodes.INVALID_MOUNT_PATH


# ============================================================================
# SubPath Validation Tests
# ============================================================================


class TestEnsureValidSubPath:
    """Exercises ensure_valid_sub_path with accepted and rejected sub paths."""

    def test_none_subpath_valid(self):
        """``None`` is accepted."""
        missing = None
        ensure_valid_sub_path(missing)

    def test_empty_subpath_valid(self):
        """The empty string is accepted."""
        blank = ""
        ensure_valid_sub_path(blank)

    def test_relative_subpath_valid(self):
        """Relative paths of one or more segments are accepted."""
        for sub_path in ("task-001", "user/data", "a/b/c"):
            ensure_valid_sub_path(sub_path)

    def test_absolute_subpath_raises(self):
        """An absolute path is rejected with INVALID_SUB_PATH."""
        with pytest.raises(HTTPException) as raised:
            ensure_valid_sub_path("/absolute/path")
        error = raised.value
        assert error.status_code == 400
        assert error.detail["code"] == SandboxErrorCodes.INVALID_SUB_PATH

    def test_path_traversal_raises(self):
        """A leading '..' segment is rejected with INVALID_SUB_PATH."""
        with pytest.raises(HTTPException) as raised:
            ensure_valid_sub_path("../parent")
        error = raised.value
        assert error.status_code == 400
        assert error.detail["code"] == SandboxErrorCodes.INVALID_SUB_PATH

    def test_embedded_path_traversal_raises(self):
        """A '..' segment in the middle of the path is rejected with INVALID_SUB_PATH."""
        with pytest.raises(HTTPException) as raised:
            ensure_valid_sub_path("a/../b")
        error = raised.value
        assert error.status_code == 400
        assert error.detail["code"] == SandboxErrorCodes.INVALID_SUB_PATH


# ============================================================================
# Host Path Validation Tests
# ============================================================================


class TestEnsureValidHostPath:
    """Unit tests covering ``ensure_valid_host_path``."""

    @staticmethod
    def _assert_rejected(expected_code, *call_args):
        """Require that ``ensure_valid_host_path(*call_args)`` fails with HTTP 400.

        ``call_args`` is forwarded untouched so each test decides whether an
        allowlist is passed at all.
        """
        with pytest.raises(HTTPException) as caught:
            ensure_valid_host_path(*call_args)
        error = caught.value
        assert error.status_code == 400
        assert error.detail["code"] == expected_code

    def test_valid_absolute_path(self):
        """Absolute paths must not raise when no allowlist is given."""
        for candidate in ("/data/opensandbox", "/tmp"):
            ensure_valid_host_path(candidate)

    def test_empty_path_raises(self):
        """An empty string is rejected as an invalid host path."""
        self._assert_rejected(SandboxErrorCodes.INVALID_HOST_PATH, "")

    def test_relative_path_raises(self):
        """A path without a leading '/' is rejected."""
        self._assert_rejected(SandboxErrorCodes.INVALID_HOST_PATH, "data/files")

    def test_path_with_traversal_raises(self):
        """A path containing a '..' component is rejected."""
        self._assert_rejected(
            SandboxErrorCodes.INVALID_HOST_PATH, "/data/../etc/passwd"
        )

    def test_path_with_double_slash_raises(self):
        """A path containing '//' is rejected."""
        self._assert_rejected(SandboxErrorCodes.INVALID_HOST_PATH, "/data//files")

    def test_allowed_prefix_match(self):
        """Paths nested under one of the allowed prefixes must not raise."""
        prefixes = ["/data/opensandbox", "/tmp/sandbox"]
        for candidate in ("/data/opensandbox/user-a", "/tmp/sandbox/task-1"):
            ensure_valid_host_path(candidate, prefixes)

    def test_allowed_prefix_exact_match(self):
        """A path equal to an allowed prefix must not raise."""
        prefixes = ["/data/opensandbox"]
        ensure_valid_host_path("/data/opensandbox", prefixes)

    def test_path_not_in_allowed_prefix_raises(self):
        """A path outside every allowed prefix is rejected as not allowed."""
        prefixes = ["/data/opensandbox"]
        self._assert_rejected(
            SandboxErrorCodes.HOST_PATH_NOT_ALLOWED, "/etc/passwd", prefixes
        )

    def test_partial_prefix_match_raises(self):
        """A sibling directory that merely shares the prefix string is rejected."""
        prefixes = ["/data/opensandbox"]
        self._assert_rejected(
            SandboxErrorCodes.HOST_PATH_NOT_ALLOWED, "/data/opensandbox-evil", prefixes
        )


# ============================================================================
# PVC Name Validation Tests
# ============================================================================


class TestEnsureValidPvcName:
    """Unit tests covering ``ensure_valid_pvc_name``."""

    @staticmethod
    def _assert_rejected(claim_name):
        """Require that ``claim_name`` is refused with HTTP 400 / INVALID_PVC_NAME."""
        with pytest.raises(HTTPException) as caught:
            ensure_valid_pvc_name(claim_name)
        error = caught.value
        assert error.status_code == 400
        assert error.detail["code"] == SandboxErrorCodes.INVALID_PVC_NAME

    def test_valid_simple_name(self):
        """Lowercase names made of letters, digits and hyphens must not raise."""
        for candidate in ("my-pvc", "data-volume", "pvc1"):
            ensure_valid_pvc_name(candidate)

    def test_empty_name_raises(self):
        """An empty name is rejected."""
        self._assert_rejected("")

    def test_name_too_long_raises(self):
        """A 254-character name (one past the 253 limit) is rejected."""
        oversized = "a" * 254
        self._assert_rejected(oversized)

    def test_uppercase_name_raises(self):
        """A name containing uppercase letters is rejected."""
        self._assert_rejected("MyPVC")

    def test_underscore_name_raises(self):
        """A name containing an underscore is rejected."""
        self._assert_rejected("my_pvc")


# ============================================================================
# Volumes List Validation Tests
# ============================================================================


class TestEnsureVolumesValid:
    """Unit tests covering ``ensure_volumes_valid`` and the volume schema models."""

    @staticmethod
    def _assert_rejected(expected_code, *call_args, **call_kwargs):
        """Require that ``ensure_volumes_valid`` fails with HTTP 400 and ``expected_code``.

        Positional and keyword arguments are forwarded untouched.
        """
        with pytest.raises(HTTPException) as caught:
            ensure_volumes_valid(*call_args, **call_kwargs)
        error = caught.value
        assert error.status_code == 400
        assert error.detail["code"] == expected_code

    def test_none_volumes_valid(self):
        """Passing None must not raise."""
        ensure_volumes_valid(None)

    def test_empty_volumes_valid(self):
        """Passing an empty list must not raise."""
        ensure_volumes_valid([])

    def test_valid_host_volume(self):
        """A well-formed host-backed volume must not raise."""
        host_volume = Volume(
            name="workdir",
            host=Host(path="/data/opensandbox"),
            mount_path="/mnt/work",
            read_only=False,
        )
        ensure_volumes_valid([host_volume])

    def test_valid_pvc_volume(self):
        """A well-formed PVC-backed volume must not raise."""
        pvc_volume = Volume(
            name="models",
            pvc=PVC(claim_name="shared-models-pvc"),
            mount_path="/mnt/models",
            read_only=True,
        )
        ensure_volumes_valid([pvc_volume])

    def test_valid_ossfs_volume(self):
        """A well-formed OSSFS-backed volume with a subPath must not raise."""
        ossfs_volume = Volume(
            name="oss-data",
            ossfs=OSSFS(
                bucket="bucket-test-3",
                endpoint="oss-cn-hangzhou.aliyuncs.com",
                access_key_id="AKIDEXAMPLE",
                access_key_secret="SECRETEXAMPLE",
            ),
            mount_path="/mnt/data",
            read_only=False,
            sub_path="task-001",
        )
        ensure_volumes_valid([ossfs_volume])

    def test_valid_volume_with_subpath(self):
        """A host-backed volume with a relative subPath must not raise."""
        host_volume = Volume(
            name="workdir",
            host=Host(path="/data/opensandbox"),
            mount_path="/mnt/work",
            read_only=False,
            sub_path="task-001",
        )
        ensure_volumes_valid([host_volume])

    def test_multiple_valid_volumes(self):
        """A list mixing a host volume and a PVC volume must not raise."""
        host_volume = Volume(
            name="workdir",
            host=Host(path="/data/opensandbox"),
            mount_path="/mnt/work",
            read_only=False,
        )
        pvc_volume = Volume(
            name="models",
            pvc=PVC(claim_name="shared-models-pvc"),
            mount_path="/mnt/models",
            read_only=True,
        )
        ensure_volumes_valid([host_volume, pvc_volume])

    def test_duplicate_volume_name_raises(self):
        """Two volumes sharing one name are rejected."""
        first = Volume(
            name="workdir",
            host=Host(path="/data/a"),
            mount_path="/mnt/a",
            read_only=False,
        )
        # Same name as `first`, different host path and mount path.
        second = Volume(
            name="workdir",
            host=Host(path="/data/b"),
            mount_path="/mnt/b",
            read_only=False,
        )
        self._assert_rejected(SandboxErrorCodes.DUPLICATE_VOLUME_NAME, [first, second])

    def test_invalid_volume_name_rejected_by_pydantic(self):
        """A malformed volume name fails at model construction time."""
        from pydantic import ValidationError

        # The model refuses the value before ensure_volumes_valid is ever called.
        with pytest.raises(ValidationError) as caught:
            Volume(
                name="Invalid_Name",  # uppercase letters and an underscore
                host=Host(path="/data/opensandbox"),
                mount_path="/mnt/work",
                read_only=False,
            )
        assert "name" in str(caught.value)

    def test_invalid_mount_path_rejected_by_pydantic(self):
        """A non-absolute mount path fails at model construction time."""
        from pydantic import ValidationError

        # The model refuses the value before ensure_volumes_valid is ever called.
        with pytest.raises(ValidationError) as caught:
            Volume(
                name="workdir",
                host=Host(path="/data/opensandbox"),
                mount_path="relative/path",  # no leading slash
                read_only=False,
            )
        assert "mount_path" in str(caught.value)

    def test_invalid_subpath_raises(self):
        """A subPath that climbs out with '..' is rejected."""
        escaping_volume = Volume(
            name="workdir",
            host=Host(path="/data/opensandbox"),
            mount_path="/mnt/work",
            read_only=False,
            sub_path="../escape",  # path traversal
        )
        self._assert_rejected(SandboxErrorCodes.INVALID_SUB_PATH, [escaping_volume])

    def test_host_path_allowlist_enforced(self):
        """A host path outside the supplied allowlist is rejected."""
        outside_volume = Volume(
            name="workdir",
            host=Host(path="/etc/passwd"),  # not under the allowed prefix
            mount_path="/mnt/work",
            read_only=False,
        )
        self._assert_rejected(
            SandboxErrorCodes.HOST_PATH_NOT_ALLOWED,
            [outside_volume],
            allowed_host_prefixes=["/data/opensandbox"],
        )

    def test_ossfs_invalid_version_rejected_by_schema(self):
        """An OSSFS version of "3.0" fails at model construction time."""
        from pydantic import ValidationError

        with pytest.raises(ValidationError):
            OSSFS(
                bucket="bucket-test-3",
                endpoint="oss-cn-hangzhou.aliyuncs.com",
                version="3.0",  # type: ignore[arg-type]
                access_key_id="AKIDEXAMPLE",
                access_key_secret="SECRETEXAMPLE",
            )

    def test_ossfs_missing_inline_credentials_raises(self):
        """An OSSFS volume whose access key id was cleared is rejected."""
        ossfs_volume = Volume(
            name="oss-data",
            ossfs=OSSFS(
                bucket="bucket-test-3",
                endpoint="oss-cn-hangzhou.aliyuncs.com",
                access_key_id="AKIDEXAMPLE",
                access_key_secret="SECRETEXAMPLE",
            ),
            mount_path="/mnt/data",
        )
        # Blank the credential after construction, so the model is built with
        # full credentials and only ensure_volumes_valid sees the gap.
        ossfs_volume.ossfs.access_key_id = None
        self._assert_rejected(
            SandboxErrorCodes.INVALID_OSSFS_CREDENTIALS, [ossfs_volume]
        )

    def test_ossfs_v1_options_reject_prefixed_entries(self):
        """With version 1.0, an option written as "--allow_other" is rejected."""
        ossfs_volume = Volume(
            name="oss-data",
            ossfs=OSSFS(
                bucket="bucket-test-3",
                endpoint="oss-cn-hangzhou.aliyuncs.com",
                version="1.0",
                options=["--allow_other"],
                access_key_id="AKIDEXAMPLE",
                access_key_secret="SECRETEXAMPLE",
            ),
            mount_path="/mnt/data",
        )
        self._assert_rejected(SandboxErrorCodes.INVALID_OSSFS_OPTION, [ossfs_volume])

    def test_ossfs_v2_options_reject_prefixed_entries(self):
        """With version 2.0, an option written as "-o allow_other" is rejected."""
        ossfs_volume = Volume(
            name="oss-data",
            ossfs=OSSFS(
                bucket="bucket-test-3",
                endpoint="oss-cn-hangzhou.aliyuncs.com",
                version="2.0",
                options=["-o allow_other"],
                access_key_id="AKIDEXAMPLE",
                access_key_secret="SECRETEXAMPLE",
            ),
            mount_path="/mnt/data",
        )
        self._assert_rejected(SandboxErrorCodes.INVALID_OSSFS_OPTION, [ossfs_volume])

    def test_invalid_pvc_name_rejected_by_pydantic(self):
        """A malformed PVC claim name fails at model construction time."""
        from pydantic import ValidationError

        # The model refuses the value before ensure_volumes_valid is ever called.
        with pytest.raises(ValidationError) as caught:
            PVC(claim_name="Invalid_PVC")  # uppercase letters and an underscore
        assert "claim_name" in str(caught.value)


================================================
FILE: server/tests/testdata/config.toml
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Test configuration selecting the Docker runtime.

# Server settings: listen host/port, log level and API key.
# NOTE(review): the API key looks like a fixture placeholder, not a real secret — confirm.
[server]
host = "127.0.0.1"
port = 9000
log_level = "DEBUG"
api_key = "test-api-key-12345"

# Runtime backend and the execd image reference.
[runtime]
type = "docker"
execd_image = "ghcr.io/opensandbox/platform:latest"

# Ingress mode.
[ingress]
mode = "direct"


================================================
FILE: server/tests/testdata/k8s_config.toml
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Test configuration for Kubernetes runtime tests

# Server settings: listen host/port, log level and API key.
# NOTE(review): the API key looks like a fixture placeholder, not a real secret — confirm.
[server]
host = "0.0.0.0"
port = 8080
log_level = "DEBUG"
api_key = "test-k8s-api-key"

# Runtime backend and the execd image reference.
[runtime]
type = "kubernetes"
execd_image = "ghcr.io/opensandbox/execd:test"

# Kubernetes-specific settings: kubeconfig location, namespace, service
# account and workload provider.
# NOTE(review): the kubeconfig path presumably need not exist for these tests — confirm.
[kubernetes]
kubeconfig_path = "/tmp/test-kubeconfig"
namespace = "test-namespace"
service_account = "test-sa"
workload_provider = "batchsandbox"


================================================
FILE: specs/README.md
================================================
# OpenSandbox API Specifications

English | [中文](README_zh.md)

This directory contains OpenAPI specification documents for the OpenSandbox project, defining the complete API interfaces and data models. Use the server base URLs defined in each spec (for example, `http://localhost:8080/v1` for the lifecycle API, `http://localhost:44772` for execd, and `http://localhost:18080` for egress) when constructing requests.

## Specification Files

### 1. sandbox-lifecycle.yml

**Sandbox Lifecycle Management API**

Defines the complete lifecycle interfaces for creating, managing, and destroying sandbox environments directly from container images.

**Core Features:**
- **Sandbox Management**: Create, list, query, and delete sandbox instances with metadata filters and pagination
- **State Control**: Pause and resume sandbox execution
- **Lifecycle States**: Supports transitions across Pending → Running → Pausing → Paused → Stopping → Terminated, plus a `Failed` state for errors
- **Resource & Runtime Configuration**: Specify CPU/memory/GPU resource limits, required `entrypoint`, environment variables, and opaque `extensions`
- **Image Support**: Create sandboxes from public or private registries, including registry auth
- **Timeout Management**: Mandatory `timeout` on creation with explicit renewal via API
- **Endpoint Access**: Retrieve public access endpoints for services running inside sandboxes

**Main Endpoints (base path `/v1`):**
- `POST /sandboxes` - Create a sandbox from an image with timeout and resource limits
- `GET /sandboxes` - List sandboxes with state/metadata filters and pagination
- `GET /sandboxes/{sandboxId}` - Get full sandbox details (including image and entrypoint)
- `DELETE /sandboxes/{sandboxId}` - Delete a sandbox
- `POST /sandboxes/{sandboxId}/pause` - Pause a sandbox (asynchronous)
- `POST /sandboxes/{sandboxId}/resume` - Resume a paused sandbox
- `POST /sandboxes/{sandboxId}/renew-expiration` - Renew sandbox expiration (TTL)
- `GET /sandboxes/{sandboxId}/endpoints/{port}` - Get an access endpoint for a service port

**Authentication:**
- HTTP Header: `OPEN-SANDBOX-API-KEY: your-api-key`
- Environment Variable: `OPEN_SANDBOX_API_KEY` (for SDK clients)

### 2. execd-api.yaml

**Code Execution API Inside Sandbox**

Defines interfaces for executing code, commands, and file operations within sandbox environments, providing complete code interpreter and filesystem management capabilities. All endpoints require the `X-EXECD-ACCESS-TOKEN` header.

**Core Features:**
- **Code Execution**: Stateful code execution supporting Python, JavaScript, and other languages with context lifecycle management
- **Command Execution**: Shell command execution with foreground/background modes and polling endpoints for status/output
- **File Operations**: Complete CRUD operations for files and directories
- **Real-time Streaming**: Real-time output streaming via SSE (Server-Sent Events)
- **System Monitoring**: Real-time monitoring of CPU and memory metrics
- **Access Control**: Token-based API authentication via `X-EXECD-ACCESS-TOKEN`

**Main Endpoint Categories:**

**Health Check:**
- `GET /ping` - Service health check

**Code Interpreter:**
- `GET /code/contexts` - List active code execution contexts (filterable by language)
- `DELETE /code/contexts` - Delete all contexts for a language
- `DELETE /code/contexts/{context_id}` - Delete a specific context
- `POST /code/context` - Create a code execution context
- `POST /code` - Execute code in a context (streaming output)
- `DELETE /code` - Interrupt code execution

**Command Execution:**
- `POST /command` - Execute shell command (streaming output)
- `DELETE /command` - Interrupt command execution
- `GET /command/status/{session}` - Get foreground/background command status
- `GET /command/output/{session}` - Fetch accumulated stdout/stderr for a command

**Filesystem:**
- `GET /files/info` - Get metadata for files
- `DELETE /files` - Delete files (not directories)
- `POST /files/permissions` - Change file permissions
- `POST /files/mv` - Move/rename files
- `GET /files/search` - Search files (supports glob patterns)
- `POST /files/replace` - Batch replace file content
- `POST /files/upload` - Upload files (multipart)
- `GET /files/download` - Download files (supports range requests)

**Directory Operations:**
- `POST /directories` - Create directories with permissions (mkdir -p semantics)
- `DELETE /directories` - Recursively delete directories

**System Metrics:**
- `GET /metrics` - Get system resource metrics
- `GET /metrics/watch` - Watch system metrics in real-time (SSE stream)

### 3. egress-api.yaml

**Sandbox Egress Runtime API**

Defines the runtime egress policy interface exposed directly by the egress sidecar
inside a sandbox. Unlike lifecycle operations, this API is reached by first resolving
the sandbox endpoint for the egress port and then calling the sidecar endpoint directly.

**Core Features:**
- **Policy Inspection**: Retrieve the currently enforced egress policy and derived runtime mode
- **Policy Mutation**: Patch egress rules at runtime using sidecar merge semantics
- **Direct Sidecar Access**: Access via sandbox endpoint resolution instead of server-side lifecycle forwarding
- **Optional Sidecar Auth**: Supports endpoint-specific headers when the egress sidecar requires auth

**Main Endpoints:**
- `GET /policy` - Get the current egress policy
- `PATCH /policy` - Merge new egress rules into the current policy

## Technical Features

### Streaming Output (Server-Sent Events)

Code execution and command execution interfaces use SSE for real-time streaming output, supporting the following event types:
- `init` - Initialization event
- `status` - Status update
- `stdout` / `stderr` - Standard output/error streams
- `result` - Execution result
- `execution_complete` - Execution completed
- `execution_count` - Execution count
- `error` - Error information

### Resource Limits

Supports flexible resource configuration (similar to Kubernetes):
```json
{
  "cpu": "500m",
  "memory": "512Mi",
  "gpu": "1"
}
```

### File Permissions

Supports Unix-style file permission management:
- Owner
- Group
- Permission mode (octal format, e.g., 755)


================================================
FILE: specs/README_zh.md
================================================
# OpenSandbox API 规范文档

中文 | [English](README.md)

本目录包含 OpenSandbox 项目的 OpenAPI 规范文档，定义了完整的 API 接口和数据模型。发起请求时请使用各规范中定义的服务器地址（例如生命周期 API 的 `http://localhost:8080/v1`，execd 的 `http://localhost:44772`，egress 的 `http://localhost:18080`）。

## 规范文件

### 1. sandbox-lifecycle.yml

**沙箱生命周期管理 API**

定义了沙箱环境的创建、管理和销毁的完整生命周期接口，并可直接从容器镜像启动。

**核心功能：**
- **沙箱管理**：创建、列表、查询、删除沙箱实例，支持元数据过滤与分页
- **状态控制**：暂停 (Pause)、恢复 (Resume) 沙箱执行
- **生命周期**：支持 Pending → Running → Pausing → Paused → Stopping → Terminated，并包含错误态 `Failed`
- **资源与运行时配置**：指定 CPU/内存/GPU 资源限制、必填 `entrypoint`、环境变量，以及自定义 `extensions`
- **镜像支持**：从公共或私有镜像仓库创建沙箱，支持私有仓库认证
- **超时管理**：创建时必填 `timeout`，并可通过 API 续期
- **端点访问**：获取沙箱内服务的公共访问端点

**主要端点（基础路径 `/v1`）：**
- `POST /sandboxes` - 从镜像创建沙箱，设置超时与资源限制
- `GET /sandboxes` - 列出沙箱，支持状态/元数据过滤与分页
- `GET /sandboxes/{sandboxId}` - 获取完整沙箱详情（包含镜像与 entrypoint）
- `DELETE /sandboxes/{sandboxId}` - 删除沙箱
- `POST /sandboxes/{sandboxId}/pause` - 异步暂停沙箱
- `POST /sandboxes/{sandboxId}/resume` - 恢复已暂停的沙箱
- `POST /sandboxes/{sandboxId}/renew-expiration` - 续期沙箱 TTL
- `GET /sandboxes/{sandboxId}/endpoints/{port}` - 获取指定端口的访问端点

**认证方式：**
- HTTP Header: `OPEN-SANDBOX-API-KEY: your-api-key`
- 环境变量: `OPEN_SANDBOX_API_KEY`（SDK 客户端）

### 2. execd-api.yaml

**沙箱内代码执行 API**

定义了在沙箱环境内执行代码、命令和文件操作的接口，提供完整的代码解释器和文件系统管理能力。所有端点需要 `X-EXECD-ACCESS-TOKEN` 认证头。

**核心功能：**
- **代码执行**：支持 Python、JavaScript 等多语言的有状态代码执行，并提供上下文生命周期管理
- **命令执行**：Shell 命令执行，支持前台/后台模式，并可通过轮询端点查看状态和输出
- **文件操作**：完整的文件和目录 CRUD 操作（创建、读取、更新、删除）
- **实时流式输出**：基于 SSE (Server-Sent Events) 的实时输出流
- **系统监控**：CPU 和内存指标的实时监控
- **访问控制**：通过 `X-EXECD-ACCESS-TOKEN` 进行 Token 认证

**主要端点分类：**

**健康检查：**
- `GET /ping` - 服务健康检查

**代码解释器：**
- `GET /code/contexts` - 列出活跃的代码执行上下文（可按语言过滤）
- `DELETE /code/contexts` - 按语言批量删除上下文
- `DELETE /code/contexts/{context_id}` - 删除指定上下文
- `POST /code/context` - 创建代码执行上下文
- `POST /code` - 在上下文中执行代码（流式输出）
- `DELETE /code` - 中断代码执行

**命令执行：**
- `POST /command` - 执行 Shell 命令（流式输出）
- `DELETE /command` - 中断命令执行
- `GET /command/status/{session}` - 查询前台/后台命令状态
- `GET /command/output/{session}` - 获取命令的累积 stdout/stderr

**文件系统：**
- `GET /files/info` - 获取文件元数据
- `DELETE /files` - 删除文件（不包含目录）
- `POST /files/permissions` - 修改文件权限
- `POST /files/mv` - 移动/重命名文件
- `GET /files/search` - 搜索文件（支持 glob 模式）
- `POST /files/replace` - 批量替换文件内容
- `POST /files/upload` - 上传文件（multipart）
- `GET /files/download` - 下载文件（支持断点续传）

**目录操作：**
- `POST /directories` - 按权限配置创建目录（mkdir -p 语义）
- `DELETE /directories` - 递归删除目录

**系统指标：**
- `GET /metrics` - 获取系统资源指标
- `GET /metrics/watch` - 实时监控系统指标（SSE 流）

### 3. egress-api.yaml

**沙箱 Egress 运行时 API**

定义了由沙箱内 egress sidecar 直接暴露的运行时策略接口。与生命周期 API 不同，
该 API 需要先解析沙箱 egress 端口对应的 endpoint，再直接访问 sidecar。

**核心功能：**
- **策略查询**：获取当前生效的 egress 策略及其运行时模式
- **策略变更**：使用 sidecar 的 merge 语义在运行时 patch egress 规则
- **直连 Sidecar**：通过解析沙箱 endpoint 直接访问，而不是经由生命周期 API 做服务端转发
- **可选鉴权**：当 egress sidecar 需要鉴权时，支持携带 endpoint 返回的请求头

**主要端点：**
- `GET /policy` - 获取当前 egress 策略
- `PATCH /policy` - 将新的 egress 规则合并到当前策略

## 技术特性

### 流式输出 (Server-Sent Events)

代码执行和命令执行接口使用 SSE 提供实时流式输出，支持以下事件类型：
- `init` - 初始化事件
- `status` - 状态更新
- `stdout` / `stderr` - 标准输出/错误流
- `result` - 执行结果
- `execution_complete` - 执行完成
- `execution_count` - 执行计数
- `error` - 错误信息

### 资源限制

支持灵活的资源配置（类似 Kubernetes）：
```json
{
  "cpu": "500m",
  "memory": "512Mi",
  "gpu": "1"
}
```

### 文件权限

支持 Unix 风格的文件权限管理：
- 所有者 (owner)
- 用户组 (group)
- 权限模式 (mode) - 八进制格式，如 755


================================================
FILE: specs/egress-api.yaml
================================================
openapi: 3.1.0
info:
  title: OpenSandbox Egress API
  version: 0.1.0
  description: |
    The OpenSandbox Egress API exposes the runtime policy interface served by the
    egress sidecar inside a sandbox. Unlike the lifecycle API, these operations are
    performed by connecting to a sandbox endpoint for the egress port and calling
    the sidecar directly.

    This API is intended for runtime inspection and mutation of outbound network
    policy after sandbox creation. Initial egress policy configuration during sandbox
    provisioning remains part of the Sandbox Lifecycle API create request.

    ## Access Model

    Clients typically access this API in two steps:

    1. Use the Sandbox Lifecycle API to resolve the sandbox endpoint for the egress
       service port.
    2. Send requests directly to that endpoint's `/policy` route.

    ## Authentication

    The sidecar may optionally require the `OPENSANDBOX-EGRESS-AUTH` header. When
    the sandbox endpoint resolver returns required headers, clients must forward
    them on every egress API request.
servers:
  - url: http://localhost:18080
    description: Local egress sidecar
tags:
  - name: Policy
    description: Inspect and mutate sandbox egress policy at runtime
paths:
  /policy:
    get:
      tags: [Policy]
      summary: Get current egress policy
      description: |
        Returns the currently enforced egress policy and the sidecar's derived
        runtime mode metadata.
      responses:
        '200':
          description: Current policy returned successfully.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PolicyStatusResponse'
              examples:
                deny-with-allowlist:
                  summary: Current deny-by-default policy
                  value:
                    status: ok
                    mode: deny_all
                    enforcementMode: dns
                    policy:
                      defaultAction: deny
                      egress:
                        - action: allow
                          target: pypi.org
        '401':
          $ref: '#/components/responses/Unauthorized'
        '500':
          $ref: '#/components/responses/InternalServerError'
    patch:
      tags: [Policy]
      summary: Patch egress rules
      description: |
        Merge incoming egress rules with the currently enforced policy.

        This endpoint uses merge semantics:
        - Existing rules remain unless overridden by incoming rules.
        - Incoming rules are applied with higher priority than existing rules.
        - If multiple incoming rules refer to the same `target`, the first one wins.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: array
              minItems: 1
              items:
                $ref: '#/components/schemas/NetworkRule'
            examples:
              duplicate-target-first-wins:
                summary: First rule wins for duplicate target within the same patch payload
                value:
                  - action: allow
                    target: example.com
                  - action: deny
                    target: example.com
      responses:
        '200':
          description: Patch applied successfully.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PolicyStatusResponse'
              examples:
                patched:
                  summary: Patch applied
                  value:
                    status: ok
                    mode: deny_all
                    enforcementMode: dns
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '500':
          $ref: '#/components/responses/InternalServerError'
components:
  responses:
    BadRequest:
      description: The request was invalid or malformed.
      content:
        text/plain:
          schema:
            type: string
    Unauthorized:
      description: Authentication failed for the egress sidecar.
      content:
        text/plain:
          schema:
            type: string
    InternalServerError:
      description: The sidecar failed to apply or fetch policy state.
      content:
        text/plain:
          schema:
            type: string
  schemas:
    PolicyStatusResponse:
      type: object
      properties:
        status:
          type: string
          description: Operation status reported by the sidecar.
          example: ok
        mode:
          type: string
          description: Derived runtime mode for the current policy.
          example: deny_all
        enforcementMode:
          type: string
          description: Egress sidecar enforcement backend mode.
          example: dns
        reason:
          type: string
          description: Optional human-readable reason when the sidecar returns extra context.
        policy:
          $ref: '#/components/schemas/NetworkPolicy'
      additionalProperties: false
    NetworkPolicy:
      type: object
      description: |
        Egress network policy matching the sidecar `/policy` request body.
        If `defaultAction` is omitted, the sidecar defaults to "deny"; passing an empty
        object or null results in allow-all behavior at startup.
      properties:
        defaultAction:
          type: string
          enum: [allow, deny]
          description: Default action when no egress rule matches. Defaults to "deny".
        egress:
          type: array
          description: List of egress rules evaluated in order.
          items:
            $ref: '#/components/schemas/NetworkRule'
      additionalProperties: false
    NetworkRule:
      type: object
      properties:
        action:
          type: string
          enum: [allow, deny]
          description: Whether to allow or deny matching targets.
        target:
          type: string
          description: |
            FQDN or wildcard domain (e.g., "example.com", "*.example.com").
            IP addresses and CIDR ranges are not yet supported in the egress MVP.
      required: [action, target]
      additionalProperties: false


================================================
FILE: specs/execd-api.yaml
================================================
# OpenAPI 3.1 document header for the execd API: spec version, title, API
# version, long-form description, contact and license metadata.
openapi: 3.1.0
info:
  title: OpenSandbox Execd API
  version: 1.0.0
  description: |
    OpenSandbox Execd provides a comprehensive API for managing code execution, file operations,
    and system monitoring within a sandboxed environment. The API supports multiple programming
    languages, real-time streaming output via Server-Sent Events (SSE), and complete file system
    management capabilities.

    ## Key Features
    - **Code Execution**: Execute code in Python, JavaScript, and other languages with stateful contexts
    - **Command Execution**: Run shell commands with foreground/background modes
    - **File Operations**: Complete CRUD operations for files and directories
    - **Real-time Streaming**: SSE-based output streaming for code and command execution
    - **System Monitoring**: CPU and memory metrics with real-time watching
    - **Access Control**: Token-based authentication for all API endpoints

  contact:
    name: OpenSandbox Team
    url: https://github.com/alibaba/OpenSandbox
  license:
    name: Apache 2.0
    url: https://www.apache.org/licenses/LICENSE-2.0.html

# Base URLs offered to clients and documentation tooling.
servers:
  - url: http://localhost:44772
    description: Local development server
  - url: https://api.opensandbox.example.com
    description: Production server

# Document-wide security requirement: applies to every operation that does not
# declare its own `security`. The scheme is defined under
# components.securitySchemes.AccessToken.
security:
  - AccessToken: []

# Tag catalogue; each operation below references exactly these names.
tags:
  - name: Health
    description: Server health check and status monitoring
  - name: CodeInterpreting
    description: Code execution and context management
  - name: Command
    description: Shell command execution and interruption
  - name: Filesystem
    description: File and directory operations
  - name: Metric
    description: System resource monitoring and metrics

paths:
  # Health probe. Only a 200 without a response body is declared, and the
  # operation does not override the document-level AccessToken requirement.
  /ping:
    get:
      summary: Health check endpoint
      description: |
        Performs a simple health check to verify that the server is running and responsive.
        Returns HTTP 200 OK status if the server is healthy. This endpoint is typically used
        by load balancers, monitoring systems, and orchestration platforms (like Kubernetes)
        to check service availability.
      operationId: ping
      tags:
        - Health
      responses:
        "200":
          description: Server is alive and healthy

  # Collection-level context operations: GET lists contexts, DELETE bulk-removes
  # every context of one language. Both take `language` as a query parameter.
  /code/contexts:
    get:
      summary: List active code execution contexts
      description: |
        Lists all active/available code execution contexts.
        If `language` is provided, only contexts under that language/runtime are returned.
      operationId: listContexts
      tags:
        - CodeInterpreting
      parameters:
        # NOTE(review): the description above treats `language` as an optional
        # filter, but the parameter is declared `required: true` — confirm which
        # one matches the server and align the other.
        - name: language
          in: query
          required: true
          description: Filter contexts by execution runtime (python, bash, java, etc.)
          schema:
            type: string
          example: python
      responses:
        "200":
          description: Array of active contexts
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: "#/components/schemas/CodeContext"
              examples:
                python_only:
                  summary: Context list filtered by language
                  value:
                    - id: session-abc123
                      language: python
                    - id: session-def456
                      language: python
        "400":
          $ref: "#/components/responses/BadRequest"
        "500":
          $ref: "#/components/responses/InternalServerError"

    # Bulk delete: the 200 response declares no body.
    delete:
      summary: Delete all contexts under a language
      description: |
        Deletes all existing code execution contexts under the specified `language`/runtime.
        This is a bulk operation intended for code-interpreter context cleanup.
      operationId: deleteContextsByLanguage
      tags:
        - CodeInterpreting
      parameters:
        - name: language
          in: query
          required: true
          description: Target execution runtime whose contexts should be deleted
          schema:
            type: string
          example: python
      responses:
        "200":
          description: Contexts deleted successfully
        "400":
          $ref: "#/components/responses/BadRequest"
        "500":
          $ref: "#/components/responses/InternalServerError"

  # Single-context operations addressed by the `context_id` path parameter.
  # GET declares 200/404/500; DELETE additionally declares 400.
  /code/contexts/{context_id}:
    get:
      summary: Get a code execution context by id
      description: |
        Retrieves the details of an existing code execution context (session) by id.
        Returns the context ID, language, and any associated metadata.
      operationId: getContext
      tags:
        - CodeInterpreting
      parameters:
        - name: context_id
          in: path
          required: true
          description: Session/context id to get
          schema:
            type: string
          example: session-abc123
      responses:
        "200":
          description: Context details retrieved successfully
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/CodeContext"
        "404":
          $ref: "#/components/responses/NotFound"
        "500":
          $ref: "#/components/responses/InternalServerError"
    # The 200 response of the delete operation declares no body.
    delete:
      summary: Delete a code execution context by id
      description: |
        Deletes an existing code execution context (session) by id.
        This should terminate the underlying context thread/process and release resources.
      operationId: deleteContext
      tags:
        - CodeInterpreting
      parameters:
        - name: context_id
          in: path
          required: true
          description: Session/context id to delete
          schema:
            type: string
          example: session-abc123
      responses:
        "200":
          description: Context deleted successfully
        "400":
          $ref: "#/components/responses/BadRequest"
        "404":
          $ref: "#/components/responses/NotFound"
        "500":
          $ref: "#/components/responses/InternalServerError"

  # Context creation. Note the singular path (`/code/context`), distinct from the
  # plural `/code/contexts` collection used for list/get/delete.
  /code/context:
    post:
      summary: Create code execution context
      description: |
        Creates a new code execution environment and returns a session ID that can be used
        for subsequent code execution requests. The context maintains state across multiple
        code executions within the same session.
      operationId: createCodeContext
      tags:
        - CodeInterpreting
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/CodeContextRequest"
            examples:
              python:
                summary: Create Python context
                value:
                  language: python
              bash:
                summary: Create Bash context
                value:
                  language: bash
      responses:
        # Success is reported as 200 (not 201) with the created CodeContext.
        "200":
          description: Successfully created context with session ID
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/CodeContext"
        "400":
          $ref: "#/components/responses/BadRequest"
        "500":
          $ref: "#/components/responses/InternalServerError"

  # Code execution: POST runs code and streams events as SSE; DELETE interrupts
  # a running execution identified by the `id` query parameter.
  /code:
    post:
      summary: Execute code in context
      description: |
        Executes code using Jupyter kernel in a specified execution context and streams
        the output in real-time using SSE (Server-Sent Events). Supports multiple programming
        languages (Python, JavaScript, etc.) and maintains execution state within the session.
        Returns execution results, output streams, execution count, and any errors.
      operationId: runCode
      tags:
        - CodeInterpreting
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/RunCodeRequest"
            examples:
              python:
                summary: Execute Python code
                value:
                  context:
                    id: session-123
                    language: python
                  code: |
                    print("Hello, World!")
                    result = 2 + 2
                    result
              # Omits `context`; RunCodeRequest only lists `code` as required.
              stateless:
                summary: Stateless execution
                value:
                  code: echo "Hello from shell"
      responses:
        # The schema describes one event of the stream, not the whole body.
        "200":
          description: Stream of code execution events
          content:
            text/event-stream:
              schema:
                $ref: "#/components/schemas/ServerStreamEvent"
        "400":
          $ref: "#/components/responses/BadRequest"
        "500":
          $ref: "#/components/responses/InternalServerError"

    delete:
      summary: Interrupt code execution
      description: |
        Interrupts the currently running code execution in the specified context.
        This sends a signal to terminate the execution process and releases associated resources.
      operationId: interruptCode
      tags:
        - CodeInterpreting
      parameters:
        - name: id
          in: query
          required: true
          description: Session ID of the execution context to interrupt
          schema:
            type: string
          example: session-123
      responses:
        "200":
          description: Code execution successfully interrupted
        "400":
          $ref: "#/components/responses/BadRequest"
        "500":
          $ref: "#/components/responses/InternalServerError"

  # Shell command execution: POST runs a command and streams events as SSE;
  # DELETE interrupts a running command identified by the `id` query parameter.
  /command:
    post:
      summary: Execute shell command
      description: |
        Executes a shell command and streams the output in real-time using SSE (Server-Sent Events).
        The command can run in foreground or background mode. The response includes stdout, stderr,
        execution status, and completion events.
        Optionally specify `timeout` (milliseconds) to enforce a maximum runtime; the server will
        terminate the process when the timeout is reached. You can also pass `uid`/`gid` to run
        with specific user/group IDs, and `envs` to inject environment variables.
      operationId: runCommand
      tags:
        - Command
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/RunCommandRequest"
            examples:
              foreground:
                summary: Foreground command
                value:
                  command: ls -la /workspace
                  cwd: /workspace
                  background: false
                  timeout: 30000
                  uid: 1000
                  gid: 1000
                  envs:
                    PATH: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
                    PYTHONUNBUFFERED: "1"
              # Supplies `uid` without `gid`, which RunCommandRequest's prose permits.
              background:
                summary: Background command
                value:
                  command: python server.py
                  cwd: /app
                  background: true
                  timeout: 120000
                  uid: 1000
                  envs:
                    APP_ENV: production
                    LOG_LEVEL: info
      responses:
        "200":
          description: Stream of command execution events
          content:
            text/event-stream:
              schema:
                $ref: "#/components/schemas/ServerStreamEvent"
        "400":
          $ref: "#/components/responses/BadRequest"
        "500":
          $ref: "#/components/responses/InternalServerError"

    delete:
      summary: Interrupt command execution
      description: |
        Interrupts the currently running command execution in the specified context.
        This sends a signal to terminate the execution process and releases associated resources.
      operationId: interruptCommand
      tags:
        - Command
      parameters:
        # NOTE(review): described as a "Session ID" here, while the status and
        # logs endpoints call their `id` a "Command ID returned by RunCommand" —
        # confirm whether these are the same identifier.
        - name: id
          in: query
          required: true
          description: Session ID of the execution context to interrupt
          schema:
            type: string
          example: session-456
      responses:
        "200":
          description: Command execution successfully interrupted
        "400":
          $ref: "#/components/responses/BadRequest"
        "500":
          $ref: "#/components/responses/InternalServerError"

  # Point-in-time status lookup for a command; returns JSON rather than a stream.
  # Note the path shape `/command/status/{id}` differs from `/command/{id}/logs`.
  /command/status/{id}:
    get:
      summary: Get command running status
      description: |
        Returns the current status of a command (foreground or background) by command ID.
        Includes running flag, exit code, error (if any), and start/finish timestamps.
      operationId: getCommandStatus
      tags:
        - Command
      parameters:
        - name: id
          in: path
          required: true
          description: Command ID returned by RunCommand
          schema:
            type: string
          example: cmd-abc123
      responses:
        "200":
          description: Command status
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/CommandStatusResponse"
        "400":
          $ref: "#/components/responses/BadRequest"
        "404":
          $ref: "#/components/responses/NotFound"
        "500":
          $ref: "#/components/responses/InternalServerError"

  # Non-streaming log retrieval for background commands. The body is plain text;
  # the pagination cursor travels only in the `EXECD-COMMANDS-TAIL-CURSOR`
  # response header, never in the body.
  /command/{id}/logs:
    get:
      summary: Get background command stdout/stderr (non-streamed)
      description: |
        Returns stdout and stderr for a background (detached) command by command ID.
        Foreground commands should be consumed via SSE; this endpoint is intended for
        polling logs of background commands. Supports incremental reads similar to a file seek:
        pass a starting line via query to fetch output after that line and receive the latest
        tail cursor for the next poll. When no starting line is provided, the full logs are returned.
        Response body is plain text so it can be rendered directly in browsers; the latest line index
        is provided via response header `EXECD-COMMANDS-TAIL-CURSOR` for subsequent incremental requests.
      operationId: getBackgroundCommandLogs
      tags:
        - Command
      parameters:
        - name: id
          in: path
          required: true
          description: Command ID returned by RunCommand
          schema:
            type: string
          example: cmd-abc123
        - name: cursor
          in: query
          required: false
          description: |
            Optional 0-based line cursor (behaves like a file seek). When provided, only
            stdout/stderr lines after this line are returned. The latest line index is
            returned in the `EXECD-COMMANDS-TAIL-CURSOR` response header so the client can
            pass it as `cursor` on subsequent calls to read incrementally. If omitted, the
            full log is returned.
          schema:
            type: integer
            format: int64
            minimum: 0
          example: 120
      responses:
        "200":
          description: Command output (plain text) and status metadata via headers
          content:
            text/plain:
              schema:
                type: string
              example: |
                line1
                line2
                warn: something on stderr
          headers:
            EXECD-COMMANDS-TAIL-CURSOR:
              description: Highest available 0-based line index after applying the request cursor (use as the next cursor for incremental reads)
              schema:
                type: integer
                format: int64
        "400":
          $ref: "#/components/responses/BadRequest"
        "404":
          $ref: "#/components/responses/NotFound"
        "500":
          $ref: "#/components/responses/InternalServerError"

  # Metadata lookup for one or more paths. The response is a JSON object keyed
  # by file path, each value being a FileInfo.
  /files/info:
    get:
      summary: Get file metadata
      description: |
        Retrieves detailed metadata for one or multiple files including permissions, owner,
        group, size, and modification time. Returns a map of file paths to their corresponding
        FileInfo objects.
      operationId: getFilesInfo
      tags:
        - Filesystem
      parameters:
        # `style: form` + `explode: true` serialises the array as repeated
        # query keys: ?path=a&path=b
        - name: path
          in: query
          required: true
          description: File path(s) to get info for (can be specified multiple times)
          schema:
            type: array
            items:
              type: string
          style: form
          explode: true
          examples:
            single:
              summary: Single file
              value: ["/workspace/file.txt"]
            multiple:
              summary: Multiple files
              value: ["/workspace/file1.txt", "/workspace/file2.py"]
      responses:
        "200":
          description: Map of file paths to FileInfo objects
          content:
            application/json:
              schema:
                type: object
                additionalProperties:
                  $ref: "#/components/schemas/FileInfo"
        "404":
          $ref: "#/components/responses/NotFound"
        "500":
          $ref: "#/components/responses/InternalServerError"

  # File deletion by repeated `path` query parameters. Only 200 and 500 are
  # declared; no 400/404 responses are listed for this operation.
  /files:
    delete:
      summary: Delete files
      description: |
        Deletes one or multiple files from the sandbox. Only removes files, not directories.
        Use RemoveDirs for directory removal.
      operationId: removeFiles
      tags:
        - Filesystem
      parameters:
        - name: path
          in: query
          required: true
          description: File path(s) to delete (can be specified multiple times)
          schema:
            type: array
            items:
              type: string
          style: form
          explode: true
          example: ["/workspace/temp.txt"]
      responses:
        "200":
          description: Files deleted successfully
        "500":
          $ref: "#/components/responses/InternalServerError"

  # Batch chmod/chown. The request body is a JSON object keyed by file path,
  # each value being a Permission.
  /files/permissions:
    post:
      summary: Change file permissions
      description: |
        Changes permissions (mode), owner, and group for one or multiple files.
        Accepts a map of file paths to permission settings including octal mode,
        owner username, and group name.
      operationId: chmodFiles
      tags:
        - Filesystem
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              additionalProperties:
                $ref: "#/components/schemas/Permission"
            # `mode: 755` is parsed by YAML as the decimal integer 755.
            # Presumably the server reads those digits as an octal mode —
            # confirm against the Permission schema.
            example:
              "/workspace/script.sh":
                owner: admin
                group: admin
                mode: 755
              "/workspace/config.json":
                owner: admin
                group: admin
                mode: 755
      responses:
        "200":
          description: Permissions changed successfully
        "400":
          $ref: "#/components/responses/BadRequest"
        "500":
          $ref: "#/components/responses/InternalServerError"

  # Batch rename/move. The request body is a JSON array of RenameFileItem;
  # the example shows each item carrying `src` and `dest`.
  /files/mv:
    post:
      summary: Rename or move files
      description: |
        Renames or moves one or multiple files to new paths. Can be used for both
        renaming within the same directory and moving to different directories.
        Target directory must exist.
      operationId: renameFiles
      tags:
        - Filesystem
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: array
              items:
                $ref: "#/components/schemas/RenameFileItem"
            example:
              - src: /workspace/old_name.txt
                dest: /workspace/new_name.txt
              - src: /workspace/file.py
                dest: /archive/file.py
      responses:
        "200":
          description: Files renamed/moved successfully
        "400":
          $ref: "#/components/responses/BadRequest"
        "404":
          $ref: "#/components/responses/NotFound"
        "500":
          $ref: "#/components/responses/InternalServerError"

  # Glob search under a root directory; returns a JSON array of FileInfo.
  /files/search:
    get:
      summary: Search for files
      description: |
        Searches for files matching a glob pattern within a specified directory and
        its subdirectories. Returns file metadata including path, permissions, owner,
        and group. Supports glob patterns like **, *.txt, etc. Default pattern is ** (all files).
      operationId: searchFiles
      tags:
        - Filesystem
      parameters:
        - name: path
          in: query
          required: true
          description: Root directory path to search in
          schema:
            type: string
        # Optional; the schema default is quoted because a bare ** would be
        # parsed by YAML as an alias indicator.
        - name: pattern
          in: query
          required: false
          description: Glob pattern to match files (default is **)
          schema:
            type: string
            default: "**"
      responses:
        "200":
          description: Array of matching files with metadata
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: "#/components/schemas/FileInfo"
        "400":
          $ref: "#/components/responses/BadRequest"
        "404":
          $ref: "#/components/responses/NotFound"
        "500":
          $ref: "#/components/responses/InternalServerError"

  # Batch literal text replacement. The request body is a JSON object keyed by
  # file path; the example shows each value carrying `old` and `new` strings.
  /files/replace:
    post:
      summary: Replace file content
      description: |
        Performs text replacement in one or multiple files. Replaces all occurrences
        of the old string with the new string (similar to strings.ReplaceAll).
        Preserves file permissions. Useful for batch text substitution across files.
      operationId: replaceContent
      tags:
        - Filesystem
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              additionalProperties:
                $ref: "#/components/schemas/ReplaceFileContentItem"
            example:
              "/workspace/config.yaml":
                old: "localhost:8080"
                new: "0.0.0.0:9090"
              "/workspace/app.py":
                old: "DEBUG = True"
                new: "DEBUG = False"
      responses:
        "200":
          description: Content replaced successfully
        "400":
          $ref: "#/components/responses/BadRequest"
        "500":
          $ref: "#/components/responses/InternalServerError"

  # Multipart upload. The schema below declares a single `metadata` + `file`
  # pair, while the description allows several pairs sent in sequence; the
  # repetition is documented in prose only.
  /files/upload:
    post:
      summary: Upload files to sandbox
      description: |
        Uploads one or multiple files to specified paths within the sandbox.
        Reads metadata and file content from multipart form parts in sequence.
        Each file upload consists of two parts: a metadata part (JSON) followed
        by the actual file part.
      operationId: uploadFile
      tags:
        - Filesystem
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              properties:
                metadata:
                  type: string
                  description: JSON-encoded file metadata (FileMetadata object)
                  example: '{"path":"/workspace/file.txt","owner":"admin","group":"admin","mode":755}'
                file:
                  type: string
                  format: binary
                  description: File to upload
            # Per-part Content-Type for the two multipart fields.
            encoding:
              metadata:
                contentType: application/json
              file:
                contentType: application/octet-stream
      responses:
        "200":
          description: Files uploaded successfully
        "400":
          $ref: "#/components/responses/BadRequest"
        "500":
          $ref: "#/components/responses/InternalServerError"

  # Binary file download with optional HTTP Range support: 200 for the full
  # file, 206 for a partial range, 416 when the range cannot be satisfied.
  /files/download:
    get:
      summary: Download file from sandbox
      description: |
        Downloads a file from the specified path within the sandbox. Supports HTTP
        range requests for resumable downloads and partial content retrieval.
        Returns file as octet-stream with appropriate headers.
      operationId: downloadFile
      tags:
        - Filesystem
      parameters:
        - name: path
          in: query
          required: true
          description: Absolute or relative path of the file to download
          schema:
            type: string
          example: /workspace/data.csv
        - name: Range
          in: header
          required: false
          description: HTTP Range header for partial content requests
          schema:
            type: string
          example: "bytes=0-1023"
      responses:
        "200":
          description: File content
          content:
            application/octet-stream:
              schema:
                type: string
                format: binary
          headers:
            Content-Disposition:
              schema:
                type: string
              description: Attachment header with filename
            Content-Length:
              schema:
                type: integer
              description: File size in bytes
        "206":
          description: Partial file content (when Range header is provided)
          content:
            application/octet-stream:
              schema:
                type: string
                format: binary
          headers:
            Content-Range:
              schema:
                type: string
              description: Range of bytes being returned
            Content-Length:
              schema:
                type: integer
              description: Length of the returned range
        "400":
          $ref: "#/components/responses/BadRequest"
        "404":
          $ref: "#/components/responses/NotFound"
        # Defined inline (with an ErrorResponse JSON body) rather than through a
        # shared entry under components.responses like the other error codes.
        "416":
          description: Requested range not satisfiable
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        "500":
          $ref: "#/components/responses/InternalServerError"

  # Directory operations: POST creates directories from a path -> Permission
  # map; DELETE removes directories given as repeated `path` query parameters.
  /directories:
    post:
      summary: Create directories
      description: |
        Creates one or multiple directories with specified permissions. Creates parent
        directories as needed (similar to mkdir -p). Accepts a map of directory paths
        to permission objects.
      operationId: makeDirs
      tags:
        - Filesystem
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              additionalProperties:
                $ref: "#/components/schemas/Permission"
            example:
              "/workspace/project":
                owner: admin
                group: admin
                mode: 755
              "/workspace/logs":
                owner: admin
                group: admin
                mode: 755
      responses:
        "200":
          description: Directories created successfully
        "400":
          $ref: "#/components/responses/BadRequest"
        "500":
          $ref: "#/components/responses/InternalServerError"

    # Only 200 and 500 are declared for the recursive delete.
    delete:
      summary: Delete directories
      description: |
        Recursively deletes one or multiple directories and all their contents.
        Similar to rm -rf. Use with caution as this operation cannot be undone.
      operationId: removeDirs
      tags:
        - Filesystem
      parameters:
        - name: path
          in: query
          required: true
          description: Directory path(s) to delete (can be specified multiple times)
          schema:
            type: array
            items:
              type: string
          style: form
          explode: true
          example: ["/workspace/temp"]
      responses:
        "200":
          description: Directories deleted successfully
        "500":
          $ref: "#/components/responses/InternalServerError"

  # One-shot metrics snapshot returned as a JSON Metrics object.
  /metrics:
    get:
      summary: Get system metrics
      description: |
        Retrieves current system resource metrics including CPU usage percentage,
        CPU core count, total memory, used memory, and timestamp. Provides a snapshot
        of system resource utilization at the time of request.
      operationId: getMetrics
      tags:
        - Metric
      responses:
        "200":
          description: Current system metrics including CPU and memory usage
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/Metrics"
        "500":
          $ref: "#/components/responses/InternalServerError"

  # Streaming counterpart of /metrics: the same Metrics schema is referenced,
  # but delivered over text/event-stream.
  /metrics/watch:
    get:
      summary: Watch system metrics in real-time
      description: |
        Streams system resource metrics in real-time using Server-Sent Events (SSE).
        Updates are sent every second, providing continuous monitoring of CPU usage,
        memory usage, and other system metrics. The connection remains open until
        the client disconnects.
      operationId: watchMetrics
      tags:
        - Metric
      responses:
        "200":
          description: Stream of system metrics updated every second
          content:
            text/event-stream:
              schema:
                $ref: "#/components/schemas/Metrics"
        "500":
          $ref: "#/components/responses/InternalServerError"

components:
  # API-key scheme referenced by the document-level `security` requirement:
  # the token is sent in the X-EXECD-ACCESS-TOKEN request header.
  securitySchemes:
    AccessToken:
      type: apiKey
      in: header
      name: X-EXECD-ACCESS-TOKEN
      description: |
        Access token for API authentication. All requests must include this header
        with a valid token. The token is configured during server initialization.

  schemas:
    # Body of POST /code/context. No `required` list is declared, so `language`
    # is optional as far as the schema is concerned.
    CodeContextRequest:
      type: object
      description: Request to create a code execution context
      properties:
        language:
          type: string
          description: Execution runtime (python, bash, java, etc.)
          example: python

    # Context descriptor used both in responses and nested inside
    # RunCodeRequest.context. Only `language` is required; `id` is optional.
    CodeContext:
      type: object
      description: Code execution context with session identifier
      properties:
        id:
          type: string
          description: Unique session identifier returned by CreateContext
          example: session-abc123
        language:
          type: string
          description: Execution runtime
          example: python
      required:
        - language

    # Body of POST /code. Only `code` is required; `context` may be omitted.
    RunCodeRequest:
      type: object
      required:
        - code
      description: Request to execute code in a context
      properties:
        context:
          $ref: "#/components/schemas/CodeContext"
        code:
          type: string
          description: Source code to execute
          example: |
            import numpy as np
            result = np.array([1, 2, 3])
            print(result)

    # Body of POST /command. Only `command` is required. The uid/gid dependency
    # is stated in the property descriptions only; the schema does not enforce it.
    RunCommandRequest:
      type: object
      required:
        - command
      description: Request to execute a shell command
      properties:
        command:
          type: string
          description: Shell command to execute
          example: ls -la /workspace
        cwd:
          type: string
          description: Working directory for command execution
          example: /workspace
        background:
          type: boolean
          description: Whether to run command in detached mode
          default: false
          example: false
        # Milliseconds; no minimum or maximum is declared.
        timeout:
          type: integer
          format: int64
          description: Maximum allowed execution time in milliseconds before the command is forcefully terminated by the server. If omitted, the server will not enforce any timeout.
          example: 60000
        uid:
          type: integer
          format: int32
          minimum: 0
          description: |
            Unix user ID used to run the command. If `gid` is provided, `uid` is required.
          example: 1000
        gid:
          type: integer
          format: int32
          minimum: 0
          description: |
            Unix group ID used to run the command. Requires `uid` to be provided.
          example: 1000
        # String-to-string map; values must be strings, hence the quoted "1".
        envs:
          type: object
          description: Environment variables injected into the command process.
          additionalProperties:
            type: string
          example:
            PATH: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
            PYTHONUNBUFFERED: "1"

    CommandStatusResponse:
      # Status snapshot for a previously started command. No `required` list
      # is declared, so every property is optional as far as the schema goes.
      type: object
      description: Command execution status (foreground or background)
      properties:
        id:
          type: string
          description: Command ID returned by RunCommand
          example: cmd-abc123
        content:
          type: string
          description: Original command content
          example: ls -la
        running:
          type: boolean
          description: Whether the command is still running
          example: false
        # Marked nullable: only meaningful once the command has finished.
        exit_code:
          type: integer
          format: int32
          nullable: true
          description: Exit code if the command has finished
          example: 0
        error:
          type: string
          description: Error message if the command failed
          example: permission denied
        started_at:
          type: string
          format: date-time
          description: Start time in RFC3339 format
          example: "2025-12-22T09:08:05Z"
        # Marked nullable: null while the command is still running.
        finished_at:
          type: string
          format: date-time
          nullable: true
          description: Finish time in RFC3339 format (null if still running)
          example: "2025-12-22T09:08:09Z"

    ServerStreamEvent:
      # Single flat envelope shared by all streamed events; `type` tells the
      # client which kind of event it is. No `required` list is declared.
      # NOTE(review): which of the optional fields accompany which event type
      # is not expressed in the schema — confirm against the server.
      type: object
      description: Server-sent event for streaming execution output
      properties:
        type:
          type: string
          enum:
            - init
            - status
            - error
            - stdout
            - stderr
            - result
            - execution_complete
            - execution_count
            - ping
          description: Event type for client-side handling
          example: stdout
        text:
          type: string
          description: Textual data for status, init, and stream events
          example: "Hello, World!\n"
        execution_count:
          type: integer
          description: Cell execution number in the session
          example: 1
        execution_time:
          type: integer
          format: int64
          description: Execution duration in milliseconds
          example: 150
        timestamp:
          type: integer
          format: int64
          description: When the event was generated (Unix milliseconds)
          example: 1700000000000
        # Open map (any value type allowed) keyed by MIME type.
        results:
          type: object
          additionalProperties: true
          description: Execution output in various MIME types (e.g., "text/plain", "text/html")
          example:
            text/plain: "4"
        # Inline object (not the shared ErrorResponse schema) with
        # ename / evalue / traceback fields.
        error:
          type: object
          description: Execution error details if an error occurred
          properties:
            ename:
              type: string
              description: Error name/type
              example: "NameError"
            evalue:
              type: string
              description: Error value/message
              example: "name 'undefined_var' is not defined"
            traceback:
              type: array
              items:
                type: string
              description: Stack trace lines
              example:
                - "Traceback (most recent call last):"
                - '  File "<stdin>", line 1, in <module>'
                - "NameError: name 'undefined_var' is not defined"

    FileInfo:
      # Metadata record describing a single file. Every property listed here
      # is also in `required`.
      type: object
      description: File metadata including path and permissions
      properties:
        path:
          type: string
          description: Absolute file path
          example: /workspace/file.txt
        size:
          type: integer
          format: int64
          description: File size in bytes
          example: 2048
        modified_at:
          type: string
          format: date-time
          description: Last modification time
          # Quoted so the example stays a string: YAML 1.1 loaders resolve an
          # unquoted ISO-8601 value to a native timestamp, which no longer
          # matches `type: string`.
          example: "2025-11-16T14:30:45Z"
        created_at:
          type: string
          format: date-time
          description: File creation time
          # Quoted for the same reason as `modified_at`.
          example: "2025-11-16T14:30:45Z"
        owner:
          type: string
          description: File owner username
          example: admin
        group:
          type: string
          description: File group name
          example: admin
        mode:
          type: integer
          description: File permissions in octal format
          example: 755
      required: [path, size, modified_at, created_at, owner, group, mode]

    Permission:
      # Ownership/mode settings. `mode` is listed as required even though it
      # also declares a default; `owner` and `group` are optional.
      # NOTE(review): the mode is written with decimal digits (755) but
      # described as octal — presumably it is meant to be read as 0755;
      # confirm against the server.
      type: object
      description: File ownership and mode settings
      properties:
        owner:
          type: string
          description: Owner username
          example: root
        group:
          type: string
          description: Group name
          example: root
        mode:
          type: integer
          description: Permission mode in octal format (e.g., 644, 755)
          default: 755
          example: 755
      required: [mode]

    FileMetadata:
      # Target path plus ownership/mode for an upload. No `required` list is
      # declared, so every property is optional as far as the schema goes.
      type: object
      description: File metadata for upload operations
      properties:
        path:
          type: string
          description: Target file path
          example: /workspace/upload.txt
        owner:
          type: string
          description: File owner
          example: admin
        group:
          type: string
          description: File group
          example: admin
        mode:
          type: integer
          description: File permissions in octal
          example: 755

    RenameFileItem:
      # One source/destination pair for a rename or move; both are required.
      type: object
      description: File rename/move operation
      properties:
        src:
          type: string
          description: Source file path
          example: /workspace/old.txt
        dest:
          type: string
          description: Destination file path
          example: /workspace/new.txt
      required: [src, dest]

    ReplaceFileContentItem:
      # One old/new string pair for a content replacement; both are required.
      type: object
      description: Content replacement operation
      properties:
        old:
          type: string
          description: String to be replaced
          example: "localhost"
        new:
          type: string
          description: Replacement string
          example: "0.0.0.0"
      required: [old, new]

    Metrics:
      # Point-in-time resource usage sample. All five properties are required.
      type: object
      description: System resource usage metrics
      properties:
        # Declared as a float rather than an integer.
        cpu_count:
          type: number
          format: float
          description: Number of CPU cores
          example: 4.0
        cpu_used_pct:
          type: number
          format: float
          description: CPU usage percentage
          example: 45.5
        mem_total_mib:
          type: number
          format: float
          description: Total memory in MiB
          example: 8192.0
        mem_used_mib:
          type: number
          format: float
          description: Used memory in MiB
          example: 4096.0
        timestamp:
          type: integer
          format: int64
          description: Timestamp when metrics were collected (Unix milliseconds)
          example: 1700000000000
      required:
        [cpu_count, cpu_used_pct, mem_total_mib, mem_used_mib, timestamp]

    ErrorResponse:
      # Body shared by the reusable error responses in this spec: a
      # machine-oriented `code` plus a human-oriented `message`, both required.
      type: object
      description: Standard error response format
      properties:
        code:
          type: string
          description: Error code for programmatic handling
          example: INVALID_REQUEST_BODY
        message:
          type: string
          description: Human-readable error message
          example: "error parsing request, MAYBE invalid body format"
      required: [code, message]

  responses:
    # Reusable error responses. All three carry the same ErrorResponse body
    # and differ only in their description and example payload.
    BadRequest:
      description: Invalid request body format or missing required fields
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/ErrorResponse"
          example:
            code: INVALID_REQUEST_BODY
            message: "error parsing request, MAYBE invalid body format"

    NotFound:
      description: File or resource not found
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/ErrorResponse"
          example:
            code: FILE_NOT_FOUND
            message: "file not found"

    InternalServerError:
      description: Runtime server error during operation
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/ErrorResponse"
          example:
            code: RUNTIME_ERROR
            message: "error running code execution"


================================================
FILE: specs/sandbox-lifecycle.yml
================================================
openapi: 3.1.0
info:
  title: OpenSandbox Lifecycle API
  version: 0.1.0
  description: |
    The Sandbox Lifecycle API coordinates how untrusted workloads are created,
    executed, paused, resumed, and finally disposed. This specification focuses on
    the primary lifecycle flows for the `sandbox` domain concept. Sandboxes are
    provisioned directly from container images without requiring pre-created templates.

    ## Sandbox Lifecycle

    A sandbox follows this lifecycle:

    1. **Creation** → Sandbox enters `Pending` state (auto-starts)
    2. **Execution** → Transitions to `Running` state
    3. **Pause** (optional) → `Pausing` → `Paused` (asynchronous process)
    4. **Resume** (optional) → Returns to `Running` from `Paused`
    5. **Termination** → `Stopping` → `Terminated` (can be triggered by kill action, TTL expiry, or error)
    6. **Error** → Any state can transition to `Failed` on critical errors

    The `status` field provides fine-grained details through `state`, `reason`, and `message`.

    ## Authentication

    API Key authentication is required for all operations:

    1. **HTTP Header**
       ```
       OPEN-SANDBOX-API-KEY: your-api-key
       ```

    2. **Environment Variable** (for SDK clients)
       ```
       OPEN_SANDBOX_API_KEY=your-api-key
       ```

       SDK clients will automatically pick up this environment variable.
servers:
  - url: http://localhost:8080/v1
    description: Local development
security:
  - apiKeyAuth: []
tags:
  - name: Sandboxes
    description: Provision and transition sandboxes through their lifecycle
paths:
  /sandboxes:
    get:
      # Paginated listing. Filters combine with AND; repeated `state` values
      # are OR-ed with each other (see description).
      tags: [Sandboxes]
      summary: List sandboxes
      description: |
        List all sandboxes with optional filtering and pagination using query parameters.
        All filter conditions use AND logic. Multiple `state` parameters use OR logic within states.
      parameters:
        # Repeatable parameter: form style with explode, i.e. `?state=A&state=B`.
        - name: state
          in: query
          description: |
            Filter by lifecycle state. Pass multiple times for OR logic.
            Example: `?state=Running&state=Paused`
          schema:
            type: array
            items:
              type: string
          style: form
          explode: true
        # NOTE(review): the example assumes two encoding layers — each key and
        # value is percent-encoded, then the joined `k=v&k=v` string is
        # percent-encoded once more as the query value, so a space becomes
        # `%2520`. Confirm against the server's parser.
        - name: metadata
          in: query
          description: |
            Arbitrary metadata key-value pairs for filtering. Keys and values must be URL encoded.
            Example: To filter by `project=Apollo` and `note=Demo Test`: `?metadata=project%3DApollo%26note%3DDemo%2520Test`
          schema:
            type: string
          style: form
        - name: page
          in: query
          description: Page number for pagination
          schema:
            type: integer
            minimum: 1
            default: 1
        # No upper bound is declared for the page size.
        - name: pageSize
          in: query
          description: Number of items per page
          schema:
            type: integer
            minimum: 1
            default: 20
      responses:
        '200':
          description: Paginated collection of sandboxes
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ListSandboxesResponse'
          headers:
            X-Request-ID:
              $ref: '#/components/headers/XRequestId'
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '500':
          $ref: '#/components/responses/InternalServerError'
    post:
      # Asynchronous create: answers 202 with a CreateSandboxResponse body and
      # a Location header; clients poll GET /sandboxes/{sandboxId} afterwards.
      tags: [Sandboxes]
      summary: Create a sandbox from a container image
      description: |
        Creates a new sandbox from a container image with optional resource limits,
        environment variables, and metadata. Sandboxes are provisioned directly from
        the specified image without requiring a pre-created template.

        ## Authentication

        API Key authentication is required via:
        - `OPEN-SANDBOX-API-KEY: <api-key>` header
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateSandboxRequest'
            # Three named examples: two network-policy variants and one that
            # omits `timeout`.
            examples:
              deny-with-allowlist:
                summary: Deny by default with allowed domains
                value:
                  image:
                    uri: python:3.11
                  timeout: 3600
                  resourceLimits:
                    cpu: "500m"
                    memory: "512Mi"
                  entrypoint: ["python", "/app/main.py"]
                  networkPolicy:
                    defaultAction: deny
                    egress:
                      - action: allow
                        target: "pypi.org"
              allow-with-denylist:
                summary: Allow by default with a deny rule
                value:
                  image:
                    uri: python:3.11
                  timeout: 3600
                  resourceLimits:
                    cpu: "500m"
                    memory: "512Mi"
                  entrypoint: ["python", "/app/main.py"]
                  networkPolicy:
                    defaultAction: allow
                    egress:
                      - action: deny
                        target: "bad.example.com"
              manual-cleanup:
                summary: Manual cleanup without automatic expiration
                # No `timeout` key in this example payload.
                value:
                  image:
                    uri: python:3.11
                  resourceLimits:
                    cpu: "500m"
                    memory: "512Mi"
                  entrypoint: ["python", "/app/main.py"]
      responses:
        '202':
          description: |
            Sandbox created and accepted for provisioning.

            The returned sandbox includes:
            - `id`: Unique sandbox identifier
            - `status.state: "Pending"` (auto-starting provisioning)
            - `status.reason` and `status.message` indicating initialization stage
            - `metadata`, `expiresAt`, `createdAt`: Core sandbox information

            Note: `image` and `updatedAt` are not included in the create response.
            Use GET /sandboxes/{sandboxId} to retrieve the complete sandbox information including image spec.

            To track provisioning progress, poll GET /sandboxes/{sandboxId}.
            The sandbox will automatically transition to `Running` state once provisioning completes.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CreateSandboxResponse'
          headers:
            X-Request-ID:
              $ref: '#/components/headers/XRequestId'
            Location:
              $ref: '#/components/headers/Location'
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '409':
          $ref: '#/components/responses/Conflict'
        '500':
          $ref: '#/components/responses/InternalServerError'
  /sandboxes/{sandboxId}:
    # SandboxId is declared once at path level here, so it is shared by the
    # GET and DELETE operations below (the sub-resource paths declare it per
    # operation instead).
    parameters:
      - $ref: '#/components/parameters/SandboxId'
    get:
      tags: [Sandboxes]
      summary: Fetch a sandbox by id
      description: |
        Returns the complete sandbox information including:
        - `id`, `status`, `metadata`, `expiresAt`, `createdAt`: Core information
        - `image`: Container image specification (not included in create response)
        - `entrypoint`: Entry process specification

        This is the complete representation of the sandbox resource.
      responses:
        '200':
          description: Sandbox current state and metadata
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Sandbox'
          headers:
            X-Request-ID:
              $ref: '#/components/headers/XRequestId'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '403':
          $ref: '#/components/responses/Forbidden'
        '404':
          $ref: '#/components/responses/NotFound'
        '500':
          $ref: '#/components/responses/InternalServerError'
    delete:
      # Success is a 204 with no body; per the description the sandbox is
      # only scheduled for termination at that point.
      tags: [Sandboxes]
      summary: Delete a sandbox
      description: Delete a sandbox, terminating its execution. The sandbox will transition through Stopping state to Terminated.
      responses:
        '204':
          description: |
            Sandbox successfully deleted.

            Sandbox has been scheduled for termination and will transition to Stopping state, then Terminated.
          headers:
            X-Request-ID:
              $ref: '#/components/headers/XRequestId'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '403':
          $ref: '#/components/responses/Forbidden'
        '404':
          $ref: '#/components/responses/NotFound'
        '409':
          $ref: '#/components/responses/Conflict'
        '500':
          $ref: '#/components/responses/InternalServerError'
  /sandboxes/{sandboxId}/pause:
    # Asynchronous action: no request body, and the 202 response declares
    # headers only (no content). Progress is observed by polling the sandbox.
    post:
      tags: [Sandboxes]
      summary: Pause execution while retaining state
      description: Pause a running sandbox while preserving its state. Poll GET /sandboxes/{sandboxId} to track state transition to Paused.
      parameters:
        - $ref: '#/components/parameters/SandboxId'
      responses:
        '202':
          description: |
            Pause operation accepted.

            Sandbox will transition to Pausing state.
            Poll GET /sandboxes/{sandboxId} to track progress.
          headers:
            X-Request-ID:
              $ref: '#/components/headers/XRequestId'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '403':
          $ref: '#/components/responses/Forbidden'
        '404':
          $ref: '#/components/responses/NotFound'
        '409':
          $ref: '#/components/responses/Conflict'
        '500':
          $ref: '#/components/responses/InternalServerError'
  /sandboxes/{sandboxId}/resume:
    # Asynchronous action mirroring pause: no request body, and the 202
    # response declares headers only (no content).
    post:
      tags: [Sandboxes]
      summary: Resume a paused sandbox
      description: Resume execution of a paused sandbox. Poll GET /sandboxes/{sandboxId} to track state transition to Running.
      parameters:
        - $ref: '#/components/parameters/SandboxId'
      responses:
        '202':
          description: |
            Resume operation accepted.

            Sandbox will transition from Paused → Running.
            Poll GET /sandboxes/{sandboxId} to track progress.
          headers:
            X-Request-ID:
              $ref: '#/components/headers/XRequestId'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '403':
          $ref: '#/components/responses/Forbidden'
        '404':
          $ref: '#/components/responses/NotFound'
        '409':
          $ref: '#/components/responses/Conflict'
        '500':
          $ref: '#/components/responses/InternalServerError'
  /sandboxes/{sandboxId}/renew-expiration:
    # Synchronous update: a request body is required and success is a 200
    # carrying a RenewSandboxExpirationResponse.
    post:
      tags: [Sandboxes]
      summary: Renew sandbox expiration
      description: Renew the absolute expiration time of a sandbox.
      parameters:
        - $ref: '#/components/parameters/SandboxId'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RenewSandboxExpirationRequest'
      responses:
        '200':
          description: |
            Sandbox expiration updated successfully.

            Returns only the updated expiresAt field.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RenewSandboxExpirationResponse'
          headers:
            X-Request-ID:
              $ref: '#/components/headers/XRequestId'
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '403':
          $ref: '#/components/responses/Forbidden'
        '404':
          $ref: '#/components/responses/NotFound'
        '409':
          $ref: '#/components/responses/Conflict'
        '500':
          $ref: '#/components/responses/InternalServerError'
  /sandboxes/{sandboxId}/endpoints/{port}:
    # Looks up the access endpoint for one port of one sandbox.
    get:
      tags: [Sandboxes]
      summary: Get sandbox access endpoint
      description: |
        Get the public access endpoint URL for accessing a service running on a specific port
        within the sandbox. The service must be listening on the specified port inside
        the sandbox for the endpoint to be available.
      parameters:
        - $ref: '#/components/parameters/SandboxId'
        # Path parameter constrained to the 1-65535 port range.
        - name: port
          in: path
          required: true
          description: Port number where the service is listening inside the sandbox
          schema:
            type: integer
            minimum: 1
            maximum: 65535
        # Optional flag, off by default. Note the snake_case name, unlike the
        # camelCase `pageSize` query parameter on GET /sandboxes.
        - name: use_server_proxy
          in: query
          description: Whether to return a server-proxied URL
          schema:
            type: boolean
            default: false
      responses:
        '200':
          description: |
            Endpoint retrieved successfully.

            Returns the public URL for accessing the service on the specified port.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Endpoint'
          headers:
            X-Request-ID:
              $ref: '#/components/headers/XRequestId'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '403':
          $ref: '#/components/responses/Forbidden'
        '404':
          $ref: '#/components/responses/NotFound'
        '500':
          $ref: '#/components/responses/InternalServerError'
components:
  securitySchemes:
    # Header-based API key; this is the scheme named by the top-level
    # `security` requirement of this spec.
    apiKeyAuth:
      type: apiKey
      in: header
      name: OPEN-SANDBOX-API-KEY
      description: |
        API Key for authentication. Can be provided via:
        1. HTTP Header: OPEN-SANDBOX-API-KEY: your-api-key
        2. Environment variable: OPEN_SANDBOX_API_KEY (for SDK clients)
  parameters:
    # Shared path parameter referenced by every /sandboxes/{sandboxId}... path.
    # Plain string: no format or pattern is declared.
    SandboxId:
      name: sandboxId
      in: path
      required: true
      description: Unique sandbox identifier
      schema:
        type: string
  headers:
    # Reusable response headers.
    XRequestId:
      description: Unique request identifier for tracing
      schema:
        type: string
        format: uuid
    # Referenced by the 202 response of POST /sandboxes.
    Location:
      description: URI of the newly created or related resource
      schema:
        type: string
        format: uri
    # NOTE(review): not referenced by any response under `paths` or
    # `components.responses` in this spec — possibly reserved for future use.
    RetryAfter:
      description: Suggested delay in seconds before retrying
      schema:
        type: integer
        minimum: 1
  responses:
    # Reusable error responses. Every entry carries an ErrorResponse body;
    # all except the generic `Error` also declare the X-Request-ID header.
    # NOTE(review): `Error` is not referenced by any operation under `paths`
    # in this spec.
    Error:
      description: Error response envelope
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
    BadRequest:
      description: The request was invalid or malformed
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
      headers:
        X-Request-ID:
          $ref: '#/components/headers/XRequestId'
    Unauthorized:
      description: Authentication credentials are missing or invalid
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
      headers:
        X-Request-ID:
          $ref: '#/components/headers/XRequestId'
    Forbidden:
      description: The authenticated user lacks permission for this operation
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
      headers:
        X-Request-ID:
          $ref: '#/components/headers/XRequestId'
    NotFound:
      description: The requested resource does not exist
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
      headers:
        X-Request-ID:
          $ref: '#/components/headers/XRequestId'
    Conflict:
      description: The operation conflicts with the current state
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
      headers:
        X-Request-ID:
          $ref: '#/components/headers/XRequestId'
    InternalServerError:
      description: An unexpected server error occurred
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
      headers:
        X-Request-ID:
          $ref: '#/components/headers/XRequestId'
  schemas:
    ListSandboxesResponse:
      # One page of Sandbox objects plus its pagination metadata; both
      # properties are required.
      type: object
      properties:
        items:
          type: array
          items:
            $ref: '#/components/schemas/Sandbox'
        pagination:
          $ref: '#/components/schemas/PaginationInfo'
      required: [items, pagination]
    PaginationInfo:
      # All five properties are required. Page numbering starts at 1, while
      # the totals may be 0.
      type: object
      description: Pagination metadata for list responses
      properties:
        page:
          type: integer
          minimum: 1
          description: Current page number
        pageSize:
          type: integer
          minimum: 1
          description: Number of items per page
        totalItems:
          type: integer
          minimum: 0
          description: Total number of items matching the filter
        totalPages:
          type: integer
          minimum: 0
          description: Total number of pages
        hasNextPage:
          type: boolean
          description: Whether there are more pages after the current one
      required: [page, pageSize, totalItems, totalPages, hasNextPage]
    CreateSandboxResponse:
      # Body of the 202 returned by POST /sandboxes. Has the same properties
      # as the Sandbox schema minus `image`. `metadata` and `expiresAt` are
      # not in the `required` list.
      type: object
      description: Response from creating a new sandbox. Contains essential information without image and updatedAt.
      properties:
        id:
          type: string
          description: Unique sandbox identifier

        status:
          $ref: '#/components/schemas/SandboxStatus'
          description: Current lifecycle status and detailed state information

        metadata:
          type: object
          additionalProperties:
            type: string
          description: Custom metadata from creation request

        # Either an RFC 3339 date-time string or an explicit null.
        expiresAt:
          oneOf:
            - type: string
              format: date-time
            - type: 'null'
          description: Timestamp when sandbox will auto-terminate. Null when manual cleanup is enabled.

        createdAt:
          type: string
          format: date-time
          description: Sandbox creation timestamp

        entrypoint:
          type: array
          items:
            type: string
          description: Entry process specification from creation request

      required:
        - id
        - status
        - createdAt
        - entrypoint

    Sandbox:
      # Full sandbox representation used by the GET and LIST responses.
      # Unlike CreateSandboxResponse, `image` is present and required.
      # `metadata` and `expiresAt` are not in the `required` list.
      type: object
      description: Runtime execution environment provisioned from a container image
      properties:
        id:
          type: string
          description: Unique sandbox identifier

        image:
          $ref: '#/components/schemas/ImageSpec'
          description: |
            Container image specification used to provision this sandbox.
            Only present in responses for GET/LIST operations. Not returned in createSandbox response.

        status:
          $ref: '#/components/schemas/SandboxStatus'
          description: Current lifecycle status and detailed state information

        metadata:
          type: object
          additionalProperties:
            type: string
          description: Custom metadata from creation request

        entrypoint:
          type: array
          items:
            type: string
          description: |
            The command to execute as the sandbox's entry process.
            Always present in responses since entrypoint is required in creation requests.

        # Either an RFC 3339 date-time string or an explicit null.
        expiresAt:
          oneOf:
            - type: string
              format: date-time
            - type: 'null'
          description: Timestamp when sandbox will auto-terminate. Null when manual cleanup is enabled.

        createdAt:
          type: string
          format: date-time
          description: Sandbox creation timestamp

      required:
        - id
        - status
        - createdAt
        - entrypoint
        - image
    SandboxState:
      # Plain string with no `enum`: the known values are listed in the
      # description only, consistent with its note that new values may be
      # added later.
      type: string
      description: |
        High-level lifecycle state of the sandbox.

        Common state values:
        - Pending: Sandbox is being provisioned
        - Running: Sandbox is running and ready to accept requests
        - Pausing: Sandbox is in the process of pausing
        - Paused: Sandbox has been paused while retaining its state
        - Stopping: Sandbox is being terminated
        - Terminated: Sandbox has been successfully terminated
        - Failed: Sandbox encountered a critical error

        State transitions:
        - Pending → Running (after creation completes)
        - Running → Pausing (when pause is requested)
        - Pausing → Paused (pause operation completes)
        - Paused → Running (when resume is requested)
        - Running/Paused → Stopping (when kill is requested or TTL expires)
        - Stopping → Terminated (kill/timeout operation completes)
        - Pending/Running/Paused → Failed (on error)

        Note: New state values may be added in future versions.
        Clients should handle unknown state values gracefully.
    SandboxStatus:
      # Only `state` is required; `reason`, `message` and `lastTransitionAt`
      # are optional detail fields.
      type: object
      description: Detailed status information with lifecycle state and transition details
      properties:
        state:
          $ref: '#/components/schemas/SandboxState'
          description: Current lifecycle state of the sandbox

        reason:
          type: string
          description: |
            Short machine-readable reason code for the current state.
            Examples: "user_delete", "ttl_expiry", "provision_timeout", "runtime_error"

        message:
          type: string
          description: Human-readable message describing the current state or reason for state transition

        lastTransitionAt:
          type: string
          format: date-time
          description: Timestamp of the last state transition

      required: [state]
    ImageSpec:
      # Closed object: `additionalProperties: false` is set on both the spec
      # and the nested `auth` object, so unknown keys are not allowed.
      # Only `uri` is required.
      type: object
      required: [uri]
      description: |
        Container image specification for sandbox provisioning.

        Supports public registry images and private registry images with authentication.
      properties:
        uri:
          type: string
          description: |
            Container image URI in standard format.

            Examples:
              - "python:3.11" (Docker Hub)
              - "ubuntu:22.04"
              - "gcr.io/my-project/model-server:v1.0"
              - "private-registry.company.com:5000/app:latest"

        # `auth` declares no `required` list, so `username` and `password`
        # are each optional as far as the schema goes.
        auth:
          type: object
          description: Registry authentication credentials (required for private registries)
          properties:
            username:
              type: string
              description: Registry username or service account
            password:
              type: string
              description: Registry password or authentication token
          additionalProperties: false

      additionalProperties: false
    CreateSandboxRequest:
      type: object
      required: [image, resourceLimits, entrypoint]
      description: |
        Request to create a new sandbox from a container image.

        **Note**: API Key authentication is required via the `OPEN-SANDBOX-API-KEY` header.
      properties:
        image:
          $ref: '#/components/schemas/ImageSpec'
          description: Container image specification for the sandbox

        timeout:
          oneOf:
            - type: integer
              minimum: 60
            - type: 'null'
          description: |
            Sandbox timeout in seconds. The sandbox will automatically terminate after this duration.
            The maximum is controlled by the server configuration (`server.max_sandbox_timeout_seconds`).
            Omit or set null to disable automatic expiration and require explicit cleanup.
            Note: manual cleanup support is runtime-dependent; Kubernetes providers may reject
            null timeout when the underlying workload provider does not support non-expiring sandboxes.

        resourceLimits:
          $ref: '#/components/schemas/ResourceLimits'
          description: |
            Runtime resource constraints for the sandbox instance.
            SDK clients should provide sensible defaults (e.g., cpu: "500m", memory: "512Mi").

        env:
          type: object
          additionalProperties:
            type: string
          description: Environment variables to inject into the sandbox runtime.
          example:
            API_KEY: "secret-key"
            DEBUG: "true"
            LOG_LEVEL: "info"

        metadata:
          type: object
          additionalProperties:
            type: string
          description: |
            Custom key-value metadata for management, filtering, and tagging.
            Use "name" key for a human-readable identifier.
          example:
            name: "Data Processing Sandbox"
            project: "data-processing"
            team: "ml"
            environment: "staging"

        entrypoint:
          type: array
          items:
            type: string
          minItems: 1
          description: |
            The command to execute as the sandbox's entry process (required).

            Explicitly specifies the user's expected main process, allowing the sandbox management
            service to reliably inject control processes before executing this command.

            Format: [executable, arg1, arg2, ...]

            Examples:
            - ["python", "/app/main.py"]
            - ["/bin/bash"]
            - ["java", "-jar", "/app/app.jar"]
            - ["node", "server.js"]
          example:
            - "python"
            - "/app/main.py"

        networkPolicy:
          $ref: '#/components/schemas/NetworkPolicy'
          description: |
            Optional outbound network policy for the sandbox.
            Shape matches the sidecar `/policy` endpoint. If omitted or empty,
            the sidecar starts in allow-all mode until updated.

        volumes:
          type: array
          description: |
            Storage mounts for the sandbox. Each volume entry specifies a named backend-specific
            storage source and common mount settings. Exactly one backend type must be specified
            per volume entry.
          items:
            $ref: '#/components/schemas/Volume'

        extensions:
          type: object
          additionalProperties:
            type: string
          description: |
            Opaque container for provider-specific or transient parameters not supported by the core API.

            **Note**: This field is reserved for internal features, experimental flags, or temporary behaviors. Standard parameters should be proposed as core API fields.

            **Best Practices**:
            - **Namespacing**: Use prefixed keys (e.g., `storage.id`) to prevent collisions.
            - **Pass-through**: SDKs and middleware must treat this object as opaque and pass it through transparently.
    ResourceLimits:
      type: object
      description: |
        Runtime resource constraints as key-value pairs. Similar to Kubernetes resource specifications,
        allows flexible definition of resource limits. Common resource types include:
        - `cpu`: CPU allocation in millicores (e.g., "250m" for 0.25 CPU cores)
        - `memory`: Memory allocation in bytes or human-readable format (e.g., "512Mi", "1Gi")
        - `gpu`: Number of GPU devices (e.g., "1")

        New resource types can be added without API changes.
      additionalProperties:
        type: string
      example:
        cpu: "500m"
        memory: "512Mi"
        gpu: "1"
    RenewSandboxExpirationRequest:
      type: object
      required: [expiresAt]
      properties:
        expiresAt:
          type: string
          format: date-time
          description: |
            New absolute expiration time in UTC (RFC 3339 format).
            Must be in the future and after the current expiresAt time.

            Example: "2025-11-16T14:30:45Z"
      additionalProperties: false
    RenewSandboxExpirationResponse:
      type: object
      required: [expiresAt]
      properties:
        expiresAt:
          type: string
          format: date-time
          description: |
            The new absolute expiration time in UTC (RFC 3339 format).

            Example: "2025-11-16T14:30:45Z"
      additionalProperties: false
    ErrorResponse:
      type: object
      description: |
        Standard error response for all non-2xx HTTP responses.
        HTTP status code indicates the error category; code and message provide details.
      properties:
        code:
          type: string
          description: |
            Machine-readable error code (e.g., INVALID_REQUEST, NOT_FOUND, INTERNAL_ERROR).
            Use this for programmatic error handling.
        message:
          type: string
          description: Human-readable error message describing what went wrong and how to fix it.
      required: [code, message]
      additionalProperties: false
    Endpoint:
      type: object
      description: |
        Endpoint for accessing a service running in the sandbox.
        The service must be listening on the specified port inside the sandbox for the endpoint to be available.
      properties:
        endpoint:
          type: string
          description: |
            Public URL to access the service from outside the sandbox.
            Format: {endpoint-host}/sandboxes/{sandboxId}/port/{port}
            Example: endpoint.opensandbox.io/sandboxes/abc123/port/8080
        headers:
          type: object
          additionalProperties:
            type: string
          description: |
            HTTP headers that requests targeting the sandbox must include.
      required:
        - endpoint
      additionalProperties: false

    NetworkPolicy:
      type: object
      description: |
        Egress network policy matching the sidecar `/policy` request body.
        If `defaultAction` is omitted, the sidecar defaults to "deny"; passing an empty
        object or null results in allow-all behavior at startup.
      properties:
        defaultAction:
          type: string
          enum: [allow, deny]
          description: Default action when no egress rule matches. Defaults to "deny".
        egress:
          type: array
          description: List of egress rules evaluated in order.
          items:
            $ref: '#/components/schemas/NetworkRule'
      additionalProperties: false

    NetworkRule:
      type: object
      properties:
        action:
          type: string
          enum: [allow, deny]
          description: Whether to allow or deny matching targets.
        target:
          type: string
          description: |
            FQDN or wildcard domain (e.g., "example.com", "*.example.com").
            IP/CIDR not yet supported in the egress MVP.
      required: [action, target]
      additionalProperties: false

    Volume:
      type: object
      description: |
        Storage mount definition for a sandbox. Each volume entry contains:
        - A unique name identifier
        - Exactly one backend struct (host, pvc, ossfs, etc.) with backend-specific fields
        - Common mount settings (mountPath, readOnly, subPath)
      required: [name, mountPath]
      properties:
        name:
          type: string
          description: |
            Unique identifier for the volume within the sandbox.
            Must be a valid DNS label (lowercase alphanumeric, hyphens allowed, max 63 chars).
          pattern: "^[a-z0-9]([-a-z0-9]*[a-z0-9])?$"
          maxLength: 63
        host:
          $ref: '#/components/schemas/Host'
        pvc:
          $ref: '#/components/schemas/PVC'
        ossfs:
          $ref: '#/components/schemas/OSSFS'
        mountPath:
          type: string
          description: |
            Absolute path inside the container where the volume is mounted.
            Must start with '/'.
          pattern: "^/.*"
        readOnly:
          type: boolean
          description: |
            If true, the volume is mounted as read-only. Defaults to false (read-write).
          default: false
        subPath:
          type: string
          description: |
            Optional subdirectory under the backend path to mount.
            For `ossfs` backend, this field is used as the bucket prefix.
            Must be a relative path without '..' components.
      additionalProperties: false

    Host:
      type: object
      description: |
        Host path bind mount backend. Maps a directory on the host filesystem
        into the container. Only available when the runtime supports host mounts.

        Security note: Host paths are restricted by a server-side allowlist.
        Users must specify paths under permitted prefixes.
      required: [path]
      properties:
        path:
          type: string
          description: |
            Absolute path on the host filesystem to mount.
            Must start with '/' and be under an allowed prefix.
          pattern: "^/.*"
      additionalProperties: false

    PVC:
      type: object
      description: |
        Platform-managed named volume backend. A runtime-neutral abstraction
        for referencing a pre-existing, platform-managed named volume.

        - Kubernetes: maps to a PersistentVolumeClaim in the same namespace.
        - Docker: maps to a Docker named volume (created via `docker volume create`).

        The volume must already exist on the target platform before sandbox
        creation.
      required: [claimName]
      properties:
        claimName:
          type: string
          description: |
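            Name of the volume on the target platform.
            In Kubernetes this is the PVC name; in Docker this is the named
            volume name. Must consist of lowercase alphanumeric characters or
            hyphens, start and end with an alphanumeric character, and be at
            most 253 characters long.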
          pattern: "^[a-z0-9]([-a-z0-9]*[a-z0-9])?$"
          maxLength: 253
      additionalProperties: false

    OSSFS:
      type: object
      description: |
        Alibaba Cloud OSS mount backend via ossfs.

        The runtime mounts a host-side OSS path under `storage.ossfs_mount_root`
        and bind-mounts the resolved path into the sandbox container.
        Prefix selection is expressed via `Volume.subPath`.
        In Docker runtime, OSSFS backend requires OpenSandbox Server to run on a Linux host with FUSE support.
      required: [bucket, endpoint, accessKeyId, accessKeySecret]
      properties:
        bucket:
          type: string
          description: OSS bucket name.
          minLength: 3
          maxLength: 63
        endpoint:
          type: string
          description: OSS endpoint (e.g., `oss-cn-hangzhou.aliyuncs.com`).
          minLength: 1
        version:
          type: string
          description: ossfs major version used by runtime mount integration.
          enum: ["1.0", "2.0"]
          default: "2.0"
        options:
          type: array
          description: |
            Additional ossfs mount options.
            Runtime encodes options by `version`:

            - `1.0`: mounts with `ossfs ... -o <option>`
            - `2.0`: mounts with `ossfs2 mount ... -c <config-file>` and encodes options as `--<option>` lines in the config file

            Option values must be provided as raw payloads without leading `-`.
          items:
            type: string
        accessKeyId:
          type: string
          description: OSS access key ID for inline credentials mode.
          minLength: 1
        accessKeySecret:
          type: string
          description: OSS access key secret for inline credentials mode.
          minLength: 1
      additionalProperties: false


================================================
FILE: tests/csharp/OpenSandbox.E2ETests/CodeInterpreterE2ETests.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.CodeInterpreter;
using OpenSandbox.CodeInterpreter.Models;
using OpenSandbox.Models;
using Xunit;
using CodeInterpreterClient = OpenSandbox.CodeInterpreter.CodeInterpreter;

namespace OpenSandbox.E2ETests;

[Collection("CSharp E2E Tests")]
public class CodeInterpreterE2ETests : IClassFixture<CodeInterpreterE2ETestFixture>
{
    private readonly CodeInterpreterE2ETestFixture _fixture;

    /// <summary>
    /// Stores the class fixture supplied by xUnit; tests read the sandbox and interpreter from it.
    /// </summary>
    /// <param name="fixture">Fixture instance shared by the tests of this class.</param>
    public CodeInterpreterE2ETests(CodeInterpreterE2ETestFixture fixture) => _fixture = fixture;

    /// <summary>
    /// Checks that the interpreter carries the sandbox id, exposes non-null service clients,
    /// reports a positive CPU count, and can echo a marker string through the command service.
    /// </summary>
    [Fact(Timeout = 2 * 60 * 1000)]
    public async Task CreateInterpreter_ExposesSandboxServices()
    {
        var box = _fixture.Sandbox;
        var codeInterpreter = _fixture.Interpreter;

        // Identity and service surface.
        Assert.Equal(box.Id, codeInterpreter.Id);
        Assert.NotNull(codeInterpreter.Codes);
        Assert.NotNull(codeInterpreter.Files);
        Assert.NotNull(codeInterpreter.Commands);
        Assert.NotNull(codeInterpreter.Metrics);

        // Metrics endpoint must answer with at least one CPU.
        var snapshot = await codeInterpreter.Metrics.GetMetricsAsync();
        Assert.True(snapshot.CpuCount > 0);

        // A trivial command must succeed and its output must reach stdout.
        var echo = await RunCommandWithRetryAsync(codeInterpreter, "echo code-interpreter-ready");
        Assert.Null(echo.Error);
        Assert.Contains(
            echo.Logs.Stdout,
            line => line.Text.Contains("code-interpreter-ready", StringComparison.Ordinal));
    }

    /// <summary>
    /// Walks one Python context through create, get-by-id, list, and delete, and confirms
    /// the context is absent from the listing after deletion.
    /// </summary>
    [Fact(Timeout = 2 * 60 * 1000)]
    public async Task ContextManagement_CreateGetListDelete()
    {
        var codes = _fixture.Interpreter.Codes;

        // Create
        var created = await codes.CreateContextAsync(SupportedLanguage.Python);
        Assert.NotNull(created.Id);
        Assert.Equal(SupportedLanguage.Python, created.Language);

        // Get
        var roundTripped = await codes.GetContextAsync(created.Id!);
        Assert.Equal(created.Id, roundTripped.Id);
        Assert.Equal(SupportedLanguage.Python, roundTripped.Language);

        // List
        var beforeDelete = await codes.ListContextsAsync(SupportedLanguage.Python);
        Assert.Contains(beforeDelete, candidate => candidate.Id == created.Id);

        // Delete, then verify the listing no longer contains it
        await codes.DeleteContextAsync(created.Id!);

        var afterDelete = await codes.ListContextsAsync(SupportedLanguage.Python);
        Assert.DoesNotContain(afterDelete, candidate => candidate.Id == created.Id);
    }

    /// <summary>
    /// Verifies that a variable assigned in one Python context is still visible on a later run
    /// in that same context, and is not visible from a second, separately created context.
    /// </summary>
    [Fact(Timeout = 4 * 60 * 1000)]
    public async Task RunAsync_ContextPersistence_AndIsolation()
    {
        var interpreter = _fixture.Interpreter;

        var ctx1 = await CreateContextWithRetryAsync(interpreter, SupportedLanguage.Python);
        var ctx2 = await CreateContextWithRetryAsync(interpreter, SupportedLanguage.Python);

        try
        {
            // State written in ctx1 must survive to the next run in ctx1.
            await RunWithRetryAsync(interpreter, "x = 42", new RunCodeOptions { Context = ctx1 });
            var persisted = await RunWithRetryAsync(interpreter, "print(x)", new RunCodeOptions { Context = ctx1 });
            Assert.Contains(persisted.Logs.Stdout, s => s.Text.Contains("42", StringComparison.Ordinal));

            // ctx2 never defined x, so the membership check must print False.
            var isolated = await RunWithRetryAsync(interpreter, "print('x' in globals())", new RunCodeOptions { Context = ctx2 });
            Assert.Contains(isolated.Logs.Stdout, s => s.Text.Contains("False", StringComparison.OrdinalIgnoreCase));
        }
        finally
        {
            // Delete the contexts even when a run or assertion fails, so they do not
            // linger in the interpreter that the class fixture shares between tests.
            await interpreter.Codes.DeleteContextAsync(ctx1.Id!);
            await interpreter.Codes.DeleteContextAsync(ctx2.Id!);
        }
    }

    /// <summary>
    /// Runs one arithmetic snippet each in Python, JavaScript, and Bash (selected by language,
    /// without an explicit context) and checks the expected result appears on stdout.
    /// </summary>
    [Fact(Timeout = 3 * 60 * 1000)]
    public async Task RunAsync_MultiLanguage_BasicExecution()
    {
        var codes = _fixture.Interpreter.Codes;

        // Python: 1 + 2
        var pythonOptions = new RunCodeOptions { Language = SupportedLanguage.Python };
        var pythonRun = await codes.RunAsync("print(1+2)", pythonOptions);
        Assert.Contains(pythonRun.Logs.Stdout, line => line.Text.Contains("3", StringComparison.Ordinal));

        // JavaScript: 3 + 4
        var javaScriptOptions = new RunCodeOptions { Language = SupportedLanguage.JavaScript };
        var javaScriptRun = await codes.RunAsync("console.log(3+4)", javaScriptOptions);
        Assert.Contains(javaScriptRun.Logs.Stdout, line => line.Text.Contains("7", StringComparison.Ordinal));

        // Bash: 8 + 9
        var bashOptions = new RunCodeOptions { Language = SupportedLanguage.Bash };
        var bashRun = await codes.RunAsync("echo $((8+9))", bashOptions);
        Assert.Contains(bashRun.Logs.Stdout, line => line.Text.Contains("17", StringComparison.Ordinal));
    }

    /// <summary>
    /// Executes a small program in dedicated Java, Go, and TypeScript contexts and checks that
    /// each run reports no error and emits its expected marker text. Contexts are deleted afterwards.
    /// </summary>
    [Fact(Timeout = 6 * 60 * 1000)]
    public async Task RunAsync_MultiLanguage_Java_Go_TypeScript()
    {
        var codes = _fixture.Interpreter.Codes;

        var javaContext = await codes.CreateContextAsync(SupportedLanguage.Java);
        var goContext = await codes.CreateContextAsync(SupportedLanguage.Go);
        var typeScriptContext = await codes.CreateContextAsync(SupportedLanguage.TypeScript);

        try
        {
            // Java: either the marker or the computed sum is accepted.
            var javaRun = await codes.RunAsync(
                "System.out.println(\"java-ok\");\nint v = 2 + 3;\nSystem.out.println(v);\n",
                new RunCodeOptions { Context = javaContext });
            Assert.Null(javaRun.Error);
            Assert.True(HasText(javaRun, "java-ok") || HasText(javaRun, "5"));

            // Go: the marker must be present.
            var goRun = await codes.RunAsync(
                "package main\nimport \"fmt\"\nfunc main(){ fmt.Print(\"go-ok\") }",
                new RunCodeOptions { Context = goContext });
            Assert.Null(goRun.Error);
            Assert.True(HasText(goRun, "go-ok"));

            // TypeScript: either the marker or the computed sum is accepted.
            var typeScriptRun = await codes.RunAsync(
                "console.log('ts-ok'); const n: number = 3 + 4; console.log(n);",
                new RunCodeOptions { Context = typeScriptContext });
            Assert.Null(typeScriptRun.Error);
            Assert.True(HasText(typeScriptRun, "ts-ok") || HasText(typeScriptRun, "7"));
        }
        finally
        {
            await codes.DeleteContextAsync(javaContext.Id!);
            await codes.DeleteContextAsync(goContext.Id!);
            await codes.DeleteContextAsync(typeScriptContext.Id!);
        }
    }

    /// <summary>
    /// Creates two Bash contexts, confirms both are listed, bulk-deletes by language,
    /// and confirms neither remains in the listing.
    /// </summary>
    [Fact(Timeout = 3 * 60 * 1000)]
    public async Task ContextManagement_DeleteContexts_ByLanguage()
    {
        var codes = _fixture.Interpreter.Codes;

        // Bulk delete up front; presumably this clears Bash contexts left by earlier tests.
        await codes.DeleteContextsAsync(SupportedLanguage.Bash);

        var first = await codes.CreateContextAsync(SupportedLanguage.Bash);
        var second = await codes.CreateContextAsync(SupportedLanguage.Bash);

        var beforeDelete = await codes.ListContextsAsync(SupportedLanguage.Bash);
        Assert.Contains(beforeDelete, candidate => candidate.Id == first.Id);
        Assert.Contains(beforeDelete, candidate => candidate.Id == second.Id);

        await codes.DeleteContextsAsync(SupportedLanguage.Bash);

        var remaining = await codes.ListContextsAsync(SupportedLanguage.Bash);
        Assert.DoesNotContain(remaining, candidate => candidate.Id == first.Id);
        Assert.DoesNotContain(remaining, candidate => candidate.Id == second.Id);
    }

    /// <summary>
    /// Streams a short Python loop and checks that at least one event arrives and that at
    /// least one of them is a stdout, result, error, or execution-complete event.
    /// </summary>
    [Fact(Timeout = 3 * 60 * 1000)]
    public async Task RunStreamAsync_ReturnsRealtimeEvents()
    {
        var codeInterpreter = _fixture.Interpreter;

        var streamRequest = new RunCodeRequest
        {
            Code = "for i in range(3): print(i)",
            Context = new CodeContext { Language = SupportedLanguage.Python }
        };

        var collected = await RunStreamCollectWithRetryAsync(codeInterpreter, streamRequest);

        Assert.True(collected.Count > 0);
        Assert.Contains(collected, streamEvent =>
            streamEvent.Type == ServerStreamEventTypes.Stdout ||
            streamEvent.Type == ServerStreamEventTypes.Result ||
            streamEvent.Type == ServerStreamEventTypes.Error ||
            streamEvent.Type == ServerStreamEventTypes.ExecutionComplete);
    }

    /// <summary>
    /// Starts an endless Python loop, interrupts its context after two seconds, and checks that
    /// the pending run finishes within 30 seconds carrying an error, stderr output, or a completion record.
    /// </summary>
    [Fact(Timeout = 3 * 60 * 1000)]
    public async Task InterruptAsync_StopsLongRunningExecution()
    {
        var interpreter = _fixture.Interpreter;

        var ctx = await interpreter.Codes.CreateContextAsync(SupportedLanguage.Python);
        try
        {
            // Deliberately not awaited: the run must still be in flight when it is interrupted.
            var runTask = interpreter.Codes.RunAsync(
                "import time\nwhile True: time.sleep(1)",
                new RunCodeOptions { Context = ctx });

            // Give the loop time to start before interrupting it.
            await Task.Delay(2000);
            await interpreter.Codes.InterruptAsync(ctx.Id!);

            // WaitAsync throws TimeoutException if the interrupt did not end the run in time.
            var execution = await runTask.WaitAsync(TimeSpan.FromSeconds(30));
            Assert.True(execution.Error != null || execution.Logs.Stderr.Count > 0 || execution.Complete != null);
        }
        finally
        {
            // Delete the context even when the interrupt or the wait fails; otherwise the
            // context running the endless loop would be left behind in the shared fixture.
            await interpreter.Codes.DeleteContextAsync(ctx.Id!);
        }
    }

    /// <summary>
    /// Launches Python, Java, and Go snippets at the same time in separate contexts and requires
    /// that at least two of the three returned executions have no error and a non-blank id.
    /// </summary>
    [Fact(Timeout = 6 * 60 * 1000)]
    public async Task RunAsync_ConcurrentExecution_MultipleContexts()
    {
        var codes = _fixture.Interpreter.Codes;

        var pythonContext = await codes.CreateContextAsync(SupportedLanguage.Python);
        var javaContext = await codes.CreateContextAsync(SupportedLanguage.Java);
        var goContext = await codes.CreateContextAsync(SupportedLanguage.Go);

        try
        {
            // Start all three runs before awaiting any of them.
            var pythonRun = codes.RunAsync(
                "import time\nfor i in range(3):\n print(f'py-{i}')\n time.sleep(0.1)\nprint('py-done')",
                new RunCodeOptions { Context = pythonContext });
            var javaRun = codes.RunAsync(
                "for (int i=0;i<3;i++){System.out.println(\"java-\" + i);} System.out.println(\"java-done\");",
                new RunCodeOptions { Context = javaContext });
            var goRun = codes.RunAsync(
                "package main\nimport \"fmt\"\nfunc main(){for i:=0;i<3;i++{fmt.Println(i)}; fmt.Print(\"go-done\")}",
                new RunCodeOptions { Context = goContext });

            var results = await Task.WhenAll(new[] { pythonRun, javaRun, goRun });

            var succeeded = results.Count(
                run => run != null && run.Error == null && !string.IsNullOrWhiteSpace(run.Id));
            Assert.True(succeeded >= 2, $"expected at least 2 successful concurrent runs, actual={succeeded}");
        }
        finally
        {
            await codes.DeleteContextAsync(pythonContext.Id!);
            await codes.DeleteContextAsync(javaContext.Id!);
            await codes.DeleteContextAsync(goContext.Id!);
        }
    }

    /// <summary>
    /// Runs a deliberately failing snippet in Python, Java, Go, and TypeScript. Each run must
    /// report an error or stderr output, and its init/complete/error events must satisfy
    /// <see cref="AssertTerminalEventContract"/>. Python additionally must name a NameError
    /// when an error object is present.
    /// </summary>
    [Fact(Timeout = 8 * 60 * 1000)]
    public async Task RunAsync_MultiLanguage_ErrorHandling_WithEventContract()
    {
        // Shared checks for a failed run: failure signal first, then the event contract.
        static void AssertFailureSignal(TrackedExecution run)
        {
            Assert.True(run.Execution.Error != null || run.Execution.Logs.Stderr.Count > 0);
        }

        static void AssertEvents(TrackedExecution run)
        {
            AssertTerminalEventContract(run.InitEvents, run.CompleteEvents, run.ErrorEvents, run.Execution.Id);
        }

        var interpreter = _fixture.Interpreter;
        var codes = interpreter.Codes;

        var pythonContext = await codes.CreateContextAsync(SupportedLanguage.Python);
        var javaContext = await codes.CreateContextAsync(SupportedLanguage.Java);
        var goContext = await codes.CreateContextAsync(SupportedLanguage.Go);
        var typeScriptContext = await codes.CreateContextAsync(SupportedLanguage.TypeScript);

        try
        {
            // Python: reference to an undefined name.
            var pythonRun = await RunWithTrackedEventsAsync(interpreter, "print(undefined_variable)", pythonContext);
            AssertFailureSignal(pythonRun);
            if (pythonRun.Execution.Error != null)
            {
                Assert.Contains("NameError", pythonRun.Execution.Error.Name, StringComparison.OrdinalIgnoreCase);
            }
            AssertEvents(pythonRun);

            // Java: integer division by zero.
            var javaRun = await RunWithTrackedEventsAsync(interpreter, "int x = 10 / 0;", javaContext);
            AssertFailureSignal(javaRun);
            AssertEvents(javaRun);

            // Go: use of an undeclared variable.
            var goRun = await RunWithTrackedEventsAsync(
                interpreter,
                "package main\nfunc main(){ undeclaredVariable++ }",
                goContext);
            AssertFailureSignal(goRun);
            AssertEvents(goRun);

            // TypeScript: explicit throw.
            var typeScriptRun = await RunWithTrackedEventsAsync(
                interpreter,
                "throw new Error('ts-runtime-error');",
                typeScriptContext);
            AssertFailureSignal(typeScriptRun);
            AssertEvents(typeScriptRun);
        }
        finally
        {
            await codes.DeleteContextAsync(pythonContext.Id!);
            await codes.DeleteContextAsync(javaContext.Id!);
            await codes.DeleteContextAsync(goContext.Id!);
            await codes.DeleteContextAsync(typeScriptContext.Id!);
        }
    }

    /// <summary>
    /// Runs <paramref name="code"/> in <paramref name="context"/> while recording every init,
    /// execution-complete, and error event delivered to the handlers.
    /// </summary>
    /// <param name="interpreter">Interpreter whose <c>Codes</c> service executes the code.</param>
    /// <param name="code">Source text to execute.</param>
    /// <param name="context">Context the code runs in.</param>
    /// <returns>The execution together with the three recorded event lists.</returns>
    private static async Task<TrackedExecution> RunWithTrackedEventsAsync(
        CodeInterpreterClient interpreter,
        string code,
        CodeContext context)
    {
        var inits = new List<ExecutionInit>();
        var completions = new List<ExecutionComplete>();
        var errors = new List<ExecutionError>();

        // Each handler only appends to its list; there is nothing asynchronous to wait on.
        var options = new RunCodeOptions
        {
            Context = context,
            Handlers = new ExecutionHandlers
            {
                OnInit = initEvent =>
                {
                    inits.Add(initEvent);
                    return Task.CompletedTask;
                },
                OnExecutionComplete = completeEvent =>
                {
                    completions.Add(completeEvent);
                    return Task.CompletedTask;
                },
                OnError = errorEvent =>
                {
                    errors.Add(errorEvent);
                    return Task.CompletedTask;
                }
            }
        };

        var outcome = await interpreter.Codes.RunAsync(code, options);
        return new TrackedExecution(outcome, inits, completions, errors);
    }

    /// <summary>
    /// Asserts the event contract of a finished run: exactly one init event with a non-blank id
    /// (equal to <paramref name="executionId"/> when that is non-blank) and a recent timestamp,
    /// plus at least one terminal event. A complete event must be unique, have a non-negative
    /// duration and a recent timestamp; the first error event must have a non-blank name and
    /// value and a recent timestamp.
    /// </summary>
    /// <param name="initEvents">Init events recorded for the run.</param>
    /// <param name="completeEvents">Execution-complete events recorded for the run.</param>
    /// <param name="errorEvents">Error events recorded for the run.</param>
    /// <param name="executionId">Id of the returned execution; the id comparison is skipped when null or blank.</param>
    private static void AssertTerminalEventContract(
        IReadOnlyList<ExecutionInit> initEvents,
        IReadOnlyList<ExecutionComplete> completeEvents,
        IReadOnlyList<ExecutionError> errorEvents,
        string? executionId)
    {
        // Timestamps may differ from "now" by at most three minutes.
        const long toleranceMs = 180_000;

        Assert.Single(initEvents);
        var init = initEvents[0];
        Assert.False(string.IsNullOrWhiteSpace(init.Id));
        if (!string.IsNullOrWhiteSpace(executionId))
        {
            Assert.Equal(executionId, init.Id);
        }
        AssertRecentTimestampMs(init.Timestamp, toleranceMs);

        // The run must have ended with a completion, an error, or both.
        Assert.True(completeEvents.Count > 0 || errorEvents.Count > 0);

        if (completeEvents.Count > 0)
        {
            Assert.Single(completeEvents);
            var completed = completeEvents[0];
            Assert.True(completed.ExecutionTimeMs >= 0);
            AssertRecentTimestampMs(completed.Timestamp, toleranceMs);
        }

        if (errorEvents.Count > 0)
        {
            // Only the first error event is inspected.
            var firstError = errorEvents[0];
            Assert.False(string.IsNullOrWhiteSpace(firstError.Name));
            Assert.False(string.IsNullOrWhiteSpace(firstError.Value));
            AssertRecentTimestampMs(firstError.Timestamp, toleranceMs);
        }
    }

    /// <summary>
    /// Asserts that <paramref name="ts"/> is positive and lies within
    /// <paramref name="toleranceMs"/> milliseconds of the current UTC time, in either direction.
    /// </summary>
    /// <param name="ts">Timestamp compared against the current Unix time in milliseconds.</param>
    /// <param name="toleranceMs">Largest accepted absolute difference, in milliseconds.</param>
    private static void AssertRecentTimestampMs(long ts, long toleranceMs)
    {
        Assert.True(ts > 0);

        var nowMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
        var delta = Math.Abs(nowMs - ts);
        var withinTolerance = delta <= toleranceMs;
        Assert.True(withinTolerance, $"timestamp too far from now: delta={delta}ms (ts={ts})");
    }

    /// <summary>
    /// Reports whether <paramref name="expected"/> occurs (ordinal, case-sensitive) in any stdout
    /// line, any stderr line, or the text of any result of <paramref name="execution"/>.
    /// </summary>
    /// <param name="execution">Execution whose logs and results are searched.</param>
    /// <param name="expected">Substring to look for.</param>
    /// <returns><c>true</c> as soon as one source contains the substring; otherwise <c>false</c>.</returns>
    private static bool HasText(Execution execution, string expected)
    {
        if (execution.Logs.Stdout.Any(line => line.Text.Contains(expected, StringComparison.Ordinal)))
        {
            return true;
        }

        if (execution.Logs.Stderr.Any(line => line.Text.Contains(expected, StringComparison.Ordinal)))
        {
            return true;
        }

        // A result's text may be null; treat it as empty.
        return execution.Results.Any(
            result => (result.Text ?? string.Empty).Contains(expected, StringComparison.Ordinal));
    }

    /// <summary>
    /// Creates a code context, retrying with a growing delay while the failure is classified
    /// as retryable by <c>IsRetryable</c> and attempts remain.
    /// </summary>
    /// <param name="interpreter">Interpreter whose <c>Codes</c> service creates the context.</param>
    /// <param name="language">Language of the context to create.</param>
    /// <param name="maxRetries">Maximum number of attempts.</param>
    /// <returns>The created context.</returns>
    /// <remarks>
    /// A failure that is not retryable, or that happens on the final attempt, propagates directly
    /// from the failing call so its original stack trace is preserved.
    /// </remarks>
    /// <exception cref="TimeoutException">
    /// Thrown by the 60-second per-attempt limit, or when <paramref name="maxRetries"/> is less
    /// than one and no attempt is made.
    /// </exception>
    private static async Task<CodeContext> CreateContextWithRetryAsync(
        CodeInterpreterClient interpreter,
        string language,
        int maxRetries = 3)
    {
        var delayMs = 1000;
        for (var attempt = 1; attempt <= maxRetries; attempt++)
        {
            try
            {
                var ctx = await interpreter.Codes.CreateContextAsync(language).WaitAsync(TimeSpan.FromSeconds(60));
                // Short pause after creation; presumably lets the new context settle before use.
                await Task.Delay(500);
                return ctx;
            }
            catch (Exception ex) when (IsRetryable(ex) && attempt < maxRetries)
            {
                // Retryable and attempts remain: back off, growing the delay by 1.5x each time.
                // Anything the filter rejects is not caught and keeps its original stack trace.
                await Task.Delay(delayMs);
                delayMs = (int)(delayMs * 1.5);
            }
        }

        // Reached only when maxRetries < 1, i.e. the loop body never ran.
        throw new TimeoutException("CreateContextWithRetryAsync failed unexpectedly.");
    }

    /// <summary>
    /// Runs code through the interpreter, retrying with a growing delay when the call fails with
    /// a retryable error or returns an execution whose id is blank.
    /// </summary>
    /// <param name="interpreter">Interpreter whose <c>Codes</c> service executes the code.</param>
    /// <param name="code">Source text to execute.</param>
    /// <param name="options">Optional run options passed through unchanged.</param>
    /// <param name="maxRetries">Maximum number of attempts.</param>
    /// <param name="perCallTimeoutSeconds">Time limit applied to each individual attempt.</param>
    /// <returns>
    /// The first execution with a non-blank id, or the execution from the final attempt even
    /// if its id is blank.
    /// </returns>
    /// <remarks>
    /// A failure that is not retryable, or that happens on the final attempt, propagates directly
    /// from the failing call so its original stack trace is preserved.
    /// </remarks>
    /// <exception cref="TimeoutException">
    /// Thrown by the per-attempt limit, or when <paramref name="maxRetries"/> is less than one
    /// and no attempt is made.
    /// </exception>
    private static async Task<Execution> RunWithRetryAsync(
        CodeInterpreterClient interpreter,
        string code,
        RunCodeOptions? options = null,
        int maxRetries = 3,
        int perCallTimeoutSeconds = 120)
    {
        var delayMs = 1000;
        for (var attempt = 1; attempt <= maxRetries; attempt++)
        {
            try
            {
                var result = await interpreter.Codes
                    .RunAsync(code, options)
                    .WaitAsync(TimeSpan.FromSeconds(perCallTimeoutSeconds));

                // A blank id is treated as "not a usable result"; accept it only on the last attempt.
                if (!string.IsNullOrWhiteSpace(result.Id) || attempt >= maxRetries)
                {
                    return result;
                }

                await Task.Delay(delayMs);
                delayMs = (int)(delayMs * 1.5);
            }
            catch (Exception ex) when (IsRetryable(ex) && attempt < maxRetries)
            {
                // Retryable and attempts remain: back off, growing the delay by 1.5x each time.
                // Anything the filter rejects is not caught and keeps its original stack trace.
                await Task.Delay(delayMs);
                delayMs = (int)(delayMs * 1.5);
            }
        }

        // Reached only when maxRetries < 1, i.e. the loop body never ran.
        throw new TimeoutException("RunWithRetryAsync failed unexpectedly.");
    }

    /// <summary>
    /// Drains a streaming run into a list, retrying with a growing delay when the stream fails
    /// with a retryable error or yields no stdout, result, error, or execution-complete event.
    /// </summary>
    /// <param name="interpreter">Interpreter whose <c>Codes</c> service streams the run.</param>
    /// <param name="request">Run request sent on every attempt.</param>
    /// <param name="maxRetries">Maximum number of attempts.</param>
    /// <param name="perCallTimeoutSeconds">Delay after which each attempt's cancellation token is cancelled.</param>
    /// <returns>
    /// The events of the first attempt that contains one of the event types above, or the events
    /// of the final attempt regardless of content.
    /// </returns>
    /// <remarks>
    /// A failure that is not retryable, or that happens on the final attempt, propagates directly
    /// from the failing call so its original stack trace is preserved.
    /// </remarks>
    /// <exception cref="TimeoutException">
    /// Thrown when <paramref name="maxRetries"/> is less than one and no attempt is made.
    /// </exception>
    private static async Task<List<ServerStreamEvent>> RunStreamCollectWithRetryAsync(
        CodeInterpreterClient interpreter,
        RunCodeRequest request,
        int maxRetries = 3,
        int perCallTimeoutSeconds = 120)
    {
        var delayMs = 1000;
        for (var attempt = 1; attempt <= maxRetries; attempt++)
        {
            try
            {
                var events = new List<ServerStreamEvent>();
                // The token is cancelled once the per-attempt time limit elapses.
                using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(perCallTimeoutSeconds));
                await foreach (var ev in interpreter.Codes.RunStreamAsync(request, cts.Token))
                {
                    events.Add(ev);
                }

                var hasBusinessEvent = events.Any(ev =>
                    ev.Type == ServerStreamEventTypes.Stdout ||
                    ev.Type == ServerStreamEventTypes.Result ||
                    ev.Type == ServerStreamEventTypes.Error ||
                    ev.Type == ServerStreamEventTypes.ExecutionComplete);

                // On the last attempt return whatever arrived so the caller can assert on it.
                if (hasBusinessEvent || attempt == maxRetries)
                {
                    return events;
                }

                await Task.Delay(delayMs);
                delayMs = (int)(delayMs * 1.5);
            }
            catch (Exception ex) when (IsRetryable(ex) && attempt < maxRetries)
            {
                // Retryable and attempts remain: back off, growing the delay by 1.5x each time.
                // Anything the filter rejects is not caught and keeps its original stack trace.
                await Task.Delay(delayMs);
                delayMs = (int)(delayMs * 1.5);
            }
        }

        // Reached only when maxRetries < 1, i.e. the loop body never ran.
        throw new TimeoutException("RunStreamCollectWithRetryAsync failed unexpectedly.");
    }

    /// <summary>
    /// Runs <paramref name="command"/> through <c>interpreter.Commands.RunAsync</c>, retrying with a growing
    /// delay until the execution has no error and its stdout contains <c>code-interpreter-ready</c>.
    /// </summary>
    /// <param name="interpreter">Client whose <c>Commands</c> service runs the command.</param>
    /// <param name="command">Command line forwarded unchanged on every attempt.</param>
    /// <param name="maxRetries">Maximum number of attempts, including the first one.</param>
    /// <param name="perCallTimeoutSeconds">Time limit for a single attempt, enforced with <c>Task.WaitAsync</c>.</param>
    /// <returns>
    /// The first execution that satisfies the check, otherwise the most recent execution that was obtained
    /// (even if a later attempt threw).
    /// </returns>
    /// <exception cref="TimeoutException">
    /// Thrown when no attempt is made at all (<paramref name="maxRetries"/> below 1). When no attempt has
    /// produced a result, the exception of the final attempt (or a non-retryable earlier one) propagates
    /// unchanged.
    /// </exception>
    private static async Task<Execution> RunCommandWithRetryAsync(
        CodeInterpreterClient interpreter,
        string command,
        int maxRetries = 3,
        int perCallTimeoutSeconds = 30)
    {
        Execution? lastResult = null;
        var delayMs = 1000;

        for (var attempt = 1; attempt <= maxRetries; attempt++)
        {
            var isLastAttempt = attempt == maxRetries;
            try
            {
                var result = await interpreter.Commands
                    .RunAsync(command)
                    .WaitAsync(TimeSpan.FromSeconds(perCallTimeoutSeconds));

                lastResult = result;
                var hasExpectedStdout = result.Logs.Stdout.Any(log =>
                    log.Text.Contains("code-interpreter-ready", StringComparison.Ordinal));

                if (isLastAttempt || (result.Error == null && hasExpectedStdout))
                {
                    return result;
                }
            }
            catch (Exception ex) when (!isLastAttempt && IsRetryable(ex))
            {
                // Transient failure with attempts remaining: fall through to the backoff below.
            }
            catch (Exception) when (lastResult != null)
            {
                // Giving up, but an attempt already produced a result: hand that back instead of failing.
                break;
            }

            // Any other exception is deliberately left uncaught so it reaches the test with its original
            // stack trace instead of being re-thrown from the end of this method.
            await Task.Delay(delayMs);
            delayMs = (int)(delayMs * 1.5);
        }

        if (lastResult != null)
        {
            return lastResult;
        }

        // Only reachable when the loop body never ran.
        throw new TimeoutException("RunCommandWithRetryAsync failed unexpectedly.");
    }

    /// <summary>
    /// Decides whether a failed call is worth retrying.
    /// </summary>
    /// <param name="ex">Exception raised by the failed attempt.</param>
    /// <returns>
    /// <c>true</c> for timeouts and cancellations, and for any exception whose <c>ToString()</c> output
    /// contains one of the transient-network markers below (case-insensitive); otherwise <c>false</c>.
    /// </returns>
    private static bool IsRetryable(Exception ex)
    {
        // OperationCanceledException is the base class of TaskCanceledException, so this also covers the
        // plain cancellation raised when a per-call timeout token fires.
        if (ex is TimeoutException or OperationCanceledException)
        {
            return true;
        }

        // ToString() (rather than Message) is searched so that inner-exception text is also considered.
        var lowered = ex.ToString().ToLowerInvariant();
        string[] transientMarkers =
        {
            "disconnected",
            "connection",
            "reset",
            "closed",
            "timeout",
            "peer",
            "response ended prematurely"
        };

        return transientMarkers.Any(marker => lowered.Contains(marker, StringComparison.Ordinal));
    }

    /// <summary>
    /// Positional record that bundles an <see cref="Execution"/> with three read-only lists of
    /// init, completion and error events.
    /// </summary>
    /// <remarks>
    /// NOTE(review): presumably the lists hold the events observed while that execution ran —
    /// confirm against the code that constructs this record.
    /// </remarks>
    private sealed record TrackedExecution(
        Execution Execution,
        IReadOnlyList<ExecutionInit> InitEvents,
        IReadOnlyList<ExecutionComplete> CompleteEvents,
        IReadOnlyList<ExecutionError> ErrorEvents);
}

/// <summary>
/// xUnit async fixture that creates one sandbox started with the code-interpreter entrypoint, attaches a
/// <see cref="CodeInterpreterClient"/> to it, and kills/disposes the sandbox on teardown.
/// </summary>
public sealed class CodeInterpreterE2ETestFixture : IAsyncLifetime
{
    private readonly E2ETestFixture _baseFixture = new();
    private Sandbox? _sandbox;
    private CodeInterpreterClient? _interpreter;

    /// <summary>The sandbox created by <see cref="InitializeAsync"/>; throws if accessed earlier.</summary>
    public Sandbox Sandbox
    {
        get
        {
            if (_sandbox is null)
            {
                throw new InvalidOperationException("Sandbox is not initialized.");
            }

            return _sandbox;
        }
    }

    /// <summary>The interpreter client created by <see cref="InitializeAsync"/>; throws if accessed earlier.</summary>
    public CodeInterpreterClient Interpreter
    {
        get
        {
            if (_interpreter is null)
            {
                throw new InvalidOperationException("Interpreter is not initialized.");
            }

            return _interpreter;
        }
    }

    /// <summary>Creates the sandbox first, then the interpreter client bound to it.</summary>
    public async Task InitializeAsync()
    {
        var createOptions = BuildCreateOptions();
        _sandbox = await Sandbox.CreateAsync(createOptions);
        _interpreter = await CodeInterpreterClient.CreateAsync(_sandbox);
    }

    /// <summary>
    /// Kills the sandbox (ignoring any failure of the kill call) and then disposes it.
    /// Does nothing when no sandbox was created.
    /// </summary>
    public async Task DisposeAsync()
    {
        var sandbox = _sandbox;
        if (sandbox is null)
        {
            return;
        }

        try
        {
            await sandbox.KillAsync();
        }
        catch
        {
            // Best effort: a failed kill must not prevent disposal below.
        }
        finally
        {
            await sandbox.DisposeAsync();
        }
    }

    /// <summary>Assembles the creation options for the code-interpreter sandbox.</summary>
    private SandboxCreateOptions BuildCreateOptions()
    {
        var resources = new Dictionary<string, string>
        {
            { "cpu", "2" },
            { "memory", "4Gi" }
        };

        var environment = new Dictionary<string, string>
        {
            { "E2E_TEST", "true" },
            { "GO_VERSION", "1.25" },
            { "JAVA_VERSION", "21" },
            { "NODE_VERSION", "22" },
            { "PYTHON_VERSION", "3.12" },
            { "EXECD_LOG_FILE", "/tmp/opensandbox-e2e/logs/execd.log" }
        };

        // Host directory mounted at the same path inside the sandbox; EXECD_LOG_FILE points into it.
        var execdLogVolume = new Volume
        {
            Name = "execd-log",
            Host = new Host { Path = "/tmp/opensandbox-e2e/logs" },
            MountPath = "/tmp/opensandbox-e2e/logs",
            ReadOnly = false
        };

        return new SandboxCreateOptions
        {
            ConnectionConfig = _baseFixture.ConnectionConfig,
            Image = _baseFixture.DefaultImage,
            Entrypoint = new[] { "/opt/opensandbox/code-interpreter.sh" },
            TimeoutSeconds = _baseFixture.DefaultTimeoutSeconds,
            ReadyTimeoutSeconds = _baseFixture.DefaultReadyTimeoutSeconds,
            Resource = resources,
            Env = environment,
            Volumes = new[] { execdLogVolume },
            Metadata = new Dictionary<string, string> { { "tag", "csharp-code-interpreter-e2e" } },
            HealthCheckPollingInterval = 500
        };
    }
}


================================================
FILE: tests/csharp/OpenSandbox.E2ETests/E2ETestFixture.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.Config;
using Xunit;

namespace OpenSandbox.E2ETests;

/// <summary>
/// Shared fixture for E2E tests providing common configuration.
/// </summary>
/// <remarks>
/// Every setting is resolved from a primary environment variable, then a legacy-named fallback variable,
/// then (where one exists) a built-in default. A variable that is unset, empty or whitespace-only counts
/// as missing, so a blank value injected by CI does not override the fallback or the default.
/// </remarks>
public sealed class E2ETestFixture : IAsyncLifetime
{
    /// <summary>Sandbox image used by tests.</summary>
    public string DefaultImage { get; }

    /// <summary>Connection settings built without the server-proxy flag.</summary>
    public ConnectionConfig ConnectionConfig { get; }

    /// <summary>Same settings as <see cref="ConnectionConfig"/> but with <c>UseServerProxy</c> enabled.</summary>
    public ConnectionConfig ServerProxyConnectionConfig { get; }

    /// <summary>Value tests pass as the sandbox <c>TimeoutSeconds</c>.</summary>
    public int DefaultTimeoutSeconds { get; } = 1200;

    /// <summary>Value tests pass as the sandbox <c>ReadyTimeoutSeconds</c>.</summary>
    public int DefaultReadyTimeoutSeconds { get; } = 90;

    /// <summary>Reads the environment and builds both connection configurations.</summary>
    public E2ETestFixture()
    {
        DefaultImage =
            FirstConfiguredEnv("OPENSANDBOX_SANDBOX_DEFAULT_IMAGE", "SANDBOX_IMAGE")
            ?? "opensandbox/code-interpreter:latest";

        var domain =
            FirstConfiguredEnv("OPENSANDBOX_TEST_DOMAIN", "SANDBOX_DOMAIN")
            ?? "localhost:8080";

        // No default: the API key stays null when neither variable carries a value.
        var apiKey = FirstConfiguredEnv("OPENSANDBOX_TEST_API_KEY", "SANDBOX_API_KEY");

        var protocolRaw =
            FirstConfiguredEnv("OPENSANDBOX_TEST_PROTOCOL", "SANDBOX_PROTOCOL")
            ?? "http";

        // Anything other than "https" (case-insensitive) selects plain HTTP.
        var protocol = protocolRaw.Equals("https", StringComparison.OrdinalIgnoreCase)
            ? ConnectionProtocol.Https
            : ConnectionProtocol.Http;

        ConnectionConfig = new ConnectionConfig(new ConnectionConfigOptions
        {
            Domain = domain,
            Protocol = protocol,
            ApiKey = apiKey,
            RequestTimeoutSeconds = 180
        });

        ServerProxyConnectionConfig = new ConnectionConfig(new ConnectionConfigOptions
        {
            Domain = domain,
            Protocol = protocol,
            ApiKey = apiKey,
            RequestTimeoutSeconds = 180,
            UseServerProxy = true
        });
    }

    /// <summary>No asynchronous setup is required.</summary>
    public Task InitializeAsync()
    {
        return Task.CompletedTask;
    }

    /// <summary>No asynchronous teardown is required.</summary>
    public Task DisposeAsync()
    {
        return Task.CompletedTask;
    }

    /// <summary>
    /// Returns the value of the first listed environment variable that is set to something other than
    /// an empty or whitespace-only string, or <c>null</c> when none qualifies.
    /// </summary>
    private static string? FirstConfiguredEnv(params string[] names)
    {
        foreach (var name in names)
        {
            var value = Environment.GetEnvironmentVariable(name);
            if (!string.IsNullOrWhiteSpace(value))
            {
                return value;
            }
        }

        return null;
    }
}

/// <summary>
/// Declares the xUnit test collection named "CSharp E2E Tests" and associates
/// <see cref="E2ETestFixture"/> with it as the collection fixture. The class body is intentionally
/// empty; it exists only to carry the attribute and the fixture interface.
/// </summary>
[CollectionDefinition("CSharp E2E Tests")]
public sealed class E2ETestCollection : ICollectionFixture<E2ETestFixture>
{
}


================================================
FILE: tests/csharp/OpenSandbox.E2ETests/OpenSandbox.E2ETests.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Build settings: test-only project (not packable) with nullable reference types and implicit usings on. -->
  <PropertyGroup>
    <TargetFramework>net10.0</TargetFramework>
    <!-- The C# language version is pinned explicitly rather than taken from the target framework default. -->
    <LangVersion>12.0</LangVersion>
    <Nullable>enable</Nullable>
    <ImplicitUsings>enable</ImplicitUsings>
    <IsPackable>false</IsPackable>
    <IsTestProject>true</IsTestProject>
  </PropertyGroup>

  <!-- Test tooling: test SDK, xUnit plus its Visual Studio runner, coverage collector, FluentAssertions. -->
  <ItemGroup>
    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.11.1" />
    <PackageReference Include="xunit" Version="2.9.2" />
    <PackageReference Include="xunit.runner.visualstudio" Version="2.8.2">
      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
      <PrivateAssets>all</PrivateAssets>
    </PackageReference>
    <PackageReference Include="coverlet.collector" Version="6.0.2">
      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
      <PrivateAssets>all</PrivateAssets>
    </PackageReference>
    <PackageReference Include="FluentAssertions" Version="6.12.2" />
  </ItemGroup>

  <!-- Code under test: the sandbox and code-interpreter C# SDK projects, referenced by relative path. -->
  <ItemGroup>
    <ProjectReference Include="..\..\..\sdks\sandbox\csharp\src\OpenSandbox\OpenSandbox.csproj" />
    <ProjectReference Include="..\..\..\sdks\code-interpreter\csharp\src\OpenSandbox.CodeInterpreter\OpenSandbox.CodeInterpreter.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: tests/csharp/OpenSandbox.E2ETests/SandboxE2ETests.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System.Collections.Concurrent;
using System.Text;
using OpenSandbox.Config;
using OpenSandbox.Core;
using OpenSandbox.Models;
using Xunit;

namespace OpenSandbox.E2ETests;

[Collection("CSharp E2E Tests")]
public class SandboxE2ETests : IClassFixture<SandboxE2ETestFixture>
{
    private readonly SandboxE2ETestFixture _fixture;

    /// <summary>Keeps a reference to the class fixture handed in by xUnit.</summary>
    public SandboxE2ETests(SandboxE2ETestFixture fixture) => _fixture = fixture;

    /// <summary>
    /// Checks health, info, endpoint lookup, metrics and renewal on the fixture sandbox, then attaches a
    /// second client to the same sandbox id and runs a command through it.
    /// </summary>
    [Fact(Timeout = 2 * 60 * 1000)]
    public async Task Sandbox_Lifecycle_Health_Endpoint_Metrics_Renew_Connect()
    {
        var primary = _fixture.Sandbox;
        Assert.False(string.IsNullOrWhiteSpace(primary.Id));
        Assert.True(await primary.IsHealthyAsync());

        // --- Info ---
        var infoBeforeRenew = await primary.GetInfoAsync();
        Assert.Equal(primary.Id, infoBeforeRenew.Id);
        Assert.Equal(SandboxStates.Running, infoBeforeRenew.Status.State);
        Assert.Equal(Constants.DefaultEntrypoint, infoBeforeRenew.Entrypoint);
        Assert.NotNull(infoBeforeRenew.Metadata);
        Assert.Equal("csharp-e2e-test", infoBeforeRenew.Metadata!["tag"]);
        Assert.True(infoBeforeRenew.ExpiresAt > infoBeforeRenew.CreatedAt);

        // --- Endpoint ---
        var execdEndpoint = await primary.GetEndpointAsync(Constants.DefaultExecdPort);
        AssertEndpointHasPort(execdEndpoint.EndpointAddress, Constants.DefaultExecdPort);

        // --- Metrics ---
        var snapshot = await primary.GetMetricsAsync();
        Assert.True(snapshot.CpuCount > 0);
        Assert.True(snapshot.CpuUsedPercentage is >= 0.0 and <= 100.0);
        Assert.True(snapshot.MemoryTotalMiB > 0);
        Assert.True(snapshot.MemoryUsedMiB <= snapshot.MemoryTotalMiB);
        AssertRecentTimestampMs(snapshot.Timestamp, 120_000);

        // --- Renew: both the response and a fresh info read must show a later expiry ---
        var renewal = await primary.RenewAsync(30 * 60);
        Assert.NotNull(renewal);
        Assert.NotNull(renewal.ExpiresAt);
        var infoAfterRenew = await primary.GetInfoAsync();
        Assert.True(infoAfterRenew.ExpiresAt > infoBeforeRenew.ExpiresAt);
        Assert.True(renewal.ExpiresAt > infoBeforeRenew.ExpiresAt);

        // --- Connect: second handle onto the same sandbox ---
        var connectOptions = new SandboxConnectOptions
        {
            ConnectionConfig = _fixture.ConnectionConfig,
            SandboxId = primary.Id
        };
        var secondary = await Sandbox.ConnectAsync(connectOptions);

        try
        {
            Assert.Equal(primary.Id, secondary.Id);
            Assert.True(await secondary.IsHealthyAsync());

            var echo = await secondary.Commands.RunAsync("echo connect-ok");
            Assert.Null(echo.Error);
            Assert.Single(echo.Logs.Stdout);
            Assert.Equal("connect-ok", echo.Logs.Stdout[0].Text);
        }
        finally
        {
            // Only the extra handle is disposed; the sandbox itself is not killed here.
            await secondary.DisposeAsync();
        }
    }

    /// <summary>
    /// Sends a custom <c>X-Request-ID</c> header while addressing a sandbox id that does not exist and
    /// expects the resulting <see cref="SandboxApiException"/> to report that same request id.
    /// </summary>
    [Fact(Timeout = 2 * 60 * 1000)]
    public async Task Sandbox_XRequestId_Passthrough_OnServerError()
    {
        var requestId = $"e2e-csharp-server-{DateTimeOffset.UtcNow.ToUnixTimeMilliseconds()}";
        var missingSandboxId = $"missing-{requestId}";

        // Clone the fixture's connection settings and add the tracing header.
        var source = _fixture.ConnectionConfig;
        var configOptions = new ConnectionConfigOptions
        {
            Domain = source.Domain,
            Protocol = source.Protocol,
            ApiKey = source.ApiKey,
            RequestTimeoutSeconds = source.RequestTimeoutSeconds,
            Headers = new Dictionary<string, string> { { "X-Request-ID", requestId } }
        };
        var tracedConfig = new ConnectionConfig(configOptions);

        Func<Task> connectAndQuery = async () =>
        {
            var handle = await Sandbox.ConnectAsync(new SandboxConnectOptions
            {
                ConnectionConfig = tracedConfig,
                SandboxId = missingSandboxId
            });

            // Reached only if connecting did not already fail; the handle is disposed either way.
            try
            {
                await handle.GetInfoAsync();
            }
            finally
            {
                await handle.DisposeAsync();
            }
        };

        var apiError = await Assert.ThrowsAsync<SandboxApiException>(connectAndQuery);

        Assert.Equal(requestId, apiError.RequestId);
    }

    /// <summary>
    /// Creates a sandbox with <c>ManualCleanup = true</c> and verifies that its info reports no expiry
    /// (<c>ExpiresAt</c> is null) and carries the metadata tag supplied at creation.
    /// </summary>
    [Fact(Timeout = 2 * 60 * 1000)]
    public async Task Sandbox_ManualCleanup_Returns_Null_ExpiresAt()
    {
        var sandbox = await Sandbox.CreateAsync(new SandboxCreateOptions
        {
            ConnectionConfig = _fixture.ConnectionConfig,
            Image = _fixture.DefaultImage,
            ManualCleanup = true,
            ReadyTimeoutSeconds = _fixture.DefaultReadyTimeoutSeconds,
            Metadata = new Dictionary<string, string> { ["tag"] = "manual-csharp-e2e-test" }
        });

        try
        {
            var info = await sandbox.GetInfoAsync();
            Assert.Null(info.ExpiresAt);
            Assert.NotNull(info.Metadata);
            Assert.Equal("manual-csharp-e2e-test", info.Metadata!["tag"]);
        }
        finally
        {
            // This sandbox reports no expiry, so a failed kill is left to surface rather than being
            // swallowed; the nested finally guarantees the client is still disposed in that case.
            try
            {
                await sandbox.KillAsync();
            }
            finally
            {
                await sandbox.DisposeAsync();
            }
        }
    }

    /// <summary>
    /// Creates a sandbox with a deny-by-default network policy that allows only <c>pypi.org</c>, verifies
    /// the reported policy and the observable reachability via <c>curl</c>, then patches the rules to swap
    /// which host is allowed and verifies both again.
    /// </summary>
    [Fact(Timeout = 2 * 60 * 1000)]
    public async Task Sandbox_Create_With_NetworkPolicy_Get_And_Patch_Egress()
    {
        var startingPolicy = new NetworkPolicy
        {
            DefaultAction = NetworkRuleAction.Deny,
            Egress = new List<NetworkRule>
            {
                new NetworkRule { Action = NetworkRuleAction.Allow, Target = "pypi.org" }
            }
        };

        var sb = await Sandbox.CreateAsync(new SandboxCreateOptions
        {
            ConnectionConfig = _fixture.ConnectionConfig,
            Image = _fixture.DefaultImage,
            TimeoutSeconds = _fixture.DefaultTimeoutSeconds,
            ReadyTimeoutSeconds = _fixture.DefaultReadyTimeoutSeconds,
            NetworkPolicy = startingPolicy
        });

        try
        {
            // Fixed wait before the first policy query.
            await Task.Delay(TimeSpan.FromMilliseconds(5000));

            // Phase 1: policy as created.
            var reported = await sb.GetEgressPolicyAsync();
            Assert.NotNull(reported);
            Assert.Equal(NetworkRuleAction.Deny, reported.DefaultAction);
            Assert.NotNull(reported.Egress);
            Assert.Contains(
                reported.Egress!,
                r => r.Target == "pypi.org" && r.Action == NetworkRuleAction.Allow);

            var githubBefore = await sb.Commands.RunAsync("curl -I https://www.github.com");
            Assert.NotNull(githubBefore.Error);

            var pypiBefore = await sb.Commands.RunAsync("curl -I https://pypi.org");
            Assert.Null(pypiBefore.Error);

            // Phase 2: swap the allowed host.
            var swappedRules = new List<NetworkRule>
            {
                new NetworkRule { Action = NetworkRuleAction.Allow, Target = "www.github.com" },
                new NetworkRule { Action = NetworkRuleAction.Deny, Target = "pypi.org" }
            };
            await sb.PatchEgressRulesAsync(swappedRules);
            await Task.Delay(TimeSpan.FromMilliseconds(2000));

            var afterPatch = await sb.GetEgressPolicyAsync();
            Assert.NotNull(afterPatch.Egress);
            Assert.Contains(
                afterPatch.Egress!,
                r => r.Target == "www.github.com" && r.Action == NetworkRuleAction.Allow);
            Assert.Contains(
                afterPatch.Egress!,
                r => r.Target == "pypi.org" && r.Action == NetworkRuleAction.Deny);

            var githubAfter = await sb.Commands.RunAsync("curl -I https://www.github.com");
            Assert.Null(githubAfter.Error);

            var pypiAfter = await sb.Commands.RunAsync("curl -I https://pypi.org");
            Assert.NotNull(pypiAfter.Error);
        }
        finally
        {
            try
            {
                await sb.KillAsync();
            }
            catch
            {
                // Best effort: a failed kill must not hide the test outcome.
            }
            finally
            {
                await sb.DisposeAsync();
            }
        }
    }

    /// <summary>
    /// Server-proxy variant of the egress policy test: uses the proxy-enabled connection config, checks
    /// that the egress endpoint address contains the sandbox proxy path, verifies the initial policy and
    /// reachability, then patches the rules and verifies the reported policy.
    /// </summary>
    [Fact(Timeout = 2 * 60 * 1000)]
    public async Task Sandbox_Create_With_NetworkPolicy_Get_And_Patch_Egress_Via_ServerProxy()
    {
        var startingPolicy = new NetworkPolicy
        {
            DefaultAction = NetworkRuleAction.Deny,
            Egress = new List<NetworkRule>
            {
                new NetworkRule { Action = NetworkRuleAction.Allow, Target = "pypi.org" }
            }
        };

        var sb = await Sandbox.CreateAsync(new SandboxCreateOptions
        {
            ConnectionConfig = _fixture.ServerProxyConnectionConfig,
            Image = _fixture.DefaultImage,
            TimeoutSeconds = _fixture.DefaultTimeoutSeconds,
            ReadyTimeoutSeconds = _fixture.DefaultReadyTimeoutSeconds,
            NetworkPolicy = startingPolicy
        });

        try
        {
            // Fixed wait before the first queries.
            await Task.Delay(TimeSpan.FromMilliseconds(5000));

            // The egress endpoint must be addressed through the server's proxy route.
            var egressEndpoint = await sb.GetEndpointAsync(Constants.DefaultEgressPort);
            var expectedProxyPath = $"/sandboxes/{sb.Id}/proxy/{Constants.DefaultEgressPort}";
            Assert.Contains(expectedProxyPath, egressEndpoint.EndpointAddress);

            // Phase 1: policy as created.
            var reported = await sb.GetEgressPolicyAsync();
            Assert.NotNull(reported);
            Assert.Equal(NetworkRuleAction.Deny, reported.DefaultAction);
            Assert.NotNull(reported.Egress);
            Assert.Contains(
                reported.Egress!,
                r => r.Target == "pypi.org" && r.Action == NetworkRuleAction.Allow);

            var githubBefore = await sb.Commands.RunAsync("curl -I https://www.github.com");
            Assert.NotNull(githubBefore.Error);

            var pypiBefore = await sb.Commands.RunAsync("curl -I https://pypi.org");
            Assert.Null(pypiBefore.Error);

            // Phase 2: swap the allowed host and re-read the policy.
            var swappedRules = new List<NetworkRule>
            {
                new NetworkRule { Action = NetworkRuleAction.Allow, Target = "www.github.com" },
                new NetworkRule { Action = NetworkRuleAction.Deny, Target = "pypi.org" }
            };
            await sb.PatchEgressRulesAsync(swappedRules);
            await Task.Delay(TimeSpan.FromMilliseconds(2000));

            var afterPatch = await sb.GetEgressPolicyAsync();
            Assert.NotNull(afterPatch.Egress);
            Assert.Contains(
                afterPatch.Egress!,
                r => r.Target == "www.github.com" && r.Action == NetworkRuleAction.Allow);
            Assert.Contains(
                afterPatch.Egress!,
                r => r.Target == "pypi.org" && r.Action == NetworkRuleAction.Deny);
        }
        finally
        {
            try
            {
                await sb.KillAsync();
            }
            catch
            {
                // Best effort: a failed kill must not hide the test outcome.
            }
            finally
            {
                await sb.DisposeAsync();
            }
        }
    }

    /// <summary>
    /// Mounts a host directory read-write into a new sandbox, reads the pre-existing marker file through
    /// the mount, then writes a file and reads it back.
    /// </summary>
    [Fact(Timeout = 2 * 60 * 1000)]
    public async Task Sandbox_Create_With_HostVolumeMount()
    {
        const string hostPath = "/tmp/opensandbox-e2e/host-volume-test";
        const string mountPath = "/mnt/host-data";

        var hostVolume = new Volume
        {
            Name = "test-host-vol",
            Host = new Host { Path = hostPath },
            MountPath = mountPath,
            ReadOnly = false
        };

        var sb = await Sandbox.CreateAsync(new SandboxCreateOptions
        {
            ConnectionConfig = _fixture.ConnectionConfig,
            Image = _fixture.DefaultImage,
            TimeoutSeconds = _fixture.DefaultTimeoutSeconds,
            ReadyTimeoutSeconds = _fixture.DefaultReadyTimeoutSeconds,
            Volumes = new[] { hostVolume }
        });

        try
        {
            // The marker file is expected to exist in the host directory before the test runs.
            var markerRead = await sb.Commands.RunAsync($"cat {mountPath}/marker.txt");
            Assert.Null(markerRead.Error);
            Assert.Single(markerRead.Logs.Stdout);
            Assert.Equal("opensandbox-e2e-marker", markerRead.Logs.Stdout[0].Text);

            var writeOutcome = await sb.Commands.RunAsync(
                $"echo 'written-from-sandbox' > {mountPath}/sandbox-output.txt");
            Assert.Null(writeOutcome.Error);

            var roundTrip = await sb.Commands.RunAsync($"cat {mountPath}/sandbox-output.txt");
            Assert.Null(roundTrip.Error);
            Assert.Single(roundTrip.Logs.Stdout);
            Assert.Equal("written-from-sandbox", roundTrip.Logs.Stdout[0].Text);
        }
        finally
        {
            try
            {
                await sb.KillAsync();
            }
            catch
            {
                // Best effort: a failed kill must not hide the test outcome.
            }
            finally
            {
                await sb.DisposeAsync();
            }
        }
    }

    /// <summary>
    /// Mounts a host directory read-only into a new sandbox, reads the marker file through the mount and
    /// expects an attempt to create a file there to report an error.
    /// </summary>
    [Fact(Timeout = 2 * 60 * 1000)]
    public async Task Sandbox_Create_With_HostVolumeMount_ReadOnly()
    {
        const string hostPath = "/tmp/opensandbox-e2e/host-volume-test";
        const string mountPath = "/mnt/host-data-ro";

        var readOnlyVolume = new Volume
        {
            Name = "test-host-vol-ro",
            Host = new Host { Path = hostPath },
            MountPath = mountPath,
            ReadOnly = true
        };

        var sb = await Sandbox.CreateAsync(new SandboxCreateOptions
        {
            ConnectionConfig = _fixture.ConnectionConfig,
            Image = _fixture.DefaultImage,
            TimeoutSeconds = _fixture.DefaultTimeoutSeconds,
            ReadyTimeoutSeconds = _fixture.DefaultReadyTimeoutSeconds,
            Volumes = new[] { readOnlyVolume }
        });

        try
        {
            // Reading must work ...
            var markerRead = await sb.Commands.RunAsync($"cat {mountPath}/marker.txt");
            Assert.Null(markerRead.Error);
            Assert.Single(markerRead.Logs.Stdout);
            Assert.Equal("opensandbox-e2e-marker", markerRead.Logs.Stdout[0].Text);

            // ... while writing must fail.
            var writeAttempt = await sb.Commands.RunAsync($"touch {mountPath}/should-fail.txt");
            Assert.NotNull(writeAttempt.Error);
        }
        finally
        {
            try
            {
                await sb.KillAsync();
            }
            catch
            {
                // Best effort: a failed kill must not hide the test outcome.
            }
            finally
            {
                await sb.DisposeAsync();
            }
        }
    }

    /// <summary>
    /// Mounts a PVC-backed volume read-write into a new sandbox, reads the pre-existing marker file through
    /// the mount, then writes a file and reads it back.
    /// </summary>
    [Fact(Timeout = 2 * 60 * 1000)]
    public async Task Sandbox_Create_With_PvcVolumeMount()
    {
        const string claimName = "opensandbox-e2e-pvc-test";
        const string mountPath = "/mnt/pvc-data";

        var pvcVolume = new Volume
        {
            Name = "test-pvc-vol",
            Pvc = new PVC { ClaimName = claimName },
            MountPath = mountPath,
            ReadOnly = false
        };

        var sb = await Sandbox.CreateAsync(new SandboxCreateOptions
        {
            ConnectionConfig = _fixture.ConnectionConfig,
            Image = _fixture.DefaultImage,
            TimeoutSeconds = _fixture.DefaultTimeoutSeconds,
            ReadyTimeoutSeconds = _fixture.DefaultReadyTimeoutSeconds,
            Volumes = new[] { pvcVolume }
        });

        try
        {
            // The marker file is expected to exist in the claim before the test runs.
            var markerRead = await sb.Commands.RunAsync($"cat {mountPath}/marker.txt");
            Assert.Null(markerRead.Error);
            Assert.Single(markerRead.Logs.Stdout);
            Assert.Equal("pvc-marker-data", markerRead.Logs.Stdout[0].Text);

            var writeOutcome = await sb.Commands.RunAsync(
                $"echo 'written-to-pvc' > {mountPath}/pvc-output.txt");
            Assert.Null(writeOutcome.Error);

            var roundTrip = await sb.Commands.RunAsync($"cat {mountPath}/pvc-output.txt");
            Assert.Null(roundTrip.Error);
            Assert.Single(roundTrip.Logs.Stdout);
            Assert.Equal("written-to-pvc", roundTrip.Logs.Stdout[0].Text);
        }
        finally
        {
            try
            {
                await sb.KillAsync();
            }
            catch
            {
                // Best effort: a failed kill must not hide the test outcome.
            }
            finally
            {
                await sb.DisposeAsync();
            }
        }
    }

    /// <summary>
    /// Mounts a PVC-backed volume read-only into a new sandbox, reads the marker file through the mount and
    /// expects an attempt to create a file there to report an error.
    /// </summary>
    [Fact(Timeout = 2 * 60 * 1000)]
    public async Task Sandbox_Create_With_PvcVolumeMount_ReadOnly()
    {
        const string claimName = "opensandbox-e2e-pvc-test";
        const string mountPath = "/mnt/pvc-data-ro";

        var readOnlyVolume = new Volume
        {
            Name = "test-pvc-vol-ro",
            Pvc = new PVC { ClaimName = claimName },
            MountPath = mountPath,
            ReadOnly = true
        };

        var sb = await Sandbox.CreateAsync(new SandboxCreateOptions
        {
            ConnectionConfig = _fixture.ConnectionConfig,
            Image = _fixture.DefaultImage,
            TimeoutSeconds = _fixture.DefaultTimeoutSeconds,
            ReadyTimeoutSeconds = _fixture.DefaultReadyTimeoutSeconds,
            Volumes = new[] { readOnlyVolume }
        });

        try
        {
            // Reading must work ...
            var markerRead = await sb.Commands.RunAsync($"cat {mountPath}/marker.txt");
            Assert.Null(markerRead.Error);
            Assert.Single(markerRead.Logs.Stdout);
            Assert.Equal("pvc-marker-data", markerRead.Logs.Stdout[0].Text);

            // ... while writing must fail.
            var writeAttempt = await sb.Commands.RunAsync($"touch {mountPath}/should-fail.txt");
            Assert.NotNull(writeAttempt.Error);
        }
        finally
        {
            try
            {
                await sb.KillAsync();
            }
            catch
            {
                // Best effort: a failed kill must not hide the test outcome.
            }
            finally
            {
                await sb.DisposeAsync();
            }
        }
    }

    /// <summary>
    /// Mounts the <c>datasets/train</c> sub-path of a PVC-backed volume into a new sandbox and verifies
    /// that the mount shows that sub-directory's content (marker file present, no <c>datasets</c> entry)
    /// and accepts a write followed by a read-back.
    /// </summary>
    [Fact(Timeout = 2 * 60 * 1000)]
    public async Task Sandbox_Create_With_PvcVolumeMount_SubPath()
    {
        const string claimName = "opensandbox-e2e-pvc-test";
        const string mountPath = "/mnt/train";

        var subPathVolume = new Volume
        {
            Name = "test-pvc-subpath",
            Pvc = new PVC { ClaimName = claimName },
            MountPath = mountPath,
            ReadOnly = false,
            SubPath = "datasets/train"
        };

        var sb = await Sandbox.CreateAsync(new SandboxCreateOptions
        {
            ConnectionConfig = _fixture.ConnectionConfig,
            Image = _fixture.DefaultImage,
            TimeoutSeconds = _fixture.DefaultTimeoutSeconds,
            ReadyTimeoutSeconds = _fixture.DefaultReadyTimeoutSeconds,
            Volumes = new[] { subPathVolume }
        });

        try
        {
            var markerRead = await sb.Commands.RunAsync($"cat {mountPath}/marker.txt");
            Assert.Null(markerRead.Error);
            Assert.Single(markerRead.Logs.Stdout);
            Assert.Equal("pvc-subpath-marker", markerRead.Logs.Stdout[0].Text);

            // The listing must show the sub-directory's content, not the volume root.
            var listing = await sb.Commands.RunAsync($"ls {mountPath}/");
            Assert.Null(listing.Error);
            var listedLines = listing.Logs.Stdout.Select(line => line.Text);
            var listingText = string.Join("\n", listedLines);
            Assert.Contains("marker.txt", listingText, StringComparison.Ordinal);
            Assert.DoesNotContain("datasets", listingText, StringComparison.Ordinal);

            var writeOutcome = await sb.Commands.RunAsync(
                $"echo 'subpath-write-test' > {mountPath}/output.txt");
            Assert.Null(writeOutcome.Error);

            var roundTrip = await sb.Commands.RunAsync($"cat {mountPath}/output.txt");
            Assert.Null(roundTrip.Error);
            Assert.Single(roundTrip.Logs.Stdout);
            Assert.Equal("subpath-write-test", roundTrip.Logs.Stdout[0].Text);
        }
        finally
        {
            try
            {
                await sb.KillAsync();
            }
            catch
            {
                // Best effort: a failed kill must not hide the test outcome.
            }
            finally
            {
                await sb.DisposeAsync();
            }
        }
    }

    /// <summary>
    /// Exercises the basic command-execution paths on the shared fixture sandbox:
    /// a foreground <c>echo</c> observed through streaming handlers, a command run
    /// with a custom working directory, a background command that must return
    /// promptly, and a command that does not exist and must surface an error.
    /// </summary>
    [Fact(Timeout = 2 * 60 * 1000)]
    public async Task Command_Execution_Success_Cwd_Background_Failure()
    {
        var sandbox = _fixture.Sandbox;

        var stdoutMessages = new ConcurrentBag<OutputMessage>();
        var stderrMessages = new ConcurrentBag<OutputMessage>();
        var results = new ConcurrentBag<ExecutionResult>();
        var errors = new ConcurrentBag<ExecutionError>();
        var completedEvents = new ConcurrentBag<ExecutionComplete>();
        var initEvents = new ConcurrentBag<ExecutionInit>();

        // Every handler just records the event it receives so it can be inspected afterwards.
        var handlers = new ExecutionHandlers
        {
            OnStdout = msg => { stdoutMessages.Add(msg); return Task.CompletedTask; },
            OnStderr = msg => { stderrMessages.Add(msg); return Task.CompletedTask; },
            OnResult = res => { results.Add(res); return Task.CompletedTask; },
            OnExecutionComplete = complete => { completedEvents.Add(complete); return Task.CompletedTask; },
            OnError = err => { errors.Add(err); return Task.CompletedTask; },
            OnInit = init => { initEvents.Add(init); return Task.CompletedTask; }
        };

        // 1) Successful foreground command with handlers attached.
        var echoResult = await sandbox.Commands.RunAsync("echo Hello OpenSandbox E2E", handlers: handlers);
        Assert.False(string.IsNullOrWhiteSpace(echoResult.Id));
        Assert.Null(echoResult.Error);
        Assert.Single(echoResult.Logs.Stdout);
        Assert.Equal("Hello OpenSandbox E2E", echoResult.Logs.Stdout[0].Text);
        AssertRecentTimestampMs(echoResult.Logs.Stdout[0].Timestamp, 60_000);
        AssertTerminalEventContract(initEvents, completedEvents, errors, echoResult.Id!);

        // 2) The working-directory option must be honoured by the executed command.
        var pwdResult = await sandbox.Commands.RunAsync(
            "pwd",
            options: new RunCommandOptions { WorkingDirectory = "/tmp" });
        Assert.Null(pwdResult.Error);
        Assert.Single(pwdResult.Logs.Stdout);
        Assert.Equal("/tmp", pwdResult.Logs.Stdout[0].Text);

        // 3) A background command must return long before the 30 s sleep finishes.
        // A Stopwatch is used instead of subtracting DateTime.UtcNow values because the
        // wall clock is not monotonic: a clock adjustment during the call could make
        // the measured duration arbitrarily wrong (even negative).
        var backgroundTimer = System.Diagnostics.Stopwatch.StartNew();
        await sandbox.Commands.RunAsync(
            "sleep 30",
            options: new RunCommandOptions { Background = true });
        backgroundTimer.Stop();
        Assert.True(backgroundTimer.Elapsed.TotalSeconds < 10, "Background command should return quickly.");

        // 4) Failure path. Start from empty bags so the events recorded for the echo
        // command above do not take part in the assertions below.
        stdoutMessages = new ConcurrentBag<OutputMessage>();
        stderrMessages = new ConcurrentBag<OutputMessage>();
        errors = new ConcurrentBag<ExecutionError>();
        completedEvents = new ConcurrentBag<ExecutionComplete>();
        initEvents = new ConcurrentBag<ExecutionInit>();

        var failResult = await sandbox.Commands.RunAsync(
            "nonexistent-command-that-does-not-exist",
            handlers: new ExecutionHandlers
            {
                OnStdout = msg => { stdoutMessages.Add(msg); return Task.CompletedTask; },
                OnStderr = msg => { stderrMessages.Add(msg); return Task.CompletedTask; },
                OnError = err => { errors.Add(err); return Task.CompletedTask; },
                OnExecutionComplete = complete => { completedEvents.Add(complete); return Task.CompletedTask; },
                OnInit = init => { initEvents.Add(init); return Task.CompletedTask; }
            });

        Assert.NotNull(failResult.Error);
        Assert.Equal("CommandExecError", failResult.Error!.Name);
        Assert.True(failResult.Logs.Stderr.Count > 0);
        Assert.Contains(
            failResult.Logs.Stderr,
            msg => msg.Text.Contains("nonexistent-command-that-does-not-exist", StringComparison.Ordinal));
        AssertTerminalEventContract(initEvents, completedEvents, errors, failResult.Id!);
        // A failed command is expected to end with an error event only, never a completion event.
        Assert.Empty(completedEvents);
    }

    /// <summary>
    /// Starts a background shell command, verifies that its status can be looked up by id,
    /// and polls the background-log API (at most 20 times, one second apart) until both
    /// echoed lines have been collected.
    /// </summary>
    [Fact(Timeout = 2 * 60 * 1000)]
    public async Task Command_Status_And_Background_Logs()
    {
        var sandbox = _fixture.Sandbox;
        var backgroundOptions = new RunCommandOptions { Background = true };

        var started = await sandbox.Commands.RunAsync(
            "sh -c 'echo log-line-1; echo log-line-2; sleep 2'",
            options: backgroundOptions);
        Assert.False(string.IsNullOrWhiteSpace(started.Id));
        var commandId = started.Id!;

        var status = await sandbox.Commands.GetCommandStatusAsync(commandId);
        Assert.Equal(commandId, status.Id);
        Assert.NotNull(status.Running);

        var collected = new StringBuilder();
        long? cursor = null;
        var pollsLeft = 20;
        while (pollsLeft-- > 0)
        {
            var page = await sandbox.Commands.GetBackgroundCommandLogsAsync(commandId, cursor);
            collected.Append(page.Content);

            // Keep the previous cursor when the response does not carry a new one.
            var nextCursor = page.Cursor;
            if (nextCursor is not null)
            {
                cursor = nextCursor;
            }

            var sawLastLine = collected.ToString().Contains("log-line-2", StringComparison.Ordinal);
            if (sawLastLine)
            {
                break;
            }

            await Task.Delay(1000);
        }

        var allLogs = collected.ToString();
        Assert.Contains("log-line-1", allLogs, StringComparison.Ordinal);
        Assert.Contains("log-line-2", allLogs, StringComparison.Ordinal);
    }

    /// <summary>
    /// Verifies per-command environment injection: the probe prints <c>__EMPTY__</c>
    /// when the variable is unset, and prints the injected value when the variable is
    /// supplied through <c>RunCommandOptions.Envs</c>.
    /// </summary>
    [Fact(Timeout = 2 * 60 * 1000)]
    public async Task Command_Env_Injection()
    {
        const string envKey = "OPEN_SANDBOX_E2E_CMD_ENV";

        var sandbox = _fixture.Sandbox;
        var envValue = $"env-ok-{DateTimeOffset.UtcNow.ToUnixTimeMilliseconds()}";

        // Shell snippet that echoes the variable, or a sentinel when it is empty/unset.
        var probeCommand =
            $"sh -c 'if [ -z \"${{{envKey}:-}}\" ]; then echo \"__EMPTY__\"; else echo \"${{{envKey}}}\"; fi'";

        // Baseline: without injection the variable must not be visible.
        var withoutEnv = await sandbox.Commands.RunAsync(probeCommand);
        Assert.Null(withoutEnv.Error);
        var baselineLines = withoutEnv.Logs.Stdout.Select(m => m.Text);
        Assert.Equal("__EMPTY__", string.Join("\n", baselineLines).Trim());

        // Injected: the same probe must now print the supplied value.
        var injectedEnvs = new Dictionary<string, string>
        {
            [envKey] = envValue,
            ["OPEN_SANDBOX_E2E_SECOND_ENV"] = "second-ok"
        };
        var withEnv = await sandbox.Commands.RunAsync(
            probeCommand,
            options: new RunCommandOptions { Envs = injectedEnvs });
        Assert.Null(withEnv.Error);
        var injectedLines = withEnv.Logs.Stdout.Select(m => m.Text);
        Assert.Equal(envValue, string.Join("\n", injectedLines).Trim());
    }

    /// <summary>
    /// Walks through the sandbox filesystem API end to end: create directories, write files
    /// from three kinds of payload, read them back (full, ranged, bytes, streamed), query
    /// file info, search, change permissions, overwrite, replace content, move, and delete.
    /// The steps build on each other, so their order matters.
    /// </summary>
    [Fact(Timeout = 2 * 60 * 1000)]
    public async Task Filesystem_Operations_CRUD_Replace_Move_Delete()
    {
        var sandbox = _fixture.Sandbox;

        // Timestamp-suffixed paths keep repeated runs against the same sandbox from colliding.
        var testDir1 = $"/tmp/fs_test1_{DateTimeOffset.UtcNow.ToUnixTimeMilliseconds()}";
        var testDir2 = $"/tmp/fs_test2_{DateTimeOffset.UtcNow.ToUnixTimeMilliseconds()}";

        // NOTE(review): Mode values look like permission digits written as decimal ints
        // (755, 644, 600) — confirm against the SDK's CreateDirectoryEntry contract.
        await sandbox.Files.CreateDirectoriesAsync(new[]
        {
            new CreateDirectoryEntry { Path = testDir1, Mode = 755 },
            new CreateDirectoryEntry { Path = testDir2, Mode = 644 }
        });

        var dirInfo = await sandbox.Files.GetFileInfoAsync(new[] { testDir1, testDir2 });
        Assert.Equal(testDir1, dirInfo[testDir1].Path);
        Assert.Equal(755, dirInfo[testDir1].Mode);
        AssertTimesClose(dirInfo[testDir1].CreatedAt, dirInfo[testDir1].ModifiedAt, 2);

        var testFile1 = $"{testDir1}/test_file1.txt";
        var testFile2 = $"{testDir1}/test_file2.txt";
        var testFile3 = $"{testDir1}/test_file3.txt";
        var testContent = "Hello Filesystem! Line 2. Line 3.";

        // Write the same content via the three payload shapes used here: string, byte[], Stream.
        await sandbox.Files.WriteFilesAsync(new[]
        {
            new WriteEntry { Path = testFile1, Data = testContent, Mode = 644 },
            new WriteEntry { Path = testFile2, Data = Encoding.UTF8.GetBytes(testContent), Mode = 755 },
            new WriteEntry { Path = testFile3, Data = new MemoryStream(Encoding.UTF8.GetBytes(testContent)), Mode = 755 }
        });

        // Read back through each read API: text, ranged text, raw bytes, and chunked stream.
        var readContent1 = await sandbox.Files.ReadFileAsync(
            testFile1,
            new ReadFileOptions { Encoding = "utf-8" });
        var readContent1Partial = await sandbox.Files.ReadFileAsync(
            testFile1,
            new ReadFileOptions { Encoding = "utf-8", Range = "bytes=0-9" });
        var readBytes2 = await sandbox.Files.ReadBytesAsync(testFile2);
        var readContent2 = Encoding.UTF8.GetString(readBytes2);

        var chunks = new List<byte>();
        await foreach (var chunk in sandbox.Files.ReadBytesStreamAsync(testFile3))
        {
            chunks.AddRange(chunk);
        }

        var readContent3 = Encoding.UTF8.GetString(chunks.ToArray());

        Assert.Equal(testContent, readContent1);
        Assert.Equal(testContent, readContent2);
        Assert.Equal(testContent, readContent3);
        // The "bytes=0-9" range is expected to yield the first 10 characters (content is ASCII).
        Assert.Equal(testContent.Substring(0, 10), readContent1Partial);

        var fileInfoMap = await sandbox.Files.GetFileInfoAsync(new[] { testFile1, testFile2, testFile3 });
        var expectedSize = Encoding.UTF8.GetBytes(testContent).Length;
        Assert.Equal(expectedSize, fileInfoMap[testFile1].Size);
        Assert.Equal(expectedSize, fileInfoMap[testFile2].Size);
        Assert.Equal(expectedSize, fileInfoMap[testFile3].Size);
        AssertTimesClose(fileInfoMap[testFile1].CreatedAt, fileInfoMap[testFile1].ModifiedAt, 2);

        // Searching the directory with "*" must return exactly the three files written above.
        var found = new HashSet<string>();
        var searchResults = await sandbox.Files.SearchAsync(new SearchEntry { Path = testDir1, Pattern = "*" });
        foreach (var entry in searchResults)
        {
            found.Add(entry.Path);
        }
        Assert.Equal(new HashSet<string> { testFile1, testFile2, testFile3 }, found);

        await sandbox.Files.SetPermissionsAsync(new[]
        {
            new SetPermissionEntry { Path = testFile1, Mode = 755 },
            new SetPermissionEntry { Path = testFile2, Mode = 600 }
        });

        var updatedInfo = await sandbox.Files.GetFileInfoAsync(new[] { testFile1, testFile2 });
        Assert.Equal(755, updatedInfo[testFile1].Mode);
        Assert.Equal(600, updatedInfo[testFile2].Mode);

        // Overwrite file 1 and check that the content changed and ModifiedAt did not move
        // backwards by a second or more (the helper tolerates up to 999 ms of backward skew).
        var beforeUpdate = (await sandbox.Files.GetFileInfoAsync(new[] { testFile1 }))[testFile1];
        var updatedContent1 = testContent + " Appended line.";
        await Task.Delay(50);
        await sandbox.Files.WriteFilesAsync(new[]
        {
            new WriteEntry { Path = testFile1, Data = updatedContent1, Mode = 644 }
        });

        var newContent1 = await sandbox.Files.ReadFileAsync(testFile1, new ReadFileOptions { Encoding = "utf-8" });
        Assert.Equal(updatedContent1, newContent1);
        var afterUpdate = (await sandbox.Files.GetFileInfoAsync(new[] { testFile1 }))[testFile1];
        AssertModifiedUpdated(beforeUpdate.ModifiedAt, afterUpdate.ModifiedAt, 1, 1000);

        // In-place content replacement of the text appended in the previous step.
        await Task.Delay(50);
        await sandbox.Files.ReplaceContentsAsync(new[]
        {
            new ContentReplaceEntry
            {
                Path = testFile1,
                OldContent = "Appended line.",
                NewContent = "Replaced line."
            }
        });

        var replaced = await sandbox.Files.ReadFileAsync(testFile1, new ReadFileOptions { Encoding = "utf-8" });
        Assert.Contains("Replaced line.", replaced, StringComparison.Ordinal);
        Assert.DoesNotContain("Appended line.", replaced, StringComparison.Ordinal);

        // Move file 3 into the second directory; the old path must no longer be readable.
        var movedPath = $"{testDir2}/moved_file3.txt";
        await sandbox.Files.MoveFilesAsync(new[] { new MoveEntry { Src = testFile3, Dest = movedPath } });
        var movedBytes = await sandbox.Files.ReadBytesAsync(movedPath);
        Assert.Equal(testContent, Encoding.UTF8.GetString(movedBytes));
        await Assert.ThrowsAnyAsync<Exception>(() => sandbox.Files.ReadBytesAsync(testFile3));

        await sandbox.Files.DeleteFilesAsync(new[] { testFile2 });
        await Assert.ThrowsAnyAsync<Exception>(() => sandbox.Files.ReadFileAsync(testFile2));

        // Remove both directories and confirm from inside the sandbox that they are gone.
        await sandbox.Files.DeleteDirectoriesAsync(new[] { testDir1, testDir2 });
        var verify = await sandbox.Commands.RunAsync(
            $"test ! -d {testDir1} && test ! -d {testDir2} && echo OK",
            options: new RunCommandOptions { WorkingDirectory = "/tmp" });
        Assert.Null(verify.Error);
        Assert.Single(verify.Logs.Stdout);
        Assert.Equal("OK", verify.Logs.Stdout[0].Text);
    }

    /// <summary>
    /// Starts a long-running foreground command, waits for its init event to learn the
    /// execution id, interrupts it, and checks that the run ends with exactly one kind of
    /// terminal event (completion or error, not both) and reports an error or stderr output.
    /// </summary>
    [Fact(Timeout = 2 * 60 * 1000)]
    public async Task Command_Interrupt()
    {
        var sandbox = _fixture.Sandbox;

        var initEvents = new ConcurrentBag<ExecutionInit>();
        var completedEvents = new ConcurrentBag<ExecutionComplete>();
        var errors = new ConcurrentBag<ExecutionError>();

        // Completed by the first init event; carries the execution id to the test body.
        var executionStarted = new TaskCompletionSource<string>(TaskCreationOptions.RunContinuationsAsynchronously);

        var handlers = new ExecutionHandlers
        {
            OnInit = init =>
            {
                initEvents.Add(init);
                executionStarted.TrySetResult(init.Id);
                return Task.CompletedTask;
            },
            OnExecutionComplete = complete =>
            {
                completedEvents.Add(complete);
                return Task.CompletedTask;
            },
            OnError = err =>
            {
                errors.Add(err);
                return Task.CompletedTask;
            }
        };

        // Deliberately not awaited yet: the command keeps running while we interrupt it.
        var pendingRun = sandbox.Commands.RunAsync("sleep 30", handlers: handlers);
        var executionId = await executionStarted.Task.WaitAsync(TimeSpan.FromSeconds(15));

        await Task.Delay(2000);
        await sandbox.Commands.InterruptAsync(executionId);

        var outcome = await pendingRun.WaitAsync(TimeSpan.FromSeconds(30));
        Assert.Equal(executionId, outcome.Id);

        var sawCompletion = completedEvents.Count > 0;
        var sawError = errors.Count > 0;
        Assert.True(sawCompletion != sawError);

        var reportedFailure = outcome.Error != null || outcome.Logs.Stderr.Count > 0;
        Assert.True(reportedFailure);
    }

    /// <summary>
    /// Pauses the shared sandbox, waits for the Paused state and for health checks to start
    /// failing, then resumes it, waits for Running plus a passing health check, and finally
    /// runs a command through the resumed instance.
    /// </summary>
    [Fact(Timeout = 5 * 60 * 1000)]
    public async Task Sandbox_Pause_And_Resume()
    {
        var sandbox = _fixture.Sandbox;

        await Task.Delay(5000);
        await sandbox.PauseAsync();

        var infoAfterPause = await WaitForStateAsync(sandbox, SandboxStates.Paused, TimeSpan.FromMinutes(5));
        Assert.Equal(SandboxStates.Paused, infoAfterPause.Status.State);

        // Probe up to 10 times (500 ms apart) until the paused sandbox reports unhealthy.
        var stillHealthy = true;
        var pauseProbesLeft = 10;
        while (pauseProbesLeft-- > 0)
        {
            stillHealthy = await sandbox.IsHealthyAsync();
            if (!stillHealthy)
            {
                break;
            }

            await Task.Delay(500);
        }
        Assert.False(stillHealthy, "Sandbox should be unhealthy after pause.");

        var resumeOptions = new SandboxResumeOptions
        {
            ReadyTimeoutSeconds = 60,
            HealthCheckPollingInterval = 1000
        };
        var resumed = await sandbox.ResumeAsync(resumeOptions);

        var infoAfterResume = await WaitForStateAsync(resumed, SandboxStates.Running, TimeSpan.FromMinutes(3));
        Assert.Equal(SandboxStates.Running, infoAfterResume.Status.State);

        // Probe up to 30 times (1 s apart) until the resumed sandbox reports healthy.
        var healthyAgain = false;
        var resumeProbesLeft = 30;
        while (resumeProbesLeft-- > 0)
        {
            healthyAgain = await resumed.IsHealthyAsync();
            if (healthyAgain)
            {
                break;
            }

            await Task.Delay(1000);
        }
        Assert.True(healthyAgain, "Sandbox should be healthy after resume.");

        // Smoke-check command path after resume to ensure execd adapter is usable.
        var smoke = await resumed.Commands.RunAsync("echo resume-ok");
        Assert.Null(smoke.Error);
        Assert.Single(smoke.Logs.Stdout);
        Assert.Equal("resume-ok", smoke.Logs.Stdout[0].Text);
    }

    /// <summary>
    /// Asserts that <paramref name="ts"/> is positive and, interpreted as Unix-epoch
    /// milliseconds, lies within <paramref name="toleranceMs"/> of the current UTC time
    /// (in either direction).
    /// </summary>
    private static void AssertRecentTimestampMs(long ts, long toleranceMs)
    {
        Assert.True(ts > 0);

        var nowMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
        var delta = Math.Abs(nowMs - ts);
        var withinTolerance = delta <= toleranceMs;
        Assert.True(withinTolerance, $"timestamp too far from now: delta={delta}ms (ts={ts})");
    }

    /// <summary>
    /// Asserts that a scheme-less endpoint string refers to <paramref name="expectedPort"/>.
    /// Two shapes are accepted: a path-style endpoint that ends with <c>/{port}</c> and has a
    /// non-blank first segment, or a colon-separated endpoint whose last segment is the port.
    /// </summary>
    private static void AssertEndpointHasPort(string endpoint, int expectedPort)
    {
        var hasScheme = endpoint.Contains("://", StringComparison.Ordinal);
        Assert.False(hasScheme, $"unexpected scheme in endpoint: {endpoint}");

        if (!endpoint.Contains('/'))
        {
            // host:port form — the port is whatever follows the last colon.
            var segments = endpoint.Split(':');
            Assert.True(segments.Length >= 2, $"missing host:port in endpoint: {endpoint}");

            var lastSegment = segments[segments.Length - 1];
            Assert.True(int.TryParse(lastSegment, out var actualPort));
            Assert.Equal(expectedPort, actualPort);
            return;
        }

        // Path form — the port is the trailing path segment.
        Assert.EndsWith($"/{expectedPort}", endpoint, StringComparison.Ordinal);
        var leadingSegment = endpoint.Split('/', 2)[0];
        Assert.False(string.IsNullOrWhiteSpace(leadingSegment));
    }

    /// <summary>
    /// Asserts that both timestamps are present and differ by at most
    /// <paramref name="toleranceSeconds"/> seconds, regardless of which one is later.
    /// </summary>
    private static void AssertTimesClose(DateTime? createdAt, DateTime? modifiedAt, double toleranceSeconds)
    {
        Assert.NotNull(createdAt);
        Assert.NotNull(modifiedAt);

        var modified = modifiedAt!.Value;
        var created = createdAt!.Value;
        var delta = Math.Abs((modified - created).TotalSeconds);
        var closeEnough = delta <= toleranceSeconds;
        Assert.True(closeEnough, $"created/modified skew too large: {delta}s");
    }

    /// <summary>
    /// Asserts that both timestamps are present and that <paramref name="after"/> is later than
    /// <paramref name="before"/> by at least <paramref name="minDeltaMs"/> milliseconds, relaxed
    /// by <paramref name="allowSkewMs"/> milliseconds of tolerated skew.
    /// </summary>
    private static void AssertModifiedUpdated(DateTime? before, DateTime? after, int minDeltaMs, int allowSkewMs)
    {
        Assert.NotNull(before);
        Assert.NotNull(after);

        var later = after!.Value;
        var earlier = before!.Value;
        var deltaMs = (later - earlier).TotalMilliseconds;
        var requiredMs = minDeltaMs - allowSkewMs;
        Assert.True(deltaMs >= requiredMs, $"modified_at did not update as expected: delta_ms={deltaMs}");
    }

    /// <summary>
    /// Checks the event sequence recorded for one execution: exactly one init event carrying
    /// <paramref name="executionId"/> and a recent timestamp, and at least one terminal event
    /// (completion or error). When completion events exist there must be exactly one, with a
    /// recent timestamp and a non-negative execution time; when error events exist the first
    /// one must have a name, a value and a recent timestamp.
    /// </summary>
    private static void AssertTerminalEventContract(
        IEnumerable<ExecutionInit> initEvents,
        IEnumerable<ExecutionComplete> completedEvents,
        IEnumerable<ExecutionError> errors,
        string executionId)
    {
        // Snapshot the sequences once so every assertion sees the same contents.
        var inits = initEvents.ToList();
        var completions = completedEvents.ToList();
        var failures = errors.ToList();

        var init = Assert.Single(inits);
        Assert.False(string.IsNullOrWhiteSpace(init.Id));
        Assert.Equal(executionId, init.Id);
        AssertRecentTimestampMs(init.Timestamp, 120_000);

        var hasComplete = completions.Any();
        var hasError = failures.Any();
        Assert.True(hasComplete || hasError);

        if (hasComplete)
        {
            var completion = Assert.Single(completions);
            AssertRecentTimestampMs(completion.Timestamp, 180_000);
            Assert.True(completion.ExecutionTimeMs >= 0);
        }

        if (hasError)
        {
            var firstError = failures[0];
            Assert.False(string.IsNullOrWhiteSpace(firstError.Name));
            Assert.False(string.IsNullOrWhiteSpace(firstError.Value));
            AssertRecentTimestampMs(firstError.Timestamp, 180_000);
        }
    }

    /// <summary>
    /// Polls <c>GetInfoAsync</c> once per second until the sandbox reports
    /// <paramref name="expectedState"/> and returns that info. The state is always fetched at
    /// least once; a <see cref="TimeoutException"/> is thrown when a non-matching state is
    /// observed after <paramref name="timeout"/> has elapsed.
    /// </summary>
    private static async Task<SandboxInfo> WaitForStateAsync(
        Sandbox sandbox,
        string expectedState,
        TimeSpan timeout)
    {
        var deadline = DateTime.UtcNow + timeout;

        var latest = await sandbox.GetInfoAsync();
        while (latest.Status.State != expectedState)
        {
            if (DateTime.UtcNow > deadline)
            {
                throw new TimeoutException($"Timed out waiting for state={expectedState}, last_state={latest.Status.State}");
            }

            await Task.Delay(1000);
            latest = await sandbox.GetInfoAsync();
        }

        return latest;
    }
}

/// <summary>
/// xUnit async fixture that creates one sandbox for the tests sharing it and tears the
/// sandbox down afterwards. Connection settings and defaults are forwarded from an
/// internal <see cref="E2ETestFixture"/>.
/// </summary>
public sealed class SandboxE2ETestFixture : IAsyncLifetime
{
    private readonly E2ETestFixture _baseFixture = new();
    private Sandbox? _sandbox;

    public ConnectionConfig ConnectionConfig => _baseFixture.ConnectionConfig;
    public ConnectionConfig ServerProxyConnectionConfig => _baseFixture.ServerProxyConnectionConfig;
    public string DefaultImage => _baseFixture.DefaultImage;
    public int DefaultTimeoutSeconds => _baseFixture.DefaultTimeoutSeconds;
    public int DefaultReadyTimeoutSeconds => _baseFixture.DefaultReadyTimeoutSeconds;

    /// <summary>The shared sandbox; throws if accessed before <see cref="InitializeAsync"/> succeeded.</summary>
    public Sandbox Sandbox
    {
        get
        {
            if (_sandbox is null)
            {
                throw new InvalidOperationException("Sandbox is not initialized.");
            }

            return _sandbox;
        }
    }

    /// <summary>Creates the shared sandbox using the base fixture's connection and defaults.</summary>
    public async Task InitializeAsync()
    {
        var createOptions = new SandboxCreateOptions
        {
            ConnectionConfig = _baseFixture.ConnectionConfig,
            Image = _baseFixture.DefaultImage,
            TimeoutSeconds = _baseFixture.DefaultTimeoutSeconds,
            ReadyTimeoutSeconds = _baseFixture.DefaultReadyTimeoutSeconds,
            Metadata = new Dictionary<string, string> { ["tag"] = "csharp-e2e-test" },
            Env = new Dictionary<string, string> { ["E2E_TEST"] = "true" },
            HealthCheckPollingInterval = 500
        };

        _sandbox = await Sandbox.CreateAsync(createOptions);
    }

    /// <summary>
    /// Kills the sandbox on a best-effort basis (kill failures are ignored) and then
    /// disposes the local instance. Does nothing when no sandbox was created.
    /// </summary>
    public async Task DisposeAsync()
    {
        var sandbox = _sandbox;
        if (sandbox is not null)
        {
            try
            {
                await sandbox.KillAsync();
            }
            catch
            {
                // Best effort: the sandbox may already be gone.
            }

            await sandbox.DisposeAsync();
        }
    }
}


================================================
FILE: tests/csharp/OpenSandbox.E2ETests/SandboxManagerE2ETests.cs
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using OpenSandbox.Models;
using Xunit;

namespace OpenSandbox.E2ETests;

/// <summary>
/// End-to-end tests for <c>SandboxManager</c> listing filters and error handling. The fixture
/// provides three sandboxes sharing one tag: S1 (team=t1, env=prod, running),
/// S2 (team=t1, env=dev, running) and S3 (env=prod, paused).
/// </summary>
[Collection("CSharp E2E Tests")]
public class SandboxManagerE2ETests : IClassFixture<SandboxManagerE2ETestFixture>
{
    private readonly SandboxManagerE2ETestFixture _fixture;

    public SandboxManagerE2ETests(SandboxManagerE2ETestFixture fixture) => _fixture = fixture;

    /// <summary>Multiple states in one filter match sandboxes in any of those states.</summary>
    [Fact(Timeout = 10 * 60 * 1000)]
    public async Task ListSandboxInfos_StatesFilter_IsOrLogic()
    {
        var manager = _fixture.Manager;
        var tag = _fixture.Tag;
        var first = _fixture.S1;
        var second = _fixture.S2;
        var third = _fixture.S3;

        // Running OR Paused: all three fixture sandboxes qualify.
        var runningOrPaused = await ListIdsAsync(manager, new SandboxFilter
        {
            States = new[] { SandboxStates.Running, SandboxStates.Paused },
            Metadata = new Dictionary<string, string> { ["tag"] = tag },
            PageSize = 50
        });
        Assert.Contains(first.Id, runningOrPaused);
        Assert.Contains(second.Id, runningOrPaused);
        Assert.Contains(third.Id, runningOrPaused);

        // Paused only: just the third sandbox.
        var paused = await ListIdsAsync(manager, new SandboxFilter
        {
            States = new[] { SandboxStates.Paused },
            Metadata = new Dictionary<string, string> { ["tag"] = tag },
            PageSize = 50
        });
        Assert.Contains(third.Id, paused);
        Assert.DoesNotContain(first.Id, paused);
        Assert.DoesNotContain(second.Id, paused);

        // Running only: the first two sandboxes.
        var running = await ListIdsAsync(manager, new SandboxFilter
        {
            States = new[] { SandboxStates.Running },
            Metadata = new Dictionary<string, string> { ["tag"] = tag },
            PageSize = 50
        });
        Assert.Contains(first.Id, running);
        Assert.Contains(second.Id, running);
        Assert.DoesNotContain(third.Id, running);
    }

    /// <summary>Multiple metadata entries in one filter must all match.</summary>
    [Fact(Timeout = 10 * 60 * 1000)]
    public async Task ListSandboxInfos_MetadataFilter_IsAndLogic()
    {
        var manager = _fixture.Manager;
        var tag = _fixture.Tag;
        var first = _fixture.S1;
        var second = _fixture.S2;
        var third = _fixture.S3;

        // tag + team=t1 -> S1 and S2.
        var byTeam = await ListIdsAsync(manager, new SandboxFilter
        {
            Metadata = new Dictionary<string, string> { ["tag"] = tag, ["team"] = "t1" },
            PageSize = 50
        });
        Assert.Contains(first.Id, byTeam);
        Assert.Contains(second.Id, byTeam);
        Assert.DoesNotContain(third.Id, byTeam);

        // tag + team=t1 + env=prod -> S1 only.
        var byTeamAndEnv = await ListIdsAsync(manager, new SandboxFilter
        {
            Metadata = new Dictionary<string, string>
            {
                ["tag"] = tag,
                ["team"] = "t1",
                ["env"] = "prod"
            },
            PageSize = 50
        });
        Assert.Contains(first.Id, byTeamAndEnv);
        Assert.DoesNotContain(second.Id, byTeamAndEnv);
        Assert.DoesNotContain(third.Id, byTeamAndEnv);

        // tag + env=prod -> S1 and S3.
        var byEnv = await ListIdsAsync(manager, new SandboxFilter
        {
            Metadata = new Dictionary<string, string>
            {
                ["tag"] = tag,
                ["env"] = "prod"
            },
            PageSize = 50
        });
        Assert.Contains(first.Id, byEnv);
        Assert.Contains(third.Id, byEnv);
        Assert.DoesNotContain(second.Id, byEnv);

        // tag + team=t2 -> none of the fixture sandboxes.
        var unmatched = await manager.ListSandboxInfosAsync(new SandboxFilter
        {
            Metadata = new Dictionary<string, string>
            {
                ["tag"] = tag,
                ["team"] = "t2"
            },
            PageSize = 50
        });
        var fixtureIds = new HashSet<string> { first.Id, second.Id, third.Id };
        Assert.DoesNotContain(unmatched.Items, info => fixtureIds.Contains(info.Id));
    }

    /// <summary>Every manager operation on a non-existent sandbox id must throw.</summary>
    [Fact(Timeout = 2 * 60 * 1000)]
    public async Task Manager_InvalidSandboxOperations_ShouldFail()
    {
        var manager = _fixture.Manager;
        var missingId = $"sandbox-not-exist-{Guid.NewGuid():N}";

        await Assert.ThrowsAnyAsync<Exception>(() => manager.GetSandboxInfoAsync(missingId));
        await Assert.ThrowsAnyAsync<Exception>(() => manager.PauseSandboxAsync(missingId));
        await Assert.ThrowsAnyAsync<Exception>(() => manager.ResumeSandboxAsync(missingId));
        await Assert.ThrowsAnyAsync<Exception>(() => manager.KillSandboxAsync(missingId));
        await Assert.ThrowsAnyAsync<Exception>(() => manager.RenewSandboxAsync(missingId, 300));
    }

    /// <summary>Runs one list query and returns the ids of the returned items as a set.</summary>
    private static async Task<HashSet<string>> ListIdsAsync(SandboxManager manager, SandboxFilter filter)
    {
        var page = await manager.ListSandboxInfosAsync(filter);
        return page.Items.Select(info => info.Id).ToHashSet();
    }
}

/// <summary>
/// xUnit async fixture for the manager tests. It creates a <c>SandboxManager</c> plus three
/// sandboxes that share a per-run tag but differ in <c>team</c>/<c>env</c> metadata, pauses
/// the third one, and cleans everything up on disposal.
/// </summary>
public sealed class SandboxManagerE2ETestFixture : IAsyncLifetime
{
    private readonly E2ETestFixture _baseFixture = new();
    private SandboxManager? _manager;
    private Sandbox? _s1;
    private Sandbox? _s2;
    private Sandbox? _s3;
    private string? _tag;

    public SandboxManager Manager => _manager ?? throw new InvalidOperationException("Manager is not initialized.");
    public Sandbox S1 => _s1 ?? throw new InvalidOperationException("S1 is not initialized.");
    public Sandbox S2 => _s2 ?? throw new InvalidOperationException("S2 is not initialized.");
    public Sandbox S3 => _s3 ?? throw new InvalidOperationException("S3 is not initialized.");
    public string Tag => _tag ?? throw new InvalidOperationException("Tag is not initialized.");

    /// <summary>
    /// Creates the manager and the three sandboxes in order, then pauses the third sandbox
    /// and waits (up to three minutes) for it to report the Paused state.
    /// </summary>
    public async Task InitializeAsync()
    {
        var managerOptions = new SandboxManagerOptions
        {
            ConnectionConfig = _baseFixture.ConnectionConfig
        };
        _manager = SandboxManager.Create(managerOptions);

        // Per-run tag, truncated to 20 characters.
        var tag = $"csharp-manager-{Guid.NewGuid():N}".Substring(0, 20);
        _tag = tag;

        var s1Metadata = new Dictionary<string, string>
        {
            ["tag"] = tag,
            ["team"] = "t1",
            ["env"] = "prod"
        };
        _s1 = await CreateSandboxAsync(s1Metadata);

        var s2Metadata = new Dictionary<string, string>
        {
            ["tag"] = tag,
            ["team"] = "t1",
            ["env"] = "dev"
        };
        _s2 = await CreateSandboxAsync(s2Metadata);

        var s3Metadata = new Dictionary<string, string>
        {
            ["tag"] = tag,
            ["env"] = "prod"
        };
        var pausedSandbox = await CreateSandboxAsync(s3Metadata);
        _s3 = pausedSandbox;

        await _manager.PauseSandboxAsync(pausedSandbox.Id);
        await WaitForStateAsync(pausedSandbox.Id, SandboxStates.Paused, TimeSpan.FromMinutes(3));
    }

    /// <summary>
    /// Kills (best effort) and disposes every sandbox that was created, then disposes the
    /// manager if it exists.
    /// </summary>
    public async Task DisposeAsync()
    {
        var created = new[] { _s1, _s2, _s3 }.OfType<Sandbox>();
        foreach (var sandbox in created)
        {
            try
            {
                await sandbox.KillAsync();
            }
            catch
            {
                // Best effort: ignore failures while killing.
            }

            await sandbox.DisposeAsync();
        }

        var manager = _manager;
        if (manager is not null)
        {
            await manager.DisposeAsync();
        }
    }

    /// <summary>Creates one sandbox with the base fixture defaults and the given metadata.</summary>
    private async Task<Sandbox> CreateSandboxAsync(IReadOnlyDictionary<string, string> metadata)
    {
        var createOptions = new SandboxCreateOptions
        {
            ConnectionConfig = _baseFixture.ConnectionConfig,
            Image = _baseFixture.DefaultImage,
            TimeoutSeconds = _baseFixture.DefaultTimeoutSeconds,
            ReadyTimeoutSeconds = _baseFixture.DefaultReadyTimeoutSeconds,
            Metadata = metadata,
            Env = new Dictionary<string, string> { ["E2E_TEST"] = "true" },
            HealthCheckPollingInterval = 500
        };

        return await Sandbox.CreateAsync(createOptions);
    }

    /// <summary>
    /// Polls the manager once per second until the sandbox reports
    /// <paramref name="expectedState"/>; throws <see cref="TimeoutException"/> when a
    /// non-matching state is observed after <paramref name="timeout"/> has elapsed.
    /// </summary>
    private async Task WaitForStateAsync(string sandboxId, string expectedState, TimeSpan timeout)
    {
        var deadline = DateTime.UtcNow + timeout;

        var latest = await Manager.GetSandboxInfoAsync(sandboxId);
        while (latest.Status.State != expectedState)
        {
            if (DateTime.UtcNow > deadline)
            {
                throw new TimeoutException($"Timed out waiting for state={expectedState}, last_state={latest.Status.State}");
            }

            await Task.Delay(1000);
            latest = await Manager.GetSandboxInfoAsync(sandboxId);
        }
    }
}


================================================
FILE: tests/java/build.gradle.kts
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Build script for the Java end-to-end test project.
plugins {
    java
    // Spotless plugin; its version comes from the `libs` version catalog.
    alias(libs.plugins.spotless)
}

group = "com.alibaba.opensandbox"
version = "1.0.0"

// Compile and target Java 17.
java {
    sourceCompatibility = JavaVersion.VERSION_17
    targetCompatibility = JavaVersion.VERSION_17
}

repositories {
    // NOTE(review): mavenLocal() is declared twice — once here without a filter and once
    // below inside exclusiveContent. Confirm whether the unfiltered declaration is intended.
    mavenLocal()
    // Artifacts in the com.alibaba.opensandbox group are resolved from the local Maven
    // repository only.
    exclusiveContent {
        forRepository {
            mavenLocal()
        }
        filter {
            includeGroup("com.alibaba.opensandbox")
        }
    }
    mavenCentral()
}

// Never cache dynamic or changing modules, so the `latest.integration` SDK dependencies
// declared in this script are re-resolved on every build.
configurations.configureEach {
    resolutionStrategy.cacheDynamicVersionsFor(0, "seconds")
    resolutionStrategy.cacheChangingModulesFor(0, "seconds")
}

dependencies {
    // OpenSandbox Kotlin SDKs. `latest.integration` is a dynamic version, so the newest
    // available build of each SDK is picked up at resolution time.
    testImplementation("com.alibaba.opensandbox:sandbox:latest.integration")
    testImplementation("com.alibaba.opensandbox:code-interpreter:latest.integration")

    // Test frameworks. JUnit Jupiter 5.x.y and JUnit Platform 1.x.y are released together
    // and must be used as a matching pair; the launcher is pinned to 1.13.4, so Jupiter is
    // kept on the corresponding 5.13.4 instead of the mismatched 5.9.2.
    testImplementation("org.junit.jupiter:junit-jupiter:5.13.4")
    testRuntimeOnly("org.junit.platform:junit-platform-launcher:1.13.4")
}

// Every Test task in this project runs on the JUnit Platform.
tasks.withType<Test> {
    useJUnitPlatform()
}

// Dedicated task that runs only the tests tagged "e2e".
tasks.register<Test>("e2eTest") {
    description = "Runs end-to-end tests."
    group = "verification"

    // Custom Test tasks must be wired to a source set explicitly: recent Gradle versions
    // (the wrapper pins 9.x) no longer fall back to the `test` source set by convention,
    // which would leave this task without any test classes to execute.
    val testSourceSet = sourceSets["test"]
    testClassesDirs = testSourceSet.output.classesDirs
    classpath = testSourceSet.runtimeClasspath

    useJUnitPlatform {
        includeTags("e2e")
    }
}

// Java source formatting rules applied by the Spotless plugin.
spotless {
    java {
        // google-java-format in AOSP style.
        // NOTE(review): the version catalog also declares google-java-format = "1.19.2";
        // this literal is not read from it, so the two must be kept in sync by hand.
        googleJavaFormat("1.19.2").aosp()
        removeUnusedImports()
        trimTrailingWhitespace()
        endWithNewline()
    }
}


================================================
FILE: tests/java/gradle/libs.versions.toml
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Version catalog for the Java E2E test build (exposed to build scripts as `libs`).
[versions]
spotless = "6.23.3"
# NOTE(review): build.gradle.kts passes "1.19.2" to googleJavaFormat() as a literal and
# does not reference this entry; keep both values in sync or wire the script to the catalog.
google-java-format = "1.19.2"

[plugins]
# Referenced from build.gradle.kts via alias(libs.plugins.spotless).
spotless = { id = "com.diffplug.spotless", version.ref = "spotless" }


================================================
FILE: tests/java/gradle/wrapper/gradle-wrapper.properties
================================================
# Gradle wrapper settings: which Gradle distribution the wrapper downloads and where it is stored.
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
# Pins the build to the Gradle 9.2.1 "-all" distribution.
distributionUrl=https\://services.gradle.org/distributions/gradle-9.2.1-all.zip
# Download timeout (presumably in milliseconds — confirm against the wrapper documentation).
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists


================================================
FILE: tests/java/gradle.properties
================================================
# Build-wide Gradle settings for the Java E2E test project.
# JVM arguments for the Gradle build process: 2 GB max heap, 512 MB max metaspace.
org.gradle.jvmargs=-Xmx2g -XX:MaxMetaspaceSize=512m
# Enable parallel execution and the build cache.
org.gradle.parallel=true
org.gradle.caching=true


================================================
FILE: tests/java/gradlew
================================================
#!/bin/sh

#
# Copyright © 2015 the original authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
#

##############################################################################
#
#   Gradle start up script for POSIX generated by Gradle.
#
#   Important for running:
#
#   (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
#       noncompliant, but you have some other compliant shell such as ksh or
#       bash, then to run this script, type that shell name before the whole
#       command line, like:
#
#           ksh Gradle
#
#       Busybox and similar reduced shells will NOT work, because this script
#       requires all of these POSIX shell features:
#         * functions;
#         * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
#           «${var#prefix}», «${var%suffix}», and «$( cmd )»;
#         * compound commands having a testable exit status, especially «case»;
#         * various built-in commands including «command», «set», and «ulimit».
#
#   Important for patching:
#
#   (2) This script targets any POSIX shell, so it avoids extensions provided
#       by Bash, Ksh, etc; in particular arrays are avoided.
#
#       The "traditional" practice of packing multiple parameters into a
#       space-separated string is a well documented source of bugs and security
#       problems, so this is (mostly) avoided, by progressively accumulating
#       options in "$@", and eventually passing that to Java.
#
#       Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
#       and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
#       see the in-line comments for details.
#
#       There are tweaks for specific operating systems such as AIX, CygWin,
#       Darwin, MinGW, and NonStop.
#
#   (3) This script is generated from the Groovy template
#       https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
#       within the Gradle project.
#
#       You can find Gradle at https://github.com/gradle/gradle/.
#
##############################################################################

# Attempt to set APP_HOME

# Resolve links: $0 may be a link
app_path=$0

# Need this for daisy-chained symlinks.
while
    APP_HOME=${app_path%"${app_path##*/}"}  # leaves a trailing /; empty if no leading path
    [ -h "$app_path" ]
do
    ls=$( ls -ld "$app_path" )
    link=${ls#*' -> '}
    case $link in             #(
      /*)   app_path=$link ;; #(
      *)    app_path=$APP_HOME$link ;;
    esac
done

# This is normally unused
# shellcheck disable=SC2034
APP_BASE_NAME=${0##*/}
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s\n' "$PWD" ) || exit

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD=maximum

# Print all arguments, joined into one line, on standard error.
warn () {
    echo "$*" >&2
}

# Print all arguments on standard error, surrounded by blank lines, then
# terminate the script with exit status 1.
die () {
    {
        echo
        echo "$*"
        echo
    } >&2
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "$( uname )" in                #(
  CYGWIN* )         cygwin=true  ;; #(
  Darwin* )         darwin=true  ;; #(
  MSYS* | MINGW* )  msys=true    ;; #(
  NONSTOP* )        nonstop=true ;;
esac


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD=$JAVA_HOME/jre/sh/java
    else
        JAVACMD=$JAVA_HOME/bin/java
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD=java
    if ! command -v java >/dev/null 2>&1
    then
        die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
fi

# Increase the maximum file descriptors if we can.
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
    case $MAX_FD in #(
      max*)
        # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
        # shellcheck disable=SC2039,SC3045
        MAX_FD=$( ulimit -H -n ) ||
            warn "Could not query maximum file descriptor limit"
    esac
    case $MAX_FD in  #(
      '' | soft) :;; #(
      *)
        # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
        # shellcheck disable=SC2039,SC3045
        ulimit -n "$MAX_FD" ||
            warn "Could not set maximum file descriptor limit to $MAX_FD"
    esac
fi

# Collect all arguments for the java command, stacking in reverse order:
#   * args from the command line
#   * the main class name
#   * -classpath
#   * -D...appname settings
#   * --module-path (only if needed)
#   * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.

# For Cygwin or MSYS, switch paths to Windows format before running java
if "$cygwin" || "$msys" ; then
    APP_HOME=$( cygpath --path --mixed "$APP_HOME" )

    JAVACMD=$( cygpath --unix "$JAVACMD" )

    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    for arg do
        if
            case $arg in                                #(
              -*)   false ;;                            # don't mess with options #(
              /?*)  t=${arg#/} t=/${t%%/*}              # looks like a POSIX filepath
                    [ -e "$t" ] ;;                      #(
              *)    false ;;
            esac
        then
            arg=$( cygpath --path --ignore --mixed "$arg" )
        fi
        # Roll the args list around exactly as many times as the number of
        # args, so each arg winds up back in the position where it started, but
        # possibly modified.
        #
        # NB: a `for` loop captures its iteration list before it begins, so
        # changing the positional parameters here affects neither the number of
        # iterations, nor the values presented in `arg`.
        shift                   # remove old arg
        set -- "$@" "$arg"      # push replacement arg
    done
fi


# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Collect all arguments for the java command:
#   * DEFAULT_JVM_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
#     and any embedded shellness will be escaped.
#   * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
#     treated as '${Hostname}' itself on the command line.

set -- \
        "-Dorg.gradle.appname=$APP_BASE_NAME" \
        -jar "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" \
        "$@"

# Stop when "xargs" is not available.
if ! command -v xargs >/dev/null 2>&1
then
    die "xargs is not available"
fi

# Use "xargs" to parse quoted args.
#
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
#
# In Bash we could simply go:
#
#   readarray ARGS < <( xargs -n1 <<<"$var" ) &&
#   set -- "${ARGS[@]}" "$@"
#
# but POSIX shell has neither arrays nor process substitution, so instead we
# post-process each arg (as a line of input to sed) to backslash-escape any
# character that might be a shell metacharacter, then use eval to reverse
# that process (while maintaining the separation between arguments), and wrap
# the whole thing up as a single "set" statement.
#
# This will of course break if any of these variables contains a newline or
# an unmatched quote.
#

eval "set -- $(
        printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
        xargs -n1 |
        sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
        tr '\n' ' '
    )" '"$@"'

exec "$JAVACMD" "$@"


================================================
FILE: tests/java/settings.gradle.kts
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

rootProject.name = "opensandbox-java-e2e-tests"


================================================
FILE: tests/java/src/test/java/com/alibaba/opensandbox/e2e/BaseE2ETest.java
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.e2e;

import static org.junit.jupiter.api.Assertions.*;

import com.alibaba.opensandbox.sandbox.config.ConnectionConfig;
import java.io.IOException;
import java.io.InputStream;
import java.time.Duration;
import java.time.OffsetDateTime;
import java.util.*;
import org.junit.jupiter.api.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Base class for all E2E tests.
 *
 * <p>When the class is initialized it loads {@code test.properties} from the classpath and builds
 * a {@link ConnectionConfig} that subclasses can share. It also logs the start of every test and
 * offers assertion helpers used across the E2E suites.
 */
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
public abstract class BaseE2ETest {

    protected static final Logger logger = LoggerFactory.getLogger(BaseE2ETest.class);

    // ==========================================
    // Configuration Keys
    // ==========================================
    private static final String PROP_API_KEY = "opensandbox.test.api.key";
    private static final String PROP_DOMAIN = "opensandbox.test.domain";
    private static final String PROP_PROTOCOL = "opensandbox.test.protocol";
    private static final String PROP_IMG_DEFAULT = "opensandbox.sandbox.default.image";

    /** Protocol used when {@code opensandbox.test.protocol} is not configured. */
    private static final String DEFAULT_PROTOCOL = "https";

    // ==========================================
    // Shared State (Static)
    // ==========================================
    protected static final Properties testProperties = new Properties();
    protected static ConnectionConfig sharedConnectionConfig;

    static {
        // Order matters: the shared config is built from the loaded properties.
        loadTestProperties();
        initializeSharedConfig();
    }

    /**
     * Returns the configured default sandbox image.
     *
     * @return the value of {@code opensandbox.sandbox.default.image}, or {@code null} when the
     *     property is not set
     */
    protected static String getSandboxImage() {
        return testProperties.getProperty(PROP_IMG_DEFAULT);
    }

    /**
     * Builds a fresh connection config from the test properties.
     *
     * @param useServerProxy value forwarded to the builder's {@code useServerProxy} option
     * @return a new config with a one-minute request timeout
     */
    protected static ConnectionConfig createConnectionConfig(boolean useServerProxy) {
        String protocol = testProperties.getProperty(PROP_PROTOCOL, DEFAULT_PROTOCOL);
        return ConnectionConfig.builder()
                .apiKey(testProperties.getProperty(PROP_API_KEY))
                .domain(testProperties.getProperty(PROP_DOMAIN))
                .requestTimeout(Duration.ofMinutes(1))
                .protocol(protocol)
                .useServerProxy(useServerProxy)
                .build();
    }

    /**
     * Loads {@code test.properties} from the classpath into {@link #testProperties}.
     *
     * <p>A missing file only produces a warning; a read failure aborts class initialization.
     *
     * @throws RuntimeException if the resource exists but cannot be read
     */
    private static void loadTestProperties() {
        try (InputStream input =
                BaseE2ETest.class.getClassLoader().getResourceAsStream("test.properties")) {
            if (input != null) {
                testProperties.load(input);
            } else {
                logger.warn("test.properties file not found, using default values.");
            }
        } catch (IOException e) {
            throw new RuntimeException("Failed to load test properties", e);
        }
    }

    /**
     * Builds {@link #sharedConnectionConfig} from the test properties.
     *
     * <p>Unlike {@link #createConnectionConfig(boolean)} this does not set {@code useServerProxy},
     * so the builder's own default applies.
     */
    private static void initializeSharedConfig() {
        String protocol = testProperties.getProperty(PROP_PROTOCOL, DEFAULT_PROTOCOL);
        sharedConnectionConfig =
                ConnectionConfig.builder()
                        .apiKey(testProperties.getProperty(PROP_API_KEY))
                        .domain(testProperties.getProperty(PROP_DOMAIN))
                        .requestTimeout(Duration.ofMinutes(1))
                        .protocol(protocol)
                        .build();
    }

    /** Logs the display name of each test before it runs. */
    @BeforeEach
    void beforeEach(TestInfo testInfo) {
        logger.info("=== Starting test: {} ===", testInfo.getDisplayName());
    }

    // ==========================================
    // Shared assertion helpers (ported from python e2e style)
    // ==========================================

    /** Returns the current wall-clock time in epoch milliseconds. */
    protected static long nowMs() {
        return System.currentTimeMillis();
    }

    /**
     * Asserts that an epoch-millisecond timestamp is positive and within a tolerance of now.
     *
     * @param ts timestamp in epoch milliseconds
     * @param toleranceMs maximum allowed absolute distance from the current time, in milliseconds
     */
    protected static void assertRecentTimestampMs(long ts, long toleranceMs) {
        assertTrue(ts > 0, "timestamp must be > 0");
        long delta = Math.abs(nowMs() - ts);
        assertTrue(
                delta <= toleranceMs,
                "timestamp too far from now: delta=" + delta + "ms (ts=" + ts + ")");
    }

    /**
     * Asserts that an endpoint string carries the expected port.
     *
     * <p>Two shapes are accepted, neither with a URL scheme: a routed form containing {@code /}
     * that must end with {@code /<port>} and have a non-blank first segment, or a plain {@code
     * host:port} form.
     *
     * @param endpoint endpoint string to check
     * @param expectedPort port the endpoint must reference
     */
    protected static void assertEndpointHasPort(String endpoint, int expectedPort) {
        assertNotNull(endpoint);
        assertFalse(endpoint.contains("://"), "unexpected scheme in endpoint: " + endpoint);
        if (endpoint.contains("/")) {
            assertTrue(
                    endpoint.endsWith("/" + expectedPort),
                    "endpoint route must end with /" + expectedPort + ": " + endpoint);
            String prefix = endpoint.split("/", 2)[0];
            assertFalse(prefix.isBlank(), "missing domain in endpoint: " + endpoint);
            return;
        }
        // Split on the last ':' so the host part may itself contain colons.
        int idx = endpoint.lastIndexOf(':');
        assertTrue(idx > 0, "missing host:port in endpoint: " + endpoint);
        String host = endpoint.substring(0, idx);
        String port = endpoint.substring(idx + 1);
        assertFalse(host.isBlank(), "missing host in endpoint: " + endpoint);
        assertTrue(port.matches("\\d+"), "non-numeric port in endpoint: " + endpoint);
        assertEquals(expectedPort, Integer.parseInt(port), "endpoint port mismatch: " + endpoint);
    }

    /**
     * Asserts that two instants differ by at most a number of whole seconds, in either order.
     *
     * @param createdAt first instant; must not be null
     * @param modifiedAt second instant; must not be null
     * @param toleranceSeconds maximum allowed skew in whole seconds (fractions are truncated)
     */
    protected static void assertTimesClose(
            OffsetDateTime createdAt, OffsetDateTime modifiedAt, long toleranceSeconds) {
        assertNotNull(createdAt, "createdAt must not be null");
        assertNotNull(modifiedAt, "modifiedAt must not be null");
        // Duration.getSeconds() floors negative durations (-1.5s reports -2), so take abs() on
        // the Duration itself; the skew is then truncated identically for both orderings.
        long delta = Duration.between(createdAt, modifiedAt).abs().getSeconds();
        assertTrue(delta <= toleranceSeconds, "created/modified skew too large: " + delta + "s");
    }
}


================================================
FILE: tests/java/src/test/java/com/alibaba/opensandbox/e2e/CodeInterpreterE2ETest.java
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.e2e;

import static org.junit.jupiter.api.Assertions.*;

import com.alibaba.opensandbox.codeinterpreter.CodeInterpreter;
import com.alibaba.opensandbox.codeinterpreter.domain.models.execd.executions.CodeContext;
import com.alibaba.opensandbox.codeinterpreter.domain.models.execd.executions.RunCodeRequest;
import com.alibaba.opensandbox.codeinterpreter.domain.models.execd.executions.SupportedLanguage;
import com.alibaba.opensandbox.sandbox.Sandbox;
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.*;
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.*;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.*;
import org.junit.jupiter.api.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Comprehensive E2E tests for CodeInterpreter runCode functionality.
 *
 * <p>Tests code execution capabilities including:
 *
 * <ul>
 *   <li>Multi-language code execution (Java, Python, Go, TypeScript)
 *   <li>Session state management and variable persistence
 *   <li>Context isolation between different execution contexts
 *   <li>Error handling and recovery mechanisms
 *   <li>Event handling patterns identical to runCommand
 * </ul>
 *
 * <p>A single sandbox and CodeInterpreter are created in {@code @BeforeAll} and shared by the
 * tests in this class.
 */
@Tag("e2e")
@DisplayName("CodeInterpreter E2E Tests - RunCode Functionality")
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
public class CodeInterpreterE2ETest extends BaseE2ETest {

    protected static final Logger logger = LoggerFactory.getLogger(CodeInterpreterE2ETest.class);

    private Sandbox sandbox;
    private CodeInterpreter codeInterpreter;

    /**
     * Checks the events recorded for one execution: exactly one init event whose ID equals
     * {@code executionId}, and at least one complete or error event, each with a recent
     * timestamp.
     *
     * @param initEvents init events captured by the handlers
     * @param completedEvents completion events captured by the handlers
     * @param errors error events captured by the handlers
     * @param executionId ID of the execution the events must belong to
     */
    private static void assertTerminalEventContract(
            List<ExecutionInit> initEvents,
            List<ExecutionComplete> completedEvents,
            List<ExecutionError> errors,
            String executionId) {
        // Maximum distance from "now" accepted for any event timestamp.
        final long toleranceMs = 180_000;

        assertEquals(1, initEvents.size(), "init event must exist exactly once");
        ExecutionInit init = initEvents.get(0);
        assertNotNull(init.getId());
        assertFalse(init.getId().isBlank());
        assertEquals(executionId, init.getId());
        assertRecentTimestampMs(init.getTimestamp(), toleranceMs);

        boolean hasComplete = !completedEvents.isEmpty();
        boolean hasError = !errors.isEmpty();
        assertTrue(hasComplete || hasError, "expected at least one of complete/error");

        if (hasComplete) {
            assertEquals(1, completedEvents.size());
            ExecutionComplete complete = completedEvents.get(0);
            assertRecentTimestampMs(complete.getTimestamp(), toleranceMs);
            assertTrue(complete.getExecutionTimeInMillis() >= 0);
        }

        if (hasError) {
            // Only the first recorded error is inspected.
            ExecutionError firstError = errors.get(0);
            assertNotNull(firstError.getName());
            assertFalse(firstError.getName().isBlank());
            assertNotNull(firstError.getValue());
            assertRecentTimestampMs(firstError.getTimestamp(), toleranceMs);
        }
    }

    /**
     * Creates the sandbox and the CodeInterpreter used by the tests in this class.
     *
     * <p>The host log directory is mounted into the sandbox at the same path, and
     * {@code EXECD_LOG_FILE} points to a file inside it.
     */
    @BeforeAll
    void setup() {
        // Same path on the host and inside the sandbox.
        final String logDir = "/tmp/opensandbox-e2e/logs";
        final String execdLogFile = logDir + "/execd.log";

        Volume logVolume =
                Volume.builder()
                        .name("execd-logs")
                        .host(Host.of(logDir))
                        .mountPath(logDir)
                        .readOnly(false)
                        .build();

        sandbox =
                Sandbox.builder()
                        .connectionConfig(sharedConnectionConfig)
                        .entrypoint(List.of("/opt/opensandbox/code-interpreter.sh"))
                        .image(getSandboxImage())
                        .resource(java.util.Map.of("cpu", "2", "memory", "4Gi"))
                        .timeout(Duration.ofMinutes(20))
                        .readyTimeout(Duration.ofSeconds(60))
                        .metadata(java.util.Map.of("tag", "e2e-code-interpreter"))
                        .env("E2E_TEST", "true")
                        .env("GO_VERSION", "1.25")
                        .env("JAVA_VERSION", "21")
                        .env("NODE_VERSION", "22")
                        .env("PYTHON_VERSION", "3.12")
                        .env("EXECD_LOG_FILE", execdLogFile)
                        .healthCheckPollingInterval(Duration.ofMillis(500))
                        .volume(logVolume)
                        .build();

        codeInterpreter = CodeInterpreter.builder().fromSandbox(sandbox).build();

        // Fail fast if the interpreter could not be created.
        assertNotNull(codeInterpreter);
        assertNotNull(codeInterpreter.getId());
    }

    /**
     * Best-effort cleanup of the sandbox created in {@code setup()}.
     *
     * <p>Kill and close are attempted independently, so a failure of one does not skip the other.
     * Failures are logged rather than rethrown so that cleanup problems never fail the test class.
     */
    @AfterAll
    void teardown() {
        if (sandbox == null) {
            // setup() failed before the sandbox was created; nothing to clean up.
            return;
        }
        try {
            sandbox.kill();
        } catch (Exception e) {
            logger.warn("Failed to kill sandbox during teardown", e);
        }
        try {
            sandbox.close();
        } catch (Exception e) {
            logger.warn("Failed to close sandbox during teardown", e);
        }
    }

    // ==========================================
    // Basic Code Execution Tests
    // ==========================================
    /** Verifies that the interpreter exists, has an ID, and exposes all of its services. */
    @Test
    @Order(1)
    @DisplayName("CodeInterpreter Creation and Basic Functionality")
    @Timeout(value = 2, unit = TimeUnit.MINUTES)
    void testCodeInterpreterBasicFunctionality() {
        logger.info("Testing CodeInterpreter creation and basic functionality");

        // 1. The interpreter built in @BeforeAll must exist and carry an ID.
        CodeInterpreter interpreter = codeInterpreter;
        assertNotNull(interpreter);
        assertNotNull(interpreter.getId());

        // 2. Every service accessor must return a non-null service.
        assertNotNull(interpreter.codes());
        assertNotNull(interpreter.files());
        assertNotNull(interpreter.commands());
        assertNotNull(interpreter.metrics());
    }

    /**
     * Runs three Java snippets in one context: a simple evaluation with event handlers, a
     * follow-up that re-reads {@code result} from the first snippet, and a failing snippet.
     */
    @Test
    @Order(2)
    @DisplayName("Java Code Execution")
    @Timeout(value = 10, unit = TimeUnit.MINUTES)
    void testJavaCodeExecution() {
        logger.info("Testing Java code execution");

        CodeContext ctx = codeInterpreter.codes().createContext(SupportedLanguage.JAVA);

        assertNotNull(ctx);
        assertNotNull(ctx.getId());
        assertEquals("java", ctx.getLanguage());

        // Synchronized sinks that record every event delivered to the handlers.
        List<OutputMessage> stdoutSink = Collections.synchronizedList(new ArrayList<>());
        List<OutputMessage> stderrSink = Collections.synchronizedList(new ArrayList<>());
        List<ExecutionResult> resultSink = Collections.synchronizedList(new ArrayList<>());
        List<ExecutionError> errorSink = Collections.synchronizedList(new ArrayList<>());
        List<ExecutionComplete> completeSink = Collections.synchronizedList(new ArrayList<>());
        List<ExecutionInit> initSink = Collections.synchronizedList(new ArrayList<>());

        ExecutionHandlers handlers =
                ExecutionHandlers.builder()
                        .onStdout(
                                (OutputMessage out) -> {
                                    stdoutSink.add(out);
                                    logger.info("Java stdout: {}", out.getText());
                                })
                        .onStderr(
                                (OutputMessage err) -> {
                                    stderrSink.add(err);
                                    logger.warn("Java stderr: {}", err.getText());
                                })
                        .onResult(
                                (ExecutionResult res) -> {
                                    resultSink.add(res);
                                    logger.info("Java result: {}", res.getText());
                                })
                        .onExecutionComplete(
                                (ExecutionComplete done) -> {
                                    completeSink.add(done);
                                    logger.info(
                                            "Java execution completed in {} ms",
                                            done.getExecutionTimeInMillis());
                                })
                        .onError(
                                (ExecutionError failure) -> {
                                    errorSink.add(failure);
                                    logger.error(
                                            "Java error: {} - {}",
                                            failure.getName(),
                                            failure.getValue());
                                })
                        .onInit(
                                (ExecutionInit started) -> {
                                    initSink.add(started);
                                    logger.info(
                                            "Java execution initialized with ID: {}",
                                            started.getId());
                                })
                        .build();

        // 1. Simple evaluation with handlers attached.
        String helloSnippet =
                String.join(
                        "\n",
                        "System.out.println(\"Hello from Java!\");",
                        "int result = 2 + 2;",
                        "System.out.println(\"2 + 2 = \" + result);",
                        "result");
        RunCodeRequest helloRequest =
                RunCodeRequest.builder().code(helloSnippet).context(ctx).handlers(handlers).build();

        Execution helloExec = codeInterpreter.codes().run(helloRequest);

        assertNotNull(helloExec);
        assertNotNull(helloExec.getId());
        assertFalse(helloExec.getId().isBlank());
        assertEquals("4", helloExec.getResult().get(0).getText());
        assertTerminalEventContract(initSink, completeSink, errorSink, helloExec.getId());
        assertTrue(errorSink.isEmpty());
        assertTrue(stdoutSink.stream().anyMatch(m -> m.getText().contains("Hello from Java!")));
        assertTrue(
                stdoutSink.stream().anyMatch(m -> m.getText().replace(" ", "").contains("2+2=4")));

        // 2. Follow-up in the same context, without handlers; it ends by evaluating the
        //    `result` variable declared by the first snippet.
        String streamSnippet =
                String.join(
                        "\n",
                        "import java.util.*;",
                        "List<Integer> numbers = Arrays.asList(1, 2, 3, 4, 5);",
                        "int sum = numbers.stream().mapToInt(Integer::intValue).sum();",
                        "System.out.println(\"Numbers: \" + numbers);",
                        "System.out.println(\"Sum: \" + sum);",
                        "result");
        RunCodeRequest streamRequest =
                RunCodeRequest.builder().code(streamSnippet).context(ctx).build();

        Execution streamExec = codeInterpreter.codes().run(streamRequest);

        assertNotNull(streamExec);
        assertNotNull(streamExec.getId());
        assertEquals("4", streamExec.getResult().get(0).getText());

        // 3. Java error handling test: reset the sinks so only this run's events are checked.
        stdoutSink.clear();
        stderrSink.clear();
        resultSink.clear();
        errorSink.clear();
        completeSink.clear();
        initSink.clear();

        String failingSnippet = "int x = 10 / 0; // This will cause ArithmeticException";
        RunCodeRequest failingRequest =
                RunCodeRequest.builder()
                        .code(failingSnippet)
                        .context(ctx)
                        .handlers(handlers)
                        .build();

        Execution failingExec = codeInterpreter.codes().run(failingRequest);

        assertNotNull(failingExec);
        assertNotNull(failingExec.getId());
        assertNotNull(failingExec.getError());
        assertEquals("EvalException", failingExec.getError().getName());
        assertTerminalEventContract(initSink, completeSink, errorSink, failingExec.getId());
    }

    /**
     * Runs four Python snippets in one context: a simple print, a state setup that re-reads
     * {@code result}, a check that earlier variables are still visible, and a failing snippet.
     */
    @Test
    @Order(3)
    @DisplayName("Python Code Execution")
    @Timeout(value = 10, unit = TimeUnit.MINUTES)
    void testPythonCodeExecution() {
        logger.info("Testing Python code execution");

        // The interpreter is class-scoped and was created in @BeforeAll.
        assertNotNull(codeInterpreter);
        CodeContext ctx = codeInterpreter.codes().createContext(SupportedLanguage.PYTHON);
        assertNotNull(ctx);
        assertEquals("python", ctx.getLanguage());
        Duration execTimeout = Duration.ofMinutes(2);

        // Synchronized sinks that record the events delivered to the handlers.
        List<OutputMessage> stdoutSink = Collections.synchronizedList(new ArrayList<>());
        List<ExecutionComplete> completeSink = Collections.synchronizedList(new ArrayList<>());
        List<ExecutionError> errorSink = Collections.synchronizedList(new ArrayList<>());

        ExecutionHandlers handlers =
                ExecutionHandlers.builder()
                        .onStdout(
                                (OutputMessage out) -> {
                                    stdoutSink.add(out);
                                    logger.info("Python stdout: {}", out.getText());
                                })
                        .onExecutionComplete(
                                (ExecutionComplete done) -> {
                                    completeSink.add(done);
                                    logger.info(
                                            "Python execution completed in {} ms",
                                            done.getExecutionTimeInMillis());
                                })
                        .onError(
                                (ExecutionError failure) -> {
                                    errorSink.add(failure);
                                    logger.error(
                                            "Python error: {} - {}",
                                            failure.getName(),
                                            failure.getValue());
                                })
                        .build();

        // 1. Simple Python execution with handlers attached.
        String helloSnippet =
                String.join(
                        "\n",
                        "print('Hello from Python!')",
                        "result = 2 + 2",
                        "print(f'2 + 2 = {result}')");
        RunCodeRequest helloRequest =
                RunCodeRequest.builder().code(helloSnippet).context(ctx).handlers(handlers).build();

        Execution helloExec =
                runWithRetry(helloRequest, execTimeout, 2, "python-simple-execution");

        assertNotNull(helloExec);
        assertNotNull(helloExec.getId());
        assertFalse(completeSink.isEmpty());
        assertTrue(errorSink.isEmpty());

        // 2. Define variables for later steps; the snippet ends by evaluating `result`
        //    from step 1.
        String stateSnippet =
                String.join(
                        "\n",
                        "x = 42",
                        "y = 'persistent variable'",
                        "my_list = [1, 2, 3, 4, 5]",
                        "print(f'x={x}, y=\"{y}\", list={my_list}')",
                        "result");
        RunCodeRequest stateRequest =
                RunCodeRequest.builder().code(stateSnippet).context(ctx).build();

        Execution stateExec = runWithRetry(stateRequest, execTimeout, 2, "python-state-setup");

        assertNotNull(stateExec);
        assertNotNull(stateExec.getId());
        assertEquals("4", stateExec.getResult().get(0).getText());

        // 3. Variables from step 2 must still be visible in the same context.
        String persistSnippet =
                String.join(
                        "\n",
                        "print(f'Previously set variables: x={x}, y={y}')",
                        "z = sum(my_list)",
                        "print(f'Sum of list: {z}')");
        RunCodeRequest persistRequest =
                RunCodeRequest.builder().code(persistSnippet).context(ctx).build();

        Execution persistExec =
                runWithRetry(persistRequest, execTimeout, 2, "python-state-persistence");

        assertNotNull(persistExec);
        assertNotNull(persistExec.getId());

        // 4. Python error handling: either an error object or stderr output is accepted.
        String failingSnippet = "print(undefined_variable)  # This will cause NameError";
        RunCodeRequest failingRequest =
                RunCodeRequest.builder()
                        .code(failingSnippet)
                        .context(ctx)
                        .handlers(handlers)
                        .build();

        Execution failingExec =
                runWithRetry(failingRequest, execTimeout, 2, "python-runtime-error");

        assertNotNull(failingExec);
        assertNotNull(failingExec.getId());
        boolean errorCaptured =
                failingExec.getError() != null || !failingExec.getLogs().getStderr().isEmpty();
        assertTrue(errorCaptured, "Python error execution should capture runtime errors");

        logger.info("Python code execution tests completed");
    }

    /**
     * Runs three Go programs in one Go context: a hello-world program, a program with a helper
     * function that sums a slice, and a program that references an undeclared identifier.
     *
     * <p>Each run must return an {@code Execution} with a non-null id. The first run must also
     * have produced at least one completion event, and the last run must report either an error
     * object or non-empty stderr.
     */
    @Test
    @Order(4)
    @DisplayName("Go Code Execution")
    @Timeout(value = 10, unit = TimeUnit.MINUTES)
    void testGoCodeExecution() {
        logger.info("Testing Go code execution");

        assertNotNull(codeInterpreter);
        CodeContext goCtx = codeInterpreter.codes().createContext(SupportedLanguage.GO);

        assertNotNull(goCtx);
        assertEquals("go", goCtx.getLanguage());

        // Synchronized lists that collect whatever the handlers below receive.
        List<OutputMessage> capturedStdout = Collections.synchronizedList(new ArrayList<>());
        List<ExecutionComplete> capturedCompletions =
                Collections.synchronizedList(new ArrayList<>());
        List<ExecutionError> capturedErrors = Collections.synchronizedList(new ArrayList<>());

        ExecutionHandlers goHandlers =
                ExecutionHandlers.builder()
                        .onStdout(
                                (OutputMessage line) -> {
                                    capturedStdout.add(line);
                                    logger.info("Go stdout: {}", line.getText());
                                })
                        .onExecutionComplete(
                                (ExecutionComplete done) -> {
                                    capturedCompletions.add(done);
                                    logger.info(
                                            "Go execution completed in {} ms",
                                            done.getExecutionTimeInMillis());
                                })
                        .onError(
                                (ExecutionError failure) -> {
                                    capturedErrors.add(failure);
                                    logger.error(
                                            "Go error: {} - {}",
                                            failure.getName(),
                                            failure.getValue());
                                })
                        .build();

        // --- Case 1: minimal program, observed through the handlers ---
        String helloSource =
                String.join(
                        "\n",
                        "package main",
                        "func main() {",
                        "    println(\"Hello from Go!\")",
                        "    result := 2 + 2",
                        "    println(\"2 + 2 =\", result)",
                        "}");
        RunCodeRequest helloRequest =
                RunCodeRequest.builder()
                        .code(helloSource)
                        .context(goCtx)
                        .handlers(goHandlers)
                        .build();

        Execution helloExecution = codeInterpreter.codes().run(helloRequest);

        assertNotNull(helloExecution);
        assertNotNull(helloExecution.getId());
        assertFalse(capturedCompletions.isEmpty());

        // --- Case 2: helper function operating on a slice (no handlers attached) ---
        String sliceSource =
                String.join(
                        "\n",
                        "package main",
                        "func calculate(numbers []int) int {",
                        "    sum := 0",
                        "    for _, num := range numbers {",
                        "        sum += num",
                        "    }",
                        "    return sum",
                        "}",
                        "func main() {",
                        "    numbers := []int{1, 2, 3, 4, 5}",
                        "    sum := calculate(numbers)",
                        "    println(\"Numbers:\", numbers)",
                        "    println(\"Sum:\", sum)",
                        "}");
        RunCodeRequest sliceRequest =
                RunCodeRequest.builder().code(sliceSource).context(goCtx).build();

        Execution sliceExecution = codeInterpreter.codes().run(sliceRequest);

        assertNotNull(sliceExecution);
        assertNotNull(sliceExecution.getId());

        // --- Case 3: program that uses an undeclared identifier ---
        String brokenSource =
                String.join(
                        "\n",
                        "package main",
                        "func main() {",
                        "    undeclaredVariable++  // This will cause compilation error",
                        "}");
        RunCodeRequest brokenRequest =
                RunCodeRequest.builder()
                        .code(brokenSource)
                        .context(goCtx)
                        .handlers(goHandlers)
                        .build();

        Execution brokenExecution = codeInterpreter.codes().run(brokenRequest);

        assertNotNull(brokenExecution);
        assertNotNull(brokenExecution.getId());
        // Accept either a structured error or anything written to stderr.
        boolean failureReported =
                brokenExecution.getError() != null
                        || !brokenExecution.getLogs().getStderr().isEmpty();
        assertTrue(failureReported, "Go error execution should capture compilation errors");

        logger.info("Go code execution tests completed");
    }

    /**
     * Runs three TypeScript snippets in one TypeScript context: a simple typed computation, a
     * snippet using an interface and typed arrays, and a snippet that throws a runtime error.
     *
     * <p>Each run must return an {@code Execution} with a non-null id. The first run must also
     * have produced at least one completion event, and the last run must report either an error
     * object or non-empty stderr.
     */
    @Test
    @Order(5)
    @DisplayName("TypeScript Code Execution")
    @Timeout(value = 10, unit = TimeUnit.MINUTES)
    void testTypeScriptCodeExecution() {
        logger.info("Testing TypeScript code execution");

        assertNotNull(codeInterpreter);

        // Create TypeScript execution context
        CodeContext tsContext = codeInterpreter.codes().createContext(SupportedLanguage.TYPESCRIPT);

        assertNotNull(tsContext);
        assertEquals("typescript", tsContext.getLanguage());

        // Event tracking (synchronized lists filled by the handlers below)
        List<OutputMessage> stdoutMessages = Collections.synchronizedList(new ArrayList<>());
        List<ExecutionComplete> completedEvents = Collections.synchronizedList(new ArrayList<>());
        List<ExecutionError> errors = Collections.synchronizedList(new ArrayList<>());

        ExecutionHandlers handlers =
                ExecutionHandlers.builder()
                        .onStdout(
                                (OutputMessage msg) -> {
                                    stdoutMessages.add(msg);
                                    logger.info("TypeScript stdout: {}", msg.getText());
                                })
                        .onExecutionComplete(
                                (ExecutionComplete complete) -> {
                                    completedEvents.add(complete);
                                    logger.info(
                                            "TypeScript execution completed in {} ms",
                                            complete.getExecutionTimeInMillis());
                                })
                        .onError(
                                (ExecutionError error) -> {
                                    errors.add(error);
                                    logger.error(
                                            "TypeScript error: {} - {}",
                                            error.getName(),
                                            error.getValue());
                                })
                        .build();

        // 1. Simple TypeScript execution
        RunCodeRequest simpleRequest =
                RunCodeRequest.builder()
                        .code(
                                "console.log('Hello from TypeScript!');\n"
                                        + "const result: number = 2 + 2;\n"
                                        + "console.log(`2 + 2 = ${result}`);")
                        .context(tsContext)
                        .handlers(handlers)
                        .build();

        Execution simpleResult = codeInterpreter.codes().run(simpleRequest);

        assertNotNull(simpleResult);
        assertNotNull(simpleResult.getId());
        assertFalse(completedEvents.isEmpty());

        // 2. TypeScript with types and interfaces
        RunCodeRequest typesRequest =
                RunCodeRequest.builder()
                        .code(
                                "interface Person {\n"
                                        + "  name: string;\n"
                                        + "  age: number;\n"
                                        + "}\n"
                                        + "const person: Person = { name: 'John', age: 30 };\n"
                                        + "const numbers: number[] = [1, 2, 3, 4, 5];\n"
                                        + "const sum: number = numbers.reduce((a, b) => a + b, 0);\n"
                                        + "console.log(`Person: ${person.name}, Age: ${person.age}`);\n"
                                        + "console.log(`Numbers: ${numbers}`);\n"
                                        + "console.log(`Sum: ${sum}`);")
                        .context(tsContext)
                        .build();

        Execution typesResult = codeInterpreter.codes().run(typesRequest);

        assertNotNull(typesResult);
        assertNotNull(typesResult.getId());

        // 3. TypeScript error test: use deterministic runtime error.
        RunCodeRequest errorRequest =
                RunCodeRequest.builder()
                        .code("throw new Error('ts-runtime-error');")
                        .context(tsContext)
                        .handlers(handlers)
                        .build();

        Execution errorResult = codeInterpreter.codes().run(errorRequest);

        assertNotNull(errorResult);
        assertNotNull(errorResult.getId());
        // The snippet throws at runtime, so the failure message names a runtime error
        // (not a type error) to keep diagnostics accurate.
        assertTrue(
                errorResult.getError() != null || !errorResult.getLogs().getStderr().isEmpty(),
                "TypeScript error execution should capture runtime errors");

        logger.info("TypeScript code execution tests completed");
    }

    /**
     * Executes {@code request} asynchronously and waits at most {@code timeout} for its result, so
     * one stalled run cannot consume the whole JUnit timeout of the calling test.
     *
     * <p>Outcomes:
     *
     * <ul>
     *   <li>result available in time: returned as-is;
     *   <li>deadline exceeded: an {@link AssertionError} naming the timeout is thrown;
     *   <li>the run itself failed: a {@link RuntimeException} cause is rethrown unchanged, any
     *       other cause is wrapped in a {@link RuntimeException};
     *   <li>the waiting thread is interrupted: the interrupt flag is restored and the exception is
     *       wrapped in a {@link RuntimeException}.
     * </ul>
     *
     * <p>Note: per the JDK documentation, {@code CompletableFuture.cancel(true)} does not
     * interrupt the thread running the task, so the underlying call may keep running after this
     * method has given up on it.
     */
    private Execution runWithTimeout(RunCodeRequest request, Duration timeout) {
        final CompletableFuture<Execution> pending =
                CompletableFuture.supplyAsync(() -> codeInterpreter.codes().run(request));
        final long budgetMillis = timeout.toMillis();
        try {
            return pending.get(budgetMillis, TimeUnit.MILLISECONDS);
        } catch (InterruptedException interrupted) {
            pending.cancel(true);
            Thread.currentThread().interrupt();
            throw new RuntimeException(interrupted);
        } catch (TimeoutException expired) {
            pending.cancel(true);
            throw new AssertionError("Code execution did not complete within " + timeout, expired);
        } catch (ExecutionException failed) {
            final Throwable rootCause = failed.getCause();
            throw (rootCause instanceof RuntimeException)
                    ? (RuntimeException) rootCause
                    : new RuntimeException(rootCause);
        }
    }

    /**
     * Runs {@code request} via {@link #runWithTimeout} up to {@code attempts} times and returns
     * the first successful result.
     *
     * <p>If every attempt fails, the failure of the <em>last</em> attempt is rethrown, with the
     * failures of earlier attempts attached as suppressed exceptions so no diagnostic is lost.
     *
     * @param request the code request to execute
     * @param timeout per-attempt timeout passed to {@code runWithTimeout}
     * @param attempts maximum number of attempts; if less than 1 no attempt is made
     * @param label short name used in log lines and in the fallback error message
     * @return the execution produced by the first attempt that did not throw
     * @throws AssertionError if the last attempt ended with one (e.g. a timeout), or if no attempt
     *     was made at all
     * @throws RuntimeException if the last attempt ended with one
     */
    private Execution runWithRetry(
            RunCodeRequest request, Duration timeout, int attempts, String label) {
        Throwable lastFailure = null;
        for (int attempt = 1; attempt <= attempts; attempt++) {
            try {
                return runWithTimeout(request, timeout);
            } catch (AssertionError e) {
                logger.warn("{} attempt {}/{} timed out", label, attempt, attempts, e);
                // Keep the history: the newest failure carries the previous one.
                if (lastFailure != null && lastFailure != e) {
                    e.addSuppressed(lastFailure);
                }
                lastFailure = e;
            } catch (RuntimeException e) {
                logger.warn("{} attempt {}/{} failed", label, attempt, attempts, e);
                if (lastFailure != null && lastFailure != e) {
                    e.addSuppressed(lastFailure);
                }
                lastFailure = e;
            }
        }
        // Surface the most recent failure rather than a stale one from an earlier attempt.
        if (lastFailure instanceof AssertionError) {
            throw (AssertionError) lastFailure;
        }
        if (lastFailure instanceof RuntimeException) {
            throw (RuntimeException) lastFailure;
        }
        throw new AssertionError(label + " failed without a captured exception");
    }

    /**
     * Checks that two Python contexts do not share state: each context defines its own variable,
     * then the second context tries to read the variable defined only in the first and must fail
     * with a {@code NameError}.
     */
    @Test
    @Order(6)
    @DisplayName("Multi-Language Support and Context Isolation")
    @Timeout(value = 10, unit = TimeUnit.MINUTES)
    void testMultiLanguageAndContextIsolation() {
        logger.info("Testing multi-language support and context isolation");

        assertNotNull(codeInterpreter);

        // Bound every single run so that one hanging call (sandbox gone, network issue)
        // fails fast instead of eating the whole 10-minute JUnit timeout.
        final Duration runBudget = Duration.ofMinutes(2);

        // Two independent Python contexts.
        final CodeContext ctxA = codeInterpreter.codes().createContext(SupportedLanguage.PYTHON);
        final CodeContext ctxB = codeInterpreter.codes().createContext(SupportedLanguage.PYTHON);

        // Step 1: define a distinct variable in each context.
        final RunCodeRequest defineInA =
                RunCodeRequest.builder()
                        .code(
                                "secret_value1 = 'python1_secret'\n"
                                        + "print(f'Python1 secret: {secret_value1}')")
                        .context(ctxA)
                        .build();
        final RunCodeRequest defineInB =
                RunCodeRequest.builder()
                        .code(
                                "secret_value2 = 'python2_secret'\n"
                                        + "print(f'Python2 secret: {secret_value2}')")
                        .context(ctxB)
                        .build();

        final Execution definedA = runWithTimeout(defineInA, runBudget);
        final Execution definedB = runWithTimeout(defineInB, runBudget);

        assertNotNull(definedA);
        assertNotNull(definedA.getId());
        assertNotNull(definedB);
        assertNotNull(definedB.getId());

        // Step 2: read the first context's variable from both contexts.
        final RunCodeRequest readFromA =
                RunCodeRequest.builder()
                        .code("print(f'Python1 still has: {secret_value1}')")
                        .context(ctxA)
                        .build();
        final RunCodeRequest readFromB =
                RunCodeRequest.builder()
                        .code("print(f'Python2 has no: {secret_value1}')")
                        .context(ctxB)
                        .build();

        final Execution readA = runWithTimeout(readFromA, runBudget);
        final Execution readB = runWithTimeout(readFromB, runBudget);

        assertNotNull(readA);
        assertNotNull(readA.getId());
        assertNotNull(readB);
        assertNotNull(readB.getId());
        // The variable was never defined in the second context, so reading it must fail there.
        assertNotNull(readB.getError());
        assertEquals("NameError", readB.getError().getName());
    }

    /**
     * Submits four executions (two Python, one Java, one Go) to a fixed pool of four threads and
     * requires that at least two of them return an {@code Execution} with a non-null id.
     *
     * <p>Each future is awaited for up to five minutes. Null results, null ids, timeouts and
     * execution failures are recorded per task and included in the assertion message.
     */
    @Test
    @Order(7)
    @DisplayName("Concurrent Code Execution")
    @Timeout(value = 10, unit = TimeUnit.MINUTES)
    void testConcurrentCodeExecution() {
        logger.info("Testing concurrent code execution");

        assertNotNull(codeInterpreter);
        ExecutorService executor = Executors.newFixedThreadPool(4);

        // Create multiple contexts for concurrent execution
        CodeContext pythonConcurrent1 =
                codeInterpreter.codes().createContext(SupportedLanguage.PYTHON);
        CodeContext pythonConcurrent2 =
                codeInterpreter.codes().createContext(SupportedLanguage.PYTHON);
        CodeContext javaConcurrent = codeInterpreter.codes().createContext(SupportedLanguage.JAVA);
        CodeContext goConcurrent = codeInterpreter.codes().createContext(SupportedLanguage.GO);

        // Track futures with labels for diagnostics (index i of both lists refers to the same task)
        List<String> taskLabels = List.of("Python1", "Python2", "Java", "Go");
        List<Future<Execution>> futures = new ArrayList<>();

        try {
            // Submit concurrent executions
            futures.add(
                    executor.submit(
                            () -> {
                                RunCodeRequest request =
                                        RunCodeRequest.builder()
                                                .code(
                                                        "import time\n"
                                                                + "for i in range(3):\n"
                                                                + "    print(f'Python1 iteration"
                                                                + " {i}')\n"
                                                                + "    time.sleep(0.1)\n"
                                                                + "print('Python1 completed')")
                                                .context(pythonConcurrent1)
                                                .build();
                                return codeInterpreter.codes().run(request);
                            }));

            futures.add(
                    executor.submit(
                            () -> {
                                RunCodeRequest request =
                                        RunCodeRequest.builder()
                                                .code(
                                                        "import time\n"
                                                                + "for i in range(3):\n"
                                                                + "    print(f'Python2 iteration"
                                                                + " {i}')\n"
                                                                + "    time.sleep(0.1)\n"
                                                                + "print('Python2 completed')")
                                                .context(pythonConcurrent2)
                                                .build();
                                return codeInterpreter.codes().run(request);
                            }));

            futures.add(
                    executor.submit(
                            () -> {
                                RunCodeRequest request =
                                        RunCodeRequest.builder()
                                                .code(
                                                        "for (int i = 0; i < 3; i++) {\n"
                                                                + "    System.out.println(\"Java"
                                                                + " iteration \" + i);\n"
                                                                + "    Thread.sleep(100);\n"
                                                                + "}\n"
                                                                + "System.out.println(\"Java"
                                                                + " completed\");")
                                                .context(javaConcurrent)
                                                .build();
                                return codeInterpreter.codes().run(request);
                            }));

            futures.add(
                    executor.submit(
                            () -> {
                                RunCodeRequest request =
                                        RunCodeRequest.builder()
                                                .code(
                                                        "package main\n"
                                                                + "func main() {\n"
                                                                + "    for i := 0; i < 3; i++ {\n"
                                                                + "        println(\"Go iteration\","
                                                                + " i)\n"
                                                                + "    }\n"
                                                                + "    println(\"Go completed\")\n"
                                                                + "}")
                                                .context(goConcurrent)
                                                .build();
                                return codeInterpreter.codes().run(request);
                            }));

            // Collect results with per-task diagnostics
            int succeeded = 0;
            List<String> failures = new ArrayList<>();
            for (int i = 0; i < futures.size(); i++) {
                String label = taskLabels.get(i);
                try {
                    Execution result = futures.get(i).get(5, TimeUnit.MINUTES);
                    if (result == null) {
                        String msg = label + ": returned null Execution";
                        logger.error(msg);
                        failures.add(msg);
                    } else if (result.getId() == null) {
                        // Log available fields to aid debugging
                        String detail =
                                label
                                        + ": Execution has null id (error="
                                        + (result.getError() != null
                                                ? result.getError().getName()
                                                        + ": "
                                                        + result.getError().getValue()
                                                : "none")
                                        + ")";
                        logger.warn(detail);
                        failures.add(detail);
                    } else {
                        logger.info(
                                "Concurrent execution [{}] completed: {}", label, result.getId());
                        succeeded++;
                    }
                } catch (TimeoutException te) {
                    String msg = label + ": timed out waiting for result";
                    logger.error(msg, te);
                    failures.add(msg);
                    futures.get(i).cancel(true);
                } catch (ExecutionException ee) {
                    String msg = label + ": execution threw " + ee.getCause();
                    logger.error(msg, ee.getCause());
                    failures.add(msg);
                }
            }

            // At least 2 of 4 concurrent executions must succeed.
            // Java/Go compilation overhead in CI can occasionally cause
            // timeouts or incomplete responses, so we tolerate partial
            // failure while still asserting that concurrency works.
            assertTrue(
                    succeeded >= 2,
                    "Expected at least 2 of 4 concurrent executions to succeed, but only "
                            + succeeded
                            + " did. Failures: "
                            + failures);
            logger.info(
                    "Concurrent execution: {}/{} succeeded (failures: {})",
                    succeeded,
                    futures.size(),
                    failures);

        } catch (InterruptedException ie) {
            // Preserve the interrupt for the test runner before reporting the failure.
            Thread.currentThread().interrupt();
            logger.error("Concurrent execution test was interrupted", ie);
            fail("Concurrent execution failed: " + ie);
        } catch (Exception e) {
            logger.error("Concurrent execution test failed unexpectedly", e);
            fail("Concurrent execution failed: " + e);
        } finally {
            // shutdownNow() interrupts workers that are still blocked in run(); a plain
            // shutdown() would leave such non-daemon threads alive after the test.
            executor.shutdownNow();
        }

        logger.info("Concurrent code execution tests completed");
    }

    /**
     * Exercises {@code interrupt} in three situations: a long-running Python execution, a
     * long-running Java execution, and a Python execution that has already finished.
     *
     * <p>For the two long-running cases the run is started on a background thread, the test waits
     * (polling, up to 15 s) for the init event that carries the execution id, calls {@code
     * interrupt} with that id, and then waits up to 60 s for the run to end. A timeout or an
     * exception from the run after the interrupt is logged and tolerated. Interrupting the
     * already-finished execution is best-effort: any exception is logged and ignored.
     */
    @Test
    @Order(8)
    @DisplayName("Code Execution Interrupt")
    @Timeout(value = 10, unit = TimeUnit.MINUTES)
    void testCodeExecutionInterrupt() throws InterruptedException, ExecutionException {
        logger.info("Testing code execution interrupt functionality");

        CodeContext pythonContext = codeInterpreter.codes().createContext(SupportedLanguage.PYTHON);
        CodeContext javaContext = codeInterpreter.codes().createContext(SupportedLanguage.JAVA);

        // Event tracking for interrupt testing
        List<ExecutionComplete> completedEvents = Collections.synchronizedList(new ArrayList<>());
        List<ExecutionError> errors = Collections.synchronizedList(new ArrayList<>());
        List<ExecutionInit> initEvents = Collections.synchronizedList(new ArrayList<>());

        ExecutionHandlers handlers =
                ExecutionHandlers.builder()
                        .onExecutionComplete(
                                (ExecutionComplete complete) -> {
                                    completedEvents.add(complete);
                                    logger.info(
                                            "Execution completed in {} ms",
                                            complete.getExecutionTimeInMillis());
                                })
                        .onError(
                                (ExecutionError error) -> {
                                    errors.add(error);
                                    logger.error(
                                            "Execution error: {} - {}",
                                            error.getName(),
                                            error.getValue());
                                })
                        .onInit(
                                (ExecutionInit init) -> {
                                    initEvents.add(init);
                                    logger.info("Execution initialized with ID: {}", init.getId());
                                })
                        .build();

        // Test 1: Python long-running execution with interrupt
        logger.info("Testing Python interrupt functionality");

        RunCodeRequest pythonLongRunningRequest =
                RunCodeRequest.builder()
                        .code(
                                "import time\n"
                                        + "print('Starting long-running Python execution')\n"
                                        + "for i in range(100):\n"
                                        + "    print(f'Python iteration {i}')\n"
                                        + "    time.sleep(0.2)  # Sleep 200ms per iteration (20 seconds"
                                        + " total)\n"
                                        + "print('Python execution completed - this should not be"
                                        + " seen')")
                        .context(pythonContext)
                        .handlers(handlers)
                        .build();

        // Start Python execution in background. The executor is closed in finally so a failed
        // assertion cannot leave its worker thread behind.
        ExecutorService executor = Executors.newSingleThreadExecutor();
        try {
            long start = System.currentTimeMillis();
            Future<Execution> pythonFuture =
                    executor.submit(() -> codeInterpreter.codes().run(pythonLongRunningRequest));

            // Wait for init
            long deadline = System.currentTimeMillis() + 15_000;
            while (initEvents.isEmpty() && System.currentTimeMillis() < deadline) {
                Thread.sleep(100);
            }
            assertFalse(initEvents.isEmpty(), "Execution should have been initialized");
            String pythonExecutionId = initEvents.get(initEvents.size() - 1).getId();
            assertNotNull(pythonExecutionId, "Execution ID should not be null");

            // Interrupt the execution as soon as its id is known
            logger.info("Interrupting Python execution with ID: {}", pythonExecutionId);
            assertDoesNotThrow(() -> codeInterpreter.codes().interrupt(pythonExecutionId));

            // Wait for execution to complete (should be interrupted).
            // The SSE stream may close abruptly after interrupt, so handle both
            // a clean result and an exception from a broken connection.
            Execution pythonResult = null;
            try {
                pythonResult = pythonFuture.get(60, TimeUnit.SECONDS);
            } catch (TimeoutException e) {
                pythonFuture.cancel(true);
                logger.warn("Python execution did not complete within 60s after interrupt");
            } catch (ExecutionException e) {
                // SSE stream broken by interrupt — acceptable
                logger.warn(
                        "Python execution raised after interrupt: {}", e.getCause().getMessage());
            }

            long elapsed = System.currentTimeMillis() - start;

            if (pythonResult != null) {
                assertNotNull(pythonResult.getId());
                assertEquals(pythonExecutionId, pythonResult.getId());
            }

            // Coarse upper bound on the whole phase (init wait + interrupt + post-interrupt
            // wait). It guards against the phase dragging on; it is deliberately lenient and
            // does not by itself prove the script stopped before its ~20 s natural end.
            // Terminal events (complete/error) may or may not arrive depending on how quickly
            // the server closed the stream.
            assertTrue(
                    elapsed < 90_000,
                    "Execution should have finished promptly after interrupt (elapsed="
                            + elapsed
                            + "ms)");
        } finally {
            executor.shutdownNow();
        }

        // Test 2: Java long-running execution with interrupt
        logger.info("Testing Java interrupt functionality");

        // Clear event lists for Java test
        completedEvents.clear();
        errors.clear();
        initEvents.clear();

        RunCodeRequest javaLongRunningRequest =
                RunCodeRequest.builder()
                        .code(
                                "System.out.println(\"Starting long-running Java execution\");\n"
                                        + "for (int i = 0; i < 100; i++) {\n"
                                        + "    System.out.println(\"Java iteration \" + i);\n"
                                        + "    try {\n"
                                        + "        Thread.sleep(200);  // Sleep 200ms per iteration\n"
                                        + "    } catch (InterruptedException e) {\n"
                                        + "        System.out.println(\"Java execution"
                                        + " interrupted\");\n"
                                        + "        break;\n"
                                        + "    }\n"
                                        + "}\n"
                                        + "System.out.println(\"Java execution completed - this should"
                                        + " not be seen\");")
                        .context(javaContext)
                        .handlers(handlers)
                        .build();

        // Start Java execution in background
        ExecutorService javaExecutor = Executors.newSingleThreadExecutor();
        try {
            Future<Execution> javaFuture =
                    javaExecutor.submit(() -> codeInterpreter.codes().run(javaLongRunningRequest));

            // Wait for init by polling, same as the Python phase; a fixed short sleep is not
            // guaranteed to be long enough for the init event to arrive.
            long javaInitDeadline = System.currentTimeMillis() + 15_000;
            while (initEvents.isEmpty() && System.currentTimeMillis() < javaInitDeadline) {
                Thread.sleep(100);
            }

            // Verify Java execution was initialized
            assertFalse(initEvents.isEmpty(), "Java execution should have been initialized");
            String javaExecutionId = initEvents.get(initEvents.size() - 1).getId();
            assertNotNull(javaExecutionId, "Java execution ID should not be null");

            // Interrupt the Java execution
            logger.info("Interrupting Java execution with ID: {}", javaExecutionId);
            assertDoesNotThrow(() -> codeInterpreter.codes().interrupt(javaExecutionId));

            // Wait for execution to complete, with a timeout to avoid hanging
            // if the SSE stream doesn't close promptly after interrupt.
            Execution javaResult = null;
            try {
                javaResult = javaFuture.get(60, TimeUnit.SECONDS);
            } catch (TimeoutException e) {
                javaFuture.cancel(true);
                logger.warn("Java execution did not complete within 60s after interrupt");
            } catch (ExecutionException e) {
                logger.warn("Java execution raised after interrupt: {}", e.getCause().getMessage());
            }

            if (javaResult != null) {
                assertNotNull(javaResult.getId());
                logger.info(
                        "Java execution result: ID={}, Error={}",
                        javaResult.getId(),
                        javaResult.getError() != null ? javaResult.getError().getName() : "none");
            }
        } finally {
            javaExecutor.shutdownNow();
        }

        // Test 3: Quick execution that completes before interrupt
        logger.info("Testing interrupt of already completed execution");

        RunCodeRequest quickRequest =
                RunCodeRequest.builder()
                        .code(
                                "print('Quick Python execution')\n"
                                        + "result = 2 + 2\n"
                                        + "print(f'Result: {result}')")
                        .context(pythonContext)
                        .handlers(handlers)
                        .build();

        Execution quickResult = runWithTimeout(quickRequest, Duration.ofMinutes(1));
        assertNotNull(quickResult);
        assertNotNull(quickResult.getId());

        // Try to interrupt already completed execution. Best-effort: a rejection is
        // acceptable here, so it is logged rather than failing the test.
        try {
            codeInterpreter.codes().interrupt(quickResult.getId());
        } catch (Exception e) {
            logger.info("Interrupt of already completed execution raised (ignored): {}", e.toString());
        }

        logger.info("Code execution interrupt tests completed");
    }
}


================================================
FILE: tests/java/src/test/java/com/alibaba/opensandbox/e2e/SandboxE2ETest.java
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.e2e;

import static org.junit.jupiter.api.Assertions.*;

import com.alibaba.opensandbox.sandbox.Sandbox;
import com.alibaba.opensandbox.sandbox.config.ConnectionConfig;
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxApiException;
import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.*;
import com.alibaba.opensandbox.sandbox.domain.models.execd.filesystem.*;
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.*;
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.time.OffsetDateTime;
import java.util.*;
import java.util.concurrent.*;
import org.junit.jupiter.api.*;

/**
 * Comprehensive E2E tests for Sandbox functionality.
 *
 * <p>Tests all sandbox capabilities, including:
 *
 * <ul>
 *   <li>Lifecycle management (creation, health, termination)
 *   <li>Command execution with various shells and scenarios
 *   <li>Filesystem operations (CRUD, permissions, search)
 *   <li>Resource management and monitoring
 *   <li>Error handling and recovery
 *   <li>Concurrent operations and stress testing
 * </ul>
 */
@Tag("e2e")
@DisplayName("Sandbox E2E Tests (Java SDK) - Strict Coverage")
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
public class SandboxE2ETest extends BaseE2ETest {

    private Sandbox sandbox;

    /**
     * Builds the sandbox shared by the tests in this class.
     *
     * <p>The sandbox is created with a resource map of {@code cpu=2} / {@code memory=4Gi}, the
     * metadata entry {@code tag=e2e-test}, the environment variable {@code E2E_TEST=true}, a
     * two-minute timeout, a 60-second ready timeout and a 500 ms health-check polling interval.
     */
    @BeforeAll
    void setup() {
        Map<String, String> tags = new HashMap<>();
        tags.put("tag", "e2e-test");

        Map<String, String> limits = new HashMap<>();
        limits.put("cpu", "2");
        limits.put("memory", "4Gi");

        Duration sandboxTimeout = Duration.ofMinutes(2);
        Duration readyDeadline = Duration.ofSeconds(60);
        Duration pollInterval = Duration.ofMillis(500);

        sandbox =
                Sandbox.builder()
                        .connectionConfig(sharedConnectionConfig)
                        .image(getSandboxImage())
                        .resource(limits)
                        .timeout(sandboxTimeout)
                        .readyTimeout(readyDeadline)
                        .metadata(tags)
                        .env("E2E_TEST", "true")
                        .healthCheckPollingInterval(pollInterval)
                        .build();
    }

    /**
     * Best-effort cleanup of the shared sandbox: kills it and then closes it, ignoring any
     * exception raised by either step.
     */
    @AfterAll
    void teardown() {
        if (sandbox == null) {
            // Nothing was created, so there is nothing to clean up.
            return;
        }

        // The two steps are guarded independently so a failing kill() never prevents close().
        try {
            sandbox.kill();
        } catch (Exception ignored) {
        }

        try {
            sandbox.close();
        } catch (Exception ignored) {
        }
    }

    /**
     * Asserts that the time elapsed from {@code before} to {@code after} is at least {@code
     * minDeltaMs} milliseconds, tolerating up to {@code allowSkewMs} milliseconds of skew.
     *
     * @param before earlier timestamp
     * @param after later timestamp
     * @param minDeltaMs minimum expected difference in milliseconds
     * @param allowSkewMs amount subtracted from {@code minDeltaMs} before comparing
     */
    private static void assertModifiedUpdated(
            OffsetDateTime before, OffsetDateTime after, long minDeltaMs, long allowSkewMs) {
        long elapsedMs = Duration.between(before, after).toMillis();
        long requiredMs = minDeltaMs - allowSkewMs;

        String failureMessage =
                "modifiedAt did not update as expected: deltaMs="
                        + elapsedMs
                        + " (minDeltaMs="
                        + minDeltaMs
                        + ", allowSkewMs="
                        + allowSkewMs
                        + ")";

        assertTrue(elapsedMs >= requiredMs, failureMessage);
    }

    /**
     * Checks the event contract of a finished execution.
     *
     * <p>There must be exactly one init event whose non-blank id equals {@code executionId}, and at
     * least one terminal event (complete or error). When complete events are present there must be
     * exactly one, with a non-negative execution time; when errors are present the first one must
     * carry a non-blank name and a non-null value. Timestamps are validated with {@code
     * assertRecentTimestampMs}.
     *
     * @param initEvents collected init events
     * @param completedEvents collected completion events
     * @param errors collected error events
     * @param executionId id of the execution the events belong to
     */
    private static void assertTerminalEventContract(
            List<ExecutionInit> initEvents,
            List<ExecutionComplete> completedEvents,
            List<ExecutionError> errors,
            String executionId) {
        // Init event: exactly one, and it identifies this execution.
        assertEquals(1, initEvents.size(), "Execution must have exactly one init event");
        ExecutionInit init = initEvents.get(0);
        assertNotNull(init.getId());
        assertFalse(init.getId().isBlank());
        assertEquals(executionId, init.getId(), "init.id must match execution.id");
        assertRecentTimestampMs(init.getTimestamp(), 120_000);

        // Terminal events: at least one of complete/error must have been observed.
        boolean sawComplete = !completedEvents.isEmpty();
        boolean sawError = !errors.isEmpty();
        String noTerminalEvent =
                "expected at least one of complete/error, got complete="
                        + completedEvents.size()
                        + " error="
                        + errors.size();
        assertTrue(sawComplete || sawError, noTerminalEvent);

        if (sawComplete) {
            assertEquals(1, completedEvents.size());
            ExecutionComplete complete = completedEvents.get(0);
            assertRecentTimestampMs(complete.getTimestamp(), 180_000);
            assertTrue(complete.getExecutionTimeInMillis() >= 0);
        }

        if (sawError) {
            ExecutionError firstError = errors.get(0);
            assertNotNull(firstError.getName());
            assertFalse(firstError.getName().isBlank());
            assertNotNull(firstError.getValue());
            assertRecentTimestampMs(firstError.getTimestamp(), 180_000);
        }
    }

    /**
     * Exercises the shared sandbox: identity and health, info fields, endpoint lookup, metrics
     * ranges, TTL renewal, service accessors, and connecting to the same sandbox by ID through a
     * second handle.
     */
    @Test
    @Order(1)
    @DisplayName("Sandbox lifecycle, health, endpoint, metrics, renew, connect")
    @Timeout(value = 2, unit = TimeUnit.MINUTES)
    void testSandboxLifecycleAndHealth() {
        assertNotNull(sandbox);
        assertNotNull(sandbox.getId());
        assertTrue(sandbox.isHealthy(), "Sandbox should be healthy");

        // Info: identity, state, timestamps and entrypoint.
        SandboxInfo createdInfo = sandbox.getInfo();
        assertEquals(sandbox.getId(), createdInfo.getId());
        assertEquals("Running", createdInfo.getStatus().getState());
        assertNotNull(createdInfo.getCreatedAt());
        assertNotNull(createdInfo.getExpiresAt());
        assertTrue(createdInfo.getExpiresAt().isAfter(createdInfo.getCreatedAt()));
        assertEquals(List.of("tail", "-f", "/dev/null"), createdInfo.getEntrypoint());

        // Lifetime (createdAt -> expiresAt) must fall within [1, 3] minutes.
        Duration lifetime =
                Duration.between(createdInfo.getCreatedAt(), createdInfo.getExpiresAt());
        assertTrue(lifetime.compareTo(Duration.ofMinutes(1)) >= 0);
        assertTrue(lifetime.compareTo(Duration.ofMinutes(3)) <= 0);

        assertNotNull(createdInfo.getMetadata());
        assertEquals("e2e-test", createdInfo.getMetadata().get("tag"));

        // Endpoint lookup for port 44772.
        SandboxEndpoint execEndpoint = sandbox.getEndpoint(44772);
        assertNotNull(execEndpoint);
        assertEndpointHasPort(execEndpoint.getEndpoint(), 44772);

        // Metrics: values must be within sane ranges and recently sampled.
        SandboxMetrics snapshot = sandbox.getMetrics();
        assertNotNull(snapshot);
        assertTrue(snapshot.getCpuCount() > 0);
        assertTrue(
                snapshot.getCpuUsedPercentage() >= 0.0
                        && snapshot.getCpuUsedPercentage() <= 100.0);
        assertTrue(snapshot.getMemoryTotalInMiB() > 0);
        assertTrue(
                snapshot.getMemoryUsedInMiB() >= 0.0
                        && snapshot.getMemoryUsedInMiB() <= snapshot.getMemoryTotalInMiB());
        assertRecentTimestampMs(snapshot.getTimestamp(), 120_000);

        // Renew: validate remaining TTL is close to requested duration.
        SandboxRenewResponse renewal = sandbox.renew(Duration.ofMinutes(5));
        assertNotNull(renewal, "renew() should return a response");
        assertNotNull(renewal.getExpiresAt(), "renew().expiresAt should not be null");
        SandboxInfo infoAfterRenew = sandbox.getInfo();
        assertTrue(infoAfterRenew.getExpiresAt().isAfter(createdInfo.getExpiresAt()));
        assertTrue(
                renewal.getExpiresAt().isAfter(createdInfo.getExpiresAt()),
                "renew().expiresAt should be after previous expiresAt");
        // Allow small skew between renew response and subsequent getInfo() (backend timing).
        Duration renewSkew =
                Duration.between(renewal.getExpiresAt(), infoAfterRenew.getExpiresAt());
        assertTrue(
                Math.abs(renewSkew.toSeconds()) < 10,
                "renew response expiresAt should be close to getInfo().expiresAt");
        Duration ttlLeft = Duration.between(OffsetDateTime.now(), infoAfterRenew.getExpiresAt());
        assertTrue(
                ttlLeft.compareTo(Duration.ofMinutes(3)) > 0,
                "Remaining TTL too small: " + ttlLeft);
        assertTrue(
                ttlLeft.compareTo(Duration.ofMinutes(6)) < 0,
                "Remaining TTL too large: " + ttlLeft);

        // Service accessors must all be available.
        assertNotNull(sandbox.files());
        assertNotNull(sandbox.commands());
        assertNotNull(sandbox.metrics());
        assertNotNull(sandbox.httpClientProvider());

        // Connect to existing sandbox by ID and run a basic command.
        Sandbox reconnected =
                Sandbox.connector()
                        .connectionConfig(sharedConnectionConfig)
                        .sandboxId(sandbox.getId())
                        .connect();
        try {
            assertEquals(sandbox.getId(), reconnected.getId());
            assertTrue(reconnected.isHealthy());

            RunCommandRequest echoRequest =
                    RunCommandRequest.builder().command("echo connect-ok").build();
            Execution echoResult = reconnected.commands().run(echoRequest);
            assertNotNull(echoResult);
            assertNull(echoResult.getError());
            assertEquals(1, echoResult.getLogs().getStdout().size());
            assertEquals("connect-ok", echoResult.getLogs().getStdout().get(0).getText());
        } finally {
            // Only the second handle is closed here; the shared sandbox keeps running.
            reconnected.close();
        }
    }

    /**
     * Creates a sandbox configured with {@code manualCleanup()} and verifies that its info reports
     * a {@code null} expiresAt while still carrying the requested metadata.
     */
    @Test
    @Order(1)
    @DisplayName("Sandbox manual cleanup returns null expiresAt")
    @Timeout(value = 2, unit = TimeUnit.MINUTES)
    void testSandboxManualCleanup() {
        Sandbox manualSandbox =
                Sandbox.builder()
                        .connectionConfig(sharedConnectionConfig)
                        .image(getSandboxImage())
                        .manualCleanup()
                        .readyTimeout(Duration.ofSeconds(60))
                        .metadata(Map.of("tag", "manual-java-e2e-test"))
                        .build();

        try {
            SandboxInfo info = manualSandbox.getInfo();
            assertNull(info.getExpiresAt());
            assertNotNull(info.getMetadata());
            assertEquals("manual-java-e2e-test", info.getMetadata().get("tag"));
        } finally {
            // A kill() failure is still allowed to propagate so it stays visible, but close()
            // must always run so the local handle is released even when kill() throws.
            try {
                manualSandbox.kill();
            } finally {
                manualSandbox.close();
            }
        }
    }

    /**
     * Creates a sandbox with a deny-by-default egress policy that allows only {@code pypi.org},
     * checks that the policy is reported and enforced, then patches the rules (allow {@code
     * www.github.com}, deny {@code pypi.org}) and checks that the patched rules are reported and
     * enforced.
     */
    @Test
    @Order(2)
    @DisplayName("Sandbox create with networkPolicy + get/patch egress")
    @Timeout(value = 2, unit = TimeUnit.MINUTES)
    void testSandboxCreateWithNetworkPolicy() {
        NetworkPolicy networkPolicy =
                NetworkPolicy.builder()
                        .defaultAction(NetworkPolicy.DefaultAction.DENY)
                        .addEgress(
                                NetworkRule.builder()
                                        .action(NetworkRule.Action.ALLOW)
                                        .target("pypi.org")
                                        .build())
                        .build();

        Sandbox policySandbox =
                Sandbox.builder()
                        .connectionConfig(sharedConnectionConfig)
                        .image(getSandboxImage())
                        .timeout(Duration.ofMinutes(2))
                        .readyTimeout(Duration.ofSeconds(60))
                        .networkPolicy(networkPolicy)
                        .build();
        // Wait for NetworkPolicy sidecar to be fully initialized
        try {
            Thread.sleep(2000);
        } catch (InterruptedException e) {
            // Preserve the interrupt status instead of silently dropping it.
            Thread.currentThread().interrupt();
        }

        try {
            // The policy reported by the sandbox must match what was requested at creation.
            NetworkPolicy initialPolicy = policySandbox.getEgressPolicy();
            assertNotNull(initialPolicy);
            assertEquals(NetworkPolicy.DefaultAction.DENY, initialPolicy.getDefaultAction());
            assertNotNull(initialPolicy.getEgress());
            assertTrue(
                    initialPolicy.getEgress().stream()
                            .anyMatch(
                                    r ->
                                            "pypi.org".equals(r.getTarget())
                                                    && r.getAction() == NetworkRule.Action.ALLOW));

            // Enforcement before the patch: github is expected to fail, pypi to succeed.
            Execution githubBlocked =
                    policySandbox
                            .commands()
                            .run(
                                    RunCommandRequest.builder()
                                            .command("curl -I https://www.github.com")
                                            .build());
            assertNotNull(githubBlocked);
            assertNotNull(githubBlocked.getError());

            Execution pypiAllowed =
                    policySandbox
                            .commands()
                            .run(
                                    RunCommandRequest.builder()
                                            .command("curl -I https://pypi.org")
                                            .build());
            assertNotNull(pypiAllowed);
            assertNull(pypiAllowed.getError());

            // Flip the rules: allow github, deny pypi.
            policySandbox.patchEgressRules(
                    List.of(
                            NetworkRule.builder()
                                    .action(NetworkRule.Action.ALLOW)
                                    .target("www.github.com")
                                    .build(),
                            NetworkRule.builder()
                                    .action(NetworkRule.Action.DENY)
                                    .target("pypi.org")
                                    .build()));

            // Give the patched rules time to take effect before checking them.
            try {
                Thread.sleep(2000);
            } catch (InterruptedException e) {
                // Preserve the interrupt status instead of silently dropping it.
                Thread.currentThread().interrupt();
            }

            NetworkPolicy patchedPolicy = policySandbox.getEgressPolicy();
            assertNotNull(patchedPolicy);
            assertNotNull(patchedPolicy.getEgress());
            assertTrue(
                    patchedPolicy.getEgress().stream()
                            .anyMatch(
                                    rule ->
                                            "www.github.com".equals(rule.getTarget())
                                                    && rule.getAction()
                                                            == NetworkRule.Action.ALLOW));
            assertTrue(
                    patchedPolicy.getEgress().stream()
                            .anyMatch(
                                    rule ->
                                            "pypi.org".equals(rule.getTarget())
                                                    && rule.getAction()
                                                            == NetworkRule.Action.DENY));

            // Enforcement after the patch: github is expected to succeed, pypi to fail.
            Execution githubAllowed =
                    policySandbox
                            .commands()
                            .run(
                                    RunCommandRequest.builder()
                                            .command("curl -I https://www.github.com")
                                            .build());
            assertNotNull(githubAllowed);
            assertNull(githubAllowed.getError());

            Execution pypiDenied =
                    policySandbox
                            .commands()
                            .run(
                                    RunCommandRequest.builder()
                                            .command("curl -I https://pypi.org")
                                            .build());
            assertNotNull(pypiDenied);
            assertNotNull(pypiDenied.getError());
        } finally {
            // Best-effort kill; close() must run regardless.
            try {
                policySandbox.kill();
            } catch (Exception ignored) {
            }
            policySandbox.close();
        }
    }

    /**
     * Same scenario as the direct network-policy test, but using the connection config returned by
     * {@code createConnectionConfig(true)}: the endpoint for port 18080 must be a server proxy
     * path, the initial deny-by-default policy (allowing only {@code pypi.org}) must be reported
     * and enforced, and the patched rules must be reported afterwards.
     */
    @Test
    @Order(2)
    @DisplayName("Sandbox create with networkPolicy + get/patch egress via server proxy")
    @Timeout(value = 2, unit = TimeUnit.MINUTES)
    void testSandboxCreateWithNetworkPolicyViaServerProxy() {
        NetworkPolicy networkPolicy =
                NetworkPolicy.builder()
                        .defaultAction(NetworkPolicy.DefaultAction.DENY)
                        .addEgress(
                                NetworkRule.builder()
                                        .action(NetworkRule.Action.ALLOW)
                                        .target("pypi.org")
                                        .build())
                        .build();

        Sandbox policySandbox =
                Sandbox.builder()
                        .connectionConfig(createConnectionConfig(true))
                        .image(getSandboxImage())
                        .timeout(Duration.ofMinutes(2))
                        .readyTimeout(Duration.ofSeconds(60))
                        .networkPolicy(networkPolicy)
                        .build();
        // Short pause after creation before the egress policy is queried.
        try {
            Thread.sleep(2000);
        } catch (InterruptedException e) {
            // Preserve the interrupt status instead of silently dropping it.
            Thread.currentThread().interrupt();
        }

        try {
            // The endpoint for port 18080 must be routed through the server proxy path.
            SandboxEndpoint egressEndpoint = policySandbox.getEndpoint(18080);
            assertNotNull(egressEndpoint);
            assertTrue(
                    egressEndpoint.getEndpoint().contains(
                            "/sandboxes/" + policySandbox.getId() + "/proxy/18080"));

            NetworkPolicy initialPolicy = policySandbox.getEgressPolicy();
            assertNotNull(initialPolicy);
            assertEquals(NetworkPolicy.DefaultAction.DENY, initialPolicy.getDefaultAction());
            assertNotNull(initialPolicy.getEgress());
            assertTrue(
                    initialPolicy.getEgress().stream()
                            .anyMatch(
                                    r ->
                                            "pypi.org".equals(r.getTarget())
                                                    && r.getAction() == NetworkRule.Action.ALLOW));

            // Enforcement before the patch: github is expected to fail, pypi to succeed.
            Execution blocked =
                    policySandbox
                            .commands()
                            .run(
                                    RunCommandRequest.builder()
                                            .command("curl -I https://www.github.com")
                                            .build());
            assertNotNull(blocked);
            assertNotNull(blocked.getError());

            Execution allowed =
                    policySandbox
                            .commands()
                            .run(
                                    RunCommandRequest.builder()
                                            .command("curl -I https://pypi.org")
                                            .build());
            assertNotNull(allowed);
            assertNull(allowed.getError());

            // Flip the rules: allow github, deny pypi.
            policySandbox.patchEgressRules(
                    List.of(
                            NetworkRule.builder()
                                    .action(NetworkRule.Action.ALLOW)
                                    .target("www.github.com")
                                    .build(),
                            NetworkRule.builder()
                                    .action(NetworkRule.Action.DENY)
                                    .target("pypi.org")
                                    .build()));

            // Give the patched rules time to take effect before reading them back.
            try {
                Thread.sleep(2000);
            } catch (InterruptedException e) {
                // Preserve the interrupt status instead of silently dropping it.
                Thread.currentThread().interrupt();
            }

            NetworkPolicy patchedPolicy = policySandbox.getEgressPolicy();
            // Fail with an assertion rather than a NullPointerException if nothing is returned.
            assertNotNull(patchedPolicy);
            assertNotNull(patchedPolicy.getEgress());
            assertTrue(
                    patchedPolicy.getEgress().stream()
                            .anyMatch(
                                    rule ->
                                            "www.github.com".equals(rule.getTarget())
                                                    && rule.getAction()
                                                            == NetworkRule.Action.ALLOW));
            assertTrue(
                    patchedPolicy.getEgress().stream()
                            .anyMatch(
                                    rule ->
                                            "pypi.org".equals(rule.getTarget())
                                                    && rule.getAction()
                                                            == NetworkRule.Action.DENY));
        } finally {
            // Best-effort kill; close() must run regardless.
            try {
                policySandbox.kill();
            } catch (Exception ignored) {
            }
            policySandbox.close();
        }
    }

    /**
     * Creates a sandbox with a read-write host volume and checks that {@code marker.txt} under the
     * mount contains the expected marker text, that a file written through the mount can be read
     * back, and that the mount path is a directory.
     */
    @Test
    @Order(2)
    @DisplayName("Sandbox create with host volume mount (read-write)")
    @Timeout(value = 2, unit = TimeUnit.MINUTES)
    void testSandboxCreateWithHostVolumeMount() {
        String mountPoint = "/mnt/host-data";
        String hostPath = "/tmp/opensandbox-e2e/host-volume-test";

        Volume hostVolume =
                Volume.builder()
                        .name("test-host-vol")
                        .host(Host.of(hostPath))
                        .mountPath(mountPoint)
                        .readOnly(false)
                        .build();

        Sandbox rwSandbox =
                Sandbox.builder()
                        .connectionConfig(sharedConnectionConfig)
                        .image(getSandboxImage())
                        .timeout(Duration.ofMinutes(2))
                        .readyTimeout(Duration.ofSeconds(60))
                        .volume(hostVolume)
                        .build();

        try {
            assertTrue(rwSandbox.isHealthy(), "Volume sandbox should be healthy");

            // 1) The host marker file must be visible inside the sandbox.
            RunCommandRequest catMarker =
                    RunCommandRequest.builder().command("cat " + mountPoint + "/marker.txt").build();
            Execution markerRead = rwSandbox.commands().run(catMarker);
            assertNull(markerRead.getError(), "Failed to read marker file");
            assertEquals(1, markerRead.getLogs().getStdout().size());
            assertEquals(
                    "opensandbox-e2e-marker", markerRead.getLogs().getStdout().get(0).getText());

            // 2) Writing into the mounted path from inside the sandbox must succeed.
            String outputFile = mountPoint + "/sandbox-output.txt";
            RunCommandRequest writeOutput =
                    RunCommandRequest.builder()
                            .command("echo 'written-from-sandbox' > " + outputFile)
                            .build();
            Execution written = rwSandbox.commands().run(writeOutput);
            assertNull(written.getError(), "Failed to write file");

            // 3) The file that was just written must be readable.
            RunCommandRequest catOutput =
                    RunCommandRequest.builder().command("cat " + outputFile).build();
            Execution outputRead = rwSandbox.commands().run(catOutput);
            assertNull(outputRead.getError());
            assertEquals(1, outputRead.getLogs().getStdout().size());
            assertEquals("written-from-sandbox", outputRead.getLogs().getStdout().get(0).getText());

            // 4) The mount path itself must be a directory.
            RunCommandRequest isDirectory =
                    RunCommandRequest.builder().command("test -d " + mountPoint).build();
            Execution directoryCheck = rwSandbox.commands().run(isDirectory);
            assertNull(directoryCheck.getError());
        } finally {
            // Best-effort kill; close() must run regardless.
            try {
                rwSandbox.kill();
            } catch (Exception ignored) {
            }
            rwSandbox.close();
        }
    }

    /**
     * Creates a sandbox with a read-only host volume and checks that {@code marker.txt} under the
     * mount is readable while creating a new file under the mount fails.
     */
    @Test
    @Order(2)
    @DisplayName("Sandbox create with host volume mount (read-only)")
    @Timeout(value = 2, unit = TimeUnit.MINUTES)
    void testSandboxCreateWithHostVolumeMountReadOnly() {
        String mountPoint = "/mnt/host-data-ro";
        String hostPath = "/tmp/opensandbox-e2e/host-volume-test";

        Volume readOnlyVolume =
                Volume.builder()
                        .name("test-host-vol-ro")
                        .host(Host.of(hostPath))
                        .mountPath(mountPoint)
                        .readOnly(true)
                        .build();

        Sandbox readOnlySandbox =
                Sandbox.builder()
                        .connectionConfig(sharedConnectionConfig)
                        .image(getSandboxImage())
                        .timeout(Duration.ofMinutes(2))
                        .readyTimeout(Duration.ofSeconds(60))
                        .volume(readOnlyVolume)
                        .build();

        try {
            assertTrue(readOnlySandbox.isHealthy(), "Read-only volume sandbox should be healthy");

            // 1) The host marker file must be readable through the read-only mount.
            RunCommandRequest catMarker =
                    RunCommandRequest.builder().command("cat " + mountPoint + "/marker.txt").build();
            Execution markerRead = readOnlySandbox.commands().run(catMarker);
            assertNull(markerRead.getError(), "Failed to read marker file on read-only mount");
            assertEquals(1, markerRead.getLogs().getStdout().size());
            assertEquals(
                    "opensandbox-e2e-marker", markerRead.getLogs().getStdout().get(0).getText());

            // 2) Creating a file under the read-only mount must be rejected.
            RunCommandRequest touchFile =
                    RunCommandRequest.builder()
                            .command("touch " + mountPoint + "/should-fail.txt")
                            .build();
            Execution touchAttempt = readOnlySandbox.commands().run(touchFile);
            assertNotNull(touchAttempt.getError(), "Write should fail on read-only mount");
        } finally {
            // Best-effort kill; close() must run regardless.
            try {
                readOnlySandbox.kill();
            } catch (Exception ignored) {
            }
            readOnlySandbox.close();
        }
    }

    /**
     * Creates a sandbox with a read-write PVC-backed volume and checks that {@code marker.txt}
     * under the mount contains the expected marker text, that a file written through the mount can
     * be read back, and that the mount path is a directory.
     */
    @Test
    @Order(2)
    @DisplayName("Sandbox create with PVC named volume mount (read-write)")
    @Timeout(value = 2, unit = TimeUnit.MINUTES)
    void testSandboxCreateWithPvcVolumeMount() {
        String mountPoint = "/mnt/pvc-data";
        String claimName = "opensandbox-e2e-pvc-test";

        Volume pvcVolume =
                Volume.builder()
                        .name("test-pvc-vol")
                        .pvc(PVC.of(claimName))
                        .mountPath(mountPoint)
                        .readOnly(false)
                        .build();

        Sandbox rwSandbox =
                Sandbox.builder()
                        .connectionConfig(sharedConnectionConfig)
                        .image(getSandboxImage())
                        .timeout(Duration.ofMinutes(2))
                        .readyTimeout(Duration.ofSeconds(60))
                        .volume(pvcVolume)
                        .build();

        try {
            assertTrue(rwSandbox.isHealthy(), "PVC volume sandbox should be healthy");

            // 1) The marker file seeded into the named volume must be readable.
            RunCommandRequest catMarker =
                    RunCommandRequest.builder().command("cat " + mountPoint + "/marker.txt").build();
            Execution markerRead = rwSandbox.commands().run(catMarker);
            assertNull(markerRead.getError(), "Failed to read marker file from PVC volume");
            assertEquals(1, markerRead.getLogs().getStdout().size());
            assertEquals("pvc-marker-data", markerRead.getLogs().getStdout().get(0).getText());

            // 2) Writing into the named volume from inside the sandbox must succeed.
            String outputFile = mountPoint + "/pvc-output.txt";
            RunCommandRequest writeOutput =
                    RunCommandRequest.builder()
                            .command("echo 'written-to-pvc' > " + outputFile)
                            .build();
            Execution written = rwSandbox.commands().run(writeOutput);
            assertNull(written.getError(), "Failed to write file to PVC volume");

            // 3) The file that was just written must be readable.
            RunCommandRequest catOutput =
                    RunCommandRequest.builder().command("cat " + outputFile).build();
            Execution outputRead = rwSandbox.commands().run(catOutput);
            assertNull(outputRead.getError());
            assertEquals(1, outputRead.getLogs().getStdout().size());
            assertEquals("written-to-pvc", outputRead.getLogs().getStdout().get(0).getText());

            // 4) The mount path itself must be a directory.
            RunCommandRequest isDirectory =
                    RunCommandRequest.builder().command("test -d " + mountPoint).build();
            Execution directoryCheck = rwSandbox.commands().run(isDirectory);
            assertNull(directoryCheck.getError());
        } finally {
            // Best-effort kill; close() must run regardless.
            try {
                rwSandbox.kill();
            } catch (Exception ignored) {
            }
            rwSandbox.close();
        }
    }

    /**
     * Creates a sandbox with a read-only PVC-backed volume and checks that {@code marker.txt} under
     * the mount is readable while creating a new file under the mount fails.
     */
    @Test
    @Order(2)
    @DisplayName("Sandbox create with PVC named volume mount (read-only)")
    @Timeout(value = 2, unit = TimeUnit.MINUTES)
    void testSandboxCreateWithPvcVolumeMountReadOnly() {
        String mountPoint = "/mnt/pvc-data-ro";
        String claimName = "opensandbox-e2e-pvc-test";

        Volume readOnlyVolume =
                Volume.builder()
                        .name("test-pvc-vol-ro")
                        .pvc(PVC.of(claimName))
                        .mountPath(mountPoint)
                        .readOnly(true)
                        .build();

        Sandbox readOnlySandbox =
                Sandbox.builder()
                        .connectionConfig(sharedConnectionConfig)
                        .image(getSandboxImage())
                        .timeout(Duration.ofMinutes(2))
                        .readyTimeout(Duration.ofSeconds(60))
                        .volume(readOnlyVolume)
                        .build();

        try {
            assertTrue(
                    readOnlySandbox.isHealthy(), "Read-only PVC volume sandbox should be healthy");

            // 1) The marker file must be readable through the read-only mount.
            RunCommandRequest catMarker =
                    RunCommandRequest.builder().command("cat " + mountPoint + "/marker.txt").build();
            Execution markerRead = readOnlySandbox.commands().run(catMarker);
            assertNull(markerRead.getError(), "Failed to read marker file on read-only PVC mount");
            assertEquals(1, markerRead.getLogs().getStdout().size());
            assertEquals("pvc-marker-data", markerRead.getLogs().getStdout().get(0).getText());

            // 2) Creating a file under the read-only mount must be rejected.
            RunCommandRequest touchFile =
                    RunCommandRequest.builder()
                            .command("touch " + mountPoint + "/should-fail.txt")
                            .build();
            Execution touchAttempt = readOnlySandbox.commands().run(touchFile);
            assertNotNull(touchAttempt.getError(), "Write should fail on read-only PVC mount");
        } finally {
            // Best-effort kill; close() must run regardless.
            try {
                readOnlySandbox.kill();
            } catch (Exception ignored) {
            }
            readOnlySandbox.close();
        }
    }

    /**
     * Creates a dedicated sandbox that mounts only the {@code datasets/train} sub-path of the
     * {@code opensandbox-e2e-pvc-test} claim at {@code /mnt/train} (read-write) and verifies:
     *
     * <ol>
     *   <li>the sub-path marker file is readable,
     *   <li>the listing shows the sub-path contents rather than the volume root,
     *   <li>a file written through the mount can be read back.
     * </ol>
     *
     * <p>NOTE(review): the claim is presumed to be pre-populated with
     * {@code datasets/train/marker.txt} containing {@code pvc-subpath-marker}; that seeding is
     * not visible from this method.
     */
    @Test
    @Order(2)
    @DisplayName("Sandbox create with PVC named volume subPath mount")
    @Timeout(value = 2, unit = TimeUnit.MINUTES)
    void testSandboxCreateWithPvcVolumeMountSubPath() {
        final String claimName = "opensandbox-e2e-pvc-test";
        final String mountPoint = "/mnt/train";

        final Volume subPathVolume =
                Volume.builder()
                        .name("test-pvc-subpath")
                        .pvc(PVC.of(claimName))
                        .mountPath(mountPoint)
                        .readOnly(false)
                        .subPath("datasets/train")
                        .build();

        final Sandbox subpathSandbox =
                Sandbox.builder()
                        .connectionConfig(sharedConnectionConfig)
                        .image(getSandboxImage())
                        .timeout(Duration.ofMinutes(2))
                        .readyTimeout(Duration.ofSeconds(60))
                        .volume(subPathVolume)
                        .build();

        try {
            assertTrue(subpathSandbox.isHealthy(), "PVC subPath sandbox should be healthy");

            // 1) The marker that lives inside the sub-path is visible at the mount root.
            final RunCommandRequest catMarker =
                    RunCommandRequest.builder().command("cat " + mountPoint + "/marker.txt").build();
            final Execution markerRead = subpathSandbox.commands().run(catMarker);
            assertNull(markerRead.getError(), "Failed to read subpath marker file");
            assertEquals(1, markerRead.getLogs().getStdout().size());
            assertEquals("pvc-subpath-marker", markerRead.getLogs().getStdout().get(0).getText());

            // 2) Only the sub-path contents are exposed, not the whole volume.
            final RunCommandRequest listMount =
                    RunCommandRequest.builder().command("ls " + mountPoint + "/").build();
            final Execution listing = subpathSandbox.commands().run(listMount);
            assertNull(listing.getError());
            // Every stdout line is prefixed with a newline, matching a left fold from "".
            String listingText = "";
            for (OutputMessage line : listing.getLogs().getStdout()) {
                listingText = listingText + "\n" + line.getText();
            }
            assertTrue(listingText.contains("marker.txt"), "Should contain marker.txt");
            assertFalse(listingText.contains("datasets"), "Should not contain datasets dir");

            // 3) Round-trip a write through the mount.
            final RunCommandRequest writeFile =
                    RunCommandRequest.builder()
                            .command("echo 'subpath-write-test' > " + mountPoint + "/output.txt")
                            .build();
            final Execution writeOutcome = subpathSandbox.commands().run(writeFile);
            assertNull(writeOutcome.getError(), "Failed to write file to PVC subPath");

            final RunCommandRequest catOutput =
                    RunCommandRequest.builder().command("cat " + mountPoint + "/output.txt").build();
            final Execution readBack = subpathSandbox.commands().run(catOutput);
            assertNull(readBack.getError());
            assertEquals(1, readBack.getLogs().getStdout().size());
            assertEquals("subpath-write-test", readBack.getLogs().getStdout().get(0).getText());
        } finally {
            // Best-effort remote termination; a kill failure must not mask the test outcome.
            try {
                subpathSandbox.kill();
            } catch (Exception ignored) {
            }
            subpathSandbox.close();
        }
    }

    // ==========================================
    // Command Execution Tests
    // ==========================================

    /**
     * Exercises four command scenarios on the shared sandbox:
     *
     * <ul>
     *   <li>a successful {@code echo} with streaming handlers attached,
     *   <li>a command run with an explicit working directory,
     *   <li>a background command, which must return to the caller quickly,
     *   <li>a non-existent command, which must surface an error and stderr output.
     * </ul>
     */
    @Test
    @Order(3)
    @DisplayName("Command execution: success, cwd, background, failure")
    @Timeout(value = 2, unit = TimeUnit.MINUTES)
    void testBasicCommandExecution() {
        assertNotNull(sandbox);

        // Synchronized sinks: the handlers below append to these lists.
        final List<ExecutionInit> initEvents = Collections.synchronizedList(new ArrayList<>());
        final List<ExecutionComplete> completedEvents =
                Collections.synchronizedList(new ArrayList<>());
        final List<ExecutionError> errors = Collections.synchronizedList(new ArrayList<>());
        final List<ExecutionResult> results = Collections.synchronizedList(new ArrayList<>());
        final List<OutputMessage> stdoutMessages = Collections.synchronizedList(new ArrayList<>());
        final List<OutputMessage> stderrMessages = Collections.synchronizedList(new ArrayList<>());

        final ExecutionHandlers handlers =
                ExecutionHandlers.builder()
                        .onStdout(
                                (OutputMessage out) -> {
                                    stdoutMessages.add(out);
                                    logger.info("Stdout: {}", out.getText());
                                })
                        .onStderr(
                                (OutputMessage err) -> {
                                    stderrMessages.add(err);
                                    logger.warn("Stderr: {}", err.getText());
                                })
                        .onResult((ExecutionResult r) -> results.add(r))
                        .onExecutionComplete((ExecutionComplete c) -> completedEvents.add(c))
                        .onError((ExecutionError e) -> errors.add(e))
                        .onInit((ExecutionInit i) -> initEvents.add(i))
                        .build();

        // --- Success case: echo with handlers -------------------------------------------------
        final Execution echoResult =
                sandbox.commands()
                        .run(
                                RunCommandRequest.builder()
                                        .command("echo 'Hello OpenSandbox E2E'")
                                        .handlers(handlers)
                                        .build());

        assertNotNull(echoResult);
        assertNotNull(echoResult.getId());
        assertFalse(echoResult.getId().isBlank());
        assertNull(echoResult.getError());
        assertEquals(1, echoResult.getLogs().getStdout().size());
        final OutputMessage echoedLine = echoResult.getLogs().getStdout().get(0);
        assertEquals("Hello OpenSandbox E2E", echoedLine.getText());
        assertFalse(echoedLine.isError());
        assertRecentTimestampMs(echoedLine.getTimestamp(), 60_000);
        assertEquals(0, echoResult.getLogs().getStderr().size());

        // The streamed view must agree with the aggregated execution logs.
        assertTerminalEventContract(initEvents, completedEvents, errors, echoResult.getId());
        assertEquals(1, stdoutMessages.size());
        final OutputMessage streamedLine = stdoutMessages.get(0);
        assertEquals("Hello OpenSandbox E2E", streamedLine.getText());
        assertFalse(streamedLine.isError());
        assertRecentTimestampMs(streamedLine.getTimestamp(), 60_000);
        assertTrue(stderrMessages.isEmpty());

        // --- Working directory case -----------------------------------------------------------
        final Execution pwdResult =
                sandbox.commands()
                        .run(
                                RunCommandRequest.builder()
                                        .command("pwd")
                                        .workingDirectory("/tmp")
                                        .build());
        assertNotNull(pwdResult);
        assertNotNull(pwdResult.getId());
        assertNull(pwdResult.getError());
        assertEquals(1, pwdResult.getLogs().getStdout().size());
        final OutputMessage pwdLine = pwdResult.getLogs().getStdout().get(0);
        assertEquals("/tmp", pwdLine.getText());
        assertFalse(pwdLine.isError());
        assertRecentTimestampMs(pwdLine.getTimestamp(), 60_000);

        // --- Background case: the call must return long before the 30 s sleep ends -------------
        final long launchedAt = System.currentTimeMillis();
        sandbox.commands()
                .run(RunCommandRequest.builder().command("sleep 30").background(true).build());
        final long executionTime = System.currentTimeMillis() - launchedAt;
        assertTrue(
                executionTime < 10000,
                String.format(
                        "Background command should return quickly, but took %d ms", executionTime));

        // --- Failure case: error XOR complete, and the error must be present ------------------
        // Reset every sink so the contract check only sees events from the failing command.
        initEvents.clear();
        completedEvents.clear();
        errors.clear();
        results.clear();
        stdoutMessages.clear();
        stderrMessages.clear();

        final String missingCommand = "nonexistent-command-that-does-not-exist";
        final Execution failResult =
                sandbox.commands()
                        .run(
                                RunCommandRequest.builder()
                                        .command(missingCommand)
                                        .handlers(handlers)
                                        .build());
        assertNotNull(failResult);
        assertNotNull(failResult.getId());
        assertFalse(failResult.getId().isBlank());
        assertNotNull(failResult.getError());
        assertEquals("CommandExecError", failResult.getError().getName());
        assertFalse(failResult.getLogs().getStderr().isEmpty());
        assertTrue(
                failResult.getLogs().getStderr().stream()
                        .anyMatch(line -> line.getText().contains(missingCommand)));
        assertTrue(failResult.getLogs().getStderr().stream().allMatch(OutputMessage::isError));
        assertRecentTimestampMs(failResult.getLogs().getStderr().get(0).getTimestamp(), 60_000);

        assertTerminalEventContract(initEvents, completedEvents, errors, failResult.getId());
        assertTrue(completedEvents.isEmpty(), "Failing command should not emit completion event");
    }

    /**
     * Verifies per-command environment injection: a probe shell command prints
     * {@code __EMPTY__} when the variable is unset or empty, and prints the injected value when
     * the variable is supplied on the request.
     */
    @Test
    @Order(4)
    @DisplayName("Command execution with env injection")
    @Timeout(value = 2, unit = TimeUnit.MINUTES)
    void testRunCommandWithEnvInjection() {
        assertNotNull(sandbox);

        final String envKey = "OPEN_SANDBOX_E2E_CMD_ENV";
        final String envValue = "env-ok-" + System.currentTimeMillis();
        // Prints the variable's value, or the sentinel "__EMPTY__" when it is unset/empty.
        final String probeCommand =
                "sh -c 'if [ -z \"${"
                        + envKey
                        + "-}\" ]; then echo \"__EMPTY__\"; else echo \"${"
                        + envKey
                        + "}\"; fi'";

        // Joins stdout lines with '\n' without a leading separator.
        final java.util.function.BinaryOperator<String> joinLines =
                (acc, line) -> acc.isEmpty() ? line : acc + "\n" + line;

        // Without injection the probe must report the sentinel.
        final RunCommandRequest plainRequest =
                RunCommandRequest.builder().command(probeCommand).build();
        final Execution baseline = sandbox.commands().run(plainRequest);
        assertNotNull(baseline);
        assertNull(baseline.getError());
        final String baselineOutput =
                baseline.getLogs().getStdout().stream()
                        .map(OutputMessage::getText)
                        .reduce("", joinLines)
                        .trim();
        assertEquals("__EMPTY__", baselineOutput);

        // With injection the probe must echo exactly the injected value.
        final RunCommandRequest injectedRequest =
                RunCommandRequest.builder()
                        .command(probeCommand)
                        .env(envKey, envValue)
                        .env("OPEN_SANDBOX_E2E_SECOND_ENV", "second-ok")
                        .build();
        final Execution injected = sandbox.commands().run(injectedRequest);
        assertNotNull(injected);
        assertNull(injected.getError());
        final String injectedOutput =
                injected.getLogs().getStdout().stream()
                        .map(OutputMessage::getText)
                        .reduce("", joinLines)
                        .trim();
        assertEquals(envValue, injectedOutput);
    }

    // ==========================================
    // Command Status and Filesystem Operations Tests
    // ==========================================

    /**
     * Starts a background command that prints two lines, checks that its status can be queried by
     * id, and then polls the background logs (feeding the returned cursor back into the next
     * request) until both lines have been observed or 20 attempts have been made.
     */
    @Test
    @Order(4)
    @DisplayName("Command status + background logs")
    @Timeout(value = 2, unit = TimeUnit.MINUTES)
    void testCommandStatusAndLogs() throws Exception {
        assertNotNull(sandbox);

        final Execution launched =
                sandbox.commands()
                        .run(
                                RunCommandRequest.builder()
                                        .command(
                                                "sh -c 'echo log-line-1; echo log-line-2; sleep 2'")
                                        .background(true)
                                        .build());
        final String commandId = launched.getId();
        assertNotNull(commandId);

        // The status lookup must echo the id and report a running flag.
        final CommandStatus status = sandbox.commands().getCommandStatus(commandId);
        assertEquals(commandId, status.getId());
        assertNotNull(status.getRunning());

        final StringBuilder collected = new StringBuilder();
        Long cursor = null;
        int attempt = 0;
        while (attempt < 20) {
            final CommandLogs chunk =
                    sandbox.commands().getBackgroundCommandLogs(commandId, cursor);
            collected.append(chunk.getContent());
            cursor = chunk.getCursor();
            // The second line is printed last, so seeing it means the output is complete.
            if (collected.indexOf("log-line-2") >= 0) {
                break;
            }
            Thread.sleep(1000);
            attempt++;
        }

        final String allLogs = collected.toString();
        assertTrue(allLogs.contains("log-line-1"));
        assertTrue(allLogs.contains("log-line-2"));
    }

    /**
     * End-to-end walk through the sandbox filesystem API on the shared sandbox.
     *
     * <p>Steps, in order: create two directories under {@code /tmp}; write three files from three
     * different data sources (String, byte array, InputStream); read them back in full and
     * partially; check file metadata; search; change permissions/ownership; overwrite contents;
     * replace a substring; move a file; delete a file; delete both directories.
     *
     * <p>The steps are order-dependent: later assertions (modification times, search results)
     * rely on the state left by earlier ones.
     */
    @Test
    @Order(5)
    @DisplayName("Filesystem operations: CRUD + replace/move/delete + mtime checks")
    @Timeout(value = 2, unit = TimeUnit.MINUTES)
    void testBasicFilesystemOperations() {
        assertNotNull(sandbox);
        // Timestamp suffix keeps directory names unique per run.
        String testDir1 = "/tmp/fs_test1_" + System.currentTimeMillis();
        String testDir2 = "/tmp/fs_test2_" + System.currentTimeMillis();

        // NOTE(review): modes are passed as plain decimal ints (755, 644); presumably the SDK
        // treats the digits as an octal permission string — confirm against the SDK.
        WriteEntry dirEntry1 = WriteEntry.builder().path(testDir1).mode(755).build();
        WriteEntry dirEntry2 = WriteEntry.builder().path(testDir2).mode(644).build();

        sandbox.files().createDirectories(List.of(dirEntry1, dirEntry2));

        Map<String, EntryInfo> dirInfo = sandbox.files().readFileInfo(List.of(testDir1, testDir2));
        assertEquals(testDir1, dirInfo.get(testDir1).getPath());
        assertEquals(755, dirInfo.get(testDir1).getMode());
        // NOTE(review): the last argument looks like a tolerance (seconds?) — confirm in the
        // assertTimesClose helper, which is defined outside this method.
        assertTimesClose(
                dirInfo.get(testDir1).getCreatedAt(), dirInfo.get(testDir1).getModifiedAt(), 2);

        // Cross-check through the shell: exactly the two new directories should match
        // "fs_test" in /tmp.
        Execution lsResult =
                sandbox.commands()
                        .run(
                                RunCommandRequest.builder()
                                        .command("ls -la |grep fs_test")
                                        .workingDirectory("/tmp")
                                        .build());

        assertEquals(2, lsResult.getLogs().getStdout().size());

        String testFile1 = testDir1 + "/test_file1.txt";
        String testFile2 = testDir1 + "/test_file2.txt";
        String testFile3 = testDir1 + "/test_file3.txt";
        // Includes non-ASCII characters so byte length differs from character count.
        String testContent = "Hello Filesystem!\nLine 2 with special chars: åäö\nLine 3";

        // Same content supplied three ways: String, byte[] and InputStream.
        WriteEntry writeEntry1 =
                WriteEntry.builder().path(testFile1).data(testContent).mode(644).build();
        WriteEntry writeEntry2 =
                WriteEntry.builder()
                        .path(testFile2)
                        .data(testContent.getBytes(StandardCharsets.UTF_8))
                        .mode(755)
                        .build();
        WriteEntry writeEntry3 =
                WriteEntry.builder()
                        .path(testFile3)
                        .data(
                                new ByteArrayInputStream(
                                        testContent.getBytes(StandardCharsets.UTF_8)))
                        .group("nogroup")
                        .owner("nobody")
                        .mode(755)
                        .build();

        sandbox.files().write(List.of(writeEntry1, writeEntry2, writeEntry3));

        // Read back through each read API: full string, ranged string, byte array, stream.
        String readContent1 =
                sandbox.files().readFile(testFile1, StandardCharsets.UTF_8.name(), null);
        // Range "bytes=0-9" covers the first ten bytes, which are all ASCII here.
        String readContent1Partial =
                sandbox.files().readFile(testFile1, StandardCharsets.UTF_8.name(), "bytes=0-9");

        byte[] readBytes2 = sandbox.files().readByteArray(testFile2, null);
        String readContent2 = new String(readBytes2, StandardCharsets.UTF_8);

        try (java.io.InputStream inputStream = sandbox.files().readStream(testFile3, null)) {
            byte[] streamBytes = inputStream.readAllBytes();
            String readContent3 = new String(streamBytes, StandardCharsets.UTF_8);

            // Verify content matches original for all files
            assertEquals(testContent, readContent1, "Content of testFile1 should match");
            assertEquals(testContent, readContent2, "Content of testFile2 should match");
            assertEquals(testContent, readContent3, "Content of testFile3 should match");

            // Verify partial read works correctly
            assertEquals(
                    testContent.substring(0, 10),
                    readContent1Partial,
                    "Partial read should match first 10 characters");
        } catch (java.io.IOException e) {
            // The test method declares no checked exceptions, so rethrow unchecked.
            throw new RuntimeException("Failed to read stream", e);
        }

        // Metadata for all three files is fetched in a single call.
        List<String> allTestFiles = List.of(testFile1, testFile2, testFile3);
        Map<String, EntryInfo> fileInfoMap = sandbox.files().readFileInfo(allTestFiles);
        // Size is compared in UTF-8 bytes, not characters.
        long expectedSize = testContent.getBytes(StandardCharsets.UTF_8).length;

        EntryInfo fileInfo1 = fileInfoMap.get(testFile1);
        assertNotNull(fileInfo1, "FileInfo for testFile1 should not be null");
        assertEquals(testFile1, fileInfo1.getPath());
        assertEquals(expectedSize, fileInfo1.getSize(), "File1 size should match content length");
        assertEquals(644, fileInfo1.getMode(), "File1 mode should be 644");
        assertNotNull(fileInfo1.getOwner(), "File1 owner should not be null");
        assertNotNull(fileInfo1.getGroup(), "File1 group should not be null");
        assertTimesClose(fileInfo1.getCreatedAt(), fileInfo1.getModifiedAt(), 2);

        EntryInfo fileInfo2 = fileInfoMap.get(testFile2);
        assertNotNull(fileInfo2, "FileInfo for testFile2 should not be null");
        assertEquals(testFile2, fileInfo2.getPath());
        assertEquals(expectedSize, fileInfo2.getSize(), "File2 size should match content length");
        assertEquals(755, fileInfo2.getMode(), "File2 mode should be 755");
        assertNotNull(fileInfo2.getOwner(), "File2 owner should not be null");
        assertNotNull(fileInfo2.getGroup(), "File2 group should not be null");
        assertTimesClose(fileInfo2.getCreatedAt(), fileInfo2.getModifiedAt(), 2);

        // File3 was written with an explicit owner/group, so those are checked exactly.
        EntryInfo fileInfo3 = fileInfoMap.get(testFile3);
        assertNotNull(fileInfo3, "FileInfo for testFile3 should not be null");
        assertEquals(testFile3, fileInfo3.getPath());
        assertEquals(expectedSize, fileInfo3.getSize(), "File3 size should match content length");
        assertEquals(755, fileInfo3.getMode(), "File3 mode should be 755");
        assertEquals("nobody", fileInfo3.getOwner(), "File3 owner should be nobody");
        assertEquals("nogroup", fileInfo3.getGroup(), "File3 group should be nogroup");
        assertTimesClose(fileInfo3.getCreatedAt(), fileInfo3.getModifiedAt(), 2);

        // Searching the first directory with "*" must return exactly the three files.
        SearchEntry searchAllEntry = SearchEntry.builder().path(testDir1).pattern("*").build();
        Set<String> found = new HashSet<>();
        for (EntryInfo e : sandbox.files().search(searchAllEntry)) {
            found.add(e.getPath());
        }
        assertEquals(Set.of(testFile1, testFile2, testFile3), found);

        // Change mode, owner and group of two files in one request.
        SetPermissionEntry permEntry1 =
                SetPermissionEntry.builder()
                        .path(testFile1)
                        .mode(755)
                        .owner("nobody")
                        .group("nogroup")
                        .build();
        SetPermissionEntry permEntry2 =
                SetPermissionEntry.builder()
                        .path(testFile2)
                        .mode(600)
                        .owner("nobody")
                        .group("nogroup")
                        .build();
        sandbox.files().setPermissions(List.of(permEntry1, permEntry2));

        // Verify permission changes for both files in single call
        Map<String, EntryInfo> updatedInfoMap =
                sandbox.files().readFileInfo(List.of(testFile1, testFile2));
        EntryInfo updatedInfo1 = updatedInfoMap.get(testFile1);
        EntryInfo updatedInfo2 = updatedInfoMap.get(testFile2);

        assertNotNull(updatedInfo1, "Updated info for testFile1 should not be null");
        assertEquals(755, updatedInfo1.getMode(), "testFile1 mode should be updated to 755");
        assertEquals(
                "nobody", updatedInfo1.getOwner(), "testFile1 owner should be updated to nobody");
        assertEquals(
                "nogroup", updatedInfo1.getGroup(), "testFile1 group should be updated to nogroup");

        assertNotNull(updatedInfo2, "Updated info for testFile2 should not be null");
        assertEquals(600, updatedInfo2.getMode(), "testFile2 mode should be updated to 600");
        assertEquals(
                "nobody", updatedInfo2.getOwner(), "testFile2 owner should be updated to nobody");
        assertEquals(
                "nogroup", updatedInfo2.getGroup(), "testFile2 group should be updated to nogroup");

        // Overwrite both files with longer content and confirm size and mtime move forward.
        EntryInfo beforeUpdate = sandbox.files().readFileInfo(List.of(testFile1)).get(testFile1);
        String updatedContent1 = testContent + "\nAppended line to file1";
        String updatedContent2 = testContent + "\nAppended line to file2";
        // Short pause between the "before" snapshot and the write; presumably so the
        // modification timestamps can differ.
        try {
            Thread.sleep(50);
        } catch (InterruptedException ignored) {
        }
        WriteEntry updateEntry1 =
                WriteEntry.builder().path(testFile1).data(updatedContent1).mode(644).build();
        WriteEntry updateEntry2 =
                WriteEntry.builder().path(testFile2).data(updatedContent2).mode(755).build();
        sandbox.files().write(List.of(updateEntry1, updateEntry2));

        String newContent1 = sandbox.files().readFile(testFile1, "UTF-8", null);
        String newContent2 = sandbox.files().readFile(testFile2, "UTF-8", null);
        assertEquals(updatedContent1, newContent1);
        assertEquals(updatedContent2, newContent2);

        EntryInfo afterUpdate = sandbox.files().readFileInfo(List.of(testFile1)).get(testFile1);
        assertEquals(
                updatedContent1.getBytes(StandardCharsets.UTF_8).length, afterUpdate.getSize());
        // NOTE(review): the meaning of the trailing (1, 1000) arguments is defined by the
        // assertModifiedUpdated helper outside this method — confirm there.
        assertModifiedUpdated(beforeUpdate.getModifiedAt(), afterUpdate.getModifiedAt(), 1, 1000);

        // Replace contents
        EntryInfo beforeReplace = afterUpdate;
        try {
            Thread.sleep(50);
        } catch (InterruptedException ignored) {
        }
        sandbox.files()
                .replaceContents(
                        List.of(
                                ContentReplaceEntry.builder()
                                        .path(testFile1)
                                        .oldContent("Appended line to file1")
                                        .newContent("Replaced line in file1")
                                        .build()));
        String replaced = sandbox.files().readFile(testFile1, "UTF-8", null);
        assertTrue(replaced.contains("Replaced line in file1"));
        assertFalse(replaced.contains("Appended line to file1"));
        EntryInfo afterReplace = sandbox.files().readFileInfo(List.of(testFile1)).get(testFile1);
        assertModifiedUpdated(beforeReplace.getModifiedAt(), afterReplace.getModifiedAt(), 1, 1000);

        // Move file3
        String movedPath = testDir2 + "/moved_file3.txt";
        sandbox.files()
                .moveFiles(List.of(MoveEntry.builder().src(testFile3).dest(movedPath).build()));
        String moved =
                new String(sandbox.files().readByteArray(movedPath, null), StandardCharsets.UTF_8);
        // File3 was never overwritten above, so it still holds the original content.
        assertEquals(testContent, moved);
        // The source path must no longer be readable after the move.
        assertThrows(Exception.class, () -> sandbox.files().readByteArray(testFile3, null));

        // Delete file2
        sandbox.files().deleteFiles(List.of(testFile2));
        assertThrows(Exception.class, () -> sandbox.files().readFile(testFile2, "UTF-8", null));
        // After the move and the delete only file1 should remain in the first directory.
        Set<String> after = new HashSet<>();
        for (EntryInfo e :
                sandbox.files().search(SearchEntry.builder().path(testDir1).pattern("*").build())) {
            after.add(e.getPath());
        }
        assertEquals(Set.of(testFile1), after);

        // Delete directories
        sandbox.files().deleteDirectories(List.of(testDir1, testDir2));
        // Confirm through the shell that neither directory exists any more.
        Execution verify =
                sandbox.commands()
                        .run(
                                RunCommandRequest.builder()
                                        .command(
                                                "test ! -d "
                                                        + testDir1
                                                        + " && test ! -d "
                                                        + testDir2
                                                        + " && echo OK")
                                        .workingDirectory("/tmp")
                                        .build());
        assertNull(verify.getError());
        assertEquals(1, verify.getLogs().getStdout().size());
        assertEquals("OK", verify.getLogs().getStdout().get(0).getText());
    }

    /**
     * Runs a long {@code sleep 30} on a worker thread, waits for its init event, interrupts it by
     * id and verifies that the blocked {@code run} call returns early with an error signal.
     *
     * <p>The worker executor is shut down in a {@code finally} block so that a failed assertion
     * or a timed-out {@code Future.get} does not leave a thread blocked on the remote command.
     *
     * @throws Exception if waiting on the latch or the future is interrupted, times out, or the
     *     worker task itself fails
     */
    @Test
    @Order(6)
    @DisplayName("Interrupt command")
    @Timeout(value = 2, unit = TimeUnit.MINUTES)
    void testInterruptCommand() throws Exception {
        assertNotNull(sandbox);

        List<ExecutionInit> initEvents = Collections.synchronizedList(new ArrayList<>());
        List<ExecutionComplete> completedEvents = Collections.synchronizedList(new ArrayList<>());
        List<ExecutionError> errors = Collections.synchronizedList(new ArrayList<>());
        // Released by the init handler so the test thread knows the execution id is available.
        CountDownLatch initLatch = new CountDownLatch(1);

        ExecutionHandlers handlers =
                ExecutionHandlers.builder()
                        .onInit(
                                (ExecutionInit init) -> {
                                    initEvents.add(init);
                                    initLatch.countDown();
                                })
                        .onExecutionComplete(completedEvents::add)
                        .onError(errors::add)
                        .build();

        ExecutorService ex = Executors.newSingleThreadExecutor();
        try {
            long start = System.currentTimeMillis();
            // run() blocks until the command ends, so it has to live on another thread.
            Future<Execution> future =
                    ex.submit(
                            () ->
                                    sandbox.commands()
                                            .run(
                                                    RunCommandRequest.builder()
                                                            .command("sleep 30")
                                                            .handlers(handlers)
                                                            .build()));
            assertTrue(initLatch.await(15, TimeUnit.SECONDS), "did not receive init event");
            assertEquals(1, initEvents.size());
            String id = initEvents.get(0).getId();
            assertNotNull(id);
            // Let the command run for a moment before interrupting it.
            Thread.sleep(2000);
            sandbox.commands().interrupt(id);
            Execution result = future.get(30, TimeUnit.SECONDS);
            long elapsed = System.currentTimeMillis() - start;
            assertNotNull(result);
            assertEquals(id, result.getId());
            // Well below the 30 s sleep: proves the interrupt, not natural completion, ended it.
            assertTrue(elapsed < 20_000, "Interrupted command took too long: " + elapsed + "ms");
            // Exactly one terminal event kind (complete XOR error) must have been delivered.
            assertTrue((!completedEvents.isEmpty()) ^ (!errors.isEmpty()));
            assertTrue(result.getError() != null || !result.getLogs().getStderr().isEmpty());
        } finally {
            // Always release the worker, including when an assertion above fails.
            ex.shutdownNow();
        }
    }

    /**
     * Pauses the shared sandbox, polls its state once per second until it leaves
     * {@code Pausing}, and verifies that it ends up {@code Paused} and reports unhealthy.
     *
     * <p>NOTE(review): the initial 20 s sleep plus up to 300 one-second polls can exceed the
     * 5-minute {@code @Timeout}, so in practice the timeout bounds the wait, not the poll cap.
     *
     * @throws InterruptedException if the test thread is interrupted while sleeping
     */
    @Test
    @Order(7)
    @DisplayName("Sandbox Pause Operation")
    @Timeout(value = 5, unit = TimeUnit.MINUTES)
    void testSandboxPause() throws InterruptedException {
        assertNotNull(sandbox);

        Thread.sleep(20000);
        sandbox.pause();

        SandboxStatus finalStatus = null;

        // Poll until the sandbox reports any state other than the transitional "Pausing".
        for (int pollCount = 0; pollCount < 300; pollCount++) {
            Thread.sleep(1000);

            SandboxStatus currentStatus = sandbox.getInfo().getStatus();
            if (!"Pausing".equals(currentStatus.getState())) {
                finalStatus = currentStatus;
                break;
            }
        }

        assertNotNull(finalStatus, "Failed to get final status after pause operation");
        assertEquals("Paused", finalStatus.getState(), "Sandbox should be in Paused state");

        // pause => unhealthy (allow a few seconds for the health probe to notice)
        boolean healthy = true;
        for (int i = 0; i < 10; i++) {
            healthy = sandbox.isHealthy();
            if (!healthy) break;
            Thread.sleep(500);
        }
        assertFalse(healthy, "Sandbox should be unhealthy after pause");
    }

    /**
     * Resumes the shared sandbox through {@code Sandbox.resumer()}, verifies that it reports
     * {@code Running}, and waits up to roughly 30 s for the original {@code sandbox} handle to
     * become healthy again.
     *
     * <p>The handle returned by {@code resume()} is closed in a {@code finally} block, matching
     * how the other tests in this class close the handles they create.
     *
     * @throws InterruptedException if the test thread is interrupted while sleeping
     */
    @Test
    @Order(8)
    @DisplayName("Sandbox Resume Operation")
    @Timeout(value = 3, unit = TimeUnit.MINUTES)
    void testSandboxResume() throws InterruptedException {
        assertNotNull(sandbox);

        Sandbox resumedSandbox =
                Sandbox.resumer()
                        .sandboxId(sandbox.getId())
                        .connectionConfig(sharedConnectionConfig)
                        .resumeTimeout(Duration.ofMinutes(1))
                        .healthCheckPollingInterval(Duration.ofSeconds(1))
                        .resume();

        try {
            SandboxStatus status = resumedSandbox.getInfo().getStatus();

            assertNotNull(status, "Failed to get final status after resume operation");
            assertEquals("Running", status.getState());

            // The health check deliberately goes through the original shared handle.
            boolean healthy = false;
            for (int i = 0; i < 30; i++) {
                healthy = sandbox.isHealthy();
                if (healthy) break;
                Thread.sleep(1000);
            }
            assertTrue(healthy, "Sandbox should be healthy after resume");
        } finally {
            // NOTE(review): close() is presumed to release only this handle's local resources
            // (other tests call kill() separately to stop the remote sandbox) — confirm.
            resumedSandbox.close();
        }
    }

    /**
     * Sends a custom {@code X-Request-ID} header while looking up a sandbox id that does not
     * exist, and checks that the resulting {@link SandboxApiException} carries the same request
     * id back to the caller.
     */
    @Test
    @Order(9)
    @DisplayName("X-Request-ID passthrough on server error")
    @Timeout(value = 2, unit = TimeUnit.MINUTES)
    void testXRequestIdPassthroughOnServerError() {
        final String requestId = "e2e-java-server-" + System.currentTimeMillis();
        final String missingSandboxId = "missing-" + requestId;

        // Copy of the shared connection settings with the tracing header added.
        final ConnectionConfig tracedConfig =
                ConnectionConfig.builder()
                        .apiKey(sharedConnectionConfig.getApiKey())
                        .domain(sharedConnectionConfig.getDomain())
                        .protocol(sharedConnectionConfig.getProtocol())
                        .requestTimeout(sharedConnectionConfig.getRequestTimeout())
                        .headers(Map.of("X-Request-ID", requestId))
                        .build();

        final SandboxApiException thrown =
                assertThrows(
                        SandboxApiException.class,
                        () -> {
                            // Either connect() or getInfo() may be the call that fails.
                            final Sandbox handle =
                                    Sandbox.connector()
                                            .connectionConfig(tracedConfig)
                                            .sandboxId(missingSandboxId)
                                            .connect();
                            try {
                                handle.getInfo();
                            } finally {
                                handle.close();
                            }
                        });

        assertEquals(requestId, thrown.getRequestId());
    }
}


================================================
FILE: tests/java/src/test/java/com/alibaba/opensandbox/e2e/SandboxManagerE2ETest.java
================================================
/*
 * Copyright 2025 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.opensandbox.e2e;

import static org.junit.jupiter.api.Assertions.*;

import com.alibaba.opensandbox.sandbox.Sandbox;
import com.alibaba.opensandbox.sandbox.SandboxManager;
import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxException;
import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.*;
import java.time.Duration;
import java.util.*;
import java.util.concurrent.TimeUnit;
import org.junit.jupiter.api.*;

/**
 * E2E tests for SandboxManager list/filter semantics.
 *
 * <p>Focus:
 *
 * <ul>
 *   <li>states filter uses OR logic
 *   <li>metadata filter uses AND logic
 * </ul>
 *
 * <p>We create 3 dedicated sandboxes per run to keep assertions deterministic and avoid impacting
 * the shared sandbox used by other tests. Every sandbox carries the same per-run random {@code
 * tag} metadata value and every list query filters on it.
 *
 * <p>Fixture layout created by {@link #setup()}:
 *
 * <ul>
 *   <li>s1: team=t1, env=prod, left Running
 *   <li>s2: team=t1, env=dev, left Running
 *   <li>s3: env=prod (no team), Paused
 * </ul>
 */
@Tag("e2e")
@DisplayName("SandboxManager E2E Tests (Java SDK) - List/Filter Semantics")
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
public class SandboxManagerE2ETest extends BaseE2ETest {

    private SandboxManager sandboxManager;
    private Sandbox s1;
    private Sandbox s2;
    private Sandbox s3;
    private String tag;

    /**
     * Creates the three tagged sandboxes and pauses s3.
     *
     * @throws InterruptedException if interrupted while polling for the Paused state
     */
    @BeforeAll
    void setup() throws InterruptedException {
        sandboxManager = SandboxManager.builder().connectionConfig(sharedConnectionConfig).build();
        tag = "e2e-sandbox-manager-" + UUID.randomUUID().toString().substring(0, 8);
        Map<String, String> resourceMap = new HashMap<>();
        resourceMap.put("cpu", "1");
        resourceMap.put("memory", "2Gi");

        s1 = createSandbox(resourceMap, Map.of("tag", tag, "team", "t1", "env", "prod"));
        s2 = createSandbox(resourceMap, Map.of("tag", tag, "team", "t1", "env", "dev"));
        s3 = createSandbox(resourceMap, Map.of("tag", tag, "env", "prod"));

        assertTrue(s1.isHealthy());
        assertTrue(s2.isHealthy());
        assertTrue(s3.isHealthy());

        // Pause s3 to create a deterministic non-Running state.
        sandboxManager.pauseSandbox(s3.getId());
        long deadline = System.currentTimeMillis() + 180_000;
        while (System.currentTimeMillis() < deadline) {
            SandboxInfo info = sandboxManager.getSandboxInfo(s3.getId());
            if ("Paused".equals(info.getStatus().getState())) {
                break;
            }
            Thread.sleep(1000);
        }
        // Fails the whole class if the deadline elapsed without reaching Paused.
        assertEquals("Paused", sandboxManager.getSandboxInfo(s3.getId()).getStatus().getState());
    }

    /** Best-effort cleanup: kills and closes whatever sandboxes were created, then the manager. */
    @AfterAll
    void teardown() {
        // Arrays.asList accepts null elements. List.of would throw NullPointerException when
        // setup() failed before every sandbox was created, skipping all cleanup below.
        for (Sandbox s : Arrays.asList(s1, s2, s3)) {
            if (s == null) continue;
            try {
                s.kill();
            } catch (Exception ignored) {
            }
            try {
                s.close();
            } catch (Exception ignored) {
            }
        }
        if (sandboxManager != null) {
            try {
                sandboxManager.close();
            } catch (Exception ignored) {
            }
        }
    }

    @Test
    @Order(1)
    @DisplayName("states filter uses OR semantics")
    @Timeout(value = 2, unit = TimeUnit.MINUTES)
    void testStatesFilterOrLogic() {
        // Running OR Paused: all three tagged sandboxes must be listed.
        Set<String> ids =
                listIds(
                        SandboxFilter.builder()
                                .states("Running", "Paused")
                                .metadata(Map.of("tag", tag))
                                .pageSize(50)
                                .build());
        assertTrue(ids.containsAll(Set.of(s1.getId(), s2.getId(), s3.getId())));

        // Paused only: just s3.
        Set<String> pausedIds =
                listIds(
                        SandboxFilter.builder()
                                .states("Paused")
                                .metadata(Map.of("tag", tag))
                                .pageSize(50)
                                .build());
        assertTrue(pausedIds.contains(s3.getId()));
        assertFalse(pausedIds.contains(s1.getId()));
        assertFalse(pausedIds.contains(s2.getId()));

        // Running only: s1 and s2.
        Set<String> runningIds =
                listIds(
                        SandboxFilter.builder()
                                .states("Running")
                                .metadata(Map.of("tag", tag))
                                .pageSize(50)
                                .build());
        assertTrue(runningIds.contains(s1.getId()));
        assertTrue(runningIds.contains(s2.getId()));
        assertFalse(runningIds.contains(s3.getId()));
    }

    @Test
    @Order(2)
    @DisplayName("metadata filter uses AND semantics")
    @Timeout(value = 2, unit = TimeUnit.MINUTES)
    void testMetadataFilterAndLogic() {
        // tag AND team=t1: s1 and s2 (s3 has no team).
        Set<String> ids =
                listIds(
                        SandboxFilter.builder()
                                .metadata(Map.of("tag", tag, "team", "t1"))
                                .pageSize(50)
                                .build());
        assertTrue(ids.contains(s1.getId()));
        assertTrue(ids.contains(s2.getId()));
        assertFalse(ids.contains(s3.getId()));

        // tag AND team=t1 AND env=prod: only s1.
        Set<String> ids2 =
                listIds(
                        SandboxFilter.builder()
                                .metadata(Map.of("tag", tag, "team", "t1", "env", "prod"))
                                .pageSize(50)
                                .build());
        assertTrue(ids2.contains(s1.getId()));
        assertFalse(ids2.contains(s2.getId()));
        assertFalse(ids2.contains(s3.getId()));

        // tag AND env=prod: s1 and s3 (s2 is env=dev).
        Set<String> ids3 =
                listIds(
                        SandboxFilter.builder()
                                .metadata(Map.of("tag", tag, "env", "prod"))
                                .pageSize(50)
                                .build());
        assertTrue(ids3.contains(s1.getId()));
        assertTrue(ids3.contains(s3.getId()));
        assertFalse(ids3.contains(s2.getId()));

        // tag AND team=t2: none of this run's sandboxes may appear.
        Set<String> noneMatch =
                listIds(
                        SandboxFilter.builder()
                                .metadata(Map.of("tag", tag, "team", "t2"))
                                .pageSize(50)
                                .build());
        assertFalse(noneMatch.contains(s1.getId()));
        assertFalse(noneMatch.contains(s2.getId()));
        assertFalse(noneMatch.contains(s3.getId()));
    }

    @Test
    @Order(3)
    @DisplayName("invalid operations raise SandboxException")
    @Timeout(value = 1, unit = TimeUnit.MINUTES)
    void testInvalidOperations() {
        String nonExistentId = "non-existent-" + System.nanoTime();
        assertThrows(SandboxException.class, () -> sandboxManager.getSandboxInfo(nonExistentId));
        assertThrows(SandboxException.class, () -> sandboxManager.pauseSandbox(nonExistentId));
        assertThrows(SandboxException.class, () -> sandboxManager.resumeSandbox(nonExistentId));
        assertThrows(SandboxException.class, () -> sandboxManager.killSandbox(nonExistentId));
        assertThrows(
                SandboxException.class,
                () -> sandboxManager.renewSandbox(nonExistentId, Duration.ofMinutes(5)));
    }

    /** Builds one sandbox with the settings shared by all fixtures and the given metadata. */
    private Sandbox createSandbox(Map<String, String> resources, Map<String, String> metadata) {
        return Sandbox.builder()
                .connectionConfig(sharedConnectionConfig)
                .image(getSandboxImage())
                .resource(resources)
                .timeout(Duration.ofMinutes(5))
                .readyTimeout(Duration.ofSeconds(60))
                .metadata(metadata)
                .env("E2E_TEST", "true")
                .healthCheckPollingInterval(Duration.ofMillis(500))
                .build();
    }

    /** Runs one list query and returns the ids of the sandboxes on the returned page. */
    private Set<String> listIds(SandboxFilter filter) {
        PagedSandboxInfos page = sandboxManager.listSandboxInfos(filter);
        Set<String> ids = new HashSet<>();
        for (SandboxInfo info : page.getSandboxInfos()) {
            ids.add(info.getId());
        }
        return ids;
    }
}


================================================
FILE: tests/java/src/test/resources/test.properties
================================================
# OpenSandbox E2E Test Configuration
# Default values for local/CI runs. Override via editing this file or by providing your own build.
opensandbox.test.domain=localhost:8080
opensandbox.test.protocol=http
opensandbox.test.api.key=e2e-test
opensandbox.sandbox.default.image=sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:latest


================================================
FILE: tests/javascript/README.md
================================================
# OpenSandbox JavaScript E2E Tests

This folder contains strict E2E tests for the JavaScript/TypeScript SDKs, aligned with `OpenSandbox/tests/python` and `OpenSandbox/tests/java`.

## Prerequisites

- Node.js (via nvm): **>= 20**
- pnpm (via corepack or global install)
- OpenSandbox server running

## Environment variables

These tests follow the same naming as Python tests:

- `OPENSANDBOX_TEST_DOMAIN` (default: `localhost:8080`)
- `OPENSANDBOX_TEST_PROTOCOL` (default: `http`)
- `OPENSANDBOX_TEST_API_KEY` (default: `e2e-test`)
- `OPENSANDBOX_SANDBOX_DEFAULT_IMAGE` (default: code-interpreter image)

## Run

```bash
cd OpenSandbox/tests/javascript

# Node >= 20 is required (SDK engines: node >= 20)
source ~/.nvm/nvm.sh
nvm use 22

# Ensure pnpm is available (repo pins pnpm@9.x)
corepack enable
corepack prepare pnpm@9.15.0 --activate

# Install test dependencies (vitest, typescript)
pnpm install

# Run tests (also builds SDKs)
pnpm test
```


================================================
FILE: tests/javascript/eslint.config.mjs
================================================
import js from "@eslint/js";
import tseslint from "typescript-eslint";

// Paths ESLint skips entirely.
const ignoredPaths = ["node_modules/**", "build/**", "**/*.d.ts"];

// Runtime globals the test sources reference; all are read-only.
const runtimeGlobals = {
  console: "readonly",
  process: "readonly",
  setTimeout: "readonly",
  clearTimeout: "readonly",
};

// Rule overrides applied to TypeScript files only.
const typescriptRules = {
  "@typescript-eslint/no-explicit-any": "off",
  "@typescript-eslint/no-unused-vars": ["error", { argsIgnorePattern: "^_", varsIgnorePattern: "^_" }],
};

const typescriptOverrides = {
  files: ["**/*.ts"],
  languageOptions: {
    parserOptions: {
      // Keep tests lint lightweight: do not require type-aware linting.
      // This avoids needing to include tool configs (e.g. vitest.config.ts) in tsconfig.
    },
    globals: runtimeGlobals,
  },
  rules: typescriptRules,
};

// Order matters: ignores first, then the recommended presets, then local overrides.
export default tseslint.config(
  { ignores: ignoredPaths },
  js.configs.recommended,
  ...tseslint.configs.recommended,
  typescriptOverrides,
);


================================================
FILE: tests/javascript/package.json
================================================
{
  "name": "opensandbox-javascript-e2e-tests",
  "version": "1.0.0",
  "private": true,
  "type": "module",
  "packageManager": "pnpm@9.15.0",
  "scripts": {
    "pretest": "pnpm install --prefer-offline",
    "prep:sdk": "pnpm -C ../../sdks install --prefer-offline && pnpm -C ../../sdks run build:js",
    "lint": "eslint . --max-warnings 0",
    "test": "pnpm run prep:sdk && pnpm exec vitest run",
    "pretest:ci": "pnpm install --prefer-offline",
    "test:ci": "pnpm run prep:sdk && pnpm exec vitest run --reporter=default --reporter=junit --outputFile=build/test-results/junit.xml"
  },
  "dependencies": {
    "@alibaba-group/opensandbox": "link:../../sdks/sandbox/javascript",
    "@alibaba-group/opensandbox-code-interpreter": "link:../../sdks/code-interpreter/javascript"
  },
  "devDependencies": {
    "@eslint/js": "^9.39.2",
    "@types/node": "^20.11.30",
    "eslint": "^9.39.2",
    "typescript": "^5.7.2",
    "typescript-eslint": "^8.52.0",
    "vitest": "^2.1.9"
  }
}


================================================
FILE: tests/javascript/tests/base_e2e.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { ConnectionConfig } from "@alibaba-group/opensandbox";

// Fallbacks used when the corresponding environment variable is not set.
export const DEFAULT_DOMAIN = "localhost:8080";
export const DEFAULT_PROTOCOL = "http";
export const DEFAULT_API_KEY = "e2e-test";
export const DEFAULT_IMAGE =
  "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:latest";

/**
 * Read an environment variable, substituting `fallback` only when the variable
 * is unset. A variable set to the empty string is returned as-is.
 */
function envOrDefault(name: string, fallback: string): string {
  return process.env[name] ?? fallback;
}

// Effective settings for this run: environment value first, default otherwise.
export const TEST_DOMAIN = envOrDefault("OPENSANDBOX_TEST_DOMAIN", DEFAULT_DOMAIN);
export const TEST_PROTOCOL = envOrDefault("OPENSANDBOX_TEST_PROTOCOL", DEFAULT_PROTOCOL);
export const TEST_API_KEY = envOrDefault("OPENSANDBOX_TEST_API_KEY", DEFAULT_API_KEY);
export const TEST_IMAGE = envOrDefault("OPENSANDBOX_SANDBOX_DEFAULT_IMAGE", DEFAULT_IMAGE);

/** Returns the sandbox image the tests should launch (same value as `TEST_IMAGE`). */
export function getSandboxImage(): string {
  return TEST_IMAGE;
}

/**
 * Build the ConnectionConfig used by the E2E tests from the TEST_* settings.
 *
 * @param useServerProxy forwarded unchanged to the ConnectionConfig options (default: false)
 */
export function createConnectionConfig(useServerProxy = false): ConnectionConfig {
  // Only an exact "https" selects https; every other value falls back to http.
  const protocol = TEST_PROTOCOL === "https" ? "https" : "http";
  const options = {
    domain: TEST_DOMAIN,
    protocol,
    apiKey: TEST_API_KEY,
    requestTimeoutSeconds: 180,
    useServerProxy,
  };
  return new ConnectionConfig(options);
}

/** Current wall-clock time in milliseconds since the Unix epoch. */
export function nowMs(): number {
  return Date.now();
}

/**
 * Assert that `ts` is a valid epoch-millisecond timestamp close to the current time.
 *
 * @param ts timestamp in milliseconds since the Unix epoch
 * @param toleranceMs maximum allowed absolute distance from now, in milliseconds
 * @throws Error if `ts` is not a finite positive number, or is further than
 *   `toleranceMs` away from now (in either direction)
 */
export function assertRecentTimestampMs(ts: number, toleranceMs = 180_000): void {
  // Number.isFinite rejects non-numbers, NaN and +/-Infinity in one check. NaN
  // must be rejected here: `NaN <= 0` and `NaN > toleranceMs` are both false,
  // so it would otherwise be accepted as a "recent" timestamp.
  if (!Number.isFinite(ts) || ts <= 0) throw new Error(`invalid timestamp: ${ts}`);
  const delta = Math.abs(nowMs() - ts);
  if (delta > toleranceMs) {
    throw new Error(`timestamp too far from now: delta=${delta}ms (ts=${ts})`);
  }
}

/**
 * Assert that `endpoint` is scheme-less and refers to `expectedPort`.
 *
 * Two shapes are accepted:
 * - route form, containing a "/": must end with `/<expectedPort>` and have a
 *   non-empty part before the first "/";
 * - `host:port` form: the text after the last ":" must be all digits and equal
 *   `expectedPort`, and the text before it must be non-empty.
 *
 * @throws Error describing the first rule the endpoint violates
 */
export function assertEndpointHasPort(endpoint: string, expectedPort: number): void {
  if (!endpoint) throw new Error("endpoint is empty");
  if (endpoint.includes("://")) throw new Error(`unexpected scheme in endpoint: ${endpoint}`);

  const firstSlash = endpoint.indexOf("/");
  if (firstSlash !== -1) {
    // Route form: the port is the final path segment.
    const routeSuffix = `/${expectedPort}`;
    if (!endpoint.endsWith(routeSuffix)) {
      throw new Error(`endpoint route must end with ${routeSuffix}: ${endpoint}`);
    }
    // A slash at index 0 means nothing precedes the route.
    if (firstSlash === 0) throw new Error(`missing domain in endpoint: ${endpoint}`);
    return;
  }

  // host:port form: split on the last colon.
  const colonAt = endpoint.lastIndexOf(":");
  if (colonAt === -1) throw new Error(`missing :port in endpoint: ${endpoint}`);
  const hostPart = endpoint.slice(0, colonAt);
  const portPart = endpoint.slice(colonAt + 1);
  if (hostPart === "") throw new Error(`missing host in endpoint: ${endpoint}`);
  if (!/^\d+$/.test(portPart)) throw new Error(`non-numeric port in endpoint: ${endpoint}`);
  if (Number(portPart) !== expectedPort) throw new Error(`endpoint port mismatch: ${endpoint} != :${expectedPort}`);
}


================================================
FILE: tests/javascript/tests/test_code_interpreter_e2e.test.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { afterAll, beforeAll, beforeEach, expect, test } from "vitest";

import { Sandbox, type ExecutionHandlers } from "@alibaba-group/opensandbox";

import {
  CodeInterpreter,
  SupportedLanguages,
} from "@alibaba-group/opensandbox-code-interpreter";

import {
  assertEndpointHasPort,
  assertRecentTimestampMs,
  createConnectionConfig,
  getSandboxImage,
} from "./base_e2e.ts";

let sandbox: Sandbox | null = null;
let ci: CodeInterpreter | null = null;

// ---------------------------------------------------------------------------
// Helpers: sandbox lifecycle & retry
// ---------------------------------------------------------------------------

/**
 * Options for creating the code-interpreter sandbox used by this suite.
 * A fresh object (including a fresh connection config) is built on every call.
 */
function sandboxCreateOptions() {
  // Environment passed into the sandbox, including where execd writes its log.
  const env = {
    E2E_TEST: "true",
    GO_VERSION: "1.25",
    JAVA_VERSION: "21",
    NODE_VERSION: "22",
    PYTHON_VERSION: "3.12",
    EXECD_LOG_FILE: "/tmp/opensandbox-e2e/logs/execd.log",
  };

  // Host directory mounted at the same path inside the sandbox; it contains
  // the EXECD_LOG_FILE location configured above.
  const execdLogVolume = {
    name: "execd-log",
    host: { path: "/tmp/opensandbox-e2e/logs" },
    mountPath: "/tmp/opensandbox-e2e/logs",
    readOnly: false,
  };

  return {
    connectionConfig: createConnectionConfig(),
    image: getSandboxImage(),
    entrypoint: ["/opt/opensandbox/code-interpreter.sh"],
    timeoutSeconds: 15 * 60,
    readyTimeoutSeconds: 60,
    metadata: { tag: "e2e-code-interpreter" },
    env,
    healthCheckPollingInterval: 200,
    volumes: [execdLogVolume],
  };
}

/**
 * Kill the current sandbox (best effort) and replace the module-level
 * `sandbox` / `ci` pair with freshly created instances.
 *
 * Both handles are cleared before anything is created, so a failure part-way
 * through can never leave `ci` bound to a sandbox that was already killed while
 * `sandbox` points at a healthy new one. After such a failure
 * `ensureSandboxAlive` sees a missing handle and recreates again.
 *
 * @throws whatever `Sandbox.create` or `CodeInterpreter.create` rejects with
 */
async function recreateSandbox() {
  const previous = sandbox;
  sandbox = null;
  ci = null;

  if (previous) {
    try {
      await previous.kill();
    } catch {
      /* ignore: the old sandbox may already be gone */
    }
  }

  sandbox = await Sandbox.create(sandboxCreateOptions());
  ci = await CodeInterpreter.create(sandbox);
}

/**
 * Make sure a usable sandbox + interpreter pair exists.
 *
 * Returns immediately when both handles are set and the health check passes;
 * otherwise (missing handle, failed or throwing health check) logs a warning
 * and recreates the sandbox.
 */
async function ensureSandboxAlive() {
  let healthy = false;
  if (sandbox && ci) {
    try {
      healthy = await sandbox.isHealthy();
    } catch {
      // A throwing health check counts as unhealthy.
      healthy = false;
    }
  }
  if (healthy) return;

  console.warn("  ensureSandboxAlive: sandbox unhealthy — recreating …");
  await recreateSandbox();
}

/**
 * Decide whether an error is worth retrying, by searching its string form for
 * one of the known socket/session failure markers.
 */
function isRetryableError(err: unknown): boolean {
  const text = String(err);
  const retryableMarkers = [
    "terminated",
    "other side closed",
    "fetch failed",
    "session is busy",
    "UND_ERR_SOCKET",
  ];
  return retryableMarkers.some((marker) => text.includes(marker));
}

/** Promise that resolves after roughly `ms` milliseconds (timer-based). */
function sleep(ms: number) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}

/**
 * Run `fn`, retrying it after retryable socket / session errors.
 *
 * `fn` is attempted at most `maxRetries + 1` times. After a retryable failure
 * that is not the last attempt, this waits `delayMs`, then health-checks the
 * sandbox (recreating it if dead) before trying again. Non-retryable errors
 * and the final failure are rethrown unchanged.
 */
async function withRetry<T>(
  fn: () => Promise<T>,
  maxRetries = 2,
  delayMs = 3000,
): Promise<T> {
  let attempt = 0;
  while (attempt <= maxRetries) {
    try {
      return await fn();
    } catch (err) {
      const retryable = isRetryableError(err);
      if (!retryable || attempt === maxRetries) throw err;
      console.warn(
        `  withRetry: attempt ${attempt + 1} failed, retrying in ${delayMs}ms …`,
        String(err).slice(0, 120),
      );
      await sleep(delayMs);
      await ensureSandboxAlive();
    }
    attempt += 1;
  }
  // Only reachable when the loop body never ran (negative maxRetries).
  throw new Error("unreachable");
}

// ---------------------------------------------------------------------------
// Setup / teardown
// ---------------------------------------------------------------------------

// Create the initial sandbox + interpreter pair once for the file.
// Hook timeout: 10 minutes.
beforeAll(async () => {
  await recreateSandbox();
}, 10 * 60_000);

// Before every test, verify the sandbox is still healthy and recreate it if not.
// Hook timeout: 5 minutes.
beforeEach(async () => {
  await ensureSandboxAlive();
}, 5 * 60_000);

// Best-effort teardown: kill the sandbox if one exists and ignore kill failures.
// Hook timeout: 5 minutes.
afterAll(async () => {
  if (!sandbox) return;
  try {
    await sandbox.kill();
  } catch {
    // ignore
  }
}, 5 * 60_000);

// Smoke check: the interpreter wraps the same sandbox, which is healthy, Running,
// exposes an endpoint for port 44772 and reports metrics with a recent timestamp.
test("01 creation and basic functionality", async () => {
  if (!sandbox || !ci) throw new Error("not initialized");

  expect(ci.id).toBe(sandbox.id);

  const healthy = await sandbox.isHealthy();
  expect(healthy).toBe(true);

  const sandboxInfo = await sandbox.getInfo();
  expect(sandboxInfo.status.state).toBe("Running");

  const port = 44772;
  const endpointInfo = await sandbox.getEndpoint(port);
  assertEndpointHasPort(endpointInfo.endpoint, port);

  const { timestamp } = await sandbox.getMetrics();
  assertRecentTimestampMs(timestamp);
});

// Exercises the context API in order: create -> get -> list (all, then by
// language) -> delete -> get must reject -> bulk delete by language.
test("01b context management: get/list/delete/deleteContexts", async () => {
  if (!ci) throw new Error("not initialized");

  // Create a Python context and check the returned descriptor.
  const ctx = await ci.codes.createContext(SupportedLanguages.PYTHON);
  expect(ctx.id).toBeTruthy();
  expect(ctx.language).toBe("python");

  // Fetching by id returns the same context.
  const got = await ci.codes.getContext(ctx.id!);
  expect(got.id).toBe(ctx.id);
  expect(got.language).toBe("python");

  // The context appears in the unfiltered listing...
  const all = await ci.codes.listContexts();
  expect(all.some((c) => c.id === ctx.id)).toBe(true);

  // ...and in the listing filtered by its language.
  const pyOnly = await ci.codes.listContexts(SupportedLanguages.PYTHON);
  expect(pyOnly.some((c) => c.id === ctx.id)).toBe(true);

  // After deletion, looking the context up must reject.
  await ci.codes.deleteContext(ctx.id!);
  await expect(ci.codes.getContext(ctx.id!)).rejects.toBeTruthy();

  // Bulk cleanup should not throw.
  await ci.codes.deleteContexts(SupportedLanguages.PYTHON);
});

// Runs a Java snippet in a dedicated context, checks streamed events and the
// result, then runs a failing snippet in the same context and checks the error.
test("02 java code execution", async () => {
  if (!ci) throw new Error("not initialized");

  const javaCtx = await ci.codes.createContext(SupportedLanguages.JAVA);
  expect(javaCtx.id).toBeTruthy();
  expect(javaCtx.language).toBe("java");

  // Collect streamed events so they can be asserted after the run completes.
  const stdout: string[] = [];
  const errors: string[] = [];
  const initIds: string[] = [];

  const handlers: ExecutionHandlers = {
    onStdout: (m) => {
      stdout.push(m.text);
    },
    onError: (e) => {
      errors.push(e.name);
    },
    onInit: (i) => {
      initIds.push(i.id);
    },
  };

  const r = await ci.codes.run(
    'System.out.println("Hello from Java!");\nint result = 2 + 2;\nSystem.out.println("2 + 2 = " + result);\nresult',
    { context: javaCtx, handlers }
  );
  expect(r.id).toBeTruthy();
  expect(r.error).toBeUndefined();
  // Accept the value either as the first execution result or via the stdout line.
  const resultText = r.result[0]?.text?.trim();
  const hasResultFromStdout = stdout.some((s) => s.includes("2 + 2 = 4"));
  expect(resultText === "4" || hasResultFromStdout).toBe(true);
  // Exactly one init event and no error events for the successful run.
  expect(initIds).toHaveLength(1);
  expect(errors).toHaveLength(0);
  expect(stdout.some((s) => s.includes("Hello from Java!"))).toBe(true);

  // Division by zero must surface as an execution error named "EvalException".
  const err = await ci.codes.run("int x = 10 / 0; // ArithmeticException", {
    context: javaCtx,
  });
  expect(err.error).toBeTruthy();
  expect(err.error?.name).toBe("EvalException");
});

// Python coverage: a direct run selected by language, variable persistence
// across two runs in one context, and an error for an undefined name.
test("03 python code execution + direct language + persistence", async () => {
  if (!ci) throw new Error("not initialized");

  // Direct run: no explicit context, only a language.
  const runDirect = () =>
    ci!.codes.run("result = 2 + 2\nresult", {
      language: SupportedLanguages.PYTHON,
    });
  const directRun = await withRetry(runDirect);
  expect(directRun.error).toBeUndefined();
  expect(directRun.result[0]?.text).toBe("4");

  // Persistence: retry the whole block as a unit so that a sandbox restart
  // mid-way gets a fresh context instead of a stale one.
  const runPersistence = async () => {
    const pyCtx = await ci!.codes.createContext(SupportedLanguages.PYTHON);
    await ci!.codes.run("x = 42", { context: pyCtx });
    return ci!.codes.run("result = x\nresult", { context: pyCtx });
  };
  const persisted = await withRetry(runPersistence);
  expect(persisted.result[0]?.text).toBe("42");

  // Referencing an undefined variable must produce an execution error.
  const runFailing = async () => {
    const freshCtx = await ci!.codes.createContext(SupportedLanguages.PYTHON);
    return ci!.codes.run("print(undefined_variable)", { context: freshCtx });
  };
  const failed = await withRetry(runFailing);
  expect(failed.error).toBeTruthy();
});

// Smoke test: Go and TypeScript snippets each run and yield an execution id.
// Context creation and the run are retried together as one unit.
test("04 go and typescript execution (smoke)", async () => {
  if (!ci) throw new Error("not initialized");

  const runGo = async () => {
    const context = await ci!.codes.createContext(SupportedLanguages.GO);
    return ci!.codes.run(
      'package main\nimport "fmt"\nfunc main() { fmt.Print("hi"); result := 2+2; fmt.Print(result) }',
      { context },
    );
  };
  const goExecution = await withRetry(runGo);
  expect(goExecution.id).toBeTruthy();

  const runTypeScript = async () => {
    const context = await ci!.codes.createContext(SupportedLanguages.TYPESCRIPT);
    return ci!.codes.run(
      "console.log('Hello from TypeScript!');\nconst result: number = 2 + 2;\nresult",
      { context },
    );
  };
  const tsExecution = await withRetry(runTypeScript);
  expect(tsExecution.id).toBeTruthy();
});

// Verifies that a variable defined in one Python context is not visible from
// another Python context created in the same sandbox.
test("05 context isolation", async () => {
  if (!ci) throw new Error("not initialized");

  // Retry entire isolation block as a unit — contexts must come from the same
  // sandbox for the assertion to make sense.
  const { ok, bad } = await withRetry(async () => {
    const python1 = await ci!.codes.createContext(SupportedLanguages.PYTHON);
    const python2 = await ci!.codes.createContext(SupportedLanguages.PYTHON);
    // Define the variable in the first context only.
    await ci!.codes.run("secret_value1 = 'python1_secret'", {
      context: python1,
    });

    // Reading it back in the defining context should work...
    const okRes = await ci!.codes.run("result = secret_value1\nresult", {
      context: python1,
    });
    // ...while the second context has never seen the name.
    const badRes = await ci!.codes.run("result = secret_value1\nresult", {
      context: python2,
    });
    return { ok: okRes, bad: badRes };
  });

  expect(ok.error).toBeUndefined();
  expect(bad.error).toBeTruthy();
  expect(bad.error?.name).toBe("NameError");
});

// Starts Python, Java and Go snippets at the same time, each in its own
// context, and requires at least two of the three runs to succeed.
test("06 concurrent execution", async () => {
  if (!ci) throw new Error("not initialized");

  // Create contexts with retry; run concurrently and tolerate partial failure.
  const pythonCtx = await withRetry(() =>
    ci!.codes.createContext(SupportedLanguages.PYTHON),
  );
  const javaCtx = await withRetry(() =>
    ci!.codes.createContext(SupportedLanguages.JAVA),
  );
  const goCtx = await withRetry(() =>
    ci!.codes.createContext(SupportedLanguages.GO),
  );

  // All three runs are started before any of them is awaited.
  const pythonRun = ci.codes.run(
    "import time\nfor i in range(3):\n  print(i)\n  time.sleep(0.1)",
    { context: pythonCtx },
  );
  const javaRun = ci.codes.run(
    "for (int i=0;i<3;i++){ System.out.println(i); try{Thread.sleep(100);}catch(Exception e){} }",
    { context: javaCtx },
  );
  const goRun = ci.codes.run(
    'package main\nimport "fmt"\nfunc main(){ for i:=0;i<3;i++{ fmt.Print(i) } }',
    { context: goCtx },
  );
  const outcomes = await Promise.allSettled([pythonRun, javaRun, goRun]);

  const fulfilled = outcomes.filter((outcome) => outcome.status === "fulfilled");
  // At least 2 of 3 concurrent runs should succeed (tolerate CI flakiness).
  expect(fulfilled.length).toBeGreaterThanOrEqual(2);
  fulfilled.forEach((outcome) => {
    expect((outcome as PromiseFulfilledResult<any>).value.id).toBeTruthy();
  });
});

// Starts a long-running Python loop, interrupts it using the execution id from
// the init event, then checks that interrupting an unknown id is rejected.
test("07 interrupt code execution + fake id", async () => {
  if (!ci) throw new Error("not initialized");

  const ctx = await withRetry(() =>
    ci!.codes.createContext(SupportedLanguages.PYTHON),
  );

  // The handler only records the init event and signals; assertions run in the
  // test body so a failing assertion cannot be lost inside the SDK callback or
  // prevent the signal from firing.
  const init: { id?: string; timestamp?: number } = {};
  let signalInit: () => void = () => {};
  const initReceived = new Promise<void>((resolve) => {
    signalInit = resolve;
  });
  const handlers: ExecutionHandlers = {
    onInit: (i) => {
      init.id = i.id;
      init.timestamp = i.timestamp;
      signalInit();
    },
  };

  const runTask = ci!.codes.run(
    "import time\nfor i in range(100):\n  print(i)\n  time.sleep(0.2)",
    { context: ctx, handlers },
  );

  // Important: observe the run's outcome immediately so a rejection (expected in
  // some environments when interrupt terminates the stream abruptly) is never
  // reported by Vitest as unhandled.
  const runSettled = runTask.then(
    () => undefined,
    () => undefined,
  );

  // Wait for the init event, but stop waiting if the run settles first;
  // otherwise a run that fails before init would hang until the test timeout.
  await Promise.race([initReceived, runSettled]);
  if (!init.id) throw new Error("missing init id");
  assertRecentTimestampMs(init.timestamp as number);
  await ci!.codes.interrupt(init.id);

  // Let the interrupted execution finish (successfully or not) before moving on.
  await runSettled;

  await expect(ci!.codes.interrupt(`fake-${Date.now()}`)).rejects.toBeTruthy();
});


================================================
FILE: tests/javascript/tests/test_sandbox_e2e.test.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { afterAll, beforeAll, expect, test } from "vitest";

import {
  ConnectionConfig,
  SandboxApiException,
  Sandbox,
  DEFAULT_EXECD_PORT,
  DEFAULT_EGRESS_PORT,
  SandboxManager,
  type ExecutionHandlers,
  type ExecutionComplete,
  type ExecutionError,
  type ExecutionInit,
  type ExecutionResult,
  type OutputMessage,
} from "@alibaba-group/opensandbox";

import {
  TEST_API_KEY,
  TEST_DOMAIN,
  TEST_PROTOCOL,
  assertEndpointHasPort,
  assertRecentTimestampMs,
  createConnectionConfig,
  getSandboxImage,
} from "./base_e2e.ts";

// Sandbox shared by the tests in this file. It is assigned in beforeAll and
// stays null if creation never completed, which the tests guard against.
let sandbox: Sandbox | null = null;

// Provision the shared sandbox once for the whole file (5 minute hook timeout).
beforeAll(async () => {
  const connectionConfig = createConnectionConfig();
  const image = getSandboxImage();
  const env = {
    E2E_TEST: "true",
    GO_VERSION: "1.25",
    JAVA_VERSION: "21",
    NODE_VERSION: "22",
    PYTHON_VERSION: "3.12",
  };

  sandbox = await Sandbox.create({
    connectionConfig,
    image,
    timeoutSeconds: 20 * 60,
    readyTimeoutSeconds: 60,
    metadata: { tag: "e2e-test" },
    entrypoint: ["tail", "-f", "/dev/null"],
    env,
    healthCheckPollingInterval: 200,
  });
}, 5 * 60_000);

// Best-effort teardown of the shared sandbox: a failing kill() is swallowed
// so it cannot turn a green run red.
afterAll(async () => {
  if (sandbox !== null) {
    try {
      await sandbox.kill();
    } catch {
      // deliberately ignored
    }
  }
}, 5 * 60_000);

// Exercises the basic lifecycle surface of the shared sandbox: identity,
// health, info, endpoint lookup, metrics, renew and reconnect-by-id.
test("01 sandbox lifecycle, health, endpoint, metrics, renew, connect", async () => {
  if (!sandbox) throw new Error("sandbox not created");
  const box = sandbox;

  // Identity and liveness.
  expect(typeof box.id).toBe("string");
  const healthyAtStart = await box.isHealthy();
  expect(healthyAtStart).toBe(true);

  // Wait five seconds before reading the sandbox info back.
  await new Promise((done) => setTimeout(done, 5000));
  const details = await box.getInfo();
  expect(details.id).toBe(box.id);
  expect(details.status.state).toBe("Running");
  expect(details.entrypoint).toEqual(["tail", "-f", "/dev/null"]);
  expect(details.metadata?.tag).toBe("e2e-test");

  // The execd endpoint must resolve and carry the expected port.
  const execdEndpoint = await box.getEndpoint(DEFAULT_EXECD_PORT);
  expect(execdEndpoint).toBeTruthy();
  expect(typeof execdEndpoint.endpoint).toBe("string");
  assertEndpointHasPort(execdEndpoint.endpoint, DEFAULT_EXECD_PORT);

  // Metrics sanity: positive capacities, usage within bounds, fresh timestamp.
  const usage = await box.getMetrics();
  expect(usage.cpuCount).toBeGreaterThan(0);
  expect(usage.cpuUsedPercentage).toBeGreaterThanOrEqual(0);
  expect(usage.cpuUsedPercentage).toBeLessThanOrEqual(100);
  expect(usage.memoryTotalMiB).toBeGreaterThan(0);
  expect(usage.memoryUsedMiB).toBeGreaterThanOrEqual(0);
  expect(usage.memoryUsedMiB).toBeLessThanOrEqual(usage.memoryTotalMiB);
  assertRecentTimestampMs(usage.timestamp, 120_000);

  // Renewing must hand back a concrete expiry Date.
  const renewed = await box.renew(20 * 60);
  expect(renewed.expiresAt).toBeTruthy();
  expect(renewed.expiresAt).toBeInstanceOf(Date);

  // A second handle attached by id must behave like the original.
  const sharedConfig = box.connectionConfig;
  const reconnected = await Sandbox.connect({
    sandboxId: box.id,
    connectionConfig: sharedConfig,
  });
  expect(reconnected.id).toBe(box.id);
  expect(await reconnected.isHealthy()).toBe(true);
  const echoed = await reconnected.commands.run("echo connect-ok");
  expect(echoed.error).toBeUndefined();
  expect(echoed.logs.stdout[0]?.text).toBe("connect-ok");
});

// A sandbox created with `timeoutSeconds: null` (manual cleanup) must report a
// null `expiresAt` and keep its metadata.
test("01b manual cleanup sandbox returns null expiresAt", async () => {
  const connectionConfig = createConnectionConfig();
  const manualSandbox = await Sandbox.create({
    connectionConfig,
    image: getSandboxImage(),
    timeoutSeconds: null,
    readyTimeoutSeconds: 60,
    metadata: { tag: "manual-e2e-test" },
    entrypoint: ["tail", "-f", "/dev/null"],
    healthCheckPollingInterval: 200,
  });

  try {
    const info = await manualSandbox.getInfo();
    expect(info.expiresAt).toBeNull();
    expect(info.metadata?.tag).toBe("manual-e2e-test");
  } finally {
    // Always release the local handle, even when kill() rejects. A kill()
    // failure still propagates so a leaked sandbox is not silently ignored.
    try {
      await manualSandbox.kill();
    } finally {
      await manualSandbox.close();
    }
  }
});

// Creates a sandbox with a deny-by-default egress policy, checks that the
// policy is enforced, then patches the rules and checks the flipped outcome.
test("01a sandbox create with networkPolicy", async () => {
  const pause = (ms: number) => new Promise((done) => setTimeout(done, ms));
  const githubProbe = "curl -I https://www.github.com";
  const pypiProbe = "curl -I https://pypi.org";

  const connectionConfig = createConnectionConfig();
  const box = await Sandbox.create({
    connectionConfig,
    image: getSandboxImage(),
    timeoutSeconds: 2 * 60,
    readyTimeoutSeconds: 60,
    networkPolicy: {
      defaultAction: "deny",
      egress: [{ action: "allow", target: "pypi.org" }],
    },
  });

  type EgressPolicy = Awaited<ReturnType<typeof box.getEgressPolicy>>;
  // True when the policy lists a rule with exactly this target and action.
  const hasRule = (policy: EgressPolicy, target: string, action: string) =>
    policy.egress?.some((rule) => rule.target === target && rule.action === action);

  await pause(5000);
  try {
    // Policy as created: deny by default, pypi.org allowed.
    const before = await box.getEgressPolicy();
    expect(before.defaultAction).toBe("deny");
    expect(hasRule(before, "pypi.org", "allow")).toBe(true);

    const githubBefore = await box.commands.run(githubProbe);
    expect(githubBefore.error).toBeTruthy();
    const pypiBefore = await box.commands.run(pypiProbe);
    expect(pypiBefore.error).toBeUndefined();

    // Swap the two targets, then wait two seconds before re-reading.
    await box.patchEgressRules([
      { action: "allow", target: "www.github.com" },
      { action: "deny", target: "pypi.org" },
    ]);
    await pause(2000);

    const after = await box.getEgressPolicy();
    expect(hasRule(after, "www.github.com", "allow")).toBe(true);
    expect(hasRule(after, "pypi.org", "deny")).toBe(true);

    const githubAfter = await box.commands.run(githubProbe);
    expect(githubAfter.error).toBeUndefined();
    const pypiAfter = await box.commands.run(pypiProbe);
    expect(pypiAfter.error).toBeTruthy();
  } finally {
    try {
      await box.kill();
    } catch {
      // best-effort cleanup
    }
  }
}, 3 * 60_000);

// Same egress-policy scenario as the direct variant, but with a connection
// config built via createConnectionConfig(true); additionally checks that the
// egress endpoint is a server proxy path for this sandbox.
test("01aa sandbox create with networkPolicy via server proxy", async () => {
  const pause = (ms: number) => new Promise((done) => setTimeout(done, ms));

  const connectionConfig = createConnectionConfig(true);
  const box = await Sandbox.create({
    connectionConfig,
    image: getSandboxImage(),
    timeoutSeconds: 2 * 60,
    readyTimeoutSeconds: 60,
    networkPolicy: {
      defaultAction: "deny",
      egress: [{ action: "allow", target: "pypi.org" }],
    },
  });

  type EgressPolicy = Awaited<ReturnType<typeof box.getEgressPolicy>>;
  // True when the policy lists a rule with exactly this target and action.
  const hasRule = (policy: EgressPolicy, target: string, action: string) =>
    policy.egress?.some((rule) => rule.target === target && rule.action === action);

  await pause(5000);
  try {
    const proxied = await box.getEndpoint(DEFAULT_EGRESS_PORT);
    const expectedProxyPath = `/sandboxes/${box.id}/proxy/${DEFAULT_EGRESS_PORT}`;
    expect(proxied.endpoint).toContain(expectedProxyPath);

    // Policy as created: deny by default, pypi.org allowed.
    const before = await box.getEgressPolicy();
    expect(before.defaultAction).toBe("deny");
    expect(hasRule(before, "pypi.org", "allow")).toBe(true);

    const githubBefore = await box.commands.run("curl -I https://www.github.com");
    expect(githubBefore.error).toBeTruthy();
    const pypiBefore = await box.commands.run("curl -I https://pypi.org");
    expect(pypiBefore.error).toBeUndefined();

    // Swap the two targets, then wait two seconds before re-reading.
    await box.patchEgressRules([
      { action: "allow", target: "www.github.com" },
      { action: "deny", target: "pypi.org" },
    ]);
    await pause(2000);

    const after = await box.getEgressPolicy();
    expect(hasRule(after, "www.github.com", "allow")).toBe(true);
    expect(hasRule(after, "pypi.org", "deny")).toBe(true);
  } finally {
    try {
      await box.kill();
    } catch {
      // best-effort cleanup
    }
  }
}, 3 * 60_000);

// Mounts a host directory read-write and checks that a marker file is
// visible, that the sandbox can write and read back a file, and that the
// mount point is a directory.
test("01b sandbox create with host volume mount (read-write)", async () => {
  const mountPoint = "/mnt/host-data";
  const hostPath = "/tmp/opensandbox-e2e/host-volume-test";

  const connectionConfig = createConnectionConfig();
  const box = await Sandbox.create({
    connectionConfig,
    image: getSandboxImage(),
    timeoutSeconds: 2 * 60,
    readyTimeoutSeconds: 60,
    volumes: [
      {
        name: "test-host-vol",
        host: { path: hostPath },
        mountPath: mountPoint,
        readOnly: false,
      },
    ],
  });
  const sh = (command: string) => box.commands.run(command);

  try {
    expect(await box.isHealthy()).toBe(true);

    // Marker file from the host side is visible through the mount.
    const marker = await sh(`cat ${mountPoint}/marker.txt`);
    expect(marker.error).toBeUndefined();
    expect(marker.logs.stdout).toHaveLength(1);
    expect(marker.logs.stdout[0]?.text).toBe("opensandbox-e2e-marker");

    // The sandbox can create a file under the mount...
    const written = await sh(
      `echo 'written-from-sandbox' > ${mountPoint}/sandbox-output.txt`
    );
    expect(written.error).toBeUndefined();

    // ...and read the same content back.
    const echoed = await sh(`cat ${mountPoint}/sandbox-output.txt`);
    expect(echoed.error).toBeUndefined();
    expect(echoed.logs.stdout).toHaveLength(1);
    expect(echoed.logs.stdout[0]?.text).toBe("written-from-sandbox");

    // Directory probe: one initial run plus up to three retries, 1s apart.
    const dirProbe = `test -d ${mountPoint} && echo OK`;
    let probe = await sh(dirProbe);
    let retriesLeft = 3;
    while (retriesLeft-- > 0) {
      expect(probe.error).toBeUndefined();
      if (probe.logs.stdout[0]?.text === "OK") break;
      await new Promise((done) => setTimeout(done, 1000));
      probe = await sh(dirProbe);
    }
    expect(probe.logs.stdout[0]?.text).toBe("OK");
  } finally {
    try {
      await box.kill();
    } catch {
      // best-effort cleanup
    }
  }
}, 3 * 60_000);

// Mounts a host directory read-only: the marker file must be readable and an
// attempted write must either be rejected or leave no file behind.
test("01c sandbox create with host volume mount (read-only)", async () => {
  const mountPoint = "/mnt/host-data-ro";
  const hostPath = "/tmp/opensandbox-e2e/host-volume-test";

  const connectionConfig = createConnectionConfig();
  const box = await Sandbox.create({
    connectionConfig,
    image: getSandboxImage(),
    timeoutSeconds: 2 * 60,
    readyTimeoutSeconds: 60,
    volumes: [
      {
        name: "test-host-vol-ro",
        host: { path: hostPath },
        mountPath: mountPoint,
        readOnly: true,
      },
    ],
  });
  const sh = (command: string) => box.commands.run(command);

  try {
    expect(await box.isHealthy()).toBe(true);

    // Reading through the mount works.
    const marker = await sh(`cat ${mountPoint}/marker.txt`);
    expect(marker.error).toBeUndefined();
    expect(marker.logs.stdout).toHaveLength(1);
    expect(marker.logs.stdout[0]?.text).toBe("opensandbox-e2e-marker");

    // Try to create a file, then check whether it exists.
    const touchAttempt = await sh(`touch ${mountPoint}/should-fail.txt`);
    const absenceProbe = await sh(
      `test ! -e ${mountPoint}/should-fail.txt && echo OK`
    );

    // Accept either signal: the write reported a failure, or no file appeared.
    let readOnlyHeld = false;
    if (touchAttempt.error != null || touchAttempt.logs.stderr.length > 0) {
      readOnlyHeld = true;
    } else if (absenceProbe.logs.stdout[0]?.text === "OK") {
      readOnlyHeld = true;
    }
    expect(readOnlyHeld).toBe(true);
  } finally {
    try {
      await box.kill();
    } catch {
      // best-effort cleanup
    }
  }
}, 3 * 60_000);

// Mounts a PVC-backed named volume read-write and checks marker visibility,
// write + read-back, and that the mount point is a directory.
test("01d sandbox create with PVC named volume mount (read-write)", async () => {
  const mountPoint = "/mnt/pvc-data";
  const claimName = "opensandbox-e2e-pvc-test";

  const connectionConfig = createConnectionConfig();
  const box = await Sandbox.create({
    connectionConfig,
    image: getSandboxImage(),
    timeoutSeconds: 2 * 60,
    readyTimeoutSeconds: 60,
    volumes: [
      {
        name: "test-pvc-vol",
        pvc: { claimName },
        mountPath: mountPoint,
        readOnly: false,
      },
    ],
  });
  const sh = (command: string) => box.commands.run(command);

  try {
    expect(await box.isHealthy()).toBe(true);

    // Marker file expected to already exist in the named volume.
    const marker = await sh(`cat ${mountPoint}/marker.txt`);
    expect(marker.error).toBeUndefined();
    expect(marker.logs.stdout).toHaveLength(1);
    expect(marker.logs.stdout[0]?.text).toBe("pvc-marker-data");

    // The sandbox can create a file in the volume...
    const written = await sh(
      `echo 'written-to-pvc' > ${mountPoint}/pvc-output.txt`
    );
    expect(written.error).toBeUndefined();

    // ...and read the same content back.
    const echoed = await sh(`cat ${mountPoint}/pvc-output.txt`);
    expect(echoed.error).toBeUndefined();
    expect(echoed.logs.stdout).toHaveLength(1);
    expect(echoed.logs.stdout[0]?.text).toBe("written-to-pvc");

    // Directory probe: one initial run plus up to three retries, 1s apart.
    const dirProbe = `test -d ${mountPoint} && echo OK`;
    let probe = await sh(dirProbe);
    let retriesLeft = 3;
    while (retriesLeft-- > 0) {
      expect(probe.error).toBeUndefined();
      if (probe.logs.stdout[0]?.text === "OK") break;
      await new Promise((done) => setTimeout(done, 1000));
      probe = await sh(dirProbe);
    }
    expect(probe.logs.stdout[0]?.text).toBe("OK");
  } finally {
    try {
      await box.kill();
    } catch {
      // best-effort cleanup
    }
  }
}, 3 * 60_000);

// Mounts a PVC-backed named volume read-only: the marker file must be
// readable and an attempted write must be rejected or leave no file behind.
test("01e sandbox create with PVC named volume mount (read-only)", async () => {
  const mountPoint = "/mnt/pvc-data-ro";
  const claimName = "opensandbox-e2e-pvc-test";

  const connectionConfig = createConnectionConfig();
  const box = await Sandbox.create({
    connectionConfig,
    image: getSandboxImage(),
    timeoutSeconds: 2 * 60,
    readyTimeoutSeconds: 60,
    volumes: [
      {
        name: "test-pvc-vol-ro",
        pvc: { claimName },
        mountPath: mountPoint,
        readOnly: true,
      },
    ],
  });
  const sh = (command: string) => box.commands.run(command);

  try {
    expect(await box.isHealthy()).toBe(true);

    // Reading through the mount works.
    const marker = await sh(`cat ${mountPoint}/marker.txt`);
    expect(marker.error).toBeUndefined();
    expect(marker.logs.stdout).toHaveLength(1);
    expect(marker.logs.stdout[0]?.text).toBe("pvc-marker-data");

    // Try to create a file, then check whether it exists.
    const touchAttempt = await sh(`touch ${mountPoint}/should-fail.txt`);
    const absenceProbe = await sh(
      `test ! -e ${mountPoint}/should-fail.txt && echo OK`
    );

    // Accept either signal: the write reported a failure, or no file appeared.
    let readOnlyHeld = false;
    if (touchAttempt.error != null || touchAttempt.logs.stderr.length > 0) {
      readOnlyHeld = true;
    } else if (absenceProbe.logs.stdout[0]?.text === "OK") {
      readOnlyHeld = true;
    }
    expect(readOnlyHeld).toBe(true);
  } finally {
    try {
      await box.kill();
    } catch {
      // best-effort cleanup
    }
  }
}, 3 * 60_000);

// Mounts only the "datasets/train" subPath of a PVC-backed volume and checks
// that just that subtree is visible and writable at the mount point.
test("01f sandbox create with PVC named volume subPath mount", async () => {
  const mountPoint = "/mnt/train";
  const claimName = "opensandbox-e2e-pvc-test";

  const connectionConfig = createConnectionConfig();
  const box = await Sandbox.create({
    connectionConfig,
    image: getSandboxImage(),
    timeoutSeconds: 2 * 60,
    readyTimeoutSeconds: 60,
    volumes: [
      {
        name: "test-pvc-subpath",
        pvc: { claimName },
        mountPath: mountPoint,
        readOnly: false,
        subPath: "datasets/train",
      },
    ],
  });
  const sh = (command: string) => box.commands.run(command);

  try {
    expect(await box.isHealthy()).toBe(true);

    // The marker that lives inside the subPath is readable.
    const marker = await sh(`cat ${mountPoint}/marker.txt`);
    expect(marker.error).toBeUndefined();
    expect(marker.logs.stdout).toHaveLength(1);
    expect(marker.logs.stdout[0]?.text).toBe("pvc-subpath-marker");

    // The listing shows the subPath contents, not the volume root.
    const listing = await sh(`ls ${mountPoint}/`);
    expect(listing.error).toBeUndefined();
    const listedNames = listing.logs.stdout.map((line) => line.text).join("\n");
    expect(listedNames).toContain("marker.txt");
    expect(listedNames).not.toContain("datasets");

    // Write a file, then read it back.
    const written = await sh(
      `echo 'subpath-write-test' > ${mountPoint}/output.txt`
    );
    expect(written.error).toBeUndefined();

    // Up to three read attempts, 1s apart, to ride out transient SSE drops.
    let echoed: Awaited<ReturnType<typeof box.commands.run>> | undefined;
    let attemptsLeft = 3;
    while (attemptsLeft-- > 0) {
      echoed = await sh(`cat ${mountPoint}/output.txt`);
      if (echoed.logs.stdout.length > 0) break;
      await new Promise<void>((done) => setTimeout(done, 1000));
    }
    expect(echoed!.error).toBeUndefined();
    expect(echoed!.logs.stdout).toHaveLength(1);
    expect(echoed!.logs.stdout[0]?.text).toBe("subpath-write-test");
  } finally {
    try {
      await box.kill();
    } catch {
      // best-effort cleanup
    }
  }
}, 3 * 60_000);

// The shared sandbox must be discoverable through SandboxManager, both via a
// filtered listing and via a direct lookup by id.
test("01g sandbox manager: list + get", async () => {
  if (!sandbox) throw new Error("sandbox not created");
  const box = sandbox;

  const manager = SandboxManager.create({ connectionConfig: box.connectionConfig });

  // Listing filtered by state and tag must include the shared sandbox.
  const page = await manager.listSandboxInfos({
    states: ["Running"],
    metadata: { tag: "e2e-test" },
    pageSize: 50,
  });
  expect(Array.isArray(page.items)).toBe(true);
  const listed = page.items.some((entry) => entry.id === box.id);
  expect(listed).toBe(true);

  // Direct lookup returns the same sandbox with its metadata.
  const fetched = await manager.getSandboxInfo(box.id);
  expect(fetched.id).toBe(box.id);
  expect(fetched.metadata?.tag).toBe("e2e-test");
});

// Covers foreground success (with streaming handlers), a working-directory
// override, a background launch that returns quickly, and a failing command.
test("02 command execution: success, cwd, background, failure", async () => {
  if (!sandbox) throw new Error("sandbox not created");

  // Collectors fed by the streaming handlers below.
  const stdoutMessages: OutputMessage[] = [];
  const stderrMessages: OutputMessage[] = [];
  const results: ExecutionResult[] = [];
  const initEvents: ExecutionInit[] = [];
  const completedEvents: ExecutionComplete[] = [];
  const errors: ExecutionError[] = [];

  const handlers: ExecutionHandlers = {
    onStdout: (message) => void stdoutMessages.push(message),
    onStderr: (message) => void stderrMessages.push(message),
    onResult: (result) => void results.push(result),
    onInit: (init) => void initEvents.push(init),
    onExecutionComplete: (done) => void completedEvents.push(done),
    onError: (failure) => void errors.push(failure),
  };

  // Empties every collector so the next run starts from a clean slate.
  const resetCollectors = () => {
    for (const bucket of [
      stdoutMessages,
      stderrMessages,
      results,
      initEvents,
      completedEvents,
      errors,
    ]) {
      bucket.length = 0;
    }
  };

  // Foreground success: one stdout line, one init, one completion, no errors.
  const greeting = await sandbox.commands.run(
    "echo 'Hello OpenSandbox E2E'",
    undefined,
    handlers
  );
  expect(greeting.id).toBeTruthy();
  expect(greeting.error).toBeUndefined();
  expect(greeting.logs.stdout).toHaveLength(1);
  expect(greeting.logs.stdout[0]?.text).toBe("Hello OpenSandbox E2E");
  assertRecentTimestampMs(greeting.logs.stdout[0]!.timestamp);

  expect(initEvents).toHaveLength(1);
  expect(completedEvents).toHaveLength(1);
  expect(errors).toHaveLength(0);

  // Working-directory override.
  const cwdProbe = await sandbox.commands.run("pwd", { workingDirectory: "/tmp" });
  expect(cwdProbe.error).toBeUndefined();
  expect(cwdProbe.logs.stdout[0]?.text).toBe("/tmp");

  // A background launch must return well before the 30s sleep would finish.
  const launchedAt = Date.now();
  await sandbox.commands.run("sleep 30", { background: true });
  const launchElapsedMs = Date.now() - launchedAt;
  expect(launchElapsedMs).toBeLessThan(10_000);

  // Failure contract: an error is reported and no completion event fires.
  resetCollectors();

  const missing = await sandbox.commands.run(
    "nonexistent-command-that-does-not-exist",
    undefined,
    handlers
  );
  expect(missing.id).toBeTruthy();
  expect(missing.error).toBeTruthy();
  expect(missing.error?.name).toBe("CommandExecError");
  expect(missing.logs.stderr.length).toBeGreaterThan(0);
  const stderrNamesCommand = missing.logs.stderr.some((line) =>
    line.text.includes("nonexistent-command-that-does-not-exist")
  );
  expect(stderrNamesCommand).toBe(true);
  expect(completedEvents.length).toBe(0);
});

// Starts a background command, checks its status record, then polls its logs
// with a cursor until both expected lines have been seen.
test("02a command status + background logs", async () => {
  if (!sandbox) throw new Error("sandbox not created");

  const launched = await sandbox.commands.run(
    "sh -c 'echo log-line-1; echo log-line-2; sleep 2'",
    { background: true }
  );
  expect(launched.id).toBeTruthy();
  const commandId = launched.id!;

  const snapshot = await sandbox.commands.getCommandStatus(commandId);
  expect(snapshot.id).toBe(commandId);
  expect(typeof snapshot.running).toBe("boolean");

  // Poll up to 20 times, 1s apart, carrying the cursor forward when given one.
  let collected = "";
  let cursor: number | undefined = undefined;
  let pollsLeft = 20;
  while (pollsLeft-- > 0) {
    const chunk = await sandbox.commands.getBackgroundCommandLogs(
      commandId,
      cursor
    );
    collected = collected + chunk.content;
    cursor = chunk.cursor ?? cursor;
    if (collected.includes("log-line-2")) break;
    await new Promise<void>((done) => setTimeout(done, 1000));
  }

  expect(collected.includes("log-line-1")).toBe(true);
  expect(collected.includes("log-line-2")).toBe(true);
});

// Per-command `envs` must be visible to that command only: the probe prints
// "__EMPTY__" without injection and the injected value with it.
test("02b command env injection", async () => {
  if (!sandbox) throw new Error("sandbox not created");

  const envKey = "OPEN_SANDBOX_E2E_CMD_ENV";
  const envValue = `env-ok-${Date.now()}`;
  const probeCommand = `sh -c 'if [ -z "\${${envKey}:-}" ]; then echo "__EMPTY__"; else echo "\${${envKey}}"; fi'`;

  // Joins all stdout lines of an execution into one trimmed string.
  const stdoutOf = (exec: Awaited<ReturnType<typeof sandbox.commands.run>>) =>
    exec.logs.stdout
      .map((line) => line.text)
      .join("\n")
      .trim();

  // Without injection the variable is unset.
  const plainRun = await sandbox.commands.run(probeCommand);
  expect(plainRun.error).toBeUndefined();
  expect(stdoutOf(plainRun)).toBe("__EMPTY__");

  // With injection the probe echoes the injected value.
  const injectedRun = await sandbox.commands.run(probeCommand, {
    envs: {
      [envKey]: envValue,
      OPEN_SANDBOX_E2E_SECOND_ENV: "second-ok",
    },
  });
  expect(injectedRun.error).toBeUndefined();
  expect(stdoutOf(injectedRun)).toBe(envValue);
});

// End-to-end walk through the sandbox filesystem API on the shared sandbox:
// create directories, write/search/read files (full, ranged, streamed),
// change permissions, overwrite, replace content, move, delete, and finally
// verify from a shell that both directories are gone. The steps depend on each
// other and must run in this order.
test("03 filesystem operations: CRUD + replace/move/delete + range + stream", async () => {
  if (!sandbox) throw new Error("sandbox not created");

  // Timestamp suffix keeps the directory names unique per run.
  const ts = Date.now();
  const dir1 = `/tmp/fs_test1_${ts}`;
  const dir2 = `/tmp/fs_test2_${ts}`;

  await sandbox.files.createDirectories([
    { path: dir1, mode: 755 },
    { path: dir2, mode: 644 },
  ]);

  // Modes are passed and read back as the same numeric values (755 / 644).
  const infoMap = await sandbox.files.getFileInfo([dir1, dir2]);
  expect(infoMap[dir1]?.path).toBe(dir1);
  expect(infoMap[dir2]?.path).toBe(dir2);
  expect(infoMap[dir1]?.mode).toBe(755);
  expect(infoMap[dir2]?.mode).toBe(644);

  // Cross-check from a shell: exactly two entries under /tmp match "fs_test".
  const ls = await sandbox.commands.run("ls -la | grep fs_test", {
    workingDirectory: "/tmp",
  });
  expect(ls.error).toBeUndefined();
  expect(ls.logs.stdout).toHaveLength(2);

  const file1 = `${dir1}/test_file1.txt`;
  const file2 = `${dir1}/test_file2.txt`;
  const file3 = `${dir1}/test_file3.txt`;
  // Multi-line content with non-ASCII characters; written both as a string
  // and as UTF-8 bytes below.
  const content = "Hello Filesystem!\nLine 2 with special chars: åäö\nLine 3";
  const bytes = new TextEncoder().encode(content);

  // Align with Python/Kotlin semantics but keep E2E portable across different base images:
  // prefer "nogroup"/"nobody" if present, otherwise fall back to "root".
  const ownerPick = await sandbox.commands.run(
    `id -u nobody >/dev/null 2>&1 && echo nobody || echo root`,
    { workingDirectory: "/tmp" }
  );
  expect(ownerPick.error).toBeUndefined();
  const ownerName = (ownerPick.logs.stdout[0]?.text || "root").trim();

  const groupPick = await sandbox.commands.run(
    `getent group nogroup >/dev/null 2>&1 && echo nogroup || echo root`,
    { workingDirectory: "/tmp" }
  );
  expect(groupPick.error).toBeUndefined();
  const groupName = (groupPick.logs.stdout[0]?.text || "root").trim();

  // Three writes in one call: string data, byte data, and byte data with an
  // explicit owner/group.
  await sandbox.files.writeFiles([
    { path: file1, data: content, mode: 644 },
    { path: file2, data: bytes, mode: 755 },
    { path: file3, data: bytes, mode: 755, owner: ownerName, group: groupName },
  ]);

  // Search must report all three files.
  const searched = await sandbox.files.search({ path: dir1, pattern: "*" });
  const searchedPaths = new Set(searched.map((f) => f.path));
  expect(searchedPaths.has(file1)).toBe(true);
  expect(searchedPaths.has(file2)).toBe(true);
  expect(searchedPaths.has(file3)).toBe(true);

  // Read back through each read API: full text, ranged text, bytes, and a
  // byte stream whose chunks are concatenated manually.
  const read1 = await sandbox.files.readFile(file1, { encoding: "utf-8" });
  const read1Partial = await sandbox.files.readFile(file1, {
    encoding: "utf-8",
    range: "bytes=0-9",
  });
  const read2 = await sandbox.files.readBytes(file2);
  let read3 = new Uint8Array();
  for await (const chunk of sandbox.files.readBytesStream(file3)) {
    const merged = new Uint8Array(read3.length + chunk.length);
    merged.set(read3, 0);
    merged.set(chunk, read3.length);
    read3 = merged;
  }

  expect(read1).toBe(content);
  expect(new TextDecoder("utf-8").decode(read2)).toBe(content);
  expect(new TextDecoder("utf-8").decode(read3)).toBe(content);
  // "bytes=0-9" is inclusive, i.e. the first ten bytes; those are all ASCII
  // here, so they equal the first ten characters.
  expect(read1Partial).toBe(content.slice(0, 10));

  // Permission and ownership changes must be visible through getFileInfo.
  await sandbox.files.setPermissions([
    { path: file1, mode: 755, owner: ownerName, group: groupName },
    { path: file2, mode: 600, owner: ownerName, group: groupName },
  ]);
  const perms = await sandbox.files.getFileInfo([file1, file2]);
  expect(perms[file1]?.mode).toBe(755);
  expect(perms[file1]?.owner).toBe(ownerName);
  expect(perms[file1]?.group).toBe(groupName);
  expect(perms[file2]?.mode).toBe(600);

  // Overwrite both files with extended content.
  const updated1 = `${content}\nAppended line to file1`;
  const updated2 = `${content}\nAppended line to file2`;
  // NOTE(review): the reason for the 50ms pauses is not visible here —
  // presumably they separate consecutive writes; confirm before removing.
  await new Promise((r) => setTimeout(r, 50));
  await sandbox.files.writeFiles([
    { path: file1, data: updated1, mode: 644 },
    { path: file2, data: updated2, mode: 755 },
  ]);
  expect(await sandbox.files.readFile(file1)).toBe(updated1);
  expect(await sandbox.files.readFile(file2)).toBe(updated2);

  // In-place replacement: the old substring must be gone, the new one present.
  await new Promise((r) => setTimeout(r, 50));
  await sandbox.files.replaceContents([
    {
      path: file1,
      oldContent: "Appended line to file1",
      newContent: "Replaced line in file1",
    },
  ]);
  const replaced = await sandbox.files.readFile(file1);
  expect(replaced.includes("Replaced line in file1")).toBe(true);
  expect(replaced.includes("Appended line to file1")).toBe(false);

  // file3 was never overwritten, so after the move it still holds `content`.
  const movedPath = `${dir2}/moved_file3.txt`;
  await sandbox.files.moveFiles([{ src: file3, dest: movedPath }]);
  expect(await sandbox.files.readFile(movedPath)).toBe(content);

  // Reading a deleted file must reject.
  await sandbox.files.deleteFiles([file2]);
  await expect(sandbox.files.readFile(file2)).rejects.toBeTruthy();

  // Remove both directories, then confirm from a shell. The check runs once
  // and is retried up to three more times, one second apart.
  await sandbox.files.deleteDirectories([dir1, dir2]);
  let verify = await sandbox.commands.run(
    `test ! -d ${dir1} && test ! -d ${dir2} && echo OK`,
    { workingDirectory: "/tmp" }
  );
  for (let attempt = 0; attempt < 3; attempt++) {
    if (!verify.error && verify.logs.stdout[0]?.text === "OK") break;
    await new Promise((r) => setTimeout(r, 1000));
    verify = await sandbox.commands.run(
      `test ! -d ${dir1} && test ! -d ${dir2} && echo OK`,
      { workingDirectory: "/tmp" }
    );
  }
  expect(verify.error).toBeUndefined();
  expect(verify.logs.stdout[0]?.text).toBe("OK");
});

// Starts a long-running foreground command, interrupts it as soon as its init
// event arrives, and accepts any of three outcomes as proof the interrupt was
// handled: a completion event, an error event, or a successful follow-up
// command on the same sandbox.
test("04 interrupt command", async () => {
  if (!sandbox) throw new Error("sandbox not created");

  const initEvents: ExecutionInit[] = [];
  const completed: ExecutionComplete[] = [];
  const errors: ExecutionError[] = [];
  let initResolve: ((v: ExecutionInit) => void) | null = null;
  const initPromise = new Promise<ExecutionInit>((r) => (initResolve = r));

  const handlers: ExecutionHandlers = {
    onInit: (i) => {
      initEvents.push(i);
      initResolve?.(i);
    },
    onExecutionComplete: (c) => {
      completed.push(c);
    },
    onError: (e) => {
      errors.push(e);
    },
  };

  // Do not await the run yet: the init event carries the id needed to interrupt.
  const task = sandbox.commands.run("sleep 30", undefined, handlers);
  const init = await initPromise;
  expect(init.id).toBeTruthy();
  assertRecentTimestampMs(init.timestamp);

  await sandbox.commands.interrupt(init.id);

  // Wait for the interrupted run to settle, but never longer than 60s. The
  // guard timer is cleared afterwards so it does not stay pending once the
  // test has finished.
  let guardTimer: ReturnType<typeof setTimeout> | undefined;
  let exec: Awaited<typeof task> | null = null;
  try {
    exec = await Promise.race([
      task,
      new Promise<never>((_, reject) => {
        guardTimer = setTimeout(
          () => reject(new Error("interrupt wait timeout")),
          60_000,
        );
      }),
    ]);
  } catch {
    // Either the stream failed after the interrupt or the guard fired.
    exec = null;
  } finally {
    clearTimeout(guardTimer);
  }

  if (exec) {
    expect(exec.id).toBe(init.id);
  }

  // The sandbox should still accept commands after the interrupt.
  let followUp = null;
  try {
    followUp = await sandbox.commands.run("echo interrupt-ok");
  } catch {
    followUp = null;
  }

  expect(
    completed.length > 0 ||
      errors.length > 0 ||
      (followUp?.error === undefined &&
        followUp?.logs.stdout[0]?.text === "interrupt-ok"),
  ).toBe(true);
});

// Pauses the shared sandbox, waits for it to report "Paused" and unhealthy,
// resumes it, and checks that it becomes healthy and runs commands again.
test("05 sandbox pause + resume", async () => {
  if (!sandbox) throw new Error("sandbox not created");
  const wait = (ms: number) => new Promise((done) => setTimeout(done, ms));

  await wait(20_000);
  await sandbox.pause();

  // Poll once per second (at most 300 times) until the state leaves "Pausing".
  let state = "Pausing";
  let polls = 0;
  while (polls < 300) {
    polls += 1;
    await wait(1000);
    state = (await sandbox.getInfo()).status.state;
    if (state !== "Pausing") break;
  }
  expect(state).toBe("Paused");

  // A paused sandbox must report unhealthy within ten probes, 500ms apart.
  let healthy = true;
  for (let probe = 0; probe < 10 && healthy; probe++) {
    healthy = await sandbox.isHealthy();
    if (healthy) await wait(500);
  }
  expect(healthy).toBe(false);

  // resume() returns a new handle, which replaces the shared one.
  sandbox = await sandbox.resume({
    readyTimeoutSeconds: 60,
    healthCheckPollingInterval: 200,
  });

  // Give the resumed sandbox up to 60 one-second probes to become healthy.
  let ok = false;
  for (let probe = 0; probe < 60 && !ok; probe++) {
    await wait(1000);
    ok = await sandbox.isHealthy();
  }
  expect(ok).toBe(true);

  const afterResume = await sandbox.commands.run("echo resume-ok");
  expect(afterResume.error).toBeUndefined();
  expect(afterResume.logs.stdout[0]?.text).toBe("resume-ok");
});

// A caller-supplied X-Request-ID header must be surfaced on the
// SandboxApiException raised when the server rejects a request (here: a
// lookup of a sandbox id that does not exist).
test("06 x-request-id passthrough on server error", async () => {
  const requestId = `e2e-js-server-${Date.now()}`;
  const missingSandboxId = `missing-${requestId}`;
  const connectionConfig = new ConnectionConfig({
    domain: TEST_DOMAIN,
    protocol: TEST_PROTOCOL === "https" ? "https" : "http",
    apiKey: TEST_API_KEY,
    requestTimeoutSeconds: 180,
    headers: { "X-Request-ID": requestId },
  });

  // Capture the failure and assert outside the try block. A sentinel `throw`
  // inside the try would be swallowed by this test's own catch and reported
  // as a confusing instanceof mismatch instead of "call did not fail".
  let caught: unknown;
  try {
    const connected = await Sandbox.connect({
      sandboxId: missingSandboxId,
      connectionConfig,
    });
    await connected.getInfo();
  } catch (err) {
    caught = err;
  }

  expect(caught, "expected server call to fail").toBeDefined();
  expect(caught).toBeInstanceOf(SandboxApiException);
  expect((caught as SandboxApiException).requestId).toBe(requestId);
});


================================================
FILE: tests/javascript/tests/test_sandbox_manager_e2e.test.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { afterAll, beforeAll, expect, test } from "vitest";

import {
  Sandbox,
  SandboxManager,
} from "@alibaba-group/opensandbox";

import {
  createConnectionConfig,
  getSandboxImage,
} from "./base_e2e.ts";

// Fixtures shared by every test in this file; all are assigned in beforeAll.
// Manager client used for listing, lookups, and pausing.
let manager: SandboxManager | null = null;
// Randomised metadata tag attached to the three sandboxes below.
let tag: string | null = null;
// s1: team=t1, env=prod. s2: team=t1, env=dev. s3: env=prod, paused in beforeAll.
let s1: Sandbox | null = null;
let s2: Sandbox | null = null;
let s3: Sandbox | null = null;

/** Resolve after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}

/**
 * Poll the shared manager once per second until the sandbox reports
 * `expectedState`.
 *
 * Throws if the manager has not been initialized, or if `timeoutMs` elapses
 * before the expected state is observed (the last observed state is included
 * in the error message).
 */
async function waitForState(
  sandboxId: string,
  expectedState: string,
  timeoutMs = 180_000
): Promise<void> {
  if (!manager) {
    throw new Error("sandbox manager not initialized");
  }

  const giveUpAt = Date.now() + timeoutMs;
  let observed = "unknown";

  while (Date.now() < giveUpAt) {
    const { status } = await manager.getSandboxInfo(sandboxId);
    observed = status.state;
    if (observed === expectedState) {
      return;
    }
    await sleep(1000);
  }

  const reason = `Timed out waiting for state=${expectedState}, lastState=${observed}`;
  throw new Error(reason);
}

// Suite setup: create the manager, three tagged sandboxes (one after another),
// verify they are healthy, then pause the third so state filters have both
// Running and Paused sandboxes to match.
beforeAll(async () => {
  const connectionConfig = createConnectionConfig();
  manager = SandboxManager.create({ connectionConfig });
  tag = `e2e-sandbox-manager-${Math.random().toString(16).slice(2, 10)}`;

  // Options shared by every sandbox created for this suite.
  const sharedCreateOptions = {
    connectionConfig,
    image: getSandboxImage(),
    timeoutSeconds: 5 * 60,
    readyTimeoutSeconds: 60,
    healthCheckPollingInterval: 500,
    resource: { cpu: "1", memory: "2Gi" },
  };

  s1 = await Sandbox.create({
    ...sharedCreateOptions,
    metadata: { tag, team: "t1", env: "prod" },
    env: { E2E_TEST: "true", CASE: "mgr-s1" },
  });
  s2 = await Sandbox.create({
    ...sharedCreateOptions,
    metadata: { tag, team: "t1", env: "dev" },
    env: { E2E_TEST: "true", CASE: "mgr-s2" },
  });
  s3 = await Sandbox.create({
    ...sharedCreateOptions,
    metadata: { tag, env: "prod" },
    env: { E2E_TEST: "true", CASE: "mgr-s3" },
  });

  for (const created of [s1, s2, s3]) {
    expect(await created.isHealthy()).toBe(true);
  }

  await manager.pauseSandbox(s3.id);
  await waitForState(s3.id, "Paused");
}, 10 * 60_000);

// Suite teardown: best-effort kill of every sandbox that was actually created.
afterAll(async () => {
  const created = [s1, s2, s3].filter(
    (candidate): candidate is Sandbox => Boolean(candidate)
  );
  for (const sandbox of created) {
    try {
      await sandbox.kill();
    } catch {
      // ignore: cleanup failures must not fail the suite
    }
  }
}, 5 * 60_000);

test("01 states filter uses OR semantics", async () => {
  if (!manager || !tag || !s1 || !s2 || !s3) {
    throw new Error("sandbox manager not initialized");
  }

  // Both states requested: all three sandboxes of this run must be listed.
  const runningOrPaused = await manager.listSandboxInfos({
    states: ["Running", "Paused"],
    metadata: { tag },
    pageSize: 50,
  });
  const runningOrPausedIds = runningOrPaused.items.map(({ id }) => id);
  expect(runningOrPausedIds.includes(s1.id)).toBe(true);
  expect(runningOrPausedIds.includes(s2.id)).toBe(true);
  expect(runningOrPausedIds.includes(s3.id)).toBe(true);

  // Paused only: just s3 (paused during setup).
  const paused = await manager.listSandboxInfos({
    states: ["Paused"],
    metadata: { tag },
    pageSize: 50,
  });
  const pausedIdList = paused.items.map(({ id }) => id);
  expect(pausedIdList.includes(s3.id)).toBe(true);
  expect(pausedIdList.includes(s1.id)).toBe(false);
  expect(pausedIdList.includes(s2.id)).toBe(false);

  // Running only: s1 and s2, never s3.
  const running = await manager.listSandboxInfos({
    states: ["Running"],
    metadata: { tag },
    pageSize: 50,
  });
  const runningIdList = running.items.map(({ id }) => id);
  expect(runningIdList.includes(s1.id)).toBe(true);
  expect(runningIdList.includes(s2.id)).toBe(true);
  expect(runningIdList.includes(s3.id)).toBe(false);
}, 2 * 60_000);

test("02 metadata filter uses AND semantics", async () => {
  if (!manager || !tag || !s1 || !s2 || !s3) {
    throw new Error("sandbox manager not initialized");
  }

  // tag + team=t1 -> s1 and s2 (s3 was created without a team).
  const byTeam = await manager.listSandboxInfos({
    metadata: { tag, team: "t1" },
    pageSize: 50,
  });
  const byTeamIds = byTeam.items.map(({ id }) => id);
  expect(byTeamIds.includes(s1.id)).toBe(true);
  expect(byTeamIds.includes(s2.id)).toBe(true);
  expect(byTeamIds.includes(s3.id)).toBe(false);

  // tag + team=t1 + env=prod -> only s1.
  const byTeamAndEnv = await manager.listSandboxInfos({
    metadata: { tag, team: "t1", env: "prod" },
    pageSize: 50,
  });
  const byTeamAndEnvIds = byTeamAndEnv.items.map(({ id }) => id);
  expect(byTeamAndEnvIds.includes(s1.id)).toBe(true);
  expect(byTeamAndEnvIds.includes(s2.id)).toBe(false);
  expect(byTeamAndEnvIds.includes(s3.id)).toBe(false);

  // tag + env=prod -> s1 and s3.
  const byEnv = await manager.listSandboxInfos({
    metadata: { tag, env: "prod" },
    pageSize: 50,
  });
  const byEnvIds = byEnv.items.map(({ id }) => id);
  expect(byEnvIds.includes(s1.id)).toBe(true);
  expect(byEnvIds.includes(s3.id)).toBe(true);
  expect(byEnvIds.includes(s2.id)).toBe(false);

  // tag + team=t2 -> none of this run's sandboxes.
  const byUnknownTeam = await manager.listSandboxInfos({
    metadata: { tag, team: "t2" },
    pageSize: 50,
  });
  const byUnknownTeamIds = byUnknownTeam.items.map(({ id }) => id);
  expect(byUnknownTeamIds.includes(s1.id)).toBe(false);
  expect(byUnknownTeamIds.includes(s2.id)).toBe(false);
  expect(byUnknownTeamIds.includes(s3.id)).toBe(false);
}, 2 * 60_000);

test("03 invalid operations reject", async () => {
  if (!manager) throw new Error("sandbox manager not initialized");
  const mgr = manager;
  const missingId = `non-existent-${Date.now()}`;

  // Each operation is started only when its turn comes, so the calls run
  // strictly one after another against the unknown sandbox id.
  const invalidCalls: Array<() => Promise<unknown>> = [
    () => mgr.getSandboxInfo(missingId),
    () => mgr.pauseSandbox(missingId),
    () => mgr.resumeSandbox(missingId),
    () => mgr.killSandbox(missingId),
    () => mgr.renewSandbox(missingId, 5 * 60),
  ];

  for (const startCall of invalidCalls) {
    await expect(startCall()).rejects.toBeTruthy();
  }
}, 60_000);


================================================
FILE: tests/javascript/tests/test_wait_until_ready_diagnostics.test.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { expect, test } from "vitest";

import { Sandbox, SandboxReadyTimeoutException } from "@alibaba-group/opensandbox";

test("waitUntilReady timeout includes last health-check error and connection context", async () => {
  // Minimal stand-in exposing only the fields this test supplies; every ping rejects.
  const alwaysRefused = async (): Promise<boolean> => {
    throw new Error("connect ECONNREFUSED 127.0.0.1:8080");
  };
  const fakeSandbox = {
    connectionConfig: { domain: "localhost:8080", useServerProxy: false },
    health: { ping: alwaysRefused },
  } as unknown as Sandbox;

  let failure: unknown;
  try {
    await Sandbox.prototype.waitUntilReady.call(fakeSandbox, {
      readyTimeoutSeconds: 0.01,
      pollingIntervalMillis: 1,
    });
  } catch (caught) {
    failure = caught;
  }

  expect(failure).toBeInstanceOf(SandboxReadyTimeoutException);
  const { message } = failure as Error;
  const expectedFragments = [
    "Sandbox health check timed out",
    "Last health check error",
    "domain=localhost:8080",
    "useServerProxy=false",
    "useServerProxy=true",
  ];
  for (const fragment of expectedFragments) {
    expect(message).toContain(fragment);
  }
});

test("waitUntilReady timeout includes false-continuously hint when ping returns false", async () => {
  // Count pings so the test can prove the health check was actually polled.
  let pingCount = 0;
  const alwaysUnhealthy = async (): Promise<boolean> => {
    pingCount += 1;
    return false;
  };
  const fakeSandbox = {
    connectionConfig: { domain: "localhost:8080", useServerProxy: true },
    health: { ping: alwaysUnhealthy },
  } as unknown as Sandbox;

  let failure: unknown;
  try {
    await Sandbox.prototype.waitUntilReady.call(fakeSandbox, {
      readyTimeoutSeconds: 0.01,
      pollingIntervalMillis: 1,
    });
  } catch (caught) {
    failure = caught;
  }

  expect(failure).toBeInstanceOf(SandboxReadyTimeoutException);
  const { message } = failure as Error;
  expect(message).toContain("Health check returned false continuously.");
  expect(pingCount).toBeGreaterThan(0);
});


================================================
FILE: tests/javascript/tsconfig.json
================================================
{
  "compilerOptions": {
    "target": "ES2022",
    "module": "ES2022",
    "moduleResolution": "Bundler",
    "lib": ["ES2022", "DOM"],
    "strict": true,
    "skipLibCheck": true,
    "noEmit": true,
    "allowImportingTsExtensions": true
  },
  "include": ["tests"]
}


================================================
FILE: tests/javascript/vitest.config.ts
================================================
// Copyright 2026 Alibaba Group Holding Ltd.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { defineConfig } from "vitest/config";

// These E2E tests can be slow depending on the provider, so tests and hooks
// share the same generous ceiling.
const E2E_TIMEOUT_MS = 15 * 60_000;

export default defineConfig({
  test: {
    environment: "node",
    testTimeout: E2E_TIMEOUT_MS,
    hookTimeout: E2E_TIMEOUT_MS,
    // Keep ordering deterministic (mirrors ordered Python/Java E2E suites).
    sequence: { concurrent: false },
  },
});

================================================
FILE: tests/python/Makefile
================================================
# Convenience targets for the uv-managed Python E2E test project.
# Every target is a command alias rather than a file, hence .PHONY.
.PHONY: sync sync-dev test test-sandbox test-manager test-code lint fmt

# Install the project's dependencies.
sync:
	uv sync

# Install dependencies including the "dev" dependency group.
sync-dev:
	uv sync --group dev

# Run the whole suite.
test:
	uv run pytest

# Run only the Sandbox E2E suite.
test-sandbox:
	uv run pytest tests/test_sandbox_e2e.py

# Run only the SandboxManager E2E suite.
test-manager:
	uv run pytest tests/test_sandbox_manager_e2e.py

# Run only the CodeInterpreter E2E suite.
test-code:
	uv run pytest tests/test_code_interpreter_e2e.py

# Lint the test sources with ruff.
lint:
	uv run ruff check tests

# Format the test sources with ruff.
fmt:
	uv run ruff format tests


================================================
FILE: tests/python/README.md
================================================
## OpenSandbox Python SDK – E2E Tests (uv)

This folder is a standalone e2e test project managed by **uv**.

### Setup

```bash
cd tests/python
uv sync
```

### Run tests

```bash
uv run pytest
```

Run a specific suite:

```bash
uv run pytest tests/test_sandbox_e2e.py
```

### Notes about asyncio + shared Sandbox

These tests may reuse a single Sandbox instance across multiple test cases for speed.
To avoid `RuntimeError: Event loop is closed`, pytest-asyncio is configured to use a
**session-scoped event loop** in `pyproject.toml`.

### Handy shortcuts

```bash
make sync
make test
make test-sandbox
make lint
make fmt
```


================================================
FILE: tests/python/pyproject.toml
================================================
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[project]
name = "opensandbox-e2e-tests"
version = "0.1.0"
description = "E2E tests for OpenSandbox Python SDK"
readme = "README.md"
requires-python = ">=3.10"
# Keep in sync with the Apache 2.0 license header at the top of this file.
license = { text = "Apache-2.0" }
authors = [
    { name = "OpenSandbox Team", email = "ninan.nn@alibaba-inc.com" }
]
dependencies = [
    "pytest>=7.0.0",
    "pytest-asyncio>=0.21.0",
    "pytest-timeout>=2.1.0",
    "pytest-order>=1.2.0",
    "pydantic>=2.0.0",
    # The two SDKs under test; resolved from local checkouts via [tool.uv.sources].
    "opensandbox",
    "opensandbox-code-interpreter",
]

[dependency-groups]
dev = [
    "ruff>=0.14.8",
    "pyright>=1.1.407",
]

[tool.uv]
# This is a test runner project (no importable package); don't try to build/install it.
package = false

[tool.uv.sources]
opensandbox = { path = "../../sdks/sandbox/python", editable = true }
opensandbox-code-interpreter = { path = "../../sdks/code-interpreter/python", editable = true }

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# pytest configuration for the E2E suites.
[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
addopts = [
    "-v",                    # verbose test names
    "-s",                    # do not capture stdout/stderr
    "-x",                    # stop at the first failing test
    "--tb=short",            # compact tracebacks
    "--strict-markers",      # unknown markers are errors (see `markers` below)
    "--asyncio-mode=auto",   # same setting as `asyncio_mode` below
    "--order-scope=class",   # pytest-order: ordering is applied per test class
]
markers = [
    "e2e: marks tests as end-to-end tests",
    "slow: marks tests as slow running",
    "order: run tests in specific order",
]
asyncio_mode = "auto"
# Session-scoped event loop for fixtures and tests (see README: shared Sandbox instances).
asyncio_default_fixture_loop_scope = "session"
asyncio_default_test_loop_scope = "session"
# pytest-timeout: per-test limit in seconds.
timeout = 300
log_cli = true
log_cli_level = "INFO"
log_cli_format = "%(asctime)s [%(levelname)s] %(name)s - %(message)s"
log_cli_date_format = "%Y-%m-%d %H:%M:%S"

[tool.ruff.lint]
select = [
    "E",  # pycodestyle errors
    "W",  # pycodestyle warnings
    "F",  # pyflakes
    "I",  # isort
    "B",  # flake8-bugbear
    "C4", # flake8-comprehensions
    "UP", # pyupgrade
]
ignore = [
    "E501", # line too long, handled by formatter
    "B008", # do not perform function calls in argument defaults
    "C901", # too complex
    "B017", # pytest.raises(Exception) is too broad
]

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]


================================================
FILE: tests/python/tests/__init__.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


================================================
FILE: tests/python/tests/base_e2e_test.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Base class for E2E tests providing common setup and configuration.
"""

import os
from datetime import timedelta

import httpx
from opensandbox.config import ConnectionConfig, ConnectionConfigSync

# Fallback values used when the corresponding environment variable is unset.
DEFAULT_DOMAIN = "localhost:8080"
DEFAULT_PROTOCOL = "http"
DEFAULT_API_KEY = "e2e-test"
DEFAULT_IMAGE = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/code-interpreter:latest"

# Effective settings, resolved once at import time from the environment.
TEST_DOMAIN = os.getenv("OPENSANDBOX_TEST_DOMAIN", DEFAULT_DOMAIN)
TEST_PROTOCOL = os.getenv("OPENSANDBOX_TEST_PROTOCOL", DEFAULT_PROTOCOL)
TEST_API_KEY = os.getenv("OPENSANDBOX_TEST_API_KEY", DEFAULT_API_KEY)
TEST_IMAGE = os.getenv("OPENSANDBOX_SANDBOX_DEFAULT_IMAGE", DEFAULT_IMAGE)


def get_sandbox_image() -> str:
    """Return the sandbox image reference E2E tests should use (``TEST_IMAGE``)."""
    image: str = TEST_IMAGE
    return image


def create_connection_config() -> ConnectionConfig:
    """Build the async ``ConnectionConfig`` used by E2E tests.

    Domain, protocol and API key come from the module-level ``TEST_*``
    settings; requests get a three-minute timeout.
    """
    three_minutes = timedelta(minutes=3)
    return ConnectionConfig(
        protocol=TEST_PROTOCOL,
        domain=TEST_DOMAIN,
        api_key=TEST_API_KEY,
        request_timeout=three_minutes,
    )


def create_connection_config_server_proxy() -> ConnectionConfig:
    """Build the async ``ConnectionConfig`` with ``use_server_proxy=True``.

    Otherwise uses the same ``TEST_*`` settings and three-minute request
    timeout as the non-proxy config.
    """
    three_minutes = timedelta(minutes=3)
    return ConnectionConfig(
        protocol=TEST_PROTOCOL,
        domain=TEST_DOMAIN,
        api_key=TEST_API_KEY,
        request_timeout=three_minutes,
        use_server_proxy=True,
    )


def create_connection_config_sync() -> ConnectionConfigSync:
    """Build the sync ``ConnectionConfigSync`` used by E2E tests.

    Each call creates a fresh ``httpx.HTTPTransport`` with explicit
    connection-pool limits; requests get a three-minute timeout.
    """
    pool_limits = httpx.Limits(
        max_connections=100,
        max_keepalive_connections=20,
        keepalive_expiry=15,
    )
    return ConnectionConfigSync(
        protocol=TEST_PROTOCOL,
        domain=TEST_DOMAIN,
        api_key=TEST_API_KEY,
        request_timeout=timedelta(minutes=3),
        transport=httpx.HTTPTransport(limits=pool_limits),
    )


def create_connection_config_sync_server_proxy() -> ConnectionConfigSync:
    """Build the sync ``ConnectionConfigSync`` with ``use_server_proxy=True``.

    Each call creates a fresh ``httpx.HTTPTransport`` with explicit
    connection-pool limits; requests get a three-minute timeout.
    """
    pool_limits = httpx.Limits(
        max_connections=100,
        max_keepalive_connections=20,
        keepalive_expiry=15,
    )
    return ConnectionConfigSync(
        protocol=TEST_PROTOCOL,
        domain=TEST_DOMAIN,
        api_key=TEST_API_KEY,
        request_timeout=timedelta(minutes=3),
        transport=httpx.HTTPTransport(limits=pool_limits),
        use_server_proxy=True,
    )


================================================
FILE: tests/python/tests/test_code_interpreter_e2e.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Comprehensive E2E tests for CodeInterpreter functionality.

Tests code execution capabilities including:
- Multi-language code execution (Java, Python, Go, TypeScript)
- Session state management and variable persistence
- Context isolation between different execution contexts
- Error handling and recovery mechanisms
- Event handling patterns identical to runCommand

This file is intentionally split into ordered test methods (rather than one giant test)
to make failures easier to locate and debug.
"""

import asyncio
import logging
import time
from collections.abc import Awaitable, Callable
from contextlib import AsyncExitStack, asynccontextmanager
from datetime import timedelta

import pytest
from code_interpreter import CodeInterpreter
from code_interpreter.models.code import SupportedLanguage
from opensandbox import Sandbox
from opensandbox.config import ConnectionConfig
from opensandbox.constants import DEFAULT_EXECD_PORT
from opensandbox.models.execd import (
    ExecutionComplete,
    ExecutionError,
    ExecutionHandlers,
    ExecutionInit,
    ExecutionResult,
    OutputMessage,
)
from opensandbox.models.sandboxes import Host, SandboxImageSpec, Volume

from tests.base_e2e_test import create_connection_config, get_sandbox_image

logger = logging.getLogger(__name__)


def _now_ms() -> int:
    return int(time.time() * 1000)


def _assert_recent_timestamp_ms(ts: int, *, tolerance_ms: int = 180_000) -> None:
    """Assert ``ts`` is a positive int millisecond timestamp close to now.

    "Close" means within ``tolerance_ms`` of the current time in either
    direction.
    """
    assert isinstance(ts, int)
    assert ts > 0
    drift_ms = abs(ts - _now_ms())
    assert drift_ms <= tolerance_ms, f"timestamp too far from now: delta={drift_ms}ms (ts={ts})"


def _assert_endpoint_has_port(endpoint: str, expected_port: int) -> None:
    assert endpoint
    assert "://" not in endpoint, f"unexpected scheme in endpoint: {endpoint}"
    if "/" in endpoint:
        assert endpoint.endswith(f"/{expected_port}"), (
            f"endpoint route must end with /{expected_port}: {endpoint}"
        )
        assert endpoint.split("/", 1)[0], f"missing domain in endpoint: {endpoint}"
        return
    host, port = endpoint.rsplit(":", 1)
    assert host
    assert port.isdigit()
    assert int(port) == expected_port


def _assert_terminal_event_contract(
        *,
        init_events: list[ExecutionInit],
        completed_events: list[ExecutionComplete],
        errors: list[ExecutionError],
        execution_id: str | None,
) -> None:
    """Validate the init/terminal events collected for one execution.

    Checks that exactly one init event exists with a non-blank id (equal to
    ``execution_id`` when one is given) and a recent timestamp, and that at
    least one terminal event (complete or error) was received. A complete
    event must be unique with a recent timestamp and a non-negative execution
    time; the first error must have a name, a non-None value and a recent
    timestamp.
    """
    assert len(init_events) == 1
    first_init = init_events[0]
    assert first_init.id is not None and first_init.id.strip()
    if execution_id is not None:
        assert first_init.id == execution_id
    _assert_recent_timestamp_ms(first_init.timestamp)

    complete_count = len(completed_events)
    error_count = len(errors)
    # NOTE(review): the message says "exactly one", but this condition only
    # requires at least one of complete/error - confirm which is intended.
    assert (complete_count > 0) or (error_count > 0), (
        f"expected exactly one of complete/error, got complete={complete_count} "
        f"error={error_count}"
    )

    if complete_count > 0:
        assert complete_count == 1
        completed = completed_events[0]
        _assert_recent_timestamp_ms(completed.timestamp)
        assert completed.execution_time_in_millis >= 0
    if error_count > 0:
        first_error = errors[0]
        assert first_error.name
        assert first_error.value is not None
        _assert_recent_timestamp_ms(first_error.timestamp)


def _buffer_attempt_handlers(
        handlers: ExecutionHandlers,
) -> tuple[ExecutionHandlers, Callable[[], Awaitable[None]]]:
    """Wrap ``handlers`` so events are recorded instead of delivered.

    Returns a pair ``(buffering_handlers, flush)``:

    - ``buffering_handlers`` has a recording callback for every callback that
      is set on ``handlers`` and ``None`` for every one that is not;
    - ``flush()`` replays the recorded events, in arrival order, to the
      matching callbacks of the original ``handlers``.
    """
    recorded: list[tuple[str, object]] = []

    # Event kind -> name of the ExecutionHandlers attribute that receives it.
    attr_by_kind = {
        "stdout": "on_stdout",
        "stderr": "on_stderr",
        "result": "on_result",
        "complete": "on_execution_complete",
        "error": "on_error",
        "init": "on_init",
    }

    def _recorder(kind: str):
        async def _record(payload) -> None:
            recorded.append((kind, payload))

        return _record

    async def flush() -> None:
        for kind, payload in recorded:
            deliver = getattr(handlers, attr_by_kind[kind])
            if deliver is not None:
                await deliver(payload)

    buffering_callbacks = {
        attr: (_recorder(kind) if getattr(handlers, attr) is not None else None)
        for kind, attr in attr_by_kind.items()
    }
    return ExecutionHandlers(**buffering_callbacks), flush


async def run_with_retry(
    code_interpreter: CodeInterpreter,
    code: str,
    *,
    context=None,
    language=None,
    handlers=None,
    max_retries: int = 3,
    retry_delay: float = 2.0,
    per_call_timeout: float = 120.0,
):
    """
    Run code with retry logic for flaky kernel initialization and network errors.

    Returns the execution result, retrying on:
    - Empty/None id responses (kernel not ready)
    - Network errors (connection reset, server disconnected)
    - Per-call timeout (SSE stream hangs due to peer disconnect)

    Args:
        code_interpreter: interpreter whose ``codes.run`` is invoked.
        code: source code to execute.
        context, language, handlers: forwarded to ``codes.run``. When
            ``handlers`` is given, each attempt runs with buffering handlers and
            the buffered events are replayed to ``handlers`` only for an attempt
            that returns a result with a non-None id.
        max_retries: maximum number of attempts.
        retry_delay: initial delay in seconds between attempts; multiplied by
            1.5 after every retry.
        per_call_timeout: seconds allowed for a single ``codes.run`` call.

    Returns:
        The first result with a non-None id. If no attempt produced one, the
        last result obtained (even if empty), or ``None`` when there was
        neither a result nor an exception.

    Raises:
        The original exception for a non-retryable error or a failure on the
        last attempt; ``TimeoutError`` when attempts timed out and no result
        was ever obtained.
    """
    last_result = None
    last_exception = None

    for attempt in range(max_retries):
        try:
            attempt_handlers = handlers
            flush_attempt_events: Callable[[], Awaitable[None]] | None = None
            if handlers is not None:
                # Buffer this attempt's events so a discarded attempt does not
                # reach the caller's handlers.
                attempt_handlers, flush_attempt_events = _buffer_attempt_handlers(
                    handlers
                )

            result = await asyncio.wait_for(
                code_interpreter.codes.run(
                    code,
                    context=context,
                    language=language,
                    handlers=attempt_handlers,
                ),
                timeout=per_call_timeout,
            )
            last_result = result
            if result is not None and result.id is not None:
                # Accepted attempt: deliver its buffered events, then return.
                if flush_attempt_events is not None:
                    await flush_attempt_events()
                return result
            # Empty result - retry
            if attempt < max_retries - 1:
                logger.warning(
                    "Execution returned empty result (attempt %d/%d), retrying in %.1fs...",
                    attempt + 1, max_retries, retry_delay
                )
                await asyncio.sleep(retry_delay)
                retry_delay *= 1.5  # exponential backoff
        except asyncio.TimeoutError:
            # Raised by asyncio.wait_for when the per-call timeout expires; kept
            # so it can be raised after the loop if no result was ever obtained.
            last_exception = TimeoutError(
                f"codes.run() did not complete within {per_call_timeout}s"
            )
            if attempt < max_retries - 1:
                logger.warning(
                    "Execution timed out after %.0fs (attempt %d/%d), retrying in %.1fs...",
                    per_call_timeout, attempt + 1, max_retries, retry_delay,
                )
                await asyncio.sleep(retry_delay)
                retry_delay *= 1.5
            else:
                logger.error(
                    "Execution timed out after %.0fs on final attempt %d/%d",
                    per_call_timeout, attempt + 1, max_retries,
                )
        except Exception as e:
            last_exception = e
            error_name = type(e).__name__
            # Check if it's a retryable network error
            # (case-insensitive substring match on the exception message).
            error_str = str(e).lower()
            is_retryable = any(keyword in error_str for keyword in [
                "disconnected", "connection", "reset", "closed", "timeout",
                "remoteerror", "protocol", "peer closed", "session is busy",
            ])
            if is_retryable and attempt < max_retries - 1:
                logger.warning(
                    "Execution failed with %s (attempt %d/%d), retrying in %.1fs: %s",
                    error_name, attempt + 1, max_retries, retry_delay, str(e)[:100]
                )
                await asyncio.sleep(retry_delay)
                retry_delay *= 1.5
            else:
                # Non-retryable error or last attempt
                raise

    # If we have a result (even empty), return it; otherwise raise last exception
    if last_result is not None:
        return last_result
    if last_exception is not None:
        raise last_exception
    return None


async def create_context_with_retry(
    code_interpreter: CodeInterpreter,
    language: str,
    max_retries: int = 3,
    retry_delay: float = 2.0,
):
    """Create a code context, retrying when the failure looks like a network error.

    Args:
        code_interpreter: interpreter whose ``codes.create_context`` is invoked.
        language: language passed to ``create_context``.
        max_retries: maximum number of attempts; must be at least 1.
        retry_delay: initial delay in seconds between attempts; multiplied by
            1.5 after every retry.

    Returns:
        The created context.

    Raises:
        ValueError: if ``max_retries`` is less than 1.
        Exception: the original exception for a non-retryable error or a
            failure on the last attempt.
    """
    if max_retries < 1:
        # Without this guard the loop would never run and there would be no
        # exception to re-raise.
        raise ValueError(f"max_retries must be >= 1, got {max_retries}")

    # Case-insensitive substrings of the exception message that mark it retryable.
    retryable_keywords = (
        "disconnected", "connection", "reset", "closed", "timeout",
        "remoteerror", "protocol", "peer closed",
    )

    for attempt in range(max_retries):
        try:
            ctx = await code_interpreter.codes.create_context(language)
            # Small delay to allow kernel initialization
            await asyncio.sleep(0.5)
            return ctx
        except Exception as e:
            error_str = str(e).lower()
            is_retryable = any(keyword in error_str for keyword in retryable_keywords)
            if is_retryable and attempt < max_retries - 1:
                logger.warning(
                    "Context creation failed (attempt %d/%d), retrying in %.1fs: %s",
                    attempt + 1, max_retries, retry_delay, str(e)[:100]
                )
                await asyncio.sleep(retry_delay)
                retry_delay *= 1.5
            else:
                raise

    # The final attempt always returns or re-raises, so the loop cannot fall through.
    raise AssertionError("unreachable: retry loop ended without returning or raising")


@asynccontextmanager
async def managed_ctx(code_interpreter: CodeInterpreter, language: str):
    """Yield a freshly created code context and delete it on exit.

    Deletion is best-effort: it is tried at most twice, each try bounded by a
    10 second timeout, so an unreachable sandbox (dead container / network
    gone) cannot block the test suite indefinitely. A second failure is logged
    and otherwise ignored.
    """
    ctx = await create_context_with_retry(code_interpreter, language)
    try:
        yield ctx
    finally:
        attempts_left = 2
        while attempts_left > 0:
            attempts_left -= 1
            try:
                if ctx.id:
                    pending_delete = code_interpreter.codes.delete_context(ctx.id)
                    await asyncio.wait_for(pending_delete, timeout=10.0)
                break
            except Exception:
                if attempts_left > 0:
                    # First failure: short pause, then one more try.
                    await asyncio.sleep(0.5)
                else:
                    logger.warning(
                        "Cleanup: failed to delete context %s (%s)", ctx.id, language, exc_info=True
                    )


@asynccontextmanager
async def managed_ctx_stack(code_interpreter: CodeInterpreter, languages: list[str]):
    """Yield one managed context per entry of ``languages``, in the same order.

    Contexts are created one after another via ``managed_ctx`` and registered
    on a shared ``AsyncExitStack``, which exits all of them when this manager
    exits.
    """
    async with AsyncExitStack() as exit_stack:
        opened = [
            await exit_stack.enter_async_context(managed_ctx(code_interpreter, language))
            for language in languages
        ]
        yield opened


@pytest.mark.asyncio
class TestCodeInterpreterE2E:
    """Comprehensive E2E tests for CodeInterpreter runCode functionality (ordered)."""

    sandbox: Sandbox | None = None
    code_interpreter: CodeInterpreter | None = None
    connection_config: ConnectionConfig | None = None
    _setup_done = False

    @pytest.fixture(scope="class", autouse=True)
    async def _ci_lifecycle(self, request):
        """Create sandbox + code interpreter once and ALWAYS cleanup.

        Setup runs inside the ``try`` so that a failure part-way through
        ``_ensure_code_interpreter_created`` (for example the sandbox was
        created but the CodeInterpreter could not be) still reaches the
        ``finally`` block and releases the sandbox instead of leaking it.

        Teardown is best-effort: ``kill()`` and ``close()`` are attempted
        independently and failures are only logged. Afterwards the class-level
        state is reset so a later setup cannot reuse a killed sandbox.
        """
        try:
            await request.cls._ensure_code_interpreter_created()
            yield
        finally:
            sandbox = request.cls.sandbox
            if sandbox is not None:
                try:
                    await sandbox.kill()
                except Exception as e:
                    logger.warning("Teardown: sandbox.kill() failed: %s", e, exc_info=True)
                try:
                    await sandbox.close()
                except Exception as e:
                    logger.warning("Teardown: sandbox.close() failed: %s", e, exc_info=True)
            # Drop references to the torn-down resources; otherwise the
            # `_setup_done` short-circuit would hand out a dead sandbox.
            request.cls.sandbox = None
            request.cls.code_interpreter = None
            request.cls._setup_done = False

    @classmethod
    async def _ensure_code_interpreter_created(cls) -> None:
        """Provision the shared sandbox and CodeInterpreter on first call only.

        Subsequent calls return immediately once ``_setup_done`` is set. The
        results are stored on the class (``connection_config``, ``sandbox``,
        ``code_interpreter``) so the ordered tests can share them.
        """
        if cls._setup_done:
            return

        rule = "=" * 100
        logger.info(rule)
        logger.info("SETUP: Creating sandbox and creating CodeInterpreter")
        logger.info(rule)

        cls.connection_config = create_connection_config()

        # Runtime versions and log location handed to the sandbox process.
        sandbox_env = {
            "E2E_TEST": "true",
            "GO_VERSION": "1.25",
            "JAVA_VERSION": "21",
            "NODE_VERSION": "22",
            "PYTHON_VERSION": "3.12",
            "EXECD_LOG_FILE": "/tmp/opensandbox-e2e/logs/execd.log",
        }
        # Host-backed volume mounted at the directory that holds the execd log.
        log_volume = Volume(
            name="execd-log",
            host=Host(path="/tmp/opensandbox-e2e/logs"),
            mountPath="/tmp/opensandbox-e2e/logs",
            readOnly=False,
        )

        cls.sandbox = await Sandbox.create(
            image=SandboxImageSpec(get_sandbox_image()),
            entrypoint=["/opt/opensandbox/code-interpreter.sh"],
            connection_config=cls.connection_config,
            timeout=timedelta(minutes=15),
            ready_timeout=timedelta(seconds=60),
            metadata={"tag": "e2e-code-interpreter"},
            env=sandbox_env,
            health_check_polling_interval=timedelta(milliseconds=500),
            volumes=[log_volume],
        )

        interpreter = await CodeInterpreter.create(sandbox=cls.sandbox)
        cls.code_interpreter = interpreter

        assert cls.code_interpreter is not None
        assert isinstance(cls.code_interpreter.id, str)
        logger.info("✓ CodeInterpreter created: %s", cls.code_interpreter.id)
        logger.info(rule)

        # Only mark setup complete once every step above has succeeded.
        cls._setup_done = True

    @pytest.mark.timeout(600)
    @pytest.mark.order(1)
    async def test_01_creation_and_basic_functionality(self):
        """Smoke-test the shared CodeInterpreter.

        Checks, in order: the service accessors are present, the sandbox
        reports healthy, sandbox info matches the interpreter id and is
        ``Running``, the execd endpoint carries the expected port, metrics lie
        within sane bounds, and a 20-minute renewal is visible via
        ``get_info()``.
        """
        await self._ensure_code_interpreter_created()
        interpreter = TestCodeInterpreterE2E.code_interpreter
        assert interpreter is not None

        separator = "=" * 80
        logger.info(separator)
        logger.info("TEST 1: CodeInterpreter creation and basic functionality")
        logger.info(separator)

        for service in (
            interpreter.codes,
            interpreter.files,
            interpreter.commands,
            interpreter.metrics,
        ):
            assert service is not None
        logger.info("✓ All service components are accessible")

        healthy = await interpreter.sandbox.is_healthy()
        assert healthy is True
        logger.info("✓ CodeInterpreter is healthy")

        sandbox_info = await interpreter.sandbox.get_info()
        assert str(interpreter.id) == str(sandbox_info.id)
        assert sandbox_info.status.state == "Running"
        logger.info(
            "✓ CodeInterpreter info: state=%s, created=%s",
            sandbox_info.status.state,
            sandbox_info.created_at,
        )

        execd_endpoint = await interpreter.sandbox.get_endpoint(DEFAULT_EXECD_PORT)
        assert execd_endpoint is not None
        assert execd_endpoint.endpoint is not None
        _assert_endpoint_has_port(execd_endpoint.endpoint, DEFAULT_EXECD_PORT)
        logger.info("✓ CodeInterpreter endpoint: %s", execd_endpoint.endpoint)

        usage = await interpreter.sandbox.get_metrics()
        assert usage is not None
        assert usage.cpu_count > 0
        assert 0.0 <= usage.cpu_used_percentage <= 100.0
        assert usage.memory_total_in_mib > 0
        assert 0.0 <= usage.memory_used_in_mib <= usage.memory_total_in_mib
        _assert_recent_timestamp_ms(usage.timestamp)
        logger.info(
            "✓ CPU: %s cores, %.2f%% used",
            usage.cpu_count,
            usage.cpu_used_percentage,
        )
        logger.info(
            "✓ Memory: %s/%s MiB",
            int(usage.memory_used_in_mib),
            int(usage.memory_total_in_mib),
        )

        # Extend the sandbox expiration through the interpreter's sandbox handle.
        renewal = await interpreter.sandbox.renew(timedelta(minutes=20))
        assert renewal is not None
        logger.info("✓ CodeInterpreter expiration renewed to %s", renewal.expires_at)

        info_after_renewal = await interpreter.sandbox.get_info()
        new_expiry = info_after_renewal.expires_at
        drift_seconds = (new_expiry - renewal.expires_at).total_seconds()
        assert abs(drift_seconds) < 10
        # "Now" is built from the expiry's own class and tzinfo so both
        # operands of the subtraction share the same timezone awareness.
        current_time = new_expiry.__class__.now(tz=new_expiry.tzinfo)
        time_left = new_expiry - current_time
        # Allow a two-minute window either side of the requested 20 minutes.
        assert timedelta(minutes=18) < time_left < timedelta(minutes=22)
        logger.info("✓ Expiration updated to %s", info_after_renewal.expires_at)

    @pytest.mark.timeout(900)
    @pytest.mark.order(2)
    async def test_02_java_code_execution(self):
        """Exercise Java execution inside a single managed context.

        Three phases share one context: a simple snippet with all handlers
        attached (stdout/result/terminal-event checks), a follow-up snippet
        that reads ``result`` defined in the first phase (state persistence),
        and a division by zero that must surface as ``EvalException``.
        """
        await self._ensure_code_interpreter_created()
        interpreter = TestCodeInterpreterE2E.code_interpreter
        assert interpreter is not None

        separator = "=" * 80
        logger.info(separator)
        logger.info("TEST 2: Java code execution")
        logger.info(separator)

        async with managed_ctx(interpreter, SupportedLanguage.JAVA) as java_ctx:
            assert java_ctx.id is not None
            assert java_ctx.id.strip()
            assert java_ctx.language == "java"
            logger.info("✓ Java context created")

            # Event buffers filled by the streaming handlers below.
            seen_stdout: list[OutputMessage] = []
            seen_stderr: list[OutputMessage] = []
            seen_results: list[ExecutionResult] = []
            seen_errors: list[ExecutionError] = []
            seen_completions: list[ExecutionComplete] = []
            seen_inits: list[ExecutionInit] = []

            async def record_stdout(msg: OutputMessage):
                seen_stdout.append(msg)
                logger.info("Java stdout: %s", msg.text)

            async def record_stderr(msg: OutputMessage):
                seen_stderr.append(msg)
                logger.warning("Java stderr: %s", msg.text)

            async def record_result(result: ExecutionResult):
                seen_results.append(result)
                logger.info("Java result: %s", result.text)

            async def record_complete(complete: ExecutionComplete):
                seen_completions.append(complete)
                logger.info(
                    "Java execution completed in %s ms", complete.execution_time_in_millis
                )

            async def record_error(error: ExecutionError):
                seen_errors.append(error)
                logger.error("Java error: %s - %s", error.name, error.value)

            async def record_init(init: ExecutionInit):
                seen_inits.append(init)
                logger.info("Java execution initialized with ID: %s", init.id)

            java_handlers = ExecutionHandlers(
                on_stdout=record_stdout,
                on_stderr=record_stderr,
                on_result=record_result,
                on_execution_complete=record_complete,
                on_error=record_error,
                on_init=record_init,
            )

            # --- Phase 1: simple snippet (retried: Java kernel init can be slow) ---
            hello_snippet = (
                'System.out.println("Hello from Java!");\n'
                "int result = 2 + 2;\n"
                'System.out.println("2 + 2 = " + result);\n'
                "result"
            )
            hello_run = await run_with_retry(
                interpreter,
                hello_snippet,
                context=java_ctx,
                handlers=java_handlers,
            )
            assert hello_run is not None
            assert hello_run.id is not None
            assert hello_run.id.strip()
            assert len(hello_run.result) > 0
            assert hello_run.result[0].text == "4"

            _assert_terminal_event_contract(
                init_events=seen_inits,
                completed_events=seen_completions,
                errors=seen_errors,
                execution_id=hello_run.id,
            )
            assert len(seen_errors) == 0
            assert len(seen_completions) == 1
            assert len(seen_stdout) > 0
            assert any("Hello from Java!" in m.text for m in seen_stdout)
            # Kernel formatting may vary the spacing, so compare with spaces removed.
            assert any("2+2=4" in m.text.replace(" ", "") for m in seen_stdout)
            assert all(m.is_error is False for m in seen_stdout)
            for early_msg in seen_stdout[:3]:
                _assert_recent_timestamp_ms(early_msg.timestamp)
            logger.info("✓ Simple Java execution successful")

            # --- Phase 2: `result` from phase 1 must still be visible ---
            persistence_snippet = (
                "import java.util.*;\n"
                "List<Integer> numbers = Arrays.asList(1, 2, 3, 4, 5);\n"
                "int sum = numbers.stream().mapToInt(Integer::intValue).sum();\n"
                'System.out.println("Numbers: " + numbers);\n'
                'System.out.println("Sum: " + sum);\n'
                "result"
            )
            persistence_run = await interpreter.codes.run(
                persistence_snippet,
                context=java_ctx,
            )
            assert persistence_run is not None
            assert persistence_run.id is not None
            assert len(persistence_run.result) > 0
            assert persistence_run.result[0].text == "4"
            logger.info("✓ Java variables and state persistence work correctly")

            # --- Phase 3: error handling ---
            for buffer in (seen_stdout, seen_stderr, seen_errors, seen_completions, seen_inits):
                buffer.clear()

            failing_run = await interpreter.codes.run(
                "int x = 10 / 0; // This will cause ArithmeticException",
                context=java_ctx,
                handlers=java_handlers,
            )
            assert failing_run is not None
            assert failing_run.id is not None
            assert failing_run.id.strip()
            assert failing_run.error is not None
            assert failing_run.error.name == "EvalException"

            _assert_terminal_event_contract(
                init_events=seen_inits,
                completed_events=seen_completions,
                errors=seen_errors,
                execution_id=failing_run.id,
            )
            assert len(seen_errors) > 0
            assert seen_errors[0].name == "EvalException"
            logger.info("✓ Java error handling works correctly")

    @pytest.mark.timeout(900)
    @pytest.mark.order(3)
    async def test_03_python_code_execution(self):
        """Exercise Python execution via a language string and a managed context.

        First runs a snippet by passing ``language=`` directly (no explicit
        context). Then, within one managed context, checks stdout capture,
        that ``result`` and other variables persist between runs, and that an
        undefined name is reported as a ``NameError``.
        """
        await self._ensure_code_interpreter_created()
        interpreter = TestCodeInterpreterE2E.code_interpreter
        assert interpreter is not None

        separator = "=" * 80
        logger.info(separator)
        logger.info("TEST 3: Python code execution")
        logger.info(separator)

        # Convenience interface: `language=...` instead of an explicit context.
        # Retried because the first call can hit flaky kernel initialization.
        language_only_run = await run_with_retry(
            interpreter,
            "result = 2 + 2\nresult",
            language=SupportedLanguage.PYTHON,
        )
        assert language_only_run is not None
        assert language_only_run.id is not None
        assert language_only_run.id.strip()
        assert language_only_run.error is None
        assert len(language_only_run.result) > 0
        assert language_only_run.result[0].text == "4"

        # Event buffers filled by the streaming handlers below.
        seen_stdout: list[OutputMessage] = []
        seen_stderr: list[OutputMessage] = []
        seen_errors: list[ExecutionError] = []
        seen_completions: list[ExecutionComplete] = []
        seen_inits: list[ExecutionInit] = []

        def reset_buffers() -> None:
            # Empty every buffer so each phase is asserted in isolation.
            for buffer in (seen_stdout, seen_stderr, seen_errors, seen_completions, seen_inits):
                buffer.clear()

        async def record_stdout(msg: OutputMessage):
            seen_stdout.append(msg)
            logger.info("Python stdout: %s", msg.text)

        async def record_stderr(msg: OutputMessage):
            seen_stderr.append(msg)
            logger.warning("Python stderr: %s", msg.text)

        async def record_complete(complete: ExecutionComplete):
            seen_completions.append(complete)
            logger.info(
                "Python execution completed in %s ms", complete.execution_time_in_millis
            )

        async def record_error(error: ExecutionError):
            seen_errors.append(error)
            logger.error("Python error: %s - %s", error.name, error.value)

        async def record_init(init: ExecutionInit):
            seen_inits.append(init)
            logger.info("Python execution initialized with ID: %s", init.id)

        python_handlers = ExecutionHandlers(
            on_stdout=record_stdout,
            on_stderr=record_stderr,
            on_execution_complete=record_complete,
            on_error=record_error,
            on_init=record_init,
        )

        async with managed_ctx(interpreter, SupportedLanguage.PYTHON) as python_ctx:
            assert python_ctx.id is not None
            assert python_ctx.id.strip()
            logger.info("✓ Python context created")

            # --- Phase 1: simple snippet (retried: kernel init can be flaky) ---
            hello_snippet = (
                "print('Hello from Python!')\n"
                "result = 2 + 2\n"
                "print(f'2 + 2 = {result}')"
            )
            hello_run = await run_with_retry(
                interpreter,
                hello_snippet,
                context=python_ctx,
                handlers=python_handlers,
            )
            assert hello_run is not None
            assert hello_run.id is not None
            assert hello_run.id.strip()

            _assert_terminal_event_contract(
                init_events=seen_inits,
                completed_events=seen_completions,
                errors=seen_errors,
                execution_id=hello_run.id,
            )
            assert len(seen_errors) == 0
            assert len(seen_completions) == 1
            assert any("Hello from Python!" in m.text for m in seen_stdout)
            assert any("2 + 2 = 4" in m.text for m in seen_stdout)
            logger.info("✓ Simple Python execution successful")

            # --- Phase 2: define variables; `result` from phase 1 still evaluates to 4 ---
            reset_buffers()
            define_snippet = (
                "x = 42\n"
                "y = 'persistent variable'\n"
                "my_list = [1, 2, 3, 4, 5]\n"
                "print(f'x={x}, y=\"{y}\", list={my_list}')\n"
                "result"
            )
            define_run = await interpreter.codes.run(
                define_snippet,
                context=python_ctx,
                handlers=python_handlers,
            )
            assert define_run is not None
            assert define_run.id is not None
            assert define_run.id.strip()
            assert len(define_run.result) > 0
            assert define_run.result[0].text == "4"
            _assert_terminal_event_contract(
                init_events=seen_inits,
                completed_events=seen_completions,
                errors=seen_errors,
                execution_id=define_run.id,
            )
            logger.info("✓ Python variables and state persistence work correctly")

            # --- Phase 3: variables from phase 2 are readable in a later run ---
            reset_buffers()
            reuse_snippet = (
                "print(f'Previously set variables: x={x}, y={y}')\n"
                "z = sum(my_list)\n"
                "print(f'Sum of list: {z}')"
            )
            reuse_run = await interpreter.codes.run(
                reuse_snippet,
                context=python_ctx,
                handlers=python_handlers,
            )
            assert reuse_run is not None
            assert reuse_run.id is not None
            assert reuse_run.id.strip()
            _assert_terminal_event_contract(
                init_events=seen_inits,
                completed_events=seen_completions,
                errors=seen_errors,
                execution_id=reuse_run.id,
            )
            assert any("Previously set variables: x=42" in m.text for m in seen_stdout)
            assert any("Sum of list: 15" in m.text for m in seen_stdout)
            logger.info("✓ Python variable persistence across executions works")

            # --- Phase 4: error handling ---
            reset_buffers()
            failing_run = await interpreter.codes.run(
                "print(undefined_variable)  # This will cause NameError",
                context=python_ctx,
                handlers=python_handlers,
            )
            assert failing_run is not None
            assert failing_run.id is not None
            assert failing_run.id.strip()
            # The failure may surface as a structured error or only on stderr.
            assert failing_run.error is not None or len(failing_run.logs.stderr) > 0

            _assert_terminal_event_contract(
                init_events=seen_inits,
                completed_events=seen_completions,
                errors=seen_errors,
                execution_id=failing_run.id,
            )
            assert len(seen_errors) > 0
            reported = failing_run.error
            if reported:
                assert "NameError" in reported.name or "NameError" in reported.value
            first_error = seen_errors[0]
            assert "NameError" in first_error.name or "NameError" in first_error.value
            logger.info("✓ Python error handling works correctly")

    @pytest.mark.timeout(900)
    @pytest.mark.order(4)
    async def test_04_go_code_execution(self):
        """Exercise Go execution inside a single managed context.

        Runs a hello-world program with handlers attached, a second program
        using a helper function and a slice, and finally a program that
        references an undeclared variable to check error reporting.
        """
        await self._ensure_code_interpreter_created()
        interpreter = TestCodeInterpreterE2E.code_interpreter
        assert interpreter is not None

        separator = "=" * 80
        logger.info(separator)
        logger.info("TEST 4: Go code execution")
        logger.info(separator)

        async with managed_ctx(interpreter, SupportedLanguage.GO) as go_ctx:
            assert go_ctx.id is not None
            assert go_ctx.id.strip()
            assert go_ctx.language == "go"
            logger.info("✓ Go context created")

            # Event buffers filled by the streaming handlers below.
            seen_stdout: list[OutputMessage] = []
            seen_errors: list[ExecutionError] = []
            seen_completions: list[ExecutionComplete] = []
            seen_inits: list[ExecutionInit] = []

            async def record_stdout(msg: OutputMessage):
                seen_stdout.append(msg)
                logger.info("Go stdout: %s", msg.text)

            async def record_complete(complete: ExecutionComplete):
                seen_completions.append(complete)
                logger.info("Go execution completed in %s ms", complete.execution_time_in_millis)

            async def record_error(error: ExecutionError):
                seen_errors.append(error)
                logger.error("Go error: %s - %s", error.name, error.value)

            async def record_init(init: ExecutionInit):
                seen_inits.append(init)
                logger.info("Go execution initialized with ID: %s", init.id)

            go_handlers = ExecutionHandlers(
                on_stdout=record_stdout,
                on_execution_complete=record_complete,
                on_error=record_error,
                on_init=record_init,
            )

            # --- Phase 1: simple program (retried: Go compile can be slow) ---
            hello_program = (
                "package main\n"
                'import "fmt"\n'
                "func main() {\n"
                '    fmt.Print("Hello from Go!")\n'
                "    result := 2 + 2\n"
                '    fmt.Print("2 + 2 =", result)\n'
                "}"
            )
            hello_run = await run_with_retry(
                interpreter,
                hello_program,
                context=go_ctx,
                handlers=go_handlers,
            )
            assert hello_run is not None
            assert hello_run.id is not None
            assert hello_run.id.strip()
            _assert_terminal_event_contract(
                init_events=seen_inits,
                completed_events=seen_completions,
                errors=seen_errors,
                execution_id=hello_run.id,
            )
            assert len(seen_errors) == 0
            assert len(seen_completions) == 1
            assert len(seen_stdout) > 0
            logger.info("✓ Simple Go execution successful")

            # --- Phase 2: helper function operating on a slice (no handlers) ---
            sum_program = (
                "package main\n"
                'import "fmt"\n'
                "func calculate(numbers []int) int {\n"
                "    sum := 0\n"
                "    for _, num := range numbers {\n"
                "        sum += num\n"
                "    }\n"
                "    return sum\n"
                "}\n"
                "func main() {\n"
                "    numbers := []int{1, 2, 3, 4, 5}\n"
                "    sum := calculate(numbers)\n"
                '    fmt.Print("Numbers:", numbers)\n'
                '    fmt.Print("Sum:", sum)\n'
                "}"
            )
            sum_run = await interpreter.codes.run(
                sum_program,
                context=go_ctx,
            )
            assert sum_run is not None
            assert sum_run.id is not None
            logger.info("✓ Go data structures and functions work correctly")

            # --- Phase 3: compilation error ---
            for buffer in (seen_stdout, seen_errors, seen_completions, seen_inits):
                buffer.clear()

            broken_program = (
                "package main\n"
                "func main() {\n"
                "    undeclaredVariable++  // This will cause compilation error\n"
                "}"
            )
            broken_run = await interpreter.codes.run(
                broken_program,
                context=go_ctx,
                handlers=go_handlers,
            )
            assert broken_run is not None
            assert broken_run.id is not None
            assert broken_run.id.strip()
            # The failure may surface as a structured error or only on stderr.
            assert broken_run.error is not None or len(broken_run.logs.stderr) > 0
            _assert_terminal_event_contract(
                init_events=seen_inits,
                completed_events=seen_completions,
                errors=seen_errors,
                execution_id=broken_run.id,
            )
            logger.info("✓ Go error handling works correctly")

    @pytest.mark.timeout(900)
    @pytest.mark.order(5)
    async def test_05_typescript_code_execution(self):
        """Exercise TypeScript execution inside a single managed context.

        Runs a simple snippet with handlers attached, a snippet using an
        interface and typed arrays, and finally a snippet that throws at
        runtime to check error reporting.
        """
        await self._ensure_code_interpreter_created()
        interpreter = TestCodeInterpreterE2E.code_interpreter
        assert interpreter is not None

        separator = "=" * 80
        logger.info(separator)
        logger.info("TEST 5: TypeScript code execution")
        logger.info(separator)

        async with managed_ctx(interpreter, SupportedLanguage.TYPESCRIPT) as ts_ctx:
            assert ts_ctx.id is not None
            assert ts_ctx.id.strip()
            assert ts_ctx.language == "typescript"
            logger.info("✓ TypeScript context created")

            # Event buffers filled by the streaming handlers below.
            seen_stdout: list[OutputMessage] = []
            seen_errors: list[ExecutionError] = []
            seen_completions: list[ExecutionComplete] = []
            seen_inits: list[ExecutionInit] = []

            async def record_stdout(msg: OutputMessage):
                seen_stdout.append(msg)
                logger.info("TypeScript stdout: %s", msg.text)

            async def record_complete(complete: ExecutionComplete):
                seen_completions.append(complete)
                logger.info(
                    "TypeScript execution completed in %s ms", complete.execution_time_in_millis
                )

            async def record_error(error: ExecutionError):
                seen_errors.append(error)
                logger.error("TypeScript error: %s - %s", error.name, error.value)

            async def record_init(init: ExecutionInit):
                seen_inits.append(init)
                logger.info("TypeScript execution initialized with ID: %s", init.id)

            ts_handlers = ExecutionHandlers(
                on_stdout=record_stdout,
                on_execution_complete=record_complete,
                on_error=record_error,
                on_init=record_init,
            )

            # --- Phase 1: simple snippet (retried: TS init can be slow) ---
            hello_snippet = (
                "console.log('Hello from TypeScript!');\n"
                "const result: number = 2 + 2;\n"
                "console.log(`2 + 2 = ${result}`);"
            )
            hello_run = await run_with_retry(
                interpreter,
                hello_snippet,
                context=ts_ctx,
                handlers=ts_handlers,
            )
            assert hello_run is not None
            assert hello_run.id is not None
            assert hello_run.id.strip()
            _assert_terminal_event_contract(
                init_events=seen_inits,
                completed_events=seen_completions,
                errors=seen_errors,
                execution_id=hello_run.id,
            )
            assert len(seen_errors) == 0
            assert len(seen_completions) == 1
            assert any("Hello from TypeScript!" in m.text for m in seen_stdout)
            logger.info("✓ Simple TypeScript execution successful")

            # --- Phase 2: interface and typed arrays (no handlers) ---
            typed_snippet = (
                "interface Person {\n"
                "  name: string;\n"
                "  age: number;\n"
                "}\n"
                "const person: Person = { name: 'John', age: 30 };\n"
                "const numbers: number[] = [1, 2, 3, 4, 5];\n"
                "const sum: number = numbers.reduce((a, b) => a + b, 0);\n"
                "console.log(`Person: ${person.name}, Age: ${person.age}`);\n"
                "console.log(`Numbers: ${numbers}`);\n"
                "console.log(`Sum: ${sum}`);"
            )
            typed_run = await interpreter.codes.run(
                typed_snippet,
                context=ts_ctx,
            )
            assert typed_run is not None
            assert typed_run.id is not None
            logger.info("✓ TypeScript types and interfaces work correctly")

            # --- Phase 3: runtime error ---
            for buffer in (seen_stdout, seen_errors, seen_completions, seen_inits):
                buffer.clear()

            # A thrown Error is deterministic, whereas compile-time type
            # checking may be configured permissively.
            throwing_run = await interpreter.codes.run(
                "throw new Error('ts-runtime-error');",
                context=ts_ctx,
                handlers=ts_handlers,
            )
            assert throwing_run is not None
            assert throwing_run.id is not None
            assert throwing_run.id.strip()
            # The failure may surface as a structured error or only on stderr.
            assert throwing_run.error is not None or len(throwing_run.logs.stderr) > 0
            _assert_terminal_event_contract(
                init_events=seen_inits,
                completed_events=seen_completions,
                errors=seen_errors,
                execution_id=throwing_run.id,
            )
            logger.info("✓ TypeScript error handling works correctly")

    @pytest.mark.timeout(900)
    @pytest.mark.order(6)
    async def test_06_multi_language_support_and_context_isolation(self):
        """Check that contexts are isolated and several languages can coexist.

        Two Python contexts each define their own variable; the second must
        raise ``NameError`` when reading the first one's variable. A Java and
        a Go context are then each run once alongside them.
        """
        await self._ensure_code_interpreter_created()
        interpreter = TestCodeInterpreterE2E.code_interpreter
        assert interpreter is not None

        separator = "=" * 80
        logger.info(separator)
        logger.info("TEST 6: Multi-language support and context isolation")
        logger.info(separator)

        requested_languages = [
            SupportedLanguage.PYTHON,
            SupportedLanguage.PYTHON,
            SupportedLanguage.JAVA,
            SupportedLanguage.GO,
        ]
        async with managed_ctx_stack(interpreter, requested_languages) as (
            py_ctx_a,
            py_ctx_b,
            java_ctx,
            go_ctx,
        ):
            logger.info("✓ Created multiple contexts for different languages")

            # Retried because kernel initialization can be flaky.
            seed_a = await run_with_retry(
                interpreter,
                "secret_value1 = 'python1_secret'\nprint(f'Python1 secret: {secret_value1}')",
                context=py_ctx_a,
            )
            seed_b = await run_with_retry(
                interpreter,
                "secret_value2 = 'python2_secret'\nprint(f'Python2 secret: {secret_value2}')",
                context=py_ctx_b,
            )
            for seeded in (seed_a, seed_b):
                assert seeded is not None and seeded.id is not None
            logger.info("✓ Variables set in different Python contexts")

            # The first context still sees its own variable...
            own_lookup = await interpreter.codes.run(
                "print(f'Python1 still has: {secret_value1}')",
                context=py_ctx_a,
            )
            # ...while the second context must not see it.
            foreign_lookup = await interpreter.codes.run(
                "print(f'Python2 has no: {secret_value1}')",
                context=py_ctx_b,
            )
            assert own_lookup is not None
            assert foreign_lookup is not None
            assert foreign_lookup.error is not None
            assert foreign_lookup.error.name == "NameError"
            logger.info("✓ Context isolation verified - contexts are properly isolated")

            java_snippet = (
                'String javaSecret = "java_secret";\n'
                'System.out.println("Java secret: " + javaSecret);'
            )
            java_run = await run_with_retry(
                interpreter,
                java_snippet,
                context=java_ctx,
            )
            go_program = (
                "package main\n"
                'import "fmt"\n'
                "func main() {\n"
                '    goSecret := "go_secret"\n'
                '    fmt.Print("Go secret:", goSecret)\n'
                "}"
            )
            go_run = await run_with_retry(
                interpreter,
                go_program,
                context=go_ctx,
            )
            assert java_run is not None and java_run.id is not None
            assert go_run is not None and go_run.id is not None
            logger.info("✓ Cross-language execution works correctly")

    @pytest.mark.timeout(900)
    @pytest.mark.order(7)
    async def test_07_concurrent_code_execution(self):
        """Run Python, Java and Go snippets concurrently in separate contexts.

        The three executions are awaited together with ``asyncio.gather``;
        each must return a non-null execution with an id.
        """
        await self._ensure_code_interpreter_created()
        interpreter = TestCodeInterpreterE2E.code_interpreter
        assert interpreter is not None

        separator = "=" * 80
        logger.info(separator)
        logger.info("TEST 7: Concurrent code execution")
        logger.info(separator)

        requested_languages = [
            SupportedLanguage.PYTHON,
            SupportedLanguage.JAVA,
            SupportedLanguage.GO,
        ]
        async with managed_ctx_stack(interpreter, requested_languages) as (
            python_ctx,
            java_ctx,
            go_ctx,
        ):
            logger.info("✓ Created contexts for concurrent execution")

            python_snippet = (
                "import time\n"
                "for i in range(3):\n"
                "    print(f'Python1 iteration {i}')\n"
                "    time.sleep(0.1)\n"
                "print('Python1 completed')"
            )
            java_snippet = (
                "for (int i = 0; i < 3; i++) {\n"
                '    System.out.println("Java iteration " + i);\n'
                "    try { Thread.sleep(100); } catch (Exception e) {}\n"
                "}\n"
                'System.out.println("Java completed");'
            )
            go_program = (
                "package main\n"
                'import "fmt"\n'
                "func main() {\n"
                "    for i := 0; i < 3; i++ {\n"
                '        fmt.Print("Go iteration", i)\n'
                "    }\n"
                '    fmt.Print("Go completed")\n'
                "}"
            )

            async def execute_in(snippet, target_ctx):
                # One coroutine per language; gather() schedules them together.
                return await interpreter.codes.run(
                    snippet,
                    context=target_ctx,
                )

            outcomes = await asyncio.gather(
                execute_in(python_snippet, python_ctx),
                execute_in(java_snippet, java_ctx),
                execute_in(go_program, go_ctx),
            )
            for outcome in outcomes:
                assert outcome is not None
                assert outcome.id is not None
                logger.info("✓ Concurrent execution completed: %s", outcome.id)

    @pytest.mark.timeout(900)
    @pytest.mark.order(8)
    async def test_08_code_execution_interrupt(self):
        """Interrupt a long-running Python execution and check the context stays usable.

        Flow: start a loop of 100 iterations sleeping 0.2 s each as a
        background task, wait for its init event, interrupt it by execution
        id, then run a short follow-up snippet in the same context. The test
        passes when a terminal event (complete/error) was observed or the
        follow-up run returned a result.
        """
        await self._ensure_code_interpreter_created()
        code_interpreter = TestCodeInterpreterE2E.code_interpreter
        assert code_interpreter is not None

        logger.info("=" * 80)
        logger.info("TEST 8: Code execution interrupt")
        logger.info("=" * 80)

        async with managed_ctx(code_interpreter, SupportedLanguage.PYTHON) as python_int_context:
            assert python_int_context.id is not None and python_int_context.id.strip()

            init_events_int: list[ExecutionInit] = []
            completed_events: list[ExecutionComplete] = []
            errors: list[ExecutionError] = []
            # Set by on_init so the test can wait until the execution id is known.
            init_received = asyncio.Event()

            async def on_init(init: ExecutionInit):
                init_events_int.append(init)
                init_received.set()

            async def on_complete(complete: ExecutionComplete):
                completed_events.append(complete)

            async def on_error(error: ExecutionError):
                errors.append(error)

            handlers_int = ExecutionHandlers(
                on_init=on_init,
                on_execution_complete=on_complete,
                on_error=on_error,
            )

            execution_task = asyncio.create_task(
                code_interpreter.codes.run(
                    "import time\n"
                    + "print('Starting long-running Python execution')\n"
                    + "for i in range(100):\n"
                    + "    print(f'Python iteration {i}')\n"
                    + "    time.sleep(0.2)\n",
                    context=python_int_context,
                    handlers=handlers_int,
                    )
            )

            result_int = None
            try:
                await asyncio.wait_for(init_received.wait(), timeout=15)
                assert len(init_events_int) == 1, "Execution should have been initialized exactly once"
                execution_id = init_events_int[-1].id
                assert execution_id is not None
                logger.info("✓ Execution initialized with ID: %s", execution_id)

                await asyncio.wait_for(
                    code_interpreter.codes.interrupt(execution_id),
                    timeout=15.0,
                )

                # After interrupt the SSE stream should close promptly.  Add a
                # hard timeout so that a slow/stuck server cannot block the test
                # for the full 900 s pytest-timeout.
                try:
                    result_int = await asyncio.wait_for(execution_task, timeout=60.0)
                except Exception as exc:
                    # asyncio.TimeoutError is an Exception subclass, so this one
                    # clause covers both the timeout and any error raised by the run.
                    logger.warning(
                        "Execution task did not return cleanly after interrupt: %s", exc
                    )
                    result_int = None
            finally:
                # Never leave the background run pending: this also covers the
                # paths where waiting for init, the init assertions, or the
                # interrupt call itself fail before the task is awaited.
                if not execution_task.done():
                    execution_task.cancel()

            if result_int is not None:
                assert result_int.id is not None
                assert result_int.id == execution_id

            quick_result = None
            try:
                quick_result = await asyncio.wait_for(
                    code_interpreter.codes.run(
                        "print('Quick Python execution')\n"
                        + "result = 2 + 2\n"
                        + "print(f'Result: {result}')",
                        context=python_int_context,
                        handlers=handlers_int,
                    ),
                    timeout=60.0,
                )
                assert quick_result is not None
                assert quick_result.id is not None
            except Exception as exc:
                # Deliberately lenient: any failure here (including the two
                # assertions above) is only logged; the final check below decides.
                logger.warning("Quick execution after interrupt failed: %s", exc)

            # Different backends may close the interrupted SSE stream without
            # emitting an explicit terminal event. Accept either a terminal
            # event or proof that the context became usable again.
            assert (
                len(completed_events) > 0
                or len(errors) > 0
                or quick_result is not None
            ), "expected terminal event or successful follow-up execution after interrupt"
            logger.info("✓ Python execution was interrupted successfully")

            # Interrupting a completed execution may or may not throw depending on backend behavior.
            try:
                if quick_result is not None:
                    await asyncio.wait_for(
                        code_interpreter.codes.interrupt(quick_result.id),
                        timeout=10.0,
                    )
            except Exception:
                pass

    @pytest.mark.timeout(600)
    @pytest.mark.order(9)
    async def test_09_context_management_endpoints(self):
        """Exercise the create/list/get/delete context calls using bash contexts.

        Steps: wipe existing bash contexts, create two, confirm both are
        listed, fetch one by id, delete it and confirm only the other remains,
        then bulk-delete and confirm the listing is empty.
        """
        await self._ensure_code_interpreter_created()
        code_interpreter = TestCodeInterpreterE2E.code_interpreter
        assert code_interpreter is not None

        language = SupportedLanguage.BASH
        rule = "=" * 80
        logger.info(rule)
        logger.info("TEST 9: Context management endpoints (%s)", language)
        logger.info(rule)

        # Ensure clean slate for bash contexts to avoid interference with other tests.
        await code_interpreter.codes.delete_contexts(language)

        first = await code_interpreter.codes.create_context(language)
        second = await code_interpreter.codes.create_context(language)
        for created in (first, second):
            assert created.id is not None and created.id.strip()
        for created in (first, second):
            assert created.language == language
        logger.info("✓ Created two bash contexts: %s, %s", first.id, second.id)

        # Both new contexts must show up, and nothing of another language may leak in.
        listing = await code_interpreter.codes.list_contexts(language)
        listed_ids = {entry.id for entry in listing if entry.id}
        assert first.id in listed_ids
        assert second.id in listed_ids
        assert all(entry.language == language for entry in listing)
        logger.info("✓ list_contexts returned expected bash contexts")

        looked_up = await code_interpreter.codes.get_context(first.id)
        assert looked_up.id == first.id
        assert looked_up.language == language
        logger.info("✓ get_context returned expected context %s", looked_up.id)

        # Deleting one context must leave the other untouched.
        await code_interpreter.codes.delete_context(first.id)
        after_single_delete = await code_interpreter.codes.list_contexts(language)
        surviving_ids = {entry.id for entry in after_single_delete if entry.id}
        assert first.id not in surviving_ids
        assert second.id in surviving_ids
        logger.info("✓ delete_context removed %s", first.id)

        await code_interpreter.codes.delete_contexts(language)
        after_bulk_delete = await code_interpreter.codes.list_contexts(language)
        leftovers = [entry for entry in after_bulk_delete if entry.id]
        assert len(leftovers) == 0
        logger.info("✓ delete_contexts removed all bash contexts")


================================================
FILE: tests/python/tests/test_code_interpreter_e2e_sync.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Comprehensive Sync E2E tests for CodeInterpreterSync functionality.

This mirrors `test_code_interpreter_e2e.py` but uses the synchronous SDK.
"""

import logging
import time
from collections.abc import Callable
from concurrent.futures import ThreadPoolExecutor
from contextlib import ExitStack, contextmanager
from datetime import timedelta

import pytest
from code_interpreter import CodeInterpreterSync
from code_interpreter.models.code import SupportedLanguage
from opensandbox import SandboxSync
from opensandbox.config import ConnectionConfigSync
from opensandbox.constants import DEFAULT_EXECD_PORT
from opensandbox.models.execd import (
    ExecutionComplete,
    ExecutionError,
    ExecutionInit,
    ExecutionResult,
    OutputMessage,
)
from opensandbox.models.execd_sync import ExecutionHandlersSync
from opensandbox.models.sandboxes import Host, SandboxImageSpec, Volume

from tests.base_e2e_test import create_connection_config_sync, get_sandbox_image

logger = logging.getLogger(__name__)


def _now_ms() -> int:
    return int(time.time() * 1000)


def _assert_recent_timestamp_ms(ts: int, *, tolerance_ms: int = 180_000) -> None:
    """Assert that ``ts`` is a positive int within ``tolerance_ms`` of the current time.

    Args:
        ts: Timestamp in milliseconds, compared against ``_now_ms()``.
        tolerance_ms: Maximum allowed absolute difference, in milliseconds.

    Raises:
        AssertionError: if ``ts`` is not an int, is not positive, or is too
            far from now in either direction.
    """
    assert isinstance(ts, int)
    assert ts > 0
    drift = _now_ms() - ts
    if drift < 0:
        # Timestamps ahead of the local clock are measured the same way.
        drift = -drift
    assert drift <= tolerance_ms, f"timestamp too far from now: delta={drift}ms (ts={ts})"


def _assert_endpoint_has_port(endpoint: str, expected_port: int) -> None:
    assert endpoint
    assert "://" not in endpoint, f"unexpected scheme in endpoint: {endpoint}"
    if "/" in endpoint:
        assert endpoint.endswith(f"/{expected_port}"), (
            f"endpoint route must end with /{expected_port}: {endpoint}"
        )
        assert endpoint.split("/", 1)[0], f"missing domain in endpoint: {endpoint}"
        return
    host, port = endpoint.rsplit(":", 1)
    assert host
    assert port.isdigit()
    assert int(port) == expected_port


def _assert_terminal_event_contract(
    *,
    init_events: list[ExecutionInit],
    completed_events: list[ExecutionComplete],
    errors: list[ExecutionError],
    execution_id: str | None,
) -> None:
    """Assert the streamed-event contract for one finished execution.

    Checks performed:

    * exactly one init event, with a non-blank id and a recent timestamp;
      when ``execution_id`` is given the init id must equal it;
    * at least one terminal event (complete or error) was received;
    * if any complete event exists there is exactly one, with a recent
      timestamp and a non-negative ``execution_time_in_millis``;
    * if any error exists, the first one has a name, a non-None value and a
      recent timestamp.

    Args:
        init_events: Init events collected by the handlers.
        completed_events: Completion events collected by the handlers.
        errors: Error events collected by the handlers.
        execution_id: Expected execution id, or None to skip the id match.

    Raises:
        AssertionError: if any of the checks above fails.
    """
    # Contract: init must exist, and at least one of (error, complete) exists.
    assert len(init_events) == 1
    assert init_events[0].id is not None and init_events[0].id.strip()
    if execution_id is not None:
        assert init_events[0].id == execution_id
    _assert_recent_timestamp_ms(init_events[0].timestamp)
    # The two terminal kinds are validated independently below, so both may be
    # present; only the absence of both is a failure.
    assert (len(completed_events) > 0) or (len(errors) > 0), (
        f"expected at least one of complete/error, got complete={len(completed_events)} "
        f"error={len(errors)}"
    )
    if len(completed_events) > 0:
        assert len(completed_events) == 1
        _assert_recent_timestamp_ms(completed_events[0].timestamp)
        assert completed_events[0].execution_time_in_millis >= 0
    if len(errors) > 0:
        assert errors[0].name
        assert errors[0].value is not None
        _assert_recent_timestamp_ms(errors[0].timestamp)


def _buffer_attempt_handlers_sync(
    handlers: ExecutionHandlersSync,
) -> tuple[ExecutionHandlersSync, Callable[[], None]]:
    """Build recording stand-ins for ``handlers`` plus a function that replays them.

    The returned handler set has a recorder in every slot where ``handlers``
    has a callback (and None elsewhere). Recorders only append
    ``(kind, payload)`` to an in-memory list, in arrival order. Calling the
    returned ``flush`` forwards every recorded payload to the matching
    callback on ``handlers``, in the same order.

    Args:
        handlers: The caller's real handlers.

    Returns:
        ``(recording_handlers, flush)``.
    """
    # Event kind -> name of the attribute on the handlers object.
    slot_by_kind = {
        "stdout": "on_stdout",
        "stderr": "on_stderr",
        "result": "on_result",
        "complete": "on_execution_complete",
        "error": "on_error",
        "init": "on_init",
    }
    recorded: list[tuple[str, object]] = []

    def recorder_for(kind: str):
        # Mirror the shape of the real handlers: no recorder where there is no callback.
        if getattr(handlers, slot_by_kind[kind]) is None:
            return None

        def record(payload) -> None:
            recorded.append((kind, payload))

        return record

    def flush() -> None:
        for kind, payload in recorded:
            callback = getattr(handlers, slot_by_kind[kind])
            if callback is not None:
                callback(payload)

    recording_handlers = ExecutionHandlersSync(
        on_stdout=recorder_for("stdout"),
        on_stderr=recorder_for("stderr"),
        on_result=recorder_for("result"),
        on_execution_complete=recorder_for("complete"),
        on_error=recorder_for("error"),
        on_init=recorder_for("init"),
    )
    return recording_handlers, flush


def run_with_retry_sync(
    code_interpreter: CodeInterpreterSync,
    code: str,
    *,
    context=None,
    language=None,
    handlers=None,
    max_retries: int = 3,
    retry_delay: float = 2.0,
):
    """
    Call ``code_interpreter.codes.run()`` synchronously, retrying transient failures.

    An attempt is retried when:
    - the result is None or has a None id (kernel not ready / session busy)
    - an exception is raised whose lowercased message contains one of the
      retryable markers (connection reset, server disconnected, ...)

    The wait between attempts starts at ``retry_delay`` seconds and is
    multiplied by 1.5 after every retry. When ``handlers`` is given, each
    attempt's events are buffered and only replayed to ``handlers`` for the
    attempt that returns a result with an id.

    Returns the successful result; otherwise the last result that was not
    None, if any. Non-retryable errors, and errors on the final attempt, are
    re-raised.
    """
    retryable_markers = (
        "disconnected", "connection", "reset", "closed", "timeout",
        "remoteerror", "protocol", "peer closed", "session is busy",
    )
    most_recent_result = None
    most_recent_error = None

    for attempt_no in range(1, max_retries + 1):
        can_retry = attempt_no < max_retries
        try:
            if handlers is None:
                per_attempt_handlers = handlers
                replay_events: Callable[[], None] | None = None
            else:
                per_attempt_handlers, replay_events = _buffer_attempt_handlers_sync(
                    handlers
                )
            outcome = code_interpreter.codes.run(
                code,
                context=context,
                language=language,
                handlers=per_attempt_handlers,
            )
            most_recent_result = outcome
            if outcome is not None and outcome.id is not None:
                if replay_events is not None:
                    replay_events()
                return outcome
            # No usable id came back: wait and try again unless this was the last attempt.
            if can_retry:
                logger.warning(
                    "Execution returned empty result (attempt %d/%d), retrying in %.1fs...",
                    attempt_no, max_retries, retry_delay,
                )
                time.sleep(retry_delay)
                retry_delay *= 1.5
        except Exception as exc:
            most_recent_error = exc
            message = str(exc).lower()
            transient = any(marker in message for marker in retryable_markers)
            if not (transient and can_retry):
                raise
            logger.warning(
                "Execution failed with %s (attempt %d/%d), retrying in %.1fs: %s",
                type(exc).__name__, attempt_no, max_retries, retry_delay, str(exc)[:100],
            )
            time.sleep(retry_delay)
            retry_delay *= 1.5

    # Attempts exhausted without a result carrying an id.
    if most_recent_result is not None:
        return most_recent_result
    if most_recent_error is not None:
        raise most_recent_error
    return None


@contextmanager
def managed_ctx_sync(code_interpreter: CodeInterpreterSync, language: str):
    """Create an execution context for ``language`` and delete it when the block exits.

    Yields the created context. Deletion is best-effort: it is skipped when
    the context has no id, and a failure to delete is logged as a warning
    instead of being raised.
    """
    created = code_interpreter.codes.create_context(language)
    try:
        yield created
    finally:
        # Cleanup problems must not replace whatever happened inside the block.
        try:
            if created.id:
                code_interpreter.codes.delete_context(created.id)
        except Exception:
            logger.warning(
                "Cleanup: failed to delete context %s (%s)",
                created.id,
                language,
                exc_info=True,
            )


@contextmanager
def managed_ctx_stack_sync(code_interpreter: CodeInterpreterSync, languages: list[str]):
    """Create one managed context per entry in ``languages`` and yield them as a list.

    Contexts are created in the given order through ``managed_ctx_sync`` and
    registered on a single ``ExitStack``, which unwinds them when the block
    exits (or when creating a later context fails).
    """
    with ExitStack() as cleanup:
        yield [
            cleanup.enter_context(managed_ctx_sync(code_interpreter, language))
            for language in languages
        ]


class TestCodeInterpreterE2ESync:
    sandbox: SandboxSync | None = None
    code_interpreter: CodeInterpreterSync | None = None
    connection_config: ConnectionConfigSync | None = None
    _setup_done = False

    @pytest.fixture(scope="class", autouse=True)
    def _ci_lifecycle(self, request):
        """Create sandbox + code interpreter once and ALWAYS cleanup.

        Setup runs inside the ``try`` so that a partially completed setup
        (for example the sandbox was created but creating the code
        interpreter failed) is still torn down. Teardown is best-effort:
        every step is attempted and failures are logged as warnings.
        """
        try:
            request.cls._ensure_code_interpreter_created()
            yield
        finally:
            sandbox = request.cls.sandbox
            if sandbox is not None:
                try:
                    sandbox.kill()
                except Exception as e:
                    logger.warning("Teardown: sandbox.kill() failed: %s", e, exc_info=True)
                try:
                    sandbox.close()
                except Exception as e:
                    logger.warning("Teardown: sandbox.close() failed: %s", e, exc_info=True)

            cfg = request.cls.connection_config
            if cfg is not None:
                try:
                    cfg.transport.close()
                except Exception as e:
                    # Still non-fatal, but reported like the other teardown steps.
                    logger.warning(
                        "Teardown: transport.close() failed: %s", e, exc_info=True
                    )

    @classmethod
    def _ensure_code_interpreter_created(cls) -> None:
        """Create the shared connection config, sandbox and code interpreter once.

        Returns immediately when ``_setup_done`` is already set. The flag is
        only set after the code interpreter has been created and checked.
        """
        if cls._setup_done:
            return

        cls.connection_config = create_connection_config_sync()

        sandbox_env = {
            "E2E_TEST": "true",
            "GO_VERSION": "1.25",
            "JAVA_VERSION": "21",
            "NODE_VERSION": "22",
            "PYTHON_VERSION": "3.12",
            "EXECD_LOG_FILE": "/tmp/opensandbox-e2e/logs/execd.log",
        }
        # Host directory mounted at the same path that EXECD_LOG_FILE lives under.
        execd_log_volume = Volume(
            name="execd-log",
            host=Host(path="/tmp/opensandbox-e2e/logs"),
            mountPath="/tmp/opensandbox-e2e/logs",
            readOnly=False,
        )

        cls.sandbox = SandboxSync.create(
            image=SandboxImageSpec(get_sandbox_image()),
            entrypoint=["/opt/opensandbox/code-interpreter.sh"],
            connection_config=cls.connection_config,
            timeout=timedelta(minutes=15),
            ready_timeout=timedelta(seconds=60),
            metadata={"tag": "e2e-code-interpreter"},
            env=sandbox_env,
            health_check_polling_interval=timedelta(milliseconds=500),
            volumes=[execd_log_volume],
        )

        interpreter = CodeInterpreterSync.create(sandbox=cls.sandbox)
        cls.code_interpreter = interpreter
        assert cls.code_interpreter is not None
        assert isinstance(cls.code_interpreter.id, str)
        cls._setup_done = True

    @pytest.mark.timeout(600)
    @pytest.mark.order(1)
    def test_01_creation_and_basic_functionality(self):
        """Smoke-check the created interpreter: services, health, info, endpoint, metrics, renew."""
        TestCodeInterpreterE2ESync._ensure_code_interpreter_created()
        code_interpreter = TestCodeInterpreterE2ESync.code_interpreter
        assert code_interpreter is not None

        assert code_interpreter.codes is not None
        assert code_interpreter.files is not None
        assert code_interpreter.commands is not None
        assert code_interpreter.metrics is not None

        sandbox = code_interpreter.sandbox
        assert sandbox.is_healthy() is True

        # The interpreter id must match the sandbox id reported by the server.
        sandbox_info = sandbox.get_info()
        assert str(code_interpreter.id) == str(sandbox_info.id)
        assert sandbox_info.status.state == "Running"

        execd_endpoint = sandbox.get_endpoint(DEFAULT_EXECD_PORT)
        assert execd_endpoint is not None
        assert execd_endpoint.endpoint is not None
        _assert_endpoint_has_port(execd_endpoint.endpoint, DEFAULT_EXECD_PORT)

        usage = sandbox.get_metrics()
        assert usage is not None
        assert usage.cpu_count > 0
        assert 0.0 <= usage.cpu_used_percentage <= 100.0
        assert usage.memory_total_in_mib > 0
        assert 0.0 <= usage.memory_used_in_mib <= usage.memory_total_in_mib
        _assert_recent_timestamp_ms(usage.timestamp)

        # Renew for 20 minutes, then verify the reported expiry agrees with the
        # renew response and lies roughly 20 minutes ahead of now.
        renewal = sandbox.renew(timedelta(minutes=20))
        assert renewal is not None
        expiry = sandbox.get_info().expires_at
        assert abs((expiry - renewal.expires_at).total_seconds()) < 10
        current_time = expiry.__class__.now(tz=expiry.tzinfo)
        time_left = expiry - current_time
        assert time_left > timedelta(minutes=18)
        assert time_left < timedelta(minutes=22)

    @pytest.mark.timeout(900)
    @pytest.mark.order(2)
    def test_02_java_code_execution(self):
        """Run Java snippets in one context: success path, state reuse, and an error.

        1. A snippet that prints and evaluates ``result`` (2 + 2), checked
           through both the returned execution and the streamed events.
        2. A second snippet in the same context that ends by evaluating
           ``result`` again, which it does not define itself, expecting "4".
        3. A division by zero, expected to surface as ``EvalException`` both
           on the returned execution and in the error events.
        """
        TestCodeInterpreterE2ESync._ensure_code_interpreter_created()
        code_interpreter = TestCodeInterpreterE2ESync.code_interpreter
        assert code_interpreter is not None

        with managed_ctx_sync(code_interpreter, SupportedLanguage.JAVA) as java_context:
            assert java_context.id is not None and str(java_context.id).strip()
            assert java_context.language == "java"

            # Sinks filled by the streaming handlers, one list per event kind.
            stdout_messages: list[OutputMessage] = []
            stderr_messages: list[OutputMessage] = []
            results: list[ExecutionResult] = []
            errors: list[ExecutionError] = []
            completed_events: list[ExecutionComplete] = []
            init_events: list[ExecutionInit] = []

            handlers = ExecutionHandlersSync(
                on_stdout=lambda msg: stdout_messages.append(msg),
                on_stderr=lambda msg: stderr_messages.append(msg),
                on_result=lambda result: results.append(result),
                on_execution_complete=lambda complete: completed_events.append(complete),
                on_error=lambda error: errors.append(error),
                on_init=lambda init: init_events.append(init),
            )

            # --- 1. simple snippet, observed through handlers ---
            hello_source = (
                "System.out.println(\"Hello from Java!\");\n"
                "int result = 2 + 2;\n"
                "System.out.println(\"2 + 2 = \" + result);\n"
                "result"
            )
            simple_result = code_interpreter.codes.run(
                hello_source,
                context=java_context,
                handlers=handlers,
            )
            assert simple_result is not None
            assert simple_result.id is not None and simple_result.id.strip()
            assert simple_result.error is None
            assert len(simple_result.result) > 0
            assert simple_result.result[0].text == "4"

            _assert_terminal_event_contract(
                init_events=init_events,
                completed_events=completed_events,
                errors=errors,
                execution_id=simple_result.id,
            )
            assert len(errors) == 0
            assert len(completed_events) == 1
            assert len(stdout_messages) > 0
            assert any("Hello from Java!" in m.text for m in stdout_messages)
            # Whitespace-insensitive match for the "2 + 2 = 4" line.
            assert any("2+2=4" in m.text.replace(" ", "") for m in stdout_messages)
            assert all(m.is_error is False for m in stdout_messages)
            for early_message in stdout_messages[:3]:
                _assert_recent_timestamp_ms(early_message.timestamp)

            # --- 2. second snippet in the same context, no handlers ---
            collections_source = (
                "import java.util.*;\n"
                "List<Integer> numbers = Arrays.asList(1, 2, 3, 4, 5);\n"
                "int sum = numbers.stream().mapToInt(Integer::intValue).sum();\n"
                "System.out.println(\"Numbers: \" + numbers);\n"
                "System.out.println(\"Sum: \" + sum);\n"
                "result"
            )
            var_result = code_interpreter.codes.run(
                collections_source,
                context=java_context,
            )
            assert var_result is not None
            assert var_result.id is not None
            assert len(var_result.result) > 0
            assert var_result.result[0].text == "4"

            # Reset the sinks that the error-path checks rely on.
            for sink in (
                stdout_messages,
                stderr_messages,
                errors,
                completed_events,
                init_events,
            ):
                sink.clear()

            # --- 3. runtime error ---
            error_result = code_interpreter.codes.run(
                "int x = 10 / 0; // This will cause ArithmeticException",
                context=java_context,
                handlers=handlers,
            )
            assert error_result is not None
            assert error_result.id is not None and error_result.id.strip()
            assert error_result.error is not None
            assert error_result.error.name == "EvalException"
            _assert_terminal_event_contract(
                init_events=init_events,
                completed_events=completed_events,
                errors=errors,
                execution_id=error_result.id,
            )
            assert len(errors) > 0
            assert errors[0].name == "EvalException"

    @pytest.mark.timeout(900)
    @pytest.mark.order(3)
    def test_03_python_code_execution(self):
        """Run Python snippets: language-only call, context reuse, and a NameError.

        First runs a snippet by passing only ``language`` (no context object).
        Then, inside one managed context: a printing snippet observed through
        handlers, two follow-up snippets that rely on names set by earlier
        ones, and finally a reference to an undefined variable that must
        produce an error event.
        """
        TestCodeInterpreterE2ESync._ensure_code_interpreter_created()
        code_interpreter = TestCodeInterpreterE2ESync.code_interpreter
        assert code_interpreter is not None

        # New usage: directly pass a language string (ephemeral context).
        # This validates the `codes.run(..., language=...)` convenience interface.
        direct_lang_result = run_with_retry_sync(
            code_interpreter,
            "result = 2 + 2\nresult",
            language=SupportedLanguage.PYTHON,
        )
        assert direct_lang_result is not None
        assert direct_lang_result.id is not None and direct_lang_result.id.strip()
        assert direct_lang_result.error is None
        assert len(direct_lang_result.result) > 0
        assert direct_lang_result.result[0].text == "4"

        # Sinks filled by the streaming handlers, one list per event kind.
        stdout_messages: list[OutputMessage] = []
        stderr_messages: list[OutputMessage] = []
        errors: list[ExecutionError] = []
        completed_events: list[ExecutionComplete] = []
        init_events: list[ExecutionInit] = []

        handlers_py = ExecutionHandlersSync(
            on_stdout=lambda msg: stdout_messages.append(msg),
            on_stderr=lambda msg: stderr_messages.append(msg),
            on_execution_complete=lambda complete: completed_events.append(complete),
            on_error=lambda error: errors.append(error),
            on_init=lambda init: init_events.append(init),
        )

        with managed_ctx_sync(code_interpreter, SupportedLanguage.PYTHON) as python_context:
            assert python_context.id is not None and str(python_context.id).strip()

            hello_source = (
                "print('Hello from Python!')\n"
                "result = 2 + 2\n"
                "print(f'2 + 2 = {result}')"
            )
            simple_result_py = run_with_retry_sync(
                code_interpreter,
                hello_source,
                context=python_context,
                handlers=handlers_py,
            )
            assert simple_result_py is not None
            assert simple_result_py.id is not None and simple_result_py.id.strip()
            _assert_terminal_event_contract(
                init_events=init_events,
                completed_events=completed_events,
                errors=errors,
                execution_id=simple_result_py.id,
            )
            assert len(errors) == 0
            assert len(completed_events) == 1
            assert any("Hello from Python!" in m.text for m in stdout_messages)
            assert any("2 + 2 = 4" in m.text for m in stdout_messages)

            # Ends by evaluating `result`, which only the previous snippet assigned.
            variables_source = (
                "x = 42\n"
                "y = 'persistent variable'\n"
                "my_list = [1, 2, 3, 4, 5]\n"
                "print(f'x={x}, y=\"{y}\", list={my_list}')\n"
                "result"
            )
            var_result_py = code_interpreter.codes.run(
                variables_source,
                context=python_context,
            )
            assert var_result_py is not None
            assert var_result_py.id is not None
            assert len(var_result_py.result) > 0
            assert var_result_py.result[0].text == "4"

            # Reads x, y and my_list assigned by the snippet above.
            persistence_source = (
                "print(f'Previously set variables: x={x}, y={y}')\n"
                "z = sum(my_list)\n"
                "print(f'Sum of list: {z}')"
            )
            persist_result = code_interpreter.codes.run(
                persistence_source,
                context=python_context,
            )
            assert persist_result is not None
            assert persist_result.id is not None

            # Reset every sink before the error-path checks.
            for sink in (
                stdout_messages,
                stderr_messages,
                errors,
                completed_events,
                init_events,
            ):
                sink.clear()

            error_result_py = code_interpreter.codes.run(
                "print(undefined_variable)  # This will cause NameError",
                context=python_context,
                handlers=handlers_py,
            )
            assert error_result_py is not None
            assert error_result_py.id is not None and error_result_py.id.strip()
            assert error_result_py.error is not None or len(error_result_py.logs.stderr) > 0
            _assert_terminal_event_contract(
                init_events=init_events,
                completed_events=completed_events,
                errors=errors,
                execution_id=error_result_py.id,
            )
            assert len(errors) > 0

    @pytest.mark.timeout(900)
    @pytest.mark.order(4)
    def test_04_go_code_execution(self):
        """Exercise Go execution in a single managed context.

        Three programs are run in order against the same Go context:
        a simple print program (with handlers), a program using a helper
        function (without handlers), and a program that references an
        undeclared variable (with handlers). The first and last runs are
        additionally validated through ``_assert_terminal_event_contract``.
        """
        TestCodeInterpreterE2ESync._ensure_code_interpreter_created()
        interpreter = TestCodeInterpreterE2ESync.code_interpreter
        assert interpreter is not None

        # Go sources, kept as named locals so the run calls below stay short.
        hello_src = (
            'package main\n'
            'import "fmt"\n'
            'func main() {\n'
            '    fmt.Print("Hello from Go!")\n'
            '    result := 2 + 2\n'
            '    fmt.Print("2 + 2 =", result)\n'
            '}'
        )
        sum_src = (
            'package main\n'
            'import "fmt"\n'
            'func calculate(numbers []int) int {\n'
            '    sum := 0\n'
            '    for _, num := range numbers {\n'
            '        sum += num\n'
            '    }\n'
            '    return sum\n'
            '}\n'
            'func main() {\n'
            '    numbers := []int{1, 2, 3, 4, 5}\n'
            '    sum := calculate(numbers)\n'
            '    fmt.Print("Numbers:", numbers)\n'
            '    fmt.Print("Sum:", sum)\n'
            '}'
        )
        broken_src = (
            'package main\n'
            'func main() {\n'
            '    undeclaredVariable++  // This will cause compilation error\n'
            '}'
        )

        with managed_ctx_sync(interpreter, SupportedLanguage.GO) as go_context:
            assert go_context.id is not None and str(go_context.id).strip()
            assert go_context.language == "go"

            # Event sinks populated by the handler callbacks below.
            seen_stdout: list[OutputMessage] = []
            seen_errors: list[ExecutionError] = []
            seen_complete: list[ExecutionComplete] = []
            seen_init: list[ExecutionInit] = []

            def _record_stdout(message):
                seen_stdout.append(message)

            def _record_complete(event):
                seen_complete.append(event)

            def _record_error(event):
                seen_errors.append(event)

            def _record_init(event):
                seen_init.append(event)

            go_handlers = ExecutionHandlersSync(
                on_stdout=_record_stdout,
                on_execution_complete=_record_complete,
                on_error=_record_error,
                on_init=_record_init,
            )

            # 1) Simple program: must emit stdout and no error events.
            hello_run = interpreter.codes.run(
                hello_src,
                context=go_context,
                handlers=go_handlers,
            )
            assert hello_run is not None
            assert hello_run.id is not None and hello_run.id.strip()
            _assert_terminal_event_contract(
                init_events=seen_init,
                completed_events=seen_complete,
                errors=seen_errors,
                execution_id=hello_run.id,
            )
            assert len(seen_errors) == 0
            assert len(seen_stdout) > 0

            # 2) Program with a helper function; run without handlers.
            sum_run = interpreter.codes.run(sum_src, context=go_context)
            assert sum_run is not None
            assert sum_run.id is not None

            # Reset the sinks so the error run is checked in isolation.
            for sink in (seen_stdout, seen_errors, seen_complete, seen_init):
                sink.clear()

            # 3) Program referencing an undeclared variable.
            broken_run = interpreter.codes.run(
                broken_src,
                context=go_context,
                handlers=go_handlers,
            )
            assert broken_run is not None
            assert broken_run.id is not None and broken_run.id.strip()
            assert broken_run.error is not None or len(broken_run.logs.stderr) > 0
            _assert_terminal_event_contract(
                init_events=seen_init,
                completed_events=seen_complete,
                errors=seen_errors,
                execution_id=broken_run.id,
            )

    @pytest.mark.timeout(900)
    @pytest.mark.order(5)
    def test_05_typescript_code_execution(self):
        """Exercise TypeScript execution in a single managed context.

        Runs a simple logging snippet (with handlers), a snippet using an
        interface and typed values (without handlers), and finally a snippet
        that throws at runtime (with handlers). The first and last runs are
        also validated through ``_assert_terminal_event_contract``.
        """
        TestCodeInterpreterE2ESync._ensure_code_interpreter_created()
        interpreter = TestCodeInterpreterE2ESync.code_interpreter
        assert interpreter is not None

        # TypeScript sources assembled line by line (no trailing newline).
        hello_src = "\n".join(
            [
                "console.log('Hello from TypeScript!');",
                "const result: number = 2 + 2;",
                "console.log(`2 + 2 = ${result}`);",
            ]
        )
        typed_src = "\n".join(
            [
                "interface Person {",
                "  name: string;",
                "  age: number;",
                "}",
                "const person: Person = { name: 'John', age: 30 };",
                "const numbers: number[] = [1, 2, 3, 4, 5];",
                "const sum: number = numbers.reduce((a, b) => a + b, 0);",
                "console.log(`Person: ${person.name}, Age: ${person.age}`);",
                "console.log(`Numbers: ${numbers}`);",
                "console.log(`Sum: ${sum}`);",
            ]
        )
        throwing_src = "throw new Error('ts-runtime-error');"

        with managed_ctx_sync(interpreter, SupportedLanguage.TYPESCRIPT) as ts_context:
            assert ts_context.id is not None and str(ts_context.id).strip()
            assert ts_context.language == "typescript"

            # Event sinks populated by the handler callbacks below.
            seen_stdout: list[OutputMessage] = []
            seen_errors: list[ExecutionError] = []
            seen_complete: list[ExecutionComplete] = []
            seen_init: list[ExecutionInit] = []

            def _record_stdout(message):
                seen_stdout.append(message)

            def _record_complete(event):
                seen_complete.append(event)

            def _record_error(event):
                seen_errors.append(event)

            def _record_init(event):
                seen_init.append(event)

            ts_handlers = ExecutionHandlersSync(
                on_stdout=_record_stdout,
                on_execution_complete=_record_complete,
                on_error=_record_error,
                on_init=_record_init,
            )

            # 1) Simple snippet: one completion, no errors, greeting on stdout.
            hello_run = interpreter.codes.run(
                hello_src,
                context=ts_context,
                handlers=ts_handlers,
            )
            assert hello_run is not None
            assert hello_run.id is not None and hello_run.id.strip()
            _assert_terminal_event_contract(
                init_events=seen_init,
                completed_events=seen_complete,
                errors=seen_errors,
                execution_id=hello_run.id,
            )
            assert len(seen_errors) == 0
            assert len(seen_complete) == 1
            assert any("Hello from TypeScript!" in m.text for m in seen_stdout)

            # 2) Snippet using an interface and typed values; run without handlers.
            typed_run = interpreter.codes.run(typed_src, context=ts_context)
            assert typed_run is not None
            assert typed_run.id is not None

            # Reset the sinks so the error run is checked in isolation.
            for sink in (seen_stdout, seen_errors, seen_complete, seen_init):
                sink.clear()

            # 3) Use a deterministic runtime error (TypeScript compile/type-checking
            #    may be configured permissively).
            throwing_run = interpreter.codes.run(
                throwing_src,
                context=ts_context,
                handlers=ts_handlers,
            )
            assert throwing_run is not None
            assert throwing_run.id is not None and throwing_run.id.strip()
            assert throwing_run.error is not None or len(throwing_run.logs.stderr) > 0
            _assert_terminal_event_contract(
                init_events=seen_init,
                completed_events=seen_complete,
                errors=seen_errors,
                execution_id=throwing_run.id,
            )

    @pytest.mark.timeout(900)
    @pytest.mark.order(6)
    def test_06_multi_language_support_and_context_isolation(self):
        """Check that contexts do not share state and that several languages coexist.

        Two Python contexts each define their own variable; the second context
        must then fail with ``NameError`` when reading the first context's
        variable. Afterwards one Java and one Go snippet are run in their own
        contexts. All runs go through ``run_with_retry_sync``.
        """
        TestCodeInterpreterE2ESync._ensure_code_interpreter_created()
        interpreter = TestCodeInterpreterE2ESync.code_interpreter
        assert interpreter is not None

        languages = [
            SupportedLanguage.PYTHON,
            SupportedLanguage.PYTHON,
            SupportedLanguage.JAVA,
            SupportedLanguage.GO,
        ]
        # Snippet run in python2 that reads a name only python1 defined.
        isolation_probe = "print(f'Python2 has no: {secret_value1}')"

        with managed_ctx_stack_sync(interpreter, languages) as (python1, python2, java1, go1):
            assert python1.id is not None and str(python1.id).strip()
            assert python2.id is not None and str(python2.id).strip()
            assert java1.id is not None and str(java1.id).strip()
            assert go1.id is not None and str(go1.id).strip()

            # Use retry helper for flaky kernel initialization
            seed1 = run_with_retry_sync(
                interpreter,
                "secret_value1 = 'python1_secret'\nprint(f'Python1 secret: {secret_value1}')",
                context=python1,
            )
            seed2 = run_with_retry_sync(
                interpreter,
                "secret_value2 = 'python2_secret'\nprint(f'Python2 secret: {secret_value2}')",
                context=python2,
            )
            assert seed1 is not None and seed1.id is not None
            assert seed2 is not None and seed2.id is not None

            # Small delay to avoid "session is busy" between runs
            time.sleep(1)

            check1 = run_with_retry_sync(
                interpreter,
                "print(f'Python1 still has: {secret_value1}')",
                context=python1,
            )
            time.sleep(0.5)
            check2 = run_with_retry_sync(interpreter, isolation_probe, context=python2)
            assert check1 is not None
            assert check2 is not None

            # check2 should fail with NameError (context isolation):
            # secret_value1 is defined in python1 but not in python2.
            # If check2.error is None, the SDK may have swallowed a "session
            # is busy" error and returned an empty Execution; retry once more.
            came_back_empty = check2.error is None and not (
                check2.result or check2.logs.stdout
            )
            if came_back_empty:
                logger.warning(
                    "check2 returned empty Execution (possible session-busy); retrying..."
                )
                time.sleep(2)
                check2 = run_with_retry_sync(interpreter, isolation_probe, context=python2)
            assert check2.error is not None, (
                f"Expected NameError for context isolation but got: {check2}"
            )
            assert check2.error.name == "NameError"

            java_src = (
                'String javaSecret = "java_secret";\n'
                'System.out.println("Java secret: " + javaSecret);'
            )
            go_src = (
                'package main\n'
                'import "fmt"\n'
                'func main() {\n'
                '    goSecret := "go_secret"\n'
                '    fmt.Print("Go secret:", goSecret)\n'
                '}'
            )
            java_result = run_with_retry_sync(interpreter, java_src, context=java1)
            go_result = run_with_retry_sync(interpreter, go_src, context=go1)
            assert java_result is not None and java_result.id is not None
            assert go_result is not None and go_result.id is not None

    @pytest.mark.timeout(900)
    @pytest.mark.order(7)
    def test_07_concurrent_code_execution(self):
        """Run Python, Java and Go snippets at the same time on a thread pool.

        Each snippet runs in its own context. Every future is awaited for up
        to 120 seconds; timeouts and exceptions are logged rather than raised.
        The test passes when at least two of the three executions return a
        result with a non-None id.
        """
        TestCodeInterpreterE2ESync._ensure_code_interpreter_created()
        interpreter = TestCodeInterpreterE2ESync.code_interpreter
        assert interpreter is not None

        python_src = (
            "import time\n"
            "for i in range(3):\n"
            "    print(f'Python1 iteration {i}')\n"
            "    time.sleep(0.1)\n"
            "print('Python1 completed')"
        )
        java_src = (
            'for (int i = 0; i < 3; i++) {\n'
            '    System.out.println("Java iteration " + i);\n'
            '    try { Thread.sleep(100); } catch (Exception e) {}\n'
            '}\n'
            'System.out.println("Java completed");'
        )
        go_src = (
            'package main\n'
            'import "fmt"\n'
            'func main() {\n'
            '    for i := 0; i < 3; i++ {\n'
            '        fmt.Print("Go iteration", i)\n'
            '    }\n'
            '    fmt.Print("Go completed")\n'
            '}'
        )

        with managed_ctx_stack_sync(
            interpreter,
            [
                SupportedLanguage.PYTHON,
                SupportedLanguage.JAVA,
                SupportedLanguage.GO,
            ],
        ) as (python_c1, java_c1, go_c1):
            from concurrent.futures import ThreadPoolExecutor
            from concurrent.futures import TimeoutError as FutureTimeout

            # (label, source, context) for every job, in submission order.
            jobs = [
                ("Python", python_src, python_c1),
                ("Java", java_src, java_c1),
                ("Go", go_src, go_c1),
            ]

            def _execute(source, ctx):
                # Resolved inside the worker thread, like a dedicated closure would.
                return interpreter.codes.run(source, context=ctx)

            with ThreadPoolExecutor(max_workers=4) as pool:
                pending = [
                    (label, pool.submit(_execute, source, ctx))
                    for label, source, ctx in jobs
                ]

                succeeded = 0
                for label, future in pending:
                    try:
                        outcome = future.result(timeout=120)
                    except FutureTimeout:
                        logger.warning("Concurrent %s: timed out", label)
                    except Exception as e:
                        logger.warning("Concurrent %s: failed: %s", label, e)
                    else:
                        if outcome is None or outcome.id is None:
                            logger.warning(
                                "Concurrent %s: returned empty result: %s", label, outcome
                            )
                        else:
                            succeeded += 1
                            logger.info("Concurrent %s: OK (id=%s)", label, outcome.id)

            # In resource-constrained CI, "session is busy" may cause some
            # concurrent executions to return empty results.  Require at
            # least 2 of 3 to succeed.
            assert succeeded >= 2, (
                f"Only {succeeded}/3 concurrent executions succeeded; "
                f"expected at least 2"
            )

    @pytest.mark.timeout(900)
    @pytest.mark.order(8)
    def test_08_code_execution_interrupt(self):
        """Interrupt a long-running Python execution, then reuse the same context.

        Flow:
          1. Start a Python loop (50 iterations x 0.2 s sleep) on a worker thread.
          2. Wait up to 15 s for the init event, which carries the execution id.
          3. Interrupt that execution and require the run to return in < 30 s
             with a completion or an error event recorded.
          4. Run a quick snippet in the same context to confirm it still works.
          5. Best-effort interrupt of the already-finished quick execution;
             a failure here is logged, not raised.
        """
        TestCodeInterpreterE2ESync._ensure_code_interpreter_created()
        code_interpreter = TestCodeInterpreterE2ESync.code_interpreter
        assert code_interpreter is not None

        with managed_ctx_sync(code_interpreter, SupportedLanguage.PYTHON) as python_int_context:
            assert python_int_context is not None and python_int_context.id is not None and str(python_int_context.id).strip()

            init_events_int: list[ExecutionInit] = []
            completed_events: list[ExecutionComplete] = []
            errors: list[ExecutionError] = []

            def on_init(init: ExecutionInit):
                init_events_int.append(init)

            def on_complete(complete: ExecutionComplete):
                completed_events.append(complete)

            def on_error(error: ExecutionError):
                errors.append(error)

            handlers_int = ExecutionHandlersSync(
                on_init=on_init,
                on_execution_complete=on_complete,
                on_error=on_error,
            )

            with ThreadPoolExecutor(max_workers=1) as ex:
                # Monotonic clock: deadline/elapsed must not be affected by
                # wall-clock adjustments on the test host.
                start = time.monotonic()
                future = ex.submit(
                    code_interpreter.codes.run,
                    "import time\n"
                    + "print('Starting long-running Python execution')\n"
                    + "for i in range(50):\n"
                    + "    print(f'Python iteration {i}')\n"
                    + "    time.sleep(0.2)\n",
                    context=python_int_context,
                    handlers=handlers_int,
                )

                # Poll until the init event arrives; it is the only source of
                # the execution id while the run is still in flight.
                deadline = time.monotonic() + 15
                while len(init_events_int) == 0 and time.monotonic() < deadline:
                    time.sleep(0.1)

                assert len(init_events_int) == 1, "Execution should have been initialized exactly once"
                execution_id = init_events_int[-1].id
                assert execution_id is not None and execution_id.strip()
                _assert_recent_timestamp_ms(init_events_int[-1].timestamp)

                code_interpreter.codes.interrupt(execution_id)

                result_int = future.result()
                assert result_int is not None
                assert result_int.id is not None
                assert result_int.id == execution_id
                assert (len(completed_events) > 0) or (len(errors) > 0)
                elapsed = time.monotonic() - start
                assert elapsed < 30

            # Small delay after interrupt to let the kernel recover
            time.sleep(1)
            quick_result = run_with_retry_sync(
                code_interpreter,
                "print('Quick Python execution')\n"
                + "result = 2 + 2\n"
                + "print(f'Result: {result}')",
                context=python_int_context,
                handlers=handlers_int,
            )
            assert quick_result is not None
            assert quick_result.id is not None

            # Interrupting an execution that already finished is best-effort:
            # keep the test green, but leave a trace instead of hiding the error.
            try:
                code_interpreter.codes.interrupt(quick_result.id)
            except Exception as e:
                logger.debug(
                    "Ignoring failure while interrupting finished execution %s: %s",
                    quick_result.id,
                    e,
                )

    @pytest.mark.timeout(600)
    @pytest.mark.order(9)
    def test_09_context_management_endpoints(self):
        """Validate list/get/delete context APIs map to execd /code/contexts endpoints (sync).

        Steps: wipe existing contexts for the language, create two contexts,
        verify they are listed, fetch one by id, delete it and confirm only
        the other remains, then bulk-delete and confirm none are left.
        """
        TestCodeInterpreterE2ESync._ensure_code_interpreter_created()
        code_interpreter = TestCodeInterpreterE2ESync.code_interpreter
        assert code_interpreter is not None

        language = SupportedLanguage.PYTHON
        logger.info("=" * 80)
        logger.info("TEST 9: Context management endpoints (%s)", language)
        logger.info("=" * 80)

        # Ensure a clean slate for this language's contexts so leftovers from
        # other tests cannot affect the list/delete assertions below.
        code_interpreter.codes.delete_contexts(language)

        ctx1 = code_interpreter.codes.create_context(language)
        ctx2 = code_interpreter.codes.create_context(language)
        assert ctx1.id is not None and str(ctx1.id).strip()
        assert ctx2.id is not None and str(ctx2.id).strip()
        assert ctx1.language == language
        assert ctx2.language == language
        logger.info("✓ Created two %s contexts: %s, %s", language, ctx1.id, ctx2.id)

        listed = code_interpreter.codes.list_contexts(language)
        listed_ids = {c.id for c in listed if c.id}
        assert ctx1.id in listed_ids
        assert ctx2.id in listed_ids
        assert all(c.language == language for c in listed)
        logger.info("✓ list_contexts returned expected %s contexts", language)

        fetched = code_interpreter.codes.get_context(ctx1.id)
        assert fetched.id == ctx1.id
        assert fetched.language == language
        logger.info("✓ get_context returned expected context %s", fetched.id)

        # Deleting one context must leave the other one untouched.
        code_interpreter.codes.delete_context(ctx1.id)
        remaining = code_interpreter.codes.list_contexts(language)
        remaining_ids = {c.id for c in remaining if c.id}
        assert ctx1.id not in remaining_ids
        assert ctx2.id in remaining_ids
        logger.info("✓ delete_context removed %s", ctx1.id)

        code_interpreter.codes.delete_contexts(language)
        final_contexts = [
            c for c in code_interpreter.codes.list_contexts(language) if c.id
        ]
        assert len(final_contexts) == 0
        logger.info("✓ delete_contexts removed all %s contexts", language)


================================================
FILE: tests/python/tests/test_sandbox_e2e.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Comprehensive E2E tests for Sandbox functionality.
"""

import asyncio
import logging
import time
from datetime import timedelta
from io import BytesIO

import pytest
from opensandbox import Sandbox
from opensandbox.constants import DEFAULT_EGRESS_PORT
from opensandbox.config import ConnectionConfig
from opensandbox.exceptions import SandboxApiException
from opensandbox.models.execd import (
    ExecutionComplete,
    ExecutionError,
    ExecutionHandlers,
    ExecutionInit,
    ExecutionResult,
    OutputMessage,
    RunCommandOpts,
)
from opensandbox.models.filesystem import (
    ContentReplaceEntry,
    MoveEntry,
    SearchEntry,
    SetPermissionEntry,
    WriteEntry,
)
from opensandbox.models.sandboxes import (
    PVC,
    Host,
    NetworkPolicy,
    NetworkRule,
    SandboxImageSpec,
    Volume,
)

from tests.base_e2e_test import (
    TEST_API_KEY,
    TEST_DOMAIN,
    TEST_PROTOCOL,
    create_connection_config,
    create_connection_config_server_proxy,
    get_sandbox_image,
)

logger = logging.getLogger(__name__)


def _now_ms() -> int:
    return int(time.time() * 1000)


def _assert_recent_timestamp_ms(ts: int, *, tolerance_ms: int = 60_000) -> None:
    """Assert that ``ts`` is a positive int within ``tolerance_ms`` of the current time.

    The comparison is symmetric: a timestamp may be earlier or later than
    "now" (as reported by ``_now_ms``) by at most ``tolerance_ms``.
    """
    assert isinstance(ts, int)
    assert ts > 0
    current_ms = _now_ms()
    delta = abs(current_ms - ts)
    within_tolerance = delta <= tolerance_ms
    assert within_tolerance, f"timestamp too far from now: delta={delta}ms (ts={ts})"


def _assert_endpoint_has_port(endpoint: str, expected_port: int) -> None:
    assert endpoint
    # In some deployments lifecycle returns direct "host:port".
    # In others it returns a reverse-proxy route like "domain/route/{id}/{port}".
    # In both cases, we expect NO scheme, and the port to be present deterministically.
    assert "://" not in endpoint, f"unexpected scheme in endpoint: {endpoint}"

    if "/" in endpoint:
        assert endpoint.endswith(f"/{expected_port}"), (
            f"endpoint route must end with /{expected_port}: {endpoint}"
        )
        # Keep this strict: the route must contain a non-empty domain prefix.
        assert endpoint.split("/", 1)[0], f"missing domain in endpoint: {endpoint}"
        return

    host, port = endpoint.rsplit(":", 1)
    assert host, f"missing host in endpoint: {endpoint}"
    assert port.isdigit(), f"non-numeric port in endpoint: {endpoint}"
    assert int(port) == expected_port, f"endpoint port mismatch: {endpoint} != :{expected_port}"


def _assert_times_close(created_at, modified_at, *, tolerance_seconds: float = 2.0) -> None:
    """
    Some filesystems / implementations may report created/modified with slight reordering.
    We only assert they're close, and rely on explicit update operations to validate mtime.
    """
    delta = abs((modified_at - created_at).total_seconds())
    assert delta <= tolerance_seconds, f"created/modified skew too large: {delta}s"


def _assert_modified_updated(before, after, *, min_delta_ms: int = 0, allow_skew_ms: int = 1000) -> None:
    """
    Validate modified_at moved forward after a mutating operation, allowing small clock jitter.
    """
    delta_ms = int((after - before).total_seconds() * 1000)
    assert delta_ms >= min_delta_ms - allow_skew_ms, (
        f"modified_at did not update as expected: delta_ms={delta_ms} "
        f"(min_delta_ms={min_delta_ms}, allow_skew_ms={allow_skew_ms})"
    )


@pytest.mark.asyncio
class TestSandboxE2E:
    """Comprehensive E2E tests for Sandbox functionality."""

    sandbox = None
    connection_config = None
    _setup_done = False

    @pytest.fixture(scope="class", autouse=True)
    async def _sandbox_lifecycle(self, request):
        """Provision the class-level sandbox before the tests and always tear it down.

        After the tests have run, ``kill()`` and ``close()`` are each attempted
        on the stored sandbox; a failure in either is logged as a warning and
        does not prevent the other step or fail the teardown.
        """
        test_cls = request.cls
        await test_cls._ensure_sandbox_created()
        try:
            yield
        finally:
            shared_sandbox = test_cls.sandbox
            if shared_sandbox is not None:
                # (method name, warning format) for each teardown step, in order.
                teardown_steps = (
                    ("kill", "Teardown: sandbox.kill() failed: %s"),
                    ("close", "Teardown: sandbox.close() failed: %s"),
                )
                for method_name, failure_format in teardown_steps:
                    try:
                        await getattr(shared_sandbox, method_name)()
                    except Exception as exc:
                        logger.warning(failure_format, exc, exc_info=True)

    @classmethod
    async def _ensure_sandbox_created(cls):
        """Create the shared sandbox on first call; later calls are no-ops.

        Stores the connection config and the created sandbox on the class and
        flips ``_setup_done`` once creation has succeeded.
        """
        if not cls._setup_done:
            banner = "=" * 100
            logger.info(banner)
            logger.info("SETUP: Creating sandbox")
            logger.info(banner)

            cls.connection_config = create_connection_config()

            # Environment variables passed to the sandbox at creation time.
            sandbox_env = {
                "E2E_TEST": "true",
                "GO_VERSION": "1.25",
                "JAVA_VERSION": "21",
                "NODE_VERSION": "22",
                "PYTHON_VERSION": "3.12"
            }
            cls.sandbox = await Sandbox.create(
                image=SandboxImageSpec(get_sandbox_image()),
                connection_config=cls.connection_config,
                timeout=timedelta(minutes=2),
                ready_timeout=timedelta(seconds=30),
                metadata={"tag": "e2e-test"},
                env=sandbox_env,
                health_check_polling_interval=timedelta(milliseconds=500),
            )

            logger.info(f"✓ Sandbox created: {cls.sandbox.id}")
            logger.info(banner)

            # Only mark setup as done after creation has actually succeeded.
            cls._setup_done = True

    @pytest.mark.timeout(120)
    @pytest.mark.order(1)
    async def test_01_sandbox_lifecycle_and_health(self):
        """Test sandbox lifecycle and health monitoring.

        Steps:
          1. Basic properties and health of the shared sandbox.
          2. ``get_info``: state, timestamps, entrypoint, metadata, TTL window.
          3. Endpoint lookup for port 44772.
          4. Metrics sanity checks.
          5. Renewal by 20 minutes and verification of the new expiration.
          6. Presence of the service components on the sandbox object.
          7. Connecting to the same sandbox by id and running a command.
        """
        # Local import: the module only imports ``timedelta`` from datetime.
        from datetime import datetime

        await self._ensure_sandbox_created()
        sandbox = TestSandboxE2E.sandbox

        logger.info("=" * 80)
        logger.info("TEST 1: Testing sandbox lifecycle and health monitoring")
        logger.info("=" * 80)

        logger.info("Step 1: Verify basic sandbox properties")
        assert sandbox is not None
        assert isinstance(sandbox.id, str)
        assert await sandbox.is_healthy() is True
        logger.info(f"✓ Sandbox ID: {sandbox.id}")
        logger.info("✓ Sandbox is healthy")

        logger.info("Step 2: Get sandbox information")
        info = await sandbox.get_info()
        assert info.id == sandbox.id
        assert info.status.state == "Running"
        assert info.created_at is not None
        assert info.expires_at is not None
        assert info.expires_at > info.created_at
        assert info.entrypoint == ["tail", "-f", "/dev/null"]

        # The sandbox was created with a 2-minute timeout; accept 1-3 minutes.
        duration = info.expires_at - info.created_at
        min_duration = timedelta(minutes=1)
        max_duration = timedelta(minutes=3)
        assert min_duration <= duration <= max_duration, \
            f"Duration {duration} should be between 1 and 3 minutes"

        assert info.metadata is not None
        assert info.metadata.get("tag") == "e2e-test"
        logger.info(
            "✓ Sandbox info: state=%s, created=%s, expires=%s",
            info.status.state,
            info.created_at,
            info.expires_at,
        )

        logger.info("Step 3: Get sandbox endpoint for default execd port")
        endpoint = await sandbox.get_endpoint(44772)
        assert endpoint is not None
        assert endpoint.endpoint is not None
        _assert_endpoint_has_port(endpoint.endpoint, 44772)
        logger.info(f"✓ Sandbox endpoint: {endpoint.endpoint}")

        logger.info("Step 4: Get and verify metrics")
        metrics = await sandbox.get_metrics()
        assert metrics is not None
        assert metrics.cpu_count > 0
        assert 0.0 <= metrics.cpu_used_percentage <= 100.0
        assert metrics.memory_total_in_mib > 0
        assert 0.0 <= metrics.memory_used_in_mib <= metrics.memory_total_in_mib
        _assert_recent_timestamp_ms(metrics.timestamp, tolerance_ms=120_000)
        logger.info(
            "✓ CPU: %s cores, %.2f%% used",
            metrics.cpu_count,
            metrics.cpu_used_percentage,
        )
        logger.info(
            "✓ Memory: %s/%s MiB",
            int(metrics.memory_used_in_mib),
            int(metrics.memory_total_in_mib),
        )

        logger.info("Step 5: Test sandbox renewal (extend expiration time)")
        renew_response = await sandbox.renew(timedelta(minutes=20))
        assert renew_response is not None
        assert renew_response.expires_at > info.expires_at
        logger.info("✓ Sandbox expiration renewed to %s", renew_response.expires_at)

        renewed_info = await sandbox.get_info()
        assert renewed_info.expires_at > info.expires_at
        assert renewed_info.id == sandbox.id
        assert renewed_info.status.state == "Running"

        # The renew API should return the new expiration time. Allow small backend-side skew.
        assert abs((renewed_info.expires_at - renew_response.expires_at).total_seconds()) < 10

        # Renewal is "now + timeout" (SDK behavior). Validate remaining TTL is
        # close to the 20 minutes requested above (18-22 minutes accepted).
        # "now" uses the same tzinfo as expires_at so the subtraction is valid.
        now = datetime.now(tz=renewed_info.expires_at.tzinfo)
        remaining = renewed_info.expires_at - now
        assert remaining > timedelta(minutes=18), f"Remaining TTL too small: {remaining}"
        assert remaining < timedelta(minutes=22), f"Remaining TTL too large: {remaining}"

        logger.info(
            "✓ Sandbox expiration updated from %s to %s",
            info.expires_at,
            renewed_info.expires_at,
        )

        logger.info("Step 6: Test access to service components")

        assert sandbox.files is not None
        assert sandbox.commands is not None
        assert sandbox.metrics is not None
        assert sandbox.connection_config is not None
        logger.info("✓ All sandbox service components are accessible")

        logger.info("Step 7: Connect to existing sandbox by ID")
        sandbox2 = await Sandbox.connect(
            sandbox_id=sandbox.id,
            connection_config=TestSandboxE2E.connection_config,
        )
        try:
            assert sandbox2.id == sandbox.id
            assert await sandbox2.is_healthy() is True
            connect_result = await sandbox2.commands.run("echo connect-ok")
            assert connect_result.error is None
            assert len(connect_result.logs.stdout) == 1
            assert connect_result.logs.stdout[0].text == "connect-ok"
        finally:
            # Only close the second client; the sandbox itself is shared and
            # is not killed here.
            await sandbox2.close()

    @pytest.mark.timeout(120)
    @pytest.mark.order(1)
    async def test_01b_manual_cleanup(self):
        """Create a sandbox with ``timeout=None`` and verify it has no expiration.

        The sandbox is killed and closed explicitly at the end; ``close()`` is
        guaranteed to run even when ``kill()`` raises, so the client is never
        leaked. A ``kill()`` failure still propagates and fails the test.
        """
        sandbox = await Sandbox.create(
            image=SandboxImageSpec(get_sandbox_image()),
            connection_config=TestSandboxE2E.connection_config,
            timeout=None,
            ready_timeout=timedelta(seconds=30),
            metadata={"tag": "manual-e2e-test"},
        )
        try:
            info = await sandbox.get_info()
            assert info.expires_at is None
            assert info.metadata is not None
            assert info.metadata.get("tag") == "manual-e2e-test"
        finally:
            # Nested try/finally: close the client regardless of kill()'s outcome.
            try:
                await sandbox.kill()
            finally:
                await sandbox.close()

        logger.info("TEST 1b PASSED: Manual cleanup (no-timeout sandbox) test completed successfully")


    @pytest.mark.timeout(120)
    @pytest.mark.order(1)
    async def test_01a_network_policy_create(self):
        """Create a sandbox with a deny-by-default egress policy and probe it.

        The policy allows only ``pypi.org``. After a 5-second wait, a curl to
        ``www.github.com`` must report an error and a curl to ``pypi.org``
        must not. The sandbox is always killed (best effort) and closed.
        """
        logger.info("=" * 80)
        logger.info("TEST 1a: Creating sandbox with networkPolicy (async)")
        logger.info("=" * 80)

        cfg = create_connection_config()
        sandbox = await Sandbox.create(
            image=SandboxImageSpec(get_sandbox_image()),
            connection_config=cfg,
            timeout=timedelta(minutes=2),
            ready_timeout=timedelta(seconds=30),
            network_policy=NetworkPolicy(
                defaultAction="deny",
                egress=[NetworkRule(action="allow", target="pypi.org")],
            ),
        )
        try:
            # Fixed wait before probing the policy.
            await asyncio.sleep(5)
            result = await sandbox.commands.run("curl -I https://www.github.com")
            assert result.error is not None
            result = await sandbox.commands.run("curl -I https://pypi.org")
            assert result.error is None
        finally:
            # kill() stays best-effort so it cannot mask an assertion failure
            # above, but the failure is logged instead of silently dropped.
            try:
                await sandbox.kill()
            except Exception as e:
                logger.warning("Teardown: sandbox.kill() failed: %s", e, exc_info=True)
            await sandbox.close()

    @pytest.mark.timeout(180)
    @pytest.mark.order(1)
    async def test_01aa_network_policy_get_and_patch(self):
        """Exercise reading and patching the egress policy of a running sandbox.

        Starts from a deny-by-default policy that allows ``pypi.org``, checks the
        reported policy and the observed curl behavior, then patches the rules to
        allow ``www.github.com`` and deny ``pypi.org`` and checks that both the
        reported policy and the curl behavior flip.
        """
        separator = "=" * 80
        logger.info(separator)
        logger.info("TEST 1aa: networkPolicy get/patch (async)")
        logger.info(separator)

        def has_rule(rules, target, action):
            # True when at least one rule matches both the target and the action.
            return any(r.target == target and r.action == action for r in rules)

        connection_config = create_connection_config()
        initial_policy = NetworkPolicy(
            defaultAction="deny",
            egress=[NetworkRule(action="allow", target="pypi.org")],
        )
        sandbox = await Sandbox.create(
            image=SandboxImageSpec(get_sandbox_image()),
            connection_config=connection_config,
            timeout=timedelta(minutes=2),
            ready_timeout=timedelta(seconds=30),
            network_policy=initial_policy,
        )
        try:
            await asyncio.sleep(5)

            # The policy reported right after creation should mirror the request.
            reported = await sandbox.get_egress_policy()
            assert reported.default_action == "deny"
            assert reported.egress is not None
            assert has_rule(reported.egress, "pypi.org", "allow")

            # Before patching: github is blocked, pypi is reachable.
            github_before = await sandbox.commands.run("curl -I https://www.github.com")
            assert github_before.error is not None
            pypi_before = await sandbox.commands.run("curl -I https://pypi.org")
            assert pypi_before.error is None

            # Swap the rules: github becomes allowed, pypi becomes denied.
            flipped_rules = [
                NetworkRule(action="allow", target="www.github.com"),
                NetworkRule(action="deny", target="pypi.org"),
            ]
            await sandbox.patch_egress_rules(flipped_rules)
            await asyncio.sleep(2)

            reported_after = await sandbox.get_egress_policy()
            assert reported_after.egress is not None
            assert has_rule(reported_after.egress, "www.github.com", "allow")
            assert has_rule(reported_after.egress, "pypi.org", "deny")

            # After patching: the observed behavior should be inverted.
            github_after = await sandbox.commands.run("curl -I https://www.github.com")
            assert github_after.error is None
            pypi_after = await sandbox.commands.run("curl -I https://pypi.org")
            assert pypi_after.error is not None
        finally:
            # Best-effort kill: a failure here must not prevent close().
            try:
                await sandbox.kill()
            except Exception:
                pass
            await sandbox.close()

    @pytest.mark.timeout(180)
    @pytest.mark.order(1)
    async def test_01ab_network_policy_get_and_patch_with_server_proxy(self):
        """Exercise egress policy get/patch using the server-proxy connection config.

        In addition to the get/patch checks, the test asserts that the endpoint
        resolved for ``DEFAULT_EGRESS_PORT`` contains the
        ``/sandboxes/<id>/proxy/<port>`` path segment.
        """
        separator = "=" * 80
        logger.info(separator)
        logger.info("TEST 1ab: networkPolicy get/patch with server proxy (async)")
        logger.info(separator)

        def has_rule(rules, target, action):
            # True when at least one rule matches both the target and the action.
            return any(r.target == target and r.action == action for r in rules)

        proxy_config = create_connection_config_server_proxy()
        initial_policy = NetworkPolicy(
            defaultAction="deny",
            egress=[NetworkRule(action="allow", target="pypi.org")],
        )
        sandbox = await Sandbox.create(
            image=SandboxImageSpec(get_sandbox_image()),
            connection_config=proxy_config,
            timeout=timedelta(minutes=2),
            ready_timeout=timedelta(seconds=30),
            network_policy=initial_policy,
        )
        try:
            await asyncio.sleep(5)

            # The egress endpoint must be routed through the server proxy path.
            egress_endpoint = await sandbox.get_endpoint(DEFAULT_EGRESS_PORT)
            expected_segment = f"/sandboxes/{sandbox.id}/proxy/{DEFAULT_EGRESS_PORT}"
            assert expected_segment in egress_endpoint.endpoint

            reported = await sandbox.get_egress_policy()
            assert reported.default_action == "deny"
            assert reported.egress is not None
            assert has_rule(reported.egress, "pypi.org", "allow")

            # Before patching: github is blocked, pypi is reachable.
            github_before = await sandbox.commands.run("curl -I https://www.github.com")
            assert github_before.error is not None
            pypi_before = await sandbox.commands.run("curl -I https://pypi.org")
            assert pypi_before.error is None

            flipped_rules = [
                NetworkRule(action="allow", target="www.github.com"),
                NetworkRule(action="deny", target="pypi.org"),
            ]
            await sandbox.patch_egress_rules(flipped_rules)
            await asyncio.sleep(2)

            reported_after = await sandbox.get_egress_policy()
            assert reported_after.egress is not None
            assert has_rule(reported_after.egress, "www.github.com", "allow")
            assert has_rule(reported_after.egress, "pypi.org", "deny")
        finally:
            # Best-effort kill: a failure here must not prevent close().
            try:
                await sandbox.kill()
            except Exception:
                pass
            await sandbox.close()

    @pytest.mark.timeout(120)
    @pytest.mark.order(1)
    async def test_01b_host_volume_mount(self):
        """Test creating a sandbox with a read-write host volume mount.

        Mounts the host directory ``/tmp/opensandbox-e2e/host-volume-test`` at
        ``/mnt/host-data`` and checks, from inside the sandbox, that the marker
        file is readable, that a new file can be written and read back, and that
        the mount path is a directory. The marker file is read but never created
        here, so it must already exist on the host before the test runs.
        """
        logger.info("=" * 80)
        logger.info("TEST 1b: Creating sandbox with host volume mount (async)")
        logger.info("=" * 80)

        host_dir = "/tmp/opensandbox-e2e/host-volume-test"
        container_mount_path = "/mnt/host-data"

        cfg = create_connection_config()
        sandbox = await Sandbox.create(
            image=SandboxImageSpec(get_sandbox_image()),
            connection_config=cfg,
            timeout=timedelta(minutes=2),
            ready_timeout=timedelta(seconds=30),
            volumes=[
                Volume(
                    name="test-host-vol",
                    host=Host(path=host_dir),
                    mountPath=container_mount_path,
                    readOnly=False,
                ),
            ],
        )
        try:
            logger.info(f"✓ Sandbox with volume created: {sandbox.id}")

            # Step 1: Verify the host marker file is visible inside the sandbox
            logger.info("Step 1: Verify host marker file is readable inside the sandbox")
            result = await sandbox.commands.run(f"cat {container_mount_path}/marker.txt")
            assert result.error is None, f"Failed to read marker file: {result.error}"
            assert len(result.logs.stdout) == 1
            assert result.logs.stdout[0].text == "opensandbox-e2e-marker"
            logger.info("✓ Host marker file read successfully inside sandbox")

            # Step 2: Write a file from inside the sandbox to the mounted path (read-write)
            logger.info("Step 2: Write a file from inside the sandbox to the mount path")
            result = await sandbox.commands.run(
                f"echo 'written-from-sandbox' > {container_mount_path}/sandbox-output.txt"
            )
            assert result.error is None, f"Failed to write file: {result.error}"

            # Step 3: Verify the written file is readable
            logger.info("Step 3: Verify the written file is readable")
            result = await sandbox.commands.run(f"cat {container_mount_path}/sandbox-output.txt")
            assert result.error is None
            assert len(result.logs.stdout) == 1
            assert result.logs.stdout[0].text == "written-from-sandbox"
            logger.info("✓ File written and verified inside sandbox")

            # Step 4: Verify the mount path is a proper directory
            logger.info("Step 4: Verify mount path is a directory")
            result = await sandbox.commands.run(f"test -d {container_mount_path} && echo OK")
            assert result.error is None
            assert len(result.logs.stdout) == 1
            assert result.logs.stdout[0].text == "OK"
            logger.info("✓ Mount path is a valid directory")

        finally:
            # Best-effort kill: a failure here must not prevent close().
            try:
                await sandbox.kill()
            except Exception:
                pass
            await sandbox.close()

        logger.info("TEST 1b PASSED: Host volume mount test completed successfully")

    @pytest.mark.timeout(120)
    @pytest.mark.order(1)
    async def test_01c_host_volume_mount_readonly(self):
        """Test that a host volume mounted with ``readOnly=True`` rejects writes.

        The marker file must be readable through the mount, while ``touch`` on a
        new file under the mount path must report an error.
        """
        separator = "=" * 80
        logger.info(separator)
        logger.info("TEST 1c: Creating sandbox with read-only host volume mount (async)")
        logger.info(separator)

        host_dir = "/tmp/opensandbox-e2e/host-volume-test"
        container_mount_path = "/mnt/host-data-ro"

        connection_config = create_connection_config()
        readonly_volume = Volume(
            name="test-host-vol-ro",
            host=Host(path=host_dir),
            mountPath=container_mount_path,
            readOnly=True,
        )
        sandbox = await Sandbox.create(
            image=SandboxImageSpec(get_sandbox_image()),
            connection_config=connection_config,
            timeout=timedelta(minutes=2),
            ready_timeout=timedelta(seconds=30),
            volumes=[readonly_volume],
        )
        try:
            logger.info(f"✓ Sandbox with read-only volume created: {sandbox.id}")

            # Reading through the read-only mount must work.
            marker_path = f"{container_mount_path}/marker.txt"
            read_result = await sandbox.commands.run(f"cat {marker_path}")
            assert read_result.error is None, f"Failed to read marker file: {read_result.error}"
            marker_lines = read_result.logs.stdout
            assert len(marker_lines) == 1
            assert marker_lines[0].text == "opensandbox-e2e-marker"
            logger.info("✓ Host marker file read successfully in read-only mount")

            # Creating a file through the read-only mount must fail.
            blocked_path = f"{container_mount_path}/should-fail.txt"
            write_result = await sandbox.commands.run(f"touch {blocked_path}")
            assert write_result.error is not None, "Write should fail on read-only mount"
            logger.info("✓ Write correctly denied on read-only mount")

        finally:
            # Best-effort kill: a failure here must not prevent close().
            try:
                await sandbox.kill()
            except Exception:
                pass
            await sandbox.close()

        logger.info("TEST 1c PASSED: Read-only host volume mount test completed successfully")

    @pytest.mark.timeout(120)
    @pytest.mark.order(1)
    async def test_01d_pvc_named_volume_mount(self):
        """Test creating a sandbox with a read-write PVC (Docker named volume) mount.

        Mounts the claim ``opensandbox-e2e-pvc-test`` at ``/mnt/pvc-data`` and
        checks that the marker file is readable, that a new file can be written
        and read back, and that the mount path is a directory. The marker file is
        read but never created here, so the volume must be seeded beforehand.
        """
        separator = "=" * 80
        logger.info(separator)
        logger.info("TEST 1d: Creating sandbox with PVC named volume mount (async)")
        logger.info(separator)

        pvc_volume_name = "opensandbox-e2e-pvc-test"
        container_mount_path = "/mnt/pvc-data"

        connection_config = create_connection_config()
        pvc_volume = Volume(
            name="test-pvc-vol",
            pvc=PVC(claimName=pvc_volume_name),
            mountPath=container_mount_path,
            readOnly=False,
        )
        sandbox = await Sandbox.create(
            image=SandboxImageSpec(get_sandbox_image()),
            connection_config=connection_config,
            timeout=timedelta(minutes=2),
            ready_timeout=timedelta(seconds=30),
            volumes=[pvc_volume],
        )
        try:
            logger.info(f"✓ Sandbox with PVC volume created: {sandbox.id}")

            # Read the marker file that is expected to be present in the volume.
            logger.info("Step 1: Verify PVC marker file is readable inside the sandbox")
            marker_path = f"{container_mount_path}/marker.txt"
            marker_read = await sandbox.commands.run(f"cat {marker_path}")
            assert marker_read.error is None, f"Failed to read marker file: {marker_read.error}"
            assert len(marker_read.logs.stdout) == 1
            assert marker_read.logs.stdout[0].text == "pvc-marker-data"
            logger.info("✓ PVC marker file read successfully inside sandbox")

            # Write a new file into the named volume from inside the sandbox.
            logger.info("Step 2: Write a file from inside the sandbox to the PVC mount")
            output_path = f"{container_mount_path}/pvc-output.txt"
            write_outcome = await sandbox.commands.run(
                f"echo 'written-to-pvc' > {output_path}"
            )
            assert write_outcome.error is None, f"Failed to write file: {write_outcome.error}"

            # Read the freshly written file back.
            readback = await sandbox.commands.run(f"cat {output_path}")
            assert readback.error is None
            assert len(readback.logs.stdout) == 1
            assert readback.logs.stdout[0].text == "written-to-pvc"
            logger.info("✓ File written and verified inside sandbox via PVC mount")

            # The mount point itself must be a directory.
            dir_check = await sandbox.commands.run(f"test -d {container_mount_path} && echo OK")
            assert dir_check.error is None
            assert len(dir_check.logs.stdout) == 1
            assert dir_check.logs.stdout[0].text == "OK"
            logger.info("✓ PVC mount path is a valid directory")

        finally:
            # Best-effort kill: a failure here must not prevent close().
            try:
                await sandbox.kill()
            except Exception:
                pass
            await sandbox.close()

        logger.info("TEST 1d PASSED: PVC named volume mount test completed successfully")

    @pytest.mark.timeout(120)
    @pytest.mark.order(1)
    async def test_01e_pvc_named_volume_mount_readonly(self):
        """Test that a PVC (Docker named volume) mounted read-only rejects writes.

        The marker file must be readable through the mount, while ``touch`` on a
        new file under the mount path must report an error.
        """
        separator = "=" * 80
        logger.info(separator)
        logger.info("TEST 1e: Creating sandbox with read-only PVC named volume mount (async)")
        logger.info(separator)

        pvc_volume_name = "opensandbox-e2e-pvc-test"
        container_mount_path = "/mnt/pvc-data-ro"

        connection_config = create_connection_config()
        readonly_pvc_volume = Volume(
            name="test-pvc-vol-ro",
            pvc=PVC(claimName=pvc_volume_name),
            mountPath=container_mount_path,
            readOnly=True,
        )
        sandbox = await Sandbox.create(
            image=SandboxImageSpec(get_sandbox_image()),
            connection_config=connection_config,
            timeout=timedelta(minutes=2),
            ready_timeout=timedelta(seconds=30),
            volumes=[readonly_pvc_volume],
        )
        try:
            logger.info(f"✓ Sandbox with read-only PVC volume created: {sandbox.id}")

            # Reading through the read-only mount must work.
            marker_path = f"{container_mount_path}/marker.txt"
            read_result = await sandbox.commands.run(f"cat {marker_path}")
            assert read_result.error is None, f"Failed to read marker file: {read_result.error}"
            marker_lines = read_result.logs.stdout
            assert len(marker_lines) == 1
            assert marker_lines[0].text == "pvc-marker-data"
            logger.info("✓ PVC marker file read successfully in read-only mount")

            # Creating a file through the read-only mount must fail.
            blocked_path = f"{container_mount_path}/should-fail.txt"
            write_result = await sandbox.commands.run(f"touch {blocked_path}")
            assert write_result.error is not None, "Write should fail on read-only PVC mount"
            logger.info("✓ Write correctly denied on read-only PVC mount")

        finally:
            # Best-effort kill: a failure here must not prevent close().
            try:
                await sandbox.kill()
            except Exception:
                pass
            await sandbox.close()

        logger.info("TEST 1e PASSED: Read-only PVC named volume mount test completed successfully")

    @pytest.mark.timeout(120)
    @pytest.mark.order(1)
    async def test_01f_pvc_named_volume_subpath_mount(self):
        """Test mounting only a ``subPath`` of a PVC named volume.

        Mounts ``datasets/train`` from the claim ``opensandbox-e2e-pvc-test`` at
        ``/mnt/train`` and checks that the sub-path marker is readable, that the
        listing does not expose the parent ``datasets`` directory, and that a
        file written under the mount can be read back.
        """
        separator = "=" * 80
        logger.info(separator)
        logger.info("TEST 1f: Creating sandbox with PVC named volume subPath mount (async)")
        logger.info(separator)

        pvc_volume_name = "opensandbox-e2e-pvc-test"
        container_mount_path = "/mnt/train"

        connection_config = create_connection_config()
        subpath_volume = Volume(
            name="test-pvc-subpath",
            pvc=PVC(claimName=pvc_volume_name),
            mountPath=container_mount_path,
            readOnly=False,
            subPath="datasets/train",
        )
        sandbox = await Sandbox.create(
            image=SandboxImageSpec(get_sandbox_image()),
            connection_config=connection_config,
            timeout=timedelta(minutes=2),
            ready_timeout=timedelta(seconds=30),
            volumes=[subpath_volume],
        )
        try:
            logger.info(f"✓ Sandbox with PVC subPath volume created: {sandbox.id}")

            # The marker read here must be the one stored under the sub-path.
            logger.info("Step 1: Verify subPath marker file is readable")
            marker_read = await sandbox.commands.run(f"cat {container_mount_path}/marker.txt")
            assert marker_read.error is None, (
                f"Failed to read subpath marker file: {marker_read.error}"
            )
            assert len(marker_read.logs.stdout) == 1
            assert marker_read.logs.stdout[0].text == "pvc-subpath-marker"
            logger.info("✓ SubPath marker file read successfully")

            # The listing must show the sub-path contents, not the volume root.
            logger.info("Step 2: Verify only subPath contents are visible")
            listing = await sandbox.commands.run(f"ls {container_mount_path}/")
            assert listing.error is None
            # marker.txt is expected; a 'datasets' entry would mean the volume
            # root was mounted instead of the sub-path.
            listing_text = "\n".join([line.text for line in listing.logs.stdout])
            assert "marker.txt" in listing_text
            assert "datasets" not in listing_text
            logger.info("✓ Only subPath contents are visible inside the sandbox")

            # Write a file, then read it back (up to 3 tries for transient SSE drops).
            logger.info("Step 3: Write and verify a file inside subPath mount")
            output_path = f"{container_mount_path}/output.txt"
            write_outcome = await sandbox.commands.run(
                f"echo 'subpath-write-test' > {output_path}"
            )
            assert write_outcome.error is None
            for _retry in range(3):
                readback = await sandbox.commands.run(f"cat {output_path}")
                if readback.logs.stdout:
                    break
                await asyncio.sleep(1)
            assert readback.error is None
            assert len(readback.logs.stdout) == 1
            assert readback.logs.stdout[0].text == "subpath-write-test"
            logger.info("✓ File written and verified inside subPath mount")

        finally:
            # Best-effort kill: a failure here must not prevent close().
            try:
                await sandbox.kill()
            except Exception:
                pass
            await sandbox.close()

        logger.info("TEST 1f PASSED: PVC subPath named volume mount test completed successfully")

    @pytest.mark.timeout(120)
    @pytest.mark.order(2)
    async def test_02_basic_command_execution(self):
        """Test basic command execution against the shared class-level sandbox.

        Four scenarios are covered in order:

        1. A foreground ``echo`` with all execution handlers attached.
        2. A command run with an explicit working directory.
        3. A background command, which must return promptly.
        4. A non-existent command, which must surface an error and stderr output.
        """
        await self._ensure_sandbox_created()
        sandbox = TestSandboxE2E.sandbox

        logger.info("=" * 80)
        logger.info("TEST 2: Testing basic command execution")
        logger.info("=" * 80)

        logger.info("Step 1: Simple echo command with handlers to capture events")
        # One list per handler so each event type can be asserted independently.
        stdout_messages = []
        stderr_messages = []
        results = []
        completed_events = []
        errors = []
        init_events = []

        async def on_stdout(msg: OutputMessage):
            stdout_messages.append(msg)
            logger.info(f"Stdout: {msg.text}")

        async def on_stderr(msg: OutputMessage):
            stderr_messages.append(msg)
            logger.warning(f"Stderr: {msg.text}")

        async def on_result(result: ExecutionResult):
            results.append(result)
            logger.info(f"Result: {result.text}")

        async def on_execution_complete(complete: ExecutionComplete):
            completed_events.append(complete)
            logger.info(f"Execution completed in {complete.execution_time_in_millis} ms")

        async def on_error(error: ExecutionError):
            errors.append(error)
            logger.error(f"Error: {error.name} - {error.value}")

        async def on_init(init: ExecutionInit):
            init_events.append(init)
            logger.info(f"Execution initialized with ID: {init.id}")

        handlers = ExecutionHandlers(
            on_stdout=on_stdout,
            on_stderr=on_stderr,
            on_result=on_result,
            on_execution_complete=on_execution_complete,
            on_error=on_error,
            on_init=on_init
        )

        echo_result = await sandbox.commands.run(
            "echo 'Hello OpenSandbox E2E'",
            handlers=handlers,
        )

        # Verify result
        assert echo_result is not None
        assert echo_result.id is not None and echo_result.id.strip()
        assert echo_result.error is None
        assert len(echo_result.logs.stdout) == 1
        assert echo_result.logs.stdout[0].text == "Hello OpenSandbox E2E"
        assert echo_result.logs.stdout[0].is_error is False
        _assert_recent_timestamp_ms(echo_result.logs.stdout[0].timestamp)
        assert len(echo_result.logs.stderr) == 0

        # Verify handlers captured events
        assert len(init_events) == 1, "Execution should have exactly one init event"
        assert len(completed_events) == 1, "Execution should have exactly one completion event"
        assert init_events[0].id == echo_result.id
        _assert_recent_timestamp_ms(init_events[0].timestamp)
        _assert_recent_timestamp_ms(completed_events[0].timestamp)
        assert completed_events[0].execution_time_in_millis >= 0

        assert len(stdout_messages) == 1, "Should have captured exactly one stdout message"
        assert stdout_messages[0].text == "Hello OpenSandbox E2E"
        assert stdout_messages[0].is_error is False
        _assert_recent_timestamp_ms(stdout_messages[0].timestamp)

        assert len(errors) == 0, "Should have no errors for successful command"

        logger.info(
            "✓ Captured %s stdout, %s stderr, %s results, %s errors, %s completions, %s inits",
            len(stdout_messages),
            len(stderr_messages),
            len(results),
            len(errors),
            len(completed_events),
            len(init_events),
        )

        logger.info("Step 2: Command with working directory")
        pwd_result = await sandbox.commands.run(
            "pwd",
            opts=RunCommandOpts(working_directory="/tmp"),
        )
        assert pwd_result is not None
        assert pwd_result.id is not None and pwd_result.id.strip()
        assert pwd_result.error is None
        assert len(pwd_result.logs.stdout) == 1
        assert pwd_result.logs.stdout[0].text == "/tmp"
        assert pwd_result.logs.stdout[0].is_error is False
        _assert_recent_timestamp_ms(pwd_result.logs.stdout[0].timestamp)
        logger.info(f"✓ PWD command executed: {pwd_result}")

        logger.info("Step 3: Background command")
        # Use the monotonic clock for the elapsed-time check so a wall-clock
        # adjustment during the call cannot skew (or negate) the measurement.
        start_time = time.monotonic()
        await sandbox.commands.run(
            "sleep 30",
            opts=RunCommandOpts(background=True),
        )
        end_time = time.monotonic()

        execution_time = (end_time - start_time) * 1000
        assert execution_time < 10000, \
            f"Background command should return quickly, but took {execution_time} ms"
        logger.info(f"✓ Background command returned in {execution_time:.2f} ms")

        logger.info("Step 4: Test failing command")
        # Clear every event list so the assertions below only see events
        # produced by the failing command.
        stdout_messages.clear()
        stderr_messages.clear()
        results.clear()
        errors.clear()
        completed_events.clear()
        init_events.clear()

        fail_result = await sandbox.commands.run(
            "nonexistent-command-that-does-not-exist",
            handlers=handlers,
        )

        # Verify error result
        assert fail_result is not None
        assert fail_result.id is not None and fail_result.id.strip()
        assert fail_result.error is not None
        assert fail_result.error.name == "CommandExecError"
        assert len(fail_result.logs.stderr) > 0
        assert any(
            "nonexistent-command-that-does-not-exist" in m.text for m in fail_result.logs.stderr
        )
        assert all(m.is_error is True for m in fail_result.logs.stderr)
        _assert_recent_timestamp_ms(fail_result.logs.stderr[0].timestamp)

        # Verify handlers captured error events
        assert len(init_events) == 1, "Execution should have exactly one init event"
        assert init_events[0].id == fail_result.id
        _assert_recent_timestamp_ms(init_events[0].timestamp)
        # Contract: error and complete are mutually exclusive; failing command should emit error only.
        assert len(errors) >= 1, "Should have captured error events"
        assert len(completed_events) == 0, "Failing command should not emit completion event"

        assert errors[0].name == "CommandExecError", "Error name should match"
        assert len(stderr_messages) > 0, "Should have captured stderr messages"
        assert "nonexistent-command-that-does-not-exist" in stderr_messages[0].text, (
            "Stderr should contain command name"
        )

        logger.info(f"✓ Failed command result: {fail_result}")

        logger.info("TEST 2 PASSED: Basic command execution test completed successfully")

    @pytest.mark.timeout(120)
    @pytest.mark.order(3)
    async def test_02a_command_status_and_logs(self):
        """Check status lookup and incremental log retrieval for a background command.

        A background shell command prints two lines and sleeps. The test fetches
        its status once, then polls the background logs (up to 20 times, one
        second apart) until the second line shows up.
        """
        await self._ensure_sandbox_created()
        sandbox = TestSandboxE2E.sandbox

        background_run = await sandbox.commands.run(
            "sh -c 'echo log-line-1; echo log-line-2; sleep 2'",
            opts=RunCommandOpts(background=True),
        )
        assert background_run.id is not None
        command_id = background_run.id

        status = await sandbox.commands.get_command_status(command_id)
        assert status.id == command_id
        assert isinstance(status.running, bool)

        collected = ""
        cursor = None
        polls_left = 20
        while polls_left > 0:
            polls_left -= 1
            page = await sandbox.commands.get_background_command_logs(command_id, cursor=cursor)
            collected += page.content
            # Keep the previous cursor when the response does not carry a new one.
            if page.cursor is not None:
                cursor = page.cursor
            if "log-line-2" in collected:
                break
            await asyncio.sleep(1.0)

        assert "log-line-1" in collected
        assert "log-line-2" in collected

    @pytest.mark.timeout(120)
    @pytest.mark.order(3)
    async def test_02b_run_command_with_envs(self):
        """Verify that variables passed via ``RunCommandOpts.envs`` reach the command.

        The same probe command is run twice: first without extra variables, where
        it must print ``__EMPTY__``, then with ``envs`` set, where it must print
        the injected value.
        """
        await self._ensure_sandbox_created()
        sandbox = TestSandboxE2E.sandbox

        def stdout_text(execution):
            # All stdout lines of an execution joined and stripped.
            return "\n".join(line.text for line in execution.logs.stdout).strip()

        env_key = "OPEN_SANDBOX_E2E_CMD_ENV"
        env_value = f"env-ok-{int(time.time())}"
        # Prints the variable's value, or the __EMPTY__ sentinel when it is unset/empty.
        probe_command = (
            f"sh -c 'if [ -z \"${{{env_key}:-}}\" ]; then echo \"__EMPTY__\"; "
            f"else echo \"${{{env_key}}}\"; fi'"
        )

        # Without injection the probe must report the sentinel.
        baseline = await sandbox.commands.run(probe_command)
        assert baseline.error is None
        assert stdout_text(baseline) == "__EMPTY__"

        # With injection the probe must report the injected value.
        injected_envs = {
            env_key: env_value,
            "OPEN_SANDBOX_E2E_SECOND_ENV": "second-ok",
        }
        injected = await sandbox.commands.run(
            probe_command,
            opts=RunCommandOpts(envs=injected_envs),
        )
        assert injected.error is None
        assert stdout_text(injected) == env_value

    @pytest.mark.timeout(120)
    @pytest.mark.order(4)
    async def test_03_basic_filesystem_operations(self):
        """Test basic filesystem operations."""
        await self._ensure_sandbox_created()
        sandbox = TestSandboxE2E.sandbox

        logger.info("=" * 80)
        logger.info("TEST 3: Testing basic filesystem operations")
        logger.info("=" * 80)

        test_dir1 = f"/tmp/fs_test1_{int(time.time() * 1000)}"
        test_dir2 = f"/tmp/fs_test2_{int(time.time() * 1000)}"

        logger.info("Step 1: Create directories")
        dir_entry1 = WriteEntry(path=test_dir1, mode=755)
        dir_entry2 = WriteEntry(path=test_dir2, mode=644)
        await sandbox.files.create_directories([dir_entry1, dir_entry2])
        logger.info(f"✓ Created directories: {test_dir1}, {test_dir2}")

        dir_info_map = await sandbox.files.get_file_info([test_dir1, test_dir2])
        assert test_dir1 in dir_info_map
        assert test_dir2 in dir_info_map
        assert dir_info_map[test_dir1].path == test_dir1
        assert dir_info_map[test_dir2].path == test_dir2
        assert dir_info_map[test_dir1].mode == 755
        assert dir_info_map[test_dir2].mode == 644
        assert dir_info_map[test_dir1].owner
        assert dir_info_map[test_dir1].group
        _assert_times_close(dir_info_map[test_dir1].created_at, dir_info_map[test_dir1].modified_at)

        ls_result = await sandbox.commands.run(
            "ls -la | grep fs_test",
            opts=RunCommandOpts(working_directory="/tmp"),
        )
        assert len(ls_result.logs.stdout) == 2, "Should find exactly 2 directories"
        logger.info(f"✓ Directory verification: {ls_result}")

        logger.info("Step 2: Create and write files")
        test_file1 = f"{test_dir1}/test_file1.txt"
        test_file2 = f"{test_dir1}/test_file2.txt"
        test_file3 = f"{test_dir1}/test_file3.txt"
        test_content = "Hello Filesystem!\\nLine 2 with special chars: åäö\\nLine 3"

        write_entry1 = WriteEntry(path=test_file1, data=test_content, mode=644)
        write_entry2 = WriteEntry(path=test_file2, data=test_content.encode('utf-8'), mode=755)
        write_entry3 = WriteEntry(
            path=test_file3,
            data=BytesIO(test_content.encode('utf-8')),
            group="nogroup",
            owner="nobody",
            mode=755
        )
        await sandbox.files.write_files([write_entry1, write_entry2, write_entry3])
        logger.info("✓ Created 3 test files")

        logger.info("Step 3: Read and verify file content using different methods")
        read_content1 = await sandbox.files.read_file(test_file1, encoding='utf-8')
        read_content1_partial = await sandbox.files.read_file(
            test_file1, encoding='utf-8', range_header="bytes=0-9"
        )

        read_bytes2 = await sandbox.files.read_bytes(test_file2)
        read_content2 = read_bytes2.decode('utf-8')

        stream3 = await sandbox.files.read_bytes_stream(test_file3)
        read_content3_bytes = b""
        async for chunk in stream3:
            read_content3_bytes += chunk
        read_content3 = read_content3_bytes.decode("utf-8")

        expected_size = len(test_content.encode("utf-8"))
        assert read_content1 == test_content
        assert read_content2 == test_content
        assert read_content3 == test_content
        assert read_content1_partial == test_content[:10]
        logger.info("✓ All file reads successful and content verified")

        logger.info("Step 4: Get and verify file info")
        all_test_files = [test_file1, test_file2, test_file3]
        file_info_map = await sandbox.files.get_file_info(all_test_files)

        file_info1 = file_info_map[test_file1]
        assert file_info1 is not None
        assert file_info1.path == test_file1
        assert file_info1.size == expected_size
        assert file_info1.mode == 644
        assert file_info1.owner is not None
        assert file_info1.group is not None
        _assert_times_close(file_info1.created_at, file_info1.modified_at)

        file_info2 = file_info_map[test_file2]
        assert file_info2 is not None
        assert file_info2.path == test_file2
        assert file_info2.size == expected_size
        assert file_info2.mode == 755
        assert file_info2.owner is not None
        assert file_info2.group is not None
        _assert_times_close(file_info2.created_at, file_info2.modified_at)

        file_info3 = file_info_map[test_file3]
        assert file_info3 is not None
        assert file_info3.path == test_file3
        assert file_info3.size == expected_size
        assert file_info3.mode == 755
        assert file_info3.owner == "nobody"
        assert file_info3.group == "nogroup"
        _assert_times_close(file_info3.created_at, file_info3.modified_at)
        logger.info(f"✓ File info verified: size={file_info1.size}, mode={oct(file_info1.mode)}")

        logger.info("Step 5: Test search functionality")
        search_all_entry = SearchEntry(path=test_dir1, pattern="*")
        all_files_list = await sandbox.files.search(search_all_entry)
        all_files = {entry.path: entry for entry in all_files_list}

        assert len(all_files) == 3
        assert test_file1 in all_files
        assert test_file2 in all_files
        assert test_file3 in all_files
        assert all_files[test_file1].size == expected_size
        _assert_times_close(all_files[test_file1].created_at, all_files[test_file1].modified_at)
        logger.info("✓ Search found all 3 files")

        logger.info("Step 6: Test permission changes")
        perm_entry1 = SetPermissionEntry(
            path=test_file1,
            mode=755,
            owner="nobody",
            group="nogroup"
        )
        perm_entry2 = SetPermissionEntry(
            path=test_file2,
            mode=600,
            owner="nobody",
            group="nogroup"
        )
        await sandbox.files.set_permissions([perm_entry1, perm_entry2])

        updated_info_map = await sandbox.files.get_file_info([test_file1, test_file2])
        updated_info1 = updated_info_map[test_file1]
        updated_info2 = updated_info_map[test_file2]

        assert updated_info1.mode == 755
        assert updated_info1.owner == "nobody"
        assert updated_info1.group == "nogroup"

        assert updated_info2.mode == 600
        assert updated_info2.owner == "nobody"
        assert updated_info2.group == "nogroup"
        logger.info("✓ Permissions updated successfully")

        logger.info("Step 7: Update file content")
        before_update_info = (await sandbox.files.get_file_info([test_file1]))[test_file1]
        updated_content1 = test_content + "\\nAppended line to file1"
        updated_content2 = test_content + "\\nAppended line to file2"

        # Ensure server-visible mtime delta is measurable.
        await asyncio.sleep(0.05)

        update_entry1 = WriteEntry(path=test_file1, data=updated_content1, mode=644)
        update_entry2 = WriteEntry(path=test_file2, data=updated_content2, mode=755)
        await sandbox.files.write_files([update_entry1, update_entry2])

        new_content1 = await sandbox.files.read_file(test_file1, encoding="utf-8")
        new_content2 = await sandbox.files.read_file(test_file2, encoding="utf-8")

        assert new_content1 == updated_content1
        assert new_content2 == updated_content2
        logger.info("✓ File content updated successfully")

        after_update_info = (await sandbox.files.get_file_info([test_file1]))[test_file1]
        assert after_update_info.size == len(updated_content1.encode("utf-8"))
        _assert_modified_updated(before_update_info.modified_at, after_update_info.modified_at, min_delta_ms=1)

        logger.info("Step 8: Replace file contents via API (replace_contents)")
        before_replace_info = after_update_info
        await asyncio.sleep(0.05)
        replace_entry = ContentReplaceEntry(
            path=test_file1,
            old_content="Appended line to file1",
            new_content="Replaced line in file1",
        )
        await sandbox.files.replace_contents([replace_entry])
        replaced_content1 = await sandbox.files.read_file(test_file1, encoding="utf-8")
        assert "Replaced line in file1" in replaced_content1
        assert "Appended line to file1" not in replaced_content1

        after_replace_info = (await sandbox.files.get_file_info([test_file1]))[test_file1]
        _assert_modified_updated(before_replace_info.modified_at, after_replace_info.modified_at, min_delta_ms=1)

        logger.info("Step 9: Move/rename a file via API (move_files)")
        moved_path = f"{test_dir2}/moved_file3.txt"
        await sandbox.files.move_files([MoveEntry(src=test_file3, dest=moved_path)])
        moved_bytes = await sandbox.files.read_bytes(moved_path)
        assert moved_bytes.decode("utf-8") == test_content
        with pytest.raises(Exception):
            await sandbox.files.read_bytes(test_file3)

        logger.info("Step 10: Delete file via API (delete_files)")
        await sandbox.files.delete_files([test_file2])
        with pytest.raises(Exception):
            await sandbox.files.read_file(test_file2, encoding="utf-8")

        # After move+delete, search should reflect the updated view.
        files_after = await sandbox.files.search(SearchEntry(path=test_dir1, pattern="*"))
        assert {e.path for e in files_after} == {test_file1}

        logger.info("Step 11: Delete directories recursively (delete_directories)")
        await sandbox.files.delete_directories([test_dir1, test_dir2])
        verify_dirs_deleted = await sandbox.commands.run(
            f"test ! -d {test_dir1} && test ! -d {test_dir2} && echo OK",
            opts=RunCommandOpts(working_directory="/tmp"),
        )
        assert verify_dirs_deleted.error is None
        assert len(verify_dirs_deleted.logs.stdout) == 1
        assert verify_dirs_deleted.logs.stdout[0].text == "OK"

        logger.info("TEST 3 PASSED: Basic filesystem operations test completed successfully")

    @pytest.mark.timeout(120)
    @pytest.mark.order(5)
    async def test_04_interrupt_command(self):
        """Interrupt a long-running command and verify it terminates early.

        Launches ``sleep 30`` as a background task, waits for the init event to
        learn the execution id, interrupts that execution, and then checks that
        the run returned well before the sleep would have elapsed and that
        exactly one terminal event (complete or error) was delivered.
        """
        await self._ensure_sandbox_created()
        sandbox = TestSandboxE2E.sandbox

        logger.info("=" * 80)
        logger.info("TEST 4: Testing command interrupt")
        logger.info("=" * 80)

        init_events: list[ExecutionInit] = []
        completed_events: list[ExecutionComplete] = []
        errors: list[ExecutionError] = []
        # Set by on_init so the test can interrupt as soon as the id is known.
        init_received = asyncio.Event()

        async def on_init(init: ExecutionInit):
            init_events.append(init)
            init_received.set()

        async def on_execution_complete(complete: ExecutionComplete):
            completed_events.append(complete)

        async def on_error(error: ExecutionError):
            errors.append(error)

        handlers = ExecutionHandlers(
            on_init=on_init,
            on_execution_complete=on_execution_complete,
            on_error=on_error,
        )

        start = time.time()
        # Run the command in a task so the interrupt can be sent while it is in flight.
        task = asyncio.create_task(
            sandbox.commands.run(
                "sleep 30",
                handlers=handlers,
            )
        )

        await asyncio.wait_for(init_received.wait(), timeout=15)
        assert len(init_events) == 1
        assert init_events[0].id is not None and init_events[0].id.strip()
        _assert_recent_timestamp_ms(init_events[0].timestamp)

        await sandbox.commands.interrupt(init_events[0].id)

        execution = await asyncio.wait_for(task, timeout=30)
        elapsed = time.time() - start

        assert execution is not None
        assert execution.id == init_events[0].id
        assert elapsed < 20, f"Interrupted command took too long: {elapsed:.2f}s"
        # Contract: error and complete are mutually exclusive, so exactly one of
        # them must have fired. A plain `or` would also accept both firing.
        has_complete = len(completed_events) > 0
        has_error = len(errors) > 0
        assert has_complete != has_error, (
            f"expected exactly one of complete/error, got complete={len(completed_events)} "
            f"error={len(errors)}"
        )
        if has_complete:
            assert len(completed_events) == 1
            _assert_recent_timestamp_ms(completed_events[0].timestamp, tolerance_ms=180_000)

        # Interrupt should stop the process early; most implementations surface an error and/or stderr.
        assert execution.error is not None or len(execution.logs.stderr) > 0
        if execution.error is not None:
            assert execution.error.name
            assert execution.error.value
            _assert_recent_timestamp_ms(execution.error.timestamp, tolerance_ms=180_000)

    @pytest.mark.timeout(120)
    @pytest.mark.order(6)
    async def test_05_sandbox_pause(self):
        """Pause the shared sandbox and confirm it ends up in the Paused state.

        After requesting the pause, the sandbox status is polled once per second
        (at most 30 times) until it is no longer "Pausing". The test then expects
        the "Paused" state and a health check that is either False or times out.
        """
        await self._ensure_sandbox_created()
        sandbox = TestSandboxE2E.sandbox

        separator = "=" * 80
        logger.info(separator)
        logger.info("TEST 5: Testing sandbox pause operation")
        logger.info(separator)

        # Tests 01-04 already exercised this sandbox, so a short settle period is enough.
        await asyncio.sleep(2)
        assert await sandbox.is_healthy(), "Sandbox should be healthy before pause"

        logger.info("Requesting sandbox pause...")
        await sandbox.pause()

        started_at = time.time()
        settled_status = None

        logger.info("Polling for status change (timeout: 30s)...")
        for attempt in range(1, 31):
            await asyncio.sleep(1)

            observed = (await sandbox.get_info()).status
            logger.info(f"Poll {attempt}: Status = {observed.state}")

            # Any state other than the transitional one ends the wait.
            if observed.state != "Pausing":
                settled_status = observed
                break

        assert settled_status is not None, "Failed to get final status after pause operation"
        assert settled_status.state == "Paused", "Sandbox should be in Paused state"

        # Pause semantics: execd should be unreachable.
        # The global HTTP request_timeout is 3 min, so the single is_healthy()
        # call is bounded by a short asyncio timeout. A paused container's
        # frozen process will never reply, which shows up either as a timeout
        # (good) or as an immediate connection refusal (also good).
        try:
            healthy = await asyncio.wait_for(sandbox.is_healthy(), timeout=15)
        except asyncio.TimeoutError:
            healthy = False
        assert healthy is False, "Sandbox should be unhealthy after pause"

        elapsed_time = (time.time() - started_at) * 1000
        logger.info(f"✓ Sandbox pause confirmed in {elapsed_time:.2f} ms")

    @pytest.mark.timeout(120)
    @pytest.mark.order(7)
    async def test_06_sandbox_resume(self):
        """Resume the paused sandbox and verify it is running, healthy and usable.

        Calls ``Sandbox.resume`` with the shared sandbox id, stores the returned
        instance on the class, polls once per second (at most 60 times) for the
        "Running" state, retries the health check up to 30 times, and finally
        runs a trivial ``echo`` command as a smoke check.
        """
        await self._ensure_sandbox_created()
        sandbox = TestSandboxE2E.sandbox

        logger.info("=" * 80)
        logger.info("TEST 6: Testing sandbox resume operation")
        logger.info("=" * 80)

        logger.info("Requesting sandbox resume...")
        resumed = await Sandbox.resume(
            sandbox_id=sandbox.id,
            connection_config=TestSandboxE2E.connection_config,
        )
        # Replace the class-held instance so subsequent operations/teardown use the resumed instance.
        TestSandboxE2E.sandbox = resumed
        sandbox = resumed

        start_time = time.time()
        poll_count = 0
        final_status = None

        logger.info("Polling for status change (timeout: 1 minute)...")
        while poll_count < 60:
            await asyncio.sleep(1)
            poll_count += 1

            info = await sandbox.get_info()
            current_status = info.status
            logger.info(f"Poll {poll_count}: Status = {current_status.state}")

            if current_status.state == "Running":
                final_status = current_status
                break

        assert final_status is not None, "Failed to get final status after resume operation"
        assert final_status.state == "Running", "Sandbox should be in Running state after resume"

        logger.info("Verifying sandbox health after resume...")
        healthy = False
        # Health may lag behind the reported state, so retry for up to ~30 seconds.
        for _ in range(30):
            healthy = await sandbox.is_healthy()
            if healthy:
                break
            await asyncio.sleep(1)
        assert healthy is True, "Sandbox should be healthy after resume"

        # Minimal smoke check: the instance returned by Sandbox.resume should be usable
        # for command execution (the execd endpoint may change across resume).
        echo = await sandbox.commands.run("echo resume-ok")
        assert echo.error is None
        assert len(echo.logs.stdout) == 1
        assert echo.logs.stdout[0].text == "resume-ok"

        elapsed_time = (time.time() - start_time) * 1000
        logger.info(f"✓ Sandbox resume completed in {elapsed_time:.2f} ms")
        # The original message said "TEST 5", which mislabelled this test in the logs.
        logger.info("TEST 6 PASSED: Sandbox resume operation test completed successfully")

    @pytest.mark.timeout(120)
    @pytest.mark.order(8)
    async def test_07_x_request_id_passthrough_on_server_error(self):
        """Check that a caller-supplied X-Request-ID is surfaced on an API error.

        A connection config carrying a unique ``X-Request-ID`` header is used to
        connect to a sandbox id that is derived from that request id (and so is
        expected not to exist). The resulting ``SandboxApiException`` must
        report the same request id.
        """
        millis = int(time.time() * 1000)
        request_id = "e2e-py-server-{}".format(millis)
        missing_sandbox_id = "missing-" + request_id

        extra_headers = {"X-Request-ID": request_id}
        config = ConnectionConfig(
            domain=TEST_DOMAIN,
            api_key=TEST_API_KEY,
            request_timeout=timedelta(minutes=3),
            protocol=TEST_PROTOCOL,
            headers=extra_headers,
        )

        with pytest.raises(SandboxApiException) as exc_info:
            # Either the connect or the follow-up get_info may raise the API error.
            connected = await Sandbox.connect(
                sandbox_id=missing_sandbox_id, connection_config=config
            )
            await connected.get_info()

        assert exc_info.value.request_id == request_id


================================================
FILE: tests/python/tests/test_sandbox_e2e_sync.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Comprehensive Sync E2E tests for SandboxSync functionality.

This mirrors `test_sandbox_e2e.py` but uses the synchronous SDK.
"""

import logging
import time
from concurrent.futures import ThreadPoolExecutor
from datetime import timedelta
from io import BytesIO

import httpx
import pytest
from opensandbox import SandboxSync
from opensandbox.config.connection_sync import ConnectionConfigSync
from opensandbox.exceptions import SandboxApiException
from opensandbox.models.execd import (
    ExecutionComplete,
    ExecutionError,
    ExecutionInit,
    OutputMessage,
    RunCommandOpts,
)
from opensandbox.models.execd_sync import ExecutionHandlersSync
from opensandbox.models.filesystem import (
    ContentReplaceEntry,
    MoveEntry,
    SearchEntry,
    SetPermissionEntry,
    WriteEntry,
)
from opensandbox.models.sandboxes import (
    PVC,
    Host,
    NetworkPolicy,
    NetworkRule,
    SandboxImageSpec,
    Volume,
)

from tests.base_e2e_test import (
    TEST_API_KEY,
    TEST_DOMAIN,
    TEST_PROTOCOL,
    create_connection_config_sync,
    get_sandbox_image,
)

logger = logging.getLogger(__name__)


def _now_ms() -> int:
    return int(time.time() * 1000)


def _assert_recent_timestamp_ms(ts: int, *, tolerance_ms: int = 60_000) -> None:
    """Assert that ``ts`` is a positive int millisecond timestamp close to now.

    Args:
        ts: Timestamp in milliseconds to validate.
        tolerance_ms: Maximum allowed absolute distance from the current time,
            in milliseconds.

    Raises:
        AssertionError: If ``ts`` is not an int, is not positive, or differs
            from the current time by more than ``tolerance_ms``.
    """
    assert isinstance(ts, int)
    assert ts > 0
    # Absolute distance: timestamps slightly in the future are tolerated too.
    distance_ms = abs(_now_ms() - ts)
    assert distance_ms <= tolerance_ms, (
        f"timestamp too far from now: delta={distance_ms}ms (ts={ts})"
    )


def _assert_endpoint_has_port(endpoint: str, expected_port: int) -> None:
    assert endpoint
    # In some deployments lifecycle returns direct "host:port".
    # In others it returns a reverse-proxy route like "domain/route/{id}/{port}".
    # In both cases, we expect NO scheme, and the port to be present deterministically.
    assert "://" not in endpoint, f"unexpected scheme in endpoint: {endpoint}"

    if "/" in endpoint:
        assert endpoint.endswith(f"/{expected_port}"), (
            f"endpoint route must end with /{expected_port}: {endpoint}"
        )
        assert endpoint.split("/", 1)[0], f"missing domain in endpoint: {endpoint}"
        return

    host, port = endpoint.rsplit(":", 1)
    assert host, f"missing host in endpoint: {endpoint}"
    assert port.isdigit(), f"non-numeric port in endpoint: {endpoint}"
    assert int(port) == expected_port, f"endpoint port mismatch: {endpoint} != :{expected_port}"


def _assert_times_close(created_at, modified_at, *, tolerance_seconds: float = 2.0) -> None:
    """
    Some filesystems / implementations may report created/modified with slight reordering.
    We only assert they're close, and rely on explicit update operations to validate mtime.
    """
    delta = abs((modified_at - created_at).total_seconds())
    assert delta <= tolerance_seconds, f"created/modified skew too large: {delta}s"


def _assert_modified_updated(before, after, *, min_delta_ms: int = 0, allow_skew_ms: int = 1000) -> None:
    """
    Validate modified_at moved forward after a mutating operation, allowing small clock jitter.
    """
    delta_ms = int((after - before).total_seconds() * 1000)
    assert delta_ms >= min_delta_ms - allow_skew_ms, (
        f"modified_at did not update as expected: delta_ms={delta_ms} "
        f"(min_delta_ms={min_delta_ms}, allow_skew_ms={allow_skew_ms})"
    )


class TestSandboxE2ESync:
    """Comprehensive E2E tests for SandboxSync functionality (ordered)."""

    sandbox = None
    connection_config = None
    _setup_done = False

    @pytest.fixture(scope="class", autouse=True)
    def _sandbox_lifecycle(self, request):
        """Create sandbox once and ALWAYS cleanup to avoid resource leaks.

        Setup runs inside the ``try`` so that a failure part-way through
        creation (for example, the connection config was built but the sandbox
        could not be created) still reaches the cleanup in ``finally``.
        Every cleanup step is best-effort: a failure is logged and the
        remaining steps still run.
        """
        try:
            request.cls._ensure_sandbox_created()
            yield
        finally:
            sandbox = request.cls.sandbox
            if sandbox is not None:
                try:
                    sandbox.kill()
                except Exception as e:
                    logger.warning("Teardown: sandbox.kill() failed: %s", e, exc_info=True)
                try:
                    sandbox.close()
                except Exception as e:
                    logger.warning("Teardown: sandbox.close() failed: %s", e, exc_info=True)

            cfg = request.cls.connection_config
            if cfg is not None:
                try:
                    cfg.transport.close()
                except Exception as e:
                    # Still best-effort, but leave a trace instead of swallowing silently.
                    logger.debug("Teardown: transport.close() failed: %s", e, exc_info=True)

    @classmethod
    def _ensure_sandbox_created(cls) -> None:
        """Create the shared class-level sandbox on the first call only.

        Stores the connection config and the created sandbox on the class and
        sets ``_setup_done`` so that later calls return immediately.
        """
        if not cls._setup_done:
            banner = "=" * 100
            logger.info(banner)
            logger.info("SETUP: Creating sandbox (sync)")
            logger.info(banner)

            cls.connection_config = create_connection_config_sync()

            # Environment variables handed to the sandbox at creation time.
            sandbox_env = {
                "E2E_TEST": "true",
                "GO_VERSION": "1.25",
                "JAVA_VERSION": "21",
                "NODE_VERSION": "22",
                "PYTHON_VERSION": "3.12",
            }
            created = SandboxSync.create(
                image=SandboxImageSpec(get_sandbox_image()),
                connection_config=cls.connection_config,
                timeout=timedelta(minutes=2),
                ready_timeout=timedelta(seconds=30),
                metadata={"tag": "e2e-test"},
                env=sandbox_env,
                health_check_polling_interval=timedelta(milliseconds=500),
            )
            cls.sandbox = created

            logger.info("✓ Sandbox created: %s", cls.sandbox.id)
            # Only flag setup as done once creation has actually succeeded.
            cls._setup_done = True

    @pytest.mark.timeout(120)
    @pytest.mark.order(1)
    def test_01_sandbox_lifecycle_and_health(self) -> None:
        """Test sandbox lifecycle and health monitoring.

        Covers, in order: identity and health of the shared sandbox, the fields
        returned by ``get_info()``, endpoint resolution for port 44772, metrics
        sanity, TTL renewal, presence of the service accessors, and connecting
        to the same sandbox by id to run a command.
        """
        TestSandboxE2ESync._ensure_sandbox_created()
        sandbox = TestSandboxE2ESync.sandbox
        assert sandbox is not None

        logger.info("=" * 80)
        logger.info("TEST 1: Testing sandbox lifecycle and health monitoring (sync)")
        logger.info("=" * 80)

        assert isinstance(sandbox.id, str)
        assert sandbox.is_healthy() is True

        info = sandbox.get_info()
        assert info.id == sandbox.id
        assert info.status.state == "Running"
        assert info.created_at is not None
        assert info.expires_at is not None
        assert info.expires_at > info.created_at
        assert info.entrypoint == ["tail", "-f", "/dev/null"]

        # The shared sandbox is created with a 2-minute timeout, so the reported
        # lifetime is expected to fall in a 1-3 minute window.
        duration = info.expires_at - info.created_at
        min_duration = timedelta(minutes=1)
        max_duration = timedelta(minutes=3)
        assert min_duration <= duration <= max_duration, (
            f"Duration {duration} should be between 1 and 3 minutes"
        )

        # The "tag" metadata value is the one passed at creation time.
        assert info.metadata is not None
        assert info.metadata.get("tag") == "e2e-test"

        endpoint = sandbox.get_endpoint(44772)
        assert endpoint is not None
        assert endpoint.endpoint is not None
        _assert_endpoint_has_port(endpoint.endpoint, 44772)

        metrics = sandbox.get_metrics()
        assert metrics is not None
        assert metrics.cpu_count > 0
        assert 0.0 <= metrics.cpu_used_percentage <= 100.0
        assert metrics.memory_total_in_mib > 0
        assert 0.0 <= metrics.memory_used_in_mib <= metrics.memory_total_in_mib
        _assert_recent_timestamp_ms(metrics.timestamp, tolerance_ms=120_000)

        # Renew with a 20-minute duration and expect the expiry to move later.
        await_renew = timedelta(minutes=20)
        renew_response = sandbox.renew(await_renew)
        assert renew_response is not None
        assert renew_response.expires_at > info.expires_at

        # A fresh get_info() must agree with the renew response to within 10 seconds.
        renewed_info = sandbox.get_info()
        assert renewed_info.expires_at > info.expires_at
        assert abs((renewed_info.expires_at - renew_response.expires_at).total_seconds()) < 10

        # "now" is taken from the same class and tzinfo as expires_at so the
        # subtraction below operates on compatible values.
        now = renewed_info.expires_at.__class__.now(tz=renewed_info.expires_at.tzinfo)
        remaining = renewed_info.expires_at - now
        assert remaining > timedelta(minutes=18), f"Remaining TTL too small: {remaining}"
        assert remaining < timedelta(minutes=22), f"Remaining TTL too large: {remaining}"

        assert sandbox.files is not None
        assert sandbox.commands is not None
        assert sandbox.metrics is not None
        assert sandbox.connection_config is not None

        # Connect to existing sandbox by ID and run a basic command.
        sandbox2 = SandboxSync.connect(
            sandbox.id, connection_config=TestSandboxE2ESync.connection_config
        )
        try:
            assert sandbox2.id == sandbox.id
            assert sandbox2.is_healthy() is True
            connect_result = sandbox2.commands.run(
                "echo connect-ok",
            )
            assert connect_result.error is None
            assert len(connect_result.logs.stdout) == 1
            assert connect_result.logs.stdout[0].text == "connect-ok"
        finally:
            # Only the second client handle is closed here; the sandbox itself is not killed.
            sandbox2.close()

    @pytest.mark.timeout(120)
    @pytest.mark.order(1)
    def test_01b_manual_cleanup(self) -> None:
        """Create a sandbox with ``timeout=None`` and verify it reports no expiry.

        The sandbox is killed explicitly at the end. ``close()`` runs even when
        ``kill()`` raises, so the local client is always released; a ``kill()``
        failure still propagates and fails the test.
        """
        sandbox = SandboxSync.create(
            image=SandboxImageSpec(get_sandbox_image()),
            connection_config=TestSandboxE2ESync.connection_config,
            timeout=None,
            ready_timeout=timedelta(seconds=30),
            metadata={"tag": "manual-e2e-test"},
        )
        try:
            info = sandbox.get_info()
            assert info.expires_at is None
            assert info.metadata is not None
            assert info.metadata.get("tag") == "manual-e2e-test"
        finally:
            try:
                sandbox.kill()
            finally:
                # Must run even if kill() raised, otherwise the client leaks.
                sandbox.close()

    @pytest.mark.timeout(120)
    @pytest.mark.order(1)
    def test_01a_network_policy_create(self) -> None:
        """Create a sandbox with a deny-by-default egress policy and probe it.

        Only ``pypi.org`` is allowed by the policy. A curl to
        ``www.github.com`` is expected to report an error, while a curl to
        ``pypi.org`` is expected to succeed.
        """
        banner = "=" * 80
        logger.info(banner)
        logger.info("TEST 1a: Creating sandbox with networkPolicy (sync)")
        logger.info(banner)

        conn_cfg = create_connection_config_sync()
        sandbox = SandboxSync.create(
            image=SandboxImageSpec(get_sandbox_image()),
            connection_config=conn_cfg,
            timeout=timedelta(minutes=2),
            ready_timeout=timedelta(seconds=30),
            network_policy=NetworkPolicy(
                defaultAction="deny",
                egress=[NetworkRule(action="allow", target="pypi.org")],
            ),
        )
        try:
            # NOTE(review): presumably gives the egress policy time to take effect; confirm.
            time.sleep(5)

            denied_probe = sandbox.commands.run("curl -I https://www.github.com")
            assert denied_probe.error is not None

            allowed_probe = sandbox.commands.run("curl -I https://pypi.org")
            assert allowed_probe.error is None
        finally:
            # Best-effort kill; close() and the transport shutdown follow regardless.
            try:
                sandbox.kill()
            except Exception:
                pass
            sandbox.close()
            try:
                conn_cfg.transport.close()
            except Exception:
                pass

    @pytest.mark.timeout(180)
    @pytest.mark.order(1)
    def test_01aa_network_policy_get_and_patch(self) -> None:
        """Read the egress policy of a sandbox, patch its rules, and re-check both.

        The sandbox starts with a deny-by-default policy allowing only
        ``pypi.org``. After verifying that state via ``get_egress_policy()`` and
        curl probes, the rules are patched to allow ``www.github.com`` and deny
        ``pypi.org``, and the same checks are repeated with inverted expectations.
        """
        logger.info("=" * 80)
        logger.info("TEST 1aa: networkPolicy get/patch (sync)")
        logger.info("=" * 80)

        cfg = create_connection_config_sync()
        sandbox = SandboxSync.create(
            image=SandboxImageSpec(get_sandbox_image()),
            connection_config=cfg,
            timeout=timedelta(minutes=2),
            ready_timeout=timedelta(seconds=30),
            network_policy=NetworkPolicy(
                defaultAction="deny",
                egress=[NetworkRule(action="allow", target="pypi.org")],
            ),
        )
        try:
            # NOTE(review): presumably gives the initial policy time to take effect; confirm.
            time.sleep(5)

            # Initial policy as reported by the API.
            policy = sandbox.get_egress_policy()
            assert policy.default_action == "deny"
            assert policy.egress is not None
            assert any(rule.target == "pypi.org" and rule.action == "allow" for rule in policy.egress)

            # Initial policy as observed from inside the sandbox.
            blocked = sandbox.commands.run("curl -I https://www.github.com")
            assert blocked.error is not None
            allowed = sandbox.commands.run("curl -I https://pypi.org")
            assert allowed.error is None

            # Invert the expectations: allow github, explicitly deny pypi.
            sandbox.patch_egress_rules(
                [
                    NetworkRule(action="allow", target="www.github.com"),
                    NetworkRule(action="deny", target="pypi.org"),
                ],
            )
            # NOTE(review): presumably waits for the patched rules to propagate; confirm.
            time.sleep(2)

            # Patched policy as reported by the API.
            patched_policy = sandbox.get_egress_policy()
            assert patched_policy.egress is not None
            assert any(
                rule.target == "www.github.com" and rule.action == "allow"
                for rule in patched_policy.egress
            )
            assert any(
                rule.target == "pypi.org" and rule.action == "deny"
                for rule in patched_policy.egress
            )

            # Patched policy as observed from inside the sandbox.
            github_allowed = sandbox.commands.run("curl -I https://www.github.com")
            assert github_allowed.error is None
            pypi_denied = sandbox.commands.run("curl -I https://pypi.org")
            assert pypi_denied.error is not None
        finally:
            # Best-effort kill; close() and the transport shutdown follow regardless.
            try:
                sandbox.kill()
            except Exception:
                pass
            sandbox.close()
            try:
                cfg.transport.close()
            except Exception:
                pass

    @pytest.mark.timeout(120)
    @pytest.mark.order(1)
    def test_01b_host_volume_mount(self) -> None:
        """Create a sandbox with a read-write host volume and exercise the mount (sync).

        Reads a marker file expected under the mount path, writes a new file
        into the mount from inside the sandbox, reads it back, and checks that
        the mount path is a directory.
        """
        banner = "=" * 80
        logger.info(banner)
        logger.info("TEST 1b: Creating sandbox with host volume mount (sync)")
        logger.info(banner)

        host_dir = "/tmp/opensandbox-e2e/host-volume-test"
        mount_point = "/mnt/host-data"

        conn_cfg = create_connection_config_sync()
        sandbox = SandboxSync.create(
            image=SandboxImageSpec(get_sandbox_image()),
            connection_config=conn_cfg,
            timeout=timedelta(minutes=2),
            ready_timeout=timedelta(seconds=30),
            volumes=[
                Volume(
                    name="test-host-vol",
                    host=Host(path=host_dir),
                    mountPath=mount_point,
                    readOnly=False,
                ),
            ],
        )
        try:
            logger.info("✓ Sandbox with volume created: %s", sandbox.id)

            # 1) The marker file under the mount must be visible inside the sandbox.
            marker_read = sandbox.commands.run(f"cat {mount_point}/marker.txt")
            assert marker_read.error is None, f"Failed to read marker file: {marker_read.error}"
            assert len(marker_read.logs.stdout) == 1
            assert marker_read.logs.stdout[0].text == "opensandbox-e2e-marker"
            logger.info("✓ Host marker file read successfully inside sandbox")

            # 2) The mount is read-write, so writing from inside the sandbox must work.
            write_outcome = sandbox.commands.run(
                f"echo 'written-from-sandbox' > {mount_point}/sandbox-output.txt"
            )
            assert write_outcome.error is None, f"Failed to write file: {write_outcome.error}"

            # 3) Read the freshly written file back.
            readback = sandbox.commands.run(f"cat {mount_point}/sandbox-output.txt")
            assert readback.error is None
            assert len(readback.logs.stdout) == 1
            assert readback.logs.stdout[0].text == "written-from-sandbox"
            logger.info("✓ File written and verified inside sandbox")

            # 4) The mount path itself must be a directory.
            dir_check = sandbox.commands.run(f"test -d {mount_point} && echo OK")
            assert dir_check.error is None
            assert len(dir_check.logs.stdout) == 1
            assert dir_check.logs.stdout[0].text == "OK"
            logger.info("✓ Mount path is a valid directory")

        finally:
            # Best-effort kill; close() and the transport shutdown follow regardless.
            try:
                sandbox.kill()
            except Exception:
                pass
            sandbox.close()
            try:
                conn_cfg.transport.close()
            except Exception:
                pass

        logger.info("TEST 1b PASSED: Host volume mount test completed successfully")

    @pytest.mark.timeout(120)
    @pytest.mark.order(1)
    def test_01c_host_volume_mount_readonly(self) -> None:
        """Create a sandbox with a read-only host volume and verify writes fail (sync).

        Reads a marker file expected under the mount path, then attempts to
        ``touch`` a new file there and expects the command to report an error.
        """
        banner = "=" * 80
        logger.info(banner)
        logger.info("TEST 1c: Creating sandbox with read-only host volume mount (sync)")
        logger.info(banner)

        host_dir = "/tmp/opensandbox-e2e/host-volume-test"
        mount_point = "/mnt/host-data-ro"

        conn_cfg = create_connection_config_sync()
        sandbox = SandboxSync.create(
            image=SandboxImageSpec(get_sandbox_image()),
            connection_config=conn_cfg,
            timeout=timedelta(minutes=2),
            ready_timeout=timedelta(seconds=30),
            volumes=[
                Volume(
                    name="test-host-vol-ro",
                    host=Host(path=host_dir),
                    mountPath=mount_point,
                    readOnly=True,
                ),
            ],
        )
        try:
            logger.info("✓ Sandbox with read-only volume created: %s", sandbox.id)

            # 1) Reading the marker file must still work on a read-only mount.
            marker_read = sandbox.commands.run(f"cat {mount_point}/marker.txt")
            assert marker_read.error is None, f"Failed to read marker file: {marker_read.error}"
            assert len(marker_read.logs.stdout) == 1
            assert marker_read.logs.stdout[0].text == "opensandbox-e2e-marker"
            logger.info("✓ Host marker file read successfully in read-only mount")

            # 2) Creating a file on the read-only mount must be rejected.
            write_attempt = sandbox.commands.run(
                f"touch {mount_point}/should-fail.txt"
            )
            assert write_attempt.error is not None, "Write should fail on read-only mount"
            logger.info("✓ Write correctly denied on read-only mount")

        finally:
            # Best-effort kill; close() and the transport shutdown follow regardless.
            try:
                sandbox.kill()
            except Exception:
                pass
            sandbox.close()
            try:
                conn_cfg.transport.close()
            except Exception:
                pass

        logger.info("TEST 1c PASSED: Read-only host volume mount test completed successfully")

    @pytest.mark.timeout(120)
    @pytest.mark.order(1)
    def test_01d_pvc_named_volume_mount(self) -> None:
        """Test creating a sandbox with a PVC (Docker named volume) mount (sync).

        The sandbox mounts the claim ``pvc_volume_name`` read-write at
        ``container_mount_path``. The test reads the seeded marker file, writes
        a new file through the mount, reads it back, and checks that the mount
        path is a directory.

        Cleanup is layered so the connection transport is closed on every
        path, including when sandbox creation fails or ``sandbox.close()``
        raises.
        """
        logger.info("=" * 80)
        logger.info("TEST 1d: Creating sandbox with PVC named volume mount (sync)")
        logger.info("=" * 80)

        # NOTE(review): the named volume and its marker.txt are presumably
        # provisioned by the E2E environment setup - confirm.
        pvc_volume_name = "opensandbox-e2e-pvc-test"
        container_mount_path = "/mnt/pvc-data"

        cfg = create_connection_config_sync()
        try:
            sandbox = SandboxSync.create(
                image=SandboxImageSpec(get_sandbox_image()),
                connection_config=cfg,
                timeout=timedelta(minutes=2),
                ready_timeout=timedelta(seconds=30),
                volumes=[
                    Volume(
                        name="test-pvc-vol",
                        pvc=PVC(claimName=pvc_volume_name),
                        mountPath=container_mount_path,
                        readOnly=False,
                    ),
                ],
            )
            try:
                logger.info("✓ Sandbox with PVC volume created: %s", sandbox.id)

                # Step 1: Verify the marker file seeded into the named volume is readable
                result = sandbox.commands.run(f"cat {container_mount_path}/marker.txt")
                assert result.error is None, f"Failed to read marker file: {result.error}"
                assert len(result.logs.stdout) == 1
                assert result.logs.stdout[0].text == "pvc-marker-data"
                logger.info("✓ PVC marker file read successfully inside sandbox")

                # Step 2: Write a file from inside the sandbox to the named volume
                result = sandbox.commands.run(
                    f"echo 'written-to-pvc' > {container_mount_path}/pvc-output.txt"
                )
                assert result.error is None, f"Failed to write file: {result.error}"

                # Step 3: Verify the written file is readable
                result = sandbox.commands.run(f"cat {container_mount_path}/pvc-output.txt")
                assert result.error is None
                assert len(result.logs.stdout) == 1
                assert result.logs.stdout[0].text == "written-to-pvc"
                logger.info("✓ File written and verified inside sandbox via PVC mount")

                # Step 4: Verify the mount path is a proper directory
                result = sandbox.commands.run(f"test -d {container_mount_path} && echo OK")
                assert result.error is None
                assert len(result.logs.stdout) == 1
                assert result.logs.stdout[0].text == "OK"
                logger.info("✓ PVC mount path is a valid directory")

            finally:
                try:
                    sandbox.kill()
                except Exception:
                    # Best-effort: a failed kill must not hide the test outcome.
                    pass
                sandbox.close()
        finally:
            # Outermost layer: runs even if create() or close() raised, so the
            # transport created for this test is never left open.
            try:
                cfg.transport.close()
            except Exception:
                pass

        logger.info("TEST 1d PASSED: PVC named volume mount test completed successfully")

    @pytest.mark.timeout(120)
    @pytest.mark.order(1)
    def test_01e_pvc_named_volume_mount_readonly(self) -> None:
        """Test creating a sandbox with a read-only PVC (Docker named volume) mount (sync).

        The sandbox mounts the claim ``pvc_volume_name`` read-only at
        ``container_mount_path``. The test checks that the marker file is
        readable and that ``touch`` inside the mount reports an error.

        Cleanup is layered so the connection transport is closed on every
        path, including when sandbox creation fails or ``sandbox.close()``
        raises.
        """
        logger.info("=" * 80)
        logger.info("TEST 1e: Creating sandbox with read-only PVC named volume mount (sync)")
        logger.info("=" * 80)

        # NOTE(review): the named volume and its marker.txt are presumably
        # provisioned by the E2E environment setup - confirm.
        pvc_volume_name = "opensandbox-e2e-pvc-test"
        container_mount_path = "/mnt/pvc-data-ro"

        cfg = create_connection_config_sync()
        try:
            sandbox = SandboxSync.create(
                image=SandboxImageSpec(get_sandbox_image()),
                connection_config=cfg,
                timeout=timedelta(minutes=2),
                ready_timeout=timedelta(seconds=30),
                volumes=[
                    Volume(
                        name="test-pvc-vol-ro",
                        pvc=PVC(claimName=pvc_volume_name),
                        mountPath=container_mount_path,
                        readOnly=True,
                    ),
                ],
            )
            try:
                logger.info("✓ Sandbox with read-only PVC volume created: %s", sandbox.id)

                # Step 1: Verify the marker file is readable
                result = sandbox.commands.run(f"cat {container_mount_path}/marker.txt")
                assert result.error is None, f"Failed to read marker file: {result.error}"
                assert len(result.logs.stdout) == 1
                assert result.logs.stdout[0].text == "pvc-marker-data"
                logger.info("✓ PVC marker file read successfully in read-only mount")

                # Step 2: Verify writing is denied on read-only mount
                result = sandbox.commands.run(
                    f"touch {container_mount_path}/should-fail.txt"
                )
                assert result.error is not None, "Write should fail on read-only PVC mount"
                logger.info("✓ Write correctly denied on read-only PVC mount")

            finally:
                try:
                    sandbox.kill()
                except Exception:
                    # Best-effort: a failed kill must not hide the test outcome.
                    pass
                sandbox.close()
        finally:
            # Outermost layer: runs even if create() or close() raised, so the
            # transport created for this test is never left open.
            try:
                cfg.transport.close()
            except Exception:
                pass

        logger.info("TEST 1e PASSED: Read-only PVC named volume mount test completed successfully")

    @pytest.mark.timeout(120)
    @pytest.mark.order(1)
    def test_01f_pvc_named_volume_subpath_mount(self) -> None:
        """Test creating a sandbox with a PVC named volume mount using subPath (sync).

        The sandbox mounts ``datasets/train`` from the claim ``pvc_volume_name``
        at ``container_mount_path``. The test checks that the subpath marker is
        readable, that the listing shows ``marker.txt`` but no ``datasets``
        entry, and that a file written through the mount can be read back.

        Cleanup is layered so the connection transport is closed on every
        path, including when sandbox creation fails or ``sandbox.close()``
        raises.
        """
        logger.info("=" * 80)
        logger.info("TEST 1f: Creating sandbox with PVC named volume subPath mount (sync)")
        logger.info("=" * 80)

        # NOTE(review): the named volume and datasets/train/marker.txt are
        # presumably provisioned by the E2E environment setup - confirm.
        pvc_volume_name = "opensandbox-e2e-pvc-test"
        container_mount_path = "/mnt/train"

        cfg = create_connection_config_sync()
        try:
            sandbox = SandboxSync.create(
                image=SandboxImageSpec(get_sandbox_image()),
                connection_config=cfg,
                timeout=timedelta(minutes=2),
                ready_timeout=timedelta(seconds=30),
                volumes=[
                    Volume(
                        name="test-pvc-subpath",
                        pvc=PVC(claimName=pvc_volume_name),
                        mountPath=container_mount_path,
                        readOnly=False,
                        subPath="datasets/train",
                    ),
                ],
            )
            try:
                logger.info("✓ Sandbox with PVC subPath volume created: %s", sandbox.id)

                # Step 1: Verify the subpath marker file is readable
                result = sandbox.commands.run(f"cat {container_mount_path}/marker.txt")
                assert result.error is None, f"Failed to read subpath marker file: {result.error}"
                assert len(result.logs.stdout) == 1
                assert result.logs.stdout[0].text == "pvc-subpath-marker"
                logger.info("✓ SubPath marker file read successfully")

                # Step 2: Verify we only see the subpath contents (not the full volume)
                result = sandbox.commands.run(f"ls {container_mount_path}/")
                assert result.error is None
                stdout_text = "\n".join(msg.text for msg in result.logs.stdout)
                assert "marker.txt" in stdout_text
                assert "datasets" not in stdout_text
                logger.info("✓ Only subPath contents are visible inside the sandbox")

                # Step 3: Write a file and verify (retry read-back for transient SSE drops)
                result = sandbox.commands.run(
                    f"echo 'subpath-write-test' > {container_mount_path}/output.txt"
                )
                assert result.error is None
                # Up to three read attempts; stop at the first one that
                # actually delivered stdout.
                for _attempt in range(3):
                    result = sandbox.commands.run(f"cat {container_mount_path}/output.txt")
                    if result.logs.stdout:
                        break
                    time.sleep(1)
                assert result.error is None
                assert len(result.logs.stdout) == 1
                assert result.logs.stdout[0].text == "subpath-write-test"
                logger.info("✓ File written and verified inside subPath mount")

            finally:
                try:
                    sandbox.kill()
                except Exception:
                    # Best-effort: a failed kill must not hide the test outcome.
                    pass
                sandbox.close()
        finally:
            # Outermost layer: runs even if create() or close() raised, so the
            # transport created for this test is never left open.
            try:
                cfg.transport.close()
            except Exception:
                pass

        logger.info("TEST 1f PASSED: PVC subPath named volume mount test completed successfully")

    @pytest.mark.timeout(120)
    @pytest.mark.order(2)
    def test_02_basic_command_execution(self) -> None:
        """Test basic command execution.

        Runs four commands against the shared class-level sandbox:

        1. ``echo`` with all handlers attached - checks the returned execution
           and the events delivered to the handlers.
        2. ``pwd`` with ``working_directory="/tmp"``.
        3. ``sleep 30`` in background mode - checks the call returns quickly.
        4. A nonexistent command with handlers attached - checks the error
           path (error event, stderr output, no completion event).
        """
        TestSandboxE2ESync._ensure_sandbox_created()
        sandbox = TestSandboxE2ESync.sandbox
        assert sandbox is not None

        logger.info("=" * 80)
        logger.info("TEST 2: Testing basic command execution (sync)")
        logger.info("=" * 80)

        # Collectors filled by the handler callbacks below; they are reused
        # (after clearing) for the failing-command run further down.
        stdout_messages: list[OutputMessage] = []
        stderr_messages: list[OutputMessage] = []
        results = []
        completed_events: list[ExecutionComplete] = []
        errors: list[ExecutionError] = []
        init_events: list[ExecutionInit] = []

        def on_stdout(msg):
            stdout_messages.append(msg)

        def on_stderr(msg):
            stderr_messages.append(msg)

        def on_result(result):
            results.append(result)

        def on_execution_complete(complete):
            completed_events.append(complete)

        def on_error(error):
            errors.append(error)

        def on_init(init):
            init_events.append(init)

        handlers = ExecutionHandlersSync(
            on_stdout=on_stdout,
            on_stderr=on_stderr,
            on_result=on_result,
            on_execution_complete=on_execution_complete,
            on_error=on_error,
            on_init=on_init,
        )

        # --- 1. Successful foreground command with handlers -----------------
        echo_result = sandbox.commands.run(
            "echo 'Hello OpenSandbox E2E'",
            handlers=handlers,
        )

        # Returned execution object.
        assert echo_result is not None
        assert echo_result.id is not None and echo_result.id.strip()
        assert echo_result.error is None
        assert len(echo_result.logs.stdout) == 1
        assert echo_result.logs.stdout[0].text == "Hello OpenSandbox E2E"
        assert echo_result.logs.stdout[0].is_error is False
        _assert_recent_timestamp_ms(echo_result.logs.stdout[0].timestamp)
        assert len(echo_result.logs.stderr) == 0

        # Lifecycle events seen by the handlers: exactly one init and one complete.
        assert len(init_events) == 1
        assert len(completed_events) == 1
        assert init_events[0].id == echo_result.id
        _assert_recent_timestamp_ms(init_events[0].timestamp)
        _assert_recent_timestamp_ms(completed_events[0].timestamp)
        assert completed_events[0].execution_time_in_millis >= 0

        # Output seen by the handlers mirrors the returned logs.
        assert len(stdout_messages) == 1
        assert stdout_messages[0].text == "Hello OpenSandbox E2E"
        assert stdout_messages[0].is_error is False
        _assert_recent_timestamp_ms(stdout_messages[0].timestamp)
        assert len(errors) == 0

        # --- 2. working_directory option -------------------------------------
        pwd_result = sandbox.commands.run(
            "pwd",
            opts=RunCommandOpts(working_directory="/tmp"),
        )
        assert pwd_result is not None
        assert pwd_result.id is not None and pwd_result.id.strip()
        assert pwd_result.error is None
        assert len(pwd_result.logs.stdout) == 1
        assert pwd_result.logs.stdout[0].text == "/tmp"
        assert pwd_result.logs.stdout[0].is_error is False
        _assert_recent_timestamp_ms(pwd_result.logs.stdout[0].timestamp)

        # --- 3. Background command must not block ----------------------------
        # The command sleeps 30 s; the run() call itself must return in < 10 s.
        start_time = time.time()
        sandbox.commands.run(
            "sleep 30",
            opts=RunCommandOpts(background=True),
        )
        end_time = time.time()
        execution_time_ms = (end_time - start_time) * 1000
        assert execution_time_ms < 10000

        # Reset the collectors so the assertions below only see events from
        # the failing command (``results`` is not asserted on and is left as is).
        stdout_messages.clear()
        stderr_messages.clear()
        errors.clear()
        completed_events.clear()
        init_events.clear()

        # --- 4. Failing command with handlers ---------------------------------
        fail_result = sandbox.commands.run(
            "nonexistent-command-that-does-not-exist",
            handlers=handlers,
        )

        assert fail_result.error is not None
        assert fail_result.error.name == "CommandExecError"
        assert len(fail_result.logs.stderr) > 0
        assert any(
            "nonexistent-command-that-does-not-exist" in m.text for m in fail_result.logs.stderr
        )
        assert all(m.is_error is True for m in fail_result.logs.stderr)
        _assert_recent_timestamp_ms(fail_result.logs.stderr[0].timestamp)

        assert len(init_events) == 1
        assert init_events[0].id == fail_result.id
        _assert_recent_timestamp_ms(init_events[0].timestamp)
        # Contract: error and complete are mutually exclusive; failing command should emit error only.
        assert len(errors) >= 1
        assert len(completed_events) == 0

        assert errors[0].name == "CommandExecError"
        assert len(stderr_messages) > 0
        assert "nonexistent-command-that-does-not-exist" in stderr_messages[0].text

    @pytest.mark.timeout(120)
    @pytest.mark.order(3)
    def test_02a_command_status_and_logs(self) -> None:
        """Check status lookup and incremental log retrieval for a background command (sync).

        Starts a background shell command that prints two lines, queries its
        status once, then polls the background logs (carrying the cursor
        forward) until the second line shows up or 20 polls have been made.
        """
        TestSandboxE2ESync._ensure_sandbox_created()
        sbx = TestSandboxE2ESync.sandbox
        assert sbx is not None

        started = sbx.commands.run(
            "sh -c 'echo log-line-1; echo log-line-2; sleep 2'",
            opts=RunCommandOpts(background=True),
        )
        assert started.id is not None
        command_id = started.id

        # Status must refer to the same command and expose a boolean flag.
        command_status = sbx.commands.get_command_status(command_id)
        assert command_status.id == command_id
        assert isinstance(command_status.running, bool)

        collected = ""
        cursor = None
        polls_left = 20
        while polls_left > 0:
            polls_left -= 1
            chunk = sbx.commands.get_background_command_logs(command_id, cursor=cursor)
            collected += chunk.content
            # Only advance the cursor when the response actually carries one.
            if chunk.cursor is not None:
                cursor = chunk.cursor
            if "log-line-2" in collected:
                break
            time.sleep(1.0)

        assert "log-line-1" in collected
        assert "log-line-2" in collected

    @pytest.mark.timeout(120)
    @pytest.mark.order(3)
    def test_02b_run_command_with_envs(self) -> None:
        """Verify per-command environment injection through RunCommandOpts.envs (sync).

        The same probe command is run twice: once without extra environment
        (the variable must be empty) and once with ``envs`` set (the variable
        must echo the injected value).
        """
        TestSandboxE2ESync._ensure_sandbox_created()
        sbx = TestSandboxE2ESync.sandbox
        assert sbx is not None

        env_key = "OPEN_SANDBOX_E2E_CMD_ENV"
        env_value = f"env-ok-{int(time.time())}"
        # Prints the variable's value, or the sentinel __EMPTY__ when unset/empty.
        probe_command = (
            f"sh -c 'if [ -z \"${{{env_key}:-}}\" ]; then echo \"__EMPTY__\"; "
            f"else echo \"${{{env_key}}}\"; fi'"
        )

        def _stdout_text(execution) -> str:
            # Join every stdout message and trim surrounding whitespace.
            return "\n".join(msg.text for msg in execution.logs.stdout).strip()

        # Without injection the variable is expected to be empty.
        without_env = sbx.commands.run(probe_command)
        assert without_env.error is None
        assert _stdout_text(without_env) == "__EMPTY__"

        # With injection the probe must print the value passed for this command.
        command_envs = {
            env_key: env_value,
            "OPEN_SANDBOX_E2E_SECOND_ENV": "second-ok",
        }
        with_env = sbx.commands.run(
            probe_command,
            opts=RunCommandOpts(envs=command_envs),
        )
        assert with_env.error is None
        assert _stdout_text(with_env) == env_value

    @pytest.mark.timeout(120)
    @pytest.mark.order(4)
    def test_03_basic_filesystem_operations(self) -> None:
        """Test basic filesystem operations.

        Exercises the ``sandbox.files`` API end to end on the shared
        class-level sandbox, in this order: create directories, write files
        (str / bytes / stream payloads), read them back (full, ranged, bytes,
        streamed), inspect metadata, search, change permissions, overwrite,
        replace contents, move, delete files and finally delete directories.
        Each step depends on the state left by the previous one.
        """
        TestSandboxE2ESync._ensure_sandbox_created()
        sandbox = TestSandboxE2ESync.sandbox
        assert sandbox is not None

        logger.info("=" * 80)
        logger.info("TEST 3: Testing basic filesystem operations (sync)")
        logger.info("=" * 80)

        # Millisecond timestamp suffix keeps directory names unique per run.
        test_dir1 = f"/tmp/fs_test1_{int(time.time() * 1000)}"
        test_dir2 = f"/tmp/fs_test2_{int(time.time() * 1000)}"

        # --- Create directories ------------------------------------------------
        # NOTE(review): modes are passed as decimal literals (755, not 0o755) and
        # compared the same way below; the API appears to treat the digits as the
        # permission string - confirm against the SDK.
        dir_entry1 = WriteEntry(path=test_dir1, mode=755)
        dir_entry2 = WriteEntry(path=test_dir2, mode=644)
        sandbox.files.create_directories([dir_entry1, dir_entry2])

        dir_info_map = sandbox.files.get_file_info([test_dir1, test_dir2])
        assert test_dir1 in dir_info_map
        assert test_dir2 in dir_info_map
        assert dir_info_map[test_dir1].path == test_dir1
        assert dir_info_map[test_dir2].path == test_dir2
        assert dir_info_map[test_dir1].mode == 755
        assert dir_info_map[test_dir2].mode == 644
        assert dir_info_map[test_dir1].owner
        assert dir_info_map[test_dir1].group
        _assert_times_close(dir_info_map[test_dir1].created_at, dir_info_map[test_dir1].modified_at)

        # Cross-check through a shell command: exactly the two new directories
        # are expected to match "fs_test" under /tmp.
        ls_result = sandbox.commands.run(
            "ls -la | grep fs_test",
            opts=RunCommandOpts(working_directory="/tmp"),
        )
        assert len(ls_result.logs.stdout) == 2

        # --- Write files with three payload types -------------------------------
        test_file1 = f"{test_dir1}/test_file1.txt"
        test_file2 = f"{test_dir1}/test_file2.txt"
        test_file3 = f"{test_dir1}/test_file3.txt"
        test_content = "Hello Filesystem!\nLine 2 with special chars: åäö\nLine 3"

        # file1: str payload, file2: bytes payload, file3: stream payload with
        # explicit owner/group.
        write_entry1 = WriteEntry(path=test_file1, data=test_content, mode=644)
        write_entry2 = WriteEntry(path=test_file2, data=test_content.encode("utf-8"), mode=755)
        write_entry3 = WriteEntry(
            path=test_file3,
            data=BytesIO(test_content.encode("utf-8")),
            group="nogroup",
            owner="nobody",
            mode=755,
        )
        sandbox.files.write_files([write_entry1, write_entry2, write_entry3])

        # --- Read back through every read API ------------------------------------
        read_content1 = sandbox.files.read_file(test_file1, encoding="utf-8")
        # bytes=0-9 is an inclusive range, i.e. the first 10 bytes (all ASCII here).
        read_content1_partial = sandbox.files.read_file(
            test_file1,
            encoding="utf-8",
            range_header="bytes=0-9",
        )
        read_bytes2 = sandbox.files.read_bytes(test_file2)
        read_content2 = read_bytes2.decode("utf-8")

        stream3 = sandbox.files.read_bytes_stream(test_file3)
        read_content3_bytes = b""
        for chunk in stream3:
            read_content3_bytes += chunk
        read_content3 = read_content3_bytes.decode("utf-8")

        # Size is measured in encoded bytes, not characters (content is non-ASCII).
        expected_size = len(test_content.encode("utf-8"))
        assert read_content1 == test_content
        assert read_content2 == test_content
        assert read_content3 == test_content
        assert read_content1_partial == test_content[:10]

        # --- File metadata ---------------------------------------------------------
        file_info_map = sandbox.files.get_file_info([test_file1, test_file2, test_file3])
        file_info1 = file_info_map[test_file1]
        assert file_info1.path == test_file1
        assert file_info1.size == expected_size
        assert file_info1.mode == 644
        assert file_info1.owner is not None
        assert file_info1.group is not None
        _assert_times_close(file_info1.created_at, file_info1.modified_at)

        file_info2 = file_info_map[test_file2]
        assert file_info2.path == test_file2
        assert file_info2.size == expected_size
        assert file_info2.mode == 755
        assert file_info2.owner is not None
        assert file_info2.group is not None
        _assert_times_close(file_info2.created_at, file_info2.modified_at)

        # file3 was written with explicit owner/group, so those are checked exactly.
        file_info3 = file_info_map[test_file3]
        assert file_info3.path == test_file3
        assert file_info3.size == expected_size
        assert file_info3.mode == 755
        assert file_info3.owner == "nobody"
        assert file_info3.group == "nogroup"
        _assert_times_close(file_info3.created_at, file_info3.modified_at)

        # --- Search ------------------------------------------------------------------
        search_all_entry = SearchEntry(path=test_dir1, pattern="*")
        all_files_list = sandbox.files.search(search_all_entry)
        all_files = {entry.path: entry for entry in all_files_list}
        assert len(all_files) == 3
        assert test_file1 in all_files
        assert test_file2 in all_files
        assert test_file3 in all_files
        assert all_files[test_file1].size == expected_size
        _assert_times_close(all_files[test_file1].created_at, all_files[test_file1].modified_at)

        # --- Permissions ---------------------------------------------------------------
        perm_entry1 = SetPermissionEntry(path=test_file1, mode=755, owner="nobody", group="nogroup")
        perm_entry2 = SetPermissionEntry(path=test_file2, mode=600, owner="nobody", group="nogroup")
        sandbox.files.set_permissions([perm_entry1, perm_entry2])

        updated_info_map = sandbox.files.get_file_info([test_file1, test_file2])
        updated_info1 = updated_info_map[test_file1]
        updated_info2 = updated_info_map[test_file2]
        assert updated_info1.mode == 755
        assert updated_info1.owner == "nobody"
        assert updated_info1.group == "nogroup"
        assert updated_info2.mode == 600
        assert updated_info2.owner == "nobody"
        assert updated_info2.group == "nogroup"

        # --- Overwrite existing files -----------------------------------------------------
        before_update_info = sandbox.files.get_file_info([test_file1])[test_file1]
        updated_content1 = test_content + "\nAppended line to file1"
        updated_content2 = test_content + "\nAppended line to file2"
        # Short pause so the modification timestamp can differ from the previous one.
        time.sleep(0.05)
        sandbox.files.write_files(
            [
                WriteEntry(path=test_file1, data=updated_content1, mode=644),
                WriteEntry(path=test_file2, data=updated_content2, mode=755),
            ]
        )

        new_content1 = sandbox.files.read_file(test_file1, encoding="utf-8")
        new_content2 = sandbox.files.read_file(test_file2, encoding="utf-8")
        assert new_content1 == updated_content1
        assert new_content2 == updated_content2

        after_update_info = sandbox.files.get_file_info([test_file1])[test_file1]
        assert after_update_info.size == len(updated_content1.encode("utf-8"))
        _assert_modified_updated(before_update_info.modified_at, after_update_info.modified_at, min_delta_ms=1)

        # Replace file contents via API (replace_contents)
        before_replace_info = after_update_info
        time.sleep(0.05)
        sandbox.files.replace_contents(
            [
                ContentReplaceEntry(
                    path=test_file1,
                    old_content="Appended line to file1",
                    new_content="Replaced line in file1",
                )
            ]
        )
        replaced_content1 = sandbox.files.read_file(test_file1, encoding="utf-8")
        assert "Replaced line in file1" in replaced_content1
        assert "Appended line to file1" not in replaced_content1
        after_replace_info = sandbox.files.get_file_info([test_file1])[test_file1]
        _assert_modified_updated(before_replace_info.modified_at, after_replace_info.modified_at, min_delta_ms=1)

        # Move/rename a file via API (move_files)
        moved_path = f"{test_dir2}/moved_file3.txt"
        sandbox.files.move_files([MoveEntry(src=test_file3, dest=moved_path)])
        moved_bytes = sandbox.files.read_bytes(moved_path)
        assert moved_bytes.decode("utf-8") == test_content
        # The source path must no longer be readable after the move.
        with pytest.raises(Exception):
            sandbox.files.read_bytes(test_file3)

        # Delete file via API (delete_files)
        sandbox.files.delete_files([test_file2])
        with pytest.raises(Exception):
            sandbox.files.read_file(test_file2, encoding="utf-8")

        # Only file1 remains in test_dir1: file2 was deleted, file3 was moved out.
        files_after = sandbox.files.search(SearchEntry(path=test_dir1, pattern="*"))
        assert {e.path for e in files_after} == {test_file1}

        # Delete directories recursively (delete_directories)
        sandbox.files.delete_directories([test_dir1, test_dir2])
        verify_dirs_deleted = sandbox.commands.run(
            f"test ! -d {test_dir1} && test ! -d {test_dir2} && echo OK",
            opts=RunCommandOpts(working_directory="/tmp"),
        )
        assert verify_dirs_deleted.error is None
        assert len(verify_dirs_deleted.logs.stdout) == 1
        assert verify_dirs_deleted.logs.stdout[0].text == "OK"

    @pytest.mark.timeout(360)
    @pytest.mark.order(5)
    def test_04_interrupt_command(self) -> None:
        """Test interrupting a long-running command.

        Runs ``sleep 30`` on a worker thread, waits up to 15 s for the init
        event that carries the execution id, interrupts that execution, and
        checks that the run call returns well before the sleep would have
        finished and that it reports a completion and/or an error.

        Durations are measured with ``time.monotonic()`` so wall-clock
        adjustments on the test host cannot distort the deadline or the
        elapsed-time assertion.
        """
        TestSandboxE2ESync._ensure_sandbox_created()
        sandbox = TestSandboxE2ESync.sandbox
        assert sandbox is not None

        logger.info("=" * 80)
        logger.info("TEST 4: Testing command interrupt (sync)")
        logger.info("=" * 80)

        init_events: list[ExecutionInit] = []
        completed_events: list[ExecutionComplete] = []
        errors: list[ExecutionError] = []

        def on_init(init: ExecutionInit):
            init_events.append(init)

        def on_complete(complete: ExecutionComplete):
            completed_events.append(complete)

        def on_error(error: ExecutionError):
            errors.append(error)

        handlers = ExecutionHandlersSync(
            on_init=on_init,
            on_execution_complete=on_complete,
            on_error=on_error,
        )

        start = time.monotonic()
        with ThreadPoolExecutor(max_workers=1) as ex:
            # run() blocks until the command ends, so it goes to a worker
            # thread while this thread issues the interrupt.
            future = ex.submit(
                sandbox.commands.run,
                "sleep 30",
                handlers=handlers,
            )
            # The execution id only becomes known through the init event.
            deadline = time.monotonic() + 15
            while len(init_events) == 0 and time.monotonic() < deadline:
                time.sleep(0.1)
            assert len(init_events) == 1
            assert init_events[0].id is not None and init_events[0].id.strip()
            _assert_recent_timestamp_ms(init_events[0].timestamp)

            sandbox.commands.interrupt(init_events[0].id)
            execution = future.result(timeout=30)

        elapsed = time.monotonic() - start
        assert execution is not None
        assert execution.id == init_events[0].id
        assert elapsed < 20, f"Interrupted command took too long: {elapsed:.2f}s"
        # The condition accepts a completion, an error, or both; the message
        # states exactly that.
        assert (len(completed_events) > 0) or (len(errors) > 0), (
            f"expected at least one of complete/error, got complete={len(completed_events)} "
            f"error={len(errors)}"
        )
        if len(completed_events) > 0:
            assert len(completed_events) == 1
            _assert_recent_timestamp_ms(completed_events[0].timestamp, tolerance_ms=180_000)
        assert execution.error is not None or len(execution.logs.stderr) > 0
        if execution.error is not None:
            assert execution.error.name
            assert execution.error.value
            _assert_recent_timestamp_ms(execution.error.timestamp, tolerance_ms=180_000)

    @pytest.mark.timeout(120)
    @pytest.mark.order(6)
    def test_05_sandbox_pause(self) -> None:
        """Pause the shared sandbox and verify it ends up Paused and unreachable."""
        TestSandboxE2ESync._ensure_sandbox_created()
        sbx = TestSandboxE2ESync.sandbox
        assert sbx is not None

        logger.info("=" * 80)
        logger.info("TEST 5: Testing sandbox pause operation (sync)")
        logger.info("=" * 80)

        # Earlier tests already exercised the sandbox, so a short settle is enough.
        time.sleep(2)
        assert sbx.is_healthy(), "Sandbox should be healthy before pause"

        sbx.pause()

        # Poll once per second (30 polls at most) and stop at the first state
        # that is not the transitional "Pausing".
        final_status = None
        for poll_count in range(1, 31):
            time.sleep(1)
            observed = sbx.get_info().status
            logger.info("Poll %s: Status = %s", poll_count, observed.state)
            if observed.state != "Pausing":
                final_status = observed
                break

        assert final_status is not None
        assert final_status.state == "Paused"

        # A paused sandbox must not answer health checks. The probe runs on a
        # worker thread with a 15 s cap because the HTTP request itself could
        # otherwise wait much longer; both a timeout and an immediate
        # connection error count as "unhealthy".
        # shutdown(wait=False) avoids blocking on a request that is still
        # pending after the cap expired.
        probe_pool = ThreadPoolExecutor(max_workers=1)
        try:
            probe = probe_pool.submit(sbx.is_healthy)
            healthy = probe.result(timeout=15)
        except Exception:
            healthy = False
        finally:
            probe_pool.shutdown(wait=False)
        assert healthy is False, "Sandbox should be unhealthy after pause"

    @pytest.mark.timeout(120)
    @pytest.mark.order(7)
    def test_06_sandbox_resume(self) -> None:
        """Resume the shared sandbox and verify it is Running, healthy and usable."""
        TestSandboxE2ESync._ensure_sandbox_created()
        sandbox = TestSandboxE2ESync.sandbox
        assert sandbox is not None

        logger.info("=" * 80)
        logger.info("TEST 6: Testing sandbox resume operation (sync)")
        logger.info("=" * 80)

        # Later tests read the class attribute, so store the resumed instance there.
        sandbox = SandboxSync.resume(
            sandbox_id=sandbox.id,
            connection_config=TestSandboxE2ESync.connection_config,
        )
        TestSandboxE2ESync.sandbox = sandbox

        # Poll once per second (60 polls at most) until the state is "Running".
        final_status = None
        for poll_count in range(1, 61):
            time.sleep(1)
            observed = sandbox.get_info().status
            logger.info("Poll %s: Status = %s", poll_count, observed.state)
            if observed.state == "Running":
                final_status = observed
                break

        assert final_status is not None
        assert final_status.state == "Running"

        # Health may lag behind the reported state; allow up to 30 checks.
        healthy = False
        checks_left = 30
        while checks_left > 0:
            checks_left -= 1
            healthy = sandbox.is_healthy()
            if healthy:
                break
            time.sleep(1)
        assert healthy is True, "Sandbox should be healthy after resume"

        # Smoke check: a trivial command must run on the resumed instance.
        smoke = sandbox.commands.run("echo resume-ok")
        assert smoke.error is None
        assert len(smoke.logs.stdout) == 1
        assert smoke.logs.stdout[0].text == "resume-ok"

    @pytest.mark.timeout(120)
    @pytest.mark.order(8)
    def test_07_x_request_id_passthrough_on_server_error(self) -> None:
        """Check that a caller-supplied X-Request-ID is surfaced on API errors (sync).

        Connects to a sandbox id that is not expected to exist, using a
        dedicated connection config carrying an ``X-Request-ID`` header, and
        asserts the raised ``SandboxApiException`` reports that same id.
        """
        request_id = f"e2e-py-sync-server-{int(time.time() * 1000)}"
        missing_sandbox_id = f"missing-{request_id}"

        pool_limits = httpx.Limits(
            max_connections=100,
            max_keepalive_connections=20,
            keepalive_expiry=15,
        )
        cfg = ConnectionConfigSync(
            domain=TEST_DOMAIN,
            api_key=TEST_API_KEY,
            request_timeout=timedelta(minutes=3),
            protocol=TEST_PROTOCOL,
            headers={"X-Request-ID": request_id},
            transport=httpx.HTTPTransport(limits=pool_limits),
        )

        try:
            # Either connect() or the follow-up get_info() may be the call that fails.
            with pytest.raises(SandboxApiException) as raised:
                handle = SandboxSync.connect(missing_sandbox_id, connection_config=cfg)
                handle.get_info()
            assert raised.value.request_id == request_id
        finally:
            # The transport was created here, so it is closed here.
            cfg.transport.close()


================================================
FILE: tests/python/tests/test_sandbox_manager_e2e.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Comprehensive E2E tests for SandboxManager functionality.

Focus: Validate `list_sandbox_infos` filter semantics precisely:
- `states` filter is OR logic
- `metadata` filter is AND logic

We create 3 dedicated sandboxes per run to keep assertions deterministic.
"""

import asyncio
import logging
import time
from datetime import timedelta
from uuid import uuid4

import pytest
from opensandbox import Sandbox, SandboxManager
from opensandbox.config import ConnectionConfig
from opensandbox.models.sandboxes import (
    SandboxFilter,
    SandboxImageSpec,
)

from tests.base_e2e_test import create_connection_config, get_sandbox_image

logger = logging.getLogger(__name__)


async def _create_sandbox(
    *,
    connection_config: ConnectionConfig,
    image: str,
    metadata: dict[str, str],
    env: dict[str, str],
    timeout: timedelta,
    ready_timeout: timedelta,
) -> Sandbox:
    """Create a sandbox with the fixed resource profile used by this suite.

    Args:
        connection_config: Connection settings forwarded to ``Sandbox.create``.
        image: Image reference, wrapped in a ``SandboxImageSpec``.
        metadata: Metadata key/value pairs attached to the sandbox.
        env: Environment variables passed to the sandbox.
        timeout: Sandbox ``timeout`` forwarded to ``Sandbox.create``.
        ready_timeout: ``ready_timeout`` forwarded to ``Sandbox.create``.

    Returns:
        The ``Sandbox`` returned by ``Sandbox.create``.
    """
    image_spec = SandboxImageSpec(image)
    # Every sandbox in this suite uses the same resource request and poll cadence.
    resource_request = {"cpu": "1", "memory": "2Gi"}
    poll_interval = timedelta(milliseconds=500)

    sandbox = await Sandbox.create(
        image=image_spec,
        connection_config=connection_config,
        resource=resource_request,
        timeout=timeout,
        ready_timeout=ready_timeout,
        metadata=metadata,
        env=env,
        health_check_polling_interval=poll_interval,
    )
    return sandbox


async def _wait_for_state(
    *,
    manager: SandboxManager,
    sandbox_id,
    expected_state: str,
    timeout: timedelta = timedelta(minutes=3),
) -> None:
    """Poll ``manager.get_sandbox_info`` until the sandbox reaches a state.

    The state is read from ``info.status.state`` roughly once per second. The
    sandbox is always polled at least once (even with a zero timeout), and a
    final poll happens at the deadline so a late transition is not missed.

    Args:
        manager: Manager used to query the sandbox.
        sandbox_id: Identifier passed to ``manager.get_sandbox_info``.
        expected_state: State value to wait for (compared with ``==``).
        timeout: Maximum time to keep polling.

    Raises:
        AssertionError: If the expected state is not observed before the
            deadline; the message includes the last observed state.
    """
    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    deadline = time.monotonic() + timeout.total_seconds()
    while True:
        info = await manager.get_sandbox_info(sandbox_id)
        last_state = info.status.state
        if last_state == expected_state:
            return

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Never sleep past the deadline; the next iteration is the final check.
        await asyncio.sleep(min(1.0, remaining))
    raise AssertionError(f"Timed out waiting for state={expected_state}, last_state={last_state}")


@pytest.mark.asyncio
class TestSandboxManagerE2E:
    """E2E tests for SandboxManager list/filter semantics.

    The class-scoped ``_manager_setup`` fixture creates three sandboxes that
    share a unique ``tag`` metadata value:

    - s1: tag + team=t1 + env=prod
    - s2: tag + team=t1 + env=dev
    - s3: tag + env=prod (no team), paused during setup

    Every query in the tests includes the tag so results are scoped to this run.
    """

    # Shared state populated by ``_manager_setup`` and read by the tests.
    connection_config: ConnectionConfig | None = None
    manager: SandboxManager | None = None
    tag: str | None = None
    s1: Sandbox | None = None
    s2: Sandbox | None = None
    s3: Sandbox | None = None

    @staticmethod
    async def _list_ids(manager, sandbox_filter) -> set:
        """Return the set of sandbox ids returned for ``sandbox_filter``."""
        result = await manager.list_sandbox_infos(sandbox_filter)
        return {info.id for info in result.sandbox_infos}

    @pytest.fixture(scope="class", autouse=True)
    async def _manager_setup(self, request):
        """Create the manager and the three sandboxes; always clean up.

        Setup runs inside the ``try`` so that a failure part-way through
        (sandbox creation, health check, pause wait) still triggers cleanup of
        whatever was already created instead of leaking remote sandboxes.
        """
        cls = request.cls
        try:
            # Create connection config (user-owned transport; we close it explicitly).
            cls.connection_config = create_connection_config()

            cls.manager = await SandboxManager.create(connection_config=cls.connection_config)
            cls.tag = f"e2e-sandbox-manager-{uuid4().hex[:8]}"

            # Create 3 sandboxes with controlled metadata.
            # s1: tag + team=t1 + env=prod
            # s2: tag + team=t1 + env=dev
            # s3: tag + env=prod (no team), then pause to get Paused state
            cls.s1 = await _create_sandbox(
                connection_config=cls.connection_config,
                image=get_sandbox_image(),
                metadata={"tag": cls.tag, "team": "t1", "env": "prod"},
                env={"E2E_TEST": "true", "CASE": "mgr-s1"},
                timeout=timedelta(minutes=5),
                ready_timeout=timedelta(seconds=60),
            )
            cls.s2 = await _create_sandbox(
                connection_config=cls.connection_config,
                image=get_sandbox_image(),
                metadata={"tag": cls.tag, "team": "t1", "env": "dev"},
                env={"E2E_TEST": "true", "CASE": "mgr-s2"},
                timeout=timedelta(minutes=5),
                ready_timeout=timedelta(seconds=60),
            )
            cls.s3 = await _create_sandbox(
                connection_config=cls.connection_config,
                image=get_sandbox_image(),
                metadata={"tag": cls.tag, "env": "prod"},
                env={"E2E_TEST": "true", "CASE": "mgr-s3"},
                timeout=timedelta(minutes=5),
                ready_timeout=timedelta(seconds=60),
            )

            assert await cls.s1.is_healthy() is True
            assert await cls.s2.is_healthy() is True
            assert await cls.s3.is_healthy() is True

            # Pause s3 to create a deterministic non-Running state for OR-state tests.
            await cls.manager.pause_sandbox(cls.s3.id)
            await _wait_for_state(manager=cls.manager, sandbox_id=cls.s3.id, expected_state="Paused")

            yield
        finally:
            # Best-effort cleanup: kill sandboxes (remote) and close local resources.
            # Failures are logged rather than raised so one broken step does not
            # prevent the remaining resources from being released.
            for s in (cls.s1, cls.s2, cls.s3):
                if s is None:
                    continue
                for step in ("kill", "close"):
                    try:
                        await getattr(s, step)()
                    except Exception:
                        logger.warning(
                            "Cleanup: %s() failed for sandbox %s",
                            step,
                            getattr(s, "id", "<unknown>"),
                            exc_info=True,
                        )

            if cls.manager is not None:
                try:
                    await cls.manager.close()
                except Exception:
                    logger.warning("Cleanup: manager.close() failed", exc_info=True)

            if cls.connection_config is not None:
                try:
                    await cls.connection_config.transport.aclose()
                except Exception:
                    logger.warning("Cleanup: transport.aclose() failed", exc_info=True)

    @pytest.mark.timeout(600)
    async def test_01_states_filter_or_logic(self):
        """``states`` filter matches sandboxes in ANY of the requested states."""
        manager = TestSandboxManagerE2E.manager
        tag = TestSandboxManagerE2E.tag
        s1 = TestSandboxManagerE2E.s1
        s2 = TestSandboxManagerE2E.s2
        s3 = TestSandboxManagerE2E.s3
        assert manager is not None
        assert tag is not None
        assert s1 is not None and s2 is not None and s3 is not None

        # states filter is OR: should return sandboxes in ANY of the requested states.
        ids = await self._list_ids(
            manager,
            SandboxFilter(states=["Running", "Paused"], metadata={"tag": tag}, page_size=50),
        )
        assert {s1.id, s2.id, s3.id}.issubset(ids)

        # Only s3 was paused during setup.
        paused_ids = await self._list_ids(
            manager,
            SandboxFilter(states=["Paused"], metadata={"tag": tag}, page_size=50),
        )
        assert s3.id in paused_ids
        assert s1.id not in paused_ids
        assert s2.id not in paused_ids

        running_ids = await self._list_ids(
            manager,
            SandboxFilter(states=["Running"], metadata={"tag": tag}, page_size=50),
        )
        assert s1.id in running_ids
        assert s2.id in running_ids
        assert s3.id not in running_ids

    @pytest.mark.timeout(600)
    async def test_02_metadata_filter_and_logic(self):
        """``metadata`` filter requires ALL given key/value pairs to match."""
        manager = TestSandboxManagerE2E.manager
        tag = TestSandboxManagerE2E.tag
        s1 = TestSandboxManagerE2E.s1
        s2 = TestSandboxManagerE2E.s2
        s3 = TestSandboxManagerE2E.s3
        assert manager is not None
        assert tag is not None
        assert s1 is not None and s2 is not None and s3 is not None

        # metadata filter is AND across all key-value pairs.
        # tag+team=t1 should match s1 and s2 (both have team=t1), not s3.
        ids = await self._list_ids(
            manager,
            SandboxFilter(metadata={"tag": tag, "team": "t1"}, page_size=50),
        )
        assert s1.id in ids
        assert s2.id in ids
        assert s3.id not in ids

        # tag+team=t1+env=prod should match only s1 (AND narrows results).
        ids = await self._list_ids(
            manager,
            SandboxFilter(metadata={"tag": tag, "team": "t1", "env": "prod"}, page_size=50),
        )
        assert s1.id in ids
        assert s2.id not in ids
        assert s3.id not in ids

        # tag+env=prod should match s1 and s3.
        ids = await self._list_ids(
            manager,
            SandboxFilter(metadata={"tag": tag, "env": "prod"}, page_size=50),
        )
        assert s1.id in ids
        assert s3.id in ids
        assert s2.id not in ids

        # Negative: tag+team=t2 should match none.
        ids = await self._list_ids(
            manager,
            SandboxFilter(metadata={"tag": tag, "team": "t2"}, page_size=50),
        )
        assert {s1.id, s2.id, s3.id}.isdisjoint(ids)


================================================
FILE: tests/python/tests/test_sandbox_manager_e2e_sync.py
================================================
#
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Comprehensive Sync E2E tests for SandboxManagerSync functionality.

Focus: Validate `list_sandbox_infos` filter semantics precisely:
- `states` filter is OR logic
- `metadata` filter is AND logic

Each test creates its own 3 dedicated sandboxes to keep assertions deterministic.
"""

import time
from datetime import timedelta
from uuid import uuid4

import pytest
from opensandbox import SandboxManagerSync, SandboxSync
from opensandbox.models.sandboxes import (
    SandboxFilter,
    SandboxImageSpec,
)

from tests.base_e2e_test import create_connection_config_sync, get_sandbox_image


class TestSandboxManagerE2ESync:
    """Sync E2E tests for ``SandboxManagerSync.list_sandbox_infos`` filters.

    Each test creates its own three sandboxes sharing a unique ``tag``:

    - s1: tag + team=t1 + env=prod
    - s2: tag + team=t1 + env=dev
    - s3: tag + env=prod (no team)

    Every query includes the tag so results are scoped to the current test.
    """

    # (value of the CASE env var, metadata added on top of the tag) for s1, s2, s3.
    _SANDBOX_SPECS = (
        ("mgr-s1", {"team": "t1", "env": "prod"}),
        ("mgr-s2", {"team": "t1", "env": "dev"}),
        ("mgr-s3", {"env": "prod"}),
    )

    @staticmethod
    def _create_sandbox(cfg, tag, case, extra_metadata):
        """Create one sandbox with the suite's fixed resources and timeouts."""
        return SandboxSync.create(
            image=SandboxImageSpec(get_sandbox_image()),
            connection_config=cfg,
            resource={"cpu": "1", "memory": "2Gi"},
            timeout=timedelta(minutes=5),
            ready_timeout=timedelta(seconds=60),
            metadata={"tag": tag, **extra_metadata},
            env={"E2E_TEST": "true", "CASE": case},
            health_check_polling_interval=timedelta(milliseconds=500),
        )

    @staticmethod
    def _list_ids(manager, sandbox_filter):
        """Return the set of sandbox ids returned for ``sandbox_filter``."""
        result = manager.list_sandbox_infos(sandbox_filter)
        return {info.id for info in result.sandbox_infos}

    @staticmethod
    def _wait_for_state(manager, sandbox_id, expected_state, timeout_seconds=180.0):
        """Poll the manager about once per second until the state matches.

        Raises:
            AssertionError: If the state is not observed before the deadline.
        """
        # Monotonic clock: the deadline must not move if the wall clock is adjusted.
        deadline = time.monotonic() + timeout_seconds
        while True:
            last_state = manager.get_sandbox_info(sandbox_id).status.state
            if last_state == expected_state:
                return
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise AssertionError(
                    f"Timed out waiting for state={expected_state}, last_state={last_state}"
                )
            # Never sleep past the deadline; the next iteration is the final check.
            time.sleep(min(1.0, remaining))

    @staticmethod
    def _cleanup(sandboxes, manager, cfg):
        """Best-effort release of remote sandboxes and local resources.

        Each step is attempted independently; failures are logged instead of
        raised so they neither skip later steps nor mask the test's own error.
        """
        import logging  # local import: this module has no file-level logger

        log = logging.getLogger(__name__)

        def _best_effort(description, action):
            try:
                action()
            except Exception:
                log.warning("Cleanup step failed: %s", description, exc_info=True)

        for sandbox in sandboxes:
            _best_effort("sandbox.kill()", lambda: sandbox.kill())
            _best_effort("sandbox.close()", lambda: sandbox.close())

        if manager is not None:
            _best_effort("manager.close()", lambda: manager.close())

        # NOTE(review): assumes create_connection_config_sync() returns a config
        # with a user-owned transport, mirroring the async suite which closes it
        # explicitly. Guarded so a config without a transport is a no-op.
        transport = getattr(cfg, "transport", None)
        if transport is not None:
            _best_effort("transport.close()", lambda: transport.close())

    @pytest.mark.timeout(600)
    def test_01_states_filter_or_logic(self):
        """``states`` filter matches sandboxes in ANY of the requested states."""
        cfg = create_connection_config_sync()
        manager = None
        sandboxes = []
        try:
            manager = SandboxManagerSync.create(connection_config=cfg)
            tag = f"e2e-sandbox-manager-{uuid4().hex[:8]}"

            # Append one by one so a failure mid-way still cleans up earlier ones.
            for case, extra_metadata in self._SANDBOX_SPECS:
                sandboxes.append(self._create_sandbox(cfg, tag, case, extra_metadata))
            s1, s2, s3 = sandboxes

            for sandbox in sandboxes:
                assert sandbox.is_healthy() is True

            # Pause s3 and wait for state transition
            manager.pause_sandbox(s3.id)
            self._wait_for_state(manager, s3.id, "Paused")

            # OR states
            ids = self._list_ids(
                manager,
                SandboxFilter(states=["Running", "Paused"], metadata={"tag": tag}, page_size=50),
            )
            assert {s1.id, s2.id, s3.id}.issubset(ids)

            paused_ids = self._list_ids(
                manager,
                SandboxFilter(states=["Paused"], metadata={"tag": tag}, page_size=50),
            )
            assert s3.id in paused_ids
            assert s1.id not in paused_ids
            assert s2.id not in paused_ids

            running_ids = self._list_ids(
                manager,
                SandboxFilter(states=["Running"], metadata={"tag": tag}, page_size=50),
            )
            assert s1.id in running_ids
            assert s2.id in running_ids
            assert s3.id not in running_ids
        finally:
            self._cleanup(sandboxes, manager, cfg)

    @pytest.mark.timeout(600)
    def test_02_metadata_filter_and_logic(self):
        """``metadata`` filter requires ALL given key/value pairs to match."""
        cfg = create_connection_config_sync()
        manager = None
        sandboxes = []
        try:
            manager = SandboxManagerSync.create(connection_config=cfg)
            tag = f"e2e-sandbox-manager-{uuid4().hex[:8]}"

            # Append one by one so a failure mid-way still cleans up earlier ones.
            for case, extra_metadata in self._SANDBOX_SPECS:
                sandboxes.append(self._create_sandbox(cfg, tag, case, extra_metadata))
            s1, s2, s3 = sandboxes

            for sandbox in sandboxes:
                assert sandbox.is_healthy() is True

            # AND metadata: tag+team=t1 matches s1 and s2, not s3.
            ids = self._list_ids(
                manager,
                SandboxFilter(metadata={"tag": tag, "team": "t1"}, page_size=50),
            )
            assert s1.id in ids
            assert s2.id in ids
            assert s3.id not in ids

            # tag+team=t1+env=prod narrows to s1 only.
            ids = self._list_ids(
                manager,
                SandboxFilter(metadata={"tag": tag, "team": "t1", "env": "prod"}, page_size=50),
            )
            assert s1.id in ids
            assert s2.id not in ids
            assert s3.id not in ids

            # tag+env=prod matches s1 and s3.
            ids = self._list_ids(
                manager,
                SandboxFilter(metadata={"tag": tag, "env": "prod"}, page_size=50),
            )
            assert s1.id in ids
            assert s3.id in ids
            assert s2.id not in ids

            # Negative: tag+team=t2 matches none of ours.
            ids = self._list_ids(
                manager,
                SandboxFilter(metadata={"tag": tag, "team": "t2"}, page_size=50),
            )
            assert {s1.id, s2.id, s3.id}.isdisjoint(ids)
        finally:
            self._cleanup(sandboxes, manager, cfg)