Repository: lambdaclass/options_backtester
Branch: master
Commit: 09514782c168
Files: 181
Total size: 1.5 MB
Directory structure:
gitextract_dxl517zn/
├── .github/
│ └── workflows/
│ └── ci.yml
├── .gitignore
├── .python-version
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── benchmarks/
│ ├── benchmark_large_pipeline.py
│ ├── benchmark_matrix.py
│ ├── benchmark_rust_vs_python.py
│ ├── benchmark_sweep.py
│ └── compare_with_bt.py
├── data/
│ ├── README.md
│ ├── convert_optionsdx.py
│ ├── fetch_data.py
│ └── fetch_signals.py
├── flake.nix
├── options_portfolio_backtester/
│ ├── __init__.py
│ ├── analytics/
│ │ ├── __init__.py
│ │ ├── charts.py
│ │ ├── optimization.py
│ │ ├── stats.py
│ │ ├── summary.py
│ │ ├── tearsheet.py
│ │ └── trade_log.py
│ ├── convexity/
│ │ ├── __init__.py
│ │ ├── _utils.py
│ │ ├── allocator.py
│ │ ├── backtest.py
│ │ ├── config.py
│ │ ├── scoring.py
│ │ └── viz.py
│ ├── core/
│ │ ├── __init__.py
│ │ └── types.py
│ ├── data/
│ │ ├── __init__.py
│ │ ├── providers.py
│ │ └── schema.py
│ ├── engine/
│ │ ├── __init__.py
│ │ ├── algo_adapters.py
│ │ ├── clock.py
│ │ ├── engine.py
│ │ ├── multi_strategy.py
│ │ ├── pipeline.py
│ │ └── strategy_tree.py
│ ├── execution/
│ │ ├── __init__.py
│ │ ├── _rust_bridge.py
│ │ ├── cost_model.py
│ │ ├── fill_model.py
│ │ ├── signal_selector.py
│ │ └── sizer.py
│ ├── portfolio/
│ │ ├── __init__.py
│ │ ├── greeks.py
│ │ ├── portfolio.py
│ │ ├── position.py
│ │ └── risk.py
│ └── strategy/
│ ├── __init__.py
│ ├── presets.py
│ ├── strategy.py
│ └── strategy_leg.py
├── pyproject.toml
├── rust/
│ ├── .cargo/
│ │ └── config.toml
│ ├── Cargo.toml
│ ├── ob_core/
│ │ ├── Cargo.toml
│ │ ├── benches/
│ │ │ └── hot_paths.rs
│ │ └── src/
│ │ ├── backtest.rs
│ │ ├── balance.rs
│ │ ├── convexity_backtest.rs
│ │ ├── convexity_scoring.rs
│ │ ├── cost_model.rs
│ │ ├── entries.rs
│ │ ├── exits.rs
│ │ ├── fill_model.rs
│ │ ├── filter.rs
│ │ ├── inventory.rs
│ │ ├── lib.rs
│ │ ├── risk.rs
│ │ ├── signal_selector.rs
│ │ ├── stats.rs
│ │ └── types.rs
│ └── ob_python/
│ ├── Cargo.toml
│ └── src/
│ ├── arrow_bridge.rs
│ ├── lib.rs
│ ├── py_backtest.rs
│ ├── py_balance.rs
│ ├── py_convexity.rs
│ ├── py_entries.rs
│ ├── py_execution.rs
│ ├── py_exits.rs
│ ├── py_filter.rs
│ ├── py_stats.rs
│ └── py_sweep.rs
├── setup.cfg
└── tests/
├── __init__.py
├── analytics/
│ ├── __init__.py
│ ├── test_analytics_pbt.py
│ ├── test_charts.py
│ ├── test_optimization.py
│ ├── test_stats.py
│ ├── test_stats_python_path.py
│ ├── test_summary.py
│ ├── test_tearsheet.py
│ └── test_trade_log.py
├── bench/
│ ├── __init__.py
│ ├── _test_helpers.py
│ ├── extract_prod_slices.py
│ ├── generate_test_data.py
│ ├── test_edge_cases.py
│ ├── test_execution_models.py
│ ├── test_invariants.py
│ ├── test_multi_leg.py
│ ├── test_partial_exits.py
│ └── test_sweep.py
├── compat/
│ ├── __init__.py
│ └── test_bt_overlap_gate.py
├── conftest.py
├── convexity/
│ ├── __init__.py
│ ├── conftest.py
│ ├── test_allocator.py
│ ├── test_backtest.py
│ └── test_config.py
├── core/
│ ├── __init__.py
│ ├── test_types.py
│ └── test_types_pbt.py
├── data/
│ ├── __init__.py
│ ├── test_filter.py
│ ├── test_property_based.py
│ ├── test_providers.py
│ ├── test_providers_extended.py
│ └── test_schema.py
├── engine/
│ ├── __init__.py
│ ├── test_algo_adapters.py
│ ├── test_capital_conservation.py
│ ├── test_chaos.py
│ ├── test_clock.py
│ ├── test_engine.py
│ ├── test_engine_deep.py
│ ├── test_engine_unit.py
│ ├── test_full_liquidation.py
│ ├── test_max_notional.py
│ ├── test_multi_strategy.py
│ ├── test_multi_strategy_engine.py
│ ├── test_per_leg_overrides.py
│ ├── test_pipeline.py
│ ├── test_portfolio_integration.py
│ ├── test_regression_snapshots.py
│ ├── test_risk_wiring.py
│ ├── test_rust_parity.py
│ ├── test_signal_selector_wiring.py
│ └── test_strategy_tree.py
├── execution/
│ ├── __init__.py
│ ├── test_cost_model.py
│ ├── test_execution_deep.py
│ ├── test_execution_pbt.py
│ ├── test_fill_model.py
│ ├── test_rust_parity_execution.py
│ ├── test_signal_selector.py
│ └── test_sizer.py
├── portfolio/
│ ├── __init__.py
│ ├── test_greeks_aggregation.py
│ ├── test_portfolio.py
│ ├── test_position.py
│ ├── test_property_based.py
│ └── test_risk.py
├── strategy/
│ ├── __init__.py
│ ├── test_presets.py
│ ├── test_strangle.py
│ ├── test_strategy.py
│ ├── test_strategy_deep.py
│ ├── test_strategy_leg.py
│ └── test_strategy_pbt.py
├── test_cleanup.py
├── test_data/
│ ├── ivy_5assets_data.csv
│ ├── ivy_portfolio.csv
│ ├── options_data.csv
│ ├── test_data_options.csv
│ └── test_data_stocks.csv
├── test_deep_analytics_convexity.py
├── test_intrinsic_sign.py
├── test_intrinsic_value.py
├── test_property_based.py
└── test_smoke.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/workflows/ci.yml
================================================
name: Test
on: [push, pull_request]
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: cachix/install-nix-action@v30
- name: Download data
run: nix develop --command python data/fetch_data.py all --symbols SPY --force
- name: Run tests
run: nix develop --command python -m pytest -v tests/ --ignore=tests/bench --ignore=tests/compat
- name: Type check
run: nix develop --command python -m mypy options_portfolio_backtester --ignore-missing-imports
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
.venv-bt/
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# Generated charts/plots
*.png
# Mac OS-specific storage files
.DS_Store
# VS Code
.vscode/
_ob_rust.so
mutants/
# Large regenerable test data (from tests/bench/extract_prod_slices.py)
tests/data/*.csv
# exclude data from source control by default
data/*
!data/README.md
!data/fetch_data.py
!data/fetch_signals.py
!data/convert_optionsdx.py
!data/raw/
data/raw/*
!data/raw/.gitkeep
!data/processed/
data/processed/*
!data/processed/.gitkeep
!data/processed/signals.csv
================================================
FILE: .python-version
================================================
3.12
================================================
FILE: CONTRIBUTING.md
================================================
Contributing
============
Contributions are welcome and very much appreciated. Credit will be appropriately given.
## License
By contributing, you agree that your contributions will be licensed under the project's MIT License.
## Code contributions
To begin contributing to the project, please follow these steps.
1. [Fork](https://help.github.com/en/articles/fork-a-repo) the repo.
2. Clone your fork locally:
```shell
$ git clone git@github.com:your_username/backtester_options.git
```
3. Create the environment and install dependencies:
```shell
$ make init
```
4. Create your development branch from `master`
```shell
$ git checkout -b your_branch master
```
5. Start coding your contribution (Thanks!)
6. Make sure your code passes all tests, lints and is formatted correctly (`TODO`: Add linting, code formatting to CI.)
7. Submit a pull request with a brief explanation of your work.
## Types of Contributions
### Bug reports
Make sure to follow the setup steps detailed in the [readme](README.md). If you find a bug, please create an issue with the label `bug` and provide the following information:
- Operating System and version.
- Steps taken to replicate the bug.
- What was the expected output and what actually happened.
- Any details of your local environment that might be helpful for troubleshooting.
### Bug fixes
If you find a bug issue you want to fix, follow the steps outlined [above](#code-contributions) and submit a pull request with a link to the original issue.
### Proposing Features
Create an issue detailing what functionality you'd like to see implemented. If you can, provide general advice as to how the proposed feature could be done.
### Implementing Features
Find an issue with the label `help wanted` or `improvement` and [start coding](#code-contributions).
When you are done, submit a pull request with a link to the original issue and some code samples showing how the code works. Tests are expected when adding new functionality.
### Documentation
We encourage users to improve our project documentation, either via docstrings, markdown documents to be added to the [project wiki](https://github.com/lambdaclass/backtester_options/wiki) or writing blog posts.
Let us know via issues labeled `docs` and we'll credit you appropriately.
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2019 Federico Carrone
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: Makefile
================================================
NIX_CMD := XDG_CACHE_HOME=$(CURDIR)/.cache nix --extra-experimental-features 'nix-command flakes' develop --command
RUNCMD := $(NIX_CMD)
PYTHON := python
.PHONY: test test-bench lint typecheck notebooks rust-build rust-test rust-bench bench install-dev compare-bt benchmark-matrix walk-forward-report parity-gate bench-rust-vs-python help
.DEFAULT_GOAL := help
test: ## Run all tests
$(RUNCMD) $(PYTHON) -m pytest -v tests
test-bench: ## Run benchmark/property tests
$(RUNCMD) $(PYTHON) -m pytest -v -m bench tests/bench
lint: ## Run ruff linter
$(RUNCMD) $(PYTHON) -m ruff check options_portfolio_backtester
typecheck: ## Run mypy type checker
$(RUNCMD) $(PYTHON) -m mypy options_portfolio_backtester --ignore-missing-imports
notebooks: ## Execute all notebooks
@for nb in notebooks/*.ipynb; do \
echo "Running $$nb..."; \
$(RUNCMD) $(PYTHON) -m jupyter nbconvert --to notebook --execute "$$nb" \
--output "$$(basename $$nb)" --ExecutePreprocessor.timeout=600 || true; \
done
rust-build: ## Build Rust extension with maturin (release)
$(RUNCMD) bash -c 'cd rust && maturin develop --manifest-path ob_python/Cargo.toml --release'
rust-test: ## Run Rust unit tests
$(RUNCMD) bash -c 'cd rust && cargo test'
rust-bench: ## Run Rust benchmarks (criterion)
$(RUNCMD) bash -c 'cd rust && cargo bench'
bench: rust-build ## Run Python benchmarks (requires Rust build)
$(RUNCMD) $(PYTHON) -m pytest tests/bench/ -v -m bench --benchmark-only 2>/dev/null || \
echo "Install pytest-benchmark for Python benchmarks"
install-dev: ## Install local dev deps into active nix dev environment
$(PYTHON) -m pip install -e '.[dev,charts,notebooks,rust]'
compare-bt: ## Compare stock-only monthly rebalance vs bt library
$(RUNCMD) $(PYTHON) scripts/compare_with_bt.py
benchmark-matrix: ## Run standardized runtime/accuracy matrix vs bt
$(RUNCMD) $(PYTHON) scripts/benchmark_matrix.py
walk-forward-report: ## Run walk-forward/OOS harness and save report
$(RUNCMD) $(PYTHON) scripts/walk_forward_report.py
bench-rust-vs-python: ## Benchmark Rust vs Python vs bt (options + stock-only)
$(RUNCMD) $(PYTHON) scripts/benchmark_rust_vs_python.py --stock-only
parity-gate: ## Run bt overlap parity CI gate (bench marker)
$(RUNCMD) $(PYTHON) -m pytest -v tests/compat/test_bt_overlap_gate.py -m bench
help:
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) |\
awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
================================================
FILE: README.md
================================================
Options Portfolio Backtester
============================
Backtest options strategies with realistic execution, Greeks-aware risk management, and contract-level inventory. Also handles equities and multi-asset portfolios. Optional Rust core for speed.
## Get started
### Install
With Nix:
```shell
nix develop
```
Without Nix (Python >= 3.12):
```shell
python -m venv .venv && source .venv/bin/activate
make install-dev
```
### Get data
```shell
python data/fetch_data.py all --symbols SPY
```
Downloads SPY stock prices and options chains to `data/processed/`. Supports 104+ symbols. See [`data/README.md`](data/README.md) for details.
### Run your first backtest
```python
from options_portfolio_backtester import (
BacktestEngine, Stock, Type, Direction,
HistoricalOptionsData, TiingoData,
Strategy, StrategyLeg,
NearestDelta, PerContractCommission,
RiskManager, MaxDelta, MaxDrawdown,
)
# Load data
options_data = HistoricalOptionsData("data/processed/options.csv")
stocks_data = TiingoData("data/processed/stocks.csv")
schema = options_data.schema
# Define strategy: buy OTM puts on SPY, exit when DTE drops below 30
strategy = Strategy(schema)
leg = StrategyLeg("leg_1", schema, option_type=Type.PUT, direction=Direction.BUY)
leg.entry_filter = (
(schema.underlying == "SPY")
& (schema.dte >= 60) & (schema.dte <= 120)
& (schema.delta >= -0.25) & (schema.delta <= -0.10)
)
leg.exit_filter = schema.dte <= 30
strategy.add_leg(leg)
# Run backtest: 97% stocks, 3% options
engine = BacktestEngine(
allocation={"stocks": 0.97, "options": 0.03, "cash": 0.0},
initial_capital=1_000_000,
cost_model=PerContractCommission(rate=0.65),
signal_selector=NearestDelta(target_delta=-0.20),
risk_manager=RiskManager([MaxDelta(100.0), MaxDrawdown(0.20)]),
)
engine.stocks = [Stock("SPY", 1.0)]
engine.stocks_data = stocks_data
engine.options_data = options_data
engine.options_strategy = strategy
engine.run(rebalance_freq=1)
# Results
print(engine.balance["total capital"].iloc[-1]) # final capital
print(len(engine.trade_log)) # number of trades
```
### Strategy presets
Instead of building legs manually:
```python
from options_portfolio_backtester import Strangle
strangle = Strangle(schema, "short", "SPY",
dte_entry_range=(30, 60), dte_exit=7,
otm_pct=5, pct_tolerance=1,
exit_thresholds=(0.2, 0.2))
```
Available presets: `Strangle`, `IronCondor`, `CoveredCall`, `CashSecuredPut`, `Collar`, `Butterfly`.
### Stock-only backtest with algo pipeline
For equity portfolios without options, use the pipeline API:
```python
from options_portfolio_backtester.engine.pipeline import (
AlgoPipelineBacktester,
RunMonthly, SelectAll, WeighInvVol, LimitWeights, Rebalance,
)
import pandas as pd
prices = pd.read_csv("data/processed/stocks.csv", parse_dates=["date"])
prices = prices.pivot(index="date", columns="symbol", values="adjClose")
bt = AlgoPipelineBacktester(
prices=prices,
initial_capital=1_000_000,
algos=[
RunMonthly(),
SelectAll(),
WeighInvVol(lookback=252),
LimitWeights(limit=0.25),
Rebalance(),
],
)
bt.run()
```
## Execution models
Every component is swappable. Pass them to `BacktestEngine(...)` or override per-leg.
**Signal selectors** — which contract to pick from candidates:
`FirstMatch()`, `NearestDelta(target)`, `MaxOpenInterest()`
**Cost models** — commissions and fees:
`NoCosts()`, `PerContractCommission(rate)`, `TieredCommission(tiers)`, `SpreadSlippage(pct)`
**Fill models** — execution price:
`MarketAtBidAsk()`, `MidPrice()`, `VolumeAwareFill(threshold)`
**Position sizers** — how many contracts:
`CapitalBased()`, `FixedQuantity(qty)`, `FixedDollar(amount)`, `PercentOfPortfolio(pct)`
**Risk constraints** — pre-trade gating:
`MaxDelta(limit)`, `MaxVega(limit)`, `MaxDrawdown(max_dd_pct)`
## Rebalancing model
At each rebalance date, the engine follows a **full liquidation** approach:
1. **Liquidate all options** — every open option position is closed at the current market price (long positions are sold at the bid, short positions are bought back at the ask)
2. **Compute total capital** — cash + stock value (options are zero after liquidation)
3. **Rebalance stocks** — sell all stocks, buy fresh at target allocation (e.g. 97%)
4. **Buy new options** — use the full options allocation (e.g. 3%) to purchase contracts matching entry criteria (DTE, delta, etc.)
This ensures:
- **Clean accounting** — no stale option value carried across rebalances, no money creation
- **Fresh positions** — every rebalance picks the best available contracts for current market conditions
- **Simple math** — `total_capital = cash + stocks` at the point of redeployment, no complex delta tracking
Between rebalance dates, positions are held (mark-to-market for balance tracking). If `check_exits_daily=True`, exit filters run daily but no new entries are made until the next rebalance.
For the **Spitznagel leverage** model (`options_budget` parameter), options are funded separately from the stock allocation so `{stocks: 1.0, options: 0.005}` means 100% equity + 0.5% put budget on top.
## Rust acceleration
Optional. Falls back to Python when not installed.
```shell
make rust-build
```
| Benchmark | Python | Rust |
|-----------|--------|------|
| Full options backtest (24.7M rows) | 10.0s | **4.2s** |
| Stock-only monthly rebalance | 3.7s | **0.6s** |
| Parallel grid sweep (100 configs) | — | **5-8x** faster (Rayon, bypasses GIL) |
## Data
```shell
# SPY stock + options data
python data/fetch_data.py all --symbols SPY
# Multiple symbols
python data/fetch_data.py all --symbols SPY IWM QQQ --start 2020-01-01 --end 2023-01-01
# FRED macro signals (VIX, GDP, Buffett Indicator, etc.)
python data/fetch_signals.py
# Convert OptionsDX format
python data/convert_optionsdx.py data/raw/spx_eod_2020.csv --output data/processed/spx_options.csv
```
You can also bring your own CSVs. Required columns:
- **Stocks**: `date`, `symbol`, `adjClose`
- **Options**: `quotedate`, `underlying`, `type`, `strike`, `expiration`, `dte`, `bid`, `ask`, `volume`, `openinterest`, `delta`
## Tests
```shell
make test # all tests (1300+)
make test-regression # regression snapshots (locked golden values)
make test-chaos # fault injection (corrupted/adversarial data)
make muttest # mutation testing on core modules
make lint # ruff
make typecheck # mypy
make rust-test # Rust unit tests
```
## Architecture
```
options_portfolio_backtester/
├── core/ # Types: Direction, OptionType, Greeks, Fill, Order
├── data/ # Schema DSL, CSV providers
├── strategy/ # Strategy, StrategyLeg, presets
├── execution/ # CostModel, FillModel, Sizer, SignalSelector
├── portfolio/ # Portfolio, OptionPosition, RiskManager
├── engine/ # BacktestEngine, AlgoPipelineBacktester, StrategyTreeEngine
└── analytics/ # BacktestStats, TradeLog, TearsheetReport, charts
rust/
├── ob_core/ # Backtest loop, stats, execution models, filter parser
└── ob_python/ # PyO3 bindings, parallel sweep, Arrow bridge
```
## Pipeline algos
40+ composable algos for the `AlgoPipelineBacktester`. All follow `__call__(ctx) -> StepDecision`.
**Scheduling**: `RunDaily`, `RunWeekly`, `RunMonthly`, `RunQuarterly`, `RunYearly`, `RunOnce`, `RunOnDate`, `RunAfterDate`, `RunAfterDays`, `RunEveryNPeriods`, `RunIfOutOfBounds`, `Or`, `Not`, `Require`
**Selection**: `SelectAll`, `SelectThese`, `SelectHasData`, `SelectN`, `SelectMomentum`, `SelectWhere`, `SelectRandomly`, `SelectActive`, `SelectRegex`
**Weighting**: `WeighEqually`, `WeighSpecified`, `WeighTarget`, `WeighInvVol`, `WeighMeanVar`, `WeighERC`, `TargetVol`, `WeighRandomly`
**Risk & rebalancing**: `LimitWeights`, `LimitDeltas`, `ScaleWeights`, `HedgeRisks`, `Margin`, `MaxDrawdownGuard`, `Rebalance`, `RebalanceOverTime`, `CapitalFlow`, `CloseDead`, `ClosePositionsAfterDates`, `ReplayTransactions`, `CouponPayingPosition`
## Research
Research notebooks and analysis: [finance_research](https://github.com/unbalancedparentheses/finance_research).
================================================
FILE: benchmarks/benchmark_large_pipeline.py
================================================
"""Large-scale performance benchmark: Rust vs Python on production data.
Runs the same strategy through Rust full-loop and Python BacktestEngine on
the full SPY options dataset (24.7M rows, 4500+ trading days) with frequent
rebalancing to produce thousands of trades.
Usage:
python benchmarks/benchmark_large_pipeline.py
python benchmarks/benchmark_large_pipeline.py --rebalance-freq 2 --runs 3
"""
from __future__ import annotations
import argparse
import gc
import os
import sys
import time
from dataclasses import dataclass, field
from pathlib import Path
import numpy as np
import pandas as pd
REPO_ROOT = Path(__file__).resolve().parents[1]
from options_portfolio_backtester import BacktestEngine as LegacyBacktest
from options_portfolio_backtester.data.providers import HistoricalOptionsData, TiingoData
from options_portfolio_backtester.core.types import Direction, Stock, OptionType as Type
from options_portfolio_backtester.strategy.strategy import Strategy
from options_portfolio_backtester.strategy.strategy_leg import StrategyLeg
from options_portfolio_backtester.engine.engine import BacktestEngine
from options_portfolio_backtester.engine._dispatch import use_rust
from options_portfolio_backtester.engine import _dispatch as _rust_dispatch
from options_portfolio_backtester.execution.cost_model import NoCosts
STOCKS_FILE = os.path.join(REPO_ROOT, "data", "processed", "stocks.csv")
OPTIONS_FILE = os.path.join(REPO_ROOT, "data", "processed", "options.csv")
@dataclass
class BenchResult:
    """Summary of one backend's benchmark: timing plus headline backtest numbers."""

    # Label shown in the printed report and summary table.
    name: str
    # Mean wall-clock duration of the timed runs, in seconds.
    runtime_s: float
    # Last value of the "total capital" balance column from the final run.
    final_capital: float
    # Total return relative to the initial capital, expressed in percent.
    total_return_pct: float
    # Number of trade-log rows from the final run (0 when the log is empty).
    n_trades: int
    # Number of rows in the final run's balance frame.
    n_balance_rows: int
    # Dispatch mode string reported for the run (e.g. "python-legacy").
    dispatch_mode: str
    # Defaults to 0.0; the runners visible in this file never set it.
    peak_mem_mb: float = 0.0
    # Individual wall-clock durations, one entry per timed run.
    per_run_times: list[float] = field(default_factory=list)
def parse_args() -> argparse.Namespace:
    """Parse the benchmark's command-line options from ``sys.argv``.

    Returns:
        Namespace with ``runs``, ``rebalance_freq``, ``dte_min``, ``dte_max``,
        ``dte_exit``, ``initial_capital`` and ``options_pct``.
    """
    parser = argparse.ArgumentParser(description="Large-scale Rust vs Python benchmark.")
    # (flag, type, default, help) -- registered in this order so --help is stable.
    option_specs = (
        ("--runs", int, 3, "Number of timing runs (default: 3)."),
        ("--rebalance-freq", int, 1, "Rebalance frequency in business months (1=monthly)."),
        ("--dte-min", int, 20, "Min DTE for entry filter."),
        ("--dte-max", int, 60, "Max DTE for entry filter."),
        ("--dte-exit", int, 10, "DTE threshold for exit."),
        ("--initial-capital", int, 1_000_000, "Initial capital."),
        ("--options-pct", float, 0.10, "Options allocation pct (0.10 = 10%%)."),
    )
    for flag, value_type, default, help_text in option_specs:
        parser.add_argument(flag, type=value_type, default=default, help=help_text)
    return parser.parse_args()
def _load_data():
    """Load the stock and options data files and print a short size summary.

    Returns:
        ``(stocks_data, options_data)`` built from ``STOCKS_FILE`` and
        ``OPTIONS_FILE`` respectively.
    """
    print("Loading data...")
    started = time.perf_counter()
    stocks_data = TiingoData(STOCKS_FILE)
    options_data = HistoricalOptionsData(OPTIONS_FILE)
    load_time = time.perf_counter() - started

    # Row counts and distinct quote dates are reported for orientation only.
    option_rows = len(options_data._data)
    stock_rows = len(stocks_data._data)
    trading_days = options_data._data["quotedate"].nunique()

    print(f" Loaded in {load_time:.2f}s")
    print(f" Options: {option_rows:,} rows, {trading_days:,} trading days")
    print(f" Stocks: {stock_rows:,} rows")
    return stocks_data, options_data
def _strategy(schema, dte_min, dte_max, dte_exit):
    """Build a single-leg long-put strategy on SPY.

    Args:
        schema: Options schema used to express the entry/exit filters.
        dte_min: Lower DTE bound (inclusive) for entries.
        dte_max: Upper DTE bound (inclusive) for entries.
        dte_exit: The leg exits once DTE is at or below this value.

    Returns:
        A ``Strategy`` containing the one configured leg.
    """
    put_leg = StrategyLeg("leg_1", schema, option_type=Type.PUT, direction=Direction.BUY)

    # Entry filter: SPY contracts whose DTE lies within [dte_min, dte_max].
    entry = schema.underlying == "SPY"
    entry = entry & (schema.dte >= dte_min)
    entry = entry & (schema.dte <= dte_max)
    put_leg.entry_filter = entry
    put_leg.exit_filter = schema.dte <= dte_exit

    strategy = Strategy(schema)
    strategy.add_legs([put_leg])
    return strategy
def _copy_data(stocks_data, options_data):
"""Deep-copy data handlers to avoid cross-run contamination."""
sd = TiingoData.__new__(TiingoData)
sd.__dict__.update(stocks_data.__dict__)
sd._data = stocks_data._data.copy()
od = HistoricalOptionsData.__new__(HistoricalOptionsData)
od.__dict__.update(options_data.__dict__)
od._data = options_data._data.copy()
return sd, od
def run_engine(
    stocks_data, options_data, args, runs, force_python=False,
) -> BenchResult:
    """Time ``BacktestEngine`` over ``runs`` fresh runs and summarise the last one.

    Args:
        stocks_data: Stock data handler; cloned before every run.
        options_data: Options data handler; cloned before every run.
        args: Parsed CLI namespace (reads ``options_pct``, ``initial_capital``,
            ``dte_min``, ``dte_max``, ``dte_exit`` and ``rebalance_freq``).
        runs: Number of timed runs.
        force_python: When true, ``_rust_dispatch.RUST_AVAILABLE`` is set to
            ``False`` around each timed run and restored afterwards.

    Returns:
        ``BenchResult`` with the mean runtime and the final run's results.
    """
    label = "Python BacktestEngine" if force_python else "Rust BacktestEngine"
    durations = []
    engine = None

    for run_no in range(1, runs + 1):
        sd, od = _copy_data(stocks_data, options_data)
        allocation = {
            "stocks": 1.0 - args.options_pct,
            "options": args.options_pct,
            "cash": 0.0,
        }
        engine = BacktestEngine(
            allocation,
            cost_model=NoCosts(),
            initial_capital=args.initial_capital,
        )
        engine.stocks = [Stock("SPY", 1.0)]
        engine.stocks_data = sd
        engine.options_data = od
        engine.options_strategy = _strategy(od.schema, args.dte_min, args.dte_max, args.dte_exit)

        # Collect garbage up front so it is less likely to land inside the timed region.
        gc.collect()

        previous_flag = _rust_dispatch.RUST_AVAILABLE
        if force_python:
            _rust_dispatch.RUST_AVAILABLE = False
        try:
            started = time.perf_counter()
            engine.run(rebalance_freq=args.rebalance_freq)
            elapsed = time.perf_counter() - started
        finally:
            # Always restore the flag, even if the run raised.
            _rust_dispatch.RUST_AVAILABLE = previous_flag

        durations.append(elapsed)
        print(f" {label} run {run_no}/{runs}: {elapsed:.3f}s")

    assert engine is not None
    mode = engine.run_metadata.get("dispatch_mode", "unknown")
    final = float(engine.balance["total capital"].iloc[-1])
    trade_log = engine.trade_log
    n_trades = 0 if trade_log.empty else len(trade_log)
    total_ret = (final / args.initial_capital - 1.0) * 100.0

    return BenchResult(
        name=label,
        runtime_s=float(np.mean(durations)),
        final_capital=final,
        total_return_pct=total_ret,
        n_trades=n_trades,
        n_balance_rows=len(engine.balance),
        dispatch_mode=mode,
        per_run_times=durations,
    )
def run_legacy(stocks_data, options_data, args, runs) -> BenchResult:
    """Time the package-level engine (imported here as ``LegacyBacktest``).

    Unlike ``run_engine`` no cost model is passed and the Rust dispatch flag
    is left untouched.

    Args:
        stocks_data: Stock data handler; cloned before every run.
        options_data: Options data handler; cloned before every run.
        args: Parsed CLI namespace (reads ``options_pct``, ``initial_capital``,
            ``dte_min``, ``dte_max``, ``dte_exit`` and ``rebalance_freq``).
        runs: Number of timed runs.

    Returns:
        ``BenchResult`` with the mean runtime and the final run's results.
    """
    durations = []
    bt = None

    for run_no in range(1, runs + 1):
        sd, od = _copy_data(stocks_data, options_data)
        allocation = {
            "stocks": 1.0 - args.options_pct,
            "options": args.options_pct,
            "cash": 0.0,
        }
        bt = LegacyBacktest(allocation, initial_capital=args.initial_capital)
        bt.stocks = [Stock("SPY", 1.0)]
        bt.stocks_data = sd
        bt.options_data = od
        bt.options_strategy = _strategy(od.schema, args.dte_min, args.dte_max, args.dte_exit)

        # Collect garbage up front so it is less likely to land inside the timed region.
        gc.collect()
        started = time.perf_counter()
        bt.run(rebalance_freq=args.rebalance_freq)
        elapsed = time.perf_counter() - started

        durations.append(elapsed)
        print(f" Legacy Python run {run_no}/{runs}: {elapsed:.3f}s")

    assert bt is not None
    final = float(bt.balance["total capital"].iloc[-1])
    trade_log = bt.trade_log
    n_trades = 0 if trade_log.empty else len(trade_log)
    # The return is measured against the engine's own initial capital.
    total_ret = (final / bt.initial_capital - 1.0) * 100.0

    return BenchResult(
        name="Legacy Python Backtest",
        runtime_s=float(np.mean(durations)),
        final_capital=final,
        total_return_pct=total_ret,
        n_trades=n_trades,
        n_balance_rows=len(bt.balance),
        dispatch_mode="python-legacy",
        per_run_times=durations,
    )
def print_result(r: BenchResult, indent: str = " ") -> None:
    """Print a multi-line, human-readable summary of one benchmark result.

    Args:
        r: Result to display.
        indent: Prefix placed in front of every printed line.
    """
    per_run = ", ".join(f"{t:.3f}s" for t in r.per_run_times)
    report_lines = (
        f"{indent}{r.name}",
        f"{indent} dispatch: {r.dispatch_mode}",
        f"{indent} avg runtime: {r.runtime_s:.3f}s",
        f"{indent} per-run times: [{per_run}]",
        f"{indent} final capital: ${r.final_capital:,.2f}",
        f"{indent} total return: {r.total_return_pct:.4f}%",
        f"{indent} trades: {r.n_trades:,}",
        f"{indent} balance rows: {r.n_balance_rows:,}",
    )
    for line in report_lines:
        print(line)
def print_comparison(a: BenchResult, b: BenchResult) -> None:
    """Print how result ``a`` compares with result ``b``.

    The speedup is ``b.runtime_s / a.runtime_s``, so a value above 1 means
    ``a`` was faster; it is NaN when ``a`` has a non-positive runtime.

    The trade-count line is printed whenever at least one side recorded
    trades. Requiring trades on both sides would hide the most severe
    mismatch, where one backend traded and the other produced none.

    Args:
        a: Result treated as the candidate.
        b: Result treated as the baseline.
    """
    if a.runtime_s > 0:
        speedup = b.runtime_s / a.runtime_s
    else:
        speedup = float("nan")
    verdict = "faster" if speedup > 1 else "slower"

    cap_delta = abs(a.final_capital - b.final_capital)
    ret_delta = a.total_return_pct - b.total_return_pct
    # Clamp the denominator so a zero/negative capital cannot divide by zero.
    cap_pct = (cap_delta / max(a.final_capital, 1)) * 100

    print(f" {a.name} vs {b.name}:")
    print(f" speedup: {speedup:.2f}x ({a.name} is {verdict})")
    print(f" capital delta: ${cap_delta:,.2f} ({cap_pct:.4f}%)")
    print(f" return delta: {ret_delta:+.4f} pct-pts")
    if a.n_trades > 0 or b.n_trades > 0:
        trades_status = "match" if a.n_trades == b.n_trades else "MISMATCH"
        print(f" trade count: {a.n_trades:,} vs {b.n_trades:,} ({trades_status})")
def main() -> None:
    """Run every available backend, then print results, comparisons and a table.

    Exits with status 1 if either input data file is missing. The Rust
    backend is benchmarked only when ``use_rust()`` reports it as available.
    """
    args = parse_args()

    for data_path in (STOCKS_FILE, OPTIONS_FILE):
        if not Path(data_path).exists():
            print(f"ERROR: Missing data file: {data_path}")
            print("Run this benchmark from the repo root with production data in data/processed/")
            sys.exit(1)

    rule = "=" * 65

    print(f"\n{rule}")
    print("Large-Scale Performance Benchmark: Rust vs Python")
    print(rule)
    print(f" Rust available: {use_rust()}")
    print(f" runs per backend: {args.runs}")
    print(f" rebalance freq: {args.rebalance_freq} BMS")
    print(f" strategy: BUY PUT, DTE {args.dte_min}-{args.dte_max}, exit DTE <= {args.dte_exit}")
    print(f" allocation: {(1-args.options_pct)*100:.0f}% stocks / {args.options_pct*100:.0f}% options")
    print(f" initial capital: ${args.initial_capital:,}")
    print()

    stocks_data, options_data = _load_data()
    print()

    # -- Run all backends --
    results = []

    if use_rust():
        print("Running Rust BacktestEngine...")
        rust_result = run_engine(stocks_data, options_data, args, args.runs, force_python=False)
        results.append(rust_result)
        print()

    print("Running Python BacktestEngine...")
    python_result = run_engine(stocks_data, options_data, args, args.runs, force_python=True)
    results.append(python_result)
    print()

    print("Running Legacy Python Backtest...")
    legacy_result = run_legacy(stocks_data, options_data, args, args.runs)
    results.append(legacy_result)
    print()

    # -- Report --
    print(rule)
    print("Results")
    print(rule)
    for result in results:
        print_result(result)
        print()

    print(rule)
    print("Comparisons")
    print(rule)
    if use_rust():
        # rust_result only exists when the Rust backend was benchmarked above.
        print_comparison(rust_result, python_result)
        print()
        print_comparison(rust_result, legacy_result)
        print()
    print_comparison(python_result, legacy_result)
    print()

    # -- Summary table --
    print(rule)
    print("Summary Table")
    print(rule)
    table_rows = [
        {
            "Backend": result.name,
            "Dispatch": result.dispatch_mode,
            "Avg Time (s)": f"{result.runtime_s:.3f}",
            "Trades": f"{result.n_trades:,}",
            "Final Capital": f"${result.final_capital:,.0f}",
            "Return %": f"{result.total_return_pct:.2f}",
        }
        for result in results
    ]
    summary = pd.DataFrame(table_rows)
    print(summary.to_string(index=False))

    if use_rust() and rust_result.runtime_s > 0:
        print(f"\n Rust speedup over Python Engine: {python_result.runtime_s / rust_result.runtime_s:.2f}x")
        print(f" Rust speedup over Legacy: {legacy_result.runtime_s / rust_result.runtime_s:.2f}x")

    print("\nDone.")


if __name__ == "__main__":
    main()
================================================
FILE: benchmarks/benchmark_matrix.py
================================================
"""Standardized benchmark matrix for options_portfolio_backtester vs bt.
Runs multiple scenarios over date ranges/rebalance frequencies and writes
a CSV scorecard with runtime and parity metrics.
"""
from __future__ import annotations
import argparse
import sys
import time
from dataclasses import dataclass
from pathlib import Path
import numpy as np
import pandas as pd
REPO_ROOT = Path(__file__).resolve().parents[1]
from options_portfolio_backtester import BacktestEngine as Backtest
from options_portfolio_backtester.data.providers import TiingoData
from options_portfolio_backtester.core.types import Stock
@dataclass(frozen=True)
class Scenario:
    """One immutable cell of the benchmark matrix.

    ``build_scenarios`` creates one instance per combination of date range,
    rebalance frequency and initial capital.
    """

    label: str  # sequential identifier assigned by build_scenarios ("S1", "S2", ...)
    start: pd.Timestamp  # inclusive lower bound used when slicing price data
    end: pd.Timestamp  # inclusive upper bound used when slicing price data
    rebalance_months: int  # forwarded as rebalance_freq with rebalance_unit="BMS"
    initial_capital: float  # starting capital handed to both backtests
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the benchmark-matrix script.

    Returns:
        The populated namespace; list-like options are kept as raw
        comma-separated strings and parsed later by the callers.
    """
    parser = argparse.ArgumentParser(description="Benchmark matrix vs bt.")
    # (flag, keyword arguments) pairs, registered in declaration order.
    option_specs = (
        ("--stocks-file", {"default": "data/processed/stocks.csv"}),
        ("--symbols", {"default": "SPY"}),
        ("--weights", {"default": None}),
        ("--date-ranges", {"default": "2008-01-01:2025-12-12,2016-01-01:2025-12-12"}),
        ("--rebalance-months", {"default": "1,3"}),
        ("--initial-capitals", {"default": "1000000"}),
        ("--runs", {"type": int, "default": 3}),
        ("--output", {"default": "data/processed/benchmark_matrix.csv"}),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser.parse_args()
def parse_csv_list(s: str, cast):
    """Split a comma-separated string and convert every non-blank entry.

    Args:
        s: Raw comma-separated text.
        cast: Callable applied to each whitespace-stripped entry.

    Returns:
        List of converted entries; blank entries are skipped.
    """
    converted = []
    for piece in s.split(","):
        token = piece.strip()
        if token:
            converted.append(cast(token))
    return converted
def normalize_weights(symbols: list[str], raw_weights: str | None) -> list[float]:
    """Turn the ``--weights`` option into weights that sum to 1.

    Args:
        symbols: Ticker symbols the weights apply to.
        raw_weights: Comma-separated weights, or ``None`` for equal weights.

    Returns:
        One normalized weight per symbol, in symbol order.

    Raises:
        ValueError: If ``symbols`` is empty, the number of weights differs
            from the number of symbols, a weight is not numeric, or the
            weights do not sum to a positive value.
    """
    if not symbols:
        # Previously surfaced as an opaque ZeroDivisionError.
        raise ValueError("No symbols provided")
    if raw_weights is None:
        return [1.0 / len(symbols)] * len(symbols)
    # Skip blank entries (e.g. a trailing comma), matching parse_csv_list.
    tokens = [piece.strip() for piece in raw_weights.split(",")]
    vals = [float(token) for token in tokens if token]
    if len(vals) != len(symbols):
        raise ValueError("--weights length must match --symbols length")
    total = float(sum(vals))
    if total <= 0:
        raise ValueError("--weights must sum to > 0")
    return [v / total for v in vals]
def compute_metrics(total_capital: pd.Series) -> tuple[float, float, float, float, float]:
    """Summarize an equity curve.

    Args:
        total_capital: Equity values, one per period; NaNs are dropped first.

    Returns:
        ``(total_return, cagr, max_drawdown, annual_vol, sharpe)`` as
        fractions (not percent). All zeros for an empty curve. Annualization
        uses 252 periods per year.
    """
    equity = total_capital.dropna()
    if equity.empty:
        return 0.0, 0.0, 0.0, 0.0, 0.0

    period_returns = equity.pct_change().dropna()
    first_value = equity.iloc[0]
    last_value = equity.iloc[-1]
    total_return = last_value / first_value - 1.0

    n_years = len(equity) / 252.0
    if n_years > 0:
        cagr = (last_value / first_value) ** (1.0 / n_years) - 1.0
    else:
        cagr = 0.0

    # Drawdown relative to the running peak.
    drawdown = equity / equity.cummax() - 1.0
    max_dd = 0.0 if drawdown.empty else float(drawdown.min())

    # Volatility and Sharpe need at least two returns for a sample std.
    vol = 0.0
    sharpe = 0.0
    if len(period_returns) > 1:
        annualizer = np.sqrt(252)
        vol = float(period_returns.std(ddof=1) * annualizer)
        if period_returns.std(ddof=1) > 0:
            sharpe = float((period_returns.mean() / period_returns.std(ddof=1)) * annualizer)
    return total_return, cagr, max_dd, vol, sharpe
def slice_stocks_data(stocks_file: str, start: pd.Timestamp, end: pd.Timestamp) -> TiingoData:
    """Load a stocks file and keep only rows dated within ``[start, end]``.

    The provider's ``start_date`` / ``end_date`` attributes are reset to the
    first and last date that survive the slice.
    """
    provider = TiingoData(stocks_file)
    dates = provider._data["date"]
    in_window = (dates >= start) & (dates <= end)
    # Copy so later mutation does not touch the originally loaded frame.
    provider._data = provider._data.loc[in_window].copy()
    kept_dates = provider._data["date"]
    provider.start_date = kept_dates.min()
    provider.end_date = kept_dates.max()
    return provider
def run_options_portfolio_backtester(
    stocks_file: str,
    symbols: list[str],
    weights: list[float],
    scenario: Scenario,
    runs: int,
) -> tuple[dict[str, float], pd.Series]:
    """Time a stock-only backtest for one scenario.

    Data is reloaded and sliced for every run, outside the timed section;
    only ``run`` is timed.

    Returns:
        ``(stats, equity)`` where ``stats`` holds ``ob_``-prefixed metrics
        (mean runtime plus percent metrics of the last run) and ``equity``
        is the last run's "total capital" series with NaNs dropped.
    """
    portfolio = [Stock(symbol, weight) for symbol, weight in zip(symbols, weights)]
    elapsed = []
    equity = pd.Series(dtype=float)
    for _ in range(runs):
        window = slice_stocks_data(stocks_file, scenario.start, scenario.end)
        engine = Backtest(
            {"stocks": 1.0, "options": 0.0, "cash": 0.0},
            initial_capital=int(scenario.initial_capital),
        )
        engine.stocks = portfolio
        engine.stocks_data = window
        started = time.perf_counter()
        engine.run(rebalance_freq=scenario.rebalance_months, rebalance_unit="BMS")
        elapsed.append(time.perf_counter() - started)
        equity = engine.balance["total capital"].dropna()

    total_return, cagr, max_dd, vol, sharpe = compute_metrics(equity)
    stats = {
        "ob_runtime_s": float(np.mean(elapsed)),
        "ob_total_return_pct": total_return * 100.0,
        "ob_cagr_pct": cagr * 100.0,
        "ob_max_drawdown_pct": max_dd * 100.0,
        "ob_vol_annual_pct": vol * 100.0,
        "ob_sharpe": sharpe,
        "ob_rows": float(len(equity)),
    }
    return (stats, equity)
def run_bt(
    stocks_file: str,
    symbols: list[str],
    weights: list[float],
    scenario: Scenario,
    runs: int,
) -> tuple[dict[str, float], pd.Series | None]:
    """Time the equivalent fixed-weight rebalance policy in the ``bt`` library.

    Returns:
        ``({"bt_available": 0.0}, None)`` when ``bt`` cannot be imported;
        otherwise ``(stats, equity)`` where ``stats`` holds ``bt_``-prefixed
        metrics and ``equity`` is the strategy price series of the last run.
    """
    try:
        import bt  # type: ignore
    except Exception:
        return ({"bt_available": 0.0}, None)
    prices = pd.read_csv(stocks_file, parse_dates=["date"])
    m = (prices["date"] >= scenario.start) & (prices["date"] <= scenario.end) & (prices["symbol"].isin(symbols))
    prices = prices.loc[m].copy()
    px = prices.pivot(index="date", columns="symbol", values="adjClose").sort_index().dropna()
    px = px[symbols]

    # Honor the scenario's rebalance frequency. The previous code always used
    # RunMonthly(), so non-monthly scenarios compared different policies.
    rebalance_dates = None
    if scenario.rebalance_months != 1:
        # First available trading day of every N-month business-month-start
        # bucket. NOTE(review): assumes this matches how the engine interprets
        # rebalance_unit="BMS" -- confirm against the engine.
        bucket_starts = (
            pd.Series(px.index, index=px.index)
            .groupby(pd.Grouper(freq=f"{scenario.rebalance_months}BMS"))
            .min()
            .dropna()
        )
        rebalance_dates = list(bucket_starts)

    runtimes = []
    last_eq = None
    for _ in range(runs):
        if rebalance_dates is None:
            schedule = bt.algos.RunMonthly()
        else:
            schedule = bt.algos.RunOnDate(*rebalance_dates)
        algos = [
            schedule,
            bt.algos.SelectThese(symbols),
            bt.algos.WeighSpecified(**{s: w for s, w in zip(symbols, weights)}),
            bt.algos.Rebalance(),
        ]
        test = bt.Backtest(bt.Strategy("bench_matrix", algos), px, initial_capital=scenario.initial_capital)
        t0 = time.perf_counter()
        res = bt.run(test)
        runtimes.append(time.perf_counter() - t0)
        last_eq = res.prices.iloc[:, 0]
    assert last_eq is not None
    tr, cagr, mdd, vol, sharpe = compute_metrics(last_eq)
    return ({
        "bt_available": 1.0,
        "bt_runtime_s": float(np.mean(runtimes)),
        "bt_total_return_pct": tr * 100.0,
        "bt_cagr_pct": cagr * 100.0,
        "bt_max_drawdown_pct": mdd * 100.0,
        "bt_vol_annual_pct": vol * 100.0,
        "bt_sharpe": sharpe,
        "bt_rows": float(len(last_eq)),
    }, last_eq)
def overlap_parity(ob_eq: pd.Series, bt_eq: pd.Series | None) -> dict[str, float]:
    """Compare two equity curves on the dates they share.

    Both curves are rebased to 1.0 at the first shared date before being
    subtracted (ours minus bt).

    Returns:
        Dict with the number of shared rows, the final difference and the
        largest absolute difference. The deltas are NaN when there is no bt
        curve or no shared date.
    """
    if bt_eq is not None:
        shared = ob_eq.index.intersection(bt_eq.index)
        if len(shared) != 0:
            ours = ob_eq.loc[shared]
            theirs = bt_eq.loc[shared]
            gap = ours / ours.iloc[0] - theirs / theirs.iloc[0]
            return {
                "overlap_rows": float(len(shared)),
                "overlap_end_delta": float(gap.iloc[-1]),
                "overlap_max_abs_delta": float(gap.abs().max()),
            }
    return {"overlap_rows": 0.0, "overlap_end_delta": np.nan, "overlap_max_abs_delta": np.nan}
def build_scenarios(args: argparse.Namespace) -> list[Scenario]:
    """Expand the CLI options into the full scenario matrix.

    Scenarios are the cross product of date ranges, rebalance frequencies
    and initial capitals, in that nesting order, labelled ``S1``, ``S2``, ...
    """
    windows = []
    for raw_range in args.date_ranges.split(","):
        raw_range = raw_range.strip()
        if raw_range:
            first, last = raw_range.split(":")
            windows.append((pd.Timestamp(first), pd.Timestamp(last)))
    frequencies = parse_csv_list(args.rebalance_months, int)
    capitals = parse_csv_list(args.initial_capitals, float)

    combos = [
        (start, end, months, capital)
        for start, end in windows
        for months in frequencies
        for capital in capitals
    ]
    return [
        Scenario(
            label=f"S{number}",
            start=start,
            end=end,
            rebalance_months=months,
            initial_capital=capital,
        )
        for number, (start, end, months, capital) in enumerate(combos, start=1)
    ]
def main() -> None:
    """Run every scenario through both frameworks and write the CSV scorecard.

    Raises:
        ValueError: If no symbols are provided.
    """
    args = parse_args()
    symbols = [s.strip().upper() for s in args.symbols.split(",") if s.strip()]
    if not symbols:
        raise ValueError("No symbols provided")
    weights = normalize_weights(symbols, args.weights)
    scenarios = build_scenarios(args)
    rows = []
    for sc in scenarios:
        ob_stats, ob_eq = run_options_portfolio_backtester(
            stocks_file=args.stocks_file,
            symbols=symbols,
            weights=weights,
            scenario=sc,
            runs=args.runs,
        )
        bt_stats, bt_eq = run_bt(
            stocks_file=args.stocks_file,
            symbols=symbols,
            weights=weights,
            scenario=sc,
            runs=args.runs,
        )
        parity = overlap_parity(ob_eq, bt_eq)
        row = {
            "scenario": sc.label,
            "start": sc.start.date().isoformat(),
            "end": sc.end.date().isoformat(),
            "rebalance_months": sc.rebalance_months,
            "initial_capital": sc.initial_capital,
            "symbols": ",".join(symbols),
            "weights": ",".join(f"{w:.6f}" for w in weights),
            **ob_stats,
            **bt_stats,
            **parity,
        }
        if bt_stats.get("bt_available", 0.0) == 1.0:
            row["speed_ratio_bt_over_ob"] = row["bt_runtime_s"] / row["ob_runtime_s"] if row["ob_runtime_s"] > 0 else np.nan
            row["return_delta_pct_pts"] = row["ob_total_return_pct"] - row["bt_total_return_pct"]
            row["maxdd_delta_pct_pts"] = row["ob_max_drawdown_pct"] - row["bt_max_drawdown_pct"]
        else:
            row["speed_ratio_bt_over_ob"] = np.nan
            row["return_delta_pct_pts"] = np.nan
            row["maxdd_delta_pct_pts"] = np.nan
        rows.append(row)
    out = pd.DataFrame(rows)
    if not out.empty:
        # An empty frame has no columns to sort by.
        out = out.sort_values(["start", "rebalance_months", "initial_capital"])
    out_path = Path(args.output)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out.to_csv(out_path, index=False)
    print("\n=== Benchmark Matrix Summary ===")
    print(f"scenarios: {len(out)}")
    print(f"output: {out_path}")
    cols = [
        "scenario", "start", "end", "rebalance_months",
        "ob_runtime_s", "bt_runtime_s", "speed_ratio_bt_over_ob",
        "return_delta_pct_pts", "maxdd_delta_pct_pts", "overlap_max_abs_delta",
    ]
    # reindex tolerates missing columns (e.g. "bt_runtime_s" when bt is not
    # installed) by filling NaN; plain out[cols] raised KeyError there.
    print(out.reindex(columns=cols).to_string(index=False))
# Run the benchmark matrix only when this file is executed as a script.
if __name__ == "__main__":
    main()
================================================
FILE: benchmarks/benchmark_rust_vs_python.py
================================================
"""Benchmark: Rust full-loop vs Python BacktestEngine vs legacy Backtest vs bt.
Runs options backtest (with options data) through Rust and Python paths, plus
a stock-only comparison against bt if installed.
Usage:
python benchmarks/benchmark_rust_vs_python.py
python benchmarks/benchmark_rust_vs_python.py --runs 5 --stock-only
"""
from __future__ import annotations
import argparse
import os
import sys
import time
from dataclasses import dataclass
from pathlib import Path
import numpy as np
import pandas as pd
REPO_ROOT = Path(__file__).resolve().parents[1]
from options_portfolio_backtester import BacktestEngine as LegacyBacktest
from options_portfolio_backtester.data.providers import HistoricalOptionsData, TiingoData
from options_portfolio_backtester.core.types import Direction, Stock, OptionType as Type
from options_portfolio_backtester.strategy.strategy import Strategy
from options_portfolio_backtester.strategy.strategy_leg import StrategyLeg
from options_portfolio_backtester.engine.engine import BacktestEngine
from options_portfolio_backtester.engine._dispatch import use_rust
from options_portfolio_backtester.engine import _dispatch as _rust_dispatch
from options_portfolio_backtester.execution.cost_model import NoCosts
from options_portfolio_backtester.execution.fill_model import MarketAtBidAsk
from options_portfolio_backtester.execution.signal_selector import FirstMatch
# Default fixture files, used unless production data is requested and found.
# NOTE(review): confirm <repo>/backtester/test/test_data exists in this checkout.
TEST_DIR = os.path.join(REPO_ROOT, "backtester", "test")
STOCKS_FILE = os.path.join(TEST_DIR, "test_data", "ivy_5assets_data.csv")
OPTIONS_FILE = os.path.join(TEST_DIR, "test_data", "options_data.csv")
# Production datasets, used only with --use-prod-data when both files exist.
PROD_STOCKS_FILE = os.path.join(REPO_ROOT, "data", "processed", "stocks.csv")
PROD_OPTIONS_FILE = os.path.join(REPO_ROOT, "data", "processed", "options.csv")
@dataclass
class BenchResult:
    """Timing and outcome summary for one benchmarked backend."""

    name: str  # display name of the backend
    runtime_s: float  # mean wall-clock seconds across the timed runs
    final_capital: float  # ending value reported by the last run
    total_return_pct: float  # total return of the last run, in percent
    n_trades: int  # trade-log length (0 when empty or not tracked)
    dispatch_mode: str  # label of the execution path that was benchmarked
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the Rust-vs-Python benchmark."""
    parser = argparse.ArgumentParser(description="Benchmark Rust vs Python backtest paths.")
    # (flag, keyword arguments) pairs, registered in declaration order.
    option_specs = (
        ("--runs", {"type": int, "default": 3, "help": "Timing averaging repeats."}),
        ("--stock-only", {"action": "store_true", "help": "Also run stock-only comparison vs bt."}),
        ("--use-prod-data", {"action": "store_true", "help": "Use production data files if available."}),
        ("--rebalance-freq", {"type": int, "default": 1, "help": "Rebalance frequency."}),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser.parse_args()
def _stocks(use_prod: bool = False):
    """Return the stock allocation matching the chosen dataset.

    Production data uses a single SPY position; the test fixture uses five
    equally weighted tickers.
    """
    if not use_prod:
        fixture_tickers = ("VTI", "VEU", "BND", "VNQ", "DBC")
        return [Stock(ticker, 0.2) for ticker in fixture_tickers]
    return [Stock("SPY", 1.0)]
def _load_data(use_prod: bool):
    """Load stocks and options data, preferring production files on request.

    Production files are used only when ``use_prod`` is true and both exist;
    otherwise the test fixtures are loaded and patched with fixed prices.

    Returns:
        ``(stocks_data, options_data, stocks_file)`` where ``stocks_file`` is
        the path that was actually loaded.
    """
    prod_ready = (
        use_prod
        and Path(PROD_STOCKS_FILE).exists()
        and Path(PROD_OPTIONS_FILE).exists()
    )
    if prod_ready:
        stocks_file = PROD_STOCKS_FILE
        options_file = PROD_OPTIONS_FILE
    else:
        stocks_file = STOCKS_FILE
        options_file = OPTIONS_FILE

    stocks_data = TiingoData(stocks_file)
    options_data = HistoricalOptionsData(options_file)

    if stocks_file == STOCKS_FILE:
        # Fixture-only overrides: constant stock price plus hand-set quotes.
        stocks_data._data["adjClose"] = 10
        quote_overrides = (
            (2, "ask", 1),
            (2, "bid", 0.5),
            (51, "ask", 1.5),
            (50, "bid", 0.5),
            (130, "bid", 0.5),
            (131, "bid", 1.5),
            (206, "bid", 0.5),
            (207, "bid", 1.5),
        )
        for row_label, column, value in quote_overrides:
            options_data._data.at[row_label, column] = value
    return stocks_data, options_data, stocks_file
def _buy_strategy(schema):
    """Build the single-leg long-put strategy used by every options runner.

    Entry requires underlying "SPX" with at least 60 DTE; the exit filter is
    DTE of 30 or less.
    """
    strategy = Strategy(schema)
    put_leg = StrategyLeg("leg_1", schema, option_type=Type.PUT, direction=Direction.BUY)
    put_leg.entry_filter = (schema.underlying == "SPX") & (schema.dte >= 60)
    put_leg.exit_filter = schema.dte <= 30
    strategy.add_legs([put_leg])
    return strategy
# ---------------------------------------------------------------------------
# Runners
# ---------------------------------------------------------------------------
def run_engine_python(stocks_data, options_data, stocks, rebalance_freq, runs) -> BenchResult:
    """Benchmark BacktestEngine with Rust dispatch switched off.

    Each run works on fresh copies of the providers' frames. The module-level
    ``RUST_AVAILABLE`` flag is cleared only around ``engine.run`` and always
    restored. Outcome fields come from the last run.
    """
    durations = []
    engine = None
    for _ in range(runs):
        # Clone the providers without re-reading the files.
        stocks_copy = TiingoData.__new__(TiingoData)
        stocks_copy.__dict__.update(stocks_data.__dict__)
        stocks_copy._data = stocks_data._data.copy()
        options_copy = HistoricalOptionsData.__new__(HistoricalOptionsData)
        options_copy.__dict__.update(options_data.__dict__)
        options_copy._data = options_data._data.copy()

        engine = BacktestEngine(
            {"stocks": 0.97, "options": 0.03, "cash": 0},
            cost_model=NoCosts(),
        )
        engine.stocks = stocks
        engine.stocks_data = stocks_copy
        engine.options_data = options_copy
        engine.options_strategy = _buy_strategy(options_copy.schema)

        previous_flag = _rust_dispatch.RUST_AVAILABLE
        _rust_dispatch.RUST_AVAILABLE = False
        try:
            started = time.perf_counter()
            engine.run(rebalance_freq=rebalance_freq)
            durations.append(time.perf_counter() - started)
        finally:
            _rust_dispatch.RUST_AVAILABLE = previous_flag

    assert engine is not None
    ending_capital = float(engine.balance["total capital"].iloc[-1])
    trade_count = 0 if engine.trade_log.empty else len(engine.trade_log)
    return_pct = (ending_capital / engine.initial_capital - 1) * 100
    return BenchResult(
        name="Python BacktestEngine",
        runtime_s=float(np.mean(durations)),
        final_capital=ending_capital,
        total_return_pct=return_pct,
        n_trades=trade_count,
        dispatch_mode="python",
    )
def run_engine_rust(stocks_data, options_data, stocks, rebalance_freq, runs) -> BenchResult | None:
    """Benchmark BacktestEngine on its default dispatch path.

    Returns:
        ``None`` when ``use_rust()`` is false; otherwise the mean runtime plus
        the last run's outcome, with ``dispatch_mode`` read from the engine's
        ``run_metadata`` ("unknown" when absent).
    """
    if not use_rust():
        return None

    durations = []
    engine = None
    for _ in range(runs):
        # Clone the providers without re-reading the files.
        stocks_copy = TiingoData.__new__(TiingoData)
        stocks_copy.__dict__.update(stocks_data.__dict__)
        stocks_copy._data = stocks_data._data.copy()
        options_copy = HistoricalOptionsData.__new__(HistoricalOptionsData)
        options_copy.__dict__.update(options_data.__dict__)
        options_copy._data = options_data._data.copy()

        engine = BacktestEngine(
            {"stocks": 0.97, "options": 0.03, "cash": 0},
            cost_model=NoCosts(),
        )
        engine.stocks = stocks
        engine.stocks_data = stocks_copy
        engine.options_data = options_copy
        engine.options_strategy = _buy_strategy(options_copy.schema)

        started = time.perf_counter()
        engine.run(rebalance_freq=rebalance_freq)
        durations.append(time.perf_counter() - started)

    assert engine is not None
    reported_mode = engine.run_metadata.get("dispatch_mode", "unknown")
    ending_capital = float(engine.balance["total capital"].iloc[-1])
    trade_count = 0 if engine.trade_log.empty else len(engine.trade_log)
    return_pct = (ending_capital / engine.initial_capital - 1) * 100
    return BenchResult(
        name="Rust BacktestEngine",
        runtime_s=float(np.mean(durations)),
        final_capital=ending_capital,
        total_return_pct=return_pct,
        n_trades=trade_count,
        dispatch_mode=reported_mode,
    )
def run_legacy_python(stocks_data, options_data, stocks, rebalance_freq, runs) -> BenchResult:
    """Benchmark the ``LegacyBacktest`` entry point with default settings.

    NOTE(review): ``LegacyBacktest`` is imported as an alias of the package's
    ``BacktestEngine`` and Rust dispatch is not disabled here, so the
    "python-legacy" label may not reflect the path actually taken -- confirm.
    """
    durations = []
    backtest = None
    for _ in range(runs):
        # Clone the providers without re-reading the files.
        stocks_copy = TiingoData.__new__(TiingoData)
        stocks_copy.__dict__.update(stocks_data.__dict__)
        stocks_copy._data = stocks_data._data.copy()
        options_copy = HistoricalOptionsData.__new__(HistoricalOptionsData)
        options_copy.__dict__.update(options_data.__dict__)
        options_copy._data = options_data._data.copy()

        backtest = LegacyBacktest({"stocks": 0.97, "options": 0.03, "cash": 0})
        backtest.stocks = stocks
        backtest.stocks_data = stocks_copy
        backtest.options_data = options_copy
        backtest.options_strategy = _buy_strategy(options_copy.schema)

        started = time.perf_counter()
        backtest.run(rebalance_freq=rebalance_freq)
        durations.append(time.perf_counter() - started)

    assert backtest is not None
    ending_capital = float(backtest.balance["total capital"].iloc[-1])
    trade_count = 0 if backtest.trade_log.empty else len(backtest.trade_log)
    return_pct = (ending_capital / backtest.initial_capital - 1) * 100
    return BenchResult(
        name="Legacy Python Backtest",
        runtime_s=float(np.mean(durations)),
        final_capital=ending_capital,
        total_return_pct=return_pct,
        n_trades=trade_count,
        dispatch_mode="python-legacy",
    )
def run_bt_stock_only(stocks_file, symbols, weights, initial_capital, runs) -> BenchResult | None:
    """Benchmark a monthly fixed-weight rebalance in the ``bt`` library.

    Returns:
        ``None`` when ``bt`` cannot be imported; otherwise the mean runtime
        and the last run's outcome. ``final_capital`` is expressed in the same
        currency units as ``initial_capital``.
    """
    try:
        import bt
    except Exception:
        return None
    prices = pd.read_csv(stocks_file, parse_dates=["date"])
    prices = prices[prices["symbol"].isin(symbols)].copy()
    px = prices.pivot(index="date", columns="symbol", values="adjClose").sort_index().dropna()
    px = px[symbols]
    times = []
    last_res = None
    for _ in range(runs):
        algos = [
            bt.algos.RunMonthly(),
            bt.algos.SelectThese(symbols),
            bt.algos.WeighSpecified(**dict(zip(symbols, weights))),
            bt.algos.Rebalance(),
        ]
        strat = bt.Strategy("bench", algos)
        test = bt.Backtest(strat, px, initial_capital=initial_capital)
        t0 = time.perf_counter()
        last_res = bt.run(test)
        times.append(time.perf_counter() - t0)
    assert last_res is not None
    series = last_res.prices.iloc[:, 0]
    # The result's price series is treated as an index, not a dollar NAV
    # (NOTE(review): bt is understood to rebase it to 100 -- confirm). Scale
    # by its growth so final_capital is comparable with the other backends;
    # if the series already starts at initial_capital this is a no-op.
    growth = float(series.iloc[-1]) / float(series.iloc[0])
    final = initial_capital * growth
    total_ret = (growth - 1) * 100
    return BenchResult(
        name="bt library",
        runtime_s=float(np.mean(times)),
        final_capital=final,
        total_return_pct=total_ret,
        n_trades=0,
        dispatch_mode="bt",
    )
def run_ob_stock_only(stocks_file, symbols, weights, initial_capital, runs) -> BenchResult:
    """Benchmark a stock-only monthly ("BMS") rebalance in this package.

    The stocks file is reloaded for every run, outside the timed section.
    Outcome fields come from the last run; ``n_trades`` is reported as 0.
    """
    portfolio = [Stock(symbol, weight) for symbol, weight in zip(symbols, weights)]
    durations = []
    engine = None
    for _ in range(runs):
        price_data = TiingoData(stocks_file)
        engine = LegacyBacktest(
            {"stocks": 1.0, "options": 0.0, "cash": 0.0},
            initial_capital=int(initial_capital),
        )
        engine.stocks = portfolio
        engine.stocks_data = price_data
        started = time.perf_counter()
        engine.run(rebalance_freq=1, rebalance_unit="BMS")
        durations.append(time.perf_counter() - started)

    assert engine is not None
    equity = engine.balance["total capital"].dropna()
    ending_capital = float(equity.iloc[-1])
    return_pct = (ending_capital / initial_capital - 1) * 100
    return BenchResult(
        name="options_portfolio_backtester (stock-only)",
        runtime_s=float(np.mean(durations)),
        final_capital=ending_capital,
        total_return_pct=return_pct,
        n_trades=0,
        dispatch_mode="python-legacy-stock-only",
    )
# ---------------------------------------------------------------------------
# Display
# ---------------------------------------------------------------------------
def print_result(r: BenchResult) -> None:
    """Print one benchmark result as a short block, one field per line."""
    report_lines = [
        f" {r.name}",
        f" dispatch: {r.dispatch_mode}",
        f" runtime: {r.runtime_s:.4f}s",
        f" final_capital: {r.final_capital:,.2f}",
        f" total_return: {r.total_return_pct:.4f}%",
        f" n_trades: {r.n_trades}",
    ]
    for line in report_lines:
        print(line)
def print_comparison(a: BenchResult, b: BenchResult) -> None:
    """Print how result ``a`` compares with result ``b``.

    Speedup is ``b``'s runtime divided by ``a``'s (NaN when ``a`` took no
    measurable time). Trade counts are compared only when both are positive.
    """
    if a.runtime_s > 0:
        speedup = b.runtime_s / a.runtime_s
    else:
        speedup = float("nan")
    verdict = "faster" if speedup > 1 else "slower"
    capital_gap = abs(a.final_capital - b.final_capital)
    return_gap = a.total_return_pct - b.total_return_pct

    print(f" {a.name} vs {b.name}:")
    print(f" speedup: {speedup:.2f}x ({a.name} is {verdict})")
    print(f" capital delta: ${capital_gap:,.2f}")
    print(f" return delta: {return_gap:+.4f} pct-pts")
    both_traded = a.n_trades > 0 and b.n_trades > 0
    if both_traded:
        print(f" trades match: {a.n_trades == b.n_trades} ({a.n_trades} vs {b.n_trades})")
def main() -> None:
    """Run the options benchmarks and, optionally, the stock-only comparison.

    The stock portfolio and the printed data label follow the dataset that
    was actually loaded, so a missing production file cannot pair the test
    fixture with the production (SPY) portfolio.
    """
    args = parse_args()
    stocks_data, options_data, stocks_file = _load_data(args.use_prod_data)
    # _load_data silently falls back to the fixture when a prod file is missing.
    using_prod = stocks_file == PROD_STOCKS_FILE
    if args.use_prod_data and not using_prod:
        print("WARNING: production data files not found; falling back to test data.")
    stocks = _stocks(use_prod=using_prod)
    print(f"\n{'='*60}")
    print("Benchmark: Rust vs Python vs Legacy")
    print(f"{'='*60}")
    print(f" Rust available: {use_rust()}")
    print(f" runs per backend: {args.runs}")
    print(f" rebalance_freq: {args.rebalance_freq}")
    print(f" data: {'production' if using_prod else 'test'}")
    print()
    # -- Options backtest benchmarks --
    print("--- Options Backtest (with options data) ---")
    results = []
    legacy = run_legacy_python(stocks_data, options_data, stocks, args.rebalance_freq, args.runs)
    results.append(legacy)
    print_result(legacy)
    python_engine = run_engine_python(stocks_data, options_data, stocks, args.rebalance_freq, args.runs)
    results.append(python_engine)
    print_result(python_engine)
    rust_engine = run_engine_rust(stocks_data, options_data, stocks, args.rebalance_freq, args.runs)
    if rust_engine:
        results.append(rust_engine)
        print_result(rust_engine)
    else:
        print(" Rust BacktestEngine: SKIPPED (Rust not available)")
    print()
    print("--- Comparisons ---")
    if rust_engine:
        print_comparison(rust_engine, python_engine)
        print_comparison(rust_engine, legacy)
    print_comparison(python_engine, legacy)
    # -- Stock-only benchmarks --
    if args.stock_only:
        print()
        if not Path(PROD_STOCKS_FILE).exists():
            # Report the skip instead of dropping the requested comparison silently.
            print(f"--- Stock-Only comparison SKIPPED ({PROD_STOCKS_FILE} not found) ---")
        else:
            print("--- Stock-Only Monthly Rebalance (vs bt) ---")
            symbols = ["SPY"]
            weights = [1.0]
            capital = 1_000_000.0
            ob_stock = run_ob_stock_only(PROD_STOCKS_FILE, symbols, weights, capital, args.runs)
            print_result(ob_stock)
            bt_res = run_bt_stock_only(PROD_STOCKS_FILE, symbols, weights, capital, args.runs)
            if bt_res:
                print_result(bt_res)
                print()
                print_comparison(ob_stock, bt_res)
            else:
                print(" bt: SKIPPED (not installed)")
    print()
    print("Done.")
# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()
================================================
FILE: benchmarks/benchmark_sweep.py
================================================
"""Benchmark: Rust parallel_sweep vs Python sequential grid search.
This is the PRIMARY benchmark for justifying the Rust backend.
Single backtests have Pandas<->Polars conversion overhead, but
parallel_sweep amortizes that cost over N grid points and runs
all backtests on Rayon threads (no GIL, no pickle, zero-copy data).
Usage:
python benchmarks/benchmark_sweep.py
python benchmarks/benchmark_sweep.py --grid-sizes 10 50 100 --runs 3
"""
from __future__ import annotations
import argparse
import gc
import math
import os
import sys
import time
from pathlib import Path
import numpy as np
import pandas as pd
import polars as pl
REPO_ROOT = Path(__file__).resolve().parents[1]
from options_portfolio_backtester.data.providers import HistoricalOptionsData, TiingoData
from options_portfolio_backtester.core.types import Direction, Stock, OptionType as Type
from options_portfolio_backtester.strategy.strategy import Strategy
from options_portfolio_backtester.strategy.strategy_leg import StrategyLeg
from options_portfolio_backtester.engine.engine import BacktestEngine
from options_portfolio_backtester.engine._dispatch import use_rust, rust
from options_portfolio_backtester.engine import _dispatch as _rust_dispatch
from options_portfolio_backtester.execution.cost_model import NoCosts
# Default fixture files, used unless production data is requested and found.
# NOTE(review): confirm <repo>/backtester/test/test_data exists in this checkout.
TEST_DIR = os.path.join(REPO_ROOT, "backtester", "test")
STOCKS_FILE = os.path.join(TEST_DIR, "test_data", "ivy_5assets_data.csv")
OPTIONS_FILE = os.path.join(TEST_DIR, "test_data", "options_data.csv")
# Production datasets, used only with --use-prod-data when both files exist.
PROD_STOCKS_FILE = os.path.join(REPO_ROOT, "data", "processed", "stocks.csv")
PROD_OPTIONS_FILE = os.path.join(REPO_ROOT, "data", "processed", "options.csv")
def parse_args():
    """Parse the command-line options of the sweep benchmark."""
    parser = argparse.ArgumentParser(description="Benchmark Rust parallel_sweep vs Python sequential")
    # (flag, keyword arguments) pairs, registered in declaration order.
    option_specs = (
        (
            "--grid-sizes",
            {
                "nargs": "+",
                "type": int,
                "default": [5, 10, 25, 50],
                "help": "Grid sizes to test (number of parameter combos)",
            },
        ),
        ("--runs", {"type": int, "default": 2, "help": "Timing runs per grid size"}),
        ("--use-prod-data", {"action": "store_true", "help": "Use production data"}),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser.parse_args()
def _load_data(use_prod: bool):
    """Load stocks and options data, preferring production files on request.

    Production files are used only when ``use_prod`` is true and both exist;
    otherwise the test fixtures are loaded and patched with fixed prices.

    Returns:
        ``(stocks_data, options_data, stocks_file)`` where ``stocks_file`` is
        the path that was actually loaded.
    """
    prod_ready = (
        use_prod
        and Path(PROD_STOCKS_FILE).exists()
        and Path(PROD_OPTIONS_FILE).exists()
    )
    if prod_ready:
        stocks_path = PROD_STOCKS_FILE
        options_path = PROD_OPTIONS_FILE
    else:
        stocks_path = STOCKS_FILE
        options_path = OPTIONS_FILE

    stocks_data = TiingoData(stocks_path)
    options_data = HistoricalOptionsData(options_path)

    if stocks_path == STOCKS_FILE:
        # Fixture-only overrides: constant stock price plus hand-set quotes.
        stocks_data._data["adjClose"] = 10
        quote_overrides = (
            (2, "ask", 1),
            (2, "bid", 0.5),
            (51, "ask", 1.5),
            (50, "bid", 0.5),
            (130, "bid", 0.5),
            (131, "bid", 1.5),
            (206, "bid", 0.5),
            (207, "bid", 1.5),
        )
        for row_label, column, value in quote_overrides:
            options_data._data.at[row_label, column] = value
    return stocks_data, options_data, stocks_path
def _build_param_grid(n: int, underlying: str = "SPX") -> list[dict]:
"""Generate n parameter override dicts varying DTE thresholds.
Returns list of dicts with both Rust filter strings AND raw dte values
so both the Rust and Python paths can use the same grid.
"""
dte_mins = np.linspace(20, 90, max(int(n**0.5), 2)).astype(int)
dte_exits = np.linspace(5, 45, max(int(n / len(dte_mins)) + 1, 2)).astype(int)
grid = []
for dmin in dte_mins:
for dex in dte_exits:
if len(grid) >= n:
break
grid.append({
"label": f"dte_min={dmin}_exit={dex}",
"leg_entry_filters": [
f"(underlying == '{underlying}') & (dte >= {dmin})",
],
"leg_exit_filters": [
f"dte <= {dex}",
],
# Raw params for Python path
"_dte_min": int(dmin),
"_dte_exit": int(dex),
"_underlying": underlying,
})
if len(grid) >= n:
break
return grid[:n]
def _build_rust_config(stocks_data, options_data, stocks, underlying="SPX"):
    """Build the inputs for rust.parallel_sweep and rust.run_backtest_py.

    Args:
        stocks_data: Stocks provider; its ``_data`` frame and ``schema`` are read.
        options_data: Options provider; its ``_data`` frame and ``schema`` are read.
        stocks: Stock allocations, exported as ``(symbol, percentage)`` pairs.
        underlying: Underlying symbol used in the baseline entry filter.

    Returns:
        ``(config, schema_mapping, opts_pl, stocks_pl, strat)``: the config
        dict, the column-name mapping, Polars copies of both frames (datetime
        columns converted to strings) and the baseline strategy object.
    """
    schema = options_data.schema
    # Baseline single-leg long put: enter at >= 60 DTE, exit filter at <= 30 DTE.
    strat = Strategy(schema)
    leg = StrategyLeg("leg_1", schema, option_type=Type.PUT, direction=Direction.BUY)
    leg.entry_filter = (schema.underlying == underlying) & (schema.dte >= 60)
    leg.exit_filter = schema.dte <= 30
    strat.add_legs([leg])
    date_fmt = "%Y-%m-%d %H:%M:%S"
    # One row per distinct quote date, indexed by that date.
    dates_df = (
        pd.DataFrame(options_data._data[["quotedate", "volume"]])
        .drop_duplicates("quotedate")
        .set_index("quotedate")
    )
    # Earliest quote date inside each business-month-start bucket.
    rebalancing_days = pd.to_datetime(
        dates_df.groupby(pd.Grouper(freq="1BMS"))
        .apply(lambda x: x.index.min())
        .values
    )
    rb_dates = [d.strftime(date_fmt) for d in rebalancing_days]
    config = {
        "allocation": {"stocks": 0.97, "options": 0.03, "cash": 0.0},
        "initial_capital": 1_000_000.0,
        "shares_per_contract": 100,
        "rebalance_dates": rb_dates,
        "legs": [{
            "name": leg.name,
            "entry_filter": leg.entry_filter.query,
            "exit_filter": leg.exit_filter.query,
            "direction": leg.direction.value,
            "type": leg.type.value,
            "entry_sort_col": None,
            "entry_sort_asc": True,
        }],
        "profit_pct": None,
        "loss_pct": None,
        "stocks": [(s.symbol, s.percentage) for s in stocks],
    }
    stocks_schema = stocks_data.schema
    opts_schema = options_data.schema
    # Logical field name -> actual column name in the frames.
    schema_mapping = {
        "contract": opts_schema["contract"],
        "date": opts_schema["date"],
        "stocks_date": stocks_schema["date"],
        "stocks_symbol": stocks_schema["symbol"],
        "stocks_price": stocks_schema["adjClose"],
        "underlying": opts_schema["underlying"],
        "expiration": opts_schema["expiration"],
        "type": opts_schema["type"],
        "strike": opts_schema["strike"],
    }
    # Convert datetime columns to strings on copies, leaving the providers untouched.
    opts_copy = options_data._data.copy()
    for c in [opts_schema["date"], opts_schema["expiration"]]:
        if c in opts_copy.columns and pd.api.types.is_datetime64_any_dtype(opts_copy[c]):
            opts_copy[c] = opts_copy[c].dt.strftime(date_fmt)
    stocks_copy = stocks_data._data.copy()
    sc = stocks_schema["date"]
    if sc in stocks_copy.columns and pd.api.types.is_datetime64_any_dtype(stocks_copy[sc]):
        stocks_copy[sc] = stocks_copy[sc].dt.strftime(date_fmt)
    opts_pl = pl.from_pandas(opts_copy)
    stocks_pl = pl.from_pandas(stocks_copy)
    return config, schema_mapping, opts_pl, stocks_pl, strat
def run_rust_sweep(opts_pl, stocks_pl, config, schema_mapping, param_grid, runs):
    """Time ``rust.parallel_sweep`` over the whole grid, ``runs`` times.

    Returns:
        ``(durations, results)``: per-run wall-clock seconds and the results
        of the last run (``None`` when ``runs`` is 0).
    """
    durations = []
    latest = None
    for _ in range(runs):
        # Collect garbage first so a pending collection does not land in the timing.
        gc.collect()
        started = time.perf_counter()
        sweep_output = rust.parallel_sweep(
            opts_pl, stocks_pl, config, schema_mapping, param_grid, None,
        )
        durations.append(time.perf_counter() - started)
        latest = sweep_output
    return durations, latest
def run_python_sequential(stocks_data, options_data, stocks, param_grid, runs, underlying="SPX"):
    """Time one Python backtest per grid point, executed one after another.

    Each timed pass covers the whole grid, including the per-point data
    copies and strategy construction. Rust dispatch is disabled around every
    ``engine.run`` call and restored afterwards.

    Returns:
        ``(durations, results)``: per-pass wall-clock seconds and the last
        pass's list of ``{"label", "final_capital", "total_trades"}`` dicts.
    """
    durations = []
    latest = None
    for _ in range(runs):
        gc.collect()
        started = time.perf_counter()
        outcomes = []
        for point in param_grid:
            # Clone the providers without re-reading the files.
            stocks_copy = TiingoData.__new__(TiingoData)
            stocks_copy.__dict__.update(stocks_data.__dict__)
            stocks_copy._data = stocks_data._data.copy()
            options_copy = HistoricalOptionsData.__new__(HistoricalOptionsData)
            options_copy.__dict__.update(options_data.__dict__)
            options_copy._data = options_data._data.copy()

            schema = options_copy.schema
            strategy = Strategy(schema)
            put_leg = StrategyLeg("leg_1", schema, option_type=Type.PUT, direction=Direction.BUY)
            # Rebuild the filters from the raw values stored next to the Rust strings.
            entry_dte = point.get("_dte_min", 60)
            exit_dte = point.get("_dte_exit", 30)
            symbol = point.get("_underlying", underlying)
            put_leg.entry_filter = (schema.underlying == symbol) & (schema.dte >= entry_dte)
            put_leg.exit_filter = schema.dte <= exit_dte
            strategy.add_legs([put_leg])

            engine = BacktestEngine(
                {"stocks": 0.97, "options": 0.03, "cash": 0},
                cost_model=NoCosts(),
            )
            engine.stocks = stocks
            engine.stocks_data = stocks_copy
            engine.options_data = options_copy
            engine.options_strategy = strategy

            previous_flag = _rust_dispatch.RUST_AVAILABLE
            _rust_dispatch.RUST_AVAILABLE = False
            try:
                engine.run(rebalance_freq=1)
            finally:
                _rust_dispatch.RUST_AVAILABLE = previous_flag

            ending_capital = float(engine.balance["total capital"].iloc[-1])
            trade_count = 0 if engine.trade_log.empty else len(engine.trade_log)
            outcomes.append({
                "label": point.get("label", ""),
                "final_capital": ending_capital,
                "total_trades": trade_count,
            })
        durations.append(time.perf_counter() - started)
        latest = outcomes
    return durations, latest
def run_rust_single(opts_pl, stocks_pl, config, schema_mapping, runs):
    """Time a single ``rust.run_backtest_py`` call, ``runs`` times.

    Returns:
        Per-run wall-clock seconds; the backtest output is discarded.
    """
    durations = []
    for _ in range(runs):
        gc.collect()
        started = time.perf_counter()
        rust.run_backtest_py(opts_pl, stocks_pl, config, schema_mapping)
        durations.append(time.perf_counter() - started)
    return durations
def run_python_single(stocks_data, options_data, stocks, runs, underlying="SPX"):
    """Time a single Python backtest, ``runs`` times.

    Only ``engine.run`` is timed; data copies and strategy construction
    happen before the clock starts. Rust dispatch is disabled around the
    timed call and restored afterwards.

    Returns:
        Per-run wall-clock seconds.
    """
    durations = []
    for _ in range(runs):
        # Clone the providers without re-reading the files.
        stocks_copy = TiingoData.__new__(TiingoData)
        stocks_copy.__dict__.update(stocks_data.__dict__)
        stocks_copy._data = stocks_data._data.copy()
        options_copy = HistoricalOptionsData.__new__(HistoricalOptionsData)
        options_copy.__dict__.update(options_data.__dict__)
        options_copy._data = options_data._data.copy()

        engine = BacktestEngine(
            {"stocks": 0.97, "options": 0.03, "cash": 0},
            cost_model=NoCosts(),
        )
        engine.stocks = stocks
        engine.stocks_data = stocks_copy
        engine.options_data = options_copy

        schema = options_copy.schema
        strategy = Strategy(schema)
        put_leg = StrategyLeg("leg_1", schema, option_type=Type.PUT, direction=Direction.BUY)
        put_leg.entry_filter = (schema.underlying == underlying) & (schema.dte >= 60)
        put_leg.exit_filter = schema.dte <= 30
        strategy.add_legs([put_leg])
        engine.options_strategy = strategy

        previous_flag = _rust_dispatch.RUST_AVAILABLE
        _rust_dispatch.RUST_AVAILABLE = False
        try:
            gc.collect()
            started = time.perf_counter()
            engine.run(rebalance_freq=1)
            run_seconds = time.perf_counter() - started
        finally:
            _rust_dispatch.RUST_AVAILABLE = previous_flag
        durations.append(run_seconds)
    return durations
def main():
    """Compare Rust ``parallel_sweep`` with sequential Python backtests.

    Exits with status 1 when the Rust extension is unavailable. The stock
    portfolio, the underlying symbol and the printed data label all follow
    the dataset that was actually loaded, so a missing production file
    cannot pair the test fixture with production settings.
    """
    args = parse_args()
    if not use_rust():
        print("ERROR: Rust extension not available. Build with: make rust-build")
        sys.exit(1)
    stocks_data, options_data, sf = _load_data(args.use_prod_data)
    # _load_data falls back to the fixture unless BOTH production files exist.
    using_prod = sf == PROD_STOCKS_FILE
    if args.use_prod_data and not using_prod:
        print("WARNING: production data files not found; falling back to test data.")
    if using_prod:
        stocks = [Stock("SPY", 1.0)]
        underlying = "SPY"
    else:
        stocks = [Stock("VTI", 0.2), Stock("VEU", 0.2), Stock("BND", 0.2),
                  Stock("VNQ", 0.2), Stock("DBC", 0.2)]
        underlying = "SPX"
    n_rows = len(options_data._data)
    n_dates = options_data._data["quotedate"].nunique()
    print(f"\n{'='*65}")
    print("Benchmark: Rust parallel_sweep vs Python sequential")
    print(f"{'='*65}")
    print(f" Data: {'production' if using_prod else 'test'} ({n_rows:,} options rows, {n_dates} dates)")
    print(f" Underlying: {underlying}")
    print(f" Grid sizes: {args.grid_sizes}")
    print(f" Runs per test: {args.runs}")
    print(f" CPU cores: {os.cpu_count()}")
    print()
    # Build Rust config once (amortized over all grid sizes)
    config, schema_mapping, opts_pl, stocks_pl, strat = _build_rust_config(
        stocks_data, options_data, stocks, underlying=underlying
    )
    # -- Single backtest comparison --
    print("--- Single Backtest (1 run) ---")
    rust_single = run_rust_single(opts_pl, stocks_pl, config, schema_mapping, args.runs)
    python_single = run_python_single(stocks_data, options_data, stocks, args.runs, underlying=underlying)
    rust_single_avg = np.mean(rust_single)
    py_single_avg = np.mean(python_single)
    # Guard the ratio so a zero timing cannot abort the benchmark.
    single_speedup = py_single_avg / rust_single_avg if rust_single_avg > 0 else float("nan")
    rust_single_runs = ", ".join(f"{t:.4f}s" for t in rust_single)
    python_single_runs = ", ".join(f"{t:.4f}s" for t in python_single)
    print(f" Rust single: {rust_single_avg:.4f}s (per-run: [{rust_single_runs}])")
    print(f" Python single: {py_single_avg:.4f}s (per-run: [{python_single_runs}])")
    print(f" Speedup: {single_speedup:.2f}x {'(Rust faster)' if rust_single_avg < py_single_avg else '(Python faster)'}")
    print()
    # -- Grid sweep comparison --
    print("--- Grid Sweep (N parallel Rust vs N sequential Python) ---")
    rows = []
    # Kept numeric so the conclusion does not re-parse a formatted string.
    final_speedup = float("nan")
    for grid_size in args.grid_sizes:
        param_grid = _build_param_grid(grid_size, underlying=underlying)
        print(f"\n Grid size: {grid_size}")
        # Rust parallel_sweep
        rust_times, rust_results = run_rust_sweep(
            opts_pl, stocks_pl, config, schema_mapping, param_grid, args.runs
        )
        rust_avg = np.mean(rust_times)
        rust_runs = ", ".join(f"{t:.4f}s" for t in rust_times)
        print(f" Rust parallel: {rust_avg:.4f}s (per-run: [{rust_runs}])")
        # Python sequential
        python_times, python_results = run_python_sequential(
            stocks_data, options_data, stocks, param_grid, args.runs, underlying=underlying
        )
        py_avg = np.mean(python_times)
        python_runs = ", ".join(f"{t:.4f}s" for t in python_times)
        print(f" Python seq: {py_avg:.4f}s (per-run: [{python_runs}])")
        speedup = py_avg / rust_avg if rust_avg > 0 else float("nan")
        throughput_rust = grid_size / rust_avg if rust_avg > 0 else 0
        throughput_py = grid_size / py_avg if py_avg > 0 else 0
        print(f" Speedup: {speedup:.2f}x {'(Rust faster)' if speedup > 1 else '(Python faster)'}")
        print(f" Throughput: Rust={throughput_rust:.1f}/s, Python={throughput_py:.1f}/s")
        rows.append({
            "Grid": grid_size,
            "Rust (s)": f"{rust_avg:.4f}",
            "Python (s)": f"{py_avg:.4f}",
            "Speedup": f"{speedup:.2f}x",
            "Rust runs/s": f"{throughput_rust:.1f}",
            "Python runs/s": f"{throughput_py:.1f}",
        })
        final_speedup = speedup
    # -- Summary Table --
    print(f"\n{'='*65}")
    print("Summary")
    print(f"{'='*65}")
    df = pd.DataFrame(rows)
    print(df.to_string(index=False))
    print(f"\n{'='*65}")
    print("Conclusion")
    print(f"{'='*65}")
    if rows:
        if final_speedup > 1:
            print(f" Rust parallel_sweep is {final_speedup:.1f}x faster for {args.grid_sizes[-1]} grid points.")
            print(f" For optimization/grid search, Rust + Rayon provides real value.")
        elif final_speedup > 0:
            print(f" Rust is {1/final_speedup:.1f}x slower even for parallel sweep.")
            print(f" The Pandas<->Polars conversion overhead dominates.")
        else:
            # Zero or NaN speedup: timings were too small or missing to compare.
            print(" Sweep timings were inconclusive.")
    if 0 < single_speedup < 1:
        print(f" Single backtest: Rust is {1/single_speedup:.1f}x SLOWER (conversion overhead).")
    elif single_speedup >= 1:
        print(f" Single backtest: Rust is {single_speedup:.1f}x faster.")
    print("\nDone.")
# Run the sweep benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()
================================================
FILE: benchmarks/compare_with_bt.py
================================================
"""Head-to-head comparison: options_portfolio_backtester stock-only mode vs bt.
This harness runs the same monthly stock-rebalance policy in both frameworks:
- options_portfolio_backtester (legacy Backtest with options allocation = 0)
- bt (if installed)
Outputs a small scorecard with performance and runtime metrics.
"""
from __future__ import annotations
import argparse
import sys
import time
from dataclasses import dataclass
from pathlib import Path
import numpy as np
import pandas as pd
REPO_ROOT = Path(__file__).resolve().parents[1]
from options_portfolio_backtester import BacktestEngine as Backtest
from options_portfolio_backtester.data.providers import TiingoData
from options_portfolio_backtester.core.types import Stock
@dataclass
class RunResult:
    """Scorecard produced by one framework's backtest.

    The ``*_pct`` fields are percentages (the runners multiply the raw
    fractions by 100 before storing them); ``equity`` is the curve the
    metrics were computed from.
    """

    # Label printed as the heading of the scorecard.
    name: str

    # Performance figures.
    total_return_pct: float
    cagr_pct: float
    max_drawdown_pct: float
    vol_annual_pct: float
    sharpe: float

    # Mean wall-clock seconds per run.
    runtime_s: float

    # Coverage of the equity curve.
    start_date: str
    end_date: str
    n_days: int

    # Equity curve used for the overlap comparison.
    equity: pd.Series
def parse_args() -> argparse.Namespace:
    """Parse the comparison harness's command-line options from ``sys.argv``."""
    cli = argparse.ArgumentParser(
        description="Compare options_portfolio_backtester vs bt on stock-only allocation."
    )
    # Each entry is (flag, keyword arguments for add_argument).
    option_table = (
        ("--stocks-file", {"default": "data/processed/stocks.csv"}),
        ("--options-file", {"default": "data/processed/options.csv"}),
        ("--symbols", {
            "default": "SPY",
            "help": "Comma-separated symbols. Example: SPY or SPY,TLT,GLD",
        }),
        ("--weights", {
            "default": None,
            "help": "Comma-separated weights matching symbols. Defaults equal.",
        }),
        ("--initial-capital", {"type": float, "default": 1_000_000.0}),
        ("--rebalance-months", {
            "type": int,
            "default": 1,
            "help": "Business-month-start rebalance frequency.",
        }),
        ("--runs", {"type": int, "default": 3, "help": "Runtime averaging repeats."}),
    )
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)
    return cli.parse_args()
def normalize_weights(symbols: list[str], raw_weights: str | None) -> list[float]:
    """Turn the ``--weights`` string into weights that sum to one.

    Args:
        symbols: Symbols the weights apply to, in order.
        raw_weights: Comma-separated numbers, or ``None`` for equal weights.

    Returns:
        One weight per symbol, each divided by the total.

    Raises:
        ValueError: If the number of weights differs from the number of
            symbols, or the weights do not sum to a positive value.
    """
    n_symbols = len(symbols)
    if raw_weights is None:
        return [1.0 / n_symbols] * n_symbols

    parsed = list(map(float, raw_weights.split(",")))
    if len(parsed) != n_symbols:
        raise ValueError("--weights length must match --symbols length")

    weight_sum = float(sum(parsed))
    if weight_sum <= 0:
        raise ValueError("--weights must sum to > 0")

    return [weight / weight_sum for weight in parsed]
def compute_metrics(total_capital: pd.Series) -> tuple[float, float, float, float, float]:
    """Derive summary statistics from an equity curve.

    NaN entries are dropped first. Annualisation assumes 252 rows per year.

    Returns:
        ``(total_return, cagr, max_drawdown, annual_vol, sharpe)`` as
        fractions (not percentages); all zeros when the curve is empty.
    """
    equity = total_capital.dropna()
    if equity.empty:
        return 0.0, 0.0, 0.0, 0.0, 0.0

    first_value = equity.iloc[0]
    last_value = equity.iloc[-1]
    total_return = last_value / first_value - 1.0

    years = len(equity) / 252.0
    if years > 0:
        cagr = (last_value / first_value) ** (1.0 / years) - 1.0
    else:
        cagr = 0.0

    # Drawdown is measured against the running maximum of the curve.
    drawdown = equity / equity.cummax() - 1.0
    max_dd = 0.0 if drawdown.empty else float(drawdown.min())

    period_returns = equity.pct_change().dropna()
    annualizer = np.sqrt(252)
    vol = 0.0
    sharpe = 0.0
    # A sample standard deviation needs at least two returns.
    if len(period_returns) > 1:
        sigma = period_returns.std(ddof=1)
        vol = float(sigma * annualizer)
        if sigma > 0:
            sharpe = float((period_returns.mean() / sigma) * annualizer)

    return total_return, cagr, max_dd, vol, sharpe
def run_options_portfolio_backtester(
    stocks_file: str,
    symbols: list[str],
    weights: list[float],
    initial_capital: float,
    rebalance_months: int,
    runs: int,
) -> RunResult:
    """Run the stock-only policy through this project's engine and score it.

    The engine is rebuilt and run ``runs`` times; only the ``run`` call is
    timed, and the metrics come from the last run's ``total capital`` column.
    """
    stocks_data = TiingoData(stocks_file)
    stocks = [Stock(symbol, weight) for symbol, weight in zip(symbols, weights)]

    elapsed: list[float] = []
    last_engine = None
    for _ in range(runs):
        engine = Backtest(
            {"stocks": 1.0, "options": 0.0, "cash": 0.0},
            initial_capital=int(initial_capital),
        )
        engine.stocks = stocks
        engine.stocks_data = stocks_data

        started = time.perf_counter()
        engine.run(rebalance_freq=rebalance_months, rebalance_unit="BMS")
        elapsed.append(time.perf_counter() - started)
        last_engine = engine

    assert last_engine is not None
    equity = last_engine.balance["total capital"].dropna()
    total_return, cagr, max_dd, vol, sharpe = compute_metrics(equity)

    first_day = equity.index.min().date()
    last_day = equity.index.max().date()
    return RunResult(
        name="options_portfolio_backtester",
        total_return_pct=total_return * 100.0,
        cagr_pct=cagr * 100.0,
        max_drawdown_pct=max_dd * 100.0,
        vol_annual_pct=vol * 100.0,
        sharpe=sharpe,
        runtime_s=float(np.mean(elapsed)),
        start_date=str(first_day),
        end_date=str(last_day),
        n_days=int(len(equity)),
        equity=equity,
    )
def run_bt(
    stocks_file: str,
    symbols: list[str],
    weights: list[float],
    initial_capital: float,
    runs: int,
) -> RunResult | None:
    """Run the same monthly rebalance policy through ``bt`` and score it.

    Returns:
        The scorecard for the last of ``runs`` runs, or ``None`` when the
        ``bt`` package cannot be imported.
    """
    try:
        import bt  # type: ignore
    except Exception:
        return None

    # Build a date x symbol matrix of adjusted closes, keeping only dates
    # on which every requested symbol has a price.
    raw = pd.read_csv(stocks_file, parse_dates=["date"])
    selected = raw[raw["symbol"].isin(symbols)].copy()
    wide = selected.pivot(index="date", columns="symbol", values="adjClose")
    px = wide.sort_index().dropna()
    px = px[symbols]

    target_weights = dict(zip(symbols, weights))
    elapsed: list[float] = []
    outcome = None
    for _ in range(runs):
        pipeline = [
            bt.algos.RunMonthly(),
            bt.algos.SelectThese(symbols),
            bt.algos.WeighSpecified(**target_weights),
            bt.algos.Rebalance(),
        ]
        strategy = bt.Strategy("bt_monthly_rebal", pipeline)
        backtest = bt.Backtest(strategy, px, initial_capital=initial_capital)

        started = time.perf_counter()
        outcome = bt.run(backtest)
        elapsed.append(time.perf_counter() - started)

    assert outcome is not None
    equity = outcome.prices.iloc[:, 0]
    total_return, cagr, max_dd, vol, sharpe = compute_metrics(equity)

    first_day = equity.index.min().date()
    last_day = equity.index.max().date()
    return RunResult(
        name="bt",
        total_return_pct=total_return * 100.0,
        cagr_pct=cagr * 100.0,
        max_drawdown_pct=max_dd * 100.0,
        vol_annual_pct=vol * 100.0,
        sharpe=sharpe,
        runtime_s=float(np.mean(elapsed)),
        start_date=str(first_day),
        end_date=str(last_day),
        n_days=int(len(equity)),
        equity=equity,
    )
def print_result(r: RunResult) -> None:
    """Print one framework's scorecard to stdout, one metric per line."""
    report_lines = (
        f"{r.name}",
        f" period: {r.start_date} -> {r.end_date} ({r.n_days} rows)",
        f" total_return: {r.total_return_pct:8.2f}%",
        f" cagr: {r.cagr_pct:8.2f}%",
        f" max_drawdown: {r.max_drawdown_pct:8.2f}%",
        f" vol_annual: {r.vol_annual_pct:8.2f}%",
        f" sharpe: {r.sharpe:8.3f}",
        f" runtime: {r.runtime_s:8.4f}s",
    )
    for line in report_lines:
        print(line)
def print_overlap_parity(a: RunResult, b: RunResult) -> None:
    """Print how closely two equity curves agree on their shared dates.

    Each curve is restricted to the shared index and rebased to start at
    1.0; the final and the largest absolute difference are then reported.
    """
    shared_index = a.equity.index.intersection(b.equity.index)
    if len(shared_index) == 0:
        print(" overlap: none")
        return

    def rebased(curve):
        # Restrict to the shared dates, then normalise by the first value.
        window = curve.loc[shared_index]
        return window / window.iloc[0]

    gap = rebased(a.equity) - rebased(b.equity)
    print(f" overlap rows: {len(shared_index)}")
    print(f" overlap end delta: {float(gap.iloc[-1]):.6e}")
    print(f" overlap max abs delta: {float(gap.abs().max()):.6e}")
def main() -> None:
    """Run both frameworks on the same inputs and print a comparison scorecard."""
    args = parse_args()

    symbols = [token.strip().upper() for token in args.symbols.split(",") if token.strip()]
    if not symbols:
        raise ValueError("No symbols provided")
    weights = normalize_weights(symbols, args.weights)

    # Only the stocks file is required; the options allocation is zero.
    required_files = (args.stocks_file,)
    for file_path in required_files:
        if not Path(file_path).exists():
            raise FileNotFoundError(f"Missing file: {file_path}")

    shared_inputs = {
        "stocks_file": args.stocks_file,
        "symbols": symbols,
        "weights": weights,
        "initial_capital": args.initial_capital,
        "runs": args.runs,
    }
    ob = run_options_portfolio_backtester(
        rebalance_months=args.rebalance_months,
        **shared_inputs,
    )
    bt_res = run_bt(**shared_inputs)

    print("\n=== Comparison Scorecard ===")
    print_result(ob)

    if bt_res is None:
        print("\nbt")
        print(" not available (module 'bt' is not installed in this environment).")
        print(" install in nix shell and rerun:")
        print(" pip install bt")
        return

    print()
    print_result(bt_res)

    if ob.runtime_s > 0:
        speedup = bt_res.runtime_s / ob.runtime_s
    else:
        speedup = float("nan")
    return_gap = ob.total_return_pct - bt_res.total_return_pct
    drawdown_gap = ob.max_drawdown_pct - bt_res.max_drawdown_pct

    print("\nsummary")
    print(f" speed ratio (bt / options_portfolio_backtester): {speedup:0.2f}x")
    print(
        f" return delta (options_portfolio_backtester - bt): "
        f"{return_gap:0.2f} pct-pts"
    )
    print(
        f" maxDD delta (options_portfolio_backtester - bt): "
        f"{drawdown_gap:0.2f} pct-pts"
    )
    print(" overlap parity:")
    print_overlap_parity(ob, bt_res)


if __name__ == "__main__":
    main()
================================================
FILE: data/README.md
================================================
# Data Scripts
Scripts for fetching and converting market data into the formats expected by the backtester.
## Quick Start
Fetch both stock and options data for SPY, aligned by date:
```bash
python data/fetch_data.py all --symbols SPY --start 2020-01-01 --end 2023-01-01
```
Data is first fetched from the [self-hosted GitHub Release](https://github.com/lambdaclass/options_backtester/releases/tag/data-v1), falling back to [philippdubach/options-data](https://github.com/philippdubach/options-data) CDN and yfinance. Outputs:
- `data/processed/stocks.csv` — Tiingo-format stock data
- `data/processed/options.csv` — options data with Greeks
## Subcommands
```bash
# Stocks only (GitHub Release > options-dataset-hist > options-data > yfinance)
python data/fetch_data.py stocks --symbols SPY --start 2020-01-01 --end 2023-01-01
# Options only
python data/fetch_data.py options --symbols SPY --start 2020-01-01 --end 2023-01-01
# Both + date alignment (default)
python data/fetch_data.py all --symbols SPY --start 2020-01-01 --end 2023-01-01
# Multiple symbols
python data/fetch_data.py all --symbols SPY IWM QQQ --start 2020-01-01 --end 2023-01-01
# Custom output paths
python data/fetch_data.py all --symbols SPY --start 2020-01-01 --end 2023-01-01 \
--stocks-output data/processed/spy_stocks.csv \
--options-output data/processed/spy_options.csv
# Force re-download (ignore cached parquet files)
python data/fetch_data.py all --symbols SPY --start 2020-01-01 --end 2023-01-01 --update
```
## OptionsDX Conversion (separate)
For SPX index options from [optionsdx.com](https://www.optionsdx.com/):
```bash
python data/convert_optionsdx.py data/raw/spx_eod_2020.csv --output data/processed/spx_options.csv
```
## Loading Data in the Backtester
```python
from options_portfolio_backtester import HistoricalOptionsData, TiingoData
options = HistoricalOptionsData("data/processed/options.csv")
stocks = TiingoData("data/processed/stocks.csv")
```
The `all` subcommand automatically aligns stock and option dates so the backtester's `np.array_equal` assertion passes.
## Directory Structure
- `raw/` — Cached parquet downloads (gitignored)
- `processed/` — Converted CSV output ready for the backtester (gitignored)
================================================
FILE: data/convert_optionsdx.py
================================================
#!/usr/bin/env python3
"""Convert OptionsDX wide-format CSV to backtester long-format CSV.
OptionsDX provides one row per strike/date/expiry with both call and put data
in wide format. The backtester expects one row per contract in long format.
Usage:
python data/convert_optionsdx.py data/raw/spx_eod.csv --output data/processed/spx_options.csv
"""
import argparse
import sys
import pandas as pd
# OptionsDX columns we need (they have trailing spaces, stripped on read).
# Calls and puts carry the same per-contract fields under a "C_" or "P_"
# prefix, so both maps are derived from one (wide suffix, long name) table.
_WIDE_TO_LONG_FIELDS = (
    ("BID", "bid"),
    ("ASK", "ask"),
    ("LAST", "last"),
    ("VOLUME", "volume"),
    ("IV", "impliedvol"),
    ("DELTA", "delta"),
    ("GAMMA", "gamma"),
    ("THETA", "theta"),
    ("VEGA", "vega"),
)

CALL_COLS = {f"C_{suffix}": long_name for suffix, long_name in _WIDE_TO_LONG_FIELDS}

PUT_COLS = {f"P_{suffix}": long_name for suffix, long_name in _WIDE_TO_LONG_FIELDS}

# Column order of the long-format CSV written by convert().
OUTPUT_COLUMNS = [
    # contract identity
    "underlying", "underlying_last", "optionroot", "type",
    "expiration", "quotedate", "strike",
    # quotes and activity
    "last", "bid", "ask", "volume", "openinterest",
    # volatility and Greeks
    "impliedvol", "delta", "gamma", "theta", "vega",
    # alias
    "optionalias",
]
def make_optionroot(expire_dates, option_type, strikes):
    """Generate OCC-format option root symbols vectorized.

    Format: SPX{YYMMDD}{C|P}{strike*1000:08d}
    Example: SPX170317C00300000

    Args:
        expire_dates: Datetime Series of expiration dates.
        option_type: ``"call"`` produces ``C``; any other value produces ``P``.
        strikes: Numeric Series of strike prices, aligned with ``expire_dates``.

    Returns:
        A string Series with one symbol per row.
    """
    date_str = expire_dates.dt.strftime("%y%m%d")
    type_char = "C" if option_type == "call" else "P"
    # Round before the integer cast: strike * 1000 can land a hair below the
    # intended integer in binary floating point (1.001 * 1000 is
    # 1000.9999999999999), and a bare astype(int) would truncate it.
    thousandths = (strikes * 1000).round().astype(int)
    strike_str = thousandths.astype(str).str.zfill(8)
    return "SPX" + date_str + type_char + strike_str
def convert(input_path, output_path):
    """Convert an OptionsDX wide CSV into the backtester's long CSV.

    Every input row (one strike/date/expiry with call and put fields side by
    side) becomes two output rows, one per contract. The result is sorted by
    quote date, expiration, strike and type before being written.

    Args:
        input_path: Path of the OptionsDX CSV to read.
        output_path: Path of the CSV to write; missing parent directories
            are created.
    """
    # Local import: this module's top-level imports do not include pathlib.
    from pathlib import Path

    df = pd.read_csv(input_path)
    # Strip whitespace from column names (OptionsDX CSVs have trailing spaces).
    # This has to happen before the date columns are looked up by name, so
    # dates are parsed here rather than through read_csv(parse_dates=...).
    df.columns = df.columns.str.strip()
    for date_col in ("QUOTE_DATE", "EXPIRE_DATE"):
        df[date_col] = pd.to_datetime(df[date_col].astype(str).str.strip())

    shared = {
        "underlying": "SPX",
        "underlying_last": df["UNDERLYING_LAST"],
        "expiration": df["EXPIRE_DATE"],
        "quotedate": df["QUOTE_DATE"],
        "strike": df["STRIKE"],
        "openinterest": 0,
    }

    def build_side(option_type, column_map):
        # One long-format frame for either the call or the put columns.
        side = pd.DataFrame(shared)
        side["type"] = option_type
        for src, dst in column_map.items():
            side[dst] = df[src].values
        side["optionroot"] = make_optionroot(df["EXPIRE_DATE"], option_type, df["STRIKE"])
        side["optionalias"] = side["optionroot"]
        return side

    calls = build_side("call", CALL_COLS)
    puts = build_side("put", PUT_COLS)

    result = pd.concat([calls, puts], ignore_index=True)
    result = result[OUTPUT_COLUMNS]
    result = result.sort_values(["quotedate", "expiration", "strike", "type"])

    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    result.to_csv(output_path, index=False)
    print(f"Wrote {len(result)} rows to {output_path}")
def main():
    """Command-line entry point: parse the arguments and run the conversion."""
    default_output = "data/processed/spx_options.csv"
    cli = argparse.ArgumentParser(description="Convert OptionsDX wide CSV to backtester long CSV")
    cli.add_argument("input", help="Path to OptionsDX CSV file")
    cli.add_argument(
        "--output",
        default=default_output,
        help="Output path (default: data/processed/spx_options.csv)",
    )
    parsed = cli.parse_args()
    convert(parsed.input, parsed.output)


if __name__ == "__main__":
    main()
================================================
FILE: data/fetch_data.py
================================================
#!/usr/bin/env python3
"""Unified data fetch script for the options backtester.
Downloads stock and options data, converts to backtester CSV formats,
and aligns dates between datasets.
Download priority (for each symbol):
1. Self-hosted GitHub Release (lambdaclass/options_backtester data-v1)
2. philippdubach/options-data CDN — 104 symbols
3. philippdubach/options-dataset-hist — SPY/IWM/QQQ underlying prices
4. yfinance (last resort, stocks only)
Usage:
python data/fetch_data.py all --symbols SPY --start 2020-01-01 --end 2023-01-01
python data/fetch_data.py stocks --symbols SPY --start 2020-01-01 --end 2023-01-01
python data/fetch_data.py options --symbols SPY --start 2020-01-01 --end 2023-01-01
python data/fetch_data.py all --symbols SPY --start 2020-01-01 --end 2023-01-01 --update
"""
import argparse
import shutil
import sys
from pathlib import Path
from urllib.request import Request, urlopen
import pandas as pd
# Directory that contains this script; the cache and output folders live beside it.
BASE_DIR = Path(__file__).resolve().parent
# Download cache: every parquet destination path in this module is built under RAW_DIR.
RAW_DIR = BASE_DIR / "raw"
# Created by the fetch functions before they write their CSV output.
PROCESSED_DIR = BASE_DIR / "processed"
# Self-hosted data on GitHub Releases (primary source)
RELEASE_URL = "https://github.com/lambdaclass/options_backtester/releases/download/data-v1"
# philippdubach/options-data — 104 symbols, options + underlying (underlying empty for some ETFs)
OPTIONS_DATA_URL = "https://static.philippdubach.com/data/options"
# philippdubach/options-dataset-hist — SPY/IWM/QQQ, proper underlying_prices via GitHub LFS
HIST_REPO_RAW = "https://github.com/philippdubach/options-dataset-hist/raw/main/data"
# Symbols for which download_underlying() also tries the options-dataset-hist source.
HIST_SYMBOLS = {"SPY", "IWM", "QQQ"}
# ---------------------------------------------------------------------------
# Download helpers
# ---------------------------------------------------------------------------
def _download(url, dest, force=False):
"""Download url to dest. Returns dest on success, None on failure."""
dest = Path(dest)
dest.parent.mkdir(parents=True, exist_ok=True)
if dest.exists() and not force:
print(f" Using cached {dest}")
return dest
print(f" Downloading {url} ...")
try:
req = Request(url, headers={"User-Agent": "options-backtester/1.0"})
with urlopen(req) as resp, open(dest, "wb") as f:
shutil.copyfileobj(resp, f)
print(f" Saved to {dest}")
except Exception as e:
print(f" Error: {e}", file=sys.stderr)
if dest.exists():
dest.unlink()
return None
return dest
def download_options_parquet(symbol, force=False):
    """Download options parquet. Priority: GitHub Release > options-data CDN.

    Returns the local path of the first source that downloads, or ``None``
    if both fail.
    """
    sym = symbol.upper()

    # 1. Self-hosted GitHub Release
    release_name = f"{sym}_options.parquet"
    from_release = _download(
        f"{RELEASE_URL}/{release_name}",
        RAW_DIR / "release" / release_name,
        force,
    )
    if from_release is not None:
        return from_release

    # 2. options-data CDN
    cdn_url = f"{OPTIONS_DATA_URL}/{sym.lower()}/options.parquet"
    cdn_dest = RAW_DIR / "options-data" / sym / "options.parquet"
    return _download(cdn_url, cdn_dest, force)
def download_underlying(symbol, force=False):
    """Download underlying prices.

    Priority: GitHub Release > options-dataset-hist > options-data > None (caller falls back to yfinance).

    A source counts only if its parquet downloads and contains at least one
    row; otherwise the next source is tried.
    """
    sym = symbol.upper()
    sym_lower = sym.lower()

    # (label used in the warning, local destination, url), in priority order.
    candidates = [
        (
            "release",
            RAW_DIR / "release" / f"{sym}_underlying.parquet",
            f"{RELEASE_URL}/{sym}_underlying.parquet",
        ),
    ]
    # options-dataset-hist has proper underlying for SPY/IWM/QQQ only.
    if sym in HIST_SYMBOLS:
        candidates.append((
            "options-dataset-hist",
            RAW_DIR / "options-dataset-hist" / sym / "underlying_prices.parquet",
            f"{HIST_REPO_RAW}/parquet_{sym_lower}/underlying_prices.parquet",
        ))
    candidates.append((
        "options-data",
        RAW_DIR / "options-data" / sym / "underlying.parquet",
        f"{OPTIONS_DATA_URL}/{sym_lower}/underlying.parquet",
    ))

    for label, dest, url in candidates:
        fetched = _download(url, dest, force)
        if fetched is None:
            continue
        if not pd.read_parquet(fetched).empty:
            return fetched
        print(f" Warning: {label} underlying empty for {sym}")
    return None
# ---------------------------------------------------------------------------
# Underlying price reading
# ---------------------------------------------------------------------------
def read_underlying_prices(symbol, und_path, start, end):
    """Read underlying parquet and return (date, close) DataFrame for joining.

    Rows are limited to ``start <= date <= end``; the ``close`` column is
    returned as ``underlying_last``. Returns ``None`` when no row falls in
    the window. ``symbol`` is accepted but not used.
    """
    prices = pd.read_parquet(und_path)
    prices["date"] = pd.to_datetime(prices["date"])

    on_or_after_start = prices["date"] >= start
    on_or_before_end = prices["date"] <= end
    window = prices[on_or_after_start & on_or_before_end]
    if window.empty:
        return None

    closes = window[["date", "close"]]
    return closes.rename(columns={"close": "underlying_last"})
def underlying_to_tiingo(symbol, und_path, start, end):
    """Convert an underlying.parquet to Tiingo-format DataFrame.

    Rows are limited to ``start <= date <= end``. Adjusted high/low/open are
    the raw values scaled by ``adjusted_close / close``. Returns an empty
    DataFrame when no row falls in the window.
    """
    prices = pd.read_parquet(und_path)
    prices["date"] = pd.to_datetime(prices["date"])
    window = prices[(prices["date"] >= start) & (prices["date"] <= end)]
    if window.empty:
        return pd.DataFrame()

    adjustment = window["adjusted_close"] / window["close"]

    def raw(column):
        # Plain array, so the new frame gets a fresh default index.
        return window[column].values

    def adjusted(column):
        return (window[column] * adjustment).values

    columns = {"symbol": symbol, "date": raw("date")}
    for name in ("close", "high", "low", "open", "volume"):
        columns[name] = raw(name)
    columns["adjClose"] = raw("adjusted_close")
    columns["adjHigh"] = adjusted("high")
    columns["adjLow"] = adjusted("low")
    columns["adjOpen"] = adjusted("open")
    columns["adjVolume"] = raw("volume")
    columns["divCash"] = raw("dividend_amount")
    columns["splitFactor"] = raw("split_coefficient")
    return pd.DataFrame(columns)
def fetch_yfinance(symbol, start, end):
    """Fetch one symbol via yfinance (last resort).

    Args:
        symbol: Ticker to request.
        start: First date wanted (``pd.Timestamp``), inclusive.
        end: Last date wanted (``pd.Timestamp``), inclusive.

    Returns:
        A Tiingo-format DataFrame, or an empty DataFrame when yfinance is
        not installed or returns no rows. ``divCash`` and ``splitFactor``
        are filled with the neutral values 0.0 and 1.0.
    """
    try:
        import yfinance as yf
    except ImportError:
        print(f" yfinance not installed, cannot fetch {symbol}", file=sys.stderr)
        return pd.DataFrame()

    print(f" Last resort: fetching {symbol} from yfinance...")
    ticker = yf.Ticker(symbol)
    # yfinance treats `end` as exclusive, while the parquet-based sources in
    # this module keep rows with date <= end. Ask for one extra day so the
    # requested end date is included here as well.
    end_exclusive = end + pd.Timedelta(days=1)
    df = ticker.history(
        start=str(start.date()),
        end=str(end_exclusive.date()),
        auto_adjust=False,
    )
    if df.empty:
        return pd.DataFrame()

    # Drop timezone information so dates compare cleanly with naive dates.
    if df.index.tz is not None:
        df.index = df.index.tz_localize(None)

    ratio = df["Adj Close"] / df["Close"]
    return pd.DataFrame({
        "symbol": symbol,
        "date": df.index,
        "close": df["Close"].values,
        "high": df["High"].values,
        "low": df["Low"].values,
        "open": df["Open"].values,
        "volume": df["Volume"].values,
        "adjClose": df["Adj Close"].values,
        "adjHigh": (df["High"] * ratio).values,
        "adjLow": (df["Low"] * ratio).values,
        "adjOpen": (df["Open"] * ratio).values,
        "adjVolume": df["Volume"].values,
        "divCash": 0.0,
        "splitFactor": 1.0,
    })
# ---------------------------------------------------------------------------
# Options
# ---------------------------------------------------------------------------
def fetch_options(symbols, start, end, output, force=False):
    """Download options parquets and convert to backtester CSV format.

    Args:
        symbols: Ticker symbols to fetch (upper-cased internally).
        start: First quote date to keep, inclusive.
        end: Last quote date to keep, inclusive.
        output: Path of the CSV to write; missing parent directories are
            created.
        force: Re-download even when a cached parquet exists.

    Returns:
        The combined DataFrame that was written, or ``None`` when no symbol
        produced any rows (in which case nothing is written).
    """
    frames = []
    for symbol in symbols:
        sym = symbol.upper()
        print(f"Fetching options for {sym}...")
        opt_path = download_options_parquet(sym, force)
        if opt_path is None:
            print(f" Skipping {sym} options (download failed)", file=sys.stderr)
            continue

        opts = pd.read_parquet(opt_path)
        opts["date"] = pd.to_datetime(opts["date"])
        # Copy the filtered rows so the column assignments below act on an
        # independent frame rather than on a slice of the original.
        opts = opts[(opts["date"] >= start) & (opts["date"] <= end)].copy()
        if opts.empty:
            print(f" No options data for {sym} in [{start}, {end}]")
            continue

        # Get underlying close prices for underlying_last
        und_path = download_underlying(sym, force)
        und_prices = None
        if und_path is not None:
            und_prices = read_underlying_prices(sym, und_path, start, end)
        if und_prices is None:
            yf_df = fetch_yfinance(sym, start, end)
            if not yf_df.empty:
                und_prices = pd.DataFrame({
                    "date": pd.to_datetime(yf_df["date"]),
                    "underlying_last": yf_df["close"].values,
                })
        if und_prices is not None:
            opts = opts.merge(und_prices, on="date", how="left")
        else:
            opts["underlying_last"] = float("nan")

        # Last price: use column if present, else mid
        mid = (opts["bid"] + opts["ask"]) / 2
        if "last" in opts.columns:
            opts["_last"] = opts["last"].fillna(mid)
        else:
            opts["_last"] = mid

        out = pd.DataFrame({
            "underlying": sym,
            "underlying_last": opts["underlying_last"].values,
            "optionroot": opts["contract_id"].values,
            "type": opts["type"].values,
            "expiration": pd.to_datetime(opts["expiration"]).values,
            "quotedate": opts["date"].values,
            "strike": opts["strike"].values,
            "last": opts["_last"].values,
            "bid": opts["bid"].values,
            "ask": opts["ask"].values,
            "volume": opts["volume"].values,
            "openinterest": opts["open_interest"].values,
            "impliedvol": opts["implied_volatility"].values,
            "delta": opts["delta"].values,
            "gamma": opts["gamma"].values,
            "theta": opts["theta"].values,
            "vega": opts["vega"].values,
            "optionalias": opts["contract_id"].values,
        })
        frames.append(out)
        print(f" {len(out)} option rows for {sym}")

    if not frames:
        print("No options data fetched.", file=sys.stderr)
        return None

    result = pd.concat(frames, ignore_index=True)
    result = result.sort_values(["quotedate", "underlying", "expiration", "strike", "type"])
    # Create the directory the caller actually asked for: `output` may be a
    # custom path that is not under the default processed directory.
    Path(output).parent.mkdir(parents=True, exist_ok=True)
    result.to_csv(output, index=False)
    print(f"Wrote {len(result)} option rows to {output}")
    return result
# ---------------------------------------------------------------------------
# Stocks
# ---------------------------------------------------------------------------
def _underlying_source_label(und_path):
    """Name the download source from the cache layout used by download_underlying()."""
    path = Path(und_path)
    # Release files sit directly in <raw>/release/; the other two sources
    # use <raw>/<source>/<SYMBOL>/<file>.
    if path.parent.name == "release":
        return "GitHub Release"
    if path.parent.parent.name == "options-dataset-hist":
        return "options-dataset-hist"
    return "options-data"


def fetch_stocks(symbols, start, end, output, force=False):
    """Download stock data.

    Priority: GitHub Release > options-dataset-hist > options-data > yfinance.

    Args:
        symbols: Ticker symbols to fetch (upper-cased internally).
        start: First date to keep, inclusive.
        end: Last date to keep, inclusive.
        output: Path of the CSV to write; missing parent directories are
            created.
        force: Re-download even when a cached parquet exists.

    Returns:
        The combined Tiingo-format DataFrame that was written, or ``None``
        when no symbol produced any rows (in which case nothing is written).
    """
    frames = []
    for symbol in symbols:
        sym = symbol.upper()
        print(f"Fetching stocks for {sym}...")

        und_path = download_underlying(sym, force)
        if und_path is not None:
            df = underlying_to_tiingo(sym, und_path, start, end)
            if not df.empty:
                source = _underlying_source_label(und_path)
                frames.append(df)
                print(f" {len(df)} stock rows for {sym} (from {source})")
                continue

        # No usable parquet (or nothing in the date window): last resort.
        df = fetch_yfinance(sym, start, end)
        if not df.empty:
            frames.append(df)
            print(f" {len(df)} stock rows for {sym} (from yfinance)")
        else:
            print(f" No stock data for {sym}", file=sys.stderr)

    if not frames:
        print("No stock data fetched.", file=sys.stderr)
        return None

    result = pd.concat(frames, ignore_index=True)
    # Create the directory the caller actually asked for: `output` may be a
    # custom path that is not under the default processed directory.
    Path(output).parent.mkdir(parents=True, exist_ok=True)
    result.to_csv(output, index=False)
    print(f"Wrote {len(result)} stock rows to {output}")
    return result
# ---------------------------------------------------------------------------
# Date alignment
# ---------------------------------------------------------------------------
def align_dates(stocks_path, options_path):
    """Align stock and option dates to their intersection.

    Both CSV files are rewritten in place, keeping only rows whose
    (normalised) date appears in both. When the two files share no date a
    warning is printed and neither file is modified.
    """
    stocks = pd.read_csv(stocks_path, parse_dates=["date"])
    options = pd.read_csv(options_path, parse_dates=["quotedate", "expiration"])

    # Compare on calendar days, ignoring any time-of-day component.
    stock_days = stocks["date"].dt.normalize()
    option_days = options["quotedate"].dt.normalize()
    shared = set(stock_days) & set(option_days)
    if not shared:
        print("Warning: no overlapping dates between stocks and options!", file=sys.stderr)
        return

    kept_stocks = stocks[stock_days.isin(shared)]
    kept_options = options[option_days.isin(shared)]
    kept_stocks.to_csv(stocks_path, index=False)
    kept_options.to_csv(options_path, index=False)

    print(f"Aligned dates: {len(shared)} shared trading days")
    dropped_stock = len(stocks) - len(kept_stocks)
    if dropped_stock:
        print(f" Dropped {dropped_stock} stock rows without matching option dates")
    dropped_opt = len(options) - len(kept_options)
    if dropped_opt:
        print(f" Dropped {dropped_opt} option rows without matching stock dates")
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main():
    """Command-line entry point: fetch options and/or stocks, then align dates.

    Date alignment only runs for the ``all`` command, and only when both
    output files exist.
    """
    cli = argparse.ArgumentParser(description="Fetch stock and options data for the backtester")
    cli.add_argument(
        "command",
        nargs="?",
        default="all",
        choices=["all", "stocks", "options"],
        help="What to fetch (default: all)",
    )
    cli.add_argument(
        "--symbols",
        nargs="+",
        required=True,
        help="Ticker symbols (e.g. SPY IWM QQQ AAPL)",
    )
    cli.add_argument("--start", required=True, help="Start date (YYYY-MM-DD)")
    cli.add_argument("--end", required=True, help="End date (YYYY-MM-DD)")
    cli.add_argument(
        "--stocks-output",
        default="data/processed/stocks.csv",
        help="Stock CSV output path",
    )
    cli.add_argument(
        "--options-output",
        default="data/processed/options.csv",
        help="Options CSV output path",
    )
    cli.add_argument(
        "--update",
        action="store_true",
        help="Re-download parquets to get latest data",
    )
    opts = cli.parse_args()

    window_start = pd.Timestamp(opts.start)
    window_end = pd.Timestamp(opts.end)
    refresh = opts.update
    fetch_everything = opts.command == "all"

    if fetch_everything or opts.command == "options":
        fetch_options(opts.symbols, window_start, window_end, opts.options_output, refresh)
    if fetch_everything or opts.command == "stocks":
        fetch_stocks(opts.symbols, window_start, window_end, opts.stocks_output, refresh)

    if fetch_everything:
        both_written = Path(opts.stocks_output).exists() and Path(opts.options_output).exists()
        if both_written:
            print("\nAligning dates...")
            align_dates(opts.stocks_output, opts.options_output)

    print("\nDone!")


if __name__ == "__main__":
    main()
================================================
FILE: data/fetch_signals.py
================================================
#!/usr/bin/env python3
"""Download macro signal data from FRED for use in backtest signal filters.
Downloads:
- GDP (quarterly) — for Buffett Indicator proxy
- VIX (daily) — CBOE Volatility Index
- High Yield Spread (daily) — credit stress indicator
- 10Y-2Y Yield Curve (daily) — recession predictor
- Nonfinancial Corporate Equity Market Value (quarterly) — for Tobin's Q
- Nonfinancial Corporate Net Worth (quarterly) — for Tobin's Q
- Dollar Index (daily) — broad trade-weighted USD
Outputs:
data/processed/signals.csv — daily signal data, forward-filled from quarterly
"""
import io
import urllib.request
import pandas as pd
# Maps the column name used in the output CSV to the FRED series id that is
# downloaded for it.
FRED_SERIES = {
'gdp': 'GDP',
'vix': 'VIXCLS',
'hy_spread': 'BAMLH0A0HYM2',
'yield_curve_10y2y': 'T10Y2Y',
'nfc_equity_mv': 'NCBEILQ027S',
'nfc_net_worth': 'NCBCMDPMVCE',
'dollar_index': 'DTWEXBGS',
}
# Observation window, sent to FRED as the `cosd` / `coed` query parameters.
START = '2007-01-01'
END = '2025-12-31'
def fetch_fred(series_id: str) -> pd.Series:
    """Download one FRED series as a numeric, date-indexed Series.

    The series is requested as CSV for the module-level START..END window.
    Values that cannot be parsed as numbers are dropped.

    Args:
        series_id: FRED series identifier, e.g. ``'VIXCLS'``.

    Returns:
        The observations, indexed by date (index name ``'date'``).
    """
    url = (f'https://fred.stlouisfed.org/graph/fredgraph.csv'
           f'?id={series_id}&cosd={START}&coed={END}')
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # Use the response as a context manager so the connection is always
    # closed, including when reading or decoding fails.
    with urllib.request.urlopen(req, timeout=30) as resp:
        data = resp.read().decode()
    df = pd.read_csv(io.StringIO(data), parse_dates=['observation_date'],
                     index_col='observation_date')
    # The single data column is named after the series id.
    col = df.columns[0]
    s = pd.to_numeric(df[col], errors='coerce')
    s.index.name = 'date'
    return s.dropna()
def main():
    """Fetch every series in FRED_SERIES and write the combined daily CSV.

    Series that fail to download are reported and skipped. The remaining
    series are aligned on the union of their dates and forward-filled, the
    derived columns are added, and the result is written to
    ``data/processed/signals.csv`` (relative to the working directory; the
    directory is created when missing).
    """
    # Local import: this module's top-level imports do not include pathlib.
    from pathlib import Path

    signals = {}
    for name, sid in FRED_SERIES.items():
        print(f'Fetching {name} ({sid})...', end=' ', flush=True)
        try:
            s = fetch_fred(sid)
            signals[name] = s
            print(f'{len(s)} obs, {s.index[0].date()} to {s.index[-1].date()}')
        except Exception as e:
            # Best effort: one failing series must not abort the others.
            print(f'FAILED: {e}')
    if not signals:
        print('No data fetched.')
        return

    # Build daily DataFrame
    all_dates = sorted(set().union(*(s.index for s in signals.values())))
    daily = pd.DataFrame(index=pd.DatetimeIndex(all_dates, name='date'))
    for name, s in signals.items():
        daily[name] = s.reindex(daily.index)
    # Forward-fill quarterly data to daily
    daily = daily.ffill()

    # Compute derived signals
    if 'gdp' in daily.columns:
        # Buffett Indicator proxy: we don't have total market cap, but
        # nfc_equity_mv is corporate equity market value (in millions)
        # GDP is in billions. Scale NFC equity to billions to match.
        if 'nfc_equity_mv' in daily.columns:
            daily['buffett_indicator'] = daily['nfc_equity_mv'] / (daily['gdp'] * 1000) * 100
            print('Computed buffett_indicator (nfc_equity_mv / GDP)')
    if 'nfc_equity_mv' in daily.columns and 'nfc_net_worth' in daily.columns:
        # Tobin's Q proxy: NCBCMDPMVCE is used as-is because it is already a
        # market value / cost ratio.
        daily['tobin_q'] = daily['nfc_net_worth']
        print('Computed tobin_q (NCBCMDPMVCE is already MV/replacement cost)')

    daily = daily.dropna(how='all')
    out = 'data/processed/signals.csv'
    # to_csv does not create directories, so make sure the target exists.
    Path(out).parent.mkdir(parents=True, exist_ok=True)
    daily.to_csv(out)
    print(f'\nSaved {len(daily)} rows to {out}')
    print(f'Columns: {list(daily.columns)}')
    print(f'Date range: {daily.index[0].date()} to {daily.index[-1].date()}')
    print(daily.describe().round(2))


if __name__ == '__main__':
    main()
================================================
FILE: flake.nix
================================================
# Nix flake that defines the development shell for this repository: a Rust
# toolchain plus a Python 3.12 interpreter with the project's packages.
{
description = "Options backtester dev environment";
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
# Provides the `rust-bin` attribute set used for rustToolchain below.
rust-overlay.url = "github:oxalica/rust-overlay";
};
outputs = { self, nixpkgs, rust-overlay }:
let
# Platforms for which a dev shell is generated.
supportedSystems = [ "x86_64-linux" "aarch64-linux" "x86_64-darwin" "aarch64-darwin" ];
# Builds an attribute set with one entry per supported system.
forAllSystems = nixpkgs.lib.genAttrs supportedSystems;
in {
devShells = forAllSystems (system:
let
# nixpkgs instantiated for this system with the Rust overlay applied.
pkgs = import nixpkgs {
inherit system;
overlays = [ rust-overlay.overlays.default ];
};
python = pkgs.python312;
pythonPkgs = python.pkgs;
# Latest stable toolchain, extended with sources and rust-analyzer.
rustToolchain = pkgs.rust-bin.stable.latest.default.override {
extensions = [ "rust-src" "rust-analyzer" ];
};
in {
default = pkgs.mkShell {
packages = [
# Rust
rustToolchain
pkgs.maturin
pkgs.cargo-nextest
(python.withPackages (ps: [
# Runtime
ps.pandas
ps.numpy
ps.altair
ps.pyprind
ps.seaborn
ps.matplotlib
ps.pyarrow
ps.polars
# Notebooks
ps.jupyter
ps.nbconvert
ps.ipykernel
# Testing
ps.pytest
ps.hypothesis
ps.pytest-benchmark
ps.mypy
ps.pandas-stubs
ps.ruff
# Dev tools
ps.yapf
# Data fetching (optional, for data/ scripts)
ps.yfinance
]))
];
# Runs on shell entry: points PyO3 at this Python, builds the Rust
# extension if no release library exists yet, and symlinks it as
# _ob_rust.so in the current directory.
shellHook = ''
export PYO3_PYTHON=${python}/bin/python
export PYO3_USE_ABI3_FORWARD_COMPATIBILITY=1
# Build Rust extension and symlink for Python import
if [ -f rust/ob_python/Cargo.toml ]; then
if [ ! -f rust/target/release/lib_ob_rust.dylib ] && [ ! -f rust/target/release/lib_ob_rust.so ]; then
echo "Building Rust extension (first time only)..."
cargo build --manifest-path rust/ob_python/Cargo.toml --release 2>&1 | tail -1
fi
# Python needs _ob_rust.so, Rust produces lib_ob_rust.dylib/.so
if [ -f rust/target/release/lib_ob_rust.dylib ] && [ ! -f _ob_rust.so ]; then
ln -sf rust/target/release/lib_ob_rust.dylib _ob_rust.so
elif [ -f rust/target/release/lib_ob_rust.so ] && [ ! -f _ob_rust.so ]; then
ln -sf rust/target/release/lib_ob_rust.so _ob_rust.so
fi
fi
'';
};
});
};
}
================================================
FILE: options_portfolio_backtester/__init__.py
================================================
"""options_portfolio_backtester — the open-source options backtesting framework."""
# Core types
from options_portfolio_backtester.core.types import (
Direction,
OptionType,
Type,
Order,
Signal,
Fill,
Greeks,
OptionContract,
StockAllocation,
Stock,
get_order,
)
# Data
from options_portfolio_backtester.data.schema import Schema, Field, Filter
from options_portfolio_backtester.data.providers import (
CsvOptionsProvider, CsvStocksProvider,
TiingoData, HistoricalOptionsData,
)
# Strategy
from options_portfolio_backtester.strategy.strategy import Strategy
from options_portfolio_backtester.strategy.strategy_leg import StrategyLeg
from options_portfolio_backtester.strategy.presets import Strangle
# Execution
from options_portfolio_backtester.execution.cost_model import (
NoCosts, PerContractCommission, TieredCommission, SpreadSlippage,
)
from options_portfolio_backtester.execution.fill_model import MarketAtBidAsk, MidPrice, VolumeAwareFill
from options_portfolio_backtester.execution.sizer import (
CapitalBased, FixedQuantity, FixedDollar, PercentOfPortfolio,
)
from options_portfolio_backtester.execution.signal_selector import (
FirstMatch, NearestDelta, MaxOpenInterest,
)
# Portfolio
from options_portfolio_backtester.portfolio.portfolio import Portfolio
from options_portfolio_backtester.portfolio.position import OptionPosition
from options_portfolio_backtester.portfolio.greeks import aggregate_greeks
from options_portfolio_backtester.portfolio.risk import RiskManager, MaxDelta, MaxVega, MaxDrawdown
# Engine
from options_portfolio_backtester.engine.engine import BacktestEngine
from options_portfolio_backtester.engine.clock import TradingClock
# Analytics
from options_portfolio_backtester.analytics.stats import BacktestStats, PeriodStats, LookbackReturns
from options_portfolio_backtester.analytics.trade_log import TradeLog
from options_portfolio_backtester.analytics.tearsheet import TearsheetReport, build_tearsheet
from options_portfolio_backtester.analytics.summary import summary
__all__ = [
# Core types
"Direction", "OptionType", "Type", "Order", "Signal", "Fill", "Greeks",
"OptionContract", "StockAllocation", "Stock", "get_order",
# Data
"Schema", "Field", "Filter", "CsvOptionsProvider", "CsvStocksProvider",
"TiingoData", "HistoricalOptionsData",
# Strategy
"Strategy", "StrategyLeg", "Strangle",
# Execution
"NoCosts", "PerContractCommission", "TieredCommission", "SpreadSlippage",
"MarketAtBidAsk", "MidPrice", "VolumeAwareFill",
"CapitalBased", "FixedQuantity", "FixedDollar", "PercentOfPortfolio",
"FirstMatch", "NearestDelta", "MaxOpenInterest",
# Portfolio
"Portfolio", "OptionPosition", "aggregate_greeks",
"RiskManager", "MaxDelta", "MaxVega", "MaxDrawdown",
# Engine
"BacktestEngine", "TradingClock",
# Analytics
"BacktestStats", "PeriodStats", "LookbackReturns",
"TradeLog", "TearsheetReport", "build_tearsheet",
"summary",
]
================================================
FILE: options_portfolio_backtester/analytics/__init__.py
================================================
================================================
FILE: options_portfolio_backtester/analytics/charts.py
================================================
"""Charts — Altair charts + matplotlib additions."""
from __future__ import annotations
import altair as alt
import pandas as pd
def returns_chart(report: pd.DataFrame) -> alt.VConcatChart:
    """Build the interactive accumulated-return chart.

    The result stacks two views that share ``report.reset_index()`` as data:
    a large layered view (area, hover marker and hover label) and a short
    overview strip whose horizontal brush drives the x-domain of the large view.

    Args:
        report: Frame read through the ``index`` and ``accumulated return``
            fields after ``reset_index()``.

    Returns:
        The vertically concatenated Altair chart.
    """
    # Horizontal brush; it is attached to the overview strip further down.
    brush = alt.selection_interval(encodings=['x'])

    # Base area mark that every other layer derives from.
    return_axis = alt.Y('accumulated return:Q', axis=alt.Axis(format='%'))
    base_area = alt.Chart().mark_area(opacity=0.7).encode(x='index:T', y=return_axis)

    # Hover selection that snaps to the nearest ``index`` value.
    hover = alt.selection_point(nearest=True, on='mouseover', fields=['index'])
    hover_opacity = alt.condition(hover, alt.value(1), alt.value(0))
    hover_points = base_area.mark_point().encode(opacity=hover_opacity)

    # Fully transparent points that carry the hover selection.
    hover_targets = alt.Chart().mark_point().encode(
        x='index:T',
        opacity=alt.value(0),
    ).add_params(hover)

    # Label showing the hovered value; a blank string otherwise.
    hover_text = alt.condition(hover, 'accumulated return:Q', alt.value(' '), format='.2%')
    hover_label = base_area.mark_text(align='left', dx=5, dy=-5).encode(text=hover_text)

    # Large view: its x-scale follows whatever range the brush covers.
    brushed_x = alt.X('index:T', axis=alt.Axis(title='date'), scale=alt.Scale(domain=brush))
    detail_view = alt.layer(
        hover_targets,
        hover_points,
        hover_label,
        base_area.encode(brushed_x),
        width=700,
        height=350,
        title='Returns over time',
    )

    overview_strip = base_area.properties(width=700, height=70).add_params(brush)
    return alt.vconcat(detail_view, overview_strip, data=report.reset_index())
def returns_histogram(report: pd.DataFrame) -> alt.Chart:
    """Bar chart counting rows per bin of the ``% change`` field.

    Args:
        report: Frame providing the ``% change`` field.

    Returns:
        An Altair bar chart with at most 100 bins on a percent-formatted x-axis.
    """
    binned_change = alt.X(
        '% change:Q',
        bin=alt.BinParams(maxbins=100),
        axis=alt.Axis(format='%'),
    )
    return alt.Chart(report).mark_bar().encode(x=binned_change, y='count():Q')
def monthly_returns_heatmap(report: pd.DataFrame) -> alt.Chart:
    """Year-by-month heatmap of month-over-month changes in ``total capital``.

    Args:
        report: Frame that supports ``resample('ME')`` and has a
            ``total capital`` column.

    Returns:
        An Altair rect chart titled ``Monthly Returns``.
    """
    month_end_capital = report.resample('ME')['total capital'].last()
    returns_frame = month_end_capital.pct_change().reset_index()

    # pct_change() leaves the first month empty; fill it with the change
    # relative to the very first row of the report.
    opening_capital = report.iloc[0]['total capital']
    first_label = returns_frame.index[0]
    returns_frame.loc[first_label, 'total capital'] = month_end_capital.iloc[0] / opening_capital - 1
    returns_frame.columns = ['date', 'total capital']

    year_axis = alt.X('year(date):O', title='Year')
    month_axis = alt.Y('month(date):O', title='Month')
    shade = alt.Color('mean(total capital)', title='Return', scale=alt.Scale(scheme='redyellowgreen'))
    hover_value = alt.Tooltip('mean(total capital)', format='.2f')

    heatmap = alt.Chart(returns_frame).mark_rect().encode(year_axis, month_axis, shade, hover_value)
    return heatmap.properties(title='Monthly Returns')
def weights_chart(balance: pd.DataFrame, figsize: tuple[float, float] = (12, 6)):
    """Draw portfolio weights over time as a stacked area chart.

    Weights are an estimate: each symbol's share of the summed absolute
    quantities in a row, scaled by the non-cash fraction of ``total capital``
    when a ``cash`` column is present. No prices are used.

    Args:
        balance: Frame with ``{symbol} qty`` columns and a ``total capital``
            column; ``cash`` is optional.
        figsize: Size passed to ``plt.subplots``.

    Returns:
        ``(fig, ax)`` from matplotlib.
    """
    import matplotlib.pyplot as plt

    quantity_columns = [name for name in balance.columns if name.endswith(" qty")]
    if len(quantity_columns) == 0:
        # Nothing to plot: hand back an empty, titled figure.
        fig, ax = plt.subplots(figsize=figsize)
        ax.set_title("Portfolio Weights (no positions found)")
        return fig, ax

    labels = [name.replace(" qty", "") for name in quantity_columns]
    capital = balance["total capital"]

    # Start from raw quantities, with missing values counted as zero.
    shares = pd.DataFrame(index=balance.index)
    for label, column in zip(labels, quantity_columns):
        shares[label] = balance[column].fillna(0)

    # Per-row normaliser; rows with no holdings divide by 1 instead of 0.
    denominators = shares.abs().sum(axis=1).replace(0, 1)

    if "cash" in balance.columns:
        # Scale the quantity shares by the part of capital not held as cash.
        invested_share = 1.0 - balance["cash"] / capital.replace(0, 1)
        for label in labels:
            shares[label] = (shares[label] / denominators) * invested_share
    else:
        shares = shares.div(denominators, axis=0)

    fig, ax = plt.subplots(figsize=figsize)
    stacked_series = [shares[label] for label in labels]
    ax.stackplot(shares.index, *stacked_series, labels=labels, alpha=0.8)
    ax.set_title("Portfolio Weights Over Time")
    ax.set_ylabel("Weight")
    ax.set_xlabel("Date")
    ax.legend(loc="upper left", fontsize="small")
    ax.set_ylim(0, 1)
    fig.tight_layout()
    return fig, ax
__all__ = ["returns_chart", "returns_histogram", "monthly_returns_heatmap", "weights_chart"]
================================================
FILE: options_portfolio_backtester/analytics/optimization.py
================================================
"""Walk-forward optimization and parameter grid sweep."""
from __future__ import annotations
import itertools
from concurrent.futures import ProcessPoolExecutor, as_completed
from dataclasses import dataclass
from typing import Any, Callable
import pandas as pd
import numpy as np
from options_portfolio_backtester.analytics.stats import BacktestStats
@dataclass
class OptimizationResult:
    """Result of a single parameter combination.

    Instances are built by ``grid_sweep`` (one per successful run) and by
    ``walk_forward`` (one per in-sample / out-of-sample segment).
    """

    # Parameters that identify the run: the keyword arguments of a sweep run,
    # or a ``{"split": i, "type": ...}`` tag for walk-forward segments.
    params: dict[str, Any]
    # First element of the tuple returned by the user-supplied run function.
    stats: BacktestStats
    # Second element of the tuple returned by the user-supplied run function.
    balance: pd.DataFrame
def grid_sweep(
    run_fn: Callable[..., tuple[BacktestStats, pd.DataFrame]],
    param_grid: dict[str, list[Any]],
    max_workers: int | None = None,
) -> list[OptimizationResult]:
    """Run a parameter grid sweep using parallel execution.

    Every combination from the Cartesian product of ``param_grid`` is
    submitted to a ``ProcessPoolExecutor``. A combination whose run raises is
    logged with its traceback and left out of the result instead of aborting
    the sweep.

    Args:
        run_fn: Function that takes **params and returns (BacktestStats, balance).
            It is submitted to worker processes.
        param_grid: Dict mapping parameter names to lists of values.
        max_workers: Number of parallel workers (None = executor default).

    Returns:
        List of OptimizationResult, sorted by Sharpe ratio descending.
        Results whose Sharpe ratio is NaN are placed last.
    """
    import logging

    logger = logging.getLogger(__name__)

    keys = list(param_grid.keys())
    combos = [dict(zip(keys, values)) for values in itertools.product(*param_grid.values())]
    results: list[OptimizationResult] = []
    if not combos:
        # An empty value list yields no combinations; skip starting a pool.
        return results

    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(run_fn, **params): params for params in combos}
        for future in as_completed(futures):
            params = futures[future]
            try:
                stats, balance = future.result()
            except Exception:
                # Best effort: keep sweeping, but leave a trace of what failed.
                logger.warning("grid_sweep: run failed for params %r", params, exc_info=True)
                continue
            results.append(OptimizationResult(params=params, stats=stats, balance=balance))

    def _sharpe_key(result: OptimizationResult) -> float:
        sharpe = result.stats.sharpe_ratio
        # NaN compares false with everything and would leave the order
        # undefined; rank it below every real value instead.
        return float("-inf") if sharpe != sharpe else sharpe

    results.sort(key=_sharpe_key, reverse=True)
    return results
def walk_forward(
    run_fn: Callable[[pd.Timestamp, pd.Timestamp], tuple[BacktestStats, pd.DataFrame]],
    dates: pd.DatetimeIndex,
    in_sample_pct: float = 0.70,
    n_splits: int = 5,
) -> list[tuple[OptimizationResult, OptimizationResult]]:
    """Walk-forward analysis with rolling in-sample/out-of-sample splits.

    ``dates`` is cut into ``n_splits`` consecutive windows of
    ``len(dates) // n_splits`` entries (trailing remainder dates are unused).
    Each window is divided into a leading in-sample part and a trailing
    out-of-sample part, and ``run_fn`` is called once for each part.

    A window is skipped when either part would be empty (for example when
    there are fewer dates than splits, or ``in_sample_pct`` is 0 or 1), so
    date lookups never wrap around or reach into a neighbouring window.
    A window whose run raises is logged with its traceback and skipped.

    Args:
        run_fn: Function that takes (start_date, end_date) and returns (stats, balance).
        dates: Full date range.
        in_sample_pct: Fraction of each window used for in-sample.
        n_splits: Number of walk-forward windows.

    Returns:
        List of (in_sample_result, out_of_sample_result) tuples.

    Raises:
        ValueError: If ``n_splits`` is smaller than 1.
    """
    import logging

    if n_splits < 1:
        raise ValueError(f"n_splits must be >= 1, got {n_splits}")

    logger = logging.getLogger(__name__)
    total = len(dates)
    window_size = total // n_splits
    results = []
    for i in range(n_splits):
        start_idx = i * window_size
        end_idx = min(start_idx + window_size, total)
        split_idx = start_idx + int((end_idx - start_idx) * in_sample_pct)

        # Both segments need at least one date: [start, split) and [split, end).
        if not (start_idx < split_idx < end_idx):
            continue

        is_start = dates[start_idx]
        is_end = dates[split_idx - 1]
        oos_start = dates[split_idx]
        oos_end = dates[end_idx - 1]
        try:
            is_stats, is_balance = run_fn(is_start, is_end)
            oos_stats, oos_balance = run_fn(oos_start, oos_end)
            results.append((
                OptimizationResult(params={"split": i, "type": "in_sample"},
                                   stats=is_stats, balance=is_balance),
                OptimizationResult(params={"split": i, "type": "out_of_sample"},
                                   stats=oos_stats, balance=oos_balance),
            ))
        except Exception:
            # Best effort: keep the remaining splits, but record the failure.
            logger.warning("walk_forward: split %d failed and was skipped", i, exc_info=True)
            continue
    return results
def rust_grid_sweep(
    options_data,
    stocks_data,
    base_config: dict,
    schema_mapping: dict,
    param_overrides: list[dict],
    n_workers: int | None = None,
) -> list[dict]:
    """Delegate a grid sweep to the Rust ``parallel_sweep`` function.

    All arguments are forwarded unchanged. Entries of ``param_overrides``
    may carry these keys:
        - "label": str
        - "profit_pct": Optional[float]
        - "loss_pct": Optional[float]
        - "rebalance_dates": Optional[list[str]]
        - "leg_entry_filters": Optional[list[Optional[str]]]
        - "leg_exit_filters": Optional[list[Optional[str]]]

    Returns:
        The result dicts from ``parallel_sweep``, ordered by ``sharpe_ratio``
        from highest to lowest; a missing ``sharpe_ratio`` counts as 0.0.
    """
    from options_portfolio_backtester._ob_rust import parallel_sweep

    def _sharpe_of(entry: dict):
        return entry.get("sharpe_ratio", 0.0)

    sweep_output = parallel_sweep(
        options_data,
        stocks_data,
        base_config,
        schema_mapping,
        param_overrides,
        n_workers,
    )
    sweep_output.sort(key=_sharpe_of, reverse=True)
    return sweep_output
================================================
FILE: options_portfolio_backtester/analytics/stats.py
================================================
"""BacktestStats — comprehensive analytics matching and exceeding bt/ffn.
Provides:
- Trade stats: profit factor, win rate, largest win/loss
- Return stats: total, annualized, Sharpe, Sortino, Calmar
- Risk stats: max drawdown, drawdown duration, volatility, tail ratio
- Period stats: monthly/yearly Sharpe, Sortino, mean, vol, skew, kurtosis
- Extreme analysis: best/worst day, month, year
- Lookback returns: MTD, 3M, 6M, YTD, 1Y, 3Y, 5Y, 10Y
- Portfolio metrics: turnover, Herfindahl concentration index
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
import numpy as np
import pandas as pd
from options_portfolio_backtester._ob_rust import compute_full_stats
@dataclass
class PeriodStats:
    """Stats for a specific return frequency (daily, monthly, yearly).

    Every field defaults to 0.0. ``BacktestStats.from_balance`` fills the
    fields from the same-named keys of the per-period dict returned by
    ``compute_full_stats``; the exact formulas live on the Rust side.
    """

    mean: float = 0.0
    vol: float = 0.0
    sharpe: float = 0.0
    sortino: float = 0.0
    skew: float = 0.0
    kurtosis: float = 0.0
    # Reported as "Best day/month/year" by ``BacktestStats.to_dataframe``.
    best: float = 0.0
    # Reported as "Worst day/month/year" by ``BacktestStats.to_dataframe``.
    worst: float = 0.0
@dataclass
class LookbackReturns:
    """Trailing period returns as of the end date.

    Each field defaults to ``None``. The reporting helpers on
    ``BacktestStats`` omit fields that are ``None``.
    """

    mtd: float | None = None  # month to date
    three_month: float | None = None
    six_month: float | None = None
    ytd: float | None = None  # year to date
    one_year: float | None = None
    three_year: float | None = None
    five_year: float | None = None
    ten_year: float | None = None
@dataclass
class BacktestStats:
    """Comprehensive backtest statistics.

    All fields default to zero or empty values, so ``BacktestStats()`` is the
    result for an empty balance. Populated instances come from
    ``from_balance`` / ``from_balance_range``, which delegate the numeric
    work to the Rust ``compute_full_stats`` function.
    """

    # Trade stats
    total_trades: int = 0
    wins: int = 0
    losses: int = 0
    win_pct: float = 0.0
    profit_factor: float = 0.0
    largest_win: float = 0.0
    largest_loss: float = 0.0
    avg_win: float = 0.0
    avg_loss: float = 0.0
    avg_trade: float = 0.0
    # Return stats
    total_return: float = 0.0
    annualized_return: float = 0.0
    sharpe_ratio: float = 0.0
    sortino_ratio: float = 0.0
    calmar_ratio: float = 0.0
    # Risk stats
    max_drawdown: float = 0.0
    max_drawdown_duration: int = 0
    avg_drawdown: float = 0.0
    avg_drawdown_duration: int = 0
    volatility: float = 0.0
    tail_ratio: float = 0.0
    # Period stats
    daily: PeriodStats = field(default_factory=PeriodStats)
    monthly: PeriodStats = field(default_factory=PeriodStats)
    yearly: PeriodStats = field(default_factory=PeriodStats)
    # Lookback
    lookback: LookbackReturns = field(default_factory=LookbackReturns)
    # Portfolio metrics
    turnover: float = 0.0
    herfindahl: float = 0.0

    @classmethod
    def from_balance_range(
        cls,
        balance: pd.DataFrame,
        start: str | pd.Timestamp | None = None,
        end: str | pd.Timestamp | None = None,
        **kwargs,
    ) -> "BacktestStats":
        """Slice balance to [start, end] and recompute all stats.

        Args:
            balance: Balance frame indexed by date, with a ``total capital`` column.
            start: Optional lower bound (label-based slice); skipped when falsy.
            end: Optional upper bound (label-based slice); skipped when falsy.
            **kwargs: Forwarded to ``from_balance``.

        Returns:
            Stats for the slice, or a default instance when the input or the
            slice is empty.
        """
        if balance.empty:
            return cls()
        # Work on a copy so the caller's frame is not modified.
        b = balance.copy()
        if start:
            b = b.loc[pd.Timestamp(start):]
        if end:
            b = b.loc[:pd.Timestamp(end)]
        if b.empty:
            return cls()
        # Recompute the change column so it matches the sliced range.
        b["% change"] = b["total capital"].pct_change()
        return cls.from_balance(b, **kwargs)

    @classmethod
    def from_balance(
        cls,
        balance: pd.DataFrame,
        trade_pnls: np.ndarray | None = None,
        risk_free_rate: float = 0.0,
    ) -> "BacktestStats":
        """Compute stats from a balance DataFrame and optional trade P&Ls.

        Args:
            balance: Frame with a datetime-like index and a ``total capital`` column.
            trade_pnls: Optional array of per-trade P&L values.
            risk_free_rate: Forwarded to ``compute_full_stats``.

        Returns:
            A populated instance, or a default instance for an empty balance.
        """
        if balance.empty:
            return cls()
        total_capital = balance["total capital"].values.astype(np.float64)
        timestamps_ns = balance.index.values.astype("datetime64[ns]").view("int64").astype(np.int64).tolist()
        pnls = trade_pnls.astype(np.float64).tolist() if trade_pnls is not None else []
        # Build stock weight matrix. A column counts as a stock column when a
        # sibling "<name> qty" column exists.
        # NOTE(review): assumes those columns hold per-symbol market value - confirm.
        stock_cols = [c for c in balance.columns if f"{c} qty" in balance.columns]
        if stock_cols:
            with np.errstate(divide="ignore", invalid="ignore"):
                weights = balance[stock_cols].values / total_capital[:, None]
            # Rows with zero total capital produce NaN (0/0) or +/-inf (x/0).
            # Map all of them to 0; the keywords are required because the
            # second positional parameter of nan_to_num is ``copy``.
            weights = np.nan_to_num(weights, nan=0.0, posinf=0.0, neginf=0.0).astype(np.float64)
            flat_weights = weights.ravel().tolist()
            n_stocks = len(stock_cols)
        else:
            flat_weights = []
            n_stocks = 0
        d = compute_full_stats(
            total_capital.tolist(),
            timestamps_ns,
            pnls,
            flat_weights,
            n_stocks,
            risk_free_rate,
        )
        stats = cls()
        # Scalars: copied by name from the result dict.
        for attr in (
            "total_trades", "wins", "losses", "win_pct", "profit_factor",
            "largest_win", "largest_loss", "avg_win", "avg_loss", "avg_trade",
            "total_return", "annualized_return", "sharpe_ratio", "sortino_ratio",
            "calmar_ratio", "max_drawdown", "max_drawdown_duration",
            "avg_drawdown", "avg_drawdown_duration", "volatility", "tail_ratio",
            "turnover", "herfindahl",
        ):
            setattr(stats, attr, d[attr])
        # Period stats
        for period_name in ("daily", "monthly", "yearly"):
            pd_dict = d[period_name]
            setattr(stats, period_name, PeriodStats(
                mean=pd_dict["mean"], vol=pd_dict["vol"],
                sharpe=pd_dict["sharpe"], sortino=pd_dict["sortino"],
                skew=pd_dict["skew"], kurtosis=pd_dict["kurtosis"],
                best=pd_dict["best"], worst=pd_dict["worst"],
            ))
        # Lookback
        lb = d["lookback"]
        stats.lookback = LookbackReturns(
            mtd=lb["mtd"], three_month=lb["three_month"],
            six_month=lb["six_month"], ytd=lb["ytd"],
            one_year=lb["one_year"], three_year=lb["three_year"],
            five_year=lb["five_year"], ten_year=lb["ten_year"],
        )
        return stats

    def to_dataframe(self) -> pd.DataFrame:
        """Return stats as a single-column (``Value``) DataFrame indexed by label.

        Lookback returns that are ``None`` are left out.
        """
        data = {
            "Total trades": self.total_trades,
            "Wins": self.wins,
            "Losses": self.losses,
            "Win %": self.win_pct,
            "Profit factor": self.profit_factor,
            "Largest win": self.largest_win,
            "Largest loss": self.largest_loss,
            "Avg win": self.avg_win,
            "Avg loss": self.avg_loss,
            "Avg trade": self.avg_trade,
            "Total return": self.total_return,
            "Annualized return": self.annualized_return,
            "Sharpe ratio": self.sharpe_ratio,
            "Sortino ratio": self.sortino_ratio,
            "Calmar ratio": self.calmar_ratio,
            "Max drawdown": self.max_drawdown,
            "Max DD duration (days)": self.max_drawdown_duration,
            "Avg drawdown": self.avg_drawdown,
            "Avg DD duration (days)": self.avg_drawdown_duration,
            "Volatility": self.volatility,
            "Tail ratio": self.tail_ratio,
            # Daily
            "Daily mean": self.daily.mean,
            "Daily vol": self.daily.vol,
            "Daily Sharpe": self.daily.sharpe,
            "Daily Sortino": self.daily.sortino,
            "Daily skew": self.daily.skew,
            "Daily kurtosis": self.daily.kurtosis,
            "Best day": self.daily.best,
            "Worst day": self.daily.worst,
            # Monthly
            "Monthly mean": self.monthly.mean,
            "Monthly vol": self.monthly.vol,
            "Monthly Sharpe": self.monthly.sharpe,
            "Monthly Sortino": self.monthly.sortino,
            "Monthly skew": self.monthly.skew,
            "Monthly kurtosis": self.monthly.kurtosis,
            "Best month": self.monthly.best,
            "Worst month": self.monthly.worst,
            # Yearly
            "Yearly mean": self.yearly.mean,
            "Yearly vol": self.yearly.vol,
            "Yearly Sharpe": self.yearly.sharpe,
            "Yearly Sortino": self.yearly.sortino,
            "Best year": self.yearly.best,
            "Worst year": self.yearly.worst,
            # Portfolio
            "Turnover": self.turnover,
            "Herfindahl index": self.herfindahl,
        }
        # Add lookback returns (skip None values)
        lb = self.lookback
        for label, val in [
            ("MTD", lb.mtd), ("3M return", lb.three_month),
            ("6M return", lb.six_month), ("YTD", lb.ytd),
            ("1Y return", lb.one_year), ("3Y return", lb.three_year),
            ("5Y return", lb.five_year), ("10Y return", lb.ten_year),
        ]:
            if val is not None:
                data[label] = val
        return pd.DataFrame(
            list(data.values()), index=list(data.keys()), columns=["Value"]
        )

    def summary(self) -> str:
        """Return a formatted text summary.

        The monthly Sharpe, best/worst month and turnover lines are only
        included when the corresponding value is non-zero.
        """
        lines = [
            f"Total Return: {self.total_return:>10.2%}",
            f"Annualized Return: {self.annualized_return:>10.2%}",
            f"Sharpe Ratio: {self.sharpe_ratio:>10.2f}",
            f"Sortino Ratio: {self.sortino_ratio:>10.2f}",
            f"Max Drawdown: {self.max_drawdown:>10.2%}",
            f"Max DD Duration: {self.max_drawdown_duration:>10d} days",
            f"Calmar Ratio: {self.calmar_ratio:>10.2f}",
            f"Profit Factor: {self.profit_factor:>10.2f}",
            f"Win Rate: {self.win_pct:>10.1f}%",
            f"Total Trades: {self.total_trades:>10d}",
        ]
        if self.monthly.sharpe != 0:
            lines.append(f"Monthly Sharpe: {self.monthly.sharpe:>10.2f}")
        if self.monthly.best != 0:
            lines.append(f"Best Month: {self.monthly.best:>10.2%}")
            lines.append(f"Worst Month: {self.monthly.worst:>10.2%}")
        if self.turnover != 0:
            lines.append(f"Turnover: {self.turnover:>10.2%}")
        return "\n".join(lines)

    def lookback_table(self) -> pd.DataFrame:
        """Lookback returns as a single-row DataFrame.

        Columns exist only for lookbacks that are not ``None``; an empty
        DataFrame is returned when none are available.
        """
        lb = self.lookback
        data = {}
        for label, val in [
            ("MTD", lb.mtd), ("3M", lb.three_month), ("6M", lb.six_month),
            ("YTD", lb.ytd), ("1Y", lb.one_year), ("3Y", lb.three_year),
            ("5Y", lb.five_year), ("10Y", lb.ten_year),
        ]:
            if val is not None:
                data[label] = val
        if not data:
            return pd.DataFrame()
        return pd.DataFrame([data])
================================================
FILE: options_portfolio_backtester/analytics/summary.py
================================================
"""Summary statistics for trade logs."""
from __future__ import annotations
import numpy as np
import pandas as pd
from options_portfolio_backtester.core.types import Order
def summary(trade_log: pd.DataFrame, balance: pd.DataFrame) -> pd.io.formats.style.Styler:
    """Returns a table with summary statistics about the trade log.

    Args:
        trade_log: Trade log with two-level columns: one group per leg
            (the first leg must provide ``order`` and ``contract``) plus a
            ``totals`` group providing ``cost`` and ``qty``. NOTE: this frame
            is modified in place; a ``('totals', 'capital')`` column is added.
        balance: Balance frame providing ``total capital`` and ``% change``.

    Returns:
        A pandas ``Styler`` over a one-column (``Strategy``) frame with one
        formatted row per statistic.
    """
    initial_capital: float = balance['total capital'].iloc[0]
    # Running capital after each trade row; written back into the caller's frame.
    trade_log.loc[:,
                  ('totals',
                   'capital')] = (-trade_log['totals']['cost'] * trade_log['totals']['qty']).cumsum() + initial_capital
    daily_returns: pd.Series = balance['% change'] * 100
    first_leg: str = trade_log.columns.levels[0][0]
    ## Not sure of a better way to do this, just doing `Order` or `@Order` inside
    ## the .eval(...) does not seem to work.
    # The two locals below are referenced by name (`@order_bto`, `@order_sto`)
    # inside the eval string, so they must not be renamed or removed.
    order_bto = Order.BTO
    order_sto = Order.STO
    entry_mask: pd.Series = trade_log[first_leg].eval('(order == @order_bto) | (order == @order_sto)')
    # Rows whose first-leg order is BTO/STO are entries; every other row is an exit.
    entries: pd.DataFrame = trade_log.loc[entry_mask]
    exits: pd.DataFrame = trade_log.loc[~entry_mask]
    costs: np.ndarray = np.array([])
    for contract in entries[first_leg]['contract']:
        entry = entries.loc[entries[first_leg]['contract'] == contract]
        exit_ = exits.loc[exits[first_leg]['contract'] == contract]
        try:
            # Here we assume we are entering only once per contract (i.e both entry and exit_ have only one row)
            costs = np.append(costs, (entry['totals']['cost'] * entry['totals']['qty']).values[0] +
                              (exit_['totals']['cost'] * exit_['totals']['qty']).values[0])
        except IndexError:
            # No matching exit row for this contract: it is not counted.
            continue
    # A round trip is a win when entry cost plus exit cost nets out negative.
    wins: np.ndarray = costs < 0
    losses: np.ndarray = costs >= 0
    # NOTE(review): total_trades counts exit rows, while wins are counted from
    # matched round trips; the two can differ - confirm this is intended.
    total_trades: int = len(exits)
    win_number: int = int(np.sum(wins))
    loss_number: int = total_trades - win_number
    win_pct: float = (win_number / total_trades) * 100 if total_trades > 0 else 0
    # NOTE(review): this is the ratio of win count to loss count, not gross
    # profit divided by gross loss - confirm against the intended definition.
    profit_factor: float = np.sum(wins) / np.sum(losses) if np.sum(losses) > 0 else 0
    largest_loss: float = max(0, np.max(costs)) if len(costs) > 0 else 0
    avg_profit: float = np.mean(-costs) if len(costs) > 0 else 0
    avg_pl: float = np.mean(daily_returns)
    # NOTE(review): final capital as a percentage of initial capital
    # (100 means unchanged), despite the "P&L" label - confirm.
    total_pl: float = (trade_log['totals']['capital'].iloc[-1] / initial_capital) * 100
    data = [total_trades, win_number, loss_number, win_pct, largest_loss, profit_factor, avg_profit, avg_pl, total_pl]
    stats = [
        'Total trades', 'Number of wins', 'Number of losses', 'Win %', 'Largest loss', 'Profit factor',
        'Average profit', 'Average P&L %', 'Total P&L %'
    ]
    strat = ['Strategy']
    summary_df = pd.DataFrame(data, stats, strat)
    # One display format per row label, applied row by row below.
    formatters: dict[str, str] = {
        "Total trades": "{:.0f}",
        "Number of wins": "{:.0f}",
        "Number of losses": "{:.0f}",
        "Win %": "{:.2f}%",
        "Largest loss": "${:.2f}",
        "Profit factor": "{:.2f}",
        "Average profit": "${:.2f}",
        "Average P&L %": "{:.2f}%",
        "Total P&L %": "{:.2f}%"
    }
    styled = summary_df.style
    for row_label, fmt in formatters.items():
        styled = styled.format(fmt, subset=pd.IndexSlice[row_label, :])
    return styled
================================================
FILE: options_portfolio_backtester/analytics/tearsheet.py
================================================
"""Simple tearsheet-style report helpers."""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
import numpy as np
import pandas as pd
from options_portfolio_backtester.analytics.stats import BacktestStats
@dataclass
class TearsheetReport:
    """Container for common report artifacts.

    Bundles the computed stats, their tabular form, a monthly return table
    and a drawdown series, and renders them as a dict, CSV files, Markdown
    or HTML.
    """

    stats: BacktestStats
    stats_table: pd.DataFrame
    monthly_returns: pd.DataFrame
    drawdown_series: pd.Series

    def to_dict(self) -> dict[str, object]:
        """Return the four artifacts keyed by their attribute names."""
        return {
            "stats": self.stats,
            "stats_table": self.stats_table,
            "monthly_returns": self.monthly_returns,
            "drawdown_series": self.drawdown_series,
        }

    def to_csv(self, directory: str | Path) -> dict[str, Path]:
        """Write the tabular artifacts as CSV files under ``directory``.

        The directory (including parents) is created when missing.

        Returns:
            Mapping from artifact name to the path of the written file.
        """
        out_dir = Path(directory)
        out_dir.mkdir(parents=True, exist_ok=True)
        stats_path = out_dir / "stats_table.csv"
        monthly_path = out_dir / "monthly_returns.csv"
        drawdown_path = out_dir / "drawdown_series.csv"
        self.stats_table.to_csv(stats_path)
        self.monthly_returns.to_csv(monthly_path)
        self.drawdown_series.rename("drawdown").to_frame().to_csv(drawdown_path)
        return {
            "stats_table": stats_path,
            "monthly_returns": monthly_path,
            "drawdown_series": drawdown_path,
        }

    @staticmethod
    def _table_lines(frame: pd.DataFrame) -> list[str]:
        """Render ``frame`` as Markdown lines, or as plain text if that fails."""
        try:
            return frame.to_markdown().splitlines()
        except Exception:
            # Best effort: fall back to the plain-text rendering when
            # ``to_markdown`` is unavailable or raises.
            return frame.to_string().splitlines()

    def to_markdown(self) -> str:
        """Render the summary table and monthly returns as a Markdown document."""
        lines = ["# Tearsheet", "", "## Summary", ""]
        lines.extend(self._table_lines(self.stats_table))
        lines.extend(["", "## Monthly Returns", ""])
        if self.monthly_returns.empty:
            lines.append("_No monthly returns available._")
        else:
            lines.extend(self._table_lines(self.monthly_returns))
        return "\n".join(lines)

    def to_html(self) -> str:
        """Render the summary table and monthly returns as one HTML document."""
        summary = self.stats_table.to_html(classes="stats-table")
        monthly = (
            self.monthly_returns.to_html(classes="monthly-returns")
            if not self.monthly_returns.empty
            else "<p>No monthly returns available.</p>"
        )
        return (
            "<html><head><title>Tearsheet</title></head><body>"
            "<h1>Tearsheet</h1>"
            "<h2>Summary</h2>"
            f"{summary}"
            "<h2>Monthly Returns</h2>"
            f"{monthly}"
            "</body></html>"
        )
def monthly_return_table(balance: pd.DataFrame) -> pd.DataFrame:
if balance.empty or "% change" not in balance.columns:
return pd.DataFrame()
rets = balance["% change"].dropna()
if rets.empty:
return pd.DataFrame()
monthly = (1.0 + rets).groupby(pd.Grouper(freq="ME")).prod() - 1.0
out = monthly.to_frame(name="return")
out["year"] = out.index.year
out["month"] = out.index.month
return out.pivot(index="year", columns="month", values="return").sort_index()
def drawdown_series(balance: pd.DataFrame) -> pd.Series:
    """Fractional drawdown of ``total capital`` relative to its running peak.

    Returns:
        A Series of ``(value - peak) / peak`` (zero at new highs, negative
        below them), or an empty float Series when ``balance`` is empty,
        lacks ``total capital``, or holds only nulls there.
    """
    has_capital = (not balance.empty) and ("total capital" in balance.columns)
    if not has_capital:
        return pd.Series(dtype=float)

    capital = balance["total capital"].dropna()
    if capital.empty:
        return pd.Series(dtype=float)

    running_peak = capital.cummax()
    shortfall = capital - running_peak
    return shortfall / running_peak
def build_tearsheet(
    balance: pd.DataFrame,
    trade_pnls=None,
    risk_free_rate: float = 0.0,
) -> TearsheetReport:
    """Assemble a ``TearsheetReport`` from a balance frame.

    Args:
        balance: Balance frame passed to the stats, monthly-return and
            drawdown helpers.
        trade_pnls: Optional per-trade P&L values; converted to a float
            array when given.
        risk_free_rate: Forwarded to ``BacktestStats.from_balance``.

    Returns:
        The report bundling stats, stats table, monthly returns and drawdowns.
    """
    if trade_pnls is None:
        pnl_array = None
    else:
        pnl_array = np.asarray(trade_pnls, dtype=float)

    computed = BacktestStats.from_balance(
        balance,
        trade_pnls=pnl_array,
        risk_free_rate=risk_free_rate,
    )
    return TearsheetReport(
        stats=computed,
        stats_table=computed.to_dataframe(),
        monthly_returns=monthly_return_table(balance),
        drawdown_series=drawdown_series(balance),
    )
================================================
FILE: options_portfolio_backtester/analytics/trade_log.py
================================================
"""Structured trade log — replaces MultiIndex trade log with per-trade P&L."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
import pandas as pd
import numpy as np
from options_portfolio_backtester.core.types import Order
@dataclass
class Trade:
    """One round trip: an entry paired with its exit.

    P&L is computed as exit price minus entry price, scaled by quantity and
    contract size, with commissions subtracted for the net figure.
    """

    contract: str
    underlying: str
    option_type: str
    strike: float
    entry_date: Any
    exit_date: Any
    entry_price: float
    exit_price: float
    quantity: int
    shares_per_contract: int
    entry_order: Order
    exit_order: Order
    entry_commission: float = 0.0
    exit_commission: float = 0.0

    @property
    def gross_pnl(self) -> float:
        """Profit or loss with commissions ignored."""
        price_move = self.exit_price - self.entry_price
        return price_move * self.quantity * self.shares_per_contract

    @property
    def net_pnl(self) -> float:
        """Profit or loss once both commissions are taken off."""
        after_entry_fee = self.gross_pnl - self.entry_commission
        return after_entry_fee - self.exit_commission

    @property
    def return_pct(self) -> float:
        """Net P&L as a fraction of the absolute entry cost (0.0 if that cost is zero)."""
        notional = abs(self.entry_price * self.quantity * self.shares_per_contract)
        return 0.0 if notional == 0 else self.net_pnl / notional
class TradeLog:
    """Ordered collection of round-trip trades plus a few analysis helpers."""

    def __init__(self) -> None:
        self.trades: list[Trade] = []

    def add_trade(self, trade: Trade) -> None:
        """Append ``trade`` to the log."""
        self.trades.append(trade)

    @classmethod
    def from_legacy_trade_log(cls, trade_log: pd.DataFrame,
                              shares_per_contract: int = 100) -> "TradeLog":
        """Convert the legacy MultiIndex ``trade_log`` DataFrame into a TradeLog.

        Rows whose first-leg order is BTO or STO are treated as entries and
        all other rows as exits. For every entry row, the first entry and the
        first exit sharing its contract are paired into a ``Trade``; contracts
        without a matching exit are skipped. Prices are the absolute first-leg
        cost divided by ``shares_per_contract``.
        """
        log = cls()
        if trade_log.empty:
            return log

        first_leg: str = trade_log.columns.levels[0][0]
        # These two locals are looked up by name from the eval expression below.
        order_bto = Order.BTO
        order_sto = Order.STO
        is_entry = trade_log[first_leg].eval(
            "(order == @order_bto) | (order == @order_sto)"
        )
        entries = trade_log.loc[is_entry]
        exits = trade_log.loc[~is_entry]
        entry_contracts = entries[first_leg]["contract"]
        exit_contracts = exits[first_leg]["contract"]

        for contract in entry_contracts:
            opened = entries.loc[entry_contracts == contract]
            closed = exits.loc[exit_contracts == contract]
            if opened.empty or closed.empty:
                continue
            try:
                open_row = opened.iloc[0]
                close_row = closed.iloc[0]
                log.add_trade(Trade(
                    contract=contract,
                    underlying=open_row[first_leg]["underlying"],
                    option_type=open_row[first_leg]["type"],
                    strike=open_row[first_leg]["strike"],
                    entry_date=open_row["totals"]["date"],
                    exit_date=close_row["totals"]["date"],
                    entry_price=abs(open_row[first_leg]["cost"]) / shares_per_contract,
                    exit_price=abs(close_row[first_leg]["cost"]) / shares_per_contract,
                    quantity=int(open_row["totals"]["qty"]),
                    shares_per_contract=shares_per_contract,
                    entry_order=open_row[first_leg]["order"],
                    exit_order=close_row[first_leg]["order"],
                ))
            except (IndexError, KeyError):
                # Rows missing an expected field are left out of the log.
                continue
        return log

    def to_dataframe(self) -> pd.DataFrame:
        """Flatten the trades into one row per trade (empty frame if no trades)."""
        if len(self.trades) == 0:
            return pd.DataFrame()
        records = [
            {
                "contract": trade.contract,
                "underlying": trade.underlying,
                "type": trade.option_type,
                "strike": trade.strike,
                "entry_date": trade.entry_date,
                "exit_date": trade.exit_date,
                "entry_price": trade.entry_price,
                "exit_price": trade.exit_price,
                "quantity": trade.quantity,
                "gross_pnl": trade.gross_pnl,
                "net_pnl": trade.net_pnl,
                "return_pct": trade.return_pct,
                "entry_commission": trade.entry_commission,
                "exit_commission": trade.exit_commission,
            }
            for trade in self.trades
        ]
        return pd.DataFrame(records)

    @property
    def net_pnls(self) -> np.ndarray:
        """Net P&L of every trade, in insertion order."""
        return np.array([trade.net_pnl for trade in self.trades])

    @property
    def winners(self) -> list[Trade]:
        """Trades with strictly positive net P&L."""
        return list(filter(lambda trade: trade.net_pnl > 0, self.trades))

    @property
    def losers(self) -> list[Trade]:
        """Trades with zero or negative net P&L."""
        return list(filter(lambda trade: trade.net_pnl <= 0, self.trades))

    def __len__(self) -> int:
        return len(self.trades)
================================================
FILE: options_portfolio_backtester/convexity/__init__.py
================================================
"""Convexity scanner: cross-asset tail protection scoring and allocation."""
from options_portfolio_backtester.convexity.allocator import (
allocate_equal_weight,
allocate_inverse_vol,
pick_cheapest,
)
from options_portfolio_backtester.convexity.backtest import (
BacktestResult,
run_backtest,
run_unhedged,
)
from options_portfolio_backtester.convexity.config import (
BacktestConfig,
InstrumentConfig,
default_config,
)
from options_portfolio_backtester.convexity.scoring import compute_convexity_scores
__all__ = [
"InstrumentConfig",
"BacktestConfig",
"default_config",
"compute_convexity_scores",
"BacktestResult",
"run_backtest",
"run_unhedged",
"pick_cheapest",
"allocate_equal_weight",
"allocate_inverse_vol",
]
================================================
FILE: options_portfolio_backtester/convexity/_utils.py
================================================
"""Shared utilities for the convexity module."""
from __future__ import annotations
import numpy as np
import pandas as pd
def _to_ns(series: pd.Series) -> np.ndarray:
"""Convert a datetime Series to int64 nanosecond timestamps."""
return series.values.astype("datetime64[ns]").view("int64").astype(np.int64)
================================================
FILE: options_portfolio_backtester/convexity/allocator.py
================================================
"""Allocation strategies: pick which instrument(s) to hedge."""
from __future__ import annotations
def pick_cheapest(scores: dict[str, float]) -> str:
    """Return the key of ``scores`` that carries the largest value.

    Raises:
        ValueError: If ``scores`` is empty.
    """
    if len(scores) == 0:
        raise ValueError("No scores to pick from")
    best_symbol, _ = max(scores.items(), key=lambda pair: pair[1])
    return best_symbol
def allocate_equal_weight(symbols: list[str], budget: float) -> dict[str, float]:
    """Give every symbol the same slice of ``budget``.

    Returns:
        Mapping of symbol to ``budget / len(symbols)``; empty when no
        symbols are given.
    """
    count = len(symbols)
    if count == 0:
        return {}
    return dict.fromkeys(symbols, budget / count)
def allocate_inverse_vol(vol_map: dict[str, float], budget: float) -> dict[str, float]:
    """Split ``budget`` in proportion to ``1 / vol``.

    Only symbols with a strictly positive volatility receive an allocation.
    When no symbol qualifies, the budget is split equally across all keys
    of ``vol_map``.

    Returns:
        Mapping of symbol to allocated amount; empty for an empty ``vol_map``.
    """
    if len(vol_map) == 0:
        return {}

    reciprocal = {symbol: 1.0 / vol for symbol, vol in vol_map.items() if vol > 0}
    if len(reciprocal) == 0:
        return allocate_equal_weight(list(vol_map.keys()), budget)

    normaliser = sum(reciprocal.values())
    allocation = {}
    for symbol, weight in reciprocal.items():
        allocation[symbol] = (weight / normaliser) * budget
    return allocation
================================================
FILE: options_portfolio_backtester/convexity/backtest.py
================================================
"""Backtest: run the monthly rebalance loop via Rust backend."""
from __future__ import annotations
import logging
from dataclasses import dataclass
import numpy as np
import pandas as pd
from .config import BacktestConfig
log = logging.getLogger(__name__)
def _to_ns(series: pd.Series) -> np.ndarray:
"""Convert a datetime Series to int64 nanosecond timestamps."""
return series.values.astype("datetime64[ns]").view("int64").astype(np.int64)
@dataclass
class BacktestResult:
    """Results from a single-instrument backtest.

    Returned by ``run_backtest``. When there are no put quotes or no stock
    prices to work with, both frames are empty DataFrames.
    """

    records: pd.DataFrame  # monthly rebalance records
    daily_balance: pd.DataFrame  # daily portfolio values
    config: BacktestConfig  # the configuration the backtest was run with
def run_backtest(
    options_data,
    stocks_data,
    config: BacktestConfig,
) -> BacktestResult:
    """Run the monthly put-overlay backtest on an equity portfolio.

    The heavy lifting is delegated to the Rust function
    ``run_convexity_backtest``; this wrapper only prepares the input arrays
    and reshapes the returned data into DataFrames.

    Args:
        options_data: Object exposing the options DataFrame as ``_data``
            (HistoricalOptionsData from options_backtester).
        stocks_data: Object exposing the stock DataFrame as ``_data``
            (TiingoData from options_backtester).
        config: Backtest parameters forwarded to the Rust backend.

    Returns:
        A BacktestResult with monthly ``records`` and ``daily_balance``, both
        indexed by date. Both frames are empty when there are no put rows or
        no stock rows.
    """
    from options_portfolio_backtester._ob_rust import run_convexity_backtest

    all_options = options_data._data
    puts = all_options[all_options["type"] == "put"].sort_values("quotedate")
    stk_df = stocks_data._data.sort_values("date")

    # Nothing to simulate without both puts and stock prices.
    if puts.empty or stk_df.empty:
        return BacktestResult(records=pd.DataFrame(), daily_balance=pd.DataFrame(), config=config)

    def put_floats(column: str) -> np.ndarray:
        # The backend receives each put column as a float64 array.
        return puts[column].values.astype(np.float64)

    result = run_convexity_backtest(
        put_dates_ns=_to_ns(puts["quotedate"]),
        put_expirations_ns=_to_ns(puts["expiration"]),
        put_strikes=put_floats("strike"),
        put_bids=put_floats("bid"),
        put_asks=put_floats("ask"),
        put_deltas=put_floats("delta"),
        put_underlying=put_floats("underlying_last"),
        put_dtes=puts["dte"].values.astype(np.int32),
        put_ivs=put_floats("impliedvol"),
        stock_dates_ns=_to_ns(stk_df["date"]),
        stock_prices=stk_df["adjClose"].values.astype(np.float64),
        initial_capital=config.initial_capital,
        budget_pct=config.budget_pct,
        target_delta=config.target_delta,
        dte_min=config.dte_min,
        dte_max=config.dte_max,
        tail_drop=config.tail_drop,
    )

    # Monthly rebalance records: output column -> key in the backend payload.
    rec = result["records"]
    record_sources = {
        "shares": "shares",
        "stock_price": "stock_prices",
        "equity_value": "equity_values",
        "put_cost": "put_costs",
        "put_exit_value": "put_exit_values",
        "put_pnl": "put_pnls",
        "portfolio_value": "portfolio_values",
        "convexity_ratio": "convexity_ratios",
        "strike": "strikes",
        "contracts": "contracts",
    }
    record_columns = {"date": pd.to_datetime(rec["dates_ns"], unit="ns")}
    for column, key in record_sources.items():
        record_columns[column] = rec[key]
    records = pd.DataFrame(record_columns).set_index("date")

    # Daily portfolio balance plus its day-over-day return.
    daily_columns = {
        "date": pd.to_datetime(result["daily_dates_ns"], unit="ns"),
        "balance": result["daily_balances"],
    }
    daily = pd.DataFrame(daily_columns).set_index("date")
    daily["pct_change"] = daily["balance"].pct_change()

    final_value = daily["balance"].iloc[-1] if len(daily) > 0 else 0
    log.info(
        "Backtest: %d months, final value $%.0f (started $%.0f)",
        len(records),
        final_value,
        config.initial_capital,
    )
    return BacktestResult(records=records, daily_balance=daily, config=config)
def run_unhedged(stocks_data, config: BacktestConfig) -> pd.DataFrame:
    """Compute the buy-and-hold equity benchmark without any option overlay.

    All of ``config.initial_capital`` is converted into shares at the first
    (date-sorted) ``adjClose`` and held for the whole period.

    Args:
        stocks_data: Object exposing the stock DataFrame as ``_data``.
        config: Supplies ``initial_capital``.

    Returns:
        DataFrame indexed by ``date`` with ``balance`` and ``pct_change``
        columns, or an empty DataFrame when there is no stock data.
    """
    ordered = stocks_data._data.sort_values("date")
    if ordered.empty:
        return pd.DataFrame()

    closes = ordered["adjClose"].values.astype(np.float64)
    share_count = config.initial_capital / closes[0]

    benchmark = pd.DataFrame({"date": ordered["date"], "balance": share_count * closes})
    benchmark = benchmark.set_index("date")
    benchmark["pct_change"] = benchmark["balance"].pct_change()
    return benchmark
================================================
FILE: options_portfolio_backtester/convexity/config.py
================================================
"""Configuration: instrument registry and backtest parameters."""
from __future__ import annotations
from dataclasses import dataclass, field
@dataclass(frozen=True)
class InstrumentConfig:
    """Immutable per-instrument settings: data locations and selection parameters."""

    # Ticker of the instrument.
    symbol: str
    # Path of the options data file.
    options_file: str
    # Path of the stock price data file.
    stocks_file: str
    # Selection parameters; the defaults match those on BacktestConfig.
    target_delta: float = -0.10
    dte_min: int = 14
    dte_max: int = 60
    tail_drop: float = 0.20
@dataclass(frozen=True)
class BacktestConfig:
    """Immutable run-wide parameters shared by the backtest and scoring code."""

    # Starting portfolio value.
    initial_capital: float = 1_000_000.0
    # Fraction of the portfolio spent on puts each month (0.5%).
    budget_pct: float = 0.005
    # Selection parameters forwarded to the Rust backend.
    target_delta: float = -0.10
    dte_min: int = 14
    dte_max: int = 60
    tail_drop: float = 0.20
    # Instruments covered by this configuration; a fresh list per instance.
    instruments: list[InstrumentConfig] = field(default_factory=list)
def default_config(
    options_file: str = "data/processed/options.csv",
    stocks_file: str = "data/processed/stocks.csv",
) -> BacktestConfig:
    """Build a BacktestConfig whose only instrument is SPY.

    Args:
        options_file: Path of the SPY options data file.
        stocks_file: Path of the SPY stock data file.

    Returns:
        A BacktestConfig with default parameters and a single SPY instrument.
    """
    instruments = [
        InstrumentConfig(symbol="SPY", options_file=options_file, stocks_file=stocks_file),
    ]
    return BacktestConfig(instruments=instruments)
================================================
FILE: options_portfolio_backtester/convexity/scoring.py
================================================
"""Scoring: compute convexity ratios via Rust backend."""
from __future__ import annotations
import logging
import numpy as np
import pandas as pd
from .config import BacktestConfig
log = logging.getLogger(__name__)
def _to_ns(series: pd.Series) -> np.ndarray:
"""Convert a datetime Series to int64 nanosecond timestamps."""
return series.values.astype("datetime64[ns]").view("int64").astype(np.int64)
def compute_convexity_scores(
    options_data,
    config: BacktestConfig,
) -> pd.DataFrame:
    """Score the puts of one instrument with the Rust ``compute_daily_scores``.

    Args:
        options_data: Object exposing the options DataFrame as ``_data``
            (a HistoricalOptionsData object from options_backtester).
        config: Supplies ``target_delta``, ``dte_min``, ``dte_max`` and
            ``tail_drop``.

    Returns:
        DataFrame indexed by ``date`` holding ``convexity_ratio`` and its
        supporting fields, or an empty DataFrame when there are no put rows.
    """
    from options_portfolio_backtester._ob_rust import compute_daily_scores

    frame = options_data._data
    puts = frame[frame["type"] == "put"].sort_values("quotedate")
    if puts.empty:
        return pd.DataFrame()

    def as_f64(column: str) -> np.ndarray:
        # The backend receives each put column as a float64 array.
        return puts[column].values.astype(np.float64)

    result = compute_daily_scores(
        dates_ns=_to_ns(puts["quotedate"]),
        strikes=as_f64("strike"),
        bids=as_f64("bid"),
        asks=as_f64("ask"),
        deltas=as_f64("delta"),
        underlying_prices=as_f64("underlying_last"),
        dtes=puts["dte"].values.astype(np.int32),
        implied_vols=as_f64("impliedvol"),
        target_delta=config.target_delta,
        dte_min=config.dte_min,
        dte_max=config.dte_max,
        tail_drop=config.tail_drop,
    )

    # Output column -> key in the backend payload, in output column order.
    score_sources = {
        "convexity_ratio": "convexity_ratios",
        "strike": "strikes",
        "ask": "asks",
        "bid": "bids",
        "delta": "deltas",
        "underlying_price": "underlying_prices",
        "implied_vol": "implied_vols",
        "dte": "dtes",
        "annual_cost": "annual_costs",
        "tail_payoff": "tail_payoffs",
    }
    columns = {"date": pd.to_datetime(result["dates_ns"], unit="ns")}
    for column, key in score_sources.items():
        columns[column] = result[key]
    scores = pd.DataFrame(columns).set_index("date")

    row_count = len(scores)
    log.info("Computed %d daily scores (%.1f years)", row_count, row_count / 252)
    return scores
================================================
FILE: options_portfolio_backtester/convexity/viz.py
================================================
"""Visualization: Altair charts for scores, allocations, and P&L."""
from __future__ import annotations
import altair as alt
import pandas as pd
def convexity_scores_chart(scores_df: pd.DataFrame) -> alt.Chart:
    """Plot the convexity ratio against date as a line chart.

    Args:
        scores_df: Scores frame whose index becomes a column after
            ``reset_index``; the chart reads the ``date``, ``convexity_ratio``,
            ``strike``, ``underlying_price`` and ``implied_vol`` fields.

    Returns:
        An 800x300 Altair line chart titled "Daily Convexity Ratio".
    """
    hover_fields = ["date:T", "convexity_ratio:Q", "strike:Q", "underlying_price:Q", "implied_vol:Q"]
    line = alt.Chart(scores_df.reset_index()).mark_line()
    encoded = line.encode(
        x=alt.X("date:T", title="Date"),
        y=alt.Y("convexity_ratio:Q", title="Convexity Ratio"),
        tooltip=hover_fields,
    )
    return encoded.properties(title="Daily Convexity Ratio", width=800, height=300)
def monthly_pnl_chart(records: pd.DataFrame) -> alt.Chart:
    """Plot put P&L per rebalance date as a bar chart.

    Bars with a positive ``put_pnl`` are drawn in steelblue, all others in
    salmon.

    Args:
        records: Rebalance records whose index becomes a column after
            ``reset_index``.

    Returns:
        An 800x200 Altair bar chart titled "Monthly Put P&L".
    """
    hover_fields = ["date:T", "put_pnl:Q", "put_cost:Q", "put_exit_value:Q", "strike:Q", "contracts:Q"]
    gain_or_loss = alt.condition(
        alt.datum.put_pnl > 0,
        alt.value("steelblue"),
        alt.value("salmon"),
    )
    bars = alt.Chart(records.reset_index()).mark_bar()
    encoded = bars.encode(
        x=alt.X("date:T", title="Date"),
        y=alt.Y("put_pnl:Q", title="Put P&L ($)"),
        color=gain_or_loss,
        tooltip=hover_fields,
    )
    return encoded.properties(title="Monthly Put P&L", width=800, height=200)
def cumulative_pnl_chart(results: dict[str, pd.DataFrame]) -> alt.Chart:
    """Overlay the balance curves of several strategies in one line chart.

    Args:
        results: Mapping of strategy name to a daily frame that has a
            ``balance`` column; each frame's index becomes a column after
            ``reset_index``.

    Returns:
        An 800x400 Altair line chart coloured by strategy, or an empty line
        chart when ``results`` is empty.
    """

    def _labelled(name: str, daily_df: pd.DataFrame) -> pd.DataFrame:
        # Copy so the caller's frame is not modified by the extra column.
        tagged = daily_df[["balance"]].copy()
        tagged["strategy"] = name
        return tagged.reset_index()

    frames = [_labelled(name, daily_df) for name, daily_df in results.items()]
    if not frames:
        return alt.Chart(pd.DataFrame()).mark_line()

    combined = pd.concat(frames, ignore_index=True)
    lines = alt.Chart(combined).mark_line()
    encoded = lines.encode(
        x=alt.X("date:T", title="Date"),
        y=alt.Y("balance:Q", title="Portfolio Value ($)", scale=alt.Scale(zero=False)),
        color=alt.Color("strategy:N", title="Strategy"),
        tooltip=["date:T", "balance:Q", "strategy:N"],
    )
    return encoded.properties(title="Cumulative Portfolio Value", width=800, height=400)
================================================
FILE: options_portfolio_backtester/core/__init__.py
================================================
================================================
FILE: options_portfolio_backtester/core/types.py
================================================
"""Core domain types for options backtesting.
Direction is decoupled from column names — use Direction.price_column instead of
Direction.value to get the DataFrame column for pricing.
"""
from __future__ import annotations
from collections import namedtuple
from dataclasses import dataclass, field
from enum import Enum
from typing import Any
# ---------------------------------------------------------------------------
# Enums
# ---------------------------------------------------------------------------
class OptionType(Enum):
    """Kind of option contract; ``~`` flips between call and put."""

    CALL = "call"
    PUT = "put"

    def __invert__(self) -> OptionType:
        """Return the other option type."""
        if self == OptionType.CALL:
            return OptionType.PUT
        return OptionType.CALL
class Direction(Enum):
    """Side of a trade; ``price_column`` names the quote column it executes at."""

    BUY = "buy"
    SELL = "sell"

    @property
    def price_column(self) -> str:
        """DataFrame column used for execution pricing: ask for buys, bid for sells."""
        if self == Direction.BUY:
            return "ask"
        return "bid"

    def __invert__(self) -> Direction:
        """Return the opposite side."""
        if self == Direction.BUY:
            return Direction.SELL
        return Direction.BUY
class Signal(Enum):
    """Whether a trade opens (ENTRY) or closes (EXIT) a position."""

    ENTRY = "entry"
    EXIT = "exit"
class Order(Enum):
    """Order type; ``~`` maps each order to the one that closes or reopens it."""

    BTO = "BTO"  # Buy to Open
    BTC = "BTC"  # Buy to Close
    STO = "STO"  # Sell to Open
    STC = "STC"  # Sell to Close

    def __invert__(self) -> Order:
        """Return the counterpart order (BTO<->STC, STO<->BTC)."""
        counterpart = {"BTO": "STC", "STC": "BTO", "STO": "BTC", "BTC": "STO"}
        return Order[counterpart[self.name]]
def get_order(direction: Direction, signal: Signal) -> Order:
    """Translate a (direction, signal) pair into the matching Order.

    A BUY direction opens with BTO and closes with STC; any other direction
    opens with STO and closes with BTC. Any signal other than ENTRY is
    treated as a close.
    """
    opening = signal == Signal.ENTRY
    if direction == Direction.BUY:
        return Order.BTO if opening else Order.STC
    return Order.STO if opening else Order.BTC
# ---------------------------------------------------------------------------
# Value objects
# ---------------------------------------------------------------------------
@dataclass(frozen=True, slots=True)
class Greeks:
"""Option Greeks for a single contract or aggregated position.
Supports addition (aggregation) and scalar multiplication (scaling by qty).
"""
delta: float = 0.0
gamma: float = 0.0
theta: float = 0.0
vega: float = 0.0
def __add__(self, other: Greeks) -> Greeks:
return Greeks(
delta=self.delta + other.delta,
gamma=self.gamma + other.gamma,
theta=self.theta + other.theta,
vega=self.vega + other.vega,
)
def __mul__(self, scalar: float) -> Greeks:
return Greeks(
delta=self.delta * scalar,
gamma=self.gamma * scalar,
theta=self.theta * scalar,
vega=self.vega * scalar,
)
def __rmul__(self, scalar: float) -> Greeks:
return self.__mul__(scalar)
def __neg__(self) -> Greeks:
return self * -1.0
@property
def as_dict(self) -> dict[str, float]:
return {"delta": self.delta, "gamma": self.gamma,
"theta": self.theta, "vega": self.vega}
@dataclass(frozen=True, slots=True)
class Fill:
"""A single execution fill.
Captures price, quantity, commission, slippage, and computes notional.
"""
price: float
quantity: int
direction: Direction
shares_per_contract: int = 100
commission: float = 0.0
slippage: float = 0.0
@property
def direction_sign(self) -> int:
return -1 if self.direction == Direction.BUY else 1
@property
def notional(self) -> float:
"""Net cash impact: direction_sign * (price * qty * spc) - commission - slippage."""
raw = self.direction_sign * self.price * self.quantity * self.shares_per_contract
return raw - self.commission - self.slippage
@dataclass(frozen=True, slots=True)
class OptionContract:
"""Identifies a specific option contract."""
contract_id: str
underlying: str
expiration: Any # pd.Timestamp or str
option_type: OptionType
strike: float
# Re-use namedtuple for backward compatibility.
# Fields are ``symbol`` and ``percentage``.
StockAllocation = namedtuple("StockAllocation", "symbol percentage")
# Backward-compatible aliases: older names bound to the same objects.
Stock = StockAllocation
Type = OptionType
================================================
FILE: options_portfolio_backtester/data/__init__.py
================================================
"""Data module — schema and providers."""
================================================
FILE: options_portfolio_backtester/data/providers.py
================================================
"""Data providers — ABCs, CSV implementations, and data loaders."""
from __future__ import annotations
import os
from abc import ABC, abstractmethod
from typing import Any, Union
import pandas as pd
from .schema import Schema, Filter
class TiingoData:
    """Tiingo (stocks & indices) data container.

    Wraps a ``pd.DataFrame`` loaded from an ``.h5`` or ``.csv`` file together
    with the ``Schema`` that maps logical field names to its columns.
    Attributes that are not defined on the container are looked up on the
    wrapped DataFrame.
    """

    def __init__(self, file: str, schema: Schema | None = None, **params: Any) -> None:
        """Load ``file`` and validate that it contains every schema column.

        Args:
            file: Path to a ``.h5`` or ``.csv`` file.
            schema: Column mapping to use; defaults to ``default_schema()``.
            **params: Extra keyword arguments forwarded to the pandas reader.
                For CSV files ``parse_dates`` is always overwritten with the
                schema's date column.

        Raises:
            ValueError: If the file extension is neither ``.h5`` nor ``.csv``.
            AssertionError: If a column named by the schema is missing.
        """
        # Always bind the schema. A caller-supplied schema used to be dropped,
        # which left the attribute unset and sent every later access into
        # ``__getattr__`` recursion.
        self.schema = schema if schema is not None else TiingoData.default_schema()

        file_extension = os.path.splitext(file)[1]
        if file_extension == '.h5':
            self._data: pd.DataFrame = pd.read_hdf(file, **params)
        elif file_extension == '.csv':
            params['parse_dates'] = [self.schema.date.mapping]
            self._data = pd.read_csv(file, **params)
        else:
            # Fail loudly instead of leaving ``_data`` unset.
            raise ValueError(
                "Unsupported file extension {!r}; expected '.h5' or '.csv'".format(file_extension))

        columns = self._data.columns
        assert all((col in columns for _key, col in self.schema))

        date_col = self.schema['date']
        self.start_date: pd.Timestamp = self._data[date_col].min()
        self.end_date: pd.Timestamp = self._data[date_col].max()

    def apply_filter(self, f: Filter) -> pd.DataFrame:
        """Apply Filter `f` to the data. Returns a `pd.DataFrame` with the filtered rows."""
        return self._data.query(f.query)

    def iter_dates(self) -> pd.core.groupby.DataFrameGroupBy:
        """Returns `pd.DataFrameGroupBy` that groups stocks by date"""
        return self._data.groupby(self.schema['date'])

    def iter_months(self) -> pd.core.groupby.DataFrameGroupBy:
        """Returns `pd.DataFrameGroupBy` that groups stocks by month.

        Only rows dated on the earliest date present in each calendar month
        are kept; they are then grouped by that date.
        """
        date_col = self.schema['date']
        first_date_per_month = (
            self._data.groupby(self._data[date_col].dt.to_period('M'))[date_col]
            .min()
        )
        mask = self._data[date_col].isin(first_date_per_month.values)
        return self._data[mask].groupby(date_col)

    def __getattr__(self, attr: str) -> Any:
        """Forward unknown attribute access to the wrapped DataFrame.

        The attribute is returned as-is. Wrapping "callable" attributes in a
        plain function broke indexers such as ``.loc`` and ``.iloc``, which
        are callable objects that must stay subscriptable.
        """
        # ``__getattr__`` only runs when normal lookup failed, so reaching
        # here for these names means the instance is not initialised yet
        # (e.g. during copy or unpickling). Raise instead of recursing.
        if attr in ('_data', 'schema'):
            raise AttributeError(
                "{!r} object has no attribute {!r}".format(type(self).__name__, attr))
        return getattr(self._data, attr)

    def __getitem__(self, item: Union[str, pd.Series]) -> Union[pd.DataFrame, pd.Series]:
        """Index by boolean Series (row selection) or by schema field name (column)."""
        if isinstance(item, pd.Series):
            return self._data[item]
        else:
            key = self.schema[item]
            return self._data[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Assign column ``key`` and register it in the schema if it is new."""
        self._data[key] = value
        if key not in self.schema:
            self.schema.update({key: key})

    def __len__(self) -> int:
        return len(self._data)

    def __repr__(self) -> str:
        return self._data.__repr__()

    @staticmethod
    def default_schema() -> Schema:
        """Returns default schema for Tiingo Data"""
        return Schema.stocks()

    def sma(self, periods: int) -> None:
        """Add an ``sma`` column: per-symbol rolling mean of ``adjClose``.

        Missing values (windows shorter than ``periods``) are filled with 0.
        """
        sma = self._data.groupby('symbol', as_index=False).rolling(periods)['adjClose'].mean()
        sma = sma.fillna(0)
        # Keep the second level of each index tuple so the result aligns with
        # ``_data`` -- presumably the original row label; verify against the
        # pandas version in use.
        sma.index = [index[1] for index in sma.index]
        self._data['sma'] = sma
        self.schema.update({'sma': 'sma'})
class HistoricalOptionsData:
    """Historical options data container.

    Wraps a ``pd.DataFrame`` loaded from an ``.h5`` or ``.csv`` file together
    with the ``Schema`` that maps logical field names to its columns, and adds
    a derived ``dte`` (days to expiration) column. Attributes that are not
    defined on the container are looked up on the wrapped DataFrame.
    """

    def __init__(self, file: str, schema: Schema | None = None, **params: Any) -> None:
        """Load ``file``, validate it against the schema and compute ``dte``.

        Args:
            file: Path to a ``.h5`` or ``.csv`` file.
            schema: Column mapping to use; defaults to ``default_schema()``.
                The schema is updated in place with the ``dte`` field.
            **params: Extra keyword arguments forwarded to the pandas reader.
                For CSV files ``parse_dates`` is always overwritten with the
                schema's expiration and date columns.

        Raises:
            ValueError: If the file extension is neither ``.h5`` nor ``.csv``.
            AssertionError: If a column named by the schema is missing.
        """
        # Always bind the schema. A caller-supplied schema used to be dropped,
        # which left the attribute unset and sent every later access into
        # ``__getattr__`` recursion.
        self.schema = schema if schema is not None else HistoricalOptionsData.default_schema()

        file_extension = os.path.splitext(file)[1]
        if file_extension == '.h5':
            self._data: pd.DataFrame = pd.read_hdf(file, **params)
        elif file_extension == '.csv':
            params['parse_dates'] = [self.schema.expiration.mapping, self.schema.date.mapping]
            self._data = pd.read_csv(file, **params)
        else:
            # Fail loudly instead of leaving ``_data`` unset.
            raise ValueError(
                "Unsupported file extension {!r}; expected '.h5' or '.csv'".format(file_extension))

        columns = self._data.columns
        assert all((col in columns for _key, col in self.schema))

        date_col = self.schema['date']
        expiration_col = self.schema['expiration']

        # Days to expiration: whole days between quote date and expiration.
        self._data['dte'] = (self._data[expiration_col] - self._data[date_col]).dt.days
        self.schema.update({'dte': 'dte'})

        self.start_date: pd.Timestamp = self._data[date_col].min()
        self.end_date: pd.Timestamp = self._data[date_col].max()

    def apply_filter(self, f: Filter) -> pd.DataFrame:
        """Apply Filter `f` to the data. Returns a `pd.DataFrame` with the filtered rows."""
        return self._data.query(f.query)

    def iter_dates(self) -> pd.core.groupby.DataFrameGroupBy:
        """Returns `pd.DataFrameGroupBy` that groups contracts by date"""
        return self._data.groupby(self.schema['date'])

    def iter_months(self) -> pd.core.groupby.DataFrameGroupBy:
        """Returns `pd.DataFrameGroupBy` that groups contracts by month.

        Only rows dated on the earliest date present in each calendar month
        are kept; they are then grouped by that date.
        """
        date_col = self.schema['date']
        first_date_per_month = (
            self._data.groupby(self._data[date_col].dt.to_period('M'))[date_col]
            .min()
        )
        mask = self._data[date_col].isin(first_date_per_month.values)
        return self._data[mask].groupby(date_col)

    def __getattr__(self, attr: str) -> Any:
        """Forward unknown attribute access to the wrapped DataFrame.

        The attribute is returned as-is. Wrapping "callable" attributes in a
        plain function broke indexers such as ``.loc`` and ``.iloc``, which
        are callable objects that must stay subscriptable.
        """
        # ``__getattr__`` only runs when normal lookup failed, so reaching
        # here for these names means the instance is not initialised yet
        # (e.g. during copy or unpickling). Raise instead of recursing.
        if attr in ('_data', 'schema'):
            raise AttributeError(
                "{!r} object has no attribute {!r}".format(type(self).__name__, attr))
        return getattr(self._data, attr)

    def __getitem__(self, item: Union[str, pd.Series]) -> Union[pd.DataFrame, pd.Series]:
        """Index by boolean Series (row selection) or by schema field name (column)."""
        if isinstance(item, pd.Series):
            return self._data[item]
        else:
            key = self.schema[item]
            return self._data[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Assign column ``key`` and register it in the schema if it is new."""
        self._data[key] = value
        if key not in self.schema:
            self.schema.update({key: key})

    def __len__(self) -> int:
        return len(self._data)

    def __repr__(self) -> str:
        return self._data.__repr__()

    @staticmethod
    def default_schema() -> Schema:
        """Returns default schema for Historical Options Data"""
        schema = Schema.options()
        schema.update({
            'contract': 'optionroot',
            'date': 'quotedate',
            'last': 'last',
            'open_interest': 'openinterest',
            'impliedvol': 'impliedvol',
            'delta': 'delta',
            'gamma': 'gamma',
            'theta': 'theta',
            'vega': 'vega'
        })
        return schema
# ---------------------------------------------------------------------------
# Abstract base classes
# ---------------------------------------------------------------------------
class DataProvider(ABC):
    """Abstract contract every data provider has to fulfil."""

    @property
    @abstractmethod
    def schema(self) -> Schema:
        """Schema associated with the provider's data."""

    @property
    @abstractmethod
    def data(self) -> pd.DataFrame:
        """The provider's data as a DataFrame."""

    @property
    @abstractmethod
    def start_date(self) -> pd.Timestamp:
        """Start date of the data."""

    @property
    @abstractmethod
    def end_date(self) -> pd.Timestamp:
        """End date of the data."""

    @abstractmethod
    def apply_filter(self, f: Filter) -> pd.DataFrame:
        """Return the rows selected by filter ``f``."""

    @abstractmethod
    def iter_dates(self) -> Any:
        """Return an iterable over the data grouped by date."""

    @abstractmethod
    def iter_months(self) -> Any:
        """Return an iterable over the data grouped by month."""
class OptionsDataProvider(DataProvider):
    """Marker interface for providers of options data.

    Adds no members beyond those inherited from ``DataProvider``.
    """
class StocksDataProvider(DataProvider):
    """Interface for providers of stock data; adds ``sma`` to ``DataProvider``."""

    @abstractmethod
    def sma(self, periods: int) -> None:
        """Compute a simple moving average over ``periods`` observations."""
# ---------------------------------------------------------------------------
# CSV implementations (wrap existing loaders)
# ---------------------------------------------------------------------------
class CsvOptionsProvider(OptionsDataProvider):
    """Options provider that delegates everything to a HistoricalOptionsData loader."""

    def __init__(self, file: str, schema: Schema | None = None, **params: Any) -> None:
        """Create the underlying loader from ``file``, ``schema`` and ``params``."""
        self._loader = HistoricalOptionsData(file, schema=schema, **params)

    # --- read-only views onto the loader ---------------------------------

    @property
    def schema(self) -> Schema:
        """Schema held by the loader."""
        loader = self._loader
        return loader.schema

    @property
    def data(self) -> pd.DataFrame:
        """The loader's DataFrame."""
        loader = self._loader
        return loader._data

    @property
    def start_date(self) -> pd.Timestamp:
        """Start date recorded by the loader."""
        loader = self._loader
        return loader.start_date

    @property
    def end_date(self) -> pd.Timestamp:
        """End date recorded by the loader."""
        loader = self._loader
        return loader.end_date

    # --- delegated operations --------------------------------------------

    def apply_filter(self, f: Filter) -> pd.DataFrame:
        """Delegate to the loader's ``apply_filter``."""
        return self._loader.apply_filter(f)

    def iter_dates(self) -> Any:
        """Delegate to the loader's ``iter_dates``."""
        return self._loader.iter_dates()

    def iter_months(self) -> Any:
        """Delegate to the loader's ``iter_months``."""
        return self._loader.iter_months()

    def __getitem__(self, item: Any) -> Any:
        return self._loader[item]

    def __setitem__(self, key: str, value: Any) -> None:
        self._loader[key] = value

    def __len__(self) -> int:
        return len(self._loader)

    @property
    def _data(self) -> pd.DataFrame:
        """Access to underlying DataFrame."""
        loader = self._loader
        return loader._data
class CsvStocksProvider(StocksDataProvider):
    """Stocks provider that delegates everything to a TiingoData loader."""

    def __init__(self, file: str, schema: Schema | None = None, **params: Any) -> None:
        """Create the underlying loader from ``file``, ``schema`` and ``params``."""
        self._loader = TiingoData(file, schema=schema, **params)

    # --- read-only views onto the loader ---------------------------------

    @property
    def schema(self) -> Schema:
        """Schema held by the loader."""
        loader = self._loader
        return loader.schema

    @property
    def data(self) -> pd.DataFrame:
        """The loader's DataFrame."""
        loader = self._loader
        return loader._data

    @property
    def start_date(self) -> pd.Timestamp:
        """Start date recorded by the loader."""
        loader = self._loader
        return loader.start_date

    @property
    def end_date(self) -> pd.Timestamp:
        """End date recorded by the loader."""
        loader = self._loader
        return loader.end_date

    # --- delegated operations --------------------------------------------

    def apply_filter(self, f: Filter) -> pd.DataFrame:
        """Delegate to the loader's ``apply_filter``."""
        return self._loader.apply_filter(f)

    def iter_dates(self) -> Any:
        """Delegate to the loader's ``iter_dates``."""
        return self._loader.iter_dates()

    def iter_months(self) -> Any:
        """Delegate to the loader's ``iter_months``."""
        return self._loader.iter_months()

    def sma(self, periods: int) -> None:
        """Delegate to the loader's ``sma``."""
        self._loader.sma(periods)

    def __getitem__(self, item: Any) -> Any:
        return self._loader[item]

    def __setitem__(self, key: str, value: Any) -> None:
        self._loader[key] = value

    def __len__(self) -> int:
        return len(self._loader)

    @property
    def _data(self) -> pd.DataFrame:
        """Access to underlying DataFrame."""
        loader = self._loader
        return loader._data
================================================
FILE: options_portfolio_backtester/data/schema.py
================================================
"""Filter DSL — Schema, Field, and Filter for building query expressions."""
from __future__ import annotations
from typing import Any, Iterator, Union
class Schema:
    """Data schema class.

    Used to provide uniform access to fields in the data set: each logical
    field name is mapped to the column name used by the underlying data.
    Attribute access (``schema.strike``) returns a ``Field`` for building
    filters; item access (``schema['strike']``) returns the mapped column name.
    """

    stock_columns = [
        "symbol", "date", "open", "close", "high", "low", "volume", "adjClose", "adjHigh", "adjLow", "adjOpen",
        "adjVolume", "divCash", "splitFactor"
    ]
    option_columns = [
        "underlying", "underlying_last", "date", "contract", "type", "expiration", "strike", "bid", "ask", "volume",
        "open_interest"
    ]

    @staticmethod
    def stocks() -> Schema:
        """Builder method that returns a `Schema` with default mappings for stocks"""
        mappings = {key: key for key in Schema.stock_columns}
        return Schema(mappings)

    @staticmethod
    def options() -> Schema:
        """Builder method that returns a `Schema` with default mappings for options"""
        mappings = {key: key for key in Schema.option_columns}
        return Schema(mappings)

    def __init__(self, mappings: dict[str, str]) -> None:
        """Store ``mappings`` (by reference).

        ``mappings`` must contain every stock column or every option column.
        """
        assert all((key in mappings for key in Schema.stock_columns)) or all(
            (key in mappings for key in Schema.option_columns))
        self._mappings: dict[str, str] = mappings

    def update(self, mappings: dict[str, str]) -> Schema:
        """Update schema according to given `mappings`"""
        self._mappings.update(mappings)
        return self

    def __contains__(self, key: str) -> bool:
        """Returns True if key is in schema"""
        return key in self._mappings.keys()

    def __getattr__(self, key: str) -> Field:
        """Returns Field object used to build Filters.

        Raises:
            AttributeError: For ``_mappings`` and dunder names. These are
                protocol probes (copy, pickle, ``hasattr``), never schema
                fields. Without this guard they raised ``KeyError`` or
                recursed forever on a half-built instance.
            KeyError: For any other name that is not a schema field.
        """
        if key == "_mappings" or (key.startswith("__") and key.endswith("__")):
            raise AttributeError(
                "{!r} object has no attribute {!r}".format(type(self).__name__, key))
        return Field(key, self._mappings[key])

    def __setitem__(self, key: str, value: str) -> None:
        """Map field ``key`` to column ``value``."""
        self._mappings[key] = value

    def __getitem__(self, key: str) -> str:
        """Returns mapping of given `key`"""
        return self._mappings[key]

    def __iter__(self) -> Iterator[tuple[str, str]]:
        """Iterate over ``(field name, column name)`` pairs."""
        return iter(self._mappings.items())

    def __repr__(self) -> str:
        return "Schema({})".format([Field(k, m) for k, m in self._mappings.items()])

    def __eq__(self, other: object) -> bool:
        """Two schemas are equal when their mappings are equal."""
        if not isinstance(other, Schema):
            return NotImplemented
        return self._mappings == other._mappings
class Field:
    """Encapsulates data fields to build filters used by strategies.

    ``name`` is the logical field name and ``mapping`` the column expression
    used in generated queries. Arithmetic operators return a new ``Field``
    holding the combined expression; comparison operators return a ``Filter``.
    Expressions are concatenated textually without added parentheses.
    """

    __slots__ = ("name", "mapping")

    def __init__(self, name: str, mapping: str) -> None:
        self.name = name
        self.mapping = mapping

    def _create_filter(self, op: str, other: Union[Field, Any]) -> Filter:
        """Build ``Filter("<mapping> <op> <other>")``; a Field operand contributes its mapping."""
        if isinstance(other, Field):
            query = Field._format_query(self.mapping, op, other.mapping)
        else:
            query = Field._format_query(self.mapping, op, other)
        return Filter(query)

    def _combine_fields(self, op: str, other: Union[Field, int, float], invert: bool = False) -> Field:
        """Combine this field with another field or a number via ``op``.

        ``invert`` swaps the operands (used by the reflected operators).

        Raises:
            TypeError: If ``other`` is neither a Field nor an int/float.
        """
        if isinstance(other, Field):
            name = Field._format_query(self.name, op, other.name, invert)
            mapping = Field._format_query(self.mapping, op, other.mapping, invert)
        elif isinstance(other, (int, float)):
            name = Field._format_query(self.name, op, other, invert)
            mapping = Field._format_query(self.mapping, op, other, invert)
        else:
            raise TypeError(
                "unsupported operand type for {!r}: {!r}".format(op, type(other).__name__))
        return Field(name, mapping)

    @staticmethod
    def _format_query(left: Any, op: str, right: Any, invert: bool = False) -> str:
        """Render ``"left op right"``, swapping the operands when ``invert`` is set."""
        if invert:
            left, right = right, left
        query = "{left} {op} {right}".format(left=left, op=op, right=right)
        return query

    @staticmethod
    def _quote_if_str(value: Any) -> Any:
        """Wrap string literals in single quotes so queries compare against text."""
        if isinstance(value, str):
            return "'{}'".format(value)
        return value

    def __add__(self, value: Union[Field, int, float]) -> Field:
        return self._combine_fields("+", value)

    def __radd__(self, value: Union[Field, int, float]) -> Field:
        return self._combine_fields("+", value, invert=True)

    def __sub__(self, value: Union[Field, int, float]) -> Field:
        return self._combine_fields("-", value)

    def __rsub__(self, value: Union[Field, int, float]) -> Field:
        return self._combine_fields("-", value, invert=True)

    def __mul__(self, value: Union[Field, int, float]) -> Field:
        return self._combine_fields("*", value)

    def __rmul__(self, value: Union[Field, int, float]) -> Field:
        return self._combine_fields("*", value, invert=True)

    def __truediv__(self, value: Union[Field, int, float]) -> Field:
        return self._combine_fields("/", value)

    def __rtruediv__(self, value: Union[Field, int, float]) -> Field:
        return self._combine_fields("/", value, invert=True)

    def __lt__(self, value: Union[Field, Any]) -> Filter:
        return self._create_filter("<", value)

    def __le__(self, value: Union[Field, Any]) -> Filter:
        return self._create_filter("<=", value)

    def __gt__(self, value: Union[Field, Any]) -> Filter:
        return self._create_filter(">", value)

    def __ge__(self, value: Union[Field, Any]) -> Filter:
        return self._create_filter(">=", value)

    def __eq__(self, value: Union[Field, Any]) -> Filter:  # type: ignore[override]
        return self._create_filter("==", Field._quote_if_str(value))

    def __ne__(self, value: Union[Field, Any]) -> Filter:  # type: ignore[override]
        # Quote strings exactly like ``__eq__``. Unquoted, ``field != 'call'``
        # rendered as ``type != call`` and referred to a column named ``call``.
        return self._create_filter("!=", Field._quote_if_str(value))

    def __repr__(self) -> str:
        return "Field(name='{}', mapping='{}')".format(self.name, self.mapping)
class Filter:
    """Query expression that decides entry/exit conditions for strategies.

    Filters combine with ``&`` and ``|`` and are negated with ``~``; each
    operation returns a new Filter wrapping the composed query string.
    """

    # A bare string is accepted by ``__slots__`` and declares one slot.
    __slots__ = "query"

    def __init__(self, query: str) -> None:
        self.query = query

    def __and__(self, other: Filter) -> Filter:
        """Returns logical *and* between `self` and `other`"""
        assert isinstance(other, Filter)
        return Filter(query=f"({self.query}) & ({other.query})")

    def __or__(self, other: Filter) -> Filter:
        """Returns logical *or* between `self` and `other`"""
        assert isinstance(other, Filter)
        return Filter(query=f"(({self.query}) | ({other.query}))")

    def __invert__(self) -> Filter:
        """Negates filter"""
        return Filter(f"!({self.query})")

    def __call__(self, data: 'pd.DataFrame') -> 'pd.Series':
        """Evaluate the query against ``data`` via ``DataFrame.eval`` and return the result."""
        return data.eval(self.query)

    def __repr__(self) -> str:
        return f"Filter(query='{self.query}')"
__all__ = ["Schema", "Field", "Filter"]
================================================
FILE: options_portfolio_backtester/engine/__init__.py
================================================
================================================
FILE: options_portfolio_backtester/engine/algo_adapters.py
================================================
"""Algo adapter layer to drive BacktestEngine with bt-style pipeline blocks."""
from __future__ import annotations
import math
from dataclasses import dataclass, field
from typing import Literal, Protocol
import pandas as pd
from options_portfolio_backtester.core.types import Greeks
# Allowed values for EngineStepDecision.status.
StepStatus = Literal["continue", "skip_day", "stop"]
@dataclass(frozen=True)
class EngineStepDecision:
    """Immutable outcome returned by a single engine-algo step."""

    # One of the StepStatus values; "continue" by default.
    status: StepStatus = "continue"
    # Optional explanation for the decision.
    message: str = ""
@dataclass
class EnginePipelineContext:
    """Mutable state handed to every engine algo step for one rebalance date.

    Steps read these values and may modify ``options_allocation``,
    ``entry_filters`` and ``exit_threshold_override``.
    """

    # Rebalance date being processed.
    date: pd.Timestamp
    # Stock and option rows for the step.
    stocks: pd.DataFrame
    options: pd.DataFrame
    # Capital figures at the time of the step.
    total_capital: float
    current_cash: float
    # Aggregate greeks of the current portfolio.
    current_greeks: Greeks
    # Budget available for options.
    options_allocation: float
    # Filter callables accumulated by steps; a fresh list per context.
    entry_filters: list = field(default_factory=list)
    # Exit thresholds set by a step, or None when not overridden.
    exit_threshold_override: tuple[float, float] | None = None
class EngineAlgo(Protocol):
    """Structural type of a pipeline step: a callable taking the context."""

    def __call__(self, ctx: EnginePipelineContext) -> EngineStepDecision:
        """Process ``ctx`` and report how the pipeline should proceed."""
class EngineRunMonthly:
    """Let only the first call of each (year, month) proceed.

    Consecutive calls with the same year and month as the previous call are
    answered with ``skip_day``.
    """

    def __init__(self) -> None:
        # (year, month) of the most recent call that was let through.
        self._last_month: tuple[int, int] | None = None

    def reset(self) -> None:
        """Forget the last seen month so the next call proceeds."""
        self._last_month = None

    def __call__(self, ctx: EnginePipelineContext) -> EngineStepDecision:
        """Return ``continue`` for a new month, ``skip_day`` otherwise."""
        month_key = (ctx.date.year, ctx.date.month)
        if self._last_month != month_key:
            self._last_month = month_key
            return EngineStepDecision()
        return EngineStepDecision(status="skip_day", message="not month-start")
class BudgetPercent:
    """Step that sets the options budget to a fraction of total capital."""

    def __init__(self, pct: float) -> None:
        # Fraction of total capital, e.g. 0.05 for 5%.
        self.pct = float(pct)

    def __call__(self, ctx: EnginePipelineContext) -> EngineStepDecision:
        """Write ``total_capital * pct`` (floored at 0.0) to ``ctx.options_allocation``."""
        budget = float(ctx.total_capital) * self.pct
        ctx.options_allocation = max(0.0, budget)
        return EngineStepDecision()
class RangeFilter:
    """Keep contracts where *column* falls within [min_val, max_val].

    Generic building block — use directly or via the convenience aliases
    ``SelectByDelta``, ``SelectByDTE``, ``IVRankFilter``.
    """

    def __init__(self, column: str, min_val: float, max_val: float) -> None:
        self.column = column
        self.min_val = float(min_val)
        self.max_val = float(max_val)

    def __call__(self, ctx: EnginePipelineContext) -> EngineStepDecision:
        """Append an inclusive range mask builder to ``ctx.entry_filters``.

        The appended callable maps a DataFrame to a boolean Series. When the
        column is absent it selects every row.
        """
        # Bind the current settings so later attribute changes do not affect
        # a filter that was already registered.
        column, lower, upper = self.column, self.min_val, self.max_val

        def within_bounds(df: pd.DataFrame) -> pd.Series:
            if column in df.columns:
                values = df[column]
                return (values >= lower) & (values <= upper)
            return pd.Series(True, index=df.index)

        ctx.entry_filters.append(within_bounds)
        return EngineStepDecision()
def SelectByDelta(min_delta: float = -1.0, max_delta: float = 1.0, column: str = "delta") -> RangeFilter:
    """Build a RangeFilter keeping contracts with delta in [min_delta, max_delta]."""
    bounds = {"min_val": min_delta, "max_val": max_delta}
    return RangeFilter(column=column, **bounds)
def SelectByDTE(min_dte: int = 0, max_dte: int = 10_000, column: str = "dte") -> RangeFilter:
    """Build a RangeFilter keeping contracts with DTE in [min_dte, max_dte]."""
    lower = float(min_dte)
    upper = float(max_dte)
    return RangeFilter(column=column, min_val=lower, max_val=upper)
def IVRankFilter(min_rank: float = 0.0, max_rank: float = 1.0, column: str = "iv_rank") -> RangeFilter:
    """Build a RangeFilter keeping contracts with IV rank in [min_rank, max_rank]."""
    bounds = {"min_val": min_rank, "max_val": max_rank}
    return RangeFilter(column=column, **bounds)
class MaxGreekExposure:
    """Step that skips the day when |delta| or |vega| of the portfolio is above its cap.

    A cap of ``None`` disables the corresponding check. Delta is checked
    before vega.
    """

    def __init__(
        self,
        max_abs_delta: float | None = None,
        max_abs_vega: float | None = None,
    ) -> None:
        self.max_abs_delta = None if max_abs_delta is None else float(max_abs_delta)
        self.max_abs_vega = None if max_abs_vega is None else float(max_abs_vega)

    def __call__(self, ctx: EnginePipelineContext) -> EngineStepDecision:
        """Return ``skip_day`` when a configured cap is strictly exceeded, else ``continue``."""
        delta_cap = self.max_abs_delta
        if delta_cap is not None:
            if abs(float(ctx.current_greeks.delta)) > delta_cap:
                return EngineStepDecision(status="skip_day", message=f"|delta|>{self.max_abs_delta}")

        vega_cap = self.max_abs_vega
        if vega_cap is not None:
            if abs(float(ctx.current_greeks.vega)) > vega_cap:
                return EngineStepDecision(status="skip_day", message=f"|vega|>{self.max_abs_vega}")

        return EngineStepDecision()
class ExitOnThreshold:
    """Step that overrides the strategy's exit profit/loss thresholds for this run.

    Both thresholds default to infinity. Creating the step with both
    thresholds infinite makes it a no-op, which is probably a caller mistake,
    so a warning is emitted in that case.
    """

    def __init__(self, profit_pct: float = float("inf"), loss_pct: float = float("inf")) -> None:
        self.profit_pct = float(profit_pct)
        self.loss_pct = float(loss_pct)

        both_infinite = math.isinf(self.profit_pct) and math.isinf(self.loss_pct)
        if both_infinite:
            import warnings

            message = (
                "ExitOnThreshold created with both thresholds infinite — "
                "exit overrides will have no effect"
            )
            # stacklevel=2 attributes the warning to the code that built the step.
            warnings.warn(message, stacklevel=2)

    def __call__(self, ctx: EnginePipelineContext) -> EngineStepDecision:
        """Store ``(profit_pct, loss_pct)`` on ``ctx.exit_threshold_override``."""
        override = (self.profit_pct, self.loss_pct)
        ctx.exit_threshold_override = override
        return EngineStepDecision()
================================================
FILE: options_portfolio_backtester/engine/clock.py
================================================
"""Trading clock — date iteration and rebalance scheduling."""
from __future__ import annotations
from typing import Generator
import pandas as pd
class TradingClock:
    """Generates (date, stocks_df, options_df) tuples for the backtest loop.

    Handles daily/monthly iteration and rebalance scheduling.
    """

    def __init__(
        self,
        stocks_data: pd.DataFrame,
        options_data: pd.DataFrame,
        stocks_date_col: str = "date",
        options_date_col: str = "quotedate",
        monthly: bool = False,
    ) -> None:
        """Store the two data frames and the names of their date columns.

        Args:
            stocks_data: Stock rows; must contain ``stocks_date_col``.
            options_data: Option rows; must contain ``options_date_col``.
            stocks_date_col: Date column name in ``stocks_data``.
            options_date_col: Date column name in ``options_data``.
            monthly: When True, ``iter_dates`` only visits the first date
                found in each calendar month.
        """
        self.stocks_data = stocks_data
        self.options_data = options_data
        self.stocks_date_col = stocks_date_col
        self.options_date_col = options_date_col
        self.monthly = monthly

    def iter_dates(self) -> Generator[tuple[pd.Timestamp, pd.DataFrame, pd.DataFrame], None, None]:
        """Iterate over trading dates, yielding (date, stocks, options) per step.

        Stock and option groups are paired positionally with ``zip``; the
        yielded date is the stock group's key and iteration stops when the
        shorter of the two sequences is exhausted.
        """
        if self.monthly:
            stocks_iter = self._monthly_iter(self.stocks_data, self.stocks_date_col)
            options_iter = self._monthly_iter(self.options_data, self.options_date_col)
        else:
            stocks_iter = self.stocks_data.groupby(self.stocks_date_col)
            options_iter = self.options_data.groupby(self.options_date_col)

        for (date, stocks), (_, options) in zip(stocks_iter, options_iter):
            yield date, stocks, options

    def rebalance_dates(self, freq: int) -> pd.DatetimeIndex:
        """Compute rebalance dates using business-month-start frequency.

        The first quote date inside every ``freq``-business-month window is
        selected. Windows that contain no quote date are dropped, so every
        returned value is a date that exists in the options data (no NaT).

        Args:
            freq: Number of business months between rebalances.

        Returns:
            DatetimeIndex of rebalance dates present in the data; empty when
            ``freq`` is zero or negative.
        """
        if freq <= 0:
            return pd.DatetimeIndex([])

        # Only the date column is needed; no other column has to exist.
        unique_dates = pd.DatetimeIndex(
            self.options_data[self.options_date_col].drop_duplicates()
        )
        by_date = pd.Series(unique_dates, index=unique_dates)
        first_per_window = by_date.groupby(pd.Grouper(freq=f"{freq}BMS")).min()
        # Empty windows come back as NaT; they are not dates in the data.
        return pd.DatetimeIndex(first_per_window.dropna().values)

    @staticmethod
    def _monthly_iter(data: pd.DataFrame, date_col: str):
        """Group by date, keeping only rows on the first date of each month."""
        first_date_per_month = (
            data.groupby(data[date_col].dt.to_period('M'))[date_col]
            .min()
        )
        mask = data[date_col].isin(first_date_per_month.values)
        return data[mask].groupby(date_col)

    @property
    def all_dates(self) -> pd.DatetimeIndex:
        """Unique option quote dates, in order of first appearance."""
        return pd.DatetimeIndex(self.options_data[self.options_date_col].unique())
================================================
FILE: options_portfolio_backtester/engine/engine.py
================================================
"""BacktestEngine — thin orchestrator composing all framework components.
Replaces the monolithic Backtest class with a clean composition of:
- Data providers (stocks, options)
- Strategy (legs, filters, thresholds)
- Execution (cost model, fill model, sizer, signal selector)
- Portfolio (positions, cash, holdings)
- Risk management (constraints)
- Analytics (trade log, balance sheet)
"""
from __future__ import annotations
import hashlib
import json
import logging
import subprocess
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
import numpy as np
import pandas as pd
from options_portfolio_backtester.core.types import (
Direction, OptionType, Order, Signal, Greeks, Stock, StockAllocation,
get_order,
)
from options_portfolio_backtester.execution.cost_model import TransactionCostModel, NoCosts
from options_portfolio_backtester.execution.fill_model import FillModel, MarketAtBidAsk
from options_portfolio_backtester.execution.sizer import PositionSizer, CapitalBased
from options_portfolio_backtester.execution.signal_selector import SignalSelector, FirstMatch
from options_portfolio_backtester.portfolio.risk import RiskManager
from options_portfolio_backtester.portfolio.portfolio import Portfolio
from options_portfolio_backtester import _ob_rust
from options_portfolio_backtester.engine.algo_adapters import (
EngineAlgo,
EnginePipelineContext,
)
from options_portfolio_backtester.data.providers import HistoricalOptionsData, TiingoData
from options_portfolio_backtester.data.schema import Schema
from options_portfolio_backtester.strategy.strategy import Strategy
from options_portfolio_backtester.strategy.strategy_leg import StrategyLeg
logger = logging.getLogger(__name__)
def _intrinsic_value(option_type: str, strike: float, underlying_price: float) -> float:
    """Return the non-negative exercise value of an option at a given spot.

    Calls are worth ``spot - strike`` and every other option type is treated
    as a put worth ``strike - spot``; the result is floored at zero.
    """
    is_call = option_type == OptionType.CALL.value
    payoff = (underlying_price - strike) if is_call else (strike - underlying_price)
    return max(0.0, payoff)
@dataclass
class _StrategySlot:
"""Configuration and runtime state for one strategy within a multi-strategy engine."""
strategy: Strategy
weight: float
rebalance_freq: int
rebalance_unit: str = 'BMS'
check_exits_daily: bool = False
name: str = ""
inventory: pd.DataFrame = field(default=None, repr=False)
rebalance_dates: pd.DatetimeIndex = field(default=None, repr=False)
class BacktestEngine:
"""Orchestrates backtest with pluggable execution components.
Composes data providers, strategy legs, cost/fill/sizer/selector models,
and risk constraints into a single backtest loop. Dispatches to Rust
for all supported configurations.
"""
def __init__(
self,
allocation: dict[str, float],
initial_capital: int = 1_000_000,
shares_per_contract: int = 100,
cost_model: TransactionCostModel | None = None,
fill_model: FillModel | None = None,
sizer: PositionSizer | None = None,
signal_selector: SignalSelector | None = None,
risk_manager: RiskManager | None = None,
algos: list[EngineAlgo] | None = None,
stop_if_broke: bool = False,
max_notional_pct: float | None = None,
) -> None:
assets = ("stocks", "options", "cash")
self._raw_allocation = {a: allocation.get(a, 0.0) for a in assets}
total_allocation = sum(self._raw_allocation.values())
self.allocation: dict[str, float] = {}
for asset in assets:
self.allocation[asset] = self._raw_allocation[asset] / total_allocation
self.initial_capital = initial_capital
self.shares_per_contract = shares_per_contract
self.cost_model = cost_model or NoCosts()
self.fill_model = fill_model or MarketAtBidAsk()
self.sizer = sizer or CapitalBased()
self.signal_selector = signal_selector or FirstMatch()
self.risk_manager = risk_manager or RiskManager()
self.algos = list(algos or [])
self.stop_if_broke = stop_if_broke
self.max_notional_pct = max_notional_pct
self.options_budget_pct: float | None = None
self.options_budget_annual_pct: float | None = None
self._stocks: list[Stock] = []
self._options_strategy: Strategy | None = None
self._stocks_data: TiingoData | None = None
self._options_data: HistoricalOptionsData | None = None
self.run_metadata: dict[str, Any] = {}
self._event_log_rows: list[dict[str, Any]] = []
# -- Properties (same API as original Backtest) --
@property
def stocks(self) -> list[Stock]:
return self._stocks
@stocks.setter
def stocks(self, stocks: list[Stock]) -> None:
assert np.isclose(sum(s.percentage for s in stocks), 1.0, atol=1e-6)
self._stocks = list(stocks)
@property
def options_strategy(self) -> Strategy | None:
return self._options_strategy
@options_strategy.setter
def options_strategy(self, strat: Strategy) -> None:
self._options_strategy = strat
@property
def stocks_data(self) -> TiingoData | None:
return self._stocks_data
@stocks_data.setter
def stocks_data(self, data: TiingoData) -> None:
self._stocks_schema = data.schema
self._stocks_data = data
@property
def options_data(self) -> HistoricalOptionsData | None:
return self._options_data
@options_data.setter
def options_data(self, data: HistoricalOptionsData) -> None:
self._options_schema = data.schema
self._options_data = data
# -- Multi-strategy API --
def add_strategy(
    self,
    strategy: Strategy,
    weight: float,
    rebalance_freq: int,
    rebalance_unit: str = "BMS",
    check_exits_daily: bool = False,
    name: str | None = None,
) -> None:
    """Append one strategy slot, switching the engine to multi-strategy mode.

    Args:
        strategy: The Strategy object (legs + exit thresholds).
        weight: Fraction of the options allocation given to this strategy.
        rebalance_freq: Rebalance every N periods.
        rebalance_unit: Pandas offset alias (default 'BMS').
        check_exits_daily: Check exits on non-rebalance days.
        name: Human-readable name; when omitted or empty the slot is named
            ``strategy_<index>`` after its position in the slot list.
    """
    # The slot list is created lazily on the first registration.
    try:
        slots = self._strategy_slots
    except AttributeError:
        slots = self._strategy_slots = []

    new_slot = _StrategySlot(
        strategy=strategy,
        weight=weight,
        rebalance_freq=rebalance_freq,
        rebalance_unit=rebalance_unit,
        check_exits_daily=check_exits_daily,
        name=name or f"strategy_{len(slots)}",
    )
    slots.append(new_slot)
@property
def _is_multi_strategy(self) -> bool:
return hasattr(self, '_strategy_slots') and len(self._strategy_slots) > 0
# -- Main entry point --
def run(self, rebalance_freq: int = 0, monthly: bool = False,
sma_days: int | None = None,
rebalance_unit: str = 'BMS',
check_exits_daily: bool = False) -> pd.DataFrame:
"""Run the backtest. Returns the trade log DataFrame.
Args:
check_exits_daily: When True, evaluate exit filters on every trading
day (not just rebalancing days). Positions that match the exit
filter are closed and cash is updated, but no new entries or
stock reallocation occurs outside rebalancing days.
"""
self._event_log_rows = []
for algo in self.algos:
if hasattr(algo, "reset"):
algo.reset()
assert self._stocks_data, "Stock data not set"
assert all(
stock.symbol in self._stocks_data["symbol"].values
for stock in self._stocks
), "Ensure all stocks in portfolio are present in the data"
assert self._options_data, "Options data not set"
# Multi-strategy mode
if self._is_multi_strategy:
total_weight = sum(s.weight for s in self._strategy_slots)
assert abs(total_weight - 1.0) < 1e-6, (
f"Strategy weights must sum to 1.0, got {total_weight}"
)
for slot in self._strategy_slots:
assert self._options_data.schema == slot.strategy.schema
return self._run_rust_multi(
monthly=monthly, sma_days=sma_days,
check_exits_daily=check_exits_daily,
)
assert self._options_strategy, "Options Strategy not set"
assert self._options_data.schema == self._options_strategy.schema
option_dates = self._options_data["date"].unique()
stock_dates = self.stocks_data["date"].unique()
assert np.array_equal(stock_dates, option_dates)
# Translate algos to Rust-compatible config fields before dispatch.
if self.algos:
self._translate_algos_to_config()
return self._run_rust(
rebalance_freq,
monthly=monthly,
sma_days=sma_days,
rebalance_unit=rebalance_unit,
check_exits_daily=check_exits_daily,
)
def events_dataframe(self) -> pd.DataFrame:
"""Structured execution event log for debugging and audit.
The ``data`` dict from each event is flattened into top-level columns
so that the result can be filtered directly (e.g.
``df[df["cash"] > 0]``).
"""
if not self._event_log_rows:
return pd.DataFrame(columns=["date", "event", "status"])
flat = []
for row in self._event_log_rows:
entry = {"date": row["date"], "event": row["event"], "status": row["status"]}
entry.update(row.get("data", {}))
flat.append(entry)
return pd.DataFrame(flat)
def _translate_algos_to_config(self) -> None:
    """Translate algo pipeline into Rust-compatible engine config fields.

    Each algo type maps to an existing Rust feature:
    - EngineRunMonthly → rebalance_unit='BMS' + rebalance_freq=1 (already handled)
    - BudgetPercent → options_budget_pct
    - RangeFilter/SelectByDelta/SelectByDTE/IVRankFilter → entry filter conjunction
    - MaxGreekExposure → risk_constraints (MaxDelta/MaxVega)
    - ExitOnThreshold → profit_pct/loss_pct on strategy

    After translation, self.algos is cleared so the Rust gate passes.

    Raises:
        ValueError: If an algo of any other type is present.
    """
    import math

    from options_portfolio_backtester.engine.algo_adapters import (
        EngineRunMonthly, BudgetPercent, RangeFilter,
        MaxGreekExposure, ExitOnThreshold,
    )

    def _greek_limit_constraint(type_name: str, greek: str, limit: float):
        """Build a constraint object for one greek with its limit fixed now.

        The constraint methods are invoked after this translation loop has
        finished, so they must not read the limit through the loop variable:
        that would always resolve to the last algo in the list. Passing the
        limit as an argument binds its current value.
        """
        def to_rust_config(self_):
            return {"type": type_name, "limit": limit}

        def is_allowed(self_, cg, pg, pv, pk):
            return abs(getattr(cg, greek) + getattr(pg, greek)) <= limit, ""

        constraint_cls = type(type_name, (), {
            "to_rust_config": to_rust_config,
            "is_allowed": is_allowed,
        })
        return constraint_cls()

    for algo in self.algos:
        if isinstance(algo, EngineRunMonthly):
            # Already handled by rebalance_unit='BMS' + rebalance_freq=1.
            # If user set algos=[EngineRunMonthly()], it's a no-op for Rust.
            pass
        elif isinstance(algo, BudgetPercent):
            self.options_budget_pct = algo.pct
        elif isinstance(algo, RangeFilter):
            # Append range condition to each leg's entry filter as conjunction.
            col, lo, hi = algo.column, algo.min_val, algo.max_val
            clause = f"({col} >= {lo}) & ({col} <= {hi})"
            for leg in self._options_strategy.legs:
                existing = leg.entry_filter.query
                if existing:
                    leg.entry_filter.query = f"({existing}) & ({clause})"
                else:
                    leg.entry_filter.query = clause
        elif isinstance(algo, MaxGreekExposure):
            if algo.max_abs_delta is not None:
                self.risk_manager.add_constraint(
                    _greek_limit_constraint("MaxDelta", "delta", algo.max_abs_delta)
                )
            if algo.max_abs_vega is not None:
                self.risk_manager.add_constraint(
                    _greek_limit_constraint("MaxVega", "vega", algo.max_abs_vega)
                )
        elif isinstance(algo, ExitOnThreshold):
            # Only finite thresholds override; the other side is re-read
            # from the strategy so it is preserved.
            if not math.isinf(algo.profit_pct):
                self._options_strategy.add_exit_thresholds(
                    profit_pct=algo.profit_pct,
                    loss_pct=self._options_strategy.exit_thresholds[1],
                )
            if not math.isinf(algo.loss_pct):
                self._options_strategy.add_exit_thresholds(
                    profit_pct=self._options_strategy.exit_thresholds[0],
                    loss_pct=algo.loss_pct,
                )
        else:
            raise ValueError(
                f"Unsupported algo type for Rust dispatch: {type(algo).__name__}. "
                f"All execution runs through Rust; translate to config fields."
            )
    self.algos.clear()
def _run_rust(
    self,
    rebalance_freq: int,
    monthly: bool,
    sma_days: int | None,
    rebalance_unit: str = 'BMS',
    check_exits_daily: bool = False,
) -> pd.DataFrame:
    """Run the single-strategy backtest using the Rust full-loop implementation.

    Populates ``self.trade_log``, ``self.balance``, ``self.current_cash``,
    the inventories, ``self._portfolio`` and the run metadata.

    Args:
        rebalance_freq: Rebalance every N ``rebalance_unit`` periods; a
            falsy value produces no rebalance dates.
        monthly: Only recorded in the run metadata here.
        sma_days: Forwarded in the Rust config and the run metadata.
        rebalance_unit: Pandas offset alias combined with ``rebalance_freq``.
        check_exits_daily: Forwarded in the Rust config.

    Returns:
        The trade log with (leg, field) MultiIndex columns.
    """
    strategy = self._options_strategy

    rb_date_ns = self._rust_rebalance_dates_ns(
        self._rust_quote_dates(), rebalance_freq, rebalance_unit,
    )

    # openinterest can be dropped unless a MaxOpenInterest selector is in
    # use. Per-leg selector overrides are forwarded to Rust as well, so they
    # must be considered too, not just the engine-level selector.
    selectors = [self.signal_selector]
    selectors.extend(getattr(leg, 'signal_selector', None) for leg in strategy.legs)
    needs_open_interest = any(
        type(selector).__name__ == 'MaxOpenInterest' for selector in selectors
    )
    opts_pl, stocks_pl = self._rust_input_frames(
        drop_open_interest=not needs_open_interest,
    )

    config = self._rust_base_config(sma_days, check_exits_daily)
    config.update({
        "rebalance_dates": rb_date_ns,
        "legs": self._rust_leg_configs(strategy),
        "profit_pct": self._rust_finite_or_none(strategy.exit_thresholds[0]),
        "loss_pct": self._rust_finite_or_none(strategy.exit_thresholds[1]),
    })

    balance_pl, trade_log_pl, _stats = _ob_rust.run_backtest_py(
        opts_pl, stocks_pl, config, self._rust_schema_mapping(),
    )
    self._rust_ingest_results(balance_pl, trade_log_pl)

    # Fresh inventories, then a portfolio seeded with the final cash.
    self._initialize_inventories()
    self._portfolio = Portfolio(initial_cash=self.current_cash)

    self._attach_run_metadata(
        rebalance_freq=rebalance_freq,
        monthly=monthly,
        sma_days=sma_days,
    )
    return self.trade_log

def _run_rust_multi(
    self,
    monthly: bool = False,
    sma_days: int | None = None,
    check_exits_daily: bool = False,
) -> pd.DataFrame:
    """Run multi-strategy backtest using Rust backend.

    Each registered slot contributes its own legs, weight, rebalance dates,
    exit thresholds and daily-exit flag; the engine-level config carries
    empty ``rebalance_dates`` and ``legs``. Populates ``self.trade_log``,
    ``self.balance``, ``self.current_cash`` and the run metadata (recorded
    with ``rebalance_freq=0``).

    Returns:
        The trade log with (leg, field) MultiIndex columns.
    """
    # openinterest is always kept in multi-strategy mode.
    opts_pl, stocks_pl = self._rust_input_frames(drop_open_interest=False)

    dates_df = self._rust_quote_dates()
    slot_configs = []
    for slot in self._strategy_slots:
        thresholds = slot.strategy.exit_thresholds
        slot_configs.append({
            "name": slot.name,
            "legs": self._rust_leg_configs(slot.strategy),
            "weight": slot.weight,
            "rebalance_dates": self._rust_rebalance_dates_ns(
                dates_df, slot.rebalance_freq, slot.rebalance_unit,
            ),
            "profit_pct": self._rust_finite_or_none(thresholds[0]),
            "loss_pct": self._rust_finite_or_none(thresholds[1]),
            "check_exits_daily": slot.check_exits_daily,
        })

    # Engine-level rebalance_dates/legs stay empty; per-slot values are used.
    config = self._rust_base_config(sma_days, check_exits_daily)

    balance_pl, trade_log_pl, _stats = _ob_rust.run_multi_strategy_py(
        opts_pl, stocks_pl, config, self._rust_schema_mapping(), slot_configs,
    )
    self._rust_ingest_results(balance_pl, trade_log_pl)

    self._attach_run_metadata(
        rebalance_freq=0,
        monthly=monthly,
        sma_days=sma_days,
    )
    return self.trade_log

def _rust_quote_dates(self) -> pd.DataFrame:
    """Return one row per distinct quote date, indexed by that date."""
    return (
        pd.DataFrame(self.options_data._data[["quotedate", "volume"]])
        .drop_duplicates("quotedate")
        .set_index("quotedate")
    )

@staticmethod
def _rust_rebalance_dates_ns(dates_df: pd.DataFrame, freq: int, unit: str) -> list[int]:
    """First quote date of every ``freq``+``unit`` period, as i64 nanoseconds.

    Periods without any quote date are skipped. A falsy ``freq`` yields an
    empty list. Nanoseconds match Polars ``Datetime(ns)``.
    """
    if not freq:
        return []
    first_dates = pd.to_datetime(
        dates_df.groupby(pd.Grouper(freq=f"{freq}{unit}"))
        .apply(lambda x: x.index.min())
        .values
    )
    return [int(d.value) for d in first_dates if not pd.isna(d)]

def _rust_input_frames(self, drop_open_interest: bool):
    """Convert the options and stocks frames to Polars for the Rust call.

    Columns Rust never accesses are dropped from the options frame first to
    reduce Arrow conversion cost; ``openinterest`` is dropped only when
    ``drop_open_interest`` is True.

    Returns:
        ``(options_frame, stocks_frame)`` as Polars DataFrames.
    """
    import pyarrow as pa
    import polars as pl

    unused_cols = {"underlying_last", "last", "optionalias", "impliedvol"}
    if drop_open_interest:
        unused_cols.add("openinterest")

    opts_df = self._options_data._data
    to_drop = [c for c in unused_cols if c in opts_df.columns]
    opts_src = opts_df.drop(columns=to_drop) if to_drop else opts_df

    # Convert pandas → PyArrow → Polars (avoids intermediate copies).
    opts_pl = pl.from_arrow(pa.Table.from_pandas(opts_src, preserve_index=False))
    stocks_pl = pl.from_arrow(
        pa.Table.from_pandas(self._stocks_data._data, preserve_index=False)
    )
    return opts_pl, stocks_pl

@staticmethod
def _rust_leg_configs(strategy) -> list[dict[str, Any]]:
    """Describe every leg of ``strategy`` as a config dict for Rust.

    Per-leg ``signal_selector`` / ``fill_model`` overrides are included only
    when the leg defines them and they expose ``to_rust_config``.
    """
    leg_configs = []
    for leg in strategy.legs:
        lc = {
            "name": leg.name,
            "entry_filter": leg.entry_filter.query,
            "exit_filter": leg.exit_filter.query,
            "direction": leg.direction.price_column,
            "type": leg.type.value,
            "entry_sort_col": leg.entry_sort[0] if leg.entry_sort else None,
            "entry_sort_asc": leg.entry_sort[1] if leg.entry_sort else True,
        }
        for override in ("signal_selector", "fill_model"):
            component = getattr(leg, override, None)
            if component is not None and hasattr(component, 'to_rust_config'):
                lc[override] = component.to_rust_config()
        leg_configs.append(lc)
    return leg_configs

@staticmethod
def _rust_finite_or_none(threshold):
    """Map a positive-infinite exit threshold to ``None``; pass others through."""
    return threshold if threshold != float("inf") else None

def _rust_base_config(self, sma_days: int | None, check_exits_daily: bool) -> dict[str, Any]:
    """Engine-level config shared by the single- and multi-strategy runners.

    ``rebalance_dates`` and ``legs`` start empty; the single-strategy runner
    overwrites them and adds its exit thresholds.
    """
    return {
        "allocation": self.allocation,
        "initial_capital": float(self.initial_capital),
        "shares_per_contract": self.shares_per_contract,
        "rebalance_dates": [],
        "legs": [],
        "stocks": [(s.symbol, s.percentage) for s in self._stocks],
        "cost_model": self.cost_model.to_rust_config(),
        "fill_model": self.fill_model.to_rust_config(),
        "signal_selector": self.signal_selector.to_rust_config(),
        "risk_constraints": [c.to_rust_config() for c in self.risk_manager.constraints],
        "sma_days": sma_days,
        "options_budget_pct": self.options_budget_pct,
        "options_budget_annual_pct": self.options_budget_annual_pct,
        "stop_if_broke": self.stop_if_broke,
        "max_notional_pct": self.max_notional_pct,
        "check_exits_daily": check_exits_daily,
    }

def _rust_schema_mapping(self) -> dict[str, Any]:
    """Map the logical field names Rust expects to this data's column names."""
    options_schema = self._options_schema
    stocks_schema = self._stocks_schema
    return {
        "contract": options_schema["contract"],
        "date": options_schema["date"],
        "stocks_date": stocks_schema["date"],
        "stocks_symbol": stocks_schema["symbol"],
        "stocks_price": stocks_schema["adjClose"],
        "underlying": options_schema["underlying"],
        "expiration": options_schema["expiration"],
        "type": options_schema["type"],
        "strike": options_schema["strike"],
    }

def _rust_ingest_results(self, balance_pl, trade_log_pl) -> None:
    """Store the Rust outputs as ``trade_log``, ``balance`` and ``current_cash``.

    The balance gets an initial row dated one day before the stock data
    starts, numeric coercion, zero-filled placeholder columns, and the
    derived capital / return columns.
    """
    # Convert trade log from flat columns to MultiIndex.
    self.trade_log = self._flat_trade_log_to_multiindex(trade_log_pl.to_pandas())

    balance = balance_pl.to_pandas()
    if "date" in balance.columns:
        balance["date"] = pd.to_datetime(balance["date"])
        balance = balance.set_index("date")

    # Initial balance row: all capital held as cash.
    initial_date = self.stocks_data.start_date - pd.Timedelta(1, unit="day")
    initial_row = pd.DataFrame(
        {"total capital": self.initial_capital, "cash": float(self.initial_capital)},
        index=[initial_date],
    )
    balance = pd.concat([initial_row, balance], sort=False)
    for col_name in balance.columns:
        balance[col_name] = pd.to_numeric(balance[col_name], errors="coerce")

    # Ensure per-stock and aggregate columns exist.
    for stock in self._stocks:
        for col_name in (stock.symbol, f"{stock.symbol} qty"):
            if col_name not in balance.columns:
                balance[col_name] = 0.0
    for col_name in ["options qty", "stocks qty", "calls capital", "puts capital"]:
        if col_name not in balance.columns:
            balance[col_name] = 0.0

    # Derived columns.
    balance["options capital"] = (
        balance["calls capital"] + balance["puts capital"]
    ).fillna(0)
    balance["stocks capital"] = sum(
        balance.get(stock.symbol, 0) for stock in self._stocks
    )
    first_idx = balance.index[0]
    balance.loc[first_idx, "stocks capital"] = 0
    balance.loc[first_idx, "options capital"] = 0
    balance["total capital"] = (
        balance["cash"]
        + balance["stocks capital"]
        + balance["options capital"]
    )
    balance["% change"] = balance["total capital"].pct_change()
    balance["accumulated return"] = (1.0 + balance["% change"]).cumprod()
    self.balance = balance

    # Cash share of the final total capital, per the configured allocation.
    final_total = balance["total capital"].iloc[-1]
    self.current_cash = self.allocation["cash"] * final_total
def _attach_run_metadata(
self,
rebalance_freq: int,
monthly: bool,
sma_days: int | None,
) -> None:
metadata = self._build_run_metadata(
rebalance_freq=rebalance_freq,
monthly=monthly,
sma_days=sma_days,
)
self.run_metadata = metadata
self.balance.attrs["run_metadata"] = metadata
self.trade_log.attrs["run_metadata"] = metadata
def _build_run_metadata(
self,
rebalance_freq: int,
monthly: bool,
sma_days: int | None,
) -> dict[str, Any]:
stocks = [
{"symbol": stock.symbol, "percentage": float(stock.percentage)}
for stock in self._stocks
]
run_config = {
"allocation": {k: float(v) for k, v in self.allocation.items()},
"initial_capital": float(self.initial_capital),
"shares_per_contract": int(self.shares_per_contract),
"rebalance_freq": int(rebalance_freq),
"monthly": bool(monthly),
"sma_days": int(sma_days) if sma_days is not None else None,
"stocks": stocks,
}
data_snapshot = self._data_snapshot()
return {
"framework": "options_portfolio_backtester.engine.BacktestEngine",
"git_sha": self._git_sha(),
"run_at_utc": datetime.now(timezone.utc).isoformat(),
"config_hash": self._sha256_json(run_config),
"data_snapshot_hash": self._sha256_json(data_snapshot),
"data_snapshot": data_snapshot,
}
def _data_snapshot(self) -> dict[str, Any]:
options_dates = self._options_data["date"]
stocks_dates = self._stocks_data["date"]
return {
"options_rows": int(len(self._options_data._data)),
"stocks_rows": int(len(self._stocks_data._data)),
"options_date_start": pd.Timestamp(options_dates.min()).isoformat(),
"options_date_end": pd.Timestamp(options_dates.max()).isoformat(),
"stocks_date_start": pd.Timestamp(stocks_dates.min()).isoformat(),
"stocks_date_end": pd.Timestamp(stocks_dates.max()).isoformat(),
"options_columns": list(self._options_data._data.columns),
"stocks_columns": list(self._stocks_data._data.columns),
}
@staticmethod
def _sha256_json(payload: dict[str, Any]) -> str:
blob = json.dumps(payload, sort_keys=True, separators=(",", ":"), default=str)
return hashlib.sha256(blob.encode("utf-8")).hexdigest()
@staticmethod
def _git_sha() -> str:
    """Return the current git commit SHA, or ``"unknown"`` on any failure.

    ``git rev-parse HEAD`` is run from the directory two levels above this
    file's directory. Failure to run the command is deliberately swallowed.
    """
    repo_root = Path(__file__).resolve().parents[2]
    command = ["git", "rev-parse", "HEAD"]
    try:
        completed = subprocess.run(
            command,
            cwd=repo_root,
            check=True,
            capture_output=True,
            text=True,
        )
        return completed.stdout.strip()
    except Exception:
        # Best effort only: metadata must never break a backtest run.
        return "unknown"
def _flat_trade_log_to_multiindex(self, flat_df: pd.DataFrame) -> pd.DataFrame:
"""Convert flat 'leg__field' columns from Rust to MultiIndex DataFrame."""
if flat_df.empty:
return pd.DataFrame()
tuples = []
for c in flat_df.columns:
if "__" in c:
parts = c.split("__", 1)
tuples.append((parts[0], parts[1]))
else:
tuples.append(("", c))
flat_df.columns = pd.MultiIndex.from_tuples(tuples)
return flat_df
# -- Internals (same logic as original, with pluggable components) --
def _initialize_inventories(self) -> None:
    """Create empty option and stock inventories and a zero-cash portfolio.

    The options inventory has one group of columns per strategy leg plus a
    ``totals`` group; the stocks inventory has ``symbol``/``price``/``qty``.
    """
    leg_names = [leg.name for leg in self._options_strategy.legs]
    leg_fields = ["contract", "underlying", "expiration", "type", "strike", "cost", "order"]
    per_leg_columns = pd.MultiIndex.from_product([leg_names, leg_fields])
    totals_columns = pd.MultiIndex.from_product([["totals"], ["cost", "qty", "date"]])

    all_columns = [*per_leg_columns.tolist(), *totals_columns.tolist()]
    self._options_inventory: pd.DataFrame = pd.DataFrame(columns=pd.Index(all_columns))
    self._stocks_inventory: pd.DataFrame = pd.DataFrame(
        columns=["symbol", "price", "qty"]
    )

    # Portfolio dataclass — dual-write alongside legacy DataFrames
    self._portfolio = Portfolio(initial_cash=0.0)
def _current_options_capital(self, options, stocks):
    """Return the current value of the options inventory.

    Per-leg exit costs come from ``_get_current_option_quotes``; the costs
    of all legs are summed per inventory row, multiplied by the row's
    ``totals/qty`` and the negated sum is returned. Contracts with no quote
    (NaN cost) are valued at intrinsic value from the underlying's price in
    ``stocks``, or with a spot of 0.0 when the underlying is not found.

    Args:
        options: Option quotes to look the held contracts up in.
        stocks: Stock rows providing the underlying prices.

    Returns:
        The options capital, or 0 when the inventory has no rows.
    """
    options_value = self._get_current_option_quotes(options)
    # NOTE(review): starts as a plain list; the `+=` with a NumPy array
    # below is expected to give an elementwise sum (ndarray) rather than
    # extend the list — confirm before refactoring.
    values_by_row: Any = [0] * len(options_value[0])
    if len(options_value[0]) != 0:
        sym_col = self._stocks_schema["symbol"]
        # Use unadjusted close for intrinsic value — strikes are raw prices
        _close_col = self._stocks_schema["close"] if "close" in self._stocks_schema else None
        # Fall back to the adjusted close when no raw close column is available.
        price_col = _close_col if (_close_col and _close_col in stocks.columns) else self._stocks_schema["adjClose"]
        for i, leg in enumerate(self._options_strategy.legs):
            # Copy so the quote frame itself is not modified.
            cost_series = options_value[i]["cost"].copy()
            # Replace NaN (missing contracts) with intrinsic value
            if cost_series.isna().any():
                inv_leg = self._options_inventory[leg.name]
                for idx in cost_series.index[cost_series.isna()]:
                    opt_type = inv_leg.at[idx, "type"]
                    strike = inv_leg.at[idx, "strike"]
                    underlying = inv_leg.at[idx, "underlying"]
                    spot_match = stocks.loc[stocks[sym_col] == underlying, price_col]
                    spot = spot_match.iloc[0] if len(spot_match) > 0 else 0.0
                    iv = _intrinsic_value(opt_type, float(strike), float(spot))
                    # Same sign convention as the quoted costs: negative
                    # when the exit direction is SELL.
                    cash_sign = -1.0 if ~leg.direction == Direction.SELL else 1.0
                    cost_series.at[idx] = cash_sign * iv * self.shares_per_contract
            values_by_row += cost_series.values
        total: float = -sum(values_by_row * self._options_inventory["totals"]["qty"].values)
    else:
        total = 0
    return total
def _get_current_option_quotes(self, options):
    """Look up exit quotes for every held contract, one frame per leg.

    For each leg the inventory's contracts are left-joined against
    ``options`` and the result is re-indexed like the inventory. Two
    columns are added: ``order`` (the leg's exit order) and ``cost`` (the
    exit-side price times ``shares_per_contract``, negated when the exit
    direction is SELL). Contracts missing from ``options`` keep NaN cost.

    Returns:
        A list of DataFrames in the same order as the strategy's legs.
    """
    quotes_per_leg: list[pd.DataFrame] = []
    for leg in self._options_strategy.legs:
        exit_direction = ~leg.direction
        held_contracts = self._options_inventory[leg.name][["contract"]]

        quotes = held_contracts.merge(
            options,
            how="left",
            left_on="contract",
            right_on=leg.schema["contract"],
        )
        quotes.index = self._options_inventory.index
        quotes["order"] = get_order(leg.direction, Signal.EXIT)

        exit_prices = quotes[self._options_schema[exit_direction.price_column]]
        signed_prices = -exit_prices if exit_direction == Direction.SELL else exit_prices
        quotes["cost"] = signed_prices * self.shares_per_contract

        quotes_per_leg.append(quotes)
    return quotes_per_leg
def __repr__(self) -> str:
return (
f"BacktestEngine(capital={self.initial_capital}, "
f"allocation={self.allocation}, "
f"cost_model={self.cost_model.__class__.__name__})"
)
================================================
FILE: options_portfolio_backtester/engine/multi_strategy.py
================================================
"""Multi-strategy engine — run N strategies with shared capital and risk budget."""
from __future__ import annotations
from typing import Any
import pandas as pd
from options_portfolio_backtester.engine.engine import BacktestEngine
from options_portfolio_backtester.execution.cost_model import TransactionCostModel, NoCosts
from options_portfolio_backtester.portfolio.risk import RiskManager
from options_portfolio_backtester.core.types import Stock
class StrategyAllocation:
    """Pairs a named backtest engine with its weight in a multi-strategy run.

    Attributes:
        name: Identifier used as the key for this strategy's results.
        engine: The engine that runs this strategy.
        weight: Relative weight of this strategy (default 1.0).
    """

    def __init__(
        self,
        name: str,
        engine: BacktestEngine,
        weight: float = 1.0,
    ) -> None:
        """Store the three values unchanged."""
        self.name, self.engine, self.weight = name, engine, weight
class MultiStrategyEngine:
    """Run multiple strategies with shared capital allocation.

    Each strategy gets a fraction of total capital proportional to its weight.
    After :meth:`run`, ``self.balance`` holds the per-strategy capital and
    percent-change columns plus the combined totals.
    """

    def __init__(
        self,
        strategies: list[StrategyAllocation],
        initial_capital: int = 1_000_000,
    ) -> None:
        """Store the strategies and normalise their weights to sum to one.

        Args:
            strategies: Strategy allocations to run.
            initial_capital: Total capital split across the strategies.

        Raises:
            ZeroDivisionError: If ``strategies`` is non-empty and the weights
                sum to zero.
        """
        self.strategies = strategies
        self.initial_capital = initial_capital
        total_weight = sum(s.weight for s in strategies)
        self._weights = {s.name: s.weight / total_weight for s in strategies}

    def run(self, rebalance_freq: int = 0, monthly: bool = False,
            sma_days: int | None = None) -> dict[str, pd.DataFrame]:
        """Run all strategies and return per-strategy trade logs.

        Each engine's ``initial_capital`` is overwritten with its (integer
        truncated) share of ``self.initial_capital`` before it is run.  The
        combined balance sheet is rebuilt afterwards.

        Args:
            rebalance_freq: Forwarded to every engine's ``run``.
            monthly: Forwarded to every engine's ``run``.
            sma_days: Forwarded to every engine's ``run``.

        Returns:
            Dict mapping strategy name to its trade log DataFrame.
        """
        results: dict[str, pd.DataFrame] = {}
        for sa in self.strategies:
            # Override the engine's initial capital with its share.
            sa.engine.initial_capital = int(self.initial_capital * self._weights[sa.name])
            results[sa.name] = sa.engine.run(
                rebalance_freq=rebalance_freq,
                monthly=monthly,
                sma_days=sma_days,
            )

        self._build_combined_balance()
        return results

    def _build_combined_balance(self) -> None:
        """Combine balance sheets from all strategies into ``self.balance``.

        Engines without a ``balance`` attribute are skipped.  Only the capital
        columns that were actually collected are summed, so one engine without
        a balance does not break the combined sheet.
        """
        frames = []
        capital_cols: list[str] = []
        for sa in self.strategies:
            if not hasattr(sa.engine, "balance"):
                continue
            capital_col = f"{sa.name}_capital"
            frame = sa.engine.balance[["total capital", "% change"]].copy()
            frame.columns = [capital_col, f"{sa.name}_pct_change"]
            frames.append(frame)
            capital_cols.append(capital_col)

        if not frames:
            self.balance = pd.DataFrame()
            return

        self.balance = pd.concat(frames, axis=1)
        self.balance["total capital"] = self.balance[capital_cols].sum(axis=1)
        self.balance["% change"] = self.balance["total capital"].pct_change()
        self.balance["accumulated return"] = (
            1.0 + self.balance["% change"]
        ).cumprod()
================================================
FILE: options_portfolio_backtester/engine/pipeline.py
================================================
"""Composable algo pipeline for stock portfolio workflows.
Provides bt-compatible scheduling, selection, weighting, and rebalancing algos.
"""
from __future__ import annotations
import re as _re
import random as _random
from dataclasses import dataclass, field
from typing import Callable, Literal, Protocol, Sequence
import numpy as np
import pandas as pd
StepStatus = Literal["continue", "skip_day", "stop"]
@dataclass(frozen=True)
class StepDecision:
    """Immutable result of running one pipeline step.

    A default-constructed decision means "continue" with an empty message.
    """

    # One of the ``StepStatus`` literals: "continue", "skip_day" or "stop".
    status: StepStatus = "continue"
    # Free-form explanation attached to the decision.
    message: str = ""
@dataclass
class PipelineContext:
    """Per-date mutable state that pipeline steps read from and write to."""

    # Date the pipeline is currently being evaluated for.
    date: pd.Timestamp
    # Prices for that date, indexed by symbol.
    prices: pd.Series
    # Portfolio value and cash balance; steps may adjust both.
    total_capital: float
    cash: float
    # Held quantity per symbol.
    positions: dict[str, float]
    # Filled in by selection steps.
    selected_symbols: list[str] = field(default_factory=list)
    # Filled in by weighting steps (symbol -> target weight).
    target_weights: dict[str, float] = field(default_factory=dict)
    # Price history up to current date (set by AlgoPipelineBacktester).
    price_history: pd.DataFrame | None = None
@dataclass(frozen=True)
class PipelineLogRow:
    """Immutable log record: which step produced which decision on which date."""

    # Date the step was evaluated for.
    date: pd.Timestamp
    # Name of the step that produced the decision.
    step: str
    # Status and message copied from the step's decision.
    status: StepStatus
    message: str
class Algo(Protocol):
    """Structural type of a pipeline step: a callable taking the context.

    Any object whose ``__call__`` accepts a ``PipelineContext`` and returns a
    ``StepDecision`` satisfies this protocol.
    """

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        ...
# ---------------------------------------------------------------------------
# Scheduling algos
# ---------------------------------------------------------------------------
class RunMonthly:
    """Let the pipeline through on the first date seen in each calendar month."""

    def __init__(self) -> None:
        # (year, month) of the last date that was let through.
        self._last_month: tuple[int, int] | None = None

    def reset(self) -> None:
        """Forget the last month seen."""
        self._last_month = None

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Continue on a new (year, month); otherwise skip the day."""
        current = (ctx.date.year, ctx.date.month)
        if current != self._last_month:
            self._last_month = current
            return StepDecision()
        return StepDecision(status="skip_day", message="not month-start")
class RunWeekly:
    """Let the pipeline through on the first date seen in each ISO week.

    ISO weeks begin on Monday, so with daily data this is the first trading
    day of the week.
    """

    def __init__(self) -> None:
        # (ISO year, ISO week number) of the last date that was let through.
        self._last_week: tuple[int, int] | None = None

    def reset(self) -> None:
        """Forget the last week seen."""
        self._last_week = None

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Continue on a new (ISO year, ISO week); otherwise skip the day."""
        iso = ctx.date.isocalendar()
        current = (iso[0], iso[1])
        if current != self._last_week:
            self._last_week = current
            return StepDecision()
        return StepDecision(status="skip_day", message="not week-start")
class RunQuarterly:
    """Let the pipeline through on the first date seen in each calendar quarter."""

    def __init__(self) -> None:
        # (year, zero-based quarter index) of the last date that was let through.
        self._last_quarter: tuple[int, int] | None = None

    def reset(self) -> None:
        """Forget the last quarter seen."""
        self._last_quarter = None

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Continue on a new (year, quarter); otherwise skip the day."""
        quarter_index = (ctx.date.month - 1) // 3
        current = (ctx.date.year, quarter_index)
        if current != self._last_quarter:
            self._last_quarter = current
            return StepDecision()
        return StepDecision(status="skip_day", message="not quarter-start")
class RunYearly:
    """Let the pipeline through on the first date seen in each calendar year."""

    def __init__(self) -> None:
        # Year of the last date that was let through.
        self._last_year: int | None = None

    def reset(self) -> None:
        """Forget the last year seen."""
        self._last_year = None

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Continue on a new year; otherwise skip the day."""
        year = ctx.date.year
        if year != self._last_year:
            self._last_year = year
            return StepDecision()
        return StepDecision(status="skip_day", message="not year-start")
class RunDaily:
    """Scheduling no-op: every date is allowed through."""

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Always return the default (continue) decision."""
        return StepDecision()
class RunOnce:
    """Allow only the first call through; skip every call after that."""

    def __init__(self) -> None:
        # Flips to True after the first call.
        self._ran = False

    def reset(self) -> None:
        """Re-arm the gate so the next call passes again."""
        self._ran = False

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Continue on the first call since construction/reset, else skip."""
        if not self._ran:
            self._ran = True
            return StepDecision()
        return StepDecision(status="skip_day", message="already ran")
class RunOnDate:
    """Allow the pipeline through only on an explicit set of dates.

    Both the configured dates and the context date are normalised to midnight
    before comparison.
    """

    def __init__(self, dates: Sequence[str | pd.Timestamp]) -> None:
        self._dates = set(pd.Timestamp(d).normalize() for d in dates)

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Continue when the normalised context date is one of the targets."""
        if ctx.date.normalize() in self._dates:
            return StepDecision()
        return StepDecision(status="skip_day", message="not a target date")
class RunAfterDate:
    """Allow the pipeline through on and after a given date.

    Dates are compared after normalising to midnight, so the start date itself
    passes.
    """

    def __init__(self, date: str | pd.Timestamp) -> None:
        self._date = pd.Timestamp(date).normalize()

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Skip while the context date is strictly before the start date."""
        today = ctx.date.normalize()
        if today >= self._date:
            return StepDecision()
        return StepDecision(status="skip_day", message="before start date")
class RunEveryNPeriods:
    """Execute pipeline every N trading days.

    The first call passes, then every *n*-th call after it (calls
    1, 1 + n, 1 + 2n, ...).  With ``n == 1`` every call passes.
    """

    def __init__(self, n: int) -> None:
        """Store the period length.

        Args:
            n: Number of calls between executions; must be at least 1.

        Raises:
            ValueError: If ``n`` is smaller than 1.
        """
        self._n = int(n)
        if self._n < 1:
            raise ValueError(f"n must be a positive integer, got {n!r}")
        # Number of calls seen since construction or the last reset().
        self._count = 0

    def reset(self) -> None:
        """Restart the call counter."""
        self._count = 0

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Count this call and continue only on calls 1, 1 + n, 1 + 2n, ..."""
        self._count += 1
        # ``(count - 1) % n`` rather than ``count % n == 1`` so that n == 1
        # passes every call instead of only the first one.
        if (self._count - 1) % self._n != 0:
            return StepDecision(status="skip_day", message=f"period {self._count}, not every {self._n}")
        return StepDecision()
class RunAfterDays:
    """Warm-up gate: the first *n* calls are skipped, later calls pass."""

    def __init__(self, n: int) -> None:
        self._n = int(n)
        # Number of calls seen since construction or the last reset().
        self._count = 0

    def reset(self) -> None:
        """Restart the warm-up period."""
        self._count = 0

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Count this call and skip while still inside the warm-up window."""
        self._count += 1
        warmed_up = self._count > self._n
        if warmed_up:
            return StepDecision()
        return StepDecision(status="skip_day", message=f"warmup day {self._count}/{self._n}")
class RunIfOutOfBounds:
    """Let a rebalance through when a holding drifts too far from its target.

    Typically combined with a scheduler through ``Or``, e.g.
    ``Or(RunQuarterly(), RunIfOutOfBounds(0.05))``.

    The targets compared against are the ones last handed to
    :meth:`update_target`; until that has been called the gate always skips.
    Only symbols present in that remembered target are inspected.
    """

    def __init__(self, tolerance: float = 0.05) -> None:
        self._tolerance = float(tolerance)
        # Target weights remembered from the previous rebalance.
        self._last_target: dict[str, float] = {}

    def reset(self) -> None:
        """Forget the remembered target weights."""
        self._last_target = {}

    def _actual_weight(self, ctx: PipelineContext, sym: str, capital: float) -> float:
        """Weight of *sym* in the portfolio, or 0.0 when it has no valid price."""
        held = ctx.positions.get(sym, 0.0)
        if sym in ctx.prices.index and pd.notna(ctx.prices[sym]):
            return float(held) * float(ctx.prices[sym]) / capital
        return 0.0

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Continue if any remembered target is off by more than the tolerance."""
        if not self._last_target:
            # Nothing to compare against yet.
            return StepDecision(status="skip_day", message="no prior target weights")

        capital = float(ctx.total_capital)
        if capital <= 0:
            return StepDecision(status="skip_day", message="no capital")

        for sym, target_w in self._last_target.items():
            drift = abs(self._actual_weight(ctx, sym, capital) - target_w)
            if drift > self._tolerance:
                return StepDecision()  # out of bounds -> allow rebalance
        return StepDecision(status="skip_day", message="all weights within bounds")

    def update_target(self, weights: dict[str, float]) -> None:
        """Call after a successful rebalance to remember the new target."""
        self._last_target = dict(weights)
class Or:
    """Logical OR over child algos: pass as soon as one child continues.

    Children are evaluated in order and evaluation stops at the first child
    whose status is "continue"; later children are not called on that date.
    """

    def __init__(self, *algos: Algo) -> None:
        self._algos = algos

    def reset(self) -> None:
        """Reset every child that exposes a ``reset`` method."""
        for child in self._algos:
            if hasattr(child, "reset"):
                child.reset()

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Continue if any child continues, otherwise skip the day."""
        if any(child(ctx).status == "continue" for child in self._algos):
            return StepDecision()
        return StepDecision(status="skip_day", message="all sub-algos skipped")
class Not:
    """Logical NOT over one child algo.

    A child "skip_day" becomes "continue"; any other child status becomes
    "skip_day".
    """

    def __init__(self, algo: Algo) -> None:
        self._algo = algo

    def reset(self) -> None:
        """Reset the wrapped algo when it exposes a ``reset`` method."""
        wrapped = self._algo
        if hasattr(wrapped, "reset"):
            wrapped.reset()

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Run the child and invert its decision."""
        child_skipped = self._algo(ctx).status == "skip_day"
        if child_skipped:
            return StepDecision()
        return StepDecision(status="skip_day", message="inverted")
# ---------------------------------------------------------------------------
# Selection algos
# ---------------------------------------------------------------------------
class SelectThese:
    """Select a fixed symbol list, keeping only those priced on the current date.

    Symbols are upper-cased at construction time.
    """

    def __init__(self, symbols: list[str]) -> None:
        self.symbols = [s.upper() for s in symbols]

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Store the priced subset in ``ctx.selected_symbols``; skip if empty."""
        priced = []
        for sym in self.symbols:
            if sym in ctx.prices.index and pd.notna(ctx.prices[sym]):
                priced.append(sym)

        ctx.selected_symbols = priced
        if priced:
            return StepDecision()
        return StepDecision(status="skip_day", message="no selected symbols with valid prices")
class SelectAll:
    """Select every symbol with a non-NaN, strictly positive price today.

    The selection is stored sorted.
    """

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Store the sorted tradable symbols in ``ctx.selected_symbols``."""

        def has_positive_price(sym) -> bool:
            return bool(pd.notna(ctx.prices[sym]) and float(ctx.prices[sym]) > 0)

        tradable = [sym for sym in ctx.prices.index if has_positive_price(sym)]
        ctx.selected_symbols = sorted(tradable)
        if tradable:
            return StepDecision()
        return StepDecision(status="skip_day", message="no symbols with valid prices")
class SelectHasData:
    """Keep symbols with at least *min_days* non-NaN rows of price history.

    Starts from the current selection, or from every priced-index symbol when
    nothing has been selected yet.
    """

    def __init__(self, min_days: int = 1) -> None:
        self._min_days = int(min_days)

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Filter ``ctx.selected_symbols`` by available history length."""
        history = ctx.price_history
        if history is None or history.empty:
            return StepDecision(status="skip_day", message="no price history")

        candidates = ctx.selected_symbols or list(ctx.prices.index)
        enough = [
            sym
            for sym in candidates
            if sym in history.columns and len(history[sym].dropna()) >= self._min_days
        ]

        ctx.selected_symbols = enough
        if enough:
            return StepDecision()
        return StepDecision(status="skip_day", message=f"no symbols with {self._min_days}+ days")
class SelectMomentum:
    """Rank candidates by trailing total return and keep the top *n*.

    The return is measured from the first to the last of the most recent
    *lookback* non-NaN prices.  ``sort_descending=False`` keeps the weakest
    performers instead.
    """

    def __init__(self, n: int, lookback: int = 252, sort_descending: bool = True) -> None:
        self._n = int(n)
        self._lookback = int(lookback)
        self._sort_desc = sort_descending

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Replace ``ctx.selected_symbols`` with the *n* best-ranked symbols."""
        history = ctx.price_history
        if history is None or history.empty:
            return StepDecision(status="skip_day", message="no price history for momentum")

        # Fall back to every symbol with a non-NaN price when nothing is selected.
        universe = ctx.selected_symbols or [
            sym for sym in ctx.prices.index if pd.notna(ctx.prices[sym])
        ]

        returns: dict[str, float] = {}
        for sym in universe:
            if sym not in history.columns:
                continue
            clean = history[sym].dropna()
            if len(clean) < 2:
                continue
            window = clean.iloc[-self._lookback:]
            # Need two points and a positive starting price for a return.
            if len(window) < 2 or float(window.iloc[0]) <= 0:
                continue
            returns[sym] = float(window.iloc[-1] / window.iloc[0] - 1)

        ranked = sorted(returns, key=returns.get, reverse=self._sort_desc)  # type: ignore[arg-type]
        ctx.selected_symbols = ranked[: self._n]
        if ctx.selected_symbols:
            return StepDecision()
        return StepDecision(status="skip_day", message="no symbols with enough momentum data")
class SelectN:
    """Truncate the current selection to its first *n* symbols, order preserved."""

    def __init__(self, n: int) -> None:
        self._n = int(n)

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Slice ``ctx.selected_symbols`` and skip the day if nothing remains."""
        head = ctx.selected_symbols[: self._n]
        ctx.selected_symbols = head
        if head:
            return StepDecision()
        return StepDecision(status="skip_day", message="no symbols after SelectN")
class SelectRandomly:
    """Draw up to *n* symbols at random from the current selection.

    A private ``random.Random(seed)`` instance is used, so a fixed seed gives
    a reproducible sequence of draws.  The result is stored sorted.
    """

    def __init__(self, n: int, seed: int | None = None) -> None:
        self._n = int(n)
        self._rng = _random.Random(seed)

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Replace ``ctx.selected_symbols`` with a sorted random sample."""
        # Fall back to every symbol with a non-NaN price when nothing is selected.
        pool = ctx.selected_symbols or [
            sym for sym in ctx.prices.index if pd.notna(ctx.prices[sym])
        ]
        if not pool:
            return StepDecision(status="skip_day", message="no candidates for random selection")

        sample_size = min(self._n, len(pool))
        drawn = self._rng.sample(pool, sample_size)
        ctx.selected_symbols = sorted(drawn)
        return StepDecision()
class SelectActive:
    """Drop candidates that are unpriced, NaN-priced or priced at zero or below.

    Starts from the current selection, or from every symbol in the price index
    when nothing has been selected yet.
    """

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Store the surviving symbols in ``ctx.selected_symbols``."""
        pool = ctx.selected_symbols or list(ctx.prices.index)

        alive = []
        for sym in pool:
            if sym not in ctx.prices.index:
                continue
            if pd.notna(ctx.prices[sym]) and float(ctx.prices[sym]) > 0:
                alive.append(sym)

        ctx.selected_symbols = alive
        if alive:
            return StepDecision()
        return StepDecision(status="skip_day", message="no active symbols")
class SelectRegex:
    """Keep candidates whose name contains a match for a regular expression.

    Matching uses ``search`` (anywhere in the name), not a full match.
    """

    def __init__(self, pattern: str) -> None:
        self._pattern = _re.compile(pattern)

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Filter the selection (or the whole price index) by the pattern."""
        pool = ctx.selected_symbols or list(ctx.prices.index)
        hits = list(filter(self._pattern.search, pool))
        ctx.selected_symbols = hits
        if hits:
            return StepDecision()
        return StepDecision(status="skip_day", message=f"no symbols match {self._pattern.pattern!r}")
class SelectWhere:
    """Keep candidates for which a user predicate ``fn(symbol, ctx)`` is truthy."""

    def __init__(self, fn: Callable[[str, PipelineContext], bool]) -> None:
        self._fn = fn

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Apply the predicate to each candidate and store the survivors."""
        # Fall back to every symbol with a non-NaN price when nothing is selected.
        pool = ctx.selected_symbols or [
            sym for sym in ctx.prices.index if pd.notna(ctx.prices[sym])
        ]

        accepted = []
        for sym in pool:
            if self._fn(sym, ctx):
                accepted.append(sym)
        ctx.selected_symbols = accepted

        if ctx.selected_symbols:
            return StepDecision()
        return StepDecision(status="skip_day", message="no symbols passed filter")
# ---------------------------------------------------------------------------
# Weighting algos
# ---------------------------------------------------------------------------
class WeighSpecified:
    """Apply fixed per-symbol weights, renormalised over the selected symbols.

    Weight keys are upper-cased at construction time; selected symbols without
    a configured weight get zero.
    """

    def __init__(self, weights: dict[str, float]) -> None:
        self.weights = {k.upper(): float(v) for k, v in weights.items()}

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Write normalised weights for the selection into ``ctx.target_weights``."""
        if not ctx.selected_symbols:
            return StepDecision(status="skip_day", message="no selected symbols")

        configured = {}
        for sym in ctx.selected_symbols:
            configured[sym] = self.weights.get(sym, 0.0)

        denom = float(sum(configured.values()))
        if denom <= 0:
            return StepDecision(status="skip_day", message="target weights sum to zero")

        ctx.target_weights = {sym: w / denom for sym, w in configured.items()}
        return StepDecision()
class WeighEqually:
    """Give every selected symbol the same weight, ``1 / len(selection)``."""

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Write equal weights into ``ctx.target_weights``; skip if nothing is selected."""
        chosen = ctx.selected_symbols
        if not chosen:
            return StepDecision(status="skip_day", message="no selected symbols")
        share = 1.0 / len(chosen)
        ctx.target_weights = dict.fromkeys(chosen, share)
        return StepDecision()
class WeighRandomly:
    """Assign random weights that sum to one across the selected symbols.

    Weights are drawn from a flat Dirichlet distribution using a private
    ``numpy.random.RandomState(seed)``.  Useful for random benchmark
    strategies.
    """

    def __init__(self, seed: int | None = None) -> None:
        self._rng = np.random.RandomState(seed)

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Draw one weight per selected symbol into ``ctx.target_weights``."""
        chosen = ctx.selected_symbols
        if not chosen:
            return StepDecision(status="skip_day", message="no selected symbols")
        draws = self._rng.dirichlet(np.ones(len(chosen)))
        ctx.target_weights = {sym: float(draw) for sym, draw in zip(chosen, draws)}
        return StepDecision()
class WeighTarget:
    """Look target weights up in a pre-computed, date-indexed DataFrame.

    *weights_df* has dates as its index and symbols as columns.  For each
    context date the latest row dated on or before it is used, restricted to
    the selected symbols and renormalised to sum to one.
    """

    def __init__(self, weights_df: pd.DataFrame) -> None:
        self._weights = weights_df.sort_index()

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Write the renormalised weights of the latest applicable row."""
        if not ctx.selected_symbols:
            return StepDecision(status="skip_day", message="no selected symbols")

        # Rows dated on or before the current date; the last one is the newest.
        eligible = self._weights.loc[self._weights.index <= ctx.date]
        if len(eligible) == 0:
            return StepDecision(status="skip_day", message="no weight data for this date")
        latest = eligible.iloc[-1]

        picked = {
            sym: float(latest[sym])
            for sym in ctx.selected_symbols
            if sym in latest.index and pd.notna(latest[sym])
        }
        if not picked:
            return StepDecision(status="skip_day", message="no matching weights")

        denom = sum(picked.values())
        if denom <= 0:
            return StepDecision(status="skip_day", message="weights sum to zero")

        ctx.target_weights = {sym: w / denom for sym, w in picked.items()}
        return StepDecision()
class WeighInvVol:
    """Inverse-volatility weighting.

    ``weight_i = (1 / vol_i) / sum_j(1 / vol_j)`` where ``vol`` is the standard
    deviation of simple returns over the trailing *lookback* non-NaN prices.
    Symbols with fewer than three prices in the window, or with zero or NaN
    volatility, are left out of the result.
    """

    def __init__(self, lookback: int = 252) -> None:
        self._lookback = int(lookback)

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Write inverse-volatility weights into ``ctx.target_weights``."""
        if not ctx.selected_symbols:
            return StepDecision(status="skip_day", message="no selected symbols")
        history = ctx.price_history
        if history is None or history.empty:
            return StepDecision(status="skip_day", message="no price history for inv-vol")

        inverse: dict[str, float] = {}
        for sym in ctx.selected_symbols:
            if sym not in history.columns:
                continue
            window = history[sym].dropna().iloc[-self._lookback:]
            if len(window) < 3:
                continue
            vol = float(window.pct_change().dropna().std())
            if vol > 0:
                inverse[sym] = 1.0 / vol

        if not inverse:
            return StepDecision(status="skip_day", message="no valid vol data")

        denom = sum(inverse.values())
        ctx.target_weights = {sym: inv / denom for sym, inv in inverse.items()}
        return StepDecision()
class WeighMeanVar:
    """Long-only mean-variance weights (inverse-covariance times excess return).

    Uses simple returns over the trailing *lookback* rows of history.  The raw
    solution ``inv(cov) @ (mean - rf / 252)`` is clipped at zero and
    renormalised to sum to one.  Equal weights are used instead when the
    covariance matrix cannot be inverted or the solution has no positive
    component.
    """

    def __init__(self, lookback: int = 252, risk_free_rate: float = 0.0) -> None:
        self._lookback = int(lookback)
        self._rf = float(risk_free_rate)

    @staticmethod
    def _equal_weights(symbols: list[str]) -> dict[str, float]:
        """Fallback allocation: the same weight for every symbol."""
        share = 1.0 / len(symbols)
        return {sym: share for sym in symbols}

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Write the optimised (or fallback) weights into ``ctx.target_weights``."""
        if not ctx.selected_symbols:
            return StepDecision(status="skip_day", message="no selected symbols")
        history = ctx.price_history
        if history is None or history.empty:
            return StepDecision(status="skip_day", message="no price history for mean-var")

        syms = [sym for sym in ctx.selected_symbols if sym in history.columns]
        if len(syms) < 1:
            return StepDecision(status="skip_day", message="no price history columns match")

        # Only rows where every symbol has a price are used.
        complete = history[syms].dropna()
        if len(complete) < 3:
            return StepDecision(status="skip_day", message="insufficient data for mean-var")
        rets = complete.iloc[-self._lookback:].pct_change().dropna()
        if len(rets) < 3:
            return StepDecision(status="skip_day", message="insufficient returns for mean-var")

        mean_ret = rets.mean().values
        cov = rets.cov().values

        try:
            cov_inv = np.linalg.inv(cov)
        except np.linalg.LinAlgError:
            # Singular covariance: fall back to equal weight.
            ctx.target_weights = self._equal_weights(syms)
            return StepDecision()

        # The annual risk-free rate is converted to a daily rate.
        raw = cov_inv @ (mean_ret - self._rf / 252)

        # An all-zero solution carries no information.
        if float(np.sum(np.abs(raw))) <= 0:
            ctx.target_weights = self._equal_weights(syms)
            return StepDecision()

        # Long-only: clip negatives, renormalize.
        long_only = np.maximum(raw, 0.0)
        long_sum = float(np.sum(long_only))
        if long_sum <= 0:
            ctx.target_weights = self._equal_weights(syms)
            return StepDecision()

        final = long_only / long_sum
        ctx.target_weights = {sym: float(final[pos]) for pos, sym in enumerate(syms)}
        return StepDecision()
class WeighERC:
    """Equal Risk Contribution weighting.

    Starts from equal weights and, for up to *max_iter* rounds, rescales each
    weight by the ratio of its target risk contribution (portfolio volatility
    divided by the number of assets) to its current contribution, then
    renormalises.  The loop runs a fixed number of rounds; there is no
    convergence test.
    """

    def __init__(self, lookback: int = 252, max_iter: int = 100) -> None:
        self._lookback = int(lookback)
        self._max_iter = int(max_iter)

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Write the iterated weights into ``ctx.target_weights``."""
        if not ctx.selected_symbols:
            return StepDecision(status="skip_day", message="no selected symbols")
        history = ctx.price_history
        if history is None or history.empty:
            return StepDecision(status="skip_day", message="no price history for ERC")

        syms = [sym for sym in ctx.selected_symbols if sym in history.columns]
        if len(syms) < 1:
            return StepDecision(status="skip_day", message="no matching columns")

        # Only rows where every symbol has a price are used.
        rets = history[syms].dropna().iloc[-self._lookback:].pct_change().dropna()
        if len(rets) < 3:
            return StepDecision(status="skip_day", message="insufficient data for ERC")

        cov = rets.cov().values
        n_assets = len(syms)

        weights = np.ones(n_assets) / n_assets
        for _ in range(self._max_iter):
            port_vol = np.sqrt(float(weights @ cov @ weights))
            if port_vol <= 0:
                break
            marginal = (cov @ weights) / port_vol  # marginal risk contribution
            contribution = weights * marginal
            per_asset_target = port_vol / n_assets
            # Under-contributing assets are scaled up, over-contributing down;
            # the 1e-12 floor avoids dividing by a non-positive contribution.
            rescale = per_asset_target / np.maximum(contribution, 1e-12)
            weights = np.maximum(weights * rescale, 0.0)
            weight_sum = float(np.sum(weights))
            if weight_sum > 0:
                weights = weights / weight_sum

        ctx.target_weights = {sym: float(weights[pos]) for pos, sym in enumerate(syms)}
        return StepDecision()
class TargetVol:
    """Scale target weights down towards a target annualised volatility.

    Every weight is multiplied by ``min(target / realized_vol, 1.0)``, so
    exposure is only ever reduced, never levered up.  Realised volatility is
    the standard deviation of the weighted portfolio's trailing returns,
    annualised with ``sqrt(252)``.  Weights pass through unchanged when there
    are fewer than three returns or the realised volatility is not positive.
    """

    def __init__(self, target: float = 0.10, lookback: int = 252) -> None:
        self._target = float(target)
        self._lookback = int(lookback)

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Rescale ``ctx.target_weights`` in place of the previous mapping."""
        if not ctx.target_weights:
            return StepDecision(status="skip_day", message="no target weights to scale")
        history = ctx.price_history
        if history is None or history.empty:
            return StepDecision(status="skip_day", message="no price history for vol scaling")

        with_history = [sym for sym in ctx.target_weights.keys() if sym in history.columns]
        if not with_history:
            return StepDecision(status="skip_day", message="no price data for vol scaling")

        rets = history[with_history].dropna().iloc[-self._lookback:].pct_change().dropna()
        if len(rets) < 3:
            return StepDecision()  # not enough data, pass through unchanged

        weight_vec = np.array([ctx.target_weights.get(sym, 0.0) for sym in with_history])
        portfolio_rets = rets.values @ weight_vec
        realized_vol = float(np.std(portfolio_rets) * np.sqrt(252))
        if realized_vol <= 0:
            return StepDecision()

        factor = min(self._target / realized_vol, 1.0)  # never lever above 1.0
        ctx.target_weights = {sym: w * factor for sym, w in ctx.target_weights.items()}
        return StepDecision()
# ---------------------------------------------------------------------------
# Weight limits
# ---------------------------------------------------------------------------
class LimitWeights:
    """Cap each target weight at *limit* and hand the excess to the others.

    Up to ten clip-and-redistribute passes are made.  In each pass, weights
    above the limit are set to the limit and the removed excess is spread over
    the remaining weights in proportion to their size.
    """

    def __init__(self, limit: float = 0.25) -> None:
        self._limit = float(limit)

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Replace ``ctx.target_weights`` with the capped weights."""
        if not ctx.target_weights:
            return StepDecision()

        cap = self._limit
        capped = dict(ctx.target_weights)
        # Redistribution can push other weights over the cap, hence the loop.
        for _ in range(10):
            breaching = {sym: w for sym, w in capped.items() if w > cap}
            if not breaching:
                break
            within = {sym: w for sym, w in capped.items() if w <= cap}

            for sym in breaching:
                capped[sym] = cap

            room_base = sum(within.values())
            excess = sum(w - cap for w in breaching.values())
            if room_base > 0:
                growth = 1.0 + excess / room_base
                for sym in within:
                    capped[sym] = capped[sym] * growth

        ctx.target_weights = capped
        return StepDecision()
class LimitDeltas:
    """Limit how far each target weight may move from the current weight.

    Current weights are derived from positions, prices and total capital.
    Each target is pulled back so it differs from the current weight by at
    most *limit*, then all weights are rescaled so their sum equals the sum of
    the original targets.
    """

    def __init__(self, limit: float = 0.10) -> None:
        self._limit = float(limit)

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Replace ``ctx.target_weights`` with the delta-limited weights."""
        if not ctx.target_weights:
            return StepDecision()
        capital = float(ctx.total_capital)
        if capital <= 0:
            return StepDecision()

        step = self._limit
        limited: dict[str, float] = {}
        for sym, wanted in ctx.target_weights.items():
            # Weight currently held; unpriced symbols count as zero.
            held = ctx.positions.get(sym, 0.0)
            if sym in ctx.prices.index and pd.notna(ctx.prices[sym]):
                now = float(held) * float(ctx.prices[sym]) / capital
            else:
                now = 0.0
            move = max(-step, min(step, wanted - now))
            limited[sym] = now + move

        # Renormalize to sum to original target sum.
        wanted_sum = sum(ctx.target_weights.values())
        limited_sum = sum(limited.values())
        if limited_sum > 0 and wanted_sum > 0:
            ratio = wanted_sum / limited_sum
            limited = {sym: w * ratio for sym, w in limited.items()}

        ctx.target_weights = limited
        return StepDecision()
class ScaleWeights:
    """Multiply every target weight by a constant factor.

    A factor above one increases exposure, below one reduces it; weights are
    not renormalised afterwards.
    """

    def __init__(self, scale: float) -> None:
        self._scale = float(scale)

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Scale ``ctx.target_weights``; a no-op when there are none."""
        if ctx.target_weights:
            factor = self._scale
            ctx.target_weights = {sym: w * factor for sym, w in ctx.target_weights.items()}
        return StepDecision()
# ---------------------------------------------------------------------------
# Capital flows
# ---------------------------------------------------------------------------
class CapitalFlow:
    """Add (+) or withdraw (-) capital on given dates.

    *flows* is either a mapping from date to amount (keys are normalised to
    midnight and looked up by the normalised context date) or a callable
    ``(date: pd.Timestamp) -> float`` returning the amount for that date.
    """

    def __init__(self, flows: dict[str | pd.Timestamp, float] | Callable[[pd.Timestamp], float]) -> None:
        if callable(flows):
            self._fn = flows
            return

        schedule = {pd.Timestamp(day).normalize(): float(amount) for day, amount in flows.items()}

        def lookup(day: pd.Timestamp) -> float:
            return schedule.get(day.normalize(), 0.0)

        self._fn = lookup

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Apply today's flow, if any, to both cash and total capital."""
        flow = self._fn(ctx.date)
        if flow != 0.0:
            ctx.cash = float(ctx.cash + flow)
            ctx.total_capital = float(ctx.total_capital + flow)
        return StepDecision()
# ---------------------------------------------------------------------------
# Risk guards
# ---------------------------------------------------------------------------
class MaxDrawdownGuard:
    """Skip the day while drawdown from the running capital peak is too deep.

    The peak is the highest ``ctx.total_capital`` seen since construction or
    the last :meth:`reset`.
    """

    def __init__(self, max_drawdown_pct: float) -> None:
        self.max_drawdown_pct = float(max_drawdown_pct)
        # Highest total capital observed so far.
        self._peak = 0.0

    def reset(self) -> None:
        """Forget the recorded peak."""
        self._peak = 0.0

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Update the peak and skip when drawdown exceeds the threshold."""
        capital = float(ctx.total_capital)
        self._peak = max(self._peak, capital)
        if self._peak <= 0:
            return StepDecision()

        dd = (self._peak - capital) / self._peak
        if dd <= self.max_drawdown_pct:
            return StepDecision()
        return StepDecision(status="skip_day", message=f"drawdown {dd:.2%} > {self.max_drawdown_pct:.2%}")
class HedgeRisks:
    """Adjust target weights so an estimated portfolio delta moves to a target.

    Return volatility (standard deviation of simple returns over the available
    price history) is used as a stand-in for each instrument's delta.  The
    portfolio estimate is ``sum(weight_i * vol_i)``; a one-row Jacobian of the
    hedge instruments' volatilities is solved by least squares for the weight
    adjustments that close the gap to ``target_delta``.

    ``target_vega`` is stored but not used by the current implementation.
    """

    def __init__(
        self,
        target_delta: float = 0.0,
        target_vega: float = 0.0,
        hedge_symbols: list[str] | None = None,
    ) -> None:
        """Store the hedge targets.

        Args:
            target_delta: Desired value of the estimated portfolio delta.
            target_vega: Stored only; not read by ``__call__``.
            hedge_symbols: Instruments whose weights may be adjusted.  When
                omitted, the context's selected symbols are used.
        """
        self.target_delta = float(target_delta)
        self.target_vega = float(target_vega)
        self.hedge_symbols = hedge_symbols

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Add hedge adjustments to ``ctx.target_weights`` in place.

        Hedge symbols that are priced today but have no column in
        ``ctx.price_history`` contribute a zero Jacobian entry and therefore
        receive a zero adjustment instead of aborting the step.
        """
        if not ctx.target_weights:
            return StepDecision(status="skip_day", message="no target weights to hedge")

        # Determine hedge instruments.
        hedgers = self.hedge_symbols or ctx.selected_symbols
        if not hedgers:
            return StepDecision(status="skip_day", message="no hedge symbols")
        hedgers = [s for s in hedgers if s in ctx.prices.index and pd.notna(ctx.prices[s])]
        if not hedgers:
            return StepDecision(status="skip_day", message="no valid hedge symbols")

        if ctx.price_history is None or len(ctx.price_history) < 3:
            return StepDecision(status="skip_day", message="insufficient history for hedge")

        history_cols = ctx.price_history.columns
        port_syms = [s for s in ctx.target_weights if s in history_cols]
        if not port_syms:
            return StepDecision()

        # Select only columns that exist: indexing with a missing label raises
        # KeyError.  dict.fromkeys de-duplicates while keeping a stable order.
        wanted_cols = [s for s in dict.fromkeys(port_syms + hedgers) if s in history_cols]
        rets = ctx.price_history[wanted_cols].pct_change().dropna()
        if len(rets) < 3:
            return StepDecision(status="skip_day", message="insufficient returns for hedge")

        # Portfolio delta ~ sum(weight_i * std_i), with std as the delta proxy.
        port_delta = 0.0
        for s in port_syms:
            if s in rets.columns:
                port_delta += ctx.target_weights.get(s, 0.0) * float(rets[s].std())
        delta_gap = self.target_delta - port_delta

        # Build Jacobian: each hedger's marginal delta contribution.
        n = len(hedgers)
        jacobian = np.zeros((1, n))
        for j, h in enumerate(hedgers):
            if h in rets.columns:
                jacobian[0, j] = float(rets[h].std())

        target_vec = np.array([delta_gap])
        try:
            adjustments, _, _, _ = np.linalg.lstsq(jacobian, target_vec, rcond=None)
        except np.linalg.LinAlgError:
            return StepDecision(message="hedge solve failed, weights unchanged")

        for j, h in enumerate(hedgers):
            current = ctx.target_weights.get(h, 0.0)
            ctx.target_weights[h] = current + float(adjustments[j])

        return StepDecision(message=f"hedged delta gap={delta_gap:.4f}")
class Margin:
    """Simulate leverage: daily interest, a maintenance check and weight scaling.

    On every call the step values the held positions, charges one day of
    interest (``interest_rate / 252``) on the borrowed amount, returns "stop"
    when ``equity / exposure`` falls below ``maintenance_pct``, and otherwise
    multiplies all target weights by ``leverage``.
    """

    def __init__(
        self,
        leverage: float = 2.0,
        interest_rate: float = 0.02,
        maintenance_pct: float = 0.25,
    ) -> None:
        self.leverage = float(leverage)
        self.interest_rate = float(interest_rate)
        self.maintenance_pct = float(maintenance_pct)
        # Borrowed amount computed on the most recent call.
        self._borrowed = 0.0

    def reset(self) -> None:
        """Clear the recorded borrowed amount."""
        self._borrowed = 0.0

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        """Charge interest, check maintenance margin, then lever the weights."""
        # Mark held positions to market; unpriced symbols are ignored.
        stock_value = sum(
            (
                float(qty) * float(ctx.prices[sym])
                for sym, qty in ctx.positions.items()
                if sym in ctx.prices.index and pd.notna(ctx.prices[sym])
            ),
            0.0,
        )

        # NOTE(review): borrowed is taken as stock value minus cash, which is
        # positive for an unlevered book whose stock exceeds its cash --
        # confirm this is the intended definition of the loan balance.
        self._borrowed = max(0.0, stock_value - float(ctx.cash))

        if self._borrowed > 0:
            interest_today = self.interest_rate / 252.0 * self._borrowed
            ctx.cash = float(ctx.cash) - interest_today
            ctx.total_capital = float(ctx.total_capital) - interest_today

        # Margin call check: equity vs total exposure.
        equity = float(ctx.cash) + stock_value
        exposure = stock_value
        if exposure > 0 and equity / exposure < self.maintenance_pct:
            return StepDecision(status="stop", message=f"margin call: equity/exposure={equity / exposure:.2%}")

        if ctx.target_weights:
            factor = self.leverage
            ctx.target_weights = {sym: w * factor for sym, w in ctx.target_weights.items()}
        return StepDecision()
class CouponPayingPosition:
    """Pipeline step that credits a fixed coupon to the portfolio on a schedule.

    The coupon is added to both ``ctx.cash`` and ``ctx.total_capital``.  The
    first coupon is paid on the first call that is not before ``start_date``;
    later coupons are paid once at least the configured number of calendar
    months has elapsed since the month of the previous payment.  On or after
    ``maturity_date`` one last coupon is paid and the step returns ``"stop"``.
    """

    # Number of calendar months between coupons for each supported frequency.
    _FREQUENCY_MONTHS = {
        "annual": 12,
        "semi-annual": 6,
        "quarterly": 3,
        "monthly": 1,
    }

    def __init__(
        self,
        coupon_amount: float,
        frequency: str = "semi-annual",
        start_date: str | pd.Timestamp | None = None,
        maturity_date: str | pd.Timestamp | None = None,
    ) -> None:
        """Raise ``ValueError`` when *frequency* is not a supported key."""
        self.coupon_amount = float(coupon_amount)
        if frequency not in self._FREQUENCY_MONTHS:
            raise ValueError(f"frequency must be one of {list(self._FREQUENCY_MONTHS)}")
        self.frequency = frequency
        self._months = self._FREQUENCY_MONTHS[frequency]
        self.start_date = pd.Timestamp(start_date) if start_date else None
        self.maturity_date = pd.Timestamp(maturity_date) if maturity_date else None
        # (year, month) of the most recent payment; None until the first one.
        self._last_coupon_month: tuple[int, int] | None = None

    def reset(self) -> None:
        """Clear the payment history so the schedule restarts."""
        self._last_coupon_month = None

    def _credit(self, ctx: PipelineContext) -> None:
        # A coupon is new money: it raises cash and total capital alike.
        ctx.cash = float(ctx.cash) + self.coupon_amount
        ctx.total_capital = float(ctx.total_capital) + self.coupon_amount

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        today = ctx.date

        # Maturity takes precedence over every other rule.
        if self.maturity_date and today >= self.maturity_date:
            self._credit(ctx)  # final coupon
            return StepDecision(status="stop", message="bond matured")

        if self.start_date and today < self.start_date:
            return StepDecision()

        this_month = (today.year, today.month)
        previous = self._last_coupon_month
        if previous is not None:
            prev_year, prev_month = previous
            months_elapsed = (today.year - prev_year) * 12 + (today.month - prev_month)
            if previous == this_month or months_elapsed < self._months:
                return StepDecision()

        self._last_coupon_month = this_month
        self._credit(ctx)
        return StepDecision(message=f"coupon paid: ${self.coupon_amount:.2f}")
class ReplayTransactions:
    """Pipeline step that re-executes a recorded trade blotter.

    The blotter needs the columns ``date``, ``symbol`` and ``quantity``
    (positive quantity buys, negative sells).  Dates are normalised to
    midnight.  On a day with recorded trades, each trade whose upper-cased
    symbol has a positive, non-NaN price in ``ctx.prices`` is filled at that
    price: cash is reduced by ``quantity * price`` and the position is
    adjusted (and removed when it nets to zero).  Other trades are skipped.
    """

    def __init__(self, blotter: pd.DataFrame) -> None:
        """Copy *blotter*; raise ``ValueError`` when a required column is absent."""
        required = {"date", "symbol", "quantity"}
        missing = required - set(blotter.columns)
        if missing:
            raise ValueError(f"blotter missing columns: {missing}")
        self._blotter = blotter.copy()
        self._blotter["date"] = pd.to_datetime(self._blotter["date"]).dt.normalize()

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        todays_mask = self._blotter["date"] == ctx.date.normalize()
        day_trades = self._blotter[todays_mask]
        if day_trades.empty:
            return StepDecision()

        prices = ctx.prices
        executed = 0
        for raw_symbol, raw_quantity in zip(day_trades["symbol"], day_trades["quantity"]):
            sym = str(raw_symbol).upper()
            qty = float(raw_quantity)

            # Only trades with a usable quote can be filled.
            if sym not in prices.index or pd.isna(prices[sym]):
                continue
            fill = float(prices[sym])
            if fill <= 0:
                continue

            ctx.cash = float(ctx.cash) - qty * fill
            ctx.total_capital = float(ctx.total_capital)  # recalc handled by backtester

            resulting_qty = ctx.positions.get(sym, 0.0) + qty
            if resulting_qty == 0:
                ctx.positions.pop(sym, None)
            else:
                ctx.positions[sym] = resulting_qty
            executed += 1

        return StepDecision(message=f"replayed {executed} trades")
# ---------------------------------------------------------------------------
# Position management
# ---------------------------------------------------------------------------
class CloseDead:
    """Pipeline step that drops positions which can no longer be priced.

    A position is considered dead when its symbol is absent from
    ``ctx.prices``, or its price is NaN, zero or negative.  Dead positions
    are removed from ``ctx.positions``; cash is left untouched.
    """

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        prices = ctx.prices

        def _is_dead(sym) -> bool:
            if sym not in prices.index:
                return True
            quote = prices[sym]
            return bool(pd.isna(quote)) or float(quote) <= 0

        # Collect first: the dict must not change size while it is iterated.
        dead = [sym for sym in ctx.positions if _is_dead(sym)]
        for sym in dead:
            del ctx.positions[sym]

        if not dead:
            return StepDecision()
        return StepDecision(message=f"closed dead: {', '.join(dead)}")
class ClosePositionsAfterDates:
    """Close specific positions on or after given dates.

    *schedule* maps symbol names to the date from which the symbol must no
    longer be held.  Symbols are upper-cased and dates normalised to midnight.

    When a scheduled position is closed it is sold at the current price: the
    proceeds (``quantity * price``) are credited to ``ctx.cash`` so the
    position's value is not silently dropped from the portfolio.  If the
    symbol has no usable price on that day the position is still removed but
    nothing can be credited.
    """

    def __init__(self, schedule: dict[str, str | pd.Timestamp]) -> None:
        self._schedule = {s.upper(): pd.Timestamp(d).normalize() for s, d in schedule.items()}

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        today = ctx.date.normalize()
        closed = []
        for sym, close_date in self._schedule.items():
            if today < close_date or sym not in ctx.positions:
                continue
            qty = ctx.positions.pop(sym)
            # Liquidating at market is value-neutral: move the position's
            # worth into cash instead of letting it vanish.
            if sym in ctx.prices.index and pd.notna(ctx.prices[sym]):
                ctx.cash = float(ctx.cash) + float(qty) * float(ctx.prices[sym])
            closed.append(sym)
        if closed:
            return StepDecision(message=f"closed after date: {', '.join(closed)}")
        return StepDecision()
class Require:
    """Guard step: let the day proceed only if the wrapped algo says ``"continue"``.

    The wrapped algo is executed against the same context.  Whatever
    non-``"continue"`` status it reports is translated into ``"skip_day"``
    with the inner message attached; a ``"continue"`` result is passed on as a
    plain default decision.
    """

    def __init__(self, algo: Algo) -> None:
        self._algo = algo

    def reset(self) -> None:
        """Forward ``reset`` to the wrapped algo when it supports it."""
        inner_reset = getattr(self._algo, "reset", None)
        if hasattr(self._algo, "reset"):
            inner_reset()

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        inner = self._algo(ctx)
        if inner.status == "continue":
            return StepDecision()
        return StepDecision(status="skip_day", message=f"requirement not met: {inner.message}")
# ---------------------------------------------------------------------------
# Rebalancing algos
# ---------------------------------------------------------------------------
class Rebalance:
    """Rebalance positions to target weights at current prices.

    Performs a full liquidate-and-rebuy on each rebalance date: every target
    symbol receives ``floor(total_capital * weight / price)`` shares, all
    previous positions are discarded and cash becomes
    ``total_capital - spent``.

    Symbols that cannot be priced today (absent from ``ctx.prices``, NaN, zero
    or negative price) are skipped, mirroring ``RebalanceOverTime``; their
    share of the capital stays in cash.  Returns ``"skip_day"`` when no target
    weights are set.
    """

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        if not ctx.target_weights:
            return StepDecision(status="skip_day", message="no target weights")
        new_positions: dict[str, float] = {}
        spent = 0.0
        for sym, w in ctx.target_weights.items():
            # Without this guard a missing symbol raises KeyError and a NaN
            # price turns both the quantity and the cash balance into NaN.
            if sym not in ctx.prices.index or pd.isna(ctx.prices[sym]):
                continue
            price = float(ctx.prices[sym])
            if price <= 0:
                continue
            target_value = float(ctx.total_capital) * w
            qty = float(np.floor(target_value / price))
            new_positions[sym] = qty
            spent += qty * price
        ctx.positions.clear()
        ctx.positions.update(new_positions)
        ctx.cash = float(ctx.total_capital - spent)
        return StepDecision()
class RebalanceOverTime:
    """Pipeline step that walks towards the target weights over *n* calls.

    A new glide path starts whenever ``ctx.target_weights`` is non-empty and
    differs from the stored target.  With ``k`` calls left, each call moves
    every weight ``1/k`` of the remaining distance to its target, so the last
    call lands on the target.  Positions are rebuilt from the blended weights
    with floored share counts; only strictly positive quantities are kept.
    """

    def __init__(self, n: int = 5) -> None:
        self._n = int(n)
        self._target: dict[str, float] = {}
        self._remaining = 0

    def reset(self) -> None:
        """Abandon any glide path in progress."""
        self._target = {}
        self._remaining = 0

    def __call__(self, ctx: PipelineContext) -> StepDecision:
        incoming = ctx.target_weights
        if incoming and incoming != self._target:
            # Fresh weights from upstream: restart the countdown.
            self._target = dict(incoming)
            self._remaining = self._n

        if not self._target or self._remaining <= 0:
            return StepDecision(status="skip_day", message="no gradual rebalance in progress")

        capital = float(ctx.total_capital)
        if capital <= 0:
            return StepDecision(status="skip_day", message="no capital")

        prices = ctx.prices

        def _has_quote(sym) -> bool:
            return sym in prices.index and bool(pd.notna(prices[sym]))

        universe = set(self._target.keys()) | set(ctx.positions.keys())
        step = 1.0 / self._remaining

        # Blend today's weight with the target for every symbol involved.
        blended: dict[str, float] = {}
        for sym in universe:
            if _has_quote(sym):
                held = float(ctx.positions.get(sym, 0.0)) * float(prices[sym]) / capital
            else:
                held = 0.0
            blended[sym] = held + step * (self._target.get(sym, 0.0) - held)

        # Turn blended weights into whole-share positions.
        rebuilt: dict[str, float] = {}
        outlay = 0.0
        for sym, weight in blended.items():
            if not _has_quote(sym):
                continue
            quote = float(prices[sym])
            if quote <= 0:
                continue
            shares = float(np.floor(capital * weight / quote))
            if shares > 0:
                rebuilt[sym] = shares
                outlay += shares * quote

        ctx.positions.clear()
        ctx.positions.update(rebuilt)
        ctx.cash = float(capital - outlay)
        self._remaining -= 1
        return StepDecision()
class AlgoPipelineBacktester:
    """Simple stock backtester driven by composable pipeline algos.

    For every row of *prices* (sorted by index) a fresh ``PipelineContext`` is
    built and the algos are called in order.  A ``"skip_day"`` decision ends
    the day's pipeline; a ``"stop"`` decision ends the day's pipeline and the
    whole run after that day's row has been recorded.  Cash and positions are
    always read back from the context, even when the pipeline was cut short.

    Attributes:
        prices: Price table sorted by its index; one column per symbol.
        algos: Pipeline steps; any step with a ``reset`` method is reset at
            the start of every ``run``.
        initial_capital: Starting cash.
        logs: One ``PipelineLogRow`` per executed step of the last run.
        balance: Balance table of the last run (empty before the first run).
    """

    def __init__(
        self,
        prices: pd.DataFrame,
        algos: list[Algo],
        initial_capital: float = 1_000_000.0,
    ) -> None:
        self.prices = prices.sort_index()
        self.algos = algos
        self.initial_capital = float(initial_capital)
        self.logs: list[PipelineLogRow] = []
        # Defined up front so ``balance`` / ``set_date_range`` never hit an
        # AttributeError when consulted before ``run``.
        self.balance: pd.DataFrame = pd.DataFrame()

    @staticmethod
    def _stocks_value(positions: dict[str, float], price_row: pd.Series) -> float:
        """Market value of *positions*, ignoring symbols without a usable price."""
        return float(sum(float(qty) * float(price_row[sym])
                         for sym, qty in positions.items()
                         if sym in price_row.index and pd.notna(price_row[sym])))

    def run(self) -> pd.DataFrame:
        """Run the pipeline over all dates and return the balance table.

        The table is indexed by date with the columns ``cash``,
        ``stocks capital``, ``total capital``, one ``"<symbol> qty"`` column
        per symbol held on at least one recorded day, ``% change`` and
        ``accumulated return``.  An empty frame is returned when there are no
        price rows.
        """
        self.logs = []
        for algo in self.algos:
            if hasattr(algo, "reset"):
                algo.reset()

        cash = float(self.initial_capital)
        positions: dict[str, float] = {}
        rows: list[dict[str, float | pd.Timestamp]] = []

        for i, (date, price_row) in enumerate(self.prices.iterrows()):
            today = pd.Timestamp(date)
            ctx = PipelineContext(
                date=today,
                prices=price_row,
                total_capital=cash + self._stocks_value(positions, price_row),
                cash=cash,
                # Steps mutate a copy; state is committed after the pipeline.
                positions=dict(positions),
                # Price history up to and including today, for lookback algos.
                price_history=self.prices.iloc[: i + 1],
            )

            stop_all = False
            for algo in self.algos:
                decision = algo(ctx)
                self.logs.append(
                    PipelineLogRow(
                        date=today,
                        step=algo.__class__.__name__,
                        status=decision.status,
                        message=decision.message,
                    )
                )
                if decision.status == "skip_day":
                    break
                if decision.status == "stop":
                    stop_all = True
                    break

            cash = float(ctx.cash)
            positions = dict(ctx.positions)
            stocks_cap = self._stocks_value(positions, price_row)

            row: dict[str, float | pd.Timestamp] = {
                "date": today,
                "cash": cash,
                "stocks capital": stocks_cap,
                "total capital": cash + stocks_cap,
            }
            for sym, qty in positions.items():
                row[f"{sym} qty"] = float(qty)
            rows.append(row)

            if stop_all:
                break

        if not rows:
            self.balance = pd.DataFrame()
            return self.balance

        balance = pd.DataFrame(rows).set_index("date")
        balance["% change"] = balance["total capital"].pct_change()
        balance["accumulated return"] = (1.0 + balance["% change"]).cumprod()
        self.balance = balance
        return balance

    def set_date_range(self, start=None, end=None):
        """Filter results to date range, return new BacktestStats."""
        from options_portfolio_backtester.analytics.stats import BacktestStats
        return BacktestStats.from_balance_range(self.balance, start, end)

    def logs_dataframe(self) -> pd.DataFrame:
        """Return the step log of the last run as a DataFrame."""
        columns = ["date", "step", "status", "message"]
        if not self.logs:
            return pd.DataFrame(columns=columns)
        return pd.DataFrame([{
            "date": r.date,
            "step": r.step,
            "status": r.status,
            "message": r.message,
        } for r in self.logs])
# ---------------------------------------------------------------------------
# Random benchmarking
# ---------------------------------------------------------------------------
@dataclass(frozen=True)
class RandomBenchmarkResult:
    """Outcome of ``benchmark_random``: one strategy against random portfolios."""

    # Total return of the strategy under test.
    strategy_return: float
    # Total return of each random-weight simulation.
    random_returns: list[float]
    # Percentage of random runs the strategy strictly beat.
    percentile: float

    @property
    def mean_random(self) -> float:
        """Arithmetic mean of the random returns."""
        average = np.mean(self.random_returns)
        return float(average)

    @property
    def std_random(self) -> float:
        """Standard deviation of the random returns (NumPy default ``ddof``)."""
        spread = np.std(self.random_returns)
        return float(spread)
def benchmark_random(
    prices: pd.DataFrame,
    strategy_algos: list[Algo],
    n_random: int = 100,
    initial_capital: float = 1_000_000.0,
    seed: int = 42,
) -> RandomBenchmarkResult:
    """Rank a strategy against *n_random* random-weight portfolios.

    The strategy is run once through ``AlgoPipelineBacktester``.  Then
    *n_random* simulations are run on the same prices with the pipeline
    ``RunMonthly → SelectAll → WeighRandomly → Rebalance``, simulation ``i``
    being seeded with ``seed + i``.  A run's total return is
    ``last / first - 1`` of its ``"total capital"`` column, or ``0.0`` when
    its balance is empty.

    Returns a ``RandomBenchmarkResult`` holding the strategy return, the
    random returns and the percentage of random runs strictly beaten.
    """

    def _total_return(balance: pd.DataFrame) -> float:
        if balance.empty:
            return 0.0
        capital = balance["total capital"]
        return float(capital.iloc[-1] / capital.iloc[0] - 1)

    strategy_run = AlgoPipelineBacktester(
        prices=prices, algos=strategy_algos, initial_capital=initial_capital
    )
    strat_ret = _total_return(strategy_run.run())

    random_rets: list[float] = []
    for offset in range(n_random):
        pipeline: list[Algo] = [
            RunMonthly(),
            SelectAll(),
            WeighRandomly(seed=seed + offset),
            Rebalance(),
        ]
        random_run = AlgoPipelineBacktester(
            prices=prices, algos=pipeline, initial_capital=initial_capital
        )
        random_rets.append(_total_return(random_run.run()))

    beaten = sum(1 for candidate in random_rets if strat_ret > candidate)
    # max(..., 1) keeps the division defined when no random runs were requested.
    percentile = beaten / max(len(random_rets), 1) * 100
    return RandomBenchmarkResult(
        strategy_return=strat_ret,
        random_returns=random_rets,
        percentile=percentile,
    )
================================================
FILE: options_portfolio_backtester/engine/strategy_tree.py
================================================
"""Hierarchical strategy tree runner."""
from __future__ import annotations
from dataclasses import dataclass, field
import pandas as pd
from options_portfolio_backtester.engine.engine import BacktestEngine
@dataclass
class StrategyTreeNode:
    """Node in a capital-allocation strategy tree.

    A node is either a leaf (it carries an ``engine``) or a branch (it carries
    ``children``); supplying both raises ``ValueError``.
    """

    # Display name; also used as the key for per-leaf results.
    name: str
    # Relative weight among the siblings of this node.
    weight: float = 1.0
    # Optional cap on the capital share a leaf may receive.
    max_share: float | None = None
    # Engine to run for a leaf node; None for branches.
    engine: BacktestEngine | None = None
    # Child nodes of a branch.
    children: list["StrategyTreeNode"] = field(default_factory=list)

    def __post_init__(self) -> None:
        if self.engine is not None and self.children:
            raise ValueError(
                f"StrategyTreeNode '{self.name}' has both engine and children; "
                "a node must be either a leaf (engine) or a branch (children), not both"
            )

    def is_leaf(self) -> bool:
        """Return True when this node carries an engine."""
        return self.engine is not None

    def to_dot(self) -> str:
        """Generate Graphviz DOT string for this subtree."""
        lines = [
            "digraph StrategyTree {",
            " rankdir=TB;",
            ' node [style=filled, fillcolor=lightyellow];',
        ]
        self._dot_recursive(lines, parent_id=None)
        lines.append("}")
        return "\n".join(lines)

    @staticmethod
    def _dot_escape(text: str) -> str:
        """Escape backslashes and double quotes for a quoted DOT string."""
        return text.replace("\\", "\\\\").replace('"', '\\"')

    def _dot_recursive(self, lines: list[str], parent_id: str | None) -> None:
        """Append this node, its incoming edge and its subtree to *lines*."""
        node_id = f"n{id(self)}"
        # The name is user supplied: an unescaped quote would end the label
        # early and make the whole graph unparsable.
        label = f"{self._dot_escape(str(self.name))}\\nw={self.weight}"
        if self.max_share is not None:
            label += f"\\nmax={self.max_share}"
        shape = "ellipse" if self.is_leaf() else "box"
        lines.append(f' {node_id} [label="{label}", shape={shape}];')
        if parent_id:
            lines.append(f" {parent_id} -> {node_id};")
        for child in self.children:
            child._dot_recursive(lines, node_id)
class StrategyTreeEngine:
    """Run leaf engines with capital shares implied by tree weights.

    Attributes:
        root: Root of the strategy tree.
        initial_capital: Capital distributed across the leaves.
        throttles: Per-leaf record of shares reduced by ``max_share`` during
            the most recent ``run``.
        leaf_weights: ``{leaf name: applied share}`` of the most recent run.
        attribution: ``{leaf name: {"weight", "capital"}}`` of the most
            recent run.
        balance: Combined balance table of the most recent run.
    """

    def __init__(self, root: StrategyTreeNode, initial_capital: int = 1_000_000) -> None:
        self.root = root
        self.initial_capital = initial_capital
        self.throttles: dict[str, dict[str, float]] = {}
        # Result attributes exist from construction so they can be inspected
        # before the first run without an AttributeError.
        self.leaf_weights: dict[str, float] = {}
        self.attribution: dict[str, dict[str, float]] = {}
        self.balance: pd.DataFrame = pd.DataFrame()

    def to_dot(self) -> str:
        """Generate Graphviz DOT string for the strategy tree."""
        return self.root.to_dot()

    def _leaf_shares(self, node: StrategyTreeNode, parent_share: float) -> list[tuple[StrategyTreeNode, float]]:
        """Return ``(leaf, share)`` pairs for the subtree under *node*.

        A branch splits *parent_share* among its children in proportion to
        their weights; a leaf receives it capped at ``max_share``.  Capped
        leaves are recorded in ``self.throttles``.  Branches without children
        or with a non-positive weight total contribute nothing.
        """
        if node.is_leaf():
            capped = min(parent_share, node.max_share) if node.max_share is not None else parent_share
            if capped < parent_share:
                self.throttles[node.name] = {"requested_share": parent_share, "applied_share": capped}
            return [(node, capped)]
        if not node.children:
            return []
        total = sum(c.weight for c in node.children)
        if total <= 0:
            return []
        out: list[tuple[StrategyTreeNode, float]] = []
        for child in node.children:
            out.extend(self._leaf_shares(child, parent_share * (child.weight / total)))
        return out

    def run(self, rebalance_freq: int = 0, monthly: bool = False, sma_days: int | None = None) -> dict[str, pd.DataFrame]:
        """Run every leaf engine on its capital share.

        Each leaf engine's ``initial_capital`` is temporarily set to its
        rounded allocation and restored afterwards, even if the engine raises.
        ``self.balance`` receives one ``"<leaf>_capital"`` column per leaf,
        ``unallocated_cash``, ``total capital``, ``% change`` and
        ``accumulated return``.

        Returns:
            ``{leaf name: value returned by the leaf engine's run()}``.
        """
        # Throttles describe a single run; drop entries from previous runs so
        # a cap that no longer applies is not reported.
        self.throttles.clear()
        leaf_allocs = self._leaf_shares(self.root, 1.0)
        results: dict[str, pd.DataFrame] = {}
        self.leaf_weights = {leaf.name: w for leaf, w in leaf_allocs}
        self.attribution = {}
        allocated_share = float(sum(w for _, w in leaf_allocs))
        unallocated_share = max(0.0, 1.0 - allocated_share)
        balances: list[pd.DataFrame] = []
        for leaf, share in leaf_allocs:
            cap = round(self.initial_capital * share)
            saved_capital = leaf.engine.initial_capital
            leaf.engine.initial_capital = cap
            try:
                trade_log = leaf.engine.run(rebalance_freq=rebalance_freq, monthly=monthly, sma_days=sma_days)
            finally:
                # Never leave a caller-owned engine with our temporary capital.
                leaf.engine.initial_capital = saved_capital
            results[leaf.name] = trade_log
            self.attribution[leaf.name] = {
                "weight": share,
                "capital": cap,
            }
            b = leaf.engine.balance[["total capital"]].rename(columns={"total capital": f"{leaf.name}_capital"})
            balances.append(b)
        if balances:
            self.balance = pd.concat(balances, axis=1)
            cap_cols = [c for c in self.balance.columns if c.endswith("_capital")]
            self.balance["unallocated_cash"] = float(self.initial_capital * unallocated_share)
            self.balance["total capital"] = self.balance[cap_cols].sum(axis=1) + self.balance["unallocated_cash"]
            self.balance["% change"] = self.balance["total capital"].pct_change()
            self.balance["accumulated return"] = (1.0 + self.balance["% change"]).cumprod()
        else:
            self.balance = pd.DataFrame()
        return results
================================================
FILE: options_portfolio_backtester/execution/__init__.py
================================================
================================================
FILE: options_portfolio_backtester/execution/_rust_bridge.py
================================================
"""Rust execution functions from _ob_rust."""
from options_portfolio_backtester._ob_rust import (
rust_option_cost,
rust_stock_cost,
rust_fill_price,
rust_nearest_delta_index,
rust_max_value_index,
rust_risk_check,
)
================================================
FILE: options_portfolio_backtester/execution/cost_model.py
================================================
"""Transaction cost models for options and stocks."""
from __future__ import annotations
from abc import ABC, abstractmethod
from options_portfolio_backtester.execution._rust_bridge import (
rust_option_cost, rust_stock_cost,
)
class TransactionCostModel(ABC):
    """Abstract interface every transaction cost model implements.

    Concrete models provide one method for option trades and one for stock
    trades; both return a commission amount.
    """

    @abstractmethod
    def option_cost(self, price: float, quantity: int, shares_per_contract: int) -> float:
        """Return total commission for an options trade.

        Args:
            price: Price of the option trade.
            quantity: Number of contracts traded.
            shares_per_contract: Contract multiplier.
        """

    @abstractmethod
    def stock_cost(self, price: float, quantity: float) -> float:
        """Return total commission for a stock trade.

        Args:
            price: Price of the stock trade.
            quantity: Number of shares traded.
        """
class NoCosts(TransactionCostModel):
    """Cost model that charges nothing — matches original behavior."""

    def option_cost(self, price: float, quantity: int, shares_per_contract: int) -> float:
        """Options trade for free, whatever the arguments."""
        return 0.0

    def stock_cost(self, price: float, quantity: float) -> float:
        """Stocks trade for free, whatever the arguments."""
        return 0.0

    def to_rust_config(self) -> dict:
        """Return the configuration dict identifying this model."""
        config = {"type": "NoCosts"}
        return config
class PerContractCommission(TransactionCostModel):
    """Flat commission per option contract (e.g., $0.65/contract for IBKR).

    Stock trades are charged ``stock_rate`` per share.  The actual arithmetic
    is delegated to the ``rust_option_cost`` / ``rust_stock_cost`` bridge
    functions with the ``"PerContract"`` model tag.
    """

    def __init__(self, rate: float = 0.65, stock_rate: float = 0.005) -> None:
        self.rate = rate
        self.stock_rate = stock_rate  # per-share

    def option_cost(self, price: float, quantity: int, shares_per_contract: int) -> float:
        """Commission for an options trade of *quantity* contracts."""
        contracts = float(quantity)
        # No tier table applies to this model, hence the empty list.
        return rust_option_cost(
            "PerContract", self.rate, self.stock_rate, [], price, contracts, shares_per_contract
        )

    def stock_cost(self, price: float, quantity: float) -> float:
        """Commission for a stock trade of *quantity* shares."""
        shares = float(quantity)
        return rust_stock_cost("PerContract", self.rate, self.stock_rate, [], price, shares)

    def to_rust_config(self) -> dict:
        """Return the configuration dict describing this model."""
        return dict(type="PerContract", rate=self.rate, stock_rate=self.stock_rate)
class TieredCommission(TransactionCostModel):
    """Commission schedule with volume tiers.

    ``tiers`` is a list of ``(max_contracts, rate)`` pairs, expected in
    ascending ``max_contracts`` order; contracts beyond the last tier use the
    last tier's rate.  A missing or empty ``tiers`` argument selects the
    built-in IBKR-style schedule.  Pricing is delegated to the Rust bridge
    with the ``"Tiered"`` model tag.
    """

    def __init__(self, tiers: list[tuple[int, float]] | None = None,
                 stock_rate: float = 0.005) -> None:
        if not tiers:
            # Default: IBKR-style tiers
            tiers = [
                (10_000, 0.65),
                (50_000, 0.50),
                (100_000, 0.25),
            ]
        self.tiers = tiers
        self.stock_rate = stock_rate

    def option_cost(self, price: float, quantity: int, shares_per_contract: int) -> float:
        """Commission for an options trade of *quantity* contracts."""
        contracts = float(quantity)
        # The flat per-contract rate is unused by the tiered model: pass 0.0.
        return rust_option_cost(
            "Tiered", 0.0, self.stock_rate, self.tiers, price, contracts, shares_per_contract
        )

    def stock_cost(self, price: float, quantity: float) -> float:
        """Commission for a stock trade of *quantity* shares."""
        shares = float(quantity)
        return rust_stock_cost("Tiered", 0.0, self.stock_rate, self.tiers, price, shares)

    def to_rust_config(self) -> dict:
        """Return the configuration dict describing this model."""
        tier_pairs = [(limit, tier_rate) for limit, tier_rate in self.tiers]
        return {
            "type": "Tiered",
            "tiers": tier_pairs,
            "stock_rate": self.stock_rate,
        }
class SpreadSlippage(TransactionCostModel):
    """Slippage expressed as a fraction of the bid-ask spread.

    Example: ``SpreadSlippage(pct=0.5)`` means half the spread is paid on top
    of the execution price.  ``pct`` must lie in ``[0, 1]``.
    """

    def __init__(self, pct: float = 0.5) -> None:
        assert 0.0 <= pct <= 1.0
        self.pct = pct

    def option_cost(self, price: float, quantity: int, shares_per_contract: int) -> float:
        """Always 0.0: this model carries no commission component."""
        # Slippage is modeled separately via fill_model; returning zero here
        # lets the class be composed with a commission model.
        return 0.0

    def stock_cost(self, price: float, quantity: float) -> float:
        """Always 0.0: this model carries no commission component."""
        return 0.0

    def slippage(self, bid: float, ask: float, quantity: int, shares_per_contract: int) -> float:
        """Compute dollar slippage from the spread.

        The result is ``pct * |ask - bid| * |quantity| * shares_per_contract``.
        """
        spread = abs(ask - bid)
        contracts = abs(quantity)
        return self.pct * spread * contracts * shares_per_contract
================================================
FILE: options_portfolio_backtester/execution/fill_model.py
================================================
"""Fill models — determine the execution price for trades."""
from __future__ import annotations
from abc import ABC, abstractmethod
import pandas as pd
from options_portfolio_backtester.core.types import Direction
from options_portfolio_backtester.execution._rust_bridge import rust_fill_price
class FillModel(ABC):
    """Abstract interface deciding the price at which a trade is filled."""

    @abstractmethod
    def get_fill_price(self, row: pd.Series, direction: Direction) -> float:
        """Return the execution price for a given option quote row and direction.

        Args:
            row: One option quote.
            direction: Side of the trade being filled.
        """
class MarketAtBidAsk(FillModel):
    """Fill at the quoted side of the book — matches original behavior.

    The price is read from the quote column named by
    ``direction.price_column``.
    """

    def get_fill_price(self, row: pd.Series, direction: Direction) -> float:
        """Return the quote value in the direction's price column as a float."""
        quoted = row[direction.price_column]
        return float(quoted)

    def to_rust_config(self) -> dict:
        """Return the configuration dict identifying this model."""
        config = {"type": "MarketAtBidAsk"}
        return config
class MidPrice(FillModel):
    """Fill halfway between the quoted bid and ask, regardless of direction."""

    def get_fill_price(self, row: pd.Series, direction: Direction) -> float:
        """Return ``(bid + ask) / 2`` from the quote's ``bid`` and ``ask`` columns."""
        bid, ask = float(row["bid"]), float(row["ask"])
        return (bid + ask) / 2.0

    def to_rust_config(self) -> dict:
        """Return the configuration dict identifying this model."""
        config = {"type": "MidPrice"}
        return config
class VolumeAwareFill(FillModel):
    """Fill price that adjusts for volume impact.

    For low-volume contracts, the fill is pushed toward the less favorable
    price. Above `full_volume_threshold`, the fill is at bid/ask.  The price
    itself is computed by ``rust_fill_price`` with the ``"VolumeAware"`` model
    tag.
    """

    def __init__(self, full_volume_threshold: int = 100) -> None:
        self.full_volume_threshold = full_volume_threshold

    def get_fill_price(self, row: pd.Series, direction: Direction) -> float:
        """Return the fill price for *row* on the given side.

        A ``volume`` entry that is absent or any kind of missing value
        (``None``, NaN of any float type, ``pd.NA``) is forwarded as ``None``.
        """
        bid = float(row["bid"])
        ask = float(row["ask"])
        is_buy = direction == Direction.BUY
        vol_raw = row.get("volume")
        # pd.isna covers None, float/NumPy NaN and pd.NA alike; the previous
        # ``isinstance(x, float)`` test let non-float NaNs through as real
        # volumes and crashed in float() on pd.NA.
        volume = None if pd.isna(vol_raw) else float(vol_raw)
        return rust_fill_price("VolumeAware", self.full_volume_threshold, bid, ask, volume, is_buy)

    def to_rust_config(self) -> dict:
        """Return the configuration dict describing this model."""
        return {"type": "VolumeAware", "full_volume_threshold": self.full_volume_threshold}
================================================
FILE: options_portfolio_backtester/execution/signal_selector.py
================================================
"""Signal selectors — choose which contract to trade from a set of candidates."""
from __future__ import annotations
from abc import ABC, abstractmethod
import pandas as pd
from options_portfolio_backtester.execution._rust_bridge import (
rust_nearest_delta_index, rust_max_value_index,
)
class SignalSelector(ABC):
    """Abstract interface that picks one entry signal out of many candidates."""

    @property
    def column_requirements(self) -> list[str]:
        """Extra columns needed from raw options data beyond standard signal fields.

        The base implementation needs none.
        """
        no_extra_columns: list[str] = []
        return no_extra_columns

    @abstractmethod
    def select(self, candidates: pd.DataFrame) -> pd.Series:
        """Return the single row (as Series) to execute from candidates.

        Args:
            candidates: DataFrame of entry signals, pre-sorted if entry_sort was set.

        Returns:
            A single row (pd.Series) from candidates.
        """
class FirstMatch(SignalSelector):
    """Take the top candidate — matches original iloc[0] behavior."""

    def select(self, candidates: pd.DataFrame) -> pd.Series:
        """Return the first row of *candidates*."""
        top_row = candidates.iloc[0]
        return top_row

    def to_rust_config(self) -> dict:
        """Return the configuration dict identifying this selector."""
        config = {"type": "FirstMatch"}
        return config
class NearestDelta(SignalSelector):
    """Choose the candidate whose delta lies closest to ``target_delta``.

    The delta is read from ``delta_column``; when that column is missing the
    selector falls back to the first candidate.  The index of the best row is
    computed by ``rust_nearest_delta_index``.
    """

    def __init__(self, target_delta: float = -0.30, delta_column: str = "delta") -> None:
        self.target_delta = target_delta
        self.delta_column = delta_column

    @property
    def column_requirements(self) -> list[str]:
        """The delta column must be carried over from the raw options data."""
        return [self.delta_column]

    def select(self, candidates: pd.DataFrame) -> pd.Series:
        """Return the row picked for ``target_delta`` (first row without deltas)."""
        if self.delta_column in candidates.columns:
            deltas = candidates[self.delta_column].tolist()
            position = rust_nearest_delta_index(deltas, self.target_delta)
        else:
            position = 0
        return candidates.iloc[position]

    def to_rust_config(self) -> dict:
        """Return the configuration dict describing this selector."""
        return dict(type="NearestDelta", target=self.target_delta, column=self.delta_column)
class MaxOpenInterest(SignalSelector):
    """Choose the candidate with the highest open interest (proxy for liquidity).

    Open interest is read from ``oi_column`` only (default ``"openinterest"``;
    pass e.g. ``"open_interest"`` for data that spells it that way).  When the
    column is missing the selector falls back to the first candidate.  The
    index of the best row is computed by ``rust_max_value_index``.
    """

    def __init__(self, oi_column: str = "openinterest") -> None:
        self.oi_column = oi_column

    @property
    def column_requirements(self) -> list[str]:
        """The open-interest column must be carried over from the raw options data."""
        return [self.oi_column]

    def select(self, candidates: pd.DataFrame) -> pd.Series:
        """Return the row with the largest open interest (first row without the column)."""
        if self.oi_column in candidates.columns:
            open_interest = candidates[self.oi_column].astype(float).tolist()
            position = rust_max_value_index(open_interest)
        else:
            position = 0
        return candidates.iloc[position]

    def to_rust_config(self) -> dict:
        """Return the configuration dict describing this selector."""
        return dict(type="MaxOpenInterest", column=self.oi_column)
================================================
FILE: options_portfolio_backtester/execution/sizer.py
================================================
"""Position sizing models — determine how many contracts to trade."""
from __future__ import annotations
from abc import ABC, abstractmethod
class PositionSizer(ABC):
    """Abstract interface deciding how many contracts a trade should use."""

    @abstractmethod
    def size(self, cost_per_contract: float, available_capital: float,
             total_capital: float) -> int:
        """Return the number of contracts to trade.

        Args:
            cost_per_contract: Dollar cost for one contract (absolute value).
            available_capital: Capital allocated to this trade.
            total_capital: Total portfolio value.
        """
class CapitalBased(PositionSizer):
    """Spend the whole allocation on contracts — matches original behavior.

    qty = available_capital // |cost_per_contract|
    """

    def size(self, cost_per_contract: float, available_capital: float,
             total_capital: float) -> int:
        """Return how many whole contracts the allocation buys (0 for a free contract)."""
        unit_cost = abs(cost_per_contract)
        # A zero cost would divide by zero; such contracts are not sized.
        return 0 if cost_per_contract == 0 else int(available_capital // unit_cost)
class FixedQuantity(PositionSizer):
    """Trade a constant number of contracts, scaled down when unaffordable."""

    def __init__(self, quantity: int = 1) -> None:
        self.quantity = quantity

    def size(self, cost_per_contract: float, available_capital: float,
             total_capital: float) -> int:
        """Return ``quantity``, or as many whole contracts as the allocation covers."""
        unit_cost = abs(cost_per_contract)
        affordable = unit_cost * self.quantity <= available_capital
        if affordable:
            return self.quantity
        if cost_per_contract == 0:
            return 0
        return int(available_capital // unit_cost)
class FixedDollar(PositionSizer):
    """Commit a constant dollar amount to every position."""

    def __init__(self, amount: float = 10_000.0) -> None:
        self.amount = amount

    def size(self, cost_per_contract: float, available_capital: float,
             total_capital: float) -> int:
        """Return the whole contracts bought with ``min(amount, available_capital)``."""
        if cost_per_contract == 0:
            return 0
        # Never budget more than the capital allocated to this trade.
        budget = min(self.amount, available_capital)
        unit_cost = abs(cost_per_contract)
        return int(budget // unit_cost)
class PercentOfPortfolio(PositionSizer):
    """Commit a fixed fraction of total portfolio value to every position.

    ``pct`` must lie in ``(0, 1]``.
    """

    def __init__(self, pct: float = 0.01) -> None:
        assert 0.0 < pct <= 1.0
        self.pct = pct

    def size(self, cost_per_contract: float, available_capital: float,
             total_capital: float) -> int:
        """Return the whole contracts bought with ``min(pct * total, available)``."""
        if cost_per_contract == 0:
            return 0
        # Never budget more than the capital allocated to this trade.
        budget = min(self.pct * total_capital, available_capital)
        unit_cost = abs(cost_per_contract)
        return int(budget // unit_cost)
================================================
FILE: options_portfolio_backtester/portfolio/__init__.py
================================================
================================================
FILE: options_portfolio_backtester/portfolio/greeks.py
================================================
"""Portfolio-level Greeks aggregation."""
from __future__ import annotations
from options_portfolio_backtester.core.types import Greeks
from options_portfolio_backtester.portfolio.position import OptionPosition
def aggregate_greeks(
    positions: dict[int, OptionPosition],
    leg_greeks_by_position: dict[int, dict[str, Greeks]],
) -> Greeks:
    """Sum the Greeks of every option position into one portfolio figure.

    Args:
        positions: {position_id: OptionPosition}.
        leg_greeks_by_position: {position_id: {leg_name: Greeks}}; a position
            without an entry is evaluated with an empty mapping.

    Returns:
        Total portfolio Greeks (a default ``Greeks()`` when there are no
        positions).
    """
    portfolio_total = Greeks()
    for position_id in positions:
        per_leg = leg_greeks_by_position.get(position_id, {})
        contribution = positions[position_id].greeks(per_leg)
        portfolio_total = portfolio_total + contribution
    return portfolio_total
================================================
FILE: options_portfolio_backtester/portfolio/portfolio.py
================================================
"""Portfolio — clean replacement for MultiIndex DataFrames.
Uses plain dicts and dataclasses instead of MultiIndex DataFrames for
inventory tracking. Simpler, extensible, debuggable.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
from options_portfolio_backtester.core.types import Greeks
from options_portfolio_backtester.portfolio.position import OptionPosition
from options_portfolio_backtester.portfolio.greeks import aggregate_greeks
@dataclass
class StockHolding:
    """One stock line of the portfolio: what is held and at what price."""

    # Ticker of the held stock.
    symbol: str
    # Number of shares held.
    quantity: float
    # Average price paid per share.
    cost_basis: float
class Portfolio:
"""Portfolio state — cash, option positions, stock holdings.
Replaces the old MultiIndex _options_inventory and _stocks_inventory
DataFrames with typed, inspectable data structures.
"""
def __init__(self, initial_cash: float = 0.0) -> None:
self.cash: float = initial_cash
self.option_positions: dict[int, OptionPosition] = {}
self.stock_holdings: dict[str, StockHolding] = {}
self._next_position_id: int = 0
def next_position_id(self) -> int:
pid = self._next_position_id
self._next_position_id += 1
return pid
# -- Option positions --
def add_option_position(self, pos: OptionPosition) -> None:
self.option_positions[pos.position_id] = pos
def remove_option_position(self, position_id: int) -> OptionPosition | None:
return self.option_positions.pop(position_id, None)
def options_value(self, current_prices: dict[int, dict[str, float]],
shares_per_contract: int) -> float:
"""Total mark-to-market value of all option positions.
Args:
current_prices: {position_id: {leg_name: exit_price}}.
shares_per_contract: Contract multiplier.
"""
total = 0.0
for pid, pos in self.option_positions.items():
prices = current_prices.get(pid, {})
total += pos.current_value(prices, shares_per_contract)
return total
# -- Stock holdings --
def set_stock_holding(self, symbol: str, quantity: float,
price: float) -> None:
self.stock_holdings[symbol] = StockHolding(
symbol=symbol, quantity=quantity, cost_basis=price,
)
def clear_stock_holdings(self) -> None:
self.stock_holdings.clear()
def stocks_value(self, current_prices: dict[str, float]) -> float:
"""Total value of stock holdings at current prices."""
total = 0.0
for symbol, holding in self.stock_holdings.items():
price = current_prices.get(symbol, holding.cost_basis)
total += holding.quantity * price
return total
# -- Portfolio totals --
def total_value(self, stock_prices: dict[str, float],
option_prices: dict[int, dict[str, float]],
shares_per_contract: int) -> float:
"""Total portfolio value: cash + stocks + options."""
return (self.cash
+ self.stocks_value(stock_prices)
+ self.options_value(option_prices, shares_per_contract))
def portfolio_greeks(self,
                     leg_greeks_by_position: dict[int, dict[str, Greeks]]) -> Greeks:
    """Aggregate Greeks over all option positions via ``aggregate_greeks``.

    Args:
        leg_greeks_by_position: Per-position mapping of leg name to Greeks.
    """
    positions = self.option_positions
    return aggregate_greeks(positions, leg_greeks_by_position)
================================================
FILE: options_portfolio_backtester/portfolio/position.py
================================================
"""Option position and position leg — replaces MultiIndex inventory rows."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
from options_portfolio_backtester.core.types import (
Direction, OptionType, Order, Greeks, get_order, Signal,
)
@dataclass
class PositionLeg:
    """One leg of an option position."""

    name: str
    contract_id: str
    underlying: str
    expiration: Any  # pd.Timestamp
    option_type: OptionType
    strike: float
    entry_price: float
    direction: Direction
    order: Order

    @property
    def exit_order(self) -> Order:
        """The inverse (``~``) of the order stored on this leg."""
        entry_order = self.order
        return ~entry_order

    def current_value(self, current_price: float, quantity: int,
                      shares_per_contract: int) -> float:
        """Signed mark-to-market value of this leg.

        A SELL leg contributes ``-current_price * quantity * shares_per_contract``
        (we owe it); any other direction contributes the positive product
        (we own it).
        """
        if self.direction == Direction.SELL:
            sign = -1
        else:
            sign = 1
        return sign * current_price * quantity * shares_per_contract
@dataclass
class OptionPosition:
    """A multi-leg option position.

    Takes the place of one row in the old MultiIndex _options_inventory
    DataFrame.
    """

    position_id: int
    legs: dict[str, PositionLeg] = field(default_factory=dict)
    quantity: int = 0
    entry_cost: float = 0.0  # total cost at entry (negative for debit)
    entry_date: Any = None  # pd.Timestamp

    def add_leg(self, leg: PositionLeg) -> None:
        """Store ``leg`` under its name, replacing any leg with the same name."""
        key = leg.name
        self.legs[key] = leg

    def current_value(self, current_prices: dict[str, float],
                      shares_per_contract: int) -> float:
        """Sum the mark-to-market value of every leg.

        Args:
            current_prices: {leg_name: exit_price}. A leg without a price
                is valued at 0.0.
            shares_per_contract: Contract multiplier.
        """
        value = 0.0
        for leg_name in self.legs:
            leg = self.legs[leg_name]
            mark = current_prices[leg_name] if leg_name in current_prices else 0.0
            value = value + leg.current_value(mark, self.quantity, shares_per_contract)
        return value

    def greeks(self, leg_greeks: dict[str, Greeks]) -> Greeks:
        """Combine per-leg Greeks, scaled by the position quantity.

        BUY legs add their Greeks, every other direction subtracts them.
        A leg missing from ``leg_greeks`` contributes a default ``Greeks()``.

        Args:
            leg_greeks: {leg_name: Greeks} for each leg.
        """
        combined = Greeks()
        for leg_name, leg in self.legs.items():
            per_leg = leg_greeks.get(leg_name, Greeks())
            if leg.direction == Direction.BUY:
                sign = 1
            else:
                sign = -1
            scale = sign * self.quantity
            combined = combined + per_leg * scale
        return combined
================================================
FILE: options_portfolio_backtester/portfolio/risk.py
================================================
"""Risk management — constraints checked before entering positions."""
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass
from options_portfolio_backtester.core.types import Greeks
from options_portfolio_backtester.execution._rust_bridge import rust_risk_check
class RiskConstraint(ABC):
    """Abstract base for a single pre-trade risk constraint."""

    @abstractmethod
    def check(self, current_greeks: Greeks, proposed_greeks: Greeks,
              portfolio_value: float, peak_value: float) -> bool:
        """Return True when the trade is allowed, False when it violates the constraint."""

    @abstractmethod
    def describe(self) -> str:
        """Return a human-readable description of the constraint."""
def _greeks_list(g: Greeks) -> list[float]:
return [g.delta, g.gamma, g.theta, g.vega]
class MaxDelta(RiskConstraint):
    """Delta limit: reject trades that would push portfolio delta past ``limit``.

    The decision itself is delegated to ``rust_risk_check`` with kind
    ``"MaxDelta"``.
    """

    def __init__(self, limit: float = 100.0) -> None:
        self.limit = limit

    def check(self, current_greeks: Greeks, proposed_greeks: Greeks,
              portfolio_value: float, peak_value: float) -> bool:
        """Forward the limit, both Greeks vectors and the values to the Rust check."""
        threshold = self.limit
        current_vec = _greeks_list(current_greeks)
        proposed_vec = _greeks_list(proposed_greeks)
        return rust_risk_check(
            "MaxDelta", threshold, current_vec, proposed_vec,
            portfolio_value, peak_value,
        )

    def describe(self) -> str:
        """Return ``MaxDelta(limit=...)``."""
        return "MaxDelta(limit={})".format(self.limit)

    def to_rust_config(self) -> dict:
        """Return the ``type``/``limit`` config dict for this constraint."""
        return dict(type="MaxDelta", limit=self.limit)
class MaxVega(RiskConstraint):
    """Vega limit: reject trades that would push portfolio vega past ``limit``.

    The decision itself is delegated to ``rust_risk_check`` with kind
    ``"MaxVega"``.
    """

    def __init__(self, limit: float = 50.0) -> None:
        self.limit = limit

    def check(self, current_greeks: Greeks, proposed_greeks: Greeks,
              portfolio_value: float, peak_value: float) -> bool:
        """Forward the limit, both Greeks vectors and the values to the Rust check."""
        threshold = self.limit
        current_vec = _greeks_list(current_greeks)
        proposed_vec = _greeks_list(proposed_greeks)
        return rust_risk_check(
            "MaxVega", threshold, current_vec, proposed_vec,
            portfolio_value, peak_value,
        )

    def describe(self) -> str:
        """Return ``MaxVega(limit=...)``."""
        return "MaxVega(limit={})".format(self.limit)

    def to_rust_config(self) -> dict:
        """Return the ``type``/``limit`` config dict for this constraint."""
        return dict(type="MaxVega", limit=self.limit)
class MaxDrawdown(RiskConstraint):
    """Drawdown limit: reject new entries once drawdown exceeds ``max_dd_pct``.

    The decision itself is delegated to ``rust_risk_check`` with kind
    ``"MaxDrawdown"``.
    """

    def __init__(self, max_dd_pct: float = 0.20) -> None:
        self.max_dd_pct = max_dd_pct

    def check(self, current_greeks: Greeks, proposed_greeks: Greeks,
              portfolio_value: float, peak_value: float) -> bool:
        """Forward the threshold, both Greeks vectors and the values to the Rust check."""
        threshold = self.max_dd_pct
        current_vec = _greeks_list(current_greeks)
        proposed_vec = _greeks_list(proposed_greeks)
        return rust_risk_check(
            "MaxDrawdown", threshold, current_vec, proposed_vec,
            portfolio_value, peak_value,
        )

    def describe(self) -> str:
        """Return ``MaxDrawdown(max_dd_pct=...)``."""
        return "MaxDrawdown(max_dd_pct={})".format(self.max_dd_pct)

    def to_rust_config(self) -> dict:
        """Return the ``type``/``max_dd_pct`` config dict for this constraint."""
        return dict(type="MaxDrawdown", max_dd_pct=self.max_dd_pct)
class RiskManager:
    """Evaluates a set of risk constraints before allowing a trade."""

    def __init__(self, constraints: list[RiskConstraint] | None = None) -> None:
        """Create a manager with an optional initial set of constraints.

        Args:
            constraints: Constraints to start with. The sequence is copied,
                so later ``add_constraint`` calls never mutate the caller's
                list.
        """
        # Copy instead of aliasing: previously a non-empty list was shared with
        # the caller (and mutated by add_constraint) while an empty one was not.
        self.constraints: list[RiskConstraint] = list(constraints) if constraints else []

    def add_constraint(self, constraint: RiskConstraint) -> None:
        """Append ``constraint``; it is checked after the existing ones."""
        self.constraints.append(constraint)

    def is_allowed(self, current_greeks: Greeks, proposed_greeks: Greeks,
                   portfolio_value: float, peak_value: float) -> tuple[bool, str]:
        """Check the constraints in order.

        Returns:
            ``(True, "")`` when every constraint passes, otherwise
            ``(False, description)`` for the first constraint that fails.
        """
        for constraint in self.constraints:
            if not constraint.check(current_greeks, proposed_greeks,
                                    portfolio_value, peak_value):
                return False, constraint.describe()
        return True, ""
================================================
FILE: options_portfolio_backtester/strategy/__init__.py
================================================
================================================
FILE: options_portfolio_backtester/strategy/presets.py
================================================
"""Pre-built strategy constructors for common options strategies."""
from __future__ import annotations
from typing import TYPE_CHECKING
from options_portfolio_backtester.core.types import Direction, OptionType
from options_portfolio_backtester.strategy.strategy import Strategy
from options_portfolio_backtester.strategy.strategy_leg import StrategyLeg
if TYPE_CHECKING:
from options_portfolio_backtester.data.schema import Schema
def strangle(
    schema: "Schema",
    underlying: str,
    direction: Direction,
    dte_range: tuple[int, int],
    dte_exit: int,
    otm_pct: float = 0.0,
    pct_tolerance: float = 1.0,
    exit_thresholds: tuple[float, float] = (float("inf"), float("inf")),
) -> Strategy:
    """Build a two-leg strangle (call + put) traded in ``direction``.

    Each leg is entered when DTE lies in ``dte_range`` and the strike sits
    within ``otm_pct ± pct_tolerance`` percent out of the money (above the
    underlying price for the call, below it for the put). Both legs exit
    once DTE is at or below ``dte_exit``.
    """
    strat = Strategy(schema)
    near_bound = (otm_pct - pct_tolerance) / 100
    far_bound = (otm_pct + pct_tolerance) / 100

    def entry_window():
        # Underlying and DTE conditions shared by both legs; rebuilt per leg.
        return (
            (schema.underlying == underlying)
            & (schema.dte >= dte_range[0])
            & (schema.dte <= dte_range[1])
        )

    call_leg = StrategyLeg("leg_1", schema, option_type=OptionType.CALL, direction=direction)
    call_leg.entry_filter = (
        entry_window()
        & (schema.strike >= schema.underlying_last * (1 + near_bound))
        & (schema.strike <= schema.underlying_last * (1 + far_bound))
    )
    call_leg.exit_filter = schema.dte <= dte_exit

    put_leg = StrategyLeg("leg_2", schema, option_type=OptionType.PUT, direction=direction)
    put_leg.entry_filter = (
        entry_window()
        & (schema.strike <= schema.underlying_last * (1 - near_bound))
        & (schema.strike >= schema.underlying_last * (1 - far_bound))
    )
    put_leg.exit_filter = schema.dte <= dte_exit

    strat.add_legs([call_leg, put_leg])
    profit_pct, loss_pct = exit_thresholds[0], exit_thresholds[1]
    strat.add_exit_thresholds(profit_pct, loss_pct)
    return strat
def iron_condor(
    schema: "Schema",
    underlying: str,
    dte_range: tuple[int, int],
    dte_exit: int,
    short_delta_call: float = 0.30,
    short_delta_put: float = -0.30,
    wing_width: float = 5.0,
    exit_thresholds: tuple[float, float] = (float("inf"), float("inf")),
) -> Strategy:
    """Build a short iron condor (sell inner, buy outer wings).

    Four legs are created in this order: short call, long call, short put,
    long put. Every leg uses the same entry filter (underlying plus DTE
    window) and exits once DTE is at or below ``dte_exit``.

    NOTE(review): ``short_delta_call``, ``short_delta_put`` and ``wing_width``
    are accepted but not used by this body, so the filters built here do not
    separate inner from outer strikes. For delta-based selection, use a
    NearestDelta signal_selector on each leg.
    """
    strat = Strategy(schema)

    # (leg name, option type, direction) in the order the legs are added.
    leg_specs = (
        ("leg_1", OptionType.CALL, Direction.SELL),  # short call (inner)
        ("leg_2", OptionType.CALL, Direction.BUY),   # long call (outer wing)
        ("leg_3", OptionType.PUT, Direction.SELL),   # short put (inner)
        ("leg_4", OptionType.PUT, Direction.BUY),    # long put (outer wing)
    )

    legs = []
    for leg_name, leg_type, leg_direction in leg_specs:
        leg = StrategyLeg(leg_name, schema, option_type=leg_type, direction=leg_direction)
        leg.entry_filter = (
            (schema.underlying == underlying)
            & (schema.dte >= dte_range[0])
            & (schema.dte <= dte_range[1])
        )
        leg.exit_filter = schema.dte <= dte_exit
        legs.append(leg)

    strat.add_legs(legs)
    profit_pct, loss_pct = exit_thresholds[0], exit_thresholds[1]
    strat.add_exit_thresholds(profit_pct, loss_pct)
    return strat
def covered_call(
    schema: "Schema",
    underlying: str,
    dte_range: tuple[int, int],
    dte_exit: int,
    otm_pct: float = 2.0,
    pct_tolerance: float = 1.0,
    exit_thresholds: tuple[float, float] = (float("inf"), float("inf")),
) -> Strategy:
    """Build the option side of a covered call: a single short OTM call.

    The call is entered when DTE lies in ``dte_range`` and the strike is
    ``otm_pct ± pct_tolerance`` percent above the underlying price; it exits
    once DTE is at or below ``dte_exit``.
    """
    strat = Strategy(schema)
    near_bound = (otm_pct - pct_tolerance) / 100
    far_bound = (otm_pct + pct_tolerance) / 100

    short_call = StrategyLeg("leg_1", schema, option_type=OptionType.CALL, direction=Direction.SELL)
    dte_window = (
        (schema.underlying == underlying)
        & (schema.dte >= dte_range[0])
        & (schema.dte <= dte_range[1])
    )
    short_call.entry_filter = (
        dte_window
        & (schema.strike >= schema.underlying_last * (1 + near_bound))
        & (schema.strike <= schema.underlying_last * (1 + far_bound))
    )
    short_call.exit_filter = schema.dte <= dte_exit

    strat.add_leg(short_call)
    profit_pct, loss_pct = exit_thresholds[0], exit_thresholds[1]
    strat.add_exit_thresholds(profit_pct, loss_pct)
    return strat
def cash_secured_put(
    schema: "Schema",
    underlying: str,
    dte_range: tuple[int, int],
    dte_exit: int,
    otm_pct: float = 2.0,
    pct_tolerance: float = 1.0,
    exit_thresholds: tuple[float, float] = (float("inf"), float("inf")),
) -> Strategy:
    """Build a cash-secured put strategy: a single short OTM put.

    The put is entered when DTE lies in ``dte_range`` and the strike is
    ``otm_pct ± pct_tolerance`` percent below the underlying price; it exits
    once DTE is at or below ``dte_exit``.
    """
    strat = Strategy(schema)
    near_bound = (otm_pct - pct_tolerance) / 100
    far_bound = (otm_pct + pct_tolerance) / 100

    short_put = StrategyLeg("leg_1", schema, option_type=OptionType.PUT, direction=Direction.SELL)
    dte_window = (
        (schema.underlying == underlying)
        & (schema.dte >= dte_range[0])
        & (schema.dte <= dte_range[1])
    )
    short_put.entry_filter = (
        dte_window
        & (schema.strike <= schema.underlying_last * (1 - near_bound))
        & (schema.strike >= schema.underlying_last * (1 - far_bound))
    )
    short_put.exit_filter = schema.dte <= dte_exit

    strat.add_leg(short_put)
    profit_pct, loss_pct = exit_thresholds[0], exit_thresholds[1]
    strat.add_exit_thresholds(profit_pct, loss_pct)
    return strat
def collar(
    schema: "Schema",
    underlying: str,
    dte_range: tuple[int, int],
    dte_exit: int,
    call_otm_pct: float = 2.0,
    put_otm_pct: float = 2.0,
    pct_tolerance: float = 1.0,
    exit_thresholds: tuple[float, float] = (float("inf"), float("inf")),
) -> Strategy:
    """Build the option side of a collar: short OTM call plus long OTM put.

    Each leg is entered when DTE lies in ``dte_range`` and its strike is
    within its own OTM band (``call_otm_pct`` / ``put_otm_pct``, each
    ``± pct_tolerance`` percent). Both legs exit once DTE is at or below
    ``dte_exit``.
    """
    strat = Strategy(schema)
    call_near = (call_otm_pct - pct_tolerance) / 100
    call_far = (call_otm_pct + pct_tolerance) / 100
    put_near = (put_otm_pct - pct_tolerance) / 100
    put_far = (put_otm_pct + pct_tolerance) / 100

    def entry_window():
        # Underlying and DTE conditions shared by both legs; rebuilt per leg.
        return (
            (schema.underlying == underlying)
            & (schema.dte >= dte_range[0])
            & (schema.dte <= dte_range[1])
        )

    short_call = StrategyLeg("leg_1", schema, option_type=OptionType.CALL, direction=Direction.SELL)
    short_call.entry_filter = (
        entry_window()
        & (schema.strike >= schema.underlying_last * (1 + call_near))
        & (schema.strike <= schema.underlying_last * (1 + call_far))
    )
    short_call.exit_filter = schema.dte <= dte_exit

    long_put = StrategyLeg("leg_2", schema, option_type=OptionType.PUT, direction=Direction.BUY)
    long_put.entry_filter = (
        entry_window()
        & (schema.strike <= schema.underlying_last * (1 - put_near))
        & (schema.strike >= schema.underlying_last * (1 - put_far))
    )
    long_put.exit_filter = schema.dte <= dte_exit

    strat.add_legs([short_call, long_put])
    profit_pct, loss_pct = exit_thresholds[0], exit_thresholds[1]
    strat.add_exit_thresholds(profit_pct, loss_pct)
    return strat
def butterfly(
    schema: "Schema",
    underlying: str,
    dte_range: tuple[int, int],
    dte_exit: int,
    option_type: OptionType = OptionType.CALL,
    exit_thresholds: tuple[float, float] = (float("inf"), float("inf")),
) -> Strategy:
    """Build a long butterfly spread (buy lower, sell middle, buy upper).

    All three legs share the same entry filter (underlying plus DTE window)
    and exit once DTE is at or below ``dte_exit``. The wings are told apart
    by ``entry_sort`` on strike: ascending for the lower wing, descending
    for the upper wing. The middle leg sets no sort; per the original
    design note its double quantity is handled by the sizer.
    """
    strat = Strategy(schema)

    # (leg name, direction, entry_sort or None) in the order legs are added.
    leg_specs = (
        ("leg_1", Direction.BUY, ("strike", True)),    # lower wing: lowest strike first
        ("leg_2", Direction.SELL, None),               # middle (sell 2x)
        ("leg_3", Direction.BUY, ("strike", False)),   # upper wing: highest strike first
    )

    legs = []
    for leg_name, leg_direction, sort_spec in leg_specs:
        leg = StrategyLeg(leg_name, schema, option_type=option_type, direction=leg_direction)
        leg.entry_filter = (
            (schema.underlying == underlying)
            & (schema.dte >= dte_range[0])
            & (schema.dte <= dte_range[1])
        )
        if sort_spec is not None:
            leg.entry_sort = sort_spec
        leg.exit_filter = schema.dte <= dte_exit
        legs.append(leg)

    strat.add_legs(legs)
    profit_pct, loss_pct = exit_thresholds[0], exit_thresholds[1]
    strat.add_exit_thresholds(profit_pct, loss_pct)
    return strat
class Strangle(Strategy):
    """Class-based strangle: a Strategy pre-populated with a call and a put leg."""

    def __init__(
        self,
        schema: "Schema",
        name: str,
        underlying: str,
        dte_entry_range: tuple[int, int],
        dte_exit: int,
        otm_pct: float = 0,
        pct_tolerance: float = 1,
        exit_thresholds: tuple[float, float] = (float('inf'), float('inf')),
        shares_per_contract: int = 100,
    ) -> None:
        """Build the two legs and register the exit thresholds.

        Args:
            schema: Schema used to build the leg filters.
            name: ``'short'`` or ``'long'`` (case-insensitive); selects SELL
                or BUY for both legs.
            underlying: Underlying symbol to match.
            dte_entry_range: Inclusive (min, max) DTE window for entry.
            dte_exit: Legs exit once DTE is at or below this value.
            otm_pct: Centre of the OTM band, in percent.
            pct_tolerance: Half-width of the OTM band, in percent.
            exit_thresholds: (profit_pct, loss_pct) passed to
                ``add_exit_thresholds``.
            shares_per_contract: Accepted but not used by this constructor.
        """
        side = name.lower()
        assert side in ('short', 'long')
        super().__init__(schema)
        direction = Direction.SELL if name.lower() == 'short' else Direction.BUY

        call_leg = StrategyLeg("leg_1", schema, option_type=OptionType.CALL, direction=direction)

        near_bound = (otm_pct - pct_tolerance) / 100
        far_bound = (otm_pct + pct_tolerance) / 100

        call_leg.entry_filter = (
            (schema.underlying == underlying)
            & (schema.dte >= dte_entry_range[0])
            & (schema.dte <= dte_entry_range[1])
            & (schema.strike >= schema.underlying_last * (1 + near_bound))
            & (schema.strike <= schema.underlying_last * (1 + far_bound))
        )
        call_leg.exit_filter = (schema.dte <= dte_exit)

        put_leg = StrategyLeg("leg_2", schema, option_type=OptionType.PUT, direction=direction)
        put_leg.entry_filter = (
            (schema.underlying == underlying)
            & (schema.dte >= dte_entry_range[0])
            & (schema.dte <= dte_entry_range[1])
            & (schema.strike <= schema.underlying_last * (1 - near_bound))
            & (schema.strike >= schema.underlying_last * (1 - far_bound))
        )
        put_leg.exit_filter = (schema.dte <= dte_exit)

        self.add_legs([call_leg, put_leg])
        profit_pct, loss_pct = exit_thresholds[0], exit_thresholds[1]
        self.add_exit_thresholds(profit_pct, loss_pct)
================================================
FILE: options_portfolio_backtester/strategy/strategy.py
================================================
"""Strategy container — preserved interface with richer execution support."""
from __future__ import annotations
import math
from typing import TYPE_CHECKING
import numpy as np
import pandas as pd
from options_portfolio_backtester.execution.cost_model import TransactionCostModel, NoCosts
from options_portfolio_backtester.execution.sizer import PositionSizer, CapitalBased
from options_portfolio_backtester.execution.signal_selector import SignalSelector, FirstMatch
if TYPE_CHECKING:
from options_portfolio_backtester.data.schema import Schema
from .strategy_leg import StrategyLeg
class Strategy:
    """Options strategy: an ordered collection of legs plus exit thresholds.

    API-compatible with backtester.strategy.strategy.Strategy, with optional
    strategy-level cost_model, sizer and signal_selector.
    """

    def __init__(
        self,
        schema: "Schema",
        cost_model: TransactionCostModel | None = None,
        sizer: PositionSizer | None = None,
        signal_selector: SignalSelector | None = None,
    ) -> None:
        """Create an empty strategy.

        Falsy ``cost_model`` / ``sizer`` / ``signal_selector`` arguments are
        replaced by ``NoCosts()``, ``CapitalBased()`` and ``FirstMatch()``.
        """
        self.schema = schema
        self.legs: list[StrategyLeg] = []
        self.conditions: list = []
        self.exit_thresholds: tuple[float, float] = (math.inf, math.inf)
        self.cost_model = cost_model if cost_model else NoCosts()
        self.sizer = sizer if sizer else CapitalBased()
        self.signal_selector = signal_selector if signal_selector else FirstMatch()

    def add_leg(self, leg: "StrategyLeg") -> "Strategy":
        """Append ``leg`` and rename it ``leg_<n>`` from the current leg count.

        The leg must share this strategy's schema. Names are derived from the
        number of legs at insertion time, so legs kept after ``remove_leg``
        retain their earlier names.
        """
        assert self.schema == leg.schema
        ordinal = len(self.legs) + 1
        leg.name = "leg_{}".format(ordinal)
        self.legs.append(leg)
        return self

    def add_legs(self, legs: list["StrategyLeg"]) -> "Strategy":
        """Append every leg in order via ``add_leg``; returns ``self``."""
        for candidate in legs:
            self.add_leg(candidate)
        return self

    def remove_leg(self, leg_number: int) -> "Strategy":
        """Remove the leg at list index ``leg_number``; returns ``self``."""
        self.legs.pop(leg_number)
        return self

    def clear_legs(self) -> "Strategy":
        """Replace the leg list with a new empty list; returns ``self``."""
        self.legs = list()
        return self

    def add_exit_thresholds(self, profit_pct: float = math.inf,
                            loss_pct: float = math.inf) -> None:
        """Set the (profit, loss) exit thresholds; both must be non-negative."""
        assert profit_pct >= 0
        assert loss_pct >= 0
        self.exit_thresholds = (profit_pct, loss_pct)

    def filter_thresholds(self, entry_cost: pd.Series,
                          current_cost: pd.Series) -> pd.Series:
        """Flag positions whose excess return breaches either threshold.

        Returns a boolean Series that is True where the excess return is at
        or above the profit threshold, or at or below minus the loss
        threshold.
        """
        profit_limit, loss_limit = self.exit_thresholds
        cost_ratio = current_cost / entry_cost + 1
        orientation = -np.sign(entry_cost)
        excess_return = cost_ratio * orientation
        hit_profit = excess_return >= profit_limit
        hit_loss = excess_return <= -loss_limit
        return hit_profit | hit_loss

    def __repr__(self) -> str:
        return "Strategy(legs={}, exit_thresholds={})".format(self.legs, self.exit_thresholds)
================================================
FILE: options_portfolio_backtester/strategy/strategy_leg.py
================================================
"""Strategy leg — re-exports the original StrategyLeg for now.
The new StrategyLeg is API-compatible with the original and adds support
for the new execution components (signal_selector, fill_model).
"""
from __future__ import annotations
from typing import TYPE_CHECKING
from options_portfolio_backtester.core.types import Direction, OptionType
from options_portfolio_backtester.execution.signal_selector import SignalSelector, FirstMatch
from options_portfolio_backtester.execution.fill_model import FillModel, MarketAtBidAsk
if TYPE_CHECKING:
from options_portfolio_backtester.data.schema import Filter, Schema
class StrategyLeg:
    """A single option leg in a strategy.

    API-compatible with backtester.strategy.strategy_leg.StrategyLeg, with
    optional per-leg signal_selector and fill_model.
    """

    def __init__(
        self,
        name: str,
        schema: "Schema",
        option_type: OptionType = OptionType.CALL,
        direction: Direction = Direction.BUY,
        signal_selector: SignalSelector | None = None,
        fill_model: FillModel | None = None,
    ) -> None:
        """Create a leg whose filters start out as the base filters only."""
        self.name = name
        self.schema = schema
        self.type = option_type
        self.direction = direction
        # None means "use the engine-level default" for both components.
        self.signal_selector = signal_selector
        self.fill_model = fill_model
        self.entry_sort: tuple[str, bool] | None = None
        self._entry_filter: "Filter" = self._base_entry_filter()
        self._exit_filter: "Filter" = self._base_exit_filter()

    @property
    def entry_filter(self) -> "Filter":
        """Current entry filter (base filter AND any user-supplied filter)."""
        return self._entry_filter

    @entry_filter.setter
    def entry_filter(self, flt: "Filter") -> None:
        # Always re-anchor on the base filter so the option type and quote
        # conditions cannot be dropped by assignment.
        base = self._base_entry_filter()
        self._entry_filter = base & flt

    @property
    def exit_filter(self) -> "Filter":
        """Current exit filter (base filter AND any user-supplied filter)."""
        return self._exit_filter

    @exit_filter.setter
    def exit_filter(self, flt: "Filter") -> None:
        base = self._base_exit_filter()
        self._exit_filter = base & flt

    def _base_entry_filter(self) -> "Filter":
        """Option type must match; BUY legs need ask > 0, other legs need bid > 0."""
        type_matches = self.schema.type == self.type.value
        if self.direction == Direction.BUY:
            has_quote = self.schema.ask > 0
        else:
            has_quote = self.schema.bid > 0
        return type_matches & has_quote

    def _base_exit_filter(self) -> "Filter":
        """Option type must match."""
        return self.schema.type == self.type.value

    def __repr__(self) -> str:
        template = (
            "StrategyLeg(name={}, type={}, "
            "direction={}, entry_filter={}, "
            "exit_filter={})"
        )
        return template.format(
            self.name, self.type, self.direction,
            self._entry_filter, self._exit_filter,
        )
================================================
FILE: pyproject.toml
================================================
[build-system]
requires = ["maturin>=1.7,<2.0"]
build-backend = "maturin"
[project]
name = "options_portfolio_backtester"
version = "0.3.0"
description = "The open-source options backtesting framework"
readme = "README.md"
license = {text = "MIT"}
requires-python = ">=3.11"
dependencies = [
"pandas>=2.1",
"numpy>=1.26",
"altair>=5.0",
"pyprind>=2.11",
"pyarrow>=14.0",
]
[project.optional-dependencies]
rust = ["polars>=1.0,<1.6"]
charts = ["seaborn>=0.13", "matplotlib>=3.8"]
dev = [
"pytest>=8.0",
"hypothesis>=6.0",
"pytest-benchmark",
"mypy>=1.8",
"ruff>=0.3",
"pandas-stubs",
"maturin>=1.7",
]
notebooks = ["jupyter", "nbconvert"]
[tool.maturin]
manifest-path = "rust/ob_python/Cargo.toml"
module-name = "options_portfolio_backtester._ob_rust"
python-source = "."
features = ["pyo3/extension-module"]
[tool.setuptools.packages.find]
include = ["options_portfolio_backtester*"]
[tool.pytest.ini_options]
testpaths = ["tests"]
addopts = "-m \"not bench\" --ignore=tests/bench --ignore=tests/convexity --ignore=tests/compat --ignore=tests/test_deep_analytics_convexity.py"
markers = [
"bench: benchmark/property tests requiring explicit opt-in",
"slow: full-range stress tests (17-year SPY, minutes to run)",
"chaos: chaos / fault-injection tests",
]
filterwarnings = ["ignore::DeprecationWarning"]
[tool.mypy]
python_version = "3.12"
warn_unused_configs = true
disallow_untyped_defs = false
ignore_missing_imports = true
check_untyped_defs = false
[tool.ruff]
line-length = 119
target-version = "py312"
[tool.ruff.lint]
select = ["E", "F", "W", "I"]
ignore = ["E126", "F403", "F405", "W504"]
================================================
FILE: rust/.cargo/config.toml
================================================
[env]
PYO3_USE_ABI3_FORWARD_COMPATIBILITY = "1"
================================================
FILE: rust/Cargo.toml
================================================
[workspace]
members = ["ob_core", "ob_python"]
resolver = "2"
================================================
FILE: rust/ob_core/Cargo.toml
================================================
[package]
name = "ob_core"
version = "0.1.0"
edition = "2021"
[dependencies]
polars = { version = "0.48", features = ["lazy", "parquet", "dtype-struct", "semi_anti_join"] }
arrow = { version = "55", features = ["ffi"] }
chrono = "0.4"
rayon = "1.10"
thiserror = "2"
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
[[bench]]
name = "hot_paths"
harness = false
================================================
FILE: rust/ob_core/benches/hot_paths.rs
================================================
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use polars::prelude::*;
use ob_core::entries::{compute_entry_qty, compute_leg_entries};
use ob_core::exits::threshold_exit_mask;
use ob_core::filter::CompiledFilter;
use ob_core::inventory::join_inventory_to_market;
use ob_core::stats::compute_stats;
use ob_core::types::Direction;
fn make_options_df(n: usize) -> DataFrame {
let contracts: Vec = (0..n).map(|i| format!("SPX_{i}")).collect();
let underlyings: Vec<&str> = vec!["SPX"; n];
let types: Vec<&str> = (0..n)
.map(|i| if i % 2 == 0 { "put" } else { "call" })
.collect();
let expirations: Vec<&str> = vec!["2024-06-01"; n];
let strikes: Vec = (0..n).map(|i| 3800.0 + i as f64 * 5.0).collect();
let asks: Vec = (0..n).map(|i| 1.0 + (i % 50) as f64 * 0.5).collect();
let bids: Vec = asks.iter().map(|a| a * 0.95).collect();
let dtes: Vec = (0..n).map(|i| 30 + (i % 180) as i32).collect();
DataFrame::new(vec![
Column::new("optionroot".into(), contracts),
Column::new("underlying".into(), underlyings),
Column::new("type".into(), types),
Column::new("expiration".into(), expirations),
Column::new("strike".into(), strikes),
Column::new("ask".into(), asks),
Column::new("bid".into(), bids),
Column::new("dte".into(), dtes),
])
.unwrap()
}
fn bench_inventory_join(c: &mut Criterion) {
let opts = make_options_df(10_000);
let n_inv = 50;
let contracts: Vec = (0..n_inv).map(|i| format!("SPX_{i}")).collect();
let qtys: Vec = vec![10.0; n_inv];
let types: Vec = (0..n_inv)
.map(|i| {
if i % 2 == 0 {
"put".into()
} else {
"call".into()
}
})
.collect();
let underlyings: Vec = vec!["SPX".into(); n_inv];
let strikes: Vec = (0..n_inv).map(|i| 3800.0 + i as f64 * 5.0).collect();
c.bench_function("inventory_join_50x10k", |b| {
b.iter(|| {
let result = join_inventory_to_market(
black_box(&contracts),
black_box(&qtys),
black_box(&types),
black_box(&underlyings),
black_box(&strikes),
black_box(&opts),
None,
"optionroot",
"quotedate",
"bid",
None,
None,
Direction::Buy,
100,
)
.unwrap();
black_box(result.height());
});
});
}
/// Benchmark applying an already-compiled five-clause filter to a 10k-row frame.
///
/// Compilation happens once, outside the timed closure.
fn bench_filter_compile_and_apply(c: &mut Criterion) {
    let market = make_options_df(10_000);
    let expression =
        "(type == 'put') & (ask > 0) & (underlying == 'SPX') & (dte >= 60) & (dte <= 120)";
    let compiled = CompiledFilter::new(expression).unwrap();

    c.bench_function("filter_apply_10k", |bencher| {
        bencher.iter(|| {
            let matched = compiled.apply(black_box(&market)).unwrap();
            black_box(matched.height());
        });
    });
}
/// Benchmark compiling the five-clause filter expression on every iteration.
fn bench_filter_compile(c: &mut Criterion) {
    c.bench_function("filter_compile", |bencher| {
        bencher.iter(|| {
            let expression = black_box(
                "(type == 'put') & (ask > 0) & (underlying == 'SPX') & (dte >= 60) & (dte <= 120)",
            );
            let compiled = CompiledFilter::new(expression).unwrap();
            black_box(&compiled);
        });
    });
}
fn bench_entry_computation(c: &mut Criterion) {
let opts = make_options_df(10_000);
let held: Vec = (0..10).map(|i| format!("SPX_{i}")).collect();
let filter = CompiledFilter::new("(type == 'put') & (ask > 0) & (dte >= 60)").unwrap();
c.bench_function("entry_compute_10k", |b| {
b.iter(|| {
let result = compute_leg_entries(
black_box(&opts),
black_box(&held),
black_box(&filter),
"optionroot",
"ask",
Some("strike"),
true,
100,
false,
)
.unwrap();
black_box(result.height());
});
});
}
fn bench_exit_mask(c: &mut Criterion) {
let n = 1000;
let entries: Vec = (0..n).map(|i| 100.0 + (i % 50) as f64).collect();
let currents: Vec = (0..n).map(|i| 80.0 + (i % 80) as f64).collect();
let entry_s = Series::new("entry".into(), &entries);
let current_s = Series::new("current".into(), ¤ts);
c.bench_function("exit_mask_1k", |b| {
b.iter(|| {
let mask = threshold_exit_mask(
black_box(&entry_s),
black_box(¤t_s),
Some(0.5),
Some(0.2),
)
.unwrap();
black_box(mask.len());
});
});
}
fn bench_stats_computation(c: &mut Criterion) {
let n = 2520; // ~10 years of trading days
let returns: Vec = (0..n).map(|i| ((i as f64 * 0.1).sin()) * 0.02).collect();
let pnls: Vec = (0..100)
.map(|i| if i % 3 == 0 { -50.0 } else { 100.0 })
.collect();
c.bench_function("stats_10yr", |b| {
b.iter(|| {
let s = compute_stats(black_box(&returns), black_box(&pnls), 0.02);
black_box(s);
});
});
}
fn bench_entry_qty(c: &mut Criterion) {
let n = 5000;
let costs: Vec = (0..n).map(|i| 50.0 + (i % 200) as f64).collect();
let series = Series::new("cost".into(), &costs);
c.bench_function("entry_qty_5k", |b| {
b.iter(|| {
let qty = compute_entry_qty(black_box(&series), 1_000_000.0).unwrap();
black_box(qty.len());
});
});
}
// Register every benchmark function in this file under the `benches` group
// and hand that group to criterion's entry-point macro.
criterion_group!(
    benches,
    bench_inventory_join,
    bench_filter_compile,
    bench_filter_compile_and_apply,
    bench_entry_computation,
    bench_exit_mask,
    bench_stats_computation,
    bench_entry_qty,
);
criterion_main!(benches);
================================================
FILE: rust/ob_core/src/backtest.rs
================================================
//! Full backtest loop — mirrors BacktestEngine.run() for parity.
//!
//! Pre-partitions all data by date at startup for O(1) lookups instead of
//! O(n) DataFrame scans on each access. Uses i64 nanosecond timestamps as
//! HashMap keys to avoid string conversion overhead entirely.
//!
//! Key optimizations:
//! - filter_by_date() → HashMap::get() O(n) → O(1)
//! - get_contract_field_f64() → DayOptions::get_f64() O(n) → O(1)
//! - get_contract_field_str() → DayOptions::get_str() O(n) → O(1)
//! - get_symbol_price() → DayStocks::get_price() O(n) → O(1)
//! - Date keys are i64 (nanoseconds) — no string allocation or comparison.
use std::collections::HashMap;
use chrono::DateTime;
use polars::prelude::*;
use crate::cost_model::CostModel;
use crate::entries::compute_leg_entries;
use crate::fill_model::FillModel;
use crate::filter::CompiledFilter;
use crate::risk::{self, RiskConstraint};
use crate::signal_selector::SignalSelector;
use crate::stats;
use crate::stats::Stats;
use crate::types::{Direction, Greeks, LegConfig};
/// Settings for a backtest run.
///
/// NOTE(review): several fields below read as bare `Vec` / `Option` with no
/// type argument. That looks like lost text (`<...>` stripped) rather than
/// intended code — restore the element types from the upstream source.
#[derive(Clone)]
pub struct BacktestConfig {
    // NOTE(review): the three `allocation_*` fields look like portfolio
    // fractions for stocks / options / cash — confirm against the engine.
    pub allocation_stocks: f64,
    pub allocation_options: f64,
    pub allocation_cash: f64,
    pub initial_capital: f64,
    pub shares_per_contract: i64,
    // NOTE(review): presumably `Vec<LegConfig>` (imported above) — confirm.
    pub legs: Vec,
    // NOTE(review): presumably exit thresholds, `None` meaning "disabled" — confirm.
    pub profit_pct: Option,
    pub loss_pct: Option,
    // NOTE(review): presumably parallel vectors (symbol, weight) — confirm.
    pub stock_symbols: Vec,
    pub stock_percentages: Vec,
    /// Pre-computed rebalance dates as nanoseconds since epoch.
    pub rebalance_dates: Vec,
    /// Transaction cost model.
    pub cost_model: CostModel,
    /// Fill model for execution pricing.
    pub fill_model: FillModel,
    /// Engine-level signal selector.
    pub signal_selector: SignalSelector,
    /// Risk constraints checked before entries.
    pub risk_constraints: Vec,
    /// SMA days for stock gating (None = no SMA gate).
    pub sma_days: Option,
    /// Options budget as a percentage of total capital per rebalance (overrides allocation_options).
    pub options_budget_pct: Option,
    /// Annual options budget as a percentage of total capital, auto-divided by rebalances/year.
    pub options_budget_annual_pct: Option,
    /// Stop the backtest if cash goes negative (mirrors Python's stop_if_broke).
    pub stop_if_broke: bool,
    /// Maximum short notional as fraction of total capital (None = no limit).
    pub max_notional_pct: Option,
    /// Check exits on every trading day, not just rebalance dates.
    pub check_exits_daily: bool,
    /// When true, spend the full budget each rebalance ignoring existing position value.
    /// Default (false) uses target model: spend = budget - existing_options_value.
    pub options_budget_fresh_spend: bool,
    /// When true, rebalance stocks immediately after daily option exits.
    /// Allows reinvesting put profits into stocks without waiting for the next rebalance date.
    pub rebalance_stocks_on_exit: bool,
}
/// Output bundle of a backtest: a balance frame, a trade-log frame, the
/// final cash figure and the summary statistics.
pub struct BacktestResult {
    /// Balance table as a Polars frame.
    pub balance: DataFrame,
    /// Trade log as a Polars frame.
    pub trade_log: DataFrame,
    pub final_cash: f64,
    /// Summary statistics (`crate::stats::Stats`).
    pub stats: Stats,
}
/// Configuration for one strategy slot in a multi-strategy backtest.
///
/// NOTE(review): `Vec` / `Option` appear here without type arguments, which
/// looks like stripped `<...>` text — restore from the upstream source.
#[derive(Clone)]
pub struct StrategySlotConfig {
    pub name: String,
    // NOTE(review): presumably `Vec<LegConfig>` — confirm.
    pub legs: Vec,
    // NOTE(review): presumably this slot's share of capital — confirm.
    pub weight: f64,
    // NOTE(review): presumably nanoseconds since epoch, matching
    // `BacktestConfig::rebalance_dates` — confirm.
    pub rebalance_dates: Vec,
    pub profit_pct: Option,
    pub loss_pct: Option,
    pub check_exits_daily: bool,
}
/// An open option position tracked by the backtest loop, stored as
/// per-leg vectors plus position-level quantity, entry cost and Greeks.
///
/// NOTE(review): the `Vec` fields carry no element type, which looks like
/// stripped `<...>` text — restore from the upstream source.
struct Position {
    // NOTE(review): the `leg_*` vectors are presumably index-aligned, one
    // element per leg — confirm.
    leg_contracts: Vec,
    leg_types: Vec,
    leg_directions: Vec,
    quantity: f64,
    entry_cost: f64,
    greeks: Greeks,
    /// Entry-time metadata per leg, used as fallback when contract is missing from today's data.
    leg_underlyings: Vec,
    leg_expirations: Vec,
    leg_strikes: Vec,
}
/// A stock holding: symbol, quantity and a price.
struct StockHolding {
    symbol: String,
    qty: f64,
    // NOTE(review): presumably the price at which the holding was set — confirm.
    price: f64,
}
/// Per-leg per-position entry in trade log (flat, converted to MultiIndex in Python).
struct TradeRow {
    // NOTE(review): presumably nanoseconds since epoch like the other date keys — confirm.
    date: i64,
    // NOTE(review): bare `Vec`; presumably `Vec<LegTradeData>` with its
    // `<...>` stripped — confirm.
    leg_data: Vec,
    total_cost: f64,
    qty: f64,
}
/// Trade-log details for a single leg; all descriptive fields are stored as strings.
struct LegTradeData {
    contract: String,
    underlying: String,
    expiration: String,
    opt_type: String,
    strike: f64,
    cost: f64,
    order: String,
}
/// Balance row for a single date range day.
struct BalanceDay {
    // NOTE(review): presumably nanoseconds since epoch like the other date keys — confirm.
    date: i64,
    cash: f64,
    calls_capital: f64,
    puts_capital: f64,
    options_qty: f64,
    stocks_qty: f64,
    /// (name, value) pairs; NOTE(review): presumably keyed by stock symbol — confirm.
    stock_values: Vec<(String, f64)>,
    /// (name, quantity) pairs; NOTE(review): presumably keyed by stock symbol — confirm.
    stock_qtys: Vec<(String, f64)>,
}
// ---------------------------------------------------------------------------
// Date conversion helpers.
// ---------------------------------------------------------------------------
/// Render a nanoseconds-since-epoch timestamp as a "YYYY-MM-DD HH:MM:SS" string.
///
/// Sub-second precision is dropped by the output format. If the timestamp cannot be
/// turned into a `DateTime`, an empty string is returned.
fn ns_to_datestring(ns: i64) -> String {
    const NANOS_PER_SEC: i64 = 1_000_000_000;

    // Euclidean division keeps the sub-second remainder in 0..NANOS_PER_SEC
    // even for negative (pre-epoch) timestamps, so the cast to u32 is lossless.
    let whole_secs = ns.div_euclid(NANOS_PER_SEC);
    let subsec_nanos = ns.rem_euclid(NANOS_PER_SEC) as u32;

    match DateTime::from_timestamp(whole_secs, subsec_nanos) {
        Some(moment) => moment.format("%Y-%m-%d %H:%M:%S").to_string(),
        None => String::new(),
    }
}
/// Parse "YYYY-MM-DD HH:MM:SS" to nanoseconds since epoch.
///
/// The string is interpreted as a UTC wall-clock time.
///
/// Returns `None` when the string does not match the format, or when the instant
/// does not fit in an i64 nanosecond count (roughly outside the years 1677–2262).
fn parse_datestring_to_ns(s: &str) -> Option {
    let parsed = chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S").ok()?;
    // A plain `*` here would panic in debug builds and silently wrap in release
    // builds for far-away dates (e.g. a "9999-12-31 00:00:00" sentinel);
    // `checked_mul` reports those as `None` instead.
    parsed.and_utc().timestamp().checked_mul(1_000_000_000)
}
/// Read the value at `idx` of a date-like column as an i64 nanosecond key.
///
/// Supported column types:
/// - `Datetime` in any time unit: scaled up to nanoseconds.
/// - `Date`: day count multiplied by the number of nanoseconds in a day.
/// - anything else: treated as a string column and parsed with `parse_datestring_to_ns`.
///
/// Null values, non-string fallback columns and unparsable strings all yield `0`.
fn extract_date_ns(col: &Column, idx: usize) -> i64 {
    const NANOS_PER_DAY: i64 = 86_400_000_000_000;

    match col.dtype() {
        DataType::Datetime(unit, _) => {
            // The dtype was matched just above, so the typed accessor is expected to succeed.
            let raw = col.datetime().unwrap().get(idx).unwrap_or(0);
            let to_nanos: i64 = match unit {
                TimeUnit::Nanoseconds => 1,
                TimeUnit::Microseconds => 1_000,
                TimeUnit::Milliseconds => 1_000_000,
            };
            raw * to_nanos
        }
        DataType::Date => {
            let day_count = col.date().unwrap().get(idx).unwrap_or(0);
            day_count as i64 * NANOS_PER_DAY
        }
        _ => match col.str() {
            Ok(strings) => strings
                .get(idx)
                .and_then(parse_datestring_to_ns)
                .unwrap_or(0),
            Err(_) => 0,
        },
    }
}
/// Render the value at `idx` of a column as a `String`.
///
/// - String columns: the value itself (null becomes an empty string).
/// - `Datetime` / `Date` columns: formatted through `ns_to_datestring`
///   (null is treated as the epoch).
/// - Any other column type: an empty string.
fn column_value_to_string(col: &Column, idx: usize) -> String {
    const NANOS_PER_DAY: i64 = 86_400_000_000_000;

    if let Ok(strings) = col.str() {
        return strings.get(idx).map(str::to_string).unwrap_or_default();
    }

    // Normalise every temporal representation to nanoseconds first, then format once.
    let nanos = match col.dtype() {
        DataType::Datetime(unit, _) => {
            let raw = col.datetime().unwrap().get(idx).unwrap_or(0);
            match unit {
                TimeUnit::Nanoseconds => raw,
                TimeUnit::Microseconds => raw * 1_000,
                TimeUnit::Milliseconds => raw * 1_000_000,
            }
        }
        DataType::Date => {
            let day_count = col.date().unwrap().get(idx).unwrap_or(0);
            day_count as i64 * NANOS_PER_DAY
        }
        _ => return String::new(),
    };
    ns_to_datestring(nanos)
}
// ---------------------------------------------------------------------------
// Pre-partitioned data structures — O(1) date and contract lookups.
// ---------------------------------------------------------------------------
/// Options data for a single date with O(1) contract lookups.
struct DayOptions {
/// The rows of options data belonging to this date.
df: DataFrame,
/// contract_string → row index within `df`.
/// When a contract appears on several rows, the first row is the one recorded
/// (see `DayOptions::new`).
contract_idx: HashMap,
}
impl DayOptions {
    /// Wrap one day's options frame and index its rows by contract identifier.
    ///
    /// `contract_col` is the name of the column holding the contract identifiers.
    /// If that column is missing or is not a string column, the index stays empty
    /// and every lookup returns `None`. Null identifiers are skipped.
    fn new(df: DataFrame, contract_col: &str) -> Self {
        let mut contract_idx = HashMap::with_capacity(df.height());
        if let Ok(ca) = df.column(contract_col).and_then(|col| col.str()) {
            for (row, name) in ca.into_iter().enumerate() {
                if let Some(name) = name {
                    // Keep first occurrence (matches original filter + iloc[0]).
                    contract_idx.entry(name.to_string()).or_insert(row);
                }
            }
        }
        DayOptions { df, contract_idx }
    }

    /// Get a float64 field for a contract.
    ///
    /// Returns `None` when the contract is unknown, the column does not exist,
    /// the value is null, or the value cannot be cast to f64.
    fn get_f64(&self, contract: &str, field: &str) -> Option {
        let &row_idx = self.contract_idx.get(contract)?;
        let col = self.df.column(field).ok()?;
        // Fast path: column is already f64.
        if let Ok(ca) = col.f64() {
            return ca.get(row_idx);
        }
        // Slow path (e.g. Int64 strike column): cast only the requested row.
        // Casting the whole column here would cost O(rows) on every lookup.
        // `row_idx` indexes an in-memory frame, so it always fits in i64.
        let cell = col
            .slice(row_idx as i64, 1)
            .cast(&DataType::Float64)
            .ok()?;
        cell.f64().ok()?.get(0)
    }

    /// Get a string field for a contract — O(1).
    /// Handles both String and Datetime columns (for expiration).
    ///
    /// Returns `None` when the contract is unknown, the column does not exist,
    /// or the rendered value is empty (which includes null strings and
    /// unsupported column types, see `column_value_to_string`).
    fn get_str(&self, contract: &str, field: &str) -> Option {
        let &row_idx = self.contract_idx.get(contract)?;
        let col = self.df.column(field).ok()?;
        let s = column_value_to_string(col, row_idx);
        if s.is_empty() {
            None
        } else {
            Some(s)
        }
    }

    /// Number of rows in this day's frame.
    fn height(&self) -> usize {
        self.df.height()
    }
}
/// Stocks data for a single date — O(1) price lookups.
struct DayStocks {
/// Price per stock symbol for this date; queried by symbol through `get_price`.
prices: HashMap,
}
impl DayStocks {
/// Look up the price stored for `symbol` on this date.
///
/// Returns a copy of the stored value, or `None` if the symbol has no entry.
fn get_price(&self, symbol: &str) -> Option {
self.prices.get(symbol).copied()
}
}
/// All data pre-partitioned by date.
///
/// Produced once by `prepartition_data` and then handed to
/// `run_backtest_prepartitioned` (see `run_backtest`).
pub struct PartitionedData {
/// Per-date options data.
/// NOTE(review): presumably keyed by the i64 nanosecond date key, with `DayOptions` values — confirm.
options: HashMap,
/// Per-date stocks data.
/// NOTE(review): presumably keyed by the i64 nanosecond date key, with `DayStocks` values — confirm.
stocks: HashMap,
/// All option dates as nanoseconds, sorted ascending.
all_dates_sorted: Vec,
}
/// Schema column name mappings passed from Python.
///
/// Each field holds the name of a column in the input data.
/// NOTE(review): the `stocks_*` fields presumably refer to the stocks frame and the
/// remaining fields to the options frame — confirm against `prepartition_data`.
#[derive(Clone)]
pub struct SchemaMapping {
/// Column holding the option contract identifier.
pub contract: String,
/// Column holding the quote date.
pub date: String,
/// Column holding the date in the stocks data.
pub stocks_date: String,
/// Column holding the stock symbol in the stocks data.
pub stocks_sym: String,
/// Column holding the stock price in the stocks data.
pub stocks_price: String,
/// Column holding the underlying symbol.
pub underlying: String,
/// Column holding the option expiration.
pub expiration: String,
/// Column holding the option type.
pub option_type: String,
/// Column holding the strike price.
pub strike: String,
}
// ---------------------------------------------------------------------------
// Main entry point.
// ---------------------------------------------------------------------------
/// Run a backtest directly from raw options and stocks frames.
///
/// Convenience wrapper: partitions the input once with `prepartition_data`, then
/// delegates to `run_backtest_prepartitioned` with the same `config` and `schema`.
///
/// # Errors
///
/// Propagates any error from partitioning the data or from the backtest itself.
pub fn run_backtest(
config: &BacktestConfig,
options_data: &DataFrame,
stocks_data: &DataFrame,
schema: &SchemaMapping,
) -> PolarsResult {
// Partitioning borrows the inputs; the partitioned view lives only for this call.
let partitioned = prepartition_data(options_data, stocks_data, schema)?;
run_backtest_prepartitioned(config, &partitioned, schema)
}
/// Pre-compiled entry and exit filters for a backtest config.
/// Avoids redundant filter parsing when running multiple configs in a sweep.
pub struct PrecompiledFilters {
pub entry: Vec