Repository: pcctradinginc-alt/Smart-Money-To-Option-Scanner
Branch: main
Commit: b32074aaf2da
Files: 53
Total size: 209.8 KB
Directory structure:
gitextract_9gdq6gkd/
├── .github/
│ └── workflows/
│ ├── backtest.yml
│ ├── daily_light.yml
│ ├── keepalive.yml
│ ├── main.yml
│ ├── thirteenf_dedicated.yml
│ ├── weekly_full.yml
│ └── weekly_review.yml
├── LICENSE
├── README.md
├── config/
│ ├── fund_weights.yaml
│ ├── funds_to_track.yaml
│ └── thresholds.yaml
├── docs/
│ ├── ARCHITECTURE.md
│ ├── BACKTESTING.md
│ └── DEPLOYMENT.md
├── requirements.txt
├── scripts/
│ ├── backtest.py
│ └── daily_scan.py
├── src/
│ ├── __init__.py
│ ├── ai/
│ │ ├── __init__.py
│ │ ├── outcome_tracker.py
│ │ └── single_analyzer.py
│ ├── alerts/
│ │ ├── __init__.py
│ │ └── email_sender.py
│ ├── enrich/
│ │ ├── __init__.py
│ │ ├── catalyst_finder.py
│ │ ├── macro_context.py
│ │ ├── options_prefilter.py
│ │ ├── price_context.py
│ │ └── sentiment.py
│ ├── execution/
│ │ ├── __init__.py
│ │ ├── exit_manager.py
│ │ └── tradier_client.py
│ ├── ingest/
│ │ ├── __init__.py
│ │ ├── eight_k_fetcher.py
│ │ ├── form4_fetcher.py
│ │ ├── gov_trades_fetcher.py
│ │ ├── news_fetcher.py
│ │ └── thirteenf_fetcher.py
│ ├── score/
│ │ ├── __init__.py
│ │ ├── fund_scorer.py
│ │ ├── signal_builder.py
│ │ └── signal_filter.py
│ └── utils/
│ ├── __init__.py
│ ├── config.py
│ ├── logger.py
│ ├── retry.py
│ ├── storage.py
│ └── ticker_resolver.py
└── tests/
├── __init__.py
├── test_ai.py
├── test_ingest.py
└── test_scoring.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/workflows/backtest.yml
================================================
# Workflow that runs scripts/backtest.py on a weekly schedule and on demand.
name: Backtest
on:
schedule:
# Every Sunday at 02:00 UTC (GitHub evaluates cron expressions in UTC).
- cron: '0 2 * * 0'
# Allows manual runs from the Actions tab.
workflow_dispatch:
jobs:
backtest:
runs-on: ubuntu-latest
# Abort the job if it runs longer than 30 minutes.
timeout-minutes: 30
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
# Quoted so the version stays a string.
python-version: '3.11'
# Reuse the pip download cache between runs.
cache: 'pip'
- name: Install deps
run: pip install -r requirements.txt
- name: Run Backtest
env:
# Mail credentials/recipient are passed in from repository secrets.
GMAIL_USER: ${{ secrets.GMAIL_USER }}
GMAIL_PASSWORD: ${{ secrets.GMAIL_PASSWORD }}
RECIPIENT_EMAIL: ${{ secrets.RECIPIENT_EMAIL }}
run: python scripts/backtest.py
================================================
FILE: .github/workflows/daily_light.yml
================================================
# Light scan on weekdays; the SQLite DB is carried between runs via the
# Actions cache.
name: Daily Light Scan

on:
  schedule:
    # Monday-Friday at 14:30 and 21:30 UTC.
    - cron: '30 14 * * 1-5'
    - cron: '30 21 * * 1-5'
  workflow_dispatch:

jobs:
  scan:
    runs-on: ubuntu-latest
    timeout-minutes: 15
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'
      - name: Install deps
        run: pip install -r requirements.txt
      - name: Restore DB cache
        # Restore-only action: the explicit save step below is the single
        # writer, so no second (post-job) save is attempted on the same key.
        uses: actions/cache/restore@v4
        with:
          path: data/scanner.db
          # Prefix bumped to v2 to discard the old, incompatible DB.
          # run_attempt keeps the key unique when a run is re-run; cache
          # entries are immutable, so a reused key could not be saved.
          key: scanner-db-v2-${{ github.run_id }}-${{ github.run_attempt }}
          restore-keys: |
            scanner-db-v2-
      - name: Run Daily Light
        env:
          GMAIL_USER: ${{ secrets.GMAIL_USER }}
          GMAIL_PASSWORD: ${{ secrets.GMAIL_PASSWORD }}
          RECIPIENT_EMAIL: ${{ secrets.RECIPIENT_EMAIL }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          TRADIER_API_KEY: ${{ secrets.TRADIER_API_KEY }}
          TRADIER_ACCOUNT_ID: ${{ secrets.TRADIER_ACCOUNT_ID }}
          # SEC compliance
          EDGAR_USER_AGENT: ${{ secrets.EDGAR_USER_AGENT }}
          # Ticker cache path
          TICKER_DB_PATH: "data/scanner.db"
        run: |
          mkdir -p data
          python scripts/daily_scan.py --run-mode daily_light
      - name: Save DB cache
        # Save even when the scan step failed so partial progress is kept.
        if: always()
        uses: actions/cache/save@v4
        with:
          path: data/scanner.db
          key: scanner-db-v2-${{ github.run_id }}-${{ github.run_attempt }}
================================================
FILE: .github/workflows/keepalive.yml
================================================
# Bimonthly no-op workflow; the deployment guide describes it as preventing
# the automatic disabling of scheduled workflows.
# NOTE(review): GitHub disables schedules in public repositories after 60 days
# without repository activity. A run that only echoes may not count as
# activity, and this cron fires roughly every 59-62 days - confirm that this
# workflow actually prevents the auto-disable.
name: Keepalive
on:
schedule:
# 00:00 UTC on day 1 of every second month.
- cron: '0 0 1 */2 *'
# Allows manual runs from the Actions tab.
workflow_dispatch:
jobs:
keepalive:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
# Writes a timestamped message to the job log; no other side effect.
- run: echo "Repo aktiv. Datum $(date)"
================================================
FILE: .github/workflows/main.yml
================================================
# Manual entry point: runs scripts/daily_scan.py in a user-selected mode.
name: Smart Money Scanner (Manual Run)

on:
  workflow_dispatch:
    inputs:
      run_mode:
        description: 'Welcher Modus soll laufen?'
        required: true
        default: 'daily_light'
        type: choice
        options:
          - daily_light
          - weekly_full
          - thirteenf
          - weekly_review

jobs:
  scan:
    runs-on: ubuntu-latest
    timeout-minutes: 45
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'
      - name: Install deps
        run: pip install -r requirements.txt
      - name: Restore DB cache
        # Restore-only action: the explicit save step below is the single
        # writer, so no second (post-job) save is attempted on the same key.
        uses: actions/cache/restore@v4
        with:
          path: data/scanner.db
          # Same v2 prefix as the Daily Light workflow so all workflows share
          # one DB lineage. run_id is unique across the repository
          # (run_number is only unique per workflow and collides between
          # workflows); run_attempt keeps re-runs unique as well.
          key: scanner-db-v2-${{ github.run_id }}-${{ github.run_attempt }}
          restore-keys: |
            scanner-db-v2-
      - name: Run Scanner (${{ github.event.inputs.run_mode }})
        env:
          # Passed through the environment instead of being interpolated
          # into the shell command line.
          RUN_MODE: ${{ github.event.inputs.run_mode }}
          GMAIL_USER: ${{ secrets.GMAIL_USER }}
          GMAIL_PASSWORD: ${{ secrets.GMAIL_PASSWORD }}
          RECIPIENT_EMAIL: ${{ secrets.RECIPIENT_EMAIL }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          TRADIER_API_KEY: ${{ secrets.TRADIER_API_KEY }}
          TRADIER_ACCOUNT_ID: ${{ secrets.TRADIER_ACCOUNT_ID }}
          # SEC compliance; the dedicated daily_light and thirteenf workflows
          # provide this for the same run modes.
          EDGAR_USER_AGENT: ${{ secrets.EDGAR_USER_AGENT }}
        run: |
          mkdir -p data
          python scripts/daily_scan.py --run-mode "$RUN_MODE"
      - name: Save DB cache
        # Save even when the scan step failed so partial progress is kept.
        if: always()
        uses: actions/cache/save@v4
        with:
          path: data/scanner.db
          key: scanner-db-v2-${{ github.run_id }}-${{ github.run_attempt }}
================================================
FILE: .github/workflows/thirteenf_dedicated.yml
================================================
# Quarterly 13F scan; the SQLite DB is carried between runs via the Actions
# cache.
name: 13F Dedicated Scan

on:
  schedule:
    # 06:00 UTC on the 15th of February, May, August and November.
    - cron: '0 6 15 2,5,8,11 *'
  workflow_dispatch:

jobs:
  scan:
    runs-on: ubuntu-latest
    timeout-minutes: 45
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'
      - name: Install deps
        run: pip install -r requirements.txt
      - name: Restore DB cache
        # Restore-only action: the explicit save step below is the single
        # writer, so no second (post-job) save is attempted on the same key.
        uses: actions/cache/restore@v4
        with:
          path: data/scanner.db
          # Same v2 prefix as the Daily Light workflow so all workflows share
          # one DB lineage. run_id is unique across the repository
          # (run_number is only unique per workflow and collides between
          # workflows); run_attempt keeps re-runs unique as well.
          key: scanner-db-v2-${{ github.run_id }}-${{ github.run_attempt }}
          restore-keys: |
            scanner-db-v2-
      - name: Run 13F Dedicated
        env:
          GMAIL_USER: ${{ secrets.GMAIL_USER }}
          GMAIL_PASSWORD: ${{ secrets.GMAIL_PASSWORD }}
          RECIPIENT_EMAIL: ${{ secrets.RECIPIENT_EMAIL }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          TRADIER_API_KEY: ${{ secrets.TRADIER_API_KEY }}
          TRADIER_ACCOUNT_ID: ${{ secrets.TRADIER_ACCOUNT_ID }}
          # SEC compliance
          EDGAR_USER_AGENT: ${{ secrets.EDGAR_USER_AGENT }}
        run: |
          mkdir -p data
          python scripts/daily_scan.py --run-mode thirteenf
      - name: Save DB cache
        # Save even when the scan step failed so partial progress is kept.
        if: always()
        uses: actions/cache/save@v4
        with:
          path: data/scanner.db
          key: scanner-db-v2-${{ github.run_id }}-${{ github.run_attempt }}
================================================
FILE: .github/workflows/weekly_full.yml
================================================
# Weekly full pipeline run; the SQLite DB is carried between runs via the
# Actions cache.
name: Weekly Full Scan

on:
  schedule:
    # Every Monday at 05:00 UTC.
    - cron: '0 5 * * 1'
  workflow_dispatch:

jobs:
  scan:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'
      - name: Install deps
        run: pip install -r requirements.txt
      - name: Restore DB cache
        # Restore-only action: the explicit save step below is the single
        # writer, so no second (post-job) save is attempted on the same key.
        uses: actions/cache/restore@v4
        with:
          path: data/scanner.db
          # Same v2 prefix as the Daily Light workflow so all workflows share
          # one DB lineage. run_id is unique across the repository
          # (run_number is only unique per workflow and collides between
          # workflows); run_attempt keeps re-runs unique as well.
          key: scanner-db-v2-${{ github.run_id }}-${{ github.run_attempt }}
          restore-keys: |
            scanner-db-v2-
      - name: Run Weekly Full
        env:
          GMAIL_USER: ${{ secrets.GMAIL_USER }}
          GMAIL_PASSWORD: ${{ secrets.GMAIL_PASSWORD }}
          RECIPIENT_EMAIL: ${{ secrets.RECIPIENT_EMAIL }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          TRADIER_API_KEY: ${{ secrets.TRADIER_API_KEY }}
          TRADIER_ACCOUNT_ID: ${{ secrets.TRADIER_ACCOUNT_ID }}
          # SEC compliance; the other SEC-facing workflows (daily light,
          # 13F) already provide this.
          EDGAR_USER_AGENT: ${{ secrets.EDGAR_USER_AGENT }}
        run: |
          mkdir -p data
          python scripts/daily_scan.py --run-mode weekly_full
      - name: Save DB cache
        # Save even when the scan step failed so partial progress is kept.
        if: always()
        uses: actions/cache/save@v4
        with:
          path: data/scanner.db
          key: scanner-db-v2-${{ github.run_id }}-${{ github.run_attempt }}
================================================
FILE: .github/workflows/weekly_review.yml
================================================
# Weekly review run; the SQLite DB is carried between runs via the Actions
# cache.
name: Weekly Review

on:
  schedule:
    # Every Sunday at 17:00 UTC.
    - cron: '0 17 * * 0'
  workflow_dispatch:

jobs:
  review:
    runs-on: ubuntu-latest
    timeout-minutes: 20
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'
      - name: Install deps
        run: pip install -r requirements.txt
      - name: Restore DB cache
        # Restore-only action: the explicit save step below is the single
        # writer, so no second (post-job) save is attempted on the same key.
        uses: actions/cache/restore@v4
        with:
          path: data/scanner.db
          # Same v2 prefix as the Daily Light workflow so all workflows share
          # one DB lineage. run_id is unique across the repository
          # (run_number is only unique per workflow and collides between
          # workflows); run_attempt keeps re-runs unique as well.
          key: scanner-db-v2-${{ github.run_id }}-${{ github.run_attempt }}
          restore-keys: |
            scanner-db-v2-
      - name: Run Weekly Review
        env:
          GMAIL_USER: ${{ secrets.GMAIL_USER }}
          GMAIL_PASSWORD: ${{ secrets.GMAIL_PASSWORD }}
          RECIPIENT_EMAIL: ${{ secrets.RECIPIENT_EMAIL }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        run: |
          mkdir -p data
          python scripts/daily_scan.py --run-mode weekly_review
      - name: Save DB cache
        # Save even when the review step failed so partial progress is kept.
        if: always()
        uses: actions/cache/save@v4
        with:
          path: data/scanner.db
          key: scanner-db-v2-${{ github.run_id }}-${{ github.run_attempt }}
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2026 Smart Money Scanner v2
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
# Smart Money Scanner v2
**SEC Insider + Institutional + Politiker-Trade Scanner mit KI-Analyse**
Optimiert für 90–180 Tage Call-Optionen basierend auf institutioneller Conviction.
Vollautomatisch via GitHub Actions. Tradier Pro für Real-Time Options-Daten.
---
## 🎯 Kern-These
Große institutionelle Käufe (Hedgefonds, Insider, Politiker), die sich **über mehrere Wochen und Quartale wiederholen**, signalisieren echte mittelfristige Überzeugung. Diese lässt sich mit 90–180 Tage Calls mit gutem Risk/Reward abbilden — wenn:
1. **IV-Rank ≤ 50** (billige Optionen, dynamischer Threshold)
2. **Datierter Katalysator in Laufzeit** (z.B. Earnings in 60 Tagen)
3. **Systematisches Muster, nicht Rebalancing** (3 Quartale in Folge)
4. **Strikte Exit-Regeln** (≤21d raus, +80% TP, −45% SL)
## 🏗️ Architektur
**Modularer Aufbau:** Jedes Modul ist eigenständig und ersetzbar.
```
src/
├── ingest/ # Datenquellen (Form4, 13F, 8K, Gov, News)
├── enrich/ # Anreicherung (Preis, Catalyst, Options-PreFilter)
├── score/ # Scoring (Signal-Builder, Merger, Filter, Trends)
├── ai/ # Claude-Analyse + Outcome-Tracking
├── execution/ # Tradier API + Exit-Manager
├── alerts/ # Email-Versand
└── utils/ # Logger, Storage, Retry
```
## 📅 Cron Schedule
| Workflow | Zeit (UTC) | Was passiert |
|----------|-----------|--------------|
| Daily Light | Mo-Fr 14:30, 21:30 | Form4 Update + Exit-Check |
| Weekly Full | Mo 05:00 | Volle Pipeline + Claude |
| 13F Dedicated | 15. Feb/Mai/Aug/Nov | Multi-Quartals-Trend Analyse |
| Weekly Review | So 17:00 | Claude Meta-Analyse |
| Backtest | So 02:00 | Performance-Validierung |
## ⚠️ Disclaimer
Kein Finanzrat. Eigene Due Diligence erforderlich. Code ist Open Source und ohne Gewährleistung.
## 📝 Lizenz
MIT License — see LICENSE file
================================================
FILE: config/fund_weights.yaml
================================================
# config/fund_weights.yaml
# Score 0-50. Wird auto-kalibriert basierend auf Outcomes.
# Funds mit Score < 15 werden komplett ignoriert.
funds:
Berkshire Hathaway:
score: 48
category: value_conviction
comment: Stärkster langfristiger Track Record
Pershing Square:
score: 42
category: activist_conviction
comment: Hohe Conviction + klare These
Elliott Management:
score: 40
category: activist
comment: Sehr stark bei 13D/G
Starboard Value:
score: 38
category: activist
Icahn Enterprises:
score: 36
category: activist
Coatue Management:
score: 32
category: growth_tech
Situational Awareness LP:
score: 35
category: ai_infrastructure
comment: "Leo Aschenbrenner - AI-fokussiert"
D1 Capital Partners:
score: 30
category: growth
Tiger Global:
score: 25
category: growth
note: Nur bei sehr großen neuen Positionen (>4% Portfolio)
ARK Invest:
score: 18
category: momentum
note: Nur bei extremen Käufen
Two Sigma:
score: 14
category: quant
note: Oft schon eingepreist
ignored_funds:
- Vanguard
- BlackRock
- Fidelity
- State Street
- Invesco
- iShares
================================================
FILE: config/funds_to_track.yaml
================================================
# config/funds_to_track.yaml
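# CIK numbers for SEC EDGAR 13F queries
# NOTE(review): every name here matches a key in config/fund_weights.yaml
# except "Icahn Capital LP" (listed there as "Icahn Enterprises") - verify
# that the fund-score lookup still resolves it.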
funds:
- name: Berkshire Hathaway
cik: "0001067983"
- name: Pershing Square
cik: "0001336528"
- name: Elliott Management
cik: "0001791786"
- name: Starboard Value
cik: "0001517137"
- name: Icahn Capital LP
cik: "0001412093"
- name: Coatue Management
cik: "0001135730"
- name: Situational Awareness LP
cik: "0002045724"
- name: D1 Capital Partners
cik: "0001747057"
- name: Tiger Global
cik: "0001167483"
- name: ARK Invest
cik: "0001697748"
- name: Two Sigma
cik: "0001179392"
================================================
FILE: config/thresholds.yaml
================================================
# config/thresholds.yaml
# ZENTRALE STELLE für alle Magic Numbers
# Änderungen hier propagieren durch das ganze System
# ── SIGNAL-FILTER (Hard Gates) ───────────────────────────────────────
signal_filter:
min_fund_score: 15 # Fund muss bekannt + relevant sein
min_sources: 2 # Multi-Signal-Gate
min_conviction: 0.38 # Mindest-Überzeugungsgrad
max_neg_news: -0.70 # Sehr negative News blocken (lockerer als v1)
# ── OPTIONS-PARAMETER (für 2-6 Monats-Calls) ─────────────────────────
options:
min_days_to_exp: 90 # Unter 90d: Theta frisst zu viel
max_days_to_exp: 180 # Über 180d: Liquidität sinkt
target_otm_min_pct: 0.05 # 5% OTM minimum
target_otm_max_pct: 0.15 # 15% OTM maximum
# IV-Rank: dynamischer statt hart
iv_rank_ideal: 35 # Bonus wenn drunter
iv_rank_acceptable: 50 # Conviction -0.10
iv_rank_risky: 70 # Conviction -0.20
iv_rank_kill: 70 # Hard-Kill drüber
min_open_interest: 500
max_spread_pct: 4.0 # Bid/Ask-Spread
ideal_delta_min: 0.35
ideal_delta_max: 0.45
# ── EXIT-REGELN (NICHT VERHANDELBAR) ─────────────────────────────────
exit_rules:
take_profit_pct: 80 # +80% TP
stop_loss_pct: -45 # -45% SL
partial_take_pct: 50 # 50% Position bei +50%
min_days_remaining: 21 # Hard Time-Exit
exit_if_fund_sells: true
exit_if_insider_sells: true
vix_emergency_exit: 35 # VIX > 35 für 2 Tage = alle Exits
# ── POSITION SIZING (Kelly) ──────────────────────────────────────────
position_sizing:
use_kelly: true
kelly_fraction: 0.25 # Quarter-Kelly (sicherer)
max_position_pct: 0.05 # Max 5% pro Trade
min_position_pct: 0.005 # Min 0.5%
max_total_exposure: 0.40 # Max 40% Tech/Sektor
# Default Win-Rate für unbekannte Funds
default_win_rate: 0.42
default_avg_win: 0.70
default_avg_loss: 0.42
# ── 13F MULTI-QUARTALS-TREND ─────────────────────────────────────────
thirteenf:
min_position_value_usd: 100000 # $100k Mindestposition
min_new_position_usd: 500000 # $500k für neue Pos.
min_increase_pct: 15.0 # 15% Aufstockung
min_decrease_pct: 30.0
min_portfolio_pct: 0.3 # 0.3% Portfolio-Anteil
# Multi-Quartals-Bonus
consecutive_2_bonus: 0.15
consecutive_3plus_bonus: 0.30 # Stärkstes Signal!
# ── INSIDER PATTERN (Form 4) ─────────────────────────────────────────
insider:
pattern_lookback_days: 90
min_systematic_weeks: 3
min_unique_buyers: 2
min_total_usd: 500000
# ── CATALYST FINDER ──────────────────────────────────────────────────
catalyst:
earnings_too_close_days: 30 # IV bereits aufgebläht
earnings_optimal_min_days: 30
earnings_optimal_max_days: 90
bonus_optimal: 0.20
bonus_acceptable: 0.05
penalty_too_close: -0.15
penalty_no_catalyst: -0.10
# ── DUPLIKAT-CHECK (differenziert nach Signal-Typ) ──────────────────
duplicates:
insider_buy_days: 5
thirteenf_days: 90 # Quartals-Filing, nie doppelt
eight_k_days: 0 # Jedes Event einmalig
gov_buy_days: 14
# ── BACKTEST PARAMETER ───────────────────────────────────────────────
backtest:
start_year: 2018
end_year: 2025
entry_delay_days: 1 # Realistisch: nächster Handelstag
# Kosten-Modell
spread_cost_per_trade_pct: 0.012 # 1.2% round-trip
tax_rate_short_term: 0.35 # DE/AT
# Validierung-Kriterien
min_win_rate: 0.40
min_sortino: 0.80
max_drawdown: -0.35
================================================
FILE: docs/ARCHITECTURE.md
================================================
# Architecture
## Modulare Struktur
Das System ist in 7 unabhängige Layer aufgeteilt. Jeder Layer kann
einzeln gewartet, ersetzt und getestet werden, ohne andere zu brechen.
```
┌─────────────────────────────────────────────────────────────┐
│ scripts/daily_scan.py │
│ (Orchestrator - dünne Pipeline) │
└──────────────────────┬──────────────────────────────────────┘
│
┌───────────────┼───────────────┬──────────────┐
▼ ▼ ▼ ▼
┌────────┐ ┌────────┐ ┌────────┐ ┌─────────┐
│ ingest │ ──▶ │ enrich │ ──▶ │ score │ ──▶│ ai │
└────────┘ └────────┘ └────────┘ └─────────┘
│
▼
┌──────────┐
│execution │
└──────────┘
│
▼
┌──────────┐
│ alerts │
└──────────┘
│
┌───────────────────────────┴─────┐
▼ ▼
┌────────┐ ┌─────────┐
│ utils │ ◀──── used by all ───▶│ config │
└────────┘ └─────────┘
```
## Layer-Verantwortlichkeiten
### `src/ingest/`
**Was:** Holt Rohdaten von externen Quellen.
**Wie:** Jedes Modul hat ein einfaches Interface: `fetch() -> List[Dict]`.
**Module:**
- `form4_fetcher` — SEC Insider mit Cluster-Detection
- `thirteenf_fetcher` — Hedgefonds-Filings + Multi-Quartals-Trend
- `eight_k_fetcher` — Corporate Events mit Item-Score
- `gov_trades_fetcher` — Politiker-Trades (Quiver)
- `news_fetcher` — Google + Yahoo RSS
**Modul ersetzen:** Nur Interface beibehalten (`fetch() -> List[Dict]`),
Implementation kann komplett anders sein.
### `src/enrich/`
**Was:** Reichert Rohdaten an.
**Wie:** Jedes Modul nimmt einen Ticker oder Signal entgegen.
**Module:**
- `price_context` — Yahoo Finance Kursdaten
- `catalyst_finder` — Earnings-Termine im Optionsfenster
- `options_prefilter` — Tradier Pre-Filter (Hard-Gate)
- `sentiment` — News-Sentiment via Phrasen
- `macro_context` — Polymarket + Kalshi
### `src/score/`
**Was:** Bewertet & filtert Signale.
**Wie:** Verwendet `Signal`-Dataclass + zentrale Thresholds.
**Module:**
- `fund_scorer` — Fund-Score Lookup
- `signal_filter` — Hard-Gates + Weighted Score
- `signal_builder` — Erzeugt + merged Signale
### `src/ai/`
**Was:** Claude-Analyse + Outcome-Tracking.
**Module:**
- `single_analyzer` — Claude Sonnet 4.5 Single-Signal-Analyse
- `outcome_tracker` — 30/60/90d Returns + Auto-Kalibrierung
### `src/execution/`
**Was:** Tradier API + Position-Management.
**Module:**
- `tradier_client` — Wrapper für Tradier API
- `exit_manager` — Tägliche Position-Checks (TP/SL/Time)
### `src/alerts/`
**Was:** Benachrichtigungen.
**Module:**
- `email_sender` — Apple-Style HTML-Mail via Gmail SMTP
### `src/utils/`
**Was:** Querschnittsfunktionen.
**Module:**
- `logger` — Loguru-basiert
- `retry` — Exponential Backoff Decorator
- `config` — YAML-Loader mit Cache
- `storage` — SQLite-Wrapper (alle DB-Zugriffe)
- `ticker_resolver` — CIK → Ticker
## Design-Prinzipien
### 1. Single Responsibility
Jedes Modul macht GENAU EINE Sache.
### 2. Interface-Stabilität
Module exponieren simple Interfaces:
```python
# Ingest
def fetch() -> List[Dict]
# Enrich
def get_price_context(ticker: str) -> Dict
# Score
def filter_and_rank(signals: List[Signal]) -> List[Signal]
```
### 3. Configuration over Code
Alle Magic Numbers in `config/thresholds.yaml`. Code referenziert via
`get_threshold("category", "key")`.
### 4. Fail-Safe
Fehler in einem Modul brechen NICHT die ganze Pipeline. Try/Except an
strategischen Stellen, Logging, weiter.
### 5. Storage-Abstraktion
Module greifen NICHT direkt auf SQLite zu. Alles geht über `src/utils/storage.py`.
Vorteil: Storage-Backend kann gewechselt werden (z.B. Postgres) ohne andere
Module anzufassen.
### 6. Tradier-Abstraktion
Tradier-Calls gehen NICHT direkt aus Modulen. Alles über `tradier_client.py`.
Vorteil: API-Wechsel (z.B. zu IBKR) bedeutet eine neue Datei, nicht 10.
## Datenfluss
```
1. INGEST → Form4 RSS, 13F XML, 8K RSS, Quiver API
↓
2. BUILD → List[Dict] → List[Signal] (mit Fund-Score)
↓
3. MERGE → Signal-Cluster nach Ticker
↓
4. FILTER → Hard-Gates (Fund, Sources, Conviction)
↓
5. ENRICH → Top-N: + Preis, Catalyst, Options, News
↓
6. CLAUDE → Single-Signal Analysis → action/confidence/instrument
↓
7. PERSIST → SQLite (signals, open_positions)
↓
8. NOTIFY → HTML Email
```
## Erweiterbarkeit
### Neue Datenquelle hinzufügen
1. `src/ingest/my_source.py` mit `fetch() -> List[Dict]`
2. In `daily_scan.py` step_ingest erweitern
3. In `signal_builder.py` Builder-Funktion `build_signals_from_my_source`
4. Done. Keine andere Komponente muss angepasst werden.
### Scoring ändern
Nur `src/score/signal_filter.py` editieren. Tests laufen lassen.
### Anderen LLM benutzen
Nur `src/ai/single_analyzer.py` umbauen. Interface bleibt:
`analyze(signal, news) -> Dict`.
### Andere Broker-API
`src/execution/tradier_client.py` durch `ibkr_client.py` ersetzen, gleiches
Interface anbieten.
================================================
FILE: docs/BACKTESTING.md
================================================
# Backtesting
## Was der Backtest tut
Der Backtest in `scripts/backtest.py` simuliert Call-Optionen-Trades
auf einer Auswahl von Tickers (AAPL, MSFT, GOOGL, ...) zu pseudo-zufälligen
Daten zwischen 2019-2025.
Die Optionspreise werden via **Black-Scholes approximiert** (nicht echte
Marktdaten), weil echte historische Optionspreise teuer sind.
## Was der Backtest validiert
✅ **Risk/Reward-Modell** — passt 80% TP / -45% SL?
✅ **Exit-Disziplin** — Time-Exit bei ≤21d hält Theta-Verluste in Grenzen?
✅ **Robustheit über Marktphasen** — funktioniert es 2020 (Crash) UND 2022 (Bear)?
✅ **Drawdown-Profile** — bleibt Max DD unter 35%?
## Was der Backtest NICHT validiert
❌ **Echte Spread-Kosten** — werden nur grob approximiert
❌ **Tatsächliche Fill-Preise** — Annahme: Mid-Preis
❌ **Slippage bei großen Positionen**
❌ **Fund-Score-Logik** — wir testen die *Mechanik*, nicht die *Selektion*
## Ergebnisse interpretieren
```
Win-Rate: ≥ 40% ✓ akzeptabel
Win-Rate: ≥ 45% ✓✓ gut
Win-Rate: ≥ 50% ✓✓✓ exzellent
Sortino: ≥ 0.80 ✓ akzeptabel
Sortino: ≥ 1.20 ✓✓ gut
Sortino: ≥ 2.00 ✓✓✓ exzellent
Max DD: ≥ -35% ✓ akzeptabel
Max DD: ≥ -25% ✓✓ gut
Max DD: ≥ -15% ✓✓✓ exzellent
```
## Wann Live gehen?
✅ Backtest passt grobe Validierung
✅ Mindestens 2 Wochen Paper-Trading parallel
✅ Mindestens 5 echte Mini-Positionen (0.5%) zur Verifikation
✅ Eine vollständige Earnings-Saison durch (Q-Update zeigt Multi-Quartals-Effekt)
## Bekannte Limitierungen
### 1. Datums-Auswahl
Die 12 Test-Daten in `backtest.py` sind hardgecoded. Echte Signale wären
in der Realität dichter und ungleichmäßiger verteilt. Du kannst sie
selbst erweitern in `scripts/backtest.py`.
### 2. IV-Annahme
Wir nehmen 30% IV durchgehend an. In der Realität schwankt IV stark
(2020: 60-80%, 2022: 30-50%, 2024: 15-25%). Die echte Performance
kann besser oder schlechter sein.
### 3. Keine Fund-Selection
Wir simulieren keine *echten* Smart-Money-Signale, sondern nur die
Trade-Mechanik (TP/SL/Time-Exit) auf zufälligen Daten.
Echtes Edge entsteht durch:
- Multi-Quartals-Trend (3+ Q in Folge bei Top-Funds) — nicht im Backtest
- Insider-Cluster (mehrere Insider gleichzeitig kaufen) — nicht im Backtest
Für eine *vollständige* Backtest-Validierung müsstest du historische
SEC-Filings parsen + entsprechende Trades simulieren. Das ist
substantieller Aufwand (1-2 Wochen Code).
## Erweiterung
```python
# In scripts/backtest.py, in run_backtest():
test_dates = [
"2019-03-15",
# Hier mehr Daten ergänzen
]
test_tickers = [
"AAPL",
# Hier mehr Tickers ergänzen
]
```
## Bewährter Validierungs-Workflow
1. Backtest laufen lassen → Mechanik passt?
2. 2-4 Wochen Paper-Trading mit vollem System
3. Win-Rate echter Signale messen → matcht Erwartung?
4. Erst dann Live mit kleinen Positionen (0.5-1%)
5. Nach 10-15 echten Trades: Position-Size hochsetzen falls WR stabil
================================================
FILE: docs/DEPLOYMENT.md
================================================
# Deployment Guide
## Voraussetzungen
- GitHub Account (Free Tier reicht — 2.000 min/Monat)
- Gmail Account mit App-Passwort
- Anthropic API Key (für Claude)
- Tradier Pro Account + API Key
## Schritt-für-Schritt (nur Browser nötig)
### 1. Repo erstellen
1. github.com → New repository
2. Name: `smart-money-scanner-v2`
3. Privat oder Public
4. NICHTS initialisieren (kein README, kein .gitignore — haben wir bereits)
5. Create repository
### 2. Code hochladen
**Option A: Drag & Drop**
1. "uploading an existing file"
2. Den ganzen `smart-money-scanner-v2` Ordner per Drag & Drop reinziehen
3. Commit message: "Initial deployment"
4. Commit changes
**Option B: ZIP**
1. ZIP entpacken auf deinem Rechner
2. Alle Dateien selektieren (Strg+A)
3. Drag & Drop in das leere Repo
4. Commit changes
### 3. Secrets konfigurieren
`Settings → Secrets and variables → Actions → New repository secret`
| Secret Name | Wo bekommst du das? |
|-------------|---------------------|
| `GMAIL_USER` | Deine Gmail-Adresse |
| `GMAIL_PASSWORD` | App-Passwort, NICHT dein normales PW! |
| `RECIPIENT_EMAIL` | An wen soll Mail gehen |
| `ANTHROPIC_API_KEY` | console.anthropic.com → API Keys |
| `TRADIER_API_KEY` | Tradier Account → API |
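| `TRADIER_ACCOUNT_ID` | Optional |
| `EDGAR_USER_AGENT` | Kennung für SEC-EDGAR-Abfragen, z.B. `Vorname Nachname name@example.com` (wird von Daily Light und 13F Dedicated gelesen) |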
#### Gmail App-Passwort erstellen
1. myaccount.google.com → Sicherheit
2. 2-Faktor-Authentifizierung aktivieren (falls noch nicht)
3. "App-Passwörter" → Neues App-Passwort
4. Name: "Smart Money Scanner"
5. 16-stelligen Code kopieren → als `GMAIL_PASSWORD`
### 4. Erster Test-Run
`Actions → Daily Light Scan → Run workflow → Run workflow`
Nach 3-5 Min solltest du:
- ✅ Grünen Haken bei Actions sehen
- ✅ E-Mail erhalten haben
Bei Fehler:
- Actions → fehlgeschlagener Run → Logs lesen
- Häufigste Ursachen: Tippfehler in Secrets, falsches Gmail-Passwort
### 5. Backtest VOR Live-Trading
`Actions → Backtest → Run workflow`
Erwartete Performance:
```
Win-Rate: ≥ 40% (gut: 43-50%)
Sortino Ratio: ≥ 0.80 (gut: 1.0+)
Max Drawdown: ≥ -35% (gut: -20 bis -25%)
```
Wenn deutlich schlechter: NICHT live gehen. Erst Tuning.
### 6. Cron-Schedules sind automatisch aktiv
Sobald Code im Main-Branch ist, laufen die Workflows automatisch:
- Mo-Fr 14:30 + 21:30 UTC: Daily Light
- Mo 05:00 UTC: Weekly Full
- 15. Feb/Mai/Aug/Nov 06:00 UTC: 13F Dedicated
- So 17:00 UTC: Weekly Review
- So 02:00 UTC: Backtest
- Alle 2 Monate: Keepalive (verhindert Auto-Disable)
## Cost Tracking
| Komponente | Verbrauch | Kosten |
|------------|-----------|--------|
| GitHub Actions | ~61 min/Monat | $0 (Free Tier 2000) |
| Anthropic API | ~5-10 Calls/Woche × $0.01 | ~$2/Monat |
| Tradier Pro | Konto bereits da | $10/Monat (Pauschal) |
| Gmail | unbegrenzt | $0 |
| **Total** | | **~$12/Monat** |
## Wartung
### Monatlich
- Win-Rate prüfen (`docs/BACKTESTING.md`)
- Source-Health-Warnings prüfen (kommen per Mail wenn Quelle 3+ Tage 0)
### Quartalsweise
- `config/funds_to_track.yaml`: neue Funds hinzufügen?
- `config/fund_weights.yaml`: Auto-Kalibrierung läuft, manuell tunen falls nötig
### Bei Problemen
- Actions-Logs sind die erste Anlaufstelle
- Daten in `data/scanner.db` bleiben durch Cache erhalten
## Update-Strategie
1. Lokal: in einem Branch arbeiten
2. Test: `python -m tests.test_scoring` etc.
3. Push als PR
4. Mergen wenn alle Tests grün
Da das System modular ist: ein einzelnes Modul anfassen ≠ Risiko für den Rest.
================================================
FILE: requirements.txt
================================================
requests>=2.31.0
pyyaml>=6.0.1
anthropic>=0.34.0
loguru>=0.7.2
================================================
FILE: scripts/backtest.py
================================================
#!/usr/bin/env python3
# scripts/backtest.py
"""
Backtest 2018-2025 mit historischen Daten.
WARNUNG: Optionspreise werden APPROXIMIERT (Black-Scholes) -
echte historische Optionspreise sind teuer (CBOE LiveVol etc).
Für Live-Validierung ist Paper-Trading der bessere Weg.
Was es validiert:
- Win-Rate des Signal-Modells (auf Aktien-Basis)
- Sortino, Drawdown
- Robustheit über verschiedene Marktphasen
"""
import sys
from pathlib import Path
from datetime import datetime, timedelta
import math
import statistics
import requests
sys.path.insert(0, str(Path(__file__).parent.parent))
from src.utils.logger import logger
from src.utils.config import get_threshold
HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
}
def fetch_historical_prices(ticker: str, start: str, end: str):
    """Fetch daily closing prices for ``ticker`` from the Yahoo chart endpoint.

    Args:
        ticker: Symbol to query.
        start: Start of the window as ``YYYY-MM-DD`` (taken as UTC midnight).
        end: End of the window as ``YYYY-MM-DD`` (taken as UTC midnight).

    Returns:
        A list of ``{"date": "YYYY-MM-DD", "close": <close>}`` dicts in the
        order delivered by the API, with bars lacking a close dropped.
        ``None`` if the HTTP status is not 200 or anything goes wrong while
        requesting or parsing (the error is logged, never raised).
    """
    # Function-scope import: the module header only imports datetime/timedelta.
    from datetime import timezone

    def _to_epoch(day: str) -> int:
        # Interpret the date as UTC so the window bounds do not depend on the
        # machine's local timezone and match the UTC dates rendered below.
        parsed = datetime.strptime(day, "%Y-%m-%d").replace(tzinfo=timezone.utc)
        return int(parsed.timestamp())

    try:
        url = f"https://query1.finance.yahoo.com/v8/finance/chart/{ticker}"
        params = {
            "period1": _to_epoch(start),
            "period2": _to_epoch(end),
            "interval": "1d"
        }
        resp = requests.get(url, headers=HEADERS, params=params, timeout=15)
        if resp.status_code != 200:
            logger.warning(f"Backtest fetch {ticker}: HTTP {resp.status_code}")
            return None
        data = resp.json()["chart"]["result"][0]
        timestamps = data.get("timestamp", [])
        closes = data["indicators"]["quote"][0].get("close", [])
        return [
            {
                # Timezone-aware replacement for the deprecated
                # datetime.utcfromtimestamp(); yields the same UTC date.
                "date": datetime.fromtimestamp(t, tz=timezone.utc).strftime("%Y-%m-%d"),
                "close": c,
            }
            for t, c in zip(timestamps, closes) if c
        ]
    except Exception as e:
        # Best-effort fetch: callers treat None as "no data for this ticker".
        logger.warning(f"Backtest fetch {ticker}: {e}")
        return None
def black_scholes_call(S: float, K: float, T: float, r: float, sigma: float) -> float:
    """Approximate a European call price with the Black-Scholes formula.

    Args:
        S: Spot price of the underlying.
        K: Strike price.
        T: Time to expiry in years.
        r: Continuously compounded risk-free rate.
        sigma: Annualised volatility.

    Returns:
        The call value. Degenerate inputs are handled without raising:
        ``T <= 0`` or ``sigma <= 0`` gives the intrinsic value
        ``max(S - K, 0)``; ``S <= 0`` gives ``0.0``; ``K <= 0`` gives the
        spot minus the discounted strike.
    """
    if T <= 0 or sigma <= 0:
        return max(S - K, 0)
    if S <= 0:
        # log(S / K) is undefined here; a call on a worthless underlying is
        # worth nothing.
        return 0.0
    if K <= 0:
        # log(S / K) is undefined here as well; exercise is certain, so the
        # value is the spot minus the discounted strike.
        return S - K * math.exp(-r * T)

    vol_sqrt_t = sigma * math.sqrt(T)
    d1 = (math.log(S / K) + (r + 0.5 * sigma ** 2) * T) / vol_sqrt_t
    d2 = d1 - vol_sqrt_t

    # Standard normal CDF expressed through the error function.
    def N(x):
        return 0.5 * (1 + math.erf(x / math.sqrt(2)))

    return S * N(d1) - K * math.exp(-r * T) * N(d2)
def simulate_trade(ticker: str, entry_date: str, hold_days: int = 90) -> "dict | None":
    """Simulate a single long-call trade priced with Black-Scholes.

    Simplifying assumptions: strike 10% out of the money, 120 calendar days
    to expiry at entry, constant 30% implied volatility, 4% risk-free rate.

    All day counts are CALENDAR days derived from the bar dates. Counting
    list positions instead would mix trading bars with calendar days, and
    the fetched window of ``hold_days + 30`` calendar days never holds
    ``hold_days`` trading bars, so no trade could ever be simulated.

    Args:
        ticker: Symbol to trade.
        entry_date: Requested entry day as ``YYYY-MM-DD``; the first bar
            returned at or after it is used as the actual entry.
        hold_days: Maximum holding period in calendar days.

    Returns:
        A dict with ``ticker``, ``entry_date``, ``exit_date``,
        ``exit_reason`` (``"time"``, ``"tp"``, ``"sl"`` or ``"hold_end"``),
        ``pnl`` (fractional return on the option premium) and ``days_held``
        (calendar days). ``None`` if no usable price history exists or the
        history ends before any exit condition is met.
    """
    date_fmt = "%Y-%m-%d"
    days_to_expiry = 120
    iv = 0.30
    rate = 0.04

    window_end = (datetime.strptime(entry_date, date_fmt) +
                  timedelta(days=hold_days + 30)).strftime(date_fmt)
    prices = fetch_historical_prices(ticker, entry_date, window_end)
    # The entry bar plus at least one later bar is needed to exit on.
    if not prices or len(prices) < 2:
        return None

    entry_bar = prices[0]
    entry_day = datetime.strptime(entry_bar["date"], date_fmt)
    entry_price = entry_bar["close"]
    strike = entry_price * 1.10

    # Entry option price (approximation).
    entry_option = black_scholes_call(
        entry_price, strike, days_to_expiry / 365, rate, iv
    )
    if entry_option <= 0:
        # A relative P&L cannot be computed against a zero premium.
        return None

    tp = get_threshold("exit_rules", "take_profit_pct", 80) / 100
    sl = get_threshold("exit_rules", "stop_loss_pct", -45) / 100
    min_days_left = get_threshold("exit_rules", "min_days_remaining", 21)

    for bar in prices[1:]:
        elapsed = (datetime.strptime(bar["date"], date_fmt) - entry_day).days
        days_left = days_to_expiry - elapsed
        # Floor at 0.01 years so a bar at or past expiry is still priced.
        t_exit = days_left / 365 if days_left > 0 else 0.01
        option_value = black_scholes_call(bar["close"], strike, t_exit, rate, iv)
        pnl = (option_value - entry_option) / entry_option

        # Precedence: hard time exit, then take profit / stop loss, then the
        # end of the holding period.
        if days_left <= min_days_left:
            reason = "time"
        elif pnl >= tp:
            reason = "tp"
        elif pnl <= sl:
            reason = "sl"
        elif elapsed >= hold_days:
            reason = "hold_end"
        else:
            continue

        return {
            "ticker": ticker, "entry_date": entry_date,
            "exit_date": bar["date"], "exit_reason": reason,
            "pnl": pnl, "days_held": elapsed
        }

    # Price history ended before any exit condition was reached.
    return None
def run_backtest():
    """
    Simplified backtest.

    Simulates call trades (via ``simulate_trade``) for a fixed list of tickers
    on a fixed list of entry dates, aggregates win rate, average win/loss,
    a simplified Sortino ratio and the maximum drawdown of a 2%-per-trade
    equity curve, logs the report and tries to e-mail it.

    Returns:
        None. When no trade could be simulated an error is logged and the
        function returns early.
    """
    logger.info("█" * 60)
    logger.info("BACKTEST 2019-2025")
    logger.info("█" * 60)
    test_tickers = [
        "AAPL", "MSFT", "GOOGL", "META", "NVDA", "AMZN",
        "JPM", "BAC", "WMT", "PG", "JNJ", "UNH",
        "TSLA", "AVGO", "CRM", "AMD"
    ]
    test_dates = [
        "2019-03-15", "2019-09-13", "2020-02-21", "2020-08-14",
        "2021-03-19", "2021-09-17", "2022-02-18", "2022-08-19",
        "2023-03-17", "2023-09-15", "2024-02-16", "2024-08-16",
    ]
    results = []
    for ticker in test_tickers:
        for date in test_dates:
            try:
                r = simulate_trade(ticker, date, hold_days=90)
                if r:
                    results.append(r)
            except Exception as e:
                # Best effort: a single failing ticker/date must not stop the run.
                logger.debug(f"Skip {ticker}@{date}: {e}")
    if not results:
        logger.error("Keine Backtest-Ergebnisse")
        return

    # Stats
    pnls = [r["pnl"] for r in results]
    wins = [p for p in pnls if p > 0]
    losses = [p for p in pnls if p < 0]
    win_rate = len(wins) / len(pnls)
    avg_win = statistics.mean(wins) if wins else 0
    avg_loss = statistics.mean(losses) if losses else 0
    avg_pnl = statistics.mean(pnls)

    # Sortino (simplified): the standard deviation of the losing trades stands
    # in for the downside deviation; 0.01 is used when fewer than two losses exist.
    downside_dev = statistics.stdev(losses) if len(losses) > 1 else 0.01
    sortino = avg_pnl / downside_dev if downside_dev > 0 else 0

    # Drawdown of the cumulative equity curve with a 2% position size.
    # The peak starts at the initial equity (1.0) so that a losing streak at
    # the very beginning is counted as drawdown as well.
    equity = 1.0
    peak = 1.0
    max_dd = 0
    for p in pnls:
        equity *= (1 + p * 0.02)
        if equity > peak:
            peak = equity
        dd = (equity - peak) / peak
        if dd < max_dd:
            max_dd = dd

    # Report
    report = f"""
═══════════════════════════════════════════════════════
BACKTEST RESULTS
═══════════════════════════════════════════════════════
Total Trades: {len(results)}
Win-Rate: {win_rate:.1%}
Avg Win: {avg_win:+.1%}
Avg Loss: {avg_loss:+.1%}
Avg P&L: {avg_pnl:+.1%}
Sortino Ratio: {sortino:.2f}
Max Drawdown: {max_dd:.1%}
Exit Reasons:
Take Profit: {sum(1 for r in results if r['exit_reason'] == 'tp')}
Stop Loss: {sum(1 for r in results if r['exit_reason'] == 'sl')}
Time Exit: {sum(1 for r in results if r['exit_reason'] == 'time')}
Hold End: {sum(1 for r in results if r['exit_reason'] == 'hold_end')}
VALIDIERUNG:
Win-Rate ≥ 40%: {'✓' if win_rate >= 0.40 else '✗'}
Sortino ≥ 0.80: {'✓' if sortino >= 0.80 else '✗'}
Max DD ≥ -35%: {'✓' if max_dd >= -0.35 else '✗'}
NOTE: Optionspreise sind APPROXIMIERT (Black-Scholes).
Echte Performance kann ±15% abweichen.
Paper-Trading vor Live empfohlen!
═══════════════════════════════════════════════════════
"""
    logger.info(report)

    # Email
    try:
        from src.alerts.email_sender import send_email
        # NOTE(review): the original literal was an unterminated f-string whose
        # markup had been lost; a <pre> wrapper keeps the plain-text layout of
        # the report in an HTML mail -- confirm the intended markup.
        html = f"<pre>{report}</pre>"
        send_email("📊 Backtest Results", html)
    except Exception as e:
        logger.warning(f"Email fail: {e}")


if __name__ == "__main__":
    run_backtest()
================================================
FILE: scripts/daily_scan.py
================================================
#!/usr/bin/env python3
# scripts/daily_scan.py
"""
HAUPT-ORCHESTRATOR.
Run-Modes:
- daily_light: Form4 + Exit-Check (Mo-Fr 14:30 + 21:30 UTC)
- weekly_full: Volle Pipeline + Claude (Mo 05:00 UTC)
- thirteenf: 13F-fokussiert (15. Feb/Mai/Aug/Nov)
- weekly_review: Outcome-Update (So 17:00 UTC)
"""
import os
import sys
import argparse
from pathlib import Path
from datetime import datetime
from typing import List, Dict
# Pfad-Setup für Import
sys.path.insert(0, str(Path(__file__).parent.parent))
from src.utils.logger import logger
from src.utils.storage import (
init_db, save_signal, log_scan, log_source_health,
get_source_warnings, is_duplicate, save_position
)
from src.utils.config import load as load_config, get_threshold
# Ingest
from src.ingest import form4_fetcher, eight_k_fetcher, gov_trades_fetcher, news_fetcher
from src.ingest import thirteenf_fetcher
# Enrich
from src.enrich.price_context import get_price_context
from src.enrich.catalyst_finder import catalyst_score
from src.enrich.options_prefilter import (
options_prefilter,
conviction_modifier_for_iv,
post_claude_options_check,
)
from src.enrich.sentiment import calculate as calculate_sentiment
from src.enrich.macro_context import get_macro_context
# Score
from src.score.fund_scorer import FundScorer
from src.score.signal_filter import SignalFilter, Signal
from src.score.signal_builder import (
build_signals_from_form4,
build_signals_from_13f,
build_signals_from_8k,
build_signals_from_gov,
merge_by_ticker,
)
# AI
from src.ai.single_analyzer import SingleAnalyzer
from src.ai.outcome_tracker import run_outcome_tracking
# Execution
from src.execution.exit_manager import run_exit_check
# Alerts
from src.alerts.email_sender import send_report
# ── Pipeline-Steps ───────────────────────────────────────────────────
def step_ingest(scorer: FundScorer, run_mode: str) -> Dict[str, list]:
    """Stage 1: query the data sources enabled for ``run_mode``.

    Form 4 is always fetched, 8-K in every mode except ``thirteenf``, and
    government trades plus 13F only in ``weekly_full`` and ``thirteenf``.
    Every source reports its record count through ``log_source_health``; a
    failing source is logged, reported with a count of 0 and does not stop
    the remaining sources.

    Returns:
        Dict with the keys ``form4``, ``thirteenf``, ``eightk`` and ``gov``.
    """
    divider = "━" * 60
    logger.info(divider)
    logger.info(f"INGEST ({run_mode})")
    logger.info(divider)

    data = {"form4": [], "thirteenf": [], "eightk": [], "gov": []}

    def _pull(slot, health_key, label, fetch):
        # Fetch one source into data[slot] and record its health.
        try:
            data[slot] = fetch()
            log_source_health(health_key, len(data[slot]))
        except Exception as e:
            logger.error(f"{label}: {e}")
            log_source_health(health_key, 0)

    extended_mode = run_mode in ("weekly_full", "thirteenf")

    # Form 4 - always
    _pull("form4", "form4", "Form4", form4_fetcher.fetch)

    # 8-K - skipped in the pure thirteenf run
    if run_mode != "thirteenf":
        _pull("eightk", "8k", "8-K", eight_k_fetcher.fetch)

    # Government trades
    if extended_mode:
        _pull("gov", "gov", "Gov", gov_trades_fetcher.fetch)

    # 13F
    if extended_mode:
        _pull(
            "thirteenf", "13f", "13F",
            lambda: thirteenf_fetcher.fetch(
                load_config("funds_to_track").get("funds", []), scorer
            ),
        )

    counts = {slot: len(items) for slot, items in data.items()}
    logger.info(
        f"Ingest done: Form4={counts['form4']}, "
        f"13F={counts['thirteenf']}, "
        f"8K={counts['eightk']}, "
        f"Gov={counts['gov']}"
    )
    return data
def step_build_signals(raw: Dict, scorer: FundScorer, sf: SignalFilter) -> List[Signal]:
    """Stage 2: turn the raw ingest data into signals and merge them per ticker.

    Signals are built from Form 4, 13F, 8-K and government-trade records (in
    that order) and then combined with ``merge_by_ticker``.
    """
    divider = "━" * 60
    for line in (divider, "BUILD SIGNALS", divider):
        logger.info(line)

    collected: List[Signal] = [
        *build_signals_from_form4(raw["form4"], scorer, sf),
        *build_signals_from_13f(raw["thirteenf"], sf),
        *build_signals_from_8k(raw["eightk"], sf),
        *build_signals_from_gov(raw["gov"], sf),
    ]
    logger.info(f"Built: {len(collected)} raw signals")

    per_ticker = merge_by_ticker(collected, sf)
    logger.info(f"After merge: {len(per_ticker)} unique tickers")
    return per_ticker
def step_filter(signals: List[Signal], sf: SignalFilter) -> List[Signal]:
    """Stage 3: drop recently seen duplicates, then apply the hard gates and rank.

    The look-back window for the duplicate check depends on the signal type
    and comes from the ``duplicates`` section of the ``thresholds`` config;
    a window of 0 days disables the check for that type.
    """
    divider = "━" * 60
    for line in (divider, "FILTER & RANK", divider):
        logger.info(line)

    dup_cfg = load_config("thresholds").get("duplicates", {})
    thirteenf_window = dup_cfg.get("thirteenf_days", 90)
    window_by_type = {
        "insider_buy": dup_cfg.get("insider_buy_days", 5),
        "13f_increase": thirteenf_window,
        "13f_new_position": thirteenf_window,
        "8k_event": dup_cfg.get("eight_k_days", 0),
        "gov_buy": dup_cfg.get("gov_buy_days", 14),
    }

    def _is_fresh(sig):
        # Unknown signal types fall back to a 5-day window.
        window = window_by_type.get(sig.signal_type, 5)
        if window == 0:
            return True
        return not is_duplicate(sig.ticker, sig.signal_type, window)

    fresh = [sig for sig in signals if _is_fresh(sig)]
    logger.info(f"After dedup: {len(fresh)}")
    return sf.filter_and_rank(fresh)
def step_enrich(signals: List[Signal], run_mode: str) -> List[Dict]:
    """Stage 4: enrich the best signals, with a soft (hybrid) options pre-filter.

    The top 10 (``weekly_full`` / ``thirteenf``) or top 5 (other modes)
    signals are converted to dicts and extended with price context, catalyst,
    options data, news sentiment and macro context. Each enrichment step is
    best effort: a failure is logged and replaced by a neutral default.

    Args:
        signals: Ranked signals from ``step_filter``.
        run_mode: Current run mode; controls the number of signals and
            whether the macro context is fetched.

    Returns:
        One enriched dict per selected signal.
    """
    logger.info("━" * 60)
    logger.info("ENRICH")
    logger.info("━" * 60)
    top_n = 10 if run_mode in ("weekly_full", "thirteenf") else 5

    # Pre-screen: scan a larger pool and prefer tickers for which the broker
    # lists any option expirations at all. Tickers without listed options
    # would be rejected later anyway, so they are moved to the back of the
    # queue to save API calls. (Only the existence of expirations is checked
    # here, not a specific days-to-expiry window.)
    from src.execution.tradier_client import get_client
    candidate_pool = signals[:max(top_n * 4, 20)]
    tradier = get_client()
    if tradier.is_configured and candidate_pool:
        with_options, without_options = [], []
        for s in candidate_pool:
            try:
                expirations = tradier.get_expirations(s.ticker)
            except Exception as e:
                # A lookup failure for one ticker must not abort the stage;
                # the ticker is simply treated as having no options.
                logger.warning(f"Expirations {s.ticker}: {e}")
                expirations = None
            if expirations:
                with_options.append(s)
            else:
                without_options.append(s)
        # Prefer tickers with options; fall back to the rest if needed
        ordered = with_options + without_options
        logger.info(
            f"Options pre-screen: {len(with_options)} mit Options, "
            f"{len(without_options)} ohne — aus {len(candidate_pool)} Kandidaten"
        )
    else:
        ordered = candidate_pool
    top = ordered[:top_n]
    logger.info(f"Anreicherung der Top {len(top)} Signale")

    enriched = []
    for s in top:
        # Keys from s.raw are merged last and therefore override the
        # defaults given before them.
        d = {
            "ticker": s.ticker,
            "signal_type": s.signal_type,
            "fund_name": s.fund_name,
            "fund_score": s.fund_score,
            "strength": s.strength,
            "conviction": s.conviction,
            "consecutive_quarters": s.consecutive_quarters,
            "is_clustered": s.is_clustered,
            "source_count": s.source_count,
            "sources": [s.signal_type],
            "is_10b5": s.raw.get("is_10b5", False),
            "fund_category": s.raw.get("fund_category", "fund"),
            **s.raw
        }

        # Price context
        try:
            d["price_context"] = get_price_context(s.ticker)
        except Exception as e:
            logger.warning(f"Price {s.ticker}: {e}")
            d["price_context"] = {}

        # Catalyst
        try:
            cat = catalyst_score(s.ticker)
            d["catalyst"] = cat
            d["catalyst_modifier"] = cat.get("conviction_modifier", 0)
        except Exception as e:
            logger.warning(f"Catalyst {s.ticker}: {e}")
            d["catalyst"] = {}
            # Keep the dict shape identical to the success path.
            d["catalyst_modifier"] = 0

        # === Hybrid options pre-filter (soft) ===
        try:
            opt_result = options_prefilter(s.ticker)
            if opt_result.get("passed"):
                d["options_data"] = opt_result.get("options_data")
                d["iv_rank"] = opt_result.get("iv_rank", 50.0)
                d["options_qualified"] = True
                d["options_summary"] = opt_result.get("summary", "")
                # Apply the IV modifier to the conviction, clamped to [0, 1].
                iv_mod = conviction_modifier_for_iv(d["iv_rank"])
                d["conviction"] = max(0, min(1.0, d["conviction"] + iv_mod))
            else:
                d["options_qualified"] = False
                d["options_data"] = None
                d["iv_rank"] = 50.0
                d["options_summary"] = f"Options-Check fehlgeschlagen: {opt_result.get('kill_reason')}"
                logger.info(f" {s.ticker}: Options-PreFilter SOFT FAIL ({opt_result.get('kill_reason')})")
        except Exception as e:
            logger.warning(f"Options prefilter {s.ticker}: {e}")
            d["options_qualified"] = False
            d["options_data"] = None
            d["iv_rank"] = 50.0
            d["options_summary"] = "Options-Check fehlgeschlagen (Exception)"

        # News + sentiment
        try:
            news = news_fetcher.fetch(s.ticker)
            d["_news"] = news
            d["news_alignment"] = calculate_sentiment(news)
        except Exception as e:
            logger.warning(f"News {s.ticker}: {e}")
            d["_news"] = []
            d["news_alignment"] = 0.0

        # Macro context is only fetched in the weekly full run.
        if run_mode == "weekly_full":
            try:
                macro = get_macro_context(s.ticker)
                d["macro_context"] = macro["context"]
                d["macro_summary"] = macro["summary"]
            except Exception as e:
                logger.warning(f"Macro {s.ticker}: {e}")
                d["macro_context"] = "neutral"
        else:
            d["macro_context"] = "neutral"

        enriched.append(d)

    logger.info(f"Enriched: {len(enriched)}")
    return enriched
def step_analyze(enriched: List[Dict]) -> List[Dict]:
    """Stage 5: run the Claude analysis over the enriched signals.

    Returns an empty list when there is nothing to analyse or when
    ``SingleAnalyzer`` cannot be constructed (it raises ``ValueError``).
    """
    divider = "━" * 60
    for line in (divider, "CLAUDE ANALYSIS", divider):
        logger.info(line)

    if enriched:
        try:
            analyzer = SingleAnalyzer()
        except ValueError as e:
            logger.error(f"Claude not configured: {e}")
        else:
            # Headlines collected during enrichment, keyed by ticker.
            news_by_ticker = {}
            for item in enriched:
                news_by_ticker[item["ticker"]] = item.get("_news", [])
            return analyzer.analyze_batch(enriched, news_by_ticker)
    return []
def step_persist_and_send(
    analyzed: List[Dict],
    exits: List[Dict],
    warnings: List[Dict],
    stats: Dict,
    run_mode: str
):
    """Stage 6: persist the analysis results and send the report e-mail.

    Every analysed signal is stored with ``save_signal``. For each signal
    classified as ``trade`` a post-analysis options check is run and, if it
    passes, a position is stored with ``save_position``. Finally the report
    is sent and the scan is logged.

    Persistence is best effort: a failure for one signal or position is
    logged and never prevents the report from being sent.

    Args:
        analyzed: Result dicts from ``step_analyze``.
        exits: Exit recommendations to include in the report.
        warnings: Source-health warnings to include in the report.
        stats: Pipeline counters; ``total`` is written to the scan log.
        run_mode: Current run mode.
    """
    logger.info("━" * 60)
    logger.info("PERSIST & NOTIFY")
    logger.info("━" * 60)
    trades = [a for a in analyzed if a.get("action") == "trade"]
    watchlist = [a for a in analyzed if a.get("action") == "watchlist"]
    no_trades = [a for a in analyzed if a.get("action") == "kein_trade"]

    # Save signals
    for a in analyzed:
        try:
            sig_dict = {
                **(a.get("raw_signal") or {}),
                "action": a.get("action"),
                "confidence": a.get("confidence", 0),
                "reasoning": a.get("reasoning", ""),
                "suggested_instrument": a.get("suggested_instrument", ""),
            }
            save_signal(sig_dict)
        except Exception as e:
            logger.warning(f"Save signal fail: {e}")

    # Save new positions for trades - post-analysis options check
    for t in trades:
        ticker = t.get("ticker", "")
        # "raw_signal" may be present but None; treat that like a missing key.
        raw = t.get("raw_signal") or {}
        try:
            # The check runs inside the try so that a failing lookup for one
            # ticker cannot abort the loop and suppress the report e-mail.
            opt_result = post_claude_options_check(ticker)
            if not opt_result.get("passed"):
                logger.warning(f"Post-Claude Options-Check failed for trade {ticker} – skipping position save")
                continue
            opt = opt_result.get("options_data") or {}
            save_position({
                "ticker": ticker,
                "signal_date": datetime.utcnow().strftime("%Y-%m-%d"),
                "entry_price_stock": opt.get(
                    "stock_price",
                    (raw.get("price_context") or {}).get("price", 0)
                ),
                "entry_price_option": opt.get("mid", 0),
                "entry_bid": opt.get("bid", 0),
                "entry_ask": opt.get("ask", 0),
                "strike": opt.get("strike", 0),
                "expiry": opt.get("expiry", ""),
                "quantity": 1,
                "position_size_pct": t.get("position_size_pct", 1),
                "delta_entry": opt.get("delta", 0),
                "vega_entry": opt.get("vega", 0),
                "theta_entry": opt.get("theta", 0),
            })
        except Exception as e:
            logger.warning(f"Save position fail {ticker}: {e}")

    sent = send_report(
        trades=trades,
        watchlist=watchlist,
        exits=exits,
        no_trades=no_trades,
        warnings=warnings,
        stats=stats,
        run_mode=run_mode,
    )
    log_scan(
        found=stats.get("total", 0),
        sent=len(trades) + len(watchlist),
        status="success" if sent else "email_failed",
        run_mode=run_mode,
    )
# ── Run-Mode Entry Points ────────────────────────────────────────────
def _run_signal_pipeline(run_mode: str, title: str):
    """Shared body of the three signal-producing run modes.

    Runs the exit check, then ingest -> build -> filter -> enrich -> analyze
    -> persist/notify for ``run_mode``. A failing exit check is logged and
    treated as "no exits" so that it cannot abort the scan itself.
    """
    logger.info("█" * 60)
    logger.info(f"{title} — {datetime.utcnow():%Y-%m-%d %H:%M UTC}")
    logger.info("█" * 60)
    init_db()
    scorer = FundScorer()
    sf = SignalFilter()

    exits = []
    try:
        exits = run_exit_check()
    except Exception as e:
        logger.error(f"Exit check: {e}")

    raw = step_ingest(scorer, run_mode)
    signals = step_build_signals(raw, scorer, sf)
    filtered = step_filter(signals, sf)
    enriched = step_enrich(filtered, run_mode)
    analyzed = step_analyze(enriched)
    warnings = get_source_warnings()
    stats = {
        "total": len(signals),
        "filtered": len(filtered),
        "analyzed": len(analyzed),
    }
    step_persist_and_send(analyzed, exits, warnings, stats, run_mode)


def run_daily_light():
    """Run the ``daily_light`` pipeline."""
    _run_signal_pipeline("daily_light", "DAILY LIGHT")


def run_weekly_full():
    """Run the ``weekly_full`` pipeline."""
    _run_signal_pipeline("weekly_full", "WEEKLY FULL")


def run_thirteenf():
    """Run the ``thirteenf`` (13F-dedicated) pipeline."""
    _run_signal_pipeline("thirteenf", "13F DEDICATED")
def run_weekly_review():
    """Run outcome tracking and e-mail a short win/loss summary.

    A failure in ``run_outcome_tracking`` is logged; the summary is still
    built from whatever outcomes are stored in the ``signals`` table.
    """
    banner = "█" * 60
    logger.info(banner)
    logger.info(f"WEEKLY REVIEW — {datetime.utcnow():%Y-%m-%d %H:%M UTC}")
    logger.info(banner)
    init_db()
    try:
        run_outcome_tracking()
    except Exception as e:
        logger.error(f"Outcome tracking: {e}")

    from src.utils.storage import get_conn

    def _count(conn, sql):
        # Each query returns a single row with the count in column "c".
        return conn.execute(sql).fetchone()["c"]

    with get_conn() as conn:
        wins = _count(conn, "SELECT COUNT(*) as c FROM signals WHERE outcome='win'")
        losses = _count(conn, "SELECT COUNT(*) as c FROM signals WHERE outcome='loss'")
        total = _count(conn, "SELECT COUNT(*) as c FROM signals WHERE outcome IN ('win','loss')")

    if total > 0:
        win_rate = wins / total
    else:
        win_rate = 0

    html = f"""
📊 Weekly Review
Outcome-Tracking ausgeführt am {datetime.utcnow():%Y-%m-%d}
- Total geprüft: {total}
- Wins: {wins}
- Losses: {losses}
- Win-Rate: {win_rate:.1%}
"""
    from src.alerts.email_sender import send_email
    subject = f"Weekly Review · WR {win_rate:.0%}"
    send_email(subject, html)
# ── CLI ──────────────────────────────────────────────────────────────
def main():
    """Command-line entry point: parse ``--run-mode`` and run that pipeline.

    Any exception escaping the selected pipeline is logged with its
    traceback and the process exits with status 1.
    """
    # Insertion order of this table defines the order of the CLI choices.
    handlers = {
        "daily_light": run_daily_light,
        "weekly_full": run_weekly_full,
        "thirteenf": run_thirteenf,
        "weekly_review": run_weekly_review,
    }

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--run-mode",
        choices=list(handlers),
        default="daily_light",
        help="Welcher Run-Mode soll ausgeführt werden"
    )
    selected = parser.parse_args().run_mode

    try:
        handlers[selected]()
    except Exception as e:
        logger.exception(f"Pipeline fail: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
================================================
FILE: src/__init__.py
================================================
"""Smart Money Scanner v2"""
__version__ = "2.0.0"
================================================
FILE: src/ai/__init__.py
================================================
"""
AI-Layer: Claude-Analyse + Outcome-Tracking.
"""
================================================
FILE: src/ai/outcome_tracker.py
================================================
# src/ai/outcome_tracker.py
"""
Outcome-Tracker: misst Performance vergangener Signale.
Berechnet 30/60/90d Returns nach Signal-Datum und kalibriert
fund_weights automatisch basierend auf realer Performance.
"""
import requests
from datetime import datetime, timedelta
from typing import List, Dict, Optional
from src.utils.logger import logger
from src.utils.storage import get_conn
from src.utils.retry import retry
# HTTP headers sent with the Yahoo Finance chart request in _fetch_price_at;
# only a browser-style User-Agent is set.
HEADERS_YAHOO = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
}
@retry(times=2, delay=3)
def _fetch_price_at(ticker: str, target_date: str) -> Optional[float]:
    """Fetch the daily closing price for ``ticker`` on or near ``target_date``.

    Daily bars for a window of five days before and after the target are
    requested from the Yahoo Finance chart endpoint. Among the bars that
    carry a usable close, the one whose calendar day is closest to the
    target day is used; on a tie the earlier bar wins.

    Args:
        ticker: Symbol to look up.
        target_date: Date string; only the first 10 characters
            (``YYYY-MM-DD``) are used.

    Returns:
        The closing price, or ``None`` when the request fails, the response
        has no usable bar, or any error occurs.

    NOTE(review): every exception is caught inside this function, so the
    ``@retry`` decorator presumably never sees a failure -- confirm whether
    retries on network errors are intended.
    """
    try:
        target = datetime.strptime(target_date[:10], "%Y-%m-%d")
        url = f"https://query1.finance.yahoo.com/v8/finance/chart/{ticker}"
        params = {
            "period1": int((target - timedelta(days=5)).timestamp()),
            "period2": int((target + timedelta(days=5)).timestamp()),
            "interval": "1d"
        }
        resp = requests.get(url, headers=HEADERS_YAHOO, params=params, timeout=10)
        if resp.status_code != 200:
            return None
        result = resp.json()["chart"]["result"][0]
        timestamps = result.get("timestamp", [])
        closes = result["indicators"]["quote"][0].get("close", [])
        if not timestamps or not closes:
            return None

        # Compare whole calendar days (days since the epoch) instead of raw
        # epoch seconds. Assumes daily bars are stamped at some time during
        # their session (TODO confirm); measured from the target's midnight,
        # the previous session would then be "closer" than the target day's
        # own bar.
        target_day = (target - datetime(1970, 1, 1)).days
        # Bars without a close (null entries) are skipped so that a valid
        # neighbouring bar can still be used.
        candidates = [
            (abs(ts // 86400 - target_day), ts, close)
            for ts, close in zip(timestamps, closes)
            if ts is not None and close
        ]
        if not candidates:
            return None
        return min(candidates)[2]
    except Exception as e:
        logger.debug(f"Price {ticker}@{target_date}: {e}")
        return None
def calculate_returns(ticker: str, signal_date: str) -> Dict:
    """
    Compute the 30/60/90-day percentage returns of a signal.

    Returns ``{"error": "no_entry_price"}`` when no entry price is available.
    Otherwise the dict holds ``entry_price``, ``days_since_signal`` and one
    ``return_<N>d`` entry (rounded to two decimals) for every window that has
    already elapsed and for which a price could be fetched.
    """
    entry_price = _fetch_price_at(ticker, signal_date)
    if not entry_price:
        return {"error": "no_entry_price"}

    signal_day = datetime.strptime(signal_date[:10], "%Y-%m-%d")
    age_days = (datetime.utcnow().date() - signal_day.date()).days
    outcome = {"entry_price": entry_price, "days_since_signal": age_days}

    for horizon in (30, 60, 90):
        if age_days < horizon:
            # Window has not elapsed yet.
            continue
        horizon_date = (signal_day + timedelta(days=horizon)).strftime("%Y-%m-%d")
        horizon_price = _fetch_price_at(ticker, horizon_date)
        if not horizon_price:
            continue
        change_pct = (horizon_price - entry_price) / entry_price * 100
        outcome[f"return_{horizon}d"] = round(change_pct, 2)

    return outcome
def update_signal_outcomes(min_age_days: int = 30):
    """
    Update the outcome of open signals that are at least ``min_age_days`` old.

    Up to 100 ``trade`` / ``watchlist`` signals with an empty outcome are
    classified by their 60-day return (falling back to the 30-day return
    when the 60-day value is not available):
      - win:     >= +20%
      - loss:    <= -10%
      - neutral: in between

    Signals for which no return could be computed at all are skipped and
    stay open, so a later run can classify them once price data is
    available.

    Returns:
        Number of signals that were updated.
    """
    with get_conn() as conn:
        rows = conn.execute("""
SELECT id, ticker, date, action, fund_name
FROM signals
WHERE outcome = ''
AND action IN ('trade', 'watchlist')
AND date <= date('now', ?)
ORDER BY date DESC
LIMIT 100
""", (f"-{min_age_days} days",)).fetchall()
    logger.info(f"Outcome-Update: {len(rows)} Signale zu prüfen")

    updated = 0
    for row in rows:
        ret = calculate_returns(row["ticker"], row["date"])
        if "error" in ret:
            continue

        # 60d return is the main criterion, 30d the fallback.
        ret_60d = ret.get("return_60d", ret.get("return_30d"))
        if ret_60d is None:
            # No return available: previously this was stored as a permanent
            # "neutral" outcome with 0%, although nothing had been measured.
            continue

        if ret_60d >= 20:
            outcome = "win"
        elif ret_60d <= -10:
            outcome = "loss"
        else:
            outcome = "neutral"

        with get_conn() as conn:
            conn.execute("""
UPDATE signals
SET outcome = ?, outcome_pct = ?
WHERE id = ?
""", (outcome, ret_60d, row["id"]))
        updated += 1

    logger.info(f"Outcome-Update: {updated} aktualisiert")
    return updated
def update_fund_performance():
    """
    Aggregate win/loss outcomes per fund and write them to ``fund_performance``.

    Only funds with at least five decided (win or loss) signals are written.
    Average win/loss are stored as fractions; when an average is missing or
    zero the defaults 0.65 and -0.40 are stored instead.

    Returns:
        Number of funds written.
    """
    aggregate_sql = """
SELECT fund_name,
COUNT(*) as total,
SUM(CASE WHEN outcome='win' THEN 1 ELSE 0 END) as wins,
SUM(CASE WHEN outcome='loss' THEN 1 ELSE 0 END) as losses,
AVG(CASE WHEN outcome='win' THEN outcome_pct END) as avg_win,
AVG(CASE WHEN outcome='loss' THEN outcome_pct END) as avg_loss
FROM signals
WHERE outcome IN ('win', 'loss')
GROUP BY fund_name
HAVING total >= 5
"""
    upsert_sql = """
INSERT OR REPLACE INTO fund_performance
(fund_name, total_signals, wins, losses, win_rate,
avg_win_pct, avg_loss_pct, last_updated)
VALUES (?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
"""
    with get_conn() as conn:
        fund_rows = conn.execute(aggregate_sql).fetchall()

    for fund in fund_rows:
        total = fund["total"]
        wins = fund["wins"] or 0
        losses = fund["losses"] or 0
        win_rate = wins / total if total > 0 else 0.5

        # Percent values from the signals table are converted to fractions.
        avg_win = fund["avg_win"]
        avg_loss = fund["avg_loss"]
        avg_win_frac = avg_win / 100 if avg_win else 0.65
        avg_loss_frac = avg_loss / 100 if avg_loss else -0.40

        with get_conn() as conn:
            conn.execute(upsert_sql, (
                fund["fund_name"], total, wins, losses,
                win_rate,
                avg_win_frac,
                avg_loss_frac,
            ))

    logger.info(f"Fund-Performance: {len(fund_rows)} Funds aktualisiert")
    return len(fund_rows)
def run_outcome_tracking():
    """Main entry point: one complete outcome-update pass.

    First updates the outcomes of signals that are at least 30 days old,
    then re-aggregates the per-fund performance.
    """
    update_signal_outcomes(min_age_days=30)
    update_fund_performance()
================================================
FILE: src/ai/single_analyzer.py
================================================
# src/ai/single_analyzer.py
"""
Single-Signal Analyzer mit Claude.
Erweiterter Prompt für 2-6 Monats-Calls mit:
- Strategischer Kontext
- Multi-Quartals-Trend
- Preis-Kontext (Yahoo)
- Catalyst (Earnings)
- Options-Daten (Tradier real-time)
- Position-Sizing (Quarter-Kelly)
- Strikte Exit-Regeln
"""
import os
import json
import re
import anthropic
from typing import Dict, List
from src.utils.logger import logger
from src.utils.storage import get_fund_history, get_fund_accuracy
from src.utils.config import get_threshold
from src.utils.retry import retry
SYSTEM = """Du bist ein extrem disziplinierter, quantitativer Smart-Money-Analyst.
DEINE AUFGABE:
Entscheide ob ein gefiltertes Insider/Institutional-Signal trade-würdig ist
für einen 90-180 Tage Call-Option (mittelfristige Conviction).
STRIKTE REGELN:
- Du wirst 60-70% aller Signale als "kein_trade" klassifizieren. Das ist Qualität.
- Multi-Quartals-Trend (3+ Quartale in Folge) = stärkstes Signal überhaupt
- Multi-Fund-Cluster (2+ Top-Funds): starke Tendenz zu trade/watchlist
- IV-Rank > 50 = teurer Eintrag, brauchst stärkeres Signal
- IV-Rank > 70 = Hard-Kill, niemals empfehlen
- Earnings zu nah (<30d): IV bereits aufgebläht, warten
- Earnings 30-90d: optimaler Katalysator-Bonus
- 10b5-1 Plan-Trades sind IMMER schwächer als spontane Käufe
- Preis-Kontext: nahe 52W-Tief + unter MA50 = besseres Setup
POSITION-SIZING:
- Berechne Quarter-Kelly basierend auf Fund-Win-Rate
- Maximum 5% pro Trade (auch wenn Kelly höher)
- Minimum 0.5% (sonst zu klein)
EXIT-REGELN (NIEMALS verhandelbar):
- Take-Profit bei +80%
- Stop-Loss bei -45%
- IMMER raus bei ≤21 Tagen bis Expiry (Theta!)
- Exit bei Fund-Verkauf im nächsten 13F
- Exit bei Insider Sell-Off
Antworte NUR mit validem JSON. Kein Text davor oder danach."""
PROMPT = """SIGNAL ZUR ANALYSE (für 90-180d Call):
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SIGNAL DETAILS:
Ticker: {ticker}
Fund: {fund_name} (Score: {fund_score}/50, Kategorie: {fund_category})
Signal-Typ: {signal_type}
Quellen ({source_count}): {sources}
Signal-Stärke: {strength}/100
Conviction: {conviction:.2f}
Multi-Quartals-Trend: {consecutive_quarters} Quartale in Folge {trend_indicator}
Cluster-Info: {cluster_info}
10b5-1 Plan: {is_10b5}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
PREIS-KONTEXT (Yahoo Finance):
{price_context}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
CATALYST:
{catalyst_info}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
OPTIONS-DATEN (Tradier Real-Time):
{options_block}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
FUND-HISTORIE (letzte 3):
{fund_history}
Historische Trefferquote: {historical_accuracy:.0%}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
NEWS & MAKRO:
News-Alignment: {news_alignment:+.2f} (-1=bearish, +1=bullish)
Makro-Kontext: {macro_context}
Aktuelle Headlines:
{headlines}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Antworte mit exakt diesem JSON:
{{
"action": "trade" | "watchlist" | "kein_trade",
"confidence": 0.0,
"signal_score": 0,
"reasoning": "Max. 3 präzise Sätze: Warum stark/schwach? Welche Daten überzeugen?",
"key_arguments": ["Argument 1", "Argument 2", "Argument 3"],
"risk_factors": ["max. 2 konkrete Risiken"],
"suggested_instrument": "Exakter Strike, Expiry, Mid-Price - z.B. NVDA Call $200 Exp 2025-09-20 (120d) Mid $12.50",
"options_rationale": "Warum dieser Strike + Laufzeit",
"position_size_pct": 2.5,
"position_sizing_logic": "Quarter-Kelly basierend auf Win-Rate X%",
"exit_triggers": {{
"take_profit_pct": 80,
"stop_loss_pct": -45,
"time_exit_days": 21,
"fund_exit": "Wenn Fund im nächsten 13F reduziert"
}},
"no_trade_reason": "Nur wenn action=kein_trade, sonst null"
}}"""
class SingleAnalyzer:
    """Claude-based analysis of single enriched signals.

    ``analyze`` never raises for a single signal: prompt construction, the
    API call and response parsing are all guarded, and any failure yields a
    ``kein_trade`` result so that one bad signal cannot abort a batch.
    """

    def __init__(self):
        """Create the API client.

        Raises:
            ValueError: If ``ANTHROPIC_API_KEY`` is not set.
        """
        api_key = os.environ.get("ANTHROPIC_API_KEY")
        if not api_key:
            raise ValueError("ANTHROPIC_API_KEY nicht gesetzt")
        self.client = anthropic.Anthropic(api_key=api_key)
        self.model = "claude-sonnet-4-6"

    def _format_history(self, history: List[Dict]) -> str:
        """Render the fund's past signals as one prompt line per entry."""
        if not history:
            return " Keine historischen Daten (neuer Fund im System)"
        lines = []
        for h in history:
            outcome = h.get("outcome") or "ausstehend"
            # Stored values may be None (not yet evaluated); the numeric
            # format spec and the slice below would fail on None.
            pct = h.get("outcome_pct") or 0
            reasoning = h.get("reasoning") or ""
            lines.append(
                f" {h.get('date', '?')} | {h.get('ticker', '?')} | "
                f"{h.get('action', '?')} | {outcome} ({pct:+.0f}%) | "
                f"{reasoning[:60]}"
            )
        return "\n".join(lines)

    def _format_options_block(self, signal: Dict) -> str:
        """Render the options data of ``signal`` for the prompt."""
        opt_data = signal.get("options_data") or {}
        if not opt_data:
            return " Keine Options-Daten verfügbar"

        def num(key):
            # Missing or None values are shown as 0 instead of crashing
            # the numeric format specs.
            return opt_data.get(key) or 0

        # The enrichment step stores the IV rank on the signal itself; use
        # it when the options data does not carry one, so the prompt does
        # not report a misleading rank of 0.
        iv_rank = opt_data.get("iv_rank")
        if iv_rank is None:
            iv_rank = signal.get("iv_rank") or 0
        otm_pct = signal.get("otm_pct") or 0

        return (
            f" Call Strike ${num('strike'):.2f} ({otm_pct:.1f}% OTM)\n"
            f" Expiry: {opt_data.get('expiry', '?')} ({opt_data.get('days_to_exp', 0)} Tage)\n"
            f" Mid: ${num('mid'):.2f} | Bid: ${num('bid'):.2f} | Ask: ${num('ask'):.2f}\n"
            f" Spread: {num('spread_pct'):.2f}% | OI: {num('open_interest'):,}\n"
            f" IV: {num('iv'):.1f}% | IV-Rank: {iv_rank:.0f}\n"
            f" Greeks: Delta={num('delta'):.2f} | Vega={num('vega'):.3f} | "
            f"Theta={num('theta'):.3f}"
        )

    def _format_catalyst(self, signal: Dict) -> str:
        """Render the catalyst information of ``signal`` for the prompt."""
        cat = signal.get("catalyst") or {}
        if not cat or not cat.get("has_catalyst"):
            return " Kein Earnings-Katalysator in Laufzeit (Conviction -0.10)"
        modifier = cat.get("conviction_modifier") or 0
        return (
            f" Type: {cat.get('type', '?')}\n"
            f" Datum: {cat.get('date', '?')} (in {cat.get('days_away', 0)} Tagen)\n"
            f" Bewertung: {cat.get('summary', '')}\n"
            f" Conviction-Modifier: {modifier:+.2f}"
        )

    def _format_price_context(self, signal: Dict) -> str:
        """Render the price context via ``price_context.format_for_prompt``."""
        from src.enrich.price_context import format_for_prompt
        ctx = signal.get("price_context") or {}
        return format_for_prompt(ctx)

    @retry(times=3, delay=5)
    def _call_api(self, prompt: str) -> str:
        """Send ``prompt`` to the model and return the stripped text of the
        first content block."""
        resp = self.client.messages.create(
            model=self.model,
            max_tokens=1500,
            system=SYSTEM,
            messages=[{"role": "user", "content": prompt}]
        )
        return resp.content[0].text.strip()

    @staticmethod
    def _parse_json_response(response_text: str) -> Dict:
        """Parse the model response into a dict.

        Markdown code fences are stripped first. If the remaining text is
        not valid JSON, the span from the first ``{`` to the last ``}`` is
        tried, which tolerates text before or after the JSON object.

        Raises:
            json.JSONDecodeError: If no valid JSON can be extracted.
            ValueError: If the JSON value is not an object.
        """
        cleaned = re.sub(r"^```(?:json)?\s*", "", response_text)
        cleaned = re.sub(r"\s*```$", "", cleaned).strip()
        try:
            parsed = json.loads(cleaned)
        except json.JSONDecodeError:
            start, end = cleaned.find("{"), cleaned.rfind("}")
            if start == -1 or end <= start:
                raise
            parsed = json.loads(cleaned[start:end + 1])
        if not isinstance(parsed, dict):
            raise ValueError("Response is not a JSON object")
        return parsed

    def _build_prompt(self, signal: Dict, news: List[Dict], fund_name: str) -> str:
        """Fill the PROMPT template for one signal."""
        history = get_fund_history(fund_name)
        accuracy = get_fund_accuracy(fund_name)
        headlines = "\n".join(f" - {n.get('title', '')}" for n in (news or [])[:8]) \
            or " Keine relevanten News"

        consecutive = signal.get("consecutive_quarters", 0)
        trend_indicator = ""
        if consecutive >= 3:
            trend_indicator = "← STÄRKSTES SIGNAL ✓"
        elif consecutive >= 2:
            trend_indicator = "← Starkes Signal ✓"

        cluster_info = "Standard"
        if signal.get("is_clustered"):
            cluster_info = "Clustered (mehrere unabhängige Quellen)"

        return PROMPT.format(
            ticker=signal.get("ticker", ""),
            fund_name=fund_name,
            fund_score=signal.get("fund_score", 0),
            fund_category=signal.get("fund_category", "unknown"),
            signal_type=signal.get("signal_type", ""),
            source_count=signal.get("source_count", 1),
            sources=", ".join(signal.get("sources", [signal.get("signal_type", "")])),
            strength=signal.get("strength", 0),
            conviction=signal.get("conviction", 0.0),
            consecutive_quarters=consecutive,
            trend_indicator=trend_indicator,
            cluster_info=cluster_info,
            is_10b5="JA (schwächer)" if signal.get("is_10b5") else "Nein",
            price_context=self._format_price_context(signal),
            catalyst_info=self._format_catalyst(signal),
            options_block=self._format_options_block(signal),
            fund_history=self._format_history(history),
            historical_accuracy=accuracy,
            news_alignment=signal.get("news_alignment", 0.0),
            macro_context=signal.get("macro_context", "neutral"),
            headlines=headlines,
        )

    def analyze(self, signal: Dict, news: List[Dict]) -> Dict:
        """Analyse a single signal.

        Args:
            signal: Enriched signal dict.
            news: News items for the ticker; the first eight titles are
                included in the prompt.

        Returns:
            The parsed model answer extended with ``ticker``, ``fund_name``
            and ``raw_signal``. On any failure a ``kein_trade`` dict with
            confidence 0.0 and the failure reason in ``reasoning``.
        """
        ticker = signal.get("ticker", "")
        fund_name = signal.get("fund_name", "Unknown")

        def _fallback(reason: str) -> Dict:
            return {
                "action": "kein_trade",
                "confidence": 0.0,
                "ticker": ticker,
                "fund_name": fund_name,
                "reasoning": reason,
                "raw_signal": signal
            }

        try:
            # Prompt construction touches the database and formats external
            # data, so it is guarded together with the API call.
            prompt = self._build_prompt(signal, news, fund_name)
            result = self._parse_json_response(self._call_api(prompt))
            result["ticker"] = ticker
            result["fund_name"] = fund_name
            result["raw_signal"] = signal
            action = result.get("action", "kein_trade")
            conf = result.get("confidence", 0.0)
            logger.info(f" Claude → {result['ticker']}: {action} ({conf:.2f})")
            return result
        except json.JSONDecodeError as e:
            logger.error(f"JSON-Fehler: {e}")
            return _fallback("JSON-Parse-Fehler")
        except Exception as e:
            logger.error(f"Claude API: {e}")
            return _fallback(str(e))

    def analyze_batch(self, signals: List[Dict], news_map: Dict) -> List[Dict]:
        """Analyse several signals in order and log a summary by action.

        Args:
            signals: Enriched signal dicts.
            news_map: News items keyed by ticker.

        Returns:
            One result dict per input signal, in input order.
        """
        results = []
        for sig in signals:
            news = news_map.get(sig.get("ticker", ""), [])
            results.append(self.analyze(sig, news))

        trades = [r for r in results if r.get("action") == "trade"]
        watchlist = [r for r in results if r.get("action") == "watchlist"]
        no_trade = [r for r in results if r.get("action") == "kein_trade"]
        logger.info(
            f"Claude-Batch: {len(trades)} TRADE · "
            f"{len(watchlist)} WATCHLIST · {len(no_trade)} KEIN"
        )
        return results
================================================
FILE: src/alerts/__init__.py
================================================
"""
Alerts-Layer: E-Mail-Versand mit Apple-Style HTML.
"""
================================================
FILE: src/alerts/email_sender.py
================================================
# src/alerts/email_sender.py
"""
E-Mail-Versand mit Apple-Style HTML-Design.
Sektionen:
- Trades (action=trade)
- Watchlist (action=watchlist)
- Exits (Exit-Trigger ausgelöst)
- Source Health Warnings
- Footer mit Stats
"""
import os
import smtplib
from datetime import datetime
from typing import List, Dict
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from src.utils.logger import logger
# SMTP credentials and recipient are read once at import time; a missing
# variable yields "" and send_email() then refuses to send.
GMAIL_USER = os.environ.get("GMAIL_USER", "")
GMAIL_PASSWORD = os.environ.get("GMAIL_PASSWORD", "")
RECIPIENT = os.environ.get("RECIPIENT_EMAIL", "")
# ── HTML templates (Apple-inspired) ──────────────────────────────────
# NOTE(review): CSS is effectively empty in this copy of the file — the
# stylesheet markup appears to have been stripped; confirm against the repo.
CSS = """
"""
def _format_signal(sig: Dict) -> str:
"""Format one analyzed signal as an HTML card.

Reads the analysis fields from ``sig`` (action, confidence, reasoning,
instrument, sizing, arguments, risks) and fund details from the nested
``sig["raw_signal"]`` dict, falling back to defaults for missing keys.
"""
# NOTE(review): the HTML tags inside the string literals of this function
# appear to have been stripped from this copy (several literals are empty or
# span lines), so the code is left untouched — restore from the repository.
action = sig.get("action", "kein_trade")
ticker = sig.get("ticker", "?")
confidence = sig.get("confidence", 0)
reasoning = sig.get("reasoning", "Keine Begründung")
instrument = sig.get("suggested_instrument", "")
options_rationale = sig.get("options_rationale", "")
position_size = sig.get("position_size_pct", 0)
sizing_logic = sig.get("position_sizing_logic", "")
args = sig.get("key_arguments", [])
risks = sig.get("risk_factors", [])
raw = sig.get("raw_signal", {})
# Fund name: prefer the raw signal's value, then the top-level one.
fund_name = raw.get("fund_name", sig.get("fund_name", "Unknown"))
consecutive = raw.get("consecutive_quarters", 0)
# Card / badge CSS class names derived from the action.
css_class = "trade" if action == "trade" else ("watchlist" if action == "watchlist" else "no-trade")
badge_class = "trade" if action == "trade" else ("watch" if action == "watchlist" else "neutral")
badge_text = action.upper().replace("_", " ")
args_html = "".join(f"{a}" for a in args) if args else ""
risks_html = "".join(f'{r}' for r in risks) if risks else ""
# Streak marker is only shown from two consecutive quarters upwards.
consec_str = f" · {consecutive}Q in Folge ✓" if consecutive >= 2 else ""
instrument_html = ""
if instrument:
instrument_html = f'{instrument}'
if options_rationale:
instrument_html += f"
{options_rationale}"
instrument_html += "
"
sizing_html = ""
# Sizing details are rendered for trade recommendations with a non-zero size only.
if action == "trade" and position_size:
sizing_html = (
f''
f'Size: {position_size:.1f}%'
f'Conf: {confidence:.0%}'
f'
'
f'{sizing_logic}
'
)
return f"""
{fund_name}{consec_str}
{reasoning}
{f'
' if args_html else ''}
{f'
' if risks_html else ''}
{instrument_html}
{sizing_html}
"""
def _format_exit(exit_data: Dict) -> str:
"""Format one exit recommendation as an HTML card.

``exit_data`` must contain ``"position"`` and ``"trigger"`` dicts, and the
trigger must contain ``"reason"`` (missing keys raise KeyError).
"""
# NOTE(review): the HTML tags of the template appear to have been stripped
# from this copy; several locals below are unused here and were presumably
# referenced by the lost markup — confirm against the repository.
pos = exit_data["position"]
trigger = exit_data["trigger"]
reason = trigger["reason"]
pnl = trigger.get("pnl_pct", 0)
days_left = trigger.get("days_left", 0)
pnl_class = "pnl-pos" if pnl > 0 else "pnl-neg"
css_class = "exit-tp" if reason == "take_profit" else "exit"
badge_class = "tp" if reason == "take_profit" else "exit"
# Human-readable label; unknown reasons fall back to their upper-cased name.
reason_label = {
"take_profit": "TAKE PROFIT",
"stop_loss": "STOP LOSS",
"time_exit": "TIME EXIT",
"partial_take": "PARTIAL TAKE",
}.get(reason, reason.upper())
return f"""
Strike ${pos.get('strike', 0):.2f} · Exp {pos.get('expiry', '?')} ·
{days_left}d remaining
{trigger.get('message', '')}
Entry: ${pos.get('entry_price_option', 0):.2f} →
Current: ${trigger.get('current_mid', 0):.2f}
({pnl:+.1f}%)
"""
def _format_warning(warning: Dict) -> str:
"""Format a source-health warning.

``warning`` must provide ``"source"`` and ``"days"`` (KeyError otherwise).
"""
# NOTE(review): surrounding HTML markup appears stripped in this copy.
return f"""
⚠️ {warning['source']} liefert seit {warning['days']} Tagen
keine Daten mehr. Bitte prüfen.
"""
def build_html(
trades: List[Dict],
watchlist: List[Dict],
exits: List[Dict],
no_trades: List[Dict],
warnings: List[Dict],
stats: Dict,
run_mode: str,
) -> str:
"""Assemble the complete HTML e-mail body.

Each non-empty input list becomes its own section; the final document lists
warnings first, then exits, trades, watchlist, the empty-state note and the
rejected signals.
"""
# NOTE(review): the HTML tags of the templates appear to have been stripped
# from this copy; `today` and `stats_html` are not referenced by the visible
# template and were presumably used by the lost markup — confirm.
today = datetime.utcnow().strftime("%A, %d. %B %Y")
# Sections
trade_section = ""
if trades:
trade_html = "".join(_format_signal(t) for t in trades)
trade_section = f"""
📈 Trade-Empfehlungen ({len(trades)})
{trade_html}
"""
watch_section = ""
if watchlist:
watch_html = "".join(_format_signal(w) for w in watchlist)
watch_section = f"""
👁️ Watchlist ({len(watchlist)})
{watch_html}
"""
exit_section = ""
if exits:
exit_html = "".join(_format_exit(e) for e in exits)
exit_section = f"""
🚪 Exit-Trigger ({len(exits)})
{exit_html}
"""
warning_section = ""
if warnings:
warning_html = "".join(_format_warning(w) for w in warnings)
warning_section = f"""
⚠️ Datenquellen-Warnungen
{warning_html}
"""
no_trade_section = ""
if no_trades and run_mode == "weekly_full":
# Rejected signals are shown only in the weekly_full run, capped at five.
nt_html = "".join(_format_signal(nt) for nt in no_trades[:5])
no_trade_section = f"""
⚪ Verworfene Signale ({len(no_trades)} total, top 5)
{nt_html}
"""
# Empty state: shown when there are no trades, watchlist entries or exits.
if not trades and not watchlist and not exits:
empty_html = """
Keine handelbaren Signale heute.
Disziplin > Aktivität.
"""
else:
empty_html = ""
# Stats footer
stats_html = (
f"Run: {run_mode} · "
f"Signals raw: {stats.get('total', 0)} · "
f"Filter passed: {stats.get('filtered', 0)} · "
f"Claude analyzed: {stats.get('analyzed', 0)} · "
f"Trades: {len(trades)} · Watchlist: {len(watchlist)}"
)
return f"""
Smart Money Scanner
{CSS}
{warning_section}
{exit_section}
{trade_section}
{watch_section}
{empty_html}
{no_trade_section}
"""
def send_email(subject: str, html: str) -> bool:
    """Send an HTML e-mail through Gmail's SMTP-over-SSL endpoint.

    Args:
        subject: Subject line.
        html: HTML body, attached as a UTF-8 ``text/html`` part.

    Returns:
        True once the message was handed to the server; False when any of
        the configured credentials/recipient is empty or sending failed
        (the failure is logged, never raised).
    """
    if not all((GMAIL_USER, GMAIL_PASSWORD, RECIPIENT)):
        logger.error("E-Mail-Credentials fehlen")
        return False

    try:
        message = MIMEMultipart("alternative")
        for header, value in (
            ("Subject", subject),
            ("From", GMAIL_USER),
            ("To", RECIPIENT),
        ):
            message[header] = value
        message.attach(MIMEText(html, "html", "utf-8"))

        with smtplib.SMTP_SSL("smtp.gmail.com", 465, timeout=30) as smtp:
            smtp.login(GMAIL_USER, GMAIL_PASSWORD)
            smtp.sendmail(GMAIL_USER, RECIPIENT, message.as_string())

        logger.info(f"E-Mail gesendet an {RECIPIENT}")
        return True
    except Exception as e:
        # Best effort: a failed report must not abort the caller.
        logger.error(f"E-Mail Fehler: {e}")
        return False
def send_report(
    trades: List[Dict] = None,
    watchlist: List[Dict] = None,
    exits: List[Dict] = None,
    no_trades: List[Dict] = None,
    warnings: List[Dict] = None,
    stats: Dict = None,
    run_mode: str = "scan",
) -> bool:
    """High-level entry point: build the report and send it.

    Every list argument may be omitted (treated as empty) and ``stats``
    defaults to an empty dict. The subject line summarises the number of
    exits, trades and watchlist entries, in that order.

    Returns:
        The result of ``send_email``.
    """
    trades, watchlist, exits, no_trades, warnings = (
        bucket or [] for bucket in (trades, watchlist, exits, no_trades, warnings)
    )
    stats = stats or {}

    # One "<count><icon>" badge per non-empty category.
    badges = [
        f"{len(bucket)}{icon}"
        for bucket, icon in ((exits, "🚪"), (trades, "📈"), (watchlist, "👁"))
        if bucket
    ]
    if badges:
        subject = f"Smart Money: {' · '.join(badges)}"
    else:
        subject = "Smart Money: keine Signale"

    body = build_html(trades, watchlist, exits, no_trades, warnings, stats, run_mode)
    return send_email(subject, body)
================================================
FILE: src/enrich/__init__.py
================================================
"""
Enrich-Layer: Anreicherung der Rohdaten.
- price_context: aktueller Kurs, MA50, 52W-Range
- catalyst_finder: Earnings-Termine
- options_prefilter: Tradier-basierte Pre-Filterung
- sentiment: News-Sentiment
"""
================================================
FILE: src/enrich/catalyst_finder.py
================================================
# src/enrich/catalyst_finder.py
"""
Catalyst-Finder: sucht Earnings-Termine im Options-Laufzeitfenster.
Quelle: Yahoo Finance Calendar.
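Scoring (defaults; each value can be overridden via the "catalyst" thresholds):
- Optimal (30-90 days away): +0.20 conviction
- Too close (<30 days): -0.15 (IV already inflated)
- Further out than 90 days: +0.05 (acceptable)
- No earnings date found, or earnings after the option expiry: -0.10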
"""
import requests
from typing import Optional, Dict
from datetime import datetime, date, timedelta
from src.utils.logger import logger
from src.utils.retry import retry
from src.utils.config import get_threshold
# Browser-like User-Agent sent with every Yahoo Finance request in this module.
HEADERS_YAHOO = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
}
@retry(times=2, delay=3)
def get_earnings_date(ticker: str) -> Optional[date]:
    """Look up the next earnings date via Yahoo's quoteSummary endpoint.

    Args:
        ticker: Stock symbol; empty values and the placeholders
            ``"UNKNOWN"`` / ``"PORTFOLIO"`` are rejected without a request.

    Returns:
        The first listed earnings date (converted from its UTC timestamp),
        or None when the ticker is a placeholder, the response is not
        HTTP 200, no date is listed, or any error occurs (logged at debug).
    """
    if not ticker or ticker in ("UNKNOWN", "PORTFOLIO", ""):
        return None

    try:
        response = requests.get(
            f"https://query1.finance.yahoo.com/v10/finance/quoteSummary/{ticker}",
            params={"modules": "calendarEvents"},
            headers=HEADERS_YAHOO,
            timeout=10,
        )
        if response.status_code != 200:
            return None

        summary = response.json().get("quoteSummary", {})
        first_result = summary.get("result", [{}])[0]
        calendar = first_result.get("calendarEvents", {})
        upcoming = calendar.get("earnings", {}).get("earningsDate", [])
        if upcoming:
            return datetime.utcfromtimestamp(upcoming[0]["raw"]).date()
    except Exception as e:
        logger.debug(f"Earnings {ticker}: {e}")
    return None
def catalyst_score(ticker: str, expiry_date: Optional[date] = None) -> Dict:
    """Rate the earnings catalyst for an options position.

    Args:
        ticker: Stock symbol.
        expiry_date: Planned option expiry (optional). When given, earnings
            that fall after it do not count as a catalyst.

    Returns:
        Dict with ``has_catalyst``, ``conviction_modifier`` and ``summary``;
        ``type`` is set except when no earnings date was found, and
        ``date`` / ``days_away`` are set whenever ``has_catalyst`` is True.
    """
    no_catalyst_penalty = get_threshold("catalyst", "penalty_no_catalyst", -0.10)

    earnings = get_earnings_date(ticker)
    if not earnings:
        return {
            "has_catalyst": False,
            "conviction_modifier": no_catalyst_penalty,
            "summary": "Kein Earnings-Datum gefunden"
        }

    today = datetime.utcnow().date()
    days_away = (earnings - today).days

    # The data source can still report the previous earnings date. An event
    # that already happened is not an upcoming catalyst, and must not be
    # classified as "too close" with a negative day count.
    if days_away < 0:
        return {
            "has_catalyst": False,
            "type": "earnings_in_past",
            "conviction_modifier": no_catalyst_penalty,
            "summary": f"Earnings-Datum ({earnings}) liegt in der Vergangenheit"
        }

    # If an expiry is given, earnings must happen on or before it.
    if expiry_date:
        days_to_expiry = (expiry_date - today).days
        if days_away > days_to_expiry:
            return {
                "has_catalyst": False,
                "type": "earnings_after_expiry",
                "conviction_modifier": no_catalyst_penalty,
                "summary": f"Earnings ({earnings}) liegt nach Expiry"
            }

    too_close = get_threshold("catalyst", "earnings_too_close_days", 30)
    optimal_min = get_threshold("catalyst", "earnings_optimal_min_days", 30)
    optimal_max = get_threshold("catalyst", "earnings_optimal_max_days", 90)

    if days_away < too_close:
        kind = "earnings_too_close"
        modifier = get_threshold("catalyst", "penalty_too_close", -0.15)
        summary = f"Earnings in {days_away}d - zu nah, IV aufgebläht"
    elif optimal_min <= days_away <= optimal_max:
        kind = "earnings_optimal"
        modifier = get_threshold("catalyst", "bonus_optimal", 0.20)
        summary = f"Earnings in {days_away}d ✓ optimal"
    else:
        kind = "earnings_far"
        modifier = get_threshold("catalyst", "bonus_acceptable", 0.05)
        summary = f"Earnings in {days_away}d - akzeptabel"

    return {
        "has_catalyst": True,
        "type": kind,
        "date": str(earnings),
        "days_away": days_away,
        "conviction_modifier": modifier,
        "summary": summary
    }
================================================
FILE: src/enrich/macro_context.py
================================================
# src/enrich/macro_context.py
"""
Makro-Kontext via Polymarket + Kalshi.
Liefert "bullish" / "neutral" / "bearish" für aktuelles Marktumfeld.
"""
import os
import requests
from typing import Dict, List
from src.utils.logger import logger
from src.utils.retry import retry
# User-Agent for the prediction-market APIs; it embeds the GMAIL_USER
# environment variable (or a placeholder address) as a contact.
HEADERS = {
"User-Agent": f"SmartMoneyScanner {os.environ.get('GMAIL_USER', 'scanner@example.com')}"
}
@retry(times=2, delay=5)
def _polymarket(keywords: List[str]) -> List[Dict]:
    """Fetch open Polymarket markets whose question mentions a keyword.

    Args:
        keywords: Search terms, matched case-insensitively as substrings.

    Returns:
        Up to four dicts with ``source``, ``question`` and ``probability``
        (price of the first outcome, or None when unavailable). Any error
        yields an empty list.
    """
    import json  # local import: only this function needs it

    try:
        resp = requests.get(
            "https://gamma-api.polymarket.com/markets?closed=false&limit=100",
            headers=HEADERS, timeout=15
        )
        needles = [k.lower() for k in keywords]
        out = []
        for m in resp.json():
            # A null question must not abort the scan of the remaining markets.
            question = m.get("question") or ""
            q = question.lower()
            if not any(n in q for n in needles):
                continue

            # The API delivers outcomePrices as a JSON-encoded string such as
            # '["0.62", "0.38"]'; indexing the raw string would return "[".
            prices = m.get("outcomePrices")
            if isinstance(prices, str):
                try:
                    prices = json.loads(prices)
                except ValueError:
                    prices = None
            has_prices = isinstance(prices, (list, tuple)) and len(prices) > 0

            out.append({
                "source": "polymarket",
                "question": question,
                "probability": prices[0] if has_prices else None,
            })
        return out[:4]
    except Exception:
        return []
@retry(times=2, delay=5)
def _kalshi(keywords: List[str]) -> List[Dict]:
    """Fetch open Kalshi markets whose title mentions a keyword.

    Args:
        keywords: Search terms, matched case-insensitively as substrings.

    Returns:
        Up to four dicts with ``source``, ``question`` and ``probability``
        (0-1 scale, or None when no usable price exists). Any error yields
        an empty list.
    """
    try:
        resp = requests.get(
            "https://trading-api.kalshi.com/trade-api/v2/markets?limit=100&status=open",
            headers=HEADERS, timeout=15
        )
        needles = [k.lower() for k in keywords]
        out = []
        for m in resp.json().get("markets", []):
            # A null title must not abort the scan of the remaining markets.
            title = m.get("title") or ""
            t = title.lower()
            if not any(n in t for n in needles):
                continue

            # last_price is quoted in cents (0-100). The consumer compares
            # probabilities against 0.65, so convert to the 0-1 scale that
            # the Polymarket entries use.
            try:
                probability = round(float(m.get("last_price")) / 100, 4)
            except (TypeError, ValueError):
                probability = None

            out.append({
                "source": "kalshi",
                "question": title,
                "probability": probability,
            })
        return out[:4]
    except Exception:
        return []
def get_macro_context(ticker: str, sector: str = "") -> Dict:
    """Derive a coarse macro stance from prediction markets.

    Markets are searched for the ticker, the sector and a fixed set of
    macro terms. A market counts as bullish or bearish only when its
    probability exceeds 0.65 and its question contains one of the
    respective signal words.

    Returns:
        Dict with ``context`` ("bullish" / "bearish" / "neutral"),
        ``summary`` (first three markets) and ``markets`` (all matches).
    """
    search_terms = [
        term
        for term in (ticker, sector, "interest rate", "fed", "regulation", "inflation")
        if term
    ]

    markets = []
    for label, fetch in (("Polymarket", _polymarket), ("Kalshi", _kalshi)):
        try:
            markets += fetch(search_terms)
        except Exception as e:
            logger.warning(f"{label}: {e}")

    if not markets:
        return {"context": "neutral", "summary": "Keine Märkte gefunden", "markets": []}

    bullish_words = ["cut", "lower", "approve", "win", "bullish"]
    bearish_words = ["hike", "ban", "regulation", "bearish", "fail"]
    votes = {"bullish": 0, "bearish": 0}
    for market in markets:
        try:
            probability = float(market.get("probability") or 0)
            question = market.get("question", "").lower()
            if not probability > 0.65:
                continue
            if any(word in question for word in bullish_words):
                votes["bullish"] += 1
            elif any(word in question for word in bearish_words):
                votes["bearish"] += 1
        except (ValueError, TypeError):
            # Unparseable probability: the market simply does not vote.
            pass

    if votes["bullish"] > votes["bearish"]:
        stance = "bullish"
    elif votes["bearish"] > votes["bullish"]:
        stance = "bearish"
    else:
        stance = "neutral"

    headline = " | ".join(
        f"{m['source']}: {m['question'][:55]} ({m['probability']})"
        for m in markets[:3]
    )
    return {"context": stance, "summary": headline, "markets": markets}
================================================
FILE: src/enrich/options_prefilter.py
================================================
"""
Options Pre-Filter mit Tradier-Daten.
LÄUFT VOR CLAUDE - blockiert Signale die keine handelbaren Options haben.
WICHTIG (nach Schnell-Fix):
- "no_quote" und "no_qualified_strike" sind jetzt SOFT → Pipeline stirbt nicht mehr.
- Nur echte Hard-Kills (z.B. IV-Rank > 70) blocken noch.
"""
from typing import Dict, Optional
from datetime import datetime, timedelta
from src.utils.logger import logger
from src.utils.config import get_threshold
from src.execution.tradier_client import get_client
def calculate_iv_rank(ticker: str, current_iv: float) -> float:
    """Estimate an IV rank from one year of weekly price history.

    The value is ``current_iv`` divided by the 52-week price range (in
    percent of the low), scaled by 50 and capped at 99.0.

    Returns:
        The estimate rounded to one decimal, or the neutral fallback 50.0
        when Tradier is not configured, fewer than ten closes exist, the
        range cannot be computed, or any error occurs.
    """
    neutral = 50.0
    client = get_client()
    if not client.is_configured:
        return neutral

    try:
        start = (datetime.utcnow() - timedelta(days=365)).strftime("%Y-%m-%d")
        end = datetime.utcnow().strftime("%Y-%m-%d")
        bars = client.get_history(ticker, interval="weekly", start=start, end=end)
        if not bars:
            return neutral

        closes = [float(bar.get("close", 0)) for bar in bars if bar.get("close")]
        if len(closes) < 10:
            return neutral

        lowest, highest = min(closes), max(closes)
        if lowest <= 0:
            return neutral

        range_pct = (highest - lowest) / lowest * 100
        if range_pct == 0:
            return neutral

        return round(min((current_iv / range_pct) * 50, 99.0), 1)
    except Exception:
        return neutral
def find_target_expiry(ticker: str) -> Optional[str]:
    """Pick the expiry inside the configured window that is closest to 120 days.

    The window defaults to 90-180 days to expiry (``options`` thresholds).

    Returns:
        The chosen expiry as ``YYYY-MM-DD``, or None when no expirations
        are available or none falls inside the window.
    """
    expirations = get_client().get_expirations(ticker)
    if not expirations:
        return None

    today = datetime.utcnow().date()
    min_days = get_threshold("options", "min_days_to_exp", 90)
    max_days = get_threshold("options", "max_days_to_exp", 180)

    in_window = []
    for expiry in expirations:
        try:
            remaining = (datetime.strptime(expiry, "%Y-%m-%d").date() - today).days
        except ValueError:
            continue  # not a YYYY-MM-DD date
        if min_days <= remaining <= max_days:
            in_window.append((remaining, expiry))

    if not in_window:
        return None
    # min() keeps the first of equally distant candidates, like a stable sort.
    _, chosen = min(in_window, key=lambda pair: abs(pair[0] - 120))
    return chosen
def find_best_call(ticker: str, stock_price: float, expiration: str) -> Optional[Dict]:
    """Select the most liquid call in the configured OTM window.

    Calls with a strike 5-15% above ``stock_price`` (defaults) are
    considered. A contract qualifies with positive bid and ask, at least
    the minimum open interest and a bid/ask spread within the limit.
    Qualified contracts are ranked by ``open_interest / 1000`` plus the
    spread headroom. Delta is reported but not used for filtering.

    Returns:
        Dict with strike, bid, ask, mid, open_interest, spread_pct, iv,
        delta, vega and theta of the best contract, or None.
    """
    chain = get_client().get_options_chain(ticker, expiration, with_greeks=True)
    if not chain:
        return None

    otm_min = get_threshold("options", "target_otm_min_pct", 0.05)
    otm_max = get_threshold("options", "target_otm_max_pct", 0.15)
    lower_strike = stock_price * (1 + otm_min)
    upper_strike = stock_price * (1 + otm_max)

    in_window = []
    for contract in chain:
        if contract.get("option_type") != "call":
            continue
        if lower_strike <= float(contract.get("strike", 0)) <= upper_strike:
            in_window.append(contract)
    if not in_window:
        return None

    min_oi = get_threshold("options", "min_open_interest", 500)
    max_spread = get_threshold("options", "max_spread_pct", 4.0)

    winner = None
    top_score = -1
    for contract in in_window:
        bid = float(contract.get("bid") or 0)
        ask = float(contract.get("ask") or 0)
        oi = int(contract.get("open_interest") or 0)
        if bid <= 0 or ask <= 0 or oi < min_oi:
            continue

        mid = (bid + ask) / 2
        spread_pct = (ask - bid) / mid * 100 if mid > 0 else 999
        if spread_pct > max_spread:
            continue

        score = oi / 1000 + (max_spread - spread_pct)
        if score > top_score:
            top_score = score
            greeks = contract.get("greeks", {}) or {}
            iv = float(greeks.get("smv_vol") or 0) * 100
            winner = {
                "strike": float(contract.get("strike", 0)),
                "bid": bid,
                "ask": ask,
                "mid": round(mid, 2),
                "open_interest": oi,
                "spread_pct": round(spread_pct, 2),
                "iv": round(iv, 1),
                "delta": float(greeks.get("delta") or 0),
                "vega": float(greeks.get("vega") or 0),
                "theta": float(greeks.get("theta") or 0),
            }
    return winner
def options_prefilter(ticker: str) -> Dict:
    """Main entry point: check whether a tradable call exists for ``ticker``.

    Soft fails (``passed`` True, no options data): Tradier not configured,
    no quote, no qualified strike. Hard fails (``passed`` False): missing
    ticker, invalid price, no expiry in the window, IV rank above the
    kill threshold.

    Returns:
        Dict with ``passed`` and ``kill_reason``; successful and soft-fail
        results also carry ``options_data``, ``iv_rank`` and ``summary``.
    """
    def _soft_pass(kill_reason, summary):
        # Let the signal through without options data.
        return {
            "passed": True,
            "kill_reason": kill_reason,
            "options_data": None,
            "iv_rank": 50.0,
            "summary": summary,
        }

    if not ticker or ticker in ("UNKNOWN", "PORTFOLIO", ""):
        return {"passed": False, "kill_reason": "no_ticker"}

    client = get_client()
    if not client.is_configured:
        logger.warning(f"Tradier not configured - skipping options check for {ticker}")
        return _soft_pass(None, "Tradier nicht konfiguriert")

    # 1. Quote (soft)
    quote = client.get_quote(ticker)
    if not quote:
        logger.warning(f"⚠️ {ticker}: Tradier get_quote failed → SOFT FAIL (Claude entscheidet)")
        return _soft_pass(
            "no_quote_soft",
            "Quote nicht verfügbar – Claude entscheidet ohne Options-Daten",
        )

    stock_price = float(quote.get("last") or quote.get("close") or 0)
    if stock_price <= 0:
        return {"passed": False, "kill_reason": "invalid_price"}

    # 2. Expiry (hard)
    target_exp = find_target_expiry(ticker)
    if not target_exp:
        return {"passed": False, "kill_reason": "no_expiry_in_window"}

    # 3. Best call (soft)
    call = find_best_call(ticker, stock_price, target_exp)
    if not call:
        logger.info(f" {ticker}: Kein qualifizierter Strike → SOFT FAIL")
        return _soft_pass(
            "no_qualified_strike_soft",
            "Kein passender Call gefunden – Claude entscheidet",
        )

    # 4. IV rank (hard)
    iv_rank = calculate_iv_rank(ticker, call["iv"])
    if iv_rank > get_threshold("options", "iv_rank_kill", 70):
        return {"passed": False, "kill_reason": f"iv_rank_too_high_{iv_rank}", "iv_rank": iv_rank}

    # Success
    expiry_day = datetime.strptime(target_exp, "%Y-%m-%d").date()
    days_to_exp = (expiry_day - datetime.utcnow().date()).days
    options_data = dict(call)
    options_data.update(
        expiry=target_exp,
        days_to_exp=days_to_exp,
        stock_price=stock_price,
        iv_rank=iv_rank,
    )
    return {
        "passed": True,
        "kill_reason": None,
        "options_data": options_data,
        "iv_rank": iv_rank,
        "summary": (
            f"Call ${call['strike']:.0f} Exp {target_exp} ({days_to_exp}d) "
            f"| Mid ${call['mid']} | IV-Rank {iv_rank} | OI {call['open_interest']}"
        ),
    }
def conviction_modifier_for_iv(iv_rank: float) -> float:
    """Map an IV rank to a conviction modifier.

    Tiers (threshold defaults in parentheses): up to ideal (35) +0.05,
    up to acceptable (50) -0.10, up to risky (70) -0.20, above -0.30.
    """
    tiers = (
        (get_threshold("options", "iv_rank_ideal", 35), +0.05),
        (get_threshold("options", "iv_rank_acceptable", 50), -0.10),
        (get_threshold("options", "iv_rank_risky", 70), -0.20),
    )
    for ceiling, modifier in tiers:
        if iv_rank <= ceiling:
            return modifier
    return -0.30
def post_claude_options_check(ticker: str) -> Dict:
    """Options check for final trade candidates, after the AI decision.

    Runs ``options_prefilter``. A failed check is downgraded to a soft
    pass without options data, so the result always has ``passed`` True.
    """
    outcome = options_prefilter(ticker)
    if not outcome.get("passed"):
        kill_reason = outcome.get("kill_reason", "unknown")
        logger.warning(
            f"Post-Claude Options-Check {ticker} → {kill_reason} → "
            f"Trade wird trotzdem akzeptiert (Claude hat schon entschieden)"
        )
        outcome = {
            "passed": True,
            "kill_reason": "post_claude_soft",
            "options_data": None,
            "iv_rank": 50.0,
            "summary": f"Options-Check fehlgeschlagen ({kill_reason}) – Trade trotzdem ausführen",
        }
    return outcome
================================================
FILE: src/enrich/price_context.py
================================================
# src/enrich/price_context.py
"""
Preis-Kontext für einen Ticker:
- Aktueller Kurs
- Abstand zu MA50
- Position vs. 52W-Range
- Relatives Volumen
Quelle: Yahoo Finance (kostenlos, zuverlässig)
"""
import requests
from typing import Dict, Optional
from datetime import datetime, timedelta
from src.utils.logger import logger
from src.utils.retry import retry
# Browser-like User-Agent sent with the Yahoo chart requests in this module
# (the three adjacent literals are concatenated into one header value).
HEADERS_YAHOO = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/134.0.0.0 Safari/537.36"
}
@retry(times=2, delay=5)
def _fetch_history(ticker: str, period: str = "1y") -> Optional[Dict]:
    """Fetch daily price history from the Yahoo chart API.

    Returns:
        The decoded JSON payload, or None on a non-200 response or any error.
    """
    endpoint = f"https://query1.finance.yahoo.com/v8/finance/chart/{ticker}"
    try:
        response = requests.get(
            endpoint,
            headers=HEADERS_YAHOO,
            params={"interval": "1d", "range": period},
            timeout=15,
        )
        return response.json() if response.status_code == 200 else None
    except Exception:
        return None
def get_price_context(ticker: str) -> Dict:
    """Build the price context for a ticker from one year of daily data.

    Returns:
        Dict with price, ma50, vs_ma50_pct, low_52w, high_52w,
        vs_low_52w_pct, vs_high_52w_pct and rel_volume. An empty dict is
        returned for placeholder tickers, missing data, fewer than 50
        closes, or when the payload cannot be evaluated (logged as warning).
    """
    if not ticker or ticker in ("UNKNOWN", "PORTFOLIO", ""):
        return {}

    payload = _fetch_history(ticker, "1y")
    if not payload:
        return {}

    try:
        series = payload["chart"]["result"][0]["indicators"]["quote"][0]
        closes = [value for value in series.get("close", []) if value is not None]
        volumes = [value for value in series.get("volume", []) if value is not None]
        if not closes or len(closes) < 50:
            return {}

        last_close = closes[-1]
        ma50 = sum(closes[-50:]) / 50
        low_52w = min(closes)
        high_52w = max(closes)

        # Relative volume: latest bar against the 30-bar average.
        if len(volumes) >= 30:
            recent = volumes[-30:]
            avg_volume_30d = sum(recent) / len(recent)
        else:
            avg_volume_30d = 0
        latest_volume = volumes[-1] if volumes else 0
        rel_volume = latest_volume / avg_volume_30d if avg_volume_30d > 0 else 1.0

        def _pct_from(base):
            # Distance of the latest close from ``base`` in percent.
            return round((last_close - base) / base * 100, 1)

        return {
            "price": round(last_close, 2),
            "ma50": round(ma50, 2),
            "vs_ma50_pct": _pct_from(ma50),
            "low_52w": round(low_52w, 2),
            "high_52w": round(high_52w, 2),
            "vs_low_52w_pct": _pct_from(low_52w),
            "vs_high_52w_pct": _pct_from(high_52w),
            "rel_volume": round(rel_volume, 2),
        }
    except Exception as e:
        logger.warning(f"Preis-Kontext {ticker}: {e}")
        return {}
def format_for_prompt(context: Dict) -> str:
    """Render a price context as text lines for the AI prompt.

    An empty context yields a single "not available" line. Otherwise five
    metric lines are produced, followed by an interpretation line when the
    price is within 25% of the 52-week low, below the MA50, or relative
    volume exceeds 1.5.
    """
    if not context:
        return " Preis-Kontext: nicht verfügbar"

    report = [
        f" Aktueller Kurs: ${context['price']}",
        f" vs. 52W-Tief: {context['vs_low_52w_pct']:+.1f}%",
        f" vs. 52W-Hoch: {context['vs_high_52w_pct']:+.1f}%",
        f" vs. MA50: {context['vs_ma50_pct']:+.1f}%",
        f" Rel. Volumen: {context['rel_volume']}x",
    ]

    checks = (
        (context['vs_low_52w_pct'] < 25, "nahe 52W-Tief ✓"),
        (context['vs_ma50_pct'] < 0, "unter MA50 ✓"),
        (context['rel_volume'] > 1.5, "erhöhtes Volumen ✓"),
    )
    flags = [label for hit, label in checks if hit]
    if flags:
        report.append(f" → {' | '.join(flags)}")
    return "\n".join(report)
================================================
FILE: src/enrich/sentiment.py
================================================
# src/enrich/sentiment.py
"""
News-Sentiment-Analyse via Phrase-Matching.
Pro 2-6M Calls: nur als Kontext, nicht als Hard-Gate.
Spätere Erweiterung: FinBERT Integration möglich.
"""
from typing import List, Dict
from src.utils.logger import logger
# Multi-word phrases: each phrase found in a headline counts one full point.
BULLISH_PHRASES = [
"fda approval", "fda approved", "fda clears", "fda grants",
"guidance raised", "raises guidance", "guidance increased",
"beats estimates", "earnings beat", "beat expectations",
"record revenue", "record earnings",
"dividend increase", "buyback program", "share repurchase",
"contract awarded", "contract win",
"upgrade", "price target raised", "outperform",
"strong quarter", "expansion",
]
BEARISH_PHRASES = [
"fda rejection", "fda rejects", "fda warning",
"guidance cut", "guidance lowered", "lowers guidance",
"misses estimates", "earnings miss",
"revenue decline", "revenue miss",
"dividend cut", "dividend suspended",
"downgrade", "price target cut", "underperform",
"sec investigation", "doj probe", "class action",
"bankruptcy", "default", "recall", "product recall",
"layoffs", "restructuring charges",
]
# Single keywords: consulted only when no phrase matched; half a point each.
BULLISH_WORDS = ["upgrade", "beat", "buyback", "dividend", "approval"]
BEARISH_WORDS = ["downgrade", "miss", "investigation", "recall", "fraud"]
def calculate(news: List[Dict]) -> float:
    """Phrase-based news sentiment.

    Each headline scores +1 per bullish and -1 per bearish phrase it
    contains. Headlines without any phrase hit fall back to single
    keywords worth 0.5 each. Matching is by case-insensitive substring.

    Args:
        news: Articles as dicts; only ``"title"`` is read. A missing or
            None title counts as an empty headline.

    Returns:
        Average score per article, clamped to [-1.0, +1.0]; 0.0 for no news.
    """
    if not news:
        return 0.0

    score = 0.0
    for article in news:
        # Feeds may deliver "title": None; .get()'s default only covers a
        # missing key, so coalesce explicitly before lowering.
        title = (article.get("title") or "").lower()
        phrase_bull = sum(1 for p in BULLISH_PHRASES if p in title)
        phrase_bear = sum(1 for p in BEARISH_PHRASES if p in title)
        if phrase_bull == 0 and phrase_bear == 0:
            kw_bull = sum(0.5 for w in BULLISH_WORDS if w in title)
            kw_bear = sum(0.5 for w in BEARISH_WORDS if w in title)
            score += kw_bull - kw_bear
        else:
            score += phrase_bull - phrase_bear
    return max(-1.0, min(1.0, score / len(news)))
================================================
FILE: src/execution/__init__.py
================================================
"""
Execution-Layer: Tradier API Integration
- tradier_client: Wrapper
- exit_manager: Tägliche Position-Checks
"""
================================================
FILE: src/execution/exit_manager.py
================================================
# src/execution/exit_manager.py
"""
Exit-Manager: tägliche Checks aller offenen Positionen.
Triggers:
- Take-Profit bei +80%
- Stop-Loss bei -45%
- Time-Exit bei ≤21 Tagen bis Expiry
- Partial Take bei +50% (50% Position raus)
- Fund-Sell: Fund reduziert Position im letzten 13F um >20%
- Insider-Sell: Form-4 Verkäufe seit Signal-Datum
"""
from datetime import datetime
from typing import List, Dict
from src.utils.logger import logger
from src.utils.config import get_threshold
from src.utils.storage import (
get_open_positions, update_position, close_position,
get_signal_fund_for_position, get_thirteenf_trend, get_form4_sells,
)
from src.execution.tradier_client import get_client
def _calculate_pnl(entry_mid: float, current_mid: float) -> float:
"""P&L in Prozent."""
if entry_mid <= 0:
return 0.0
return (current_mid - entry_mid) / entry_mid * 100
def _days_to_expiry(expiry: str) -> int:
try:
exp_date = datetime.strptime(expiry, "%Y-%m-%d").date()
return (exp_date - datetime.utcnow().date()).days
except Exception:
return 999
def _get_current_option_mid(ticker: str, strike: float, expiry: str) -> float:
    """Current mid price of the call with the given strike and expiry.

    Returns 0.0 when Tradier is not configured, or when no matching call
    with positive bid and ask is found in the chain.
    """
    client = get_client()
    if not client.is_configured:
        return 0.0

    for contract in client.get_options_chain(ticker, expiry, with_greeks=False):
        if contract.get("option_type") != "call":
            continue
        # Strikes are compared with a one-cent tolerance.
        if not abs(float(contract.get("strike", 0)) - strike) < 0.01:
            continue
        bid = float(contract.get("bid") or 0)
        ask = float(contract.get("ask") or 0)
        if bid > 0 and ask > 0:
            return (bid + ask) / 2
    return 0.0
def _check_fund_sold(position: Dict) -> bool:
    """True when the originating fund cut its holding by more than 20%.

    Compares the two most recent 13F entries returned by storage for the
    fund that produced the signal. Disabled through the
    ``exit_rules.exit_if_fund_sells`` threshold.
    """
    if not get_threshold("exit_rules", "exit_if_fund_sells", True):
        return False

    ticker = position["ticker"]
    fund_name = get_signal_fund_for_position(ticker, position.get("signal_date", ""))
    if not fund_name:
        return False

    holdings = get_thirteenf_trend(fund_name, ticker, quarters=2)
    if len(holdings) < 2:
        return False

    latest = holdings[0]["shares"] or 0
    previous = holdings[1]["shares"] or 0
    reduced = previous > 0 and latest < previous * 0.8
    if not reduced:
        return False

    logger.info(
        f" Fund {fund_name} reduzierte {position['ticker']}: "
        f"{previous:,}→{latest:,} Aktien ({(1 - latest/previous)*100:.0f}% weniger)"
    )
    return True
def _check_insider_sold(position: Dict) -> bool:
    """True when insider sales were recorded for the ticker since the signal date.

    The look-back covers the days since ``position["signal_date"]`` plus
    one, or 90 days when that date is missing or unparseable. Disabled
    through the ``exit_rules.exit_if_insider_sells`` threshold.
    """
    if not get_threshold("exit_rules", "exit_if_insider_sells", True):
        return False

    # signal_date may be stored as None or as a non-string value. Normalise
    # once so both the parsing and the log message below are safe to slice.
    signal_day = str(position.get("signal_date") or "")[:10]
    try:
        opened = datetime.strptime(signal_day, "%Y-%m-%d").date()
        days_open = (datetime.utcnow().date() - opened).days + 1
    except ValueError:
        days_open = 90

    sells = get_form4_sells(position["ticker"], days=days_open)
    if sells:
        logger.info(
            f" Insider-Sell erkannt: {position['ticker']} "
            f"({len(sells)} Transaktionen seit {signal_day})"
        )
        return True
    return False
def check_exit_triggers(position: Dict) -> Dict:
    """Evaluate all exit triggers for one open position.

    Triggers are checked in this order and the first hit wins:
    take-profit, stop-loss, time exit, partial take (only while
    ``partial_taken`` is unset), fund sale, insider sale.

    Returns:
        Dict with ``triggered``, ``reason`` and ``action``. Except for the
        "no_quote" result it also carries ``pnl_pct``, ``current_mid`` and
        ``days_left``; triggered results add a ``message``.
    """
    entry_mid = position.get("entry_price_option", 0)
    expiry = position.get("expiry", "")
    current_mid = _get_current_option_mid(
        position.get("ticker", ""), position.get("strike", 0), expiry
    )
    if current_mid <= 0:
        # Without a price no P&L-based decision is possible.
        return {"triggered": False, "reason": "no_quote", "action": "hold"}

    pnl_pct = _calculate_pnl(entry_mid, current_mid)
    days_left = _days_to_expiry(expiry)

    take_profit = get_threshold("exit_rules", "take_profit_pct", 80)
    stop_loss = get_threshold("exit_rules", "stop_loss_pct", -45)
    partial_take = get_threshold("exit_rules", "partial_take_pct", 50)
    min_days = get_threshold("exit_rules", "min_days_remaining", 21)

    def _exit(reason, message, action="close_full"):
        # Result for a fired trigger, with the shared market snapshot.
        return {
            "triggered": True,
            "reason": reason,
            "action": action,
            "pnl_pct": pnl_pct,
            "current_mid": current_mid,
            "days_left": days_left,
            "message": message,
        }

    if pnl_pct >= take_profit:
        return _exit("take_profit", f"TP +{pnl_pct:.0f}% — komplett raus")
    if pnl_pct <= stop_loss:
        return _exit("stop_loss", f"SL {pnl_pct:.0f}% — komplett raus")
    if days_left <= min_days:
        return _exit("time_exit", f"Nur noch {days_left}d — Theta frisst, raus")
    if pnl_pct >= partial_take and not position.get("partial_taken"):
        return _exit(
            "partial_take",
            f"+{pnl_pct:.0f}% — 50% raus, Rest laufen lassen",
            action="close_half",
        )

    # Fund / insider sell exits (config-controlled)
    if _check_fund_sold(position):
        return _exit(
            "fund_sold",
            "Originating fund hat Position im letzten 13F reduziert — raus",
        )
    if _check_insider_sold(position):
        return _exit(
            "insider_sold",
            "Insider-Verkäufe seit Signal-Datum erkannt — raus",
        )

    return {
        "triggered": False,
        "reason": "monitor",
        "action": "hold",
        "pnl_pct": pnl_pct,
        "current_mid": current_mid,
        "days_left": days_left,
    }
def run_exit_check() -> List[Dict]:
    """Main entry point: check every open position for exit triggers.

    For each position the current option mid and unrealized P&L are
    written back whenever a price was available. Full exits close the
    position in storage; a partial take marks the position so the same
    trigger does not fire again on the next run.

    Returns:
        Exit recommendations for the e-mail, each a dict with the
        ``position`` and the ``trigger`` that fired.
    """
    positions = get_open_positions()
    if not positions:
        logger.info("Keine offenen Positionen")
        return []

    logger.info(f"Exit-Check: {len(positions)} offene Positionen")
    exits = []
    for pos in positions:
        result = check_exit_triggers(pos)

        # Also update the P&L when the position is only held.
        if "current_mid" in result:
            update_position(pos["id"], {
                "current_option_mid": result["current_mid"],
                "unrealized_pnl_pct": result.get("pnl_pct", 0),
            })

        if not result["triggered"]:
            continue

        logger.info(
            f" EXIT-TRIGGER {pos['ticker']}: {result['reason']} "
            f"({result['pnl_pct']:.0f}%)"
        )
        exits.append({
            "position": pos,
            "trigger": result
        })

        action = result["action"]
        if action == "close_full":
            close_position(pos["id"], {
                "exit_reason": result["reason"],
                "exit_price": result["current_mid"],
                "realized_pnl_pct": result["pnl_pct"],
                "realized_pnl_after_taxes": (
                    result["pnl_pct"] *
                    (1 - get_threshold("backtest", "tax_rate_short_term", 0.35))
                ),
            })
        elif action == "close_half":
            # check_exit_triggers only suppresses the partial take while
            # "partial_taken" is set; without persisting the flag the same
            # recommendation would be repeated on every run.
            # NOTE(review): assumes storage accepts a "partial_taken" field
            # — confirm against the positions schema.
            try:
                update_position(pos["id"], {"partial_taken": True})
            except Exception as e:
                logger.warning(
                    f"partial_taken für {pos['ticker']} nicht gespeichert: {e}"
                )
    return exits
================================================
FILE: src/execution/tradier_client.py
================================================
"""
Tradier API Client.
Wrapper für alle Tradier-Endpoints.
Erlaubt einfaches Mocking für Tests.
"""
import os
import requests
from typing import Dict, List, Optional
from datetime import datetime
from src.utils.logger import logger
from src.utils.retry import retry
class TradierClient:
"""Tradier Pro API Wrapper."""
BASE_URL = "https://api.tradier.com/v1"
def __init__(self, api_key: Optional[str] = None):
    """Store the API key and prepare the request headers.

    Args:
        api_key: Tradier token. When empty or omitted, the
            ``TRADIER_API_KEY`` environment variable (stripped) is used.
    """
    if api_key:
        self.api_key = api_key
    else:
        self.api_key = os.environ.get("TRADIER_API_KEY", "").strip()

    if not self.api_key:
        logger.warning("TRADIER_API_KEY nicht gesetzt!")

    self.headers = {
        "Accept": "application/json",
        "Authorization": f"Bearer {self.api_key}",
    }
@property
def is_configured(self) -> bool:
    """Whether a non-empty API key is available."""
    has_key = bool(self.api_key)
    return has_key
# ── Markets / Quotes ─────────────────────────────────────────────
@retry(times=3, delay=2, backoff=1.5)
def get_quote(self, ticker: str) -> Optional[Dict]:
    """Fetch a single quote for ``ticker``, logging why it failed.

    Returns:
        The quote dict, or None when the client is not configured, the
        response is not HTTP 200, the quote has neither ``last`` nor
        ``close``, or any error occurs.
    """
    if not self.is_configured:
        logger.warning(f"Tradier nicht konfiguriert für Quote {ticker}")
        return None

    try:
        response = requests.get(
            f"{self.BASE_URL}/markets/quotes",
            params={"symbols": ticker},
            headers=self.headers,
            timeout=10,
        )
        if response.status_code != 200:
            logger.warning(f"Tradier quote {ticker} → HTTP {response.status_code} | {response.text[:200]}")
            return None

        quote = response.json().get("quotes", {}).get("quote")
        # The API may wrap the quote in a list; take the first entry.
        if isinstance(quote, list):
            quote = quote[0] if quote else None

        if quote and (quote.get("last") or quote.get("close")):
            return quote
        logger.warning(f"Tradier quote {ticker} → leere Response")
        return None
    except Exception as e:
        logger.error(f"Tradier quote {ticker} EXCEPTION: {e}")
        return None
# ── Restliche Methoden (unverändert) ──────────────────────────────
@retry(times=2, delay=3)
def get_expirations(self, ticker: str) -> List[str]:
"""Verfügbare Options-Ablaufdaten."""
if not self.is_configured:
return []
try:
resp = requests.get(
f"{self.BASE_URL}/markets/options/expirations",
params={"symbol": ticker},
headers=self.headers,
timeout=10
)
if resp.status_code != 200:
return []
return (resp.json().get("expirations") or {}).get("date", [])
except Exception as e:
logger.warning(f"Tradier expirations {ticker}: {e}")
return []
@retry(times=2, delay=3)
def get_options_chain(self, ticker: str, expiration: str, with_greeks: bool = True) -> List[Dict]:
"""Komplette Options-Chain für eine Expiration."""
if not self.is_configured:
return []
try:
resp = requests.get(
f"{self.BASE_URL}/markets/options/chains",
params={
"symbol": ticker,
"expiration": expiration,
"greeks": "true" if with_greeks else "false"
},
headers=self.headers,
timeout=15
)
if resp.status_code != 200:
return []
options = (resp.json().get("options") or {}).get("option", [])
if isinstance(options, dict):
options = [options]
return options
except Exception as e:
logger.warning(f"Tradier chain {ticker} {expiration}: {e}")
return []
@retry(times=2, delay=3)
def get_history(self, ticker: str, interval: str = "weekly", start: Optional[str] = None, end: Optional[str] = None) -> List[Dict]:
"""Historische Kurse für IV-Rank-Berechnung."""
if not self.is_configured:
return []
try:
params = {"symbol": ticker, "interval": interval}
if start: params["start"] = start
if end: params["end"] = end
resp = requests.get(
f"{self.BASE_URL}/markets/history",
params=params,
headers=self.headers,
timeout=10
)
if resp.status_code != 200:
return []
data = (resp.json().get("history") or {}).get("day", [])
if isinstance(data, dict):
data = [data]
return data
except Exception as e:
logger.warning(f"Tradier history {ticker}: {e}")
return []
# Module-wide client instance, created on first use by get_client().
_client: Optional[TradierClient] = None


def get_client() -> TradierClient:
    """Return the shared TradierClient, constructing it on the first call."""
    global _client
    if _client is not None:
        return _client
    _client = TradierClient()
    return _client
================================================
FILE: src/ingest/__init__.py
================================================
"""
Ingest-Layer: Datenquellen.
Jedes Modul implementiert ein einfaches Interface:
- fetch() -> List[Dict]
- Eigene Fehlerbehandlung
- Kein State außerhalb der Funktion
Diese Module können isoliert getestet und ersetzt werden.
"""
================================================
FILE: src/ingest/eight_k_fetcher.py
================================================
# src/ingest/eight_k_fetcher.py
import os
import re
import requests
import xml.etree.ElementTree as ET
from typing import List, Dict
from src.utils.logger import logger
from src.utils.retry import retry
from src.utils.ticker_resolver import resolve_ticker
# Request headers for the SEC feed. The User-Agent is read from the
# EDGAR_USER_AGENT environment variable, with a placeholder as fallback.
HEADERS = {
    "User-Agent": os.environ.get('EDGAR_USER_AGENT', 'SmartMoneyScanner contact@example.com'),
    "Accept-Encoding": "gzip, deflate"
}
# Item numbers and their signal quality (0-100).
# High scores = material for price moves, low scores = routine.
_ITEM_SCORES: Dict[str, int] = {
    "2.01": 85,  # Completion of Acquisition or Disposition
    "5.01": 80,  # Changes in Control
    "1.01": 70,  # Material Definitive Agreement
    "2.02": 70,  # Results of Operations (Earnings)
    "5.02": 65,  # Director/Officer Changes (often carries insider information)
    "1.02": 60,  # Termination of Material Agreement
    "4.01": 55,  # Auditor Change
    "4.02": 55,  # Auditor Disclosure
    "8.01": 55,  # Other Events
    "2.03": 55,  # Creation of Direct Financial Obligation
    "5.03": 50,  # Amendments to Charter/Bylaws
    "7.01": 40,  # Regulation FD Disclosure (often just PR)
    "9.01": 20,  # Financial Statements (pure exhibit filing)
}
_DEFAULT_ITEM_SCORE = 55  # Fallback when no item numbers can be parsed
def _extract_cik_from_url(url: str) -> int:
match = re.search(r'/data/([0-9]+)/', url)
if match:
try:
return int(match.group(1))
except ValueError:
return 0
return 0
def _parse_item_score(summary: str) -> int:
    """
    Score a filing by the item numbers found in its RSS summary text.

    Example summary: "...Items: 2.02, 9.01..."

    Every ``d.dd`` token is looked up in ``_ITEM_SCORES`` (unknown tokens
    count as ``_DEFAULT_ITEM_SCORE``) and the highest score is returned.
    An empty summary or one without item numbers yields the default score.
    """
    item_numbers = re.findall(r'\b(\d\.\d{2})\b', summary) if summary else []
    if not item_numbers:
        return _DEFAULT_ITEM_SCORE
    return max(
        _ITEM_SCORES.get(number, _DEFAULT_ITEM_SCORE) for number in item_numbers
    )
@retry(times=3, delay=5)
def fetch() -> List[Dict]:
    """Fetch the current 8-K filings from the SEC Atom feed, including an item_score.

    Entries whose ticker resolves to "UNKNOWN" are dropped. Any exception is
    logged and turned into an empty list.
    """
    feed_url = "https://www.sec.gov/cgi-bin/browse-edgar?action=getcurrent&type=8-K&count=40&output=atom"
    atom = {"atom": "http://www.w3.org/2005/Atom"}

    def _text(node, tag: str) -> str:
        # Text of an Atom child element, "" when it is missing.
        return node.findtext(f"atom:{tag}", default="", namespaces=atom)

    try:
        response = requests.get(feed_url, headers=HEADERS, timeout=20)
        response.raise_for_status()
        feed = ET.fromstring(response.text)

        filings = []
        for node in feed.findall("atom:entry", atom):
            title = _text(node, "title")
            summary = _text(node, "summary")
            link_el = node.find("atom:link", atom)
            href = "" if link_el is None else link_el.attrib.get("href", "")
            updated = _text(node, "updated")

            ticker = resolve_ticker(cik=_extract_cik_from_url(href), title=title)
            if ticker == "UNKNOWN":
                continue

            filings.append({
                "ticker": ticker,
                "filed": updated[:10],
                "title": title,
                "url": href,
                "type": "8-K",
                "item_score": _parse_item_score(summary),
            })

        logger.info(f"8-K Fetcher: {len(filings)} relevante Filings gefunden.")
        return filings
    except Exception as e:
        logger.error(f"Fehler beim 8-K Fetch: {e}")
        return []
================================================
FILE: src/ingest/form4_fetcher.py
================================================
# src/ingest/form4_fetcher.py
"""
SEC EDGAR Form 4 Fetcher mit Clustered Insider Detection.
Output: List[Dict] mit Keys:
ticker, filed, title, summary, url, is_10b5,
is_clustered, cluster_size, cross_day_count
"""
import os
import re
import requests
import xml.etree.ElementTree as ET
from collections import defaultdict
from datetime import datetime
from typing import List, Dict
from src.utils.logger import logger
from src.utils.retry import retry
from src.utils.ticker_resolver import resolve_ticker
from src.utils.storage import (
save_form4_trades,
get_recent_form4_by_ticker,
cleanup_old_form4
)
# Request headers for the SEC feed. The User-Agent is read from the
# EDGAR_USER_AGENT environment variable, with a placeholder as fallback.
HEADERS = {
    "User-Agent": os.environ.get('EDGAR_USER_AGENT', 'SmartMoneyScanner contact@example.com'),
    "Accept-Encoding": "gzip, deflate"
}
def _extract_cik_from_url(url: str) -> int:
"""Extrahiert die CIK aus der SEC-URL (z.B. .../data/1234567/...)"""
# FIX: war r'/data/(\[0-9\]+)/' — escaped brackets matchten nie
match = re.search(r'/data/([0-9]+)/', url)
if match:
try:
return int(match.group(1))
except ValueError:
return 0
return 0
def _has_10b5_plan(text: str) -> bool:
indicators = ["10b5-1", "10b5 1", "rule 10b5", "prearranged", "pre-arranged"]
return any(i in text.lower() for i in indicators)
def _is_likely_sell(title: str, summary: str) -> bool:
"""Filtert Verkäufe und automatische Dispositionen."""
text = (title + " " + summary).lower()
sell_indicators = [
"disposed", "disposition", "sale", " sold ",
"automatic sell", "tax withholding", "withheld",
"forfeiture", "forfeit", "surrender",
"code f", "code s", "code d",
]
buy_indicators = ["purchased", "acquired", "bought", "grant", "award"]
has_sell = any(s in text for s in sell_indicators)
has_buy = any(b in text for b in buy_indicators)
return has_sell and not has_buy
@retry(times=3, delay=6)
def _fetch_raw() -> List[Dict]:
    """Fetch the Form 4 Atom feed and return its entries as plain dicts.

    Returns:
        One dict per feed entry with the keys "title", "filed" (first 10
        characters of the Atom ``updated`` value), "summary" and "url".

    Raises:
        requests.HTTPError: for a non-success HTTP status. Raising here
            (as the 8-K fetcher does) avoids parsing an error page as feed
            XML, which would surface as a misleading parse error or an
            empty result.
        xml.etree.ElementTree.ParseError: if the body is not well-formed XML.
    """
    resp = requests.get(
        "https://www.sec.gov/cgi-bin/browse-edgar"
        "?action=getcurrent&type=4&dateb=&owner=include&count=60&output=atom",
        headers=HEADERS, timeout=20
    )
    resp.raise_for_status()
    root = ET.fromstring(resp.text)
    ns = {"atom": "http://www.w3.org/2005/Atom"}
    out = []
    for entry in root.findall("atom:entry", ns):
        title = entry.findtext("atom:title", default="", namespaces=ns)
        updated = entry.findtext("atom:updated", default="", namespaces=ns)
        summary = entry.findtext("atom:summary", default="", namespaces=ns)
        link = entry.find("atom:link", ns)
        url = link.attrib.get("href", "") if link is not None else ""
        out.append({
            "title": title,
            "filed": updated[:10],
            "summary": summary,
            "url": url
        })
    logger.info(f"Form 4 raw: {len(out)} Einträge")
    return out
def _detect_clustered(raw: List[Dict]) -> List[Dict]:
    """
    Clustered insider detection.

    - Intra-fetch: at least two non-plan filings for the same ticker in this
      fetch whose filing dates lie within 3 days of each other.
    - Cross-day: stored Form 4 rows of the last 5 days for the ticker whose
      date differs from the dates seen in this fetch.

    The input dicts are annotated in place (ticker, is_10b5, is_sell and the
    cluster fields). Sells, 10b5-1 plan trades and "UNKNOWN" tickers are not
    part of the returned list.
    """
    grouped: Dict[str, List[Dict]] = defaultdict(list)
    for filing in raw:
        title = filing.get("title", "")
        summary = filing.get("summary", "")
        symbol = resolve_ticker(
            cik=_extract_cik_from_url(filing.get("url", "")), title=title
        )
        filing["ticker"] = symbol
        filing["is_10b5"] = _has_10b5_plan(summary)
        filing["is_sell"] = _is_likely_sell(title, summary)
        if not filing["is_sell"]:
            grouped[symbol].append(filing)

    # Persist the usable trades so later runs can detect cross-day clusters.
    persistable = []
    for group in grouped.values():
        for filing in group:
            if filing["ticker"] != "UNKNOWN" and not filing["is_10b5"]:
                persistable.append(filing)
    save_form4_trades(persistable)

    cluster_labels = {
        (True, True): "intra+cross",
        (False, True): "cross_day",
        (True, False): "intra",
        (False, False): "none",
    }

    enriched = []
    for symbol, group in grouped.items():
        if symbol == "UNKNOWN":
            continue

        discretionary = [f for f in group if not f["is_10b5"]]
        plan_count = len(group) - len(discretionary)

        # Intra-fetch cluster: collect the parseable filing dates.
        filed_dates = []
        for f in discretionary:
            try:
                filed_dates.append(datetime.strptime(f["filed"], "%Y-%m-%d"))
            except ValueError:
                pass
        intra_cluster = (
            len(filed_dates) >= 2
            and (max(filed_dates) - min(filed_dates)).days <= 3
        )

        # Cross-day cluster: stored rows on dates not present in this fetch.
        stored = get_recent_form4_by_ticker(symbol, days=5)
        seen_dates = {f.get("filed", "") for f in discretionary}
        older_rows = [row for row in stored if row["filed_date"] not in seen_dates]
        cross_day_cluster = len(older_rows) >= 1

        is_clustered = intra_cluster or cross_day_cluster
        cluster_size = len(discretionary) + len(older_rows)
        cluster_type = cluster_labels[(intra_cluster, cross_day_cluster)]

        if is_clustered:
            logger.info(
                f" Cluster {symbol}: {cluster_size} Trades ({cluster_type})"
            )

        for f in discretionary:
            f["is_clustered"] = is_clustered
            f["cluster_size"] = cluster_size
            f["cluster_type"] = cluster_type
            f["cross_day_count"] = len(older_rows)
            f["plan_trades_filtered"] = plan_count
            f["type"] = "form4"
            enriched.append(f)

    known = sum(1 for f in enriched if f["ticker"] != "UNKNOWN")
    clustered = [f for f in enriched if f["is_clustered"]]
    logger.info(f"Form 4: {known} mit Ticker | {len(clustered)} Cluster")
    return enriched
def fetch() -> List[Dict]:
    """Main entry point: prune old stored Form 4 rows, then fetch and cluster the current feed."""
    cleanup_old_form4(days=90)
    return _detect_clustered(_fetch_raw())
================================================
FILE: src/ingest/gov_trades_fetcher.py
================================================
# src/ingest/gov_trades_fetcher.py
"""
US Politiker-Trades Fetcher.
Quellen: Quiver Quantitative (primär, optional kostenpflichtig)
SEC EDGAR Direct (Fallback, kostenlos)
"""
import os
import re
import hashlib
import requests
from datetime import datetime, timedelta
from typing import List, Dict
from src.utils.logger import logger
from src.utils.retry import retry
# Default request headers; the User-Agent embeds the GMAIL_USER environment
# variable (placeholder address when it is unset).
HEADERS = {
    "User-Agent": f"SmartMoneyScanner {os.environ.get('GMAIL_USER', 'scanner@example.com')}",
    "Accept": "application/json, application/xml, */*",
}
# Trades whose parsed amount is below this value are skipped.
MIN_TRADE_VALUE = 15_000
# Default look-back window in days for the transaction-date cutoff.
LOOKBACK_DAYS = 60
# Score per politician, matched by case-insensitive substring of the name;
# names not listed here get a fallback score in _pol_score.
POLITICIAN_SCORES = {
    "Nancy Pelosi": 38,
    "Paul Pelosi": 35,
    "Dan Crenshaw": 30,
    "Michael McCaul": 28,
    "Mark Warner": 32,
    "Richard Burr": 30,
    "Tommy Tuberville": 28,
    "Josh Gottheimer": 26,
    "Ro Khanna": 24,
    "Raja Krishnamoorthi": 24,
    "Patrick McHenry": 26,
    "Jim Himes": 24,
    "French Hill": 26,
}
# Upper-case tokens that _clean_ticker rejects as ticker symbols
# (legal suffixes, exchange/market terms, common words, currencies, ...).
_TICKER_BLACKLIST = {
    "INC", "LLC", "LTD", "CORP", "CO", "LP", "NA", "PLC", "AG", "SE", "NV", "SA",
    "NYSE", "NASDAQ", "ETF", "IPO", "SEC", "SPAC", "OTC", "ADR", "REIT",
    "USA", "THE", "AND", "FOR", "NOT", "BUT", "ALL", "NEW", "CEO", "CFO", "COO",
    "USD", "EUR", "GBP", "AI", "IT", "US", "UK", "EU",
    "BUY", "SELL", "PUT", "CALL", "HOLD", "FUND", "BOND",
}
def _trade_id(politician: str, ticker: str, date: str, txtype: str) -> str:
key = f"{politician}|{ticker}|{date}|{txtype}".lower().strip()
return hashlib.md5(key.encode()).hexdigest()[:12]
def _is_buy(tx: str) -> bool:
t = str(tx).lower()
return any(w in t for w in [
"purchase", "buy", "bought", "acquisition", "received", "exercise"
])
def _parse_amount(s: str) -> int:
if not s:
return 0
nums = re.findall(r"[\d,]+", str(s).replace("$", "").replace(" ", ""))
vals = []
for n in nums:
clean = n.replace(",", "")
if clean.isdigit() and len(clean) >= 3:
vals.append(int(clean))
if not vals:
return 0
return sum(vals) // len(vals)
def _clean_ticker(raw: str):
    """Normalise a raw ticker string; return None when it is not acceptable.

    The value is upper-cased and reduced to letters and dots. It must then
    look like 1-5 letters with an optional ".X"/".XX" suffix. Symbols whose
    base part is in ``_TICKER_BLACKLIST`` or shorter than two letters are
    rejected.
    """
    if not raw:
        return None
    symbol = re.sub(r"[^A-Z\.]", "", str(raw).strip().upper())
    if re.match(r"^[A-Z]{1,5}(?:\.[A-Z]{1,2})?$", symbol) is None:
        return None
    root = symbol.split(".")[0]
    if root in _TICKER_BLACKLIST or len(root) < 2:
        return None
    return symbol
def _cutoff(days: int = LOOKBACK_DAYS) -> str:
    """Return the UTC date ``days`` days ago as ``YYYY-MM-DD``."""
    return (datetime.utcnow() - timedelta(days=days)).strftime("%Y-%m-%d")


@retry(times=2, delay=10)
def _from_quiver(days_back: int = LOOKBACK_DAYS) -> List[Dict]:
    """Fetch purchases from the Quiver Quantitative congress-trading endpoint.

    Only buy-type transactions with a valid ticker, a parsed amount of at
    least ``MIN_TRADE_VALUE`` and a transaction date inside the look-back
    window are kept.

    Args:
        days_back: Size of the look-back window in days.

    Returns:
        List of trade dicts; ``[]`` on a non-200 status, an unexpected
        payload, or any exception.
    """
    cutoff = _cutoff(days_back)
    results = []
    try:
        resp = requests.get(
            "https://api.quiverquant.com/beta/live/congresstrading",
            headers={**HEADERS, "accept": "application/json"},
            timeout=15
        )
        if resp.status_code != 200:
            logger.warning(f"Quiver HTTP {resp.status_code}")
            return []
        raw_data = resp.json()
        # An error object instead of the trade list would otherwise only be
        # caught indirectly by the generic exception handler below.
        if not isinstance(raw_data, list):
            logger.warning(f"Quiver: unexpected payload type {type(raw_data).__name__}")
            return []
        for t in raw_data:
            if not isinstance(t, dict):
                continue
            date = str(t.get("TransactionDate", ""))[:10]
            # ISO dates compare correctly as strings; an empty date is dropped too.
            if date < cutoff:
                continue
            if not _is_buy(t.get("Transaction", "")):
                continue
            ticker = _clean_ticker(t.get("Ticker", ""))
            if not ticker:
                continue
            amount_usd = _parse_amount(t.get("Amount", ""))
            if amount_usd < MIN_TRADE_VALUE:
                continue
            results.append({
                "type": "gov_trade",
                "source": "quiver",
                # A null name would break the score lookup and the dedup key.
                "politician": t.get("Representative") or "",
                "ticker": ticker,
                "transaction": "purchase",
                "amount": t.get("Amount", ""),
                "amount_usd": amount_usd,
                "date": date,
                "chamber": "congress",
            })
        logger.info(f"Quiver: {len(results)} Käufe")
        return results
    except Exception as e:
        logger.warning(f"Quiver: {e}")
        return []


def _pol_score(name: str) -> int:
    """Return the score of the first known politician whose name occurs in ``name`` (18 otherwise)."""
    lowered = (name or "").lower()
    for known, score in POLITICIAN_SCORES.items():
        if known.lower() in lowered:
            return score
    return 18


def _dedup(trades: List[Dict]) -> List[Dict]:
    """Drop trades with an already-seen trade id and store the id as ``trade_id`` on the kept ones."""
    seen = set()
    out = []
    for t in trades:
        tid = _trade_id(
            t.get("politician", ""),
            t.get("ticker", ""),
            t.get("date", ""),
            t.get("transaction", "")
        )
        if tid not in seen:
            seen.add(tid)
            t["trade_id"] = tid
            out.append(t)
    return out


def fetch(days_back: int = LOOKBACK_DAYS) -> List[Dict]:
    """Main entry point: fetch politician trades.

    Args:
        days_back: Look-back window in days. It is now passed on to the
            source fetcher; previously the argument was accepted but the
            window was always ``LOOKBACK_DAYS``.

    Returns:
        Up to 50 de-duplicated trades, each with a ``politician_score``,
        sorted by score and then by amount (descending).
    """
    all_trades = []
    try:
        all_trades.extend(_from_quiver(days_back))
    except Exception as e:
        logger.warning(f"Quiver Fehler: {e}")
    deduped = _dedup(all_trades)
    for t in deduped:
        t["politician_score"] = _pol_score(t.get("politician", ""))
    deduped.sort(
        key=lambda x: (x["politician_score"] * 100_000 + x.get("amount_usd", 0)),
        reverse=True
    )
    logger.info(f"Politiker-Trades: {len(deduped)}")
    return deduped[:50]
================================================
FILE: src/ingest/news_fetcher.py
================================================
# src/ingest/news_fetcher.py
"""
News-Fetcher: Google + Yahoo RSS.
Output: List[Dict] mit title, url, date, source
"""
import os
import requests
import xml.etree.ElementTree as ET
from typing import List, Dict
from src.utils.logger import logger
from src.utils.retry import retry
# Request headers; the User-Agent embeds the GMAIL_USER environment variable
# (placeholder address when it is unset).
HEADERS = {
    "User-Agent": f"SmartMoneyScanner {os.environ.get('GMAIL_USER', 'scanner@example.com')}"
}
# Lower-case keywords; a headline is kept when it contains at least one of
# them as a substring (see _match).
KEYWORDS = [
    "insider", "sec", "merger", "acquisition", "guidance", "contract",
    "investigation", "buyback", "dividend", "fda", "upgrade", "downgrade",
    "earnings", "beat", "miss", "activist", "short", "alert"
]
def _match(text: str) -> bool:
    """Return True when the lower-cased text contains any entry of ``KEYWORDS`` as a substring."""
    lowered = text.lower()
    for keyword in KEYWORDS:
        if keyword in lowered:
            return True
    return False
@retry(times=3, delay=5)
def _fetch_rss(url: str, source: str, ticker: str) -> List[Dict]:
    """Fetch one RSS feed and return the keyword-matching headlines.

    Only the first 12 ``<item>`` elements are inspected.

    Args:
        url: Feed URL.
        source: Label stored in each result's "source" field.
        ticker: Ticker stored in each result's "ticker" field.

    Returns:
        List of dicts with "source", "ticker", "title", "url" and "date"
        (first 16 characters of ``pubDate``).

    Raises:
        requests.HTTPError: for a non-success HTTP status, so an error page
            is reported as such instead of being parsed as a feed.
        xml.etree.ElementTree.ParseError: if the body is not well-formed XML.
    """
    resp = requests.get(url, headers=HEADERS, timeout=15)
    resp.raise_for_status()
    root = ET.fromstring(resp.text)
    out = []
    for item in root.findall(".//item")[:12]:
        title = item.findtext("title", default="")
        if _match(title):
            out.append({
                "source": source,
                "ticker": ticker,
                "title": title,
                "url": item.findtext("link", default=""),
                "date": item.findtext("pubDate", default="")[:16]
            })
    return out
def fetch(ticker: str) -> List[Dict]:
    """Fetch news for one ticker from the Google and Yahoo RSS feeds.

    Each source is queried independently; a failure of one source is logged
    as a warning and does not affect the other.

    Returns:
        At most 15 headline dicts (Google results first).
    """
    news = []
    try:
        news += _fetch_rss(
            f"https://news.google.com/rss/search?q={ticker}+stock&hl=en-US&gl=US&ceid=US:en",
            "google", ticker
        )
    except Exception as e:
        logger.warning(f"Google News {ticker}: {e}")
    try:
        news += _fetch_rss(
            # "&region" had been mangled into "®ion" (the "&reg" entity),
            # which glued the parameter onto the ticker symbol.
            f"https://feeds.finance.yahoo.com/rss/2.0/headline?s={ticker}&region=US&lang=en-US",
            "yahoo", ticker
        )
    except Exception as e:
        logger.warning(f"Yahoo News {ticker}: {e}")
    return news[:15]
================================================
FILE: src/ingest/thirteenf_fetcher.py
================================================
# src/ingest/thirteenf_fetcher.py
"""
13F-HR Fetcher mit direktem SEC-Parser.
Output: List[Dict] mit Delta-Signalen
Bonus: Multi-Quartals-Trend Detection (3+ Quartale = stärkstes Signal)
"""
import os
import re
import time
import requests
import xml.etree.ElementTree as ET
from datetime import datetime
from typing import List, Dict, Optional, Tuple
from src.utils.logger import logger
from src.utils.retry import retry
from src.utils.storage import get_conn
from src.utils.config import load as load_config
# Request headers for the SEC endpoints; the User-Agent embeds the GMAIL_USER
# environment variable (placeholder address when it is unset).
HEADERS = {
    "User-Agent": f"SmartMoneyScanner {os.environ.get('GMAIL_USER', 'scanner@example.com')}",
    "Accept-Encoding": "gzip, deflate",
}
def _get_thresholds():
    """Return the "thirteenf" section of the "thresholds" config ({} when absent)."""
    return load_config("thresholds").get("thirteenf", {})
# ── Quarter helpers ──────────────────────────────────────────────────
def _current_quarter() -> str:
now = datetime.utcnow()
return f"{now.year}Q{(now.month - 1) // 3 + 1}"
def _date_to_quarter(date_str: str) -> str:
try:
dt = datetime.strptime(str(date_str)[:10], "%Y-%m-%d")
return f"{dt.year}Q{(dt.month - 1) // 3 + 1}"
except Exception:
return _current_quarter()
# ── DB Operations ────────────────────────────────────────────────────
def _save_holdings(cik: str, fund_name: str, quarter: str,
                   holdings: List[Dict], total_value: int):
    """Replace the stored holdings of one fund/quarter and upsert its portfolio summary.

    Existing rows for (cik, quarter) are deleted first, then one row per
    holding is inserted (company name cut to 80 characters), and finally the
    portfolio total and position count are written.
    """
    holding_rows = []
    for h in holdings:
        holding_rows.append((
            fund_name, cik, quarter,
            h["ticker"], h.get("cusip", ""), h.get("company", "")[:80],
            h["shares"], h["value_usd"],
        ))

    with get_conn() as conn:
        conn.execute(
            "DELETE FROM thirteenf_holdings WHERE cik=? AND quarter=?",
            (cik, quarter)
        )
        conn.executemany("""
            INSERT INTO thirteenf_holdings
            (fund_name, cik, quarter, ticker, cusip, company, shares, value_usd)
            VALUES (?,?,?,?,?,?,?,?)
        """, holding_rows)
        conn.execute("""
            INSERT OR REPLACE INTO thirteenf_portfolio
            (cik, quarter, total_value, position_count)
            VALUES (?,?,?,?)
        """, (cik, quarter, total_value, len(holdings)))
def _get_quarters(cik: str) -> List[str]:
    """Return the distinct quarters stored for ``cik``, newest first."""
    query = "SELECT DISTINCT quarter FROM thirteenf_holdings WHERE cik=? ORDER BY quarter DESC"
    with get_conn() as conn:
        found = conn.execute(query, (cik,)).fetchall()
    quarters = []
    for row in found:
        quarters.append(row[0])
    return quarters
def _get_holdings(cik: str, quarter: str) -> Dict[str, Dict]:
    """Return the stored holdings of one fund/quarter keyed by ticker.

    Rows without a ticker or with a ticker longer than 6 characters are
    skipped. Each value holds "company", "shares" and "value_usd".
    """
    query = "SELECT ticker, company, shares, value_usd FROM thirteenf_holdings WHERE cik=? AND quarter=?"
    with get_conn() as conn:
        found = conn.execute(query, (cik, quarter)).fetchall()

    by_ticker: Dict[str, Dict] = {}
    for row in found:
        symbol = row["ticker"]
        if not symbol or len(symbol) > 6:
            continue
        by_ticker[symbol] = {
            "company": row["company"],
            "shares": row["shares"],
            "value_usd": row["value_usd"],
        }
    return by_ticker
def _get_portfolio_total(cik: str, quarter: str) -> int:
    """Return the stored total portfolio value for one fund/quarter, or 0 when there is no row."""
    query = "SELECT total_value FROM thirteenf_portfolio WHERE cik=? AND quarter=?"
    with get_conn() as conn:
        summary = conn.execute(query, (cik, quarter)).fetchone()
    if not summary:
        return 0
    return summary["total_value"]
# ── Multi-Quartals-Trend (NEW) ───────────────────────────────────────
def get_consecutive_increases(cik: str, ticker: str) -> int:
    """
    Count how many stored quarters in a row the fund increased the position.

    Starting at the newest stored quarter, each quarter is compared with the
    one before it. The streak ends at the first pair where the ticker is
    missing on either side or the position value did not grow.
    3+ is the strongest signal.
    """
    ordered = sorted(_get_quarters(cik), reverse=True)
    if len(ordered) < 2:
        return 0

    streak = 0
    for newer_q, older_q in zip(ordered, ordered[1:]):
        newer = _get_holdings(cik, newer_q)
        older = _get_holdings(cik, older_q)
        if ticker not in newer or ticker not in older:
            break
        if not (newer[ticker]["value_usd"] > older[ticker]["value_usd"]):
            break
        streak += 1
    return streak
# ── CUSIP/Company → Ticker Mapping ───────────────────────────────────
KNOWN_COMPANIES = {
"APPLE INC": "AAPL", "APPLE": "AAPL",
"MICROSOFT CORP": "MSFT", "MICROSOFT": "MSFT",
"AMAZON COM INC": "AMZN", "AMAZON": "AMZN",
"ALPHABET INC": "GOOGL", "ALPHABET": "GOOGL",
"NVIDIA CORP": "NVDA", "NVIDIA": "NVDA",
"META PLATFORMS": "META", "META": "META",
"TESLA INC": "TSLA", "TESLA": "TSLA",
"BERKSHIRE HATHAWAY": "BRK.B",
"JPMORGAN CHASE": "JPM",
"JOHNSON & JOHNSON": "JNJ",
"EXXON MOBIL": "XOM",
"UNITEDHEALTH": "UNH",
"VISA INC": "V",
"MASTERCARD": "MA",
"PROCTER & GAMBLE": "PG",
"HOME DEPOT": "HD",
"CHEVRON": "CVX",
"ABBVIE INC": "ABBV",
"COCA COLA": "KO", "COCA-COLA": "KO",
"PEPSICO": "PEP",
"BROADCOM": "AVGO",
"ELI LILLY": "LLY",
"COSTCO": "COST",
"MERCK": "MRK",
"WALMART": "WMT",
"PALANTIR": "PLTR",
"SALESFORCE": "CRM",
"ADOBE INC": "ADBE",
"NETFLIX": "NFLX",
"TAIWAN SEMICONDUCTOR": "TSM",
"UBER": "UBER",
"AIRBNB": "ABNB",
"SNOWFLAKE": "SNOW",
"CROWDSTRIKE": "CRWD",
"DATADOG": "DDOG",
"SERVICENOW": "NOW",
"INTUITIVE SURGICAL": "ISRG",
"AMD": "AMD", "ADVANCED MICRO DEVICES": "AMD",
"INTEL CORP": "INTC", "INTEL": "INTC",
"QUALCOMM": "QCOM",
"TEXAS INSTRUMENTS": "TXN",
"APPLIED MATERIALS": "AMAT",
"KKR": "KKR",
"BLACKSTONE": "BX",
"GOLDMAN SACHS": "GS",
"MORGAN STANLEY": "MS",
"BANK OF AMERICA": "BAC",
"WELLS FARGO": "WFC",
"CITIGROUP": "C",
"AMERICAN EXPRESS": "AXP",
"S&P GLOBAL": "SPGI",
"CHARLES SCHWAB": "SCHW",
"BLACKROCK": "BLK",
}
def _company_to_ticker(company: str) -> Optional[str]:
c = company.upper().strip()
if c in KNOWN_COMPANIES:
return KNOWN_COMPANIES[c]
for known, ticker in KNOWN_COMPANIES.items():
if c.startswith(known) or known.startswith(c[:min(len(c), 10)]):
return ticker
return None
def _cusip_cache_get(cusip: str) -> Optional[str]:
    """Look up a ticker in the CUSIP cache table.

    Returns:
        The cached ticker, or None when the CUSIP is empty, not cached,
        cached with an empty ticker, or the lookup fails. The cache is
        best-effort, so failures are logged at debug level instead of
        being raised.
    """
    if not cusip:
        return None
    try:
        with get_conn() as conn:
            row = conn.execute(
                "SELECT ticker FROM cusip_ticker_cache WHERE cusip=?", (cusip,)
            ).fetchone()
        return row["ticker"] if row and row["ticker"] else None
    except Exception as e:
        logger.debug(f"CUSIP cache read failed for {cusip}: {e}")
        return None
def _cusip_cache_set(cusip: str, ticker: str, name: str = ""):
    """Store a CUSIP -> ticker mapping in the cache table.

    Nothing is written when ``cusip`` or ``ticker`` is empty. The name is
    cut to 60 characters. The cache is best-effort: a failed write is
    logged at debug level and otherwise ignored.
    """
    if not cusip or not ticker:
        return
    try:
        with get_conn() as conn:
            conn.execute("""
                INSERT OR REPLACE INTO cusip_ticker_cache (cusip, ticker, name)
                VALUES (?,?,?)
            """, (cusip, ticker, name[:60]))
    except Exception as e:
        logger.debug(f"CUSIP cache write failed for {cusip}: {e}")
def _resolve_ticker(cusip: str, company: str) -> Optional[str]:
    """Resolve a holding to a ticker symbol.

    Resolution order: CUSIP cache, then the known-company mapping (the
    result is cached when a CUSIP is present), then a last-resort guess that
    strips common corporate suffixes from the name and accepts the first
    remaining word if it consists of 2-5 letters and is not a stop word.
    """
    cached = _cusip_cache_get(cusip) if cusip else None
    if cached:
        return cached

    mapped = _company_to_ticker(company)
    if mapped:
        if cusip:
            _cusip_cache_set(cusip, mapped, company)
        return mapped

    # Fallback: derive a candidate from the company name itself.
    stripped = re.sub(
        r'\b(INC|CORP|CO|LTD|LLC|PLC|AG|SE|NV|SA|GROUP|HOLDINGS|'
        r'INTERNATIONAL|ENTERPRISES|CLASS A|CLASS B|CL A|CL B|COM)\b\.?',
        '', company.upper()
    ).strip()
    tokens = stripped.split()
    if not tokens:
        return None
    lead = tokens[0]
    stop_words = {"THE", "AND", "FOR", "NEW", "OLD", "INC", "COM"}
    if re.match(r'^[A-Z]{2,5}$', lead) and lead not in stop_words:
        return lead
    return None
# ── SEC API Direct ───────────────────────────────────────────────────
@retry(times=3, delay=10)
def _get_submissions(cik: str) -> dict:
    """Download the SEC submissions JSON for one filer.

    Args:
        cik: Central Index Key. A string is expected, but an integer (for
            example from an unquoted config value) is accepted as well; the
            value is zero-padded to 10 digits for the URL.

    Raises:
        requests.HTTPError: for a non-success HTTP status.
    """
    url = f"https://data.sec.gov/submissions/CIK{str(cik).strip().zfill(10)}.json"
    resp = requests.get(url, headers=HEADERS, timeout=15)
    resp.raise_for_status()
    return resp.json()
def _extract_infotable_xml(full_txt: str) -> Optional[str]:
pattern = (
r'\s*INFORMATION TABLE'
r'.*?(.*?)\s*'
)
match = re.search(pattern, full_txt, re.DOTALL | re.IGNORECASE)
if match:
xml_part = match.group(1).strip()
xml_part = re.sub(r'?(?:XML|SEQUENCE|FILENAME)[^>]*>', '', xml_part)
xml_part = xml_part.strip()
if xml_part.startswith('<'):
return xml_part
match2 = re.search(r'()',
full_txt, re.DOTALL | re.IGNORECASE)
if match2:
return match2.group(1)
return None
def _findtext(elem, tag: str) -> str:
child = elem.find(tag)
if child is not None and child.text:
return child.text
for child in elem:
if child.tag.lower() == tag.lower():
return child.text or ""
return ""
def _parse_infotable_xml(xml_text: str, cik: str) -> Tuple[List[Dict], int]:
    """Parse information-table XML into a list of holdings.

    Namespace prefixes and ``xmlns`` / ``xsi:`` attributes are stripped
    before parsing. If the text is not a single well-formed document, it is
    parsed once more wrapped in a synthetic ``<root>`` element.

    Rows are skipped when the issuer name is missing, the value is below
    ``min_position_value_usd`` (threshold config, default 100000), no ticker
    can be resolved, or a numeric field cannot be converted.

    Args:
        xml_text: Raw XML of the information table.
        cik: Filer CIK (not used by the parser itself).

    Returns:
        ``(holdings, total_value)``: ``holdings`` is a list of dicts with
        "ticker", "cusip", "company", "shares" and "value_usd";
        ``total_value`` is the sum of the kept ``value_usd``. ``([], 0)``
        when the text cannot be parsed.
    """
    th = _get_thresholds()
    min_pos = th.get("min_position_value_usd", 100000)

    xml_text = re.sub(r'<(/?)\s*(?:\w+:)', r'<\1', xml_text)
    xml_text = re.sub(r'\s+xmlns(?::\w+)?="[^"]*"', '', xml_text)
    xml_text = re.sub(r'\s+xsi:\w+="[^"]*"', '', xml_text)
    xml_text = xml_text.strip()
    if not xml_text.startswith('<'):
        return [], 0

    try:
        root = ET.fromstring(xml_text)
    except ET.ParseError:
        # Retry as a fragment: wrap in a single root element. The wrapper
        # tags were missing from the source, which made this retry parse
        # the identical text again. An XML declaration must be removed
        # first because it is only legal at the very start of a document.
        fragment = re.sub(r'^<\?xml[^>]*\?>\s*', '', xml_text)
        try:
            root = ET.fromstring(f"<root>{fragment}</root>")
        except ET.ParseError as e:
            logger.warning(f"XML-Parse-Fehler: {e}")
            return [], 0

    info_tables = (
        root.findall('.//infoTable') or
        root.findall('.//InfoTable') or
        root.findall('.//INFOTABLE') or
        list(root)
    )

    holdings = []
    total_value = 0
    for info in info_tables:
        try:
            company = (_findtext(info, 'nameOfIssuer') or _findtext(info, 'NAMEOFISSUER') or "").strip()
            cusip = (_findtext(info, 'cusip') or _findtext(info, 'CUSIP') or "").strip()
            value_raw = _findtext(info, 'value') or _findtext(info, 'VALUE') or "0"
            value_usd = int(float(re.sub(r'[^\d.]', '', value_raw) or "0"))
            # Normalise thousand-unit values.
            # NOTE(review): only values below 10,000 are scaled; larger
            # values reported in thousands would stay unscaled - confirm.
            if 0 < value_usd < 10_000:
                value_usd *= 1_000

            # Compare with None explicitly: the truth value of an Element
            # depends on whether it has children.
            shr_container = info.find('shrsOrPrnAmt')
            if shr_container is None:
                shr_container = info.find('SHRSORPRNAMT')
            shares_raw = (
                (_findtext(shr_container, 'sshPrnamt') if shr_container is not None else "") or
                _findtext(info, 'sshPrnamt') or _findtext(info, 'SSHPRNAMT') or
                _findtext(info, 'shares') or "0"
            )
            shares = int(float(re.sub(r'[^\d.]', '', shares_raw) or "0"))

            if value_usd < min_pos or not company:
                continue
            ticker = _resolve_ticker(cusip, company)
            if not ticker:
                continue

            holdings.append({
                "ticker": ticker,
                "cusip": cusip,
                "company": company,
                "shares": shares,
                "value_usd": value_usd,
            })
            total_value += value_usd
        except (ValueError, TypeError, AttributeError):
            continue
    return holdings, total_value
@retry(times=2, delay=15)
def _fetch_filing(cik: str, index: int = 0) -> Optional[Tuple[str, List[Dict], int]]:
    """Download and parse one 13F filing directly from the SEC.

    Args:
        cik: Filer CIK.
        index: Which 13F-HR / 13F-HR/A filing to load, counted from the
            newest (0 = latest, 1 = the one before, ...).

    Returns:
        ``(quarter, holdings, total_value)``, where ``quarter`` is derived
        from the filing date, or None when the filing does not exist,
        contains no parseable information table, or any error occurs
        (errors are logged).
    """
    try:
        data = _get_submissions(cik)
        filings = data.get("filings", {}).get("recent", {})
        forms = filings.get("form", [])
        accs = filings.get("accessionNumber", [])
        dates = filings.get("filingDate", [])

        thirteenf = []
        for i, form in enumerate(forms):
            if form in ("13F-HR", "13F-HR/A") and i < len(accs) and i < len(dates):
                thirteenf.append((dates[i], accs[i]))
        if not thirteenf or len(thirteenf) <= index:
            return None
        thirteenf.sort(reverse=True)
        filing_date, accession = thirteenf[index]

        acc_clean = accession.replace("-", "")
        cik_padded = str(int(cik)).zfill(10)
        # The folder uses the accession number without dashes, while the
        # full-text file inside it keeps the dashed form. Using the
        # undashed name for the file made the primary request fall through
        # to the index-page fallback below.
        txt_url = (
            f"https://www.sec.gov/Archives/edgar/data/"
            f"{cik_padded}/{acc_clean}/{accession}.txt"
        )
        resp = requests.get(txt_url, headers=HEADERS, timeout=45)
        if resp.status_code == 404:
            # Fallback: find a .txt link on the filing index page.
            idx_url = (
                f"https://www.sec.gov/Archives/edgar/data/"
                f"{cik_padded}/{acc_clean}/{accession}-index.htm"
            )
            idx_resp = requests.get(idx_url, headers=HEADERS, timeout=15)
            txt_match = re.search(r'href="([^"]+\.txt)"', idx_resp.text, re.IGNORECASE)
            if txt_match:
                alt_url = "https://www.sec.gov" + txt_match.group(1)
                resp = requests.get(alt_url, headers=HEADERS, timeout=45)
        resp.raise_for_status()
        time.sleep(0.15)  # SEC rate limit

        xml_text = _extract_infotable_xml(resp.text)
        if not xml_text:
            return None
        holdings, total = _parse_infotable_xml(xml_text, cik)
        if not holdings:
            return None

        quarter = _date_to_quarter(filing_date)
        logger.info(
            f" ✓ {len(holdings)} Pos., ${total/1e9:.1f}B, {quarter}"
        )
        return quarter, holdings, total
    except Exception as e:
        logger.error(f"13F CIK {cik} Index {index}: {e}")
        return None
# ── Conviction Berechnung ────────────────────────────────────────────
def _real_conviction(curr_val: int, prev_val: int, curr_shares: int,
                     prev_shares: int, total_port: int,
                     fund_score: int, delta_type: str,
                     consecutive_qs: int = 0) -> float:
    """Compute a conviction score in [0, 1] from 13F data plus a multi-quarter bonus.

    Components, added in this order: portfolio weight, value change, share
    change (with a small penalty when the value rose while the share count
    fell, i.e. a pure price effect), fund quality, and the bonus for
    consecutive quarterly increases taken from the threshold config. For
    ``delta_type == "new"`` the value and share components are fixed. The
    result is clamped to [0, 1] and rounded to 3 decimals.
    """
    def _tier(value, tiers):
        # Bonus of the first tier whose floor is reached, otherwise 0.0.
        for floor, bonus in tiers:
            if value >= floor:
                return bonus
        return 0.0

    is_new = delta_type == "new"
    score = 0.0

    # Portfolio weight
    port_pct = (curr_val / total_port * 100) if total_port > 0 else 0
    score += _tier(port_pct, (
        (8.0, 0.40), (4.0, 0.32), (2.0, 0.22),
        (1.0, 0.14), (0.5, 0.08), (0.3, 0.04),
    ))

    # Value change
    if is_new:
        score += 0.25
    elif prev_val > 0:
        val_chg = (curr_val - prev_val) / prev_val
        score += _tier(val_chg, (
            (1.0, 0.25), (0.5, 0.20), (0.25, 0.14), (0.15, 0.09),
        ))

    # Share-count confirmation
    if is_new:
        score += 0.15
    elif prev_shares > 0 and curr_shares > 0:
        shr_chg = (curr_shares - prev_shares) / prev_shares
        step = _tier(shr_chg, ((0.20, 0.15), (0.10, 0.10), (0.05, 0.05)))
        if step:
            score += step
        elif shr_chg < 0 and (curr_val - prev_val) > 0:
            score -= 0.05  # price effect only

    # Fund quality
    score += _tier(fund_score, (
        (40, 0.20), (32, 0.15), (24, 0.10), (16, 0.06),
    ))

    # Multi-quarter bonus
    th = _get_thresholds()
    if consecutive_qs >= 3:
        score += th.get("consecutive_3plus_bonus", 0.30)
    elif consecutive_qs >= 2:
        score += th.get("consecutive_2_bonus", 0.15)

    return round(min(max(score, 0.0), 1.0), 3)
def _calculate_delta(current: List[Dict], previous: Dict[str, Dict],
                     fund_name: str, fund_score: int, cik: str,
                     total_port: int, prev_total: int) -> List[Dict]:
    """Compare the current quarter with the previous one and emit bullish signals.

    A position is considered only when its portfolio weight reaches
    ``min_portfolio_pct``. It then yields
      * a "13f_increase" signal when it existed before with a non-zero
        value, grew by at least ``min_increase_pct`` percent and is worth
        at least ``min_position_value_usd``;
      * a "13f_new_position" signal when it did not exist before and is
        worth at least ``min_new_position_usd``.

    The multi-quarter trend (a storage lookup) is resolved only for
    positions that actually produce an increase signal, the only place
    where it is used; the emitted signals are unchanged.

    Args:
        current: Holdings of the current quarter.
        previous: Holdings of the previous quarter keyed by ticker.
        fund_name: Display name of the fund.
        fund_score: Quality score of the fund.
        cik: Filer CIK, used for the trend lookup.
        total_port: Total value of the current portfolio.
        prev_total: Total value of the previous portfolio (not used).

    Returns:
        List of signal dicts.
    """
    th = _get_thresholds()
    min_inc_pct = th.get("min_increase_pct", 15.0)
    min_port_pct = th.get("min_portfolio_pct", 0.3)
    min_pos = th.get("min_position_value_usd", 100000)
    min_new = th.get("min_new_position_usd", 500000)

    signals = []
    current_map = {h["ticker"]: h for h in current if h.get("ticker")}
    for ticker, curr in current_map.items():
        curr_val = curr["value_usd"]
        curr_shares = curr["shares"]
        company = curr.get("company", "")
        port_pct = (curr_val / total_port * 100) if total_port > 0 else 0
        if port_pct < min_port_pct:
            continue

        if ticker not in previous:
            # New position
            if curr_val >= min_new:
                conviction = _real_conviction(
                    curr_val, 0, curr_shares, 0,
                    total_port, fund_score, "new", 0
                )
                strength = 92 if fund_score >= 40 else (84 if fund_score >= 30 else 70)
                signals.append({
                    "ticker": ticker,
                    "company": company,
                    "signal_type": "13f_new_position",
                    "fund_name": fund_name,
                    "fund_score": fund_score,
                    "delta_type": "new",
                    "val_change_pct": 100.0,
                    "value_usd": curr_val,
                    "shares": curr_shares,
                    "portfolio_pct": round(port_pct, 2),
                    "total_portfolio": total_port,
                    "strength": strength,
                    "conviction": conviction,
                    "consecutive_quarters": 0,
                    "source_count": 1,
                    "summary": (
                        f"{fund_name} NEU {ticker}: ${curr_val/1e6:.1f}M "
                        f"| {port_pct:.1f}%"
                    ),
                })
                logger.info(
                    f" NEW {ticker}: ${curr_val/1e6:.1f}M | conv={conviction:.2f}"
                )
            continue

        prev = previous[ticker]
        prev_val = prev["value_usd"]
        prev_shares = prev["shares"]
        if prev_val == 0:
            continue
        val_chg_pct = (curr_val - prev_val) / prev_val * 100
        shr_chg_pct = (
            (curr_shares - prev_shares) / prev_shares * 100
            if prev_shares > 0 else 0
        )
        if not (val_chg_pct >= min_inc_pct and curr_val >= min_pos):
            continue

        # Multi-quarter trend, looked up only for qualifying increases.
        consecutive = get_consecutive_increases(cik, ticker)
        conviction = _real_conviction(
            curr_val, prev_val, curr_shares, prev_shares,
            total_port, fund_score, "increase", consecutive
        )
        # Strength bonus for multi-quarter accumulation
        base_strength = min(int(50 + val_chg_pct / 2), 92)
        if consecutive >= 3:
            base_strength = min(base_strength + 8, 95)
        elif consecutive >= 2:
            base_strength = min(base_strength + 4, 92)

        if consecutive >= 2:
            summary = (
                f"{fund_name} +{val_chg_pct:.0f}% {ticker} "
                f"({consecutive}x in Folge)"
            )
        else:
            summary = f"{fund_name} +{val_chg_pct:.0f}% {ticker}"

        signals.append({
            "ticker": ticker,
            "company": company,
            "signal_type": "13f_increase",
            "fund_name": fund_name,
            "fund_score": fund_score,
            "delta_type": "increase",
            "val_change_pct": round(val_chg_pct, 1),
            "shr_change_pct": round(shr_chg_pct, 1),
            "value_usd": curr_val,
            "prev_value_usd": prev_val,
            "shares": curr_shares,
            "portfolio_pct": round(port_pct, 2),
            "total_portfolio": total_port,
            "strength": base_strength,
            "conviction": conviction,
            "consecutive_quarters": consecutive,
            "source_count": 1,
            "summary": summary,
        })
        logger.info(
            f" INCREASE {ticker}: +{val_chg_pct:.0f}% | "
            f"{port_pct:.1f}% | conv={conviction:.2f} | {consecutive}q"
        )
    return signals
def _fetch_baseline(cik: str, fund_name: str) -> bool:
    """Cold start: store a baseline filing for a fund without an earlier quarter.

    Requests the filing at ``index=1`` from ``_fetch_filing`` (presumably the
    filing before the most recent one — confirm against ``_fetch_filing``)
    and persists it through ``_save_holdings``.

    Args:
        cik: SEC CIK of the fund.
        fund_name: Fund name, used for logging and stored with the holdings.

    Returns:
        True if a filing was fetched and saved, False if nothing was fetched.
    """
    logger.info(f" Cold Start {fund_name}")
    baseline = _fetch_filing(cik, index=1)
    if baseline:
        quarter, holdings, total = baseline
        _save_holdings(cik, fund_name, quarter, holdings, total)
        return True
    return False
def fetch(funds_config: List[Dict], scorer) -> List[Dict]:
    """Main entry point: 13F quarter-over-quarter deltas for all tracked funds.

    For every fund whose score is at least 15, the latest filing is fetched
    and compared against the most recent earlier quarter already stored. If
    no earlier quarter is stored, a baseline is fetched first. The latest
    holdings are always saved, also when no comparison is possible.

    Args:
        funds_config: [{"name": ..., "cik": ...}, ...]
        scorer: FundScorer instance (only ``get_score`` is used).

    Returns:
        The delta signals whose ``delta_type`` is "new" or "increase".
    """
    collected = []
    for entry in funds_config:
        fund_name = entry.get("name", "")
        cik = entry.get("cik", "")
        fund_score = scorer.get_score(fund_name)
        if fund_score < 15:
            continue

        logger.info(f"13F: {fund_name} (CIK {cik}, Score {fund_score})")
        latest = _fetch_filing(cik, index=0)
        if not latest:
            continue
        curr_quarter, curr_holdings, curr_total = latest

        known_quarters = _get_quarters(cik)
        if not any(q < curr_quarter for q in known_quarters):
            # Nothing older on record: try to seed a baseline quarter first.
            if not _fetch_baseline(cik, fund_name):
                _save_holdings(cik, fund_name, curr_quarter, curr_holdings, curr_total)
                continue
            known_quarters = _get_quarters(cik)

        earlier = [q for q in known_quarters if q < curr_quarter]
        if not earlier:
            _save_holdings(cik, fund_name, curr_quarter, curr_holdings, curr_total)
            continue

        # Compare against the newest quarter that precedes the current one.
        prev_q = max(earlier)
        prev_hold = _get_holdings(cik, prev_q)
        prev_total = _get_portfolio_total(cik, prev_q)
        deltas = _calculate_delta(
            curr_holdings, prev_hold,
            fund_name, fund_score, cik,
            curr_total, prev_total
        )
        _save_holdings(cik, fund_name, curr_quarter, curr_holdings, curr_total)
        collected.extend(deltas)

    bullish = [s for s in collected if s["delta_type"] in ("new", "increase")]
    logger.info(f"13F Delta: {len(bullish)} bullish")
    return bullish
================================================
FILE: src/score/__init__.py
================================================
"""
Score-Layer: Bewertung & Filterung.
- signal_builder: Erzeugt Signal-Objekte aus Rohdaten
- signal_builder.merge_by_ticker: Merge by Ticker (Multi-Source-Detection)
- signal_filter: Hard-Gates + Ranking
- fund_scorer: Fund-Score-Lookup
"""
================================================
FILE: src/score/fund_scorer.py
================================================
# src/score/fund_scorer.py
"""Fund-Score Lookup aus fund_weights.yaml."""
import re
from typing import Dict
from src.utils.config import load as load_config
from src.utils.logger import logger
def _normalize(name: str) -> str:
"""Lowercase, Sonderzeichen entfernen, Whitespace normalisieren."""
return re.sub(r'\s+', ' ', re.sub(r'[^a-z0-9\s]', ' ', name.lower())).strip()
class FundScorer:
    """Fund score / metadata lookup backed by the ``fund_weights`` config.

    Names are compared after ``_normalize`` using a bidirectional substring
    match, e.g. "berkshire hathaway" matches "berkshire hathaway inc." and an
    abbreviated input "berkshire" matches the configured "berkshire hathaway".
    """

    # Score returned for funds that are neither configured nor ignored.
    _UNKNOWN_SCORE = 8

    def __init__(self):
        self.config = load_config("fund_weights")

    def _find_known(self, name_norm: str):
        """Return ``(configured_name, data)`` for the first matching fund, else None.

        Empty names never match: ``"" in known_norm`` is always true, which
        would otherwise hand the first configured fund's data to a blank input.
        """
        if not name_norm:
            return None
        for known, data in (self.config.get("funds") or {}).items():
            known_norm = _normalize(known)
            if not known_norm:
                # A key that normalises to "" would match every name.
                continue
            if known_norm in name_norm or name_norm in known_norm:
                return known, (data or {})
        return None

    def get_score(self, fund_name: str) -> int:
        """Return the configured score for ``fund_name``.

        Returns:
            0 if the fund is on the ignore list or its entry has no score,
            the configured score if the fund is known, and 8 for unknown
            funds (including empty or ``None`` names).
        """
        name_norm = _normalize(fund_name or "")
        if not name_norm:
            return self._UNKNOWN_SCORE
        for ignored in self.config.get("ignored_funds") or []:
            ignored_norm = _normalize(ignored)
            if ignored_norm and ignored_norm in name_norm:
                return 0
        match = self._find_known(name_norm)
        if match is None:
            return self._UNKNOWN_SCORE  # Unknown fund default
        return match[1].get("score", 0)

    def get_info(self, fund_name: str) -> Dict:
        """Return ``fund_name``, ``score`` and ``category`` for a fund.

        Unknown funds get score 8 and category "unknown". The ignore list is
        not consulted here.
        """
        match = self._find_known(_normalize(fund_name or ""))
        if match is None:
            return {
                "fund_name": fund_name,
                "score": self._UNKNOWN_SCORE,
                "category": "unknown",
            }
        known, data = match
        return {
            "fund_name": known,
            "score": data.get("score", 0),
            "category": data.get("category", "unknown")
        }
================================================
FILE: src/score/signal_builder.py
================================================
# src/score/signal_builder.py
"""
Erzeugt Signal-Objekte aus Rohdaten.
Zentrale Stelle für Signal-Konstruktion.
"""
from datetime import datetime
from typing import List, Dict
from collections import defaultdict
from src.score.signal_filter import Signal, SignalFilter
from src.score.fund_scorer import FundScorer
from src.utils.logger import logger
import re
# Placeholder values that upstream sources emit instead of a real ticker.
INVALID_TICKERS = {"UNKNOWN", "PORTFOLIO", "", "—", "N/A", "NA"}


def is_valid_ticker(ticker: str) -> bool:
    """Return True if ``ticker`` looks like an exchange symbol.

    Accepts 1-5 uppercase letters with an optional class suffix of 1-2
    letters (e.g. ``BRK.B``). Surrounding whitespace is ignored for both the
    placeholder check and the pattern check, so a padded placeholder such as
    ``" NA "`` is rejected as well. Non-string input is rejected.
    """
    if not isinstance(ticker, str):
        return False
    symbol = ticker.strip()
    if not symbol or symbol in INVALID_TICKERS:
        return False
    return bool(re.match(r'^[A-Z]{1,5}(?:\.[A-Z]{1,2})?$', symbol))
def build_signals_from_form4(form4: List[Dict],
                             scorer: FundScorer,
                             sf: SignalFilter) -> List[Signal]:
    """Turn Form 4 filing dicts into ``insider_buy`` Signal objects.

    Filings with an invalid ticker are skipped, as are filings whose title
    (first 50 characters, looked up through ``scorer``) scores below 10.
    Clustered filings count as two sources and get a higher position weight
    in the conviction calculation.
    """
    built = []
    for filing in form4:
        ticker = filing.get("ticker", "")
        if not is_valid_ticker(ticker):
            continue

        fund = filing.get("title", "")[:50]
        score = scorer.get_score(fund)
        if score < 10:
            continue

        clustered = filing.get("is_clustered", False)
        seen_on_other_days = filing.get("cross_day_count", 0) > 0

        strength = sf.calculate_strength(["insider_buy"])
        conviction = sf.calculate_conviction(
            position_pct=3.5 if clustered else 1.5,
            days_since_buy=1 if seen_on_other_days else 2,
            price_vs_ma50_pct=-2.0,
            is_clustered=clustered,
        )
        built.append(Signal(
            ticker=ticker,
            signal_type="insider_buy",
            fund_name=fund,
            fund_score=score,
            strength=strength,
            conviction=conviction,
            is_clustered=clustered,
            source_count=2 if clustered else 1,
            raw=filing,
        ))
    return built
def build_signals_from_13f(thirteenf_signals: List[Dict],
                           sf: SignalFilter) -> List[Signal]:
    """Turn 13F delta dicts into Signal objects.

    Entries with an invalid ticker are dropped. Missing keys fall back to
    ``signal_type="13f_increase"``, ``strength=60``, ``conviction=0.5`` and
    ``source_count=1``. ``sf`` is accepted for signature parity with the
    other builders and is not used.
    """
    return [
        Signal(
            ticker=delta.get("ticker", ""),
            signal_type=delta.get("signal_type", "13f_increase"),
            fund_name=delta.get("fund_name", ""),
            fund_score=delta.get("fund_score", 0),
            strength=delta.get("strength", 60),
            conviction=delta.get("conviction", 0.5),
            consecutive_quarters=delta.get("consecutive_quarters", 0),
            source_count=delta.get("source_count", 1),
            raw=delta,
        )
        for delta in thirteenf_signals
        if is_valid_ticker(delta.get("ticker", ""))
    ]
def build_signals_from_8k(eight_k: List[Dict],
                          sf: SignalFilter) -> List[Signal]:
    """Turn 8-K filing dicts into ``8k_event`` Signal objects.

    Only filings with ``item_score`` of at least 50 (default 10 when missing)
    and a valid ticker produce a signal. Fund name, fund score and conviction
    are fixed at "Corporate", 22 and 0.42.
    """
    event_strength_source = ["8k_event"]
    built = []
    for filing in eight_k:
        item_score = filing.get("item_score", 10)
        ticker = filing.get("ticker", "")
        if item_score >= 50 and is_valid_ticker(ticker):
            built.append(Signal(
                ticker=ticker,
                signal_type="8k_event",
                fund_name="Corporate",
                fund_score=22,
                strength=sf.calculate_strength(event_strength_source),
                conviction=0.42,
                item_score=item_score,
                source_count=1,
                raw=filing,
            ))
    return built
def build_signals_from_gov(gov: List[Dict],
                           sf: SignalFilter) -> List[Signal]:
    """Turn politician trade dicts into ``gov_buy`` Signal objects.

    Conviction starts at ``0.35 + politician_score / 100`` (capped at 0.65)
    and is reduced by an age penalty derived from the trade date; it never
    drops below 0.20. Trades whose date cannot be parsed are treated as 30
    days old.
    """
    # (age in days that must be exceeded, penalty), checked from oldest down.
    penalty_tiers = ((40, 0.20), (20, 0.10), (10, 0.05))

    built = []
    for trade in gov:
        ticker = trade.get("ticker", "")
        if not is_valid_ticker(ticker):
            continue

        pol_score = trade.get("politician_score", 18)
        politician = trade.get("politician", "Unknown")
        trade_date = trade.get("date", "")
        try:
            parsed = datetime.strptime(trade_date[:10], "%Y-%m-%d")
            days_old = (datetime.utcnow() - parsed).days
        except Exception:
            days_old = 30

        age_penalty = next(
            (penalty for limit, penalty in penalty_tiers if days_old > limit),
            0.0,
        )
        base_conviction = min(0.35 + (pol_score / 100), 0.65)
        conviction = max(base_conviction - age_penalty, 0.20)

        built.append(Signal(
            ticker=ticker,
            signal_type="gov_buy",
            fund_name=politician,
            fund_score=pol_score,
            strength=sf.calculate_strength(["gov_buy"]),
            conviction=conviction,
            source_count=1,
            raw={
                **trade,
                "fund_category": "politician",
                "days_since_trade": days_old,
                "trade_date": trade_date,
            },
        ))
    return built
def merge_by_ticker(signals: List[Signal], sf: SignalFilter) -> List[Signal]:
    """Merge all signals that share a ticker into one representative signal.

    Cluster detection: several independent signals count as stronger. For a
    ticker with more than one signal, the signal with the highest fund score
    is kept and updated in place with the merged source count, cluster flag,
    conviction, consecutive-quarter count and strength. Tickers with a single
    signal pass through unchanged.
    """
    grouped = defaultdict(list)
    for sig in signals:
        if sig.ticker not in INVALID_TICKERS:
            grouped[sig.ticker].append(sig)

    merged = []
    for ticker, group in grouped.items():
        if len(group) == 1:
            merged.append(group[0])
            continue

        best = max(group, key=lambda x: x.fund_score)
        unique_types = list({g.signal_type for g in group})
        type_count = len(unique_types)

        # Special cases: several signals of the same kind form a cluster.
        gov_total = sum(1 for g in group if g.signal_type == "gov_buy")
        insider_total = sum(1 for g in group if g.signal_type == "insider_buy")
        fund_13f = [
            g for g in group
            if g.signal_type in ("13f_increase", "13f_new_position")
        ]
        pol_cluster = gov_total >= 3
        insider_cluster = insider_total >= 2
        fund_cluster = len(fund_13f) >= 2
        same_kind_cluster = pol_cluster or insider_cluster or fund_cluster

        # A same-kind cluster of a single type still counts as two sources.
        if type_count == 1 and same_kind_cluster:
            effective_count = 2
        else:
            effective_count = type_count

        best.source_count = effective_count
        best.is_clustered = (
            any(g.is_clustered for g in group) or same_kind_cluster
        )
        best.conviction = max(g.conviction for g in group)
        best.consecutive_quarters = max(g.consecutive_quarters for g in group)

        if fund_cluster:
            n_funds = len(fund_13f)
            avg_score = sum(g.fund_score for g in fund_13f) / n_funds
            best.strength = min(int(70 + (n_funds - 1) * 5 + avg_score * 0.3), 95)
            if avg_score >= 35:
                best.conviction = min(best.conviction + 0.10, 1.0)
        else:
            best.strength = sf.calculate_strength(unique_types)

        cluster_flags = []
        if pol_cluster:
            cluster_flags.append("POL-CLUSTER")
        if insider_cluster:
            cluster_flags.append("INS-CLUSTER")
        if fund_cluster:
            cluster_flags.append(f"FUND-CLUSTER({len(fund_13f)}x)")
        flag_suffix = ' + ' + ' + '.join(cluster_flags) if cluster_flags else ''

        logger.info(
            f" Merge {ticker}: {len(group)} → "
            f"sources={effective_count} ({', '.join(unique_types)}"
            f"{flag_suffix})"
        )
        merged.append(best)
    return merged
================================================
FILE: src/score/signal_filter.py
================================================
# src/score/signal_filter.py
"""
Signal-Filter mit gewichtetem Scoring-Modell.
Gewichtetes Scoring:
Fund-Score × 0.40
Multi-Signal × 0.25
Conviction/Timing × 0.20
News + Options × 0.15
Hard-Gates:
fund_score >= 15
source_count >= 2 (Multi-Signal-Gate)
conviction >= 0.38
news_alignment >= -0.70 (gelockert für 2-6M Calls)
"""
from dataclasses import dataclass, field
from typing import List, Dict
from src.utils.logger import logger
from src.utils.config import get_threshold
@dataclass
class Signal:
    """One signal as it moves through building, merging, filtering and ranking.

    The first six fields are required and positional; every other field has a
    default and is filled in by later pipeline stages.
    """
    ticker: str
    # One of the types built in signal_builder, e.g. "insider_buy",
    # "13f_increase", "13f_new_position", "8k_event", "gov_buy".
    signal_type: str
    fund_name: str
    # Divided by 50 in SignalFilter.weighted_score.
    fund_score: int
    # Divided by 100 in SignalFilter.weighted_score.
    strength: int
    # Multiplied by 20 in weighted_score; calculate_conviction caps it at 1.0.
    conviction: float
    # Mapped via (x + 1) / 2 in weighted_score — presumably ranges -1..1; confirm.
    news_alignment: float = 0.0
    # weighted_score adds a bonus when this equals "bullish".
    macro_context: str = "neutral"
    # Divided by 30 in weighted_score.
    options_score: int = 0
    options_summary: str = ""
    options_qualified: bool = False
    # Number of independent sources; checked by the multi-signal hard gate.
    source_count: int = 1
    is_clustered: bool = False
    # 8-K item score; weighted_score adds a bonus at 80 and above.
    item_score: int = 50
    consecutive_quarters: int = 0
    # Multiplied by 50 and added to the weighted score.
    catalyst_modifier: float = 0.0
    # Source record the signal was built from.
    raw: Dict = field(default_factory=dict)
class SignalFilter:
    """Filters signals through hard gates and ranks them by a weighted score."""

    # fund_score and source_count gates are designed for hedge fund signals;
    # insider/event quality is captured by conviction, item_score, and clustering.
    _UNGATED_TYPES = ("insider_buy", "8k_event")

    # Strength assigned to each known pair of signal types.
    _COMBO_STRENGTH = {
        frozenset(["insider_buy", "13f_increase"]): 90,
        frozenset(["insider_buy", "13f_new_position"]): 92,
        frozenset(["gov_buy", "8k_event"]): 82,
        frozenset(["insider_buy", "8k_event"]): 78,
        frozenset(["13f_increase", "8k_event"]): 75,
        frozenset(["13f_new_position", "8k_event"]): 80,
        frozenset(["insider_buy", "gov_buy"]): 80,
        frozenset(["gov_buy", "13f_new_position"]): 85,
    }

    def is_valid(self, s: "Signal") -> bool:
        """Apply the hard gates; thresholds come from the ``signal_filter`` config.

        All signals must meet the conviction and news-alignment gates. Signals
        other than insider buys and 8-K events must additionally meet the
        fund-score and source-count gates.
        """
        min_fund = get_threshold("signal_filter", "min_fund_score", 15)
        min_sources = get_threshold("signal_filter", "min_sources", 2)
        min_conv = get_threshold("signal_filter", "min_conviction", 0.38)
        max_neg = get_threshold("signal_filter", "max_neg_news", -0.70)

        if s.signal_type not in self._UNGATED_TYPES:
            if s.fund_score < min_fund:
                return False
            if s.source_count < min_sources:
                return False
        if s.conviction < min_conv:
            return False
        if s.news_alignment < max_neg:
            return False
        return True

    def weighted_score(self, s: "Signal") -> float:
        """Weighted score, clamped to 0-100.

        Base: fund score (40), signal strength (25), conviction (20), news
        alignment (10) and options score (5). Bonuses for clustering, high
        8-K item score, bullish macro context, qualified options, new 13F
        positions of high-scoring funds and multi-quarter trends are added on
        top, followed by the catalyst modifier scaled by 50.
        """
        fund_c = (s.fund_score / 50) * 40
        signal_c = (s.strength / 100) * 25
        conv_c = s.conviction * 20
        news_opt_c = (
            ((s.news_alignment + 1) / 2) * 10 +
            (s.options_score / 30) * 5
        )
        base = fund_c + signal_c + conv_c + news_opt_c

        # Bonuses
        if s.is_clustered:
            base += 12
        if s.item_score >= 80:
            base += 8
        if s.macro_context == "bullish":
            base += 5
        if s.options_qualified:
            base += 5
        if s.signal_type == "13f_new_position" and s.fund_score >= 38:
            base += 10
        if s.consecutive_quarters >= 3:
            base += 15  # strongest signal
        elif s.consecutive_quarters >= 2:
            base += 8

        # Catalyst modifier (from catalyst_finder), scaled
        base += s.catalyst_modifier * 50
        return min(max(base, 0.0), 100.0)

    def calculate_conviction(self, position_pct: float, days_since_buy: int,
                             price_vs_ma50_pct: float,
                             is_clustered: bool = False) -> float:
        """Conviction for Form 4 signals, as a sum of tiered bonuses capped at 1.0.

        Args:
            position_pct: Position size in percent; larger sizes add more.
            days_since_buy: Age of the purchase in days; fresher adds more.
            price_vs_ma50_pct: Price relative to the 50-day average in
                percent; lower values add more.
            is_clustered: Adds 0.20 when the buy is part of a cluster.
        """
        c = 0.0
        if position_pct > 5.0:
            c += 0.40
        elif position_pct > 2.5:
            c += 0.25
        elif position_pct > 1.0:
            c += 0.12

        if days_since_buy < 3:
            c += 0.25
        elif days_since_buy < 10:
            c += 0.15
        elif days_since_buy < 20:
            c += 0.08

        if price_vs_ma50_pct < -5:
            c += 0.25
        elif price_vs_ma50_pct < 0:
            c += 0.15
        elif price_vs_ma50_pct < 10:
            c += 0.05

        if is_clustered:
            c += 0.20
        return min(c, 1.0)

    def calculate_strength(self, sources: List[str]) -> int:
        """Strength derived from the combination of signal types.

        When several known pairs are contained in ``sources`` the highest
        value wins, independent of table order. Without a known pair the
        result is 42 for a single distinct type and 58 otherwise; repeated
        types count once.
        """
        types = frozenset(sources)
        matching = [
            value for combo, value in self._COMBO_STRENGTH.items()
            if combo <= types
        ]
        if matching:
            return max(matching)
        return 42 if len(types) == 1 else 58

    def filter_and_rank(self, signals: "List[Signal]") -> "List[Signal]":
        """Drop signals failing the hard gates, then sort by weighted score (descending)."""
        valid = [s for s in signals if self.is_valid(s)]
        logger.info(f"Filter: {len(signals)} → {len(valid)} valide")
        return sorted(valid, key=self.weighted_score, reverse=True)
================================================
FILE: src/utils/__init__.py
================================================
================================================
FILE: src/utils/config.py
================================================
# src/utils/config.py
"""
Zentraler Config-Loader.
Alle Module nutzen diese, statt direkt yaml zu öffnen.
mtime-basiertes Caching: Wenn die YAML-Datei auf Disk geändert wird,
wird sie beim nächsten load()-Aufruf automatisch neu eingelesen.
"""
import yaml
from pathlib import Path
from typing import Dict, Any
from src.utils.logger import logger
# Stores (data, mtime) per config name.
_config_cache: Dict[str, tuple] = {}
def load(name: str) -> Dict:
    """Load a YAML config from ``config/``.

    Results are cached together with the file's mtime; when the file changes
    on disk, the next call re-reads it automatically.

    Args:
        name: File name without ``.yaml`` (e.g. "thresholds").

    Returns:
        Dict with the config content; an empty dict if the file is missing
        or the document is empty.
    """
    path = Path(f"config/{name}.yaml")
    if not path.exists():
        logger.error(f"Config '{name}' nicht gefunden: {path}")
        return {}

    try:
        mtime = path.stat().st_mtime
    except OSError:
        mtime = 0.0

    cached = _config_cache.get(name)
    if cached is not None:
        cached_data, cached_mtime = cached
        if cached_mtime == mtime:
            return cached_data
        logger.info(f"Config '{name}' hat sich geändert — neu laden")

    with path.open("r", encoding="utf-8") as handle:
        cfg = yaml.safe_load(handle) or {}
    _config_cache[name] = (cfg, mtime)
    return cfg
def get_threshold(category: str, key: str, default=None):
    """Convenience lookup into ``thresholds.yaml``.

    Example:
        max_iv = get_threshold("options", "iv_rank_kill")

    Args:
        category: Top-level section of the thresholds config.
        key: Entry inside that section.
        default: Value returned when the section or key is missing.

    Returns:
        The configured value, or ``default``. A section that is present but
        empty or not a mapping (e.g. a bare ``options:`` line, which YAML
        parses as null) also yields ``default`` instead of raising.
    """
    cfg = load("thresholds")
    if not isinstance(cfg, dict):
        return default
    section = cfg.get(category)
    if not isinstance(section, dict):
        return default
    return section.get(key, default)
def reload():
    """Empty the cache so that every config is re-read on its next ``load()`` call."""
    global _config_cache
    _config_cache = dict()
    logger.info("Config-Cache geleert")
================================================
FILE: src/utils/logger.py
================================================
# src/utils/logger.py
"""Zentraler Logger für das ganze System."""
import sys
from loguru import logger
from pathlib import Path
# Make sure the log directory exists before the file sink below is registered.
Path("data/logs").mkdir(parents=True, exist_ok=True)
# Drop the handlers registered so far so that only the two sinks below are active.
logger.remove()
# Console sink: INFO and above on stderr, formatted as "time | level | message".
logger.add(
    sys.stderr,
    level="INFO",
    format="{time:HH:mm:ss} | {level: <8} | {message}"
)
# File sink: DEBUG and above, with the rotation and retention settings given here.
logger.add(
    "data/logs/scanner.log",
    rotation="7 days",
    retention="30 days",
    level="DEBUG"
)
================================================
FILE: src/utils/retry.py
================================================
# src/utils/retry.py
"""Retry-Decorator mit exponential Backoff."""
import time
import functools
from src.utils.logger import logger
def retry(times=3, delay=5, backoff=2):
    """Retry decorator with exponential backoff.

    The wrapped function is always called at least once; ``times`` below 1 is
    treated as 1 so the wrapper can never return ``None`` without having
    called the function.

    Args:
        times: Maximum number of attempts.
        delay: Initial wait in seconds before the second attempt.
        backoff: Factor the wait is multiplied by after each failed attempt.

    Raises:
        Whatever the wrapped function raised on its final attempt.
    """
    attempts = max(1, times)

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            wait = delay
            for attempt in range(1, attempts + 1):
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    if attempt == attempts:
                        logger.error(
                            f"{func.__name__} fehlgeschlagen nach {attempts} Versuchen: {e}"
                        )
                        raise
                    logger.warning(
                        f"{func.__name__} Versuch {attempt}/{attempts}: {e} — warte {wait}s"
                    )
                    time.sleep(wait)
                    wait *= backoff
        return wrapper
    return decorator
================================================
FILE: src/utils/storage.py
================================================
# src/utils/storage.py
"""
SQLite Storage für alle Persistenz.
Zentrale Stelle für DB-Zugriffe — Module greifen NICHT direkt auf SQLite zu.
"""
import sqlite3
import json
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Optional
from src.utils.logger import logger
# Location of the SQLite database file, relative to the working directory.
DB_PATH = Path("data/scanner.db")
# Create the containing directory at import time (no error if it already exists).
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
def get_conn():
    """Open a new connection to ``DB_PATH`` with a row factory for dict-like access.

    Note: used as ``with get_conn() as conn``, a sqlite3 connection commits on
    success and rolls back on an exception, but it is not closed on exit.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
# Full schema; every statement uses IF NOT EXISTS so the script can be re-run.
_SCHEMA_SQL = """
    -- Form4 History (für Cross-Day-Cluster)
    CREATE TABLE IF NOT EXISTS form4_history (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        ticker TEXT,
        filed_date TEXT,
        title TEXT,
        url TEXT,
        amount_usd INTEGER DEFAULT 0,
        is_10b5 INTEGER DEFAULT 0,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP
    );
    CREATE INDEX IF NOT EXISTS idx_form4_ticker_date
        ON form4_history (ticker, filed_date);
    -- Signals (alle generierten Signale)
    CREATE TABLE IF NOT EXISTS signals (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        date TEXT,
        ticker TEXT,
        signal_type TEXT,
        fund_name TEXT,
        fund_score INTEGER,
        strength INTEGER,
        conviction REAL,
        action TEXT,
        confidence REAL,
        reasoning TEXT,
        instrument TEXT,
        raw_data TEXT,
        outcome TEXT DEFAULT '',
        outcome_pct REAL DEFAULT 0,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP
    );
    CREATE INDEX IF NOT EXISTS idx_signals_ticker_type
        ON signals (ticker, signal_type, date);
    -- Open Positions (für Exit-Tracking)
    CREATE TABLE IF NOT EXISTS open_positions (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        ticker TEXT,
        signal_date TEXT,
        entry_price_stock REAL,
        entry_price_option REAL,
        entry_bid REAL,
        entry_ask REAL,
        strike REAL,
        expiry TEXT,
        quantity INTEGER,
        position_size_pct REAL,
        portfolio_notional REAL,
        delta_entry REAL,
        vega_entry REAL,
        theta_entry REAL,
        current_stock_price REAL,
        current_option_mid REAL,
        unrealized_pnl_pct REAL,
        exit_reason TEXT,
        exit_date TEXT,
        exit_price REAL,
        realized_pnl_pct REAL,
        realized_pnl_after_taxes REAL,
        status TEXT DEFAULT 'open',
        last_updated TEXT DEFAULT CURRENT_TIMESTAMP
    );
    -- Scan Log
    CREATE TABLE IF NOT EXISTS scan_log (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        date TEXT,
        run_mode TEXT,
        signals_found INTEGER,
        signals_sent INTEGER,
        status TEXT,
        error TEXT,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP
    );
    -- 13F Holdings History
    CREATE TABLE IF NOT EXISTS thirteenf_holdings (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        fund_name TEXT,
        cik TEXT,
        quarter TEXT,
        ticker TEXT,
        cusip TEXT,
        company TEXT,
        shares INTEGER,
        value_usd INTEGER,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP
    );
    CREATE INDEX IF NOT EXISTS idx_13f_cik_q
        ON thirteenf_holdings (cik, quarter);
    CREATE TABLE IF NOT EXISTS thirteenf_portfolio (
        cik TEXT,
        quarter TEXT,
        total_value INTEGER,
        position_count INTEGER,
        PRIMARY KEY (cik, quarter)
    );
    -- Fund Performance (Auto-Kalibrierung)
    CREATE TABLE IF NOT EXISTS fund_performance (
        fund_name TEXT PRIMARY KEY,
        total_signals INTEGER DEFAULT 0,
        wins INTEGER DEFAULT 0,
        losses INTEGER DEFAULT 0,
        win_rate REAL DEFAULT 0.5,
        avg_win_pct REAL DEFAULT 0.65,
        avg_loss_pct REAL DEFAULT -0.40,
        last_updated TEXT DEFAULT CURRENT_TIMESTAMP
    );
    -- Source Health
    CREATE TABLE IF NOT EXISTS source_health (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        date TEXT,
        source TEXT,
        count INTEGER,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP
    );
    -- Ticker Cache (für CIK→Ticker Lookup)
    CREATE TABLE IF NOT EXISTS ticker_cache (
        cik TEXT PRIMARY KEY,
        ticker TEXT,
        name TEXT,
        updated TEXT DEFAULT CURRENT_TIMESTAMP
    );
    CREATE TABLE IF NOT EXISTS cusip_ticker_cache (
        cusip TEXT PRIMARY KEY,
        ticker TEXT,
        name TEXT,
        updated TEXT DEFAULT CURRENT_TIMESTAMP
    );
"""


def init_db():
    """Create all tables and indexes. Idempotent."""
    with get_conn() as conn:
        conn.executescript(_SCHEMA_SQL)
    logger.info("DB initialisiert.")
# ── Signals ───────────────────────────────────────────────────────────
def save_signal(sig: dict):
    """Store one Claude-analysed signal in the ``signals`` table.

    Missing keys fall back to empty strings or zeros; the complete dict is
    additionally stored as JSON in ``raw_data``. The row is dated with the
    current UTC day.
    """
    today = datetime.utcnow().strftime("%Y-%m-%d")
    row = (
        today,
        sig.get("ticker", ""),
        sig.get("signal_type", ""),
        sig.get("fund_name", ""),
        sig.get("fund_score", 0),
        sig.get("strength", 0),
        sig.get("conviction", 0.0),
        sig.get("action", ""),
        sig.get("confidence", 0.0),
        sig.get("reasoning", ""),
        sig.get("suggested_instrument", ""),
        json.dumps(sig),
    )
    with get_conn() as conn:
        conn.execute("""
            INSERT INTO signals
            (date, ticker, signal_type, fund_name, fund_score, strength,
            conviction, action, confidence, reasoning, instrument, raw_data)
            VALUES (?,?,?,?,?,?,?,?,?,?,?,?)
        """, row)
def get_fund_history(fund_name: str, limit: int = 3) -> list:
    """Return the latest ``limit`` trade/watchlist signals of a fund (context for Claude).

    The fund is matched by substring (``LIKE %fund_name%``); rows come back
    newest first as plain dicts.
    """
    pattern = f"%{fund_name}%"
    with get_conn() as conn:
        cursor = conn.execute("""
            SELECT ticker, action, confidence, reasoning, outcome, outcome_pct, date
            FROM signals
            WHERE fund_name LIKE ? AND action IN ('trade','watchlist')
            ORDER BY created_at DESC LIMIT ?
        """, (pattern, limit))
        history = [dict(record) for record in cursor.fetchall()]
    return history
def get_fund_accuracy(fund_name: str) -> float:
    """Win rate of a fund based on its recorded outcomes.

    Looks at the 50 most recent signals with a non-empty outcome whose fund
    name contains ``fund_name``. Returns 0.5 when there are none.
    """
    pattern = f"%{fund_name}%"
    with get_conn() as conn:
        records = conn.execute("""
            SELECT outcome FROM signals
            WHERE fund_name LIKE ? AND outcome != ''
            ORDER BY created_at DESC LIMIT 50
        """, (pattern,)).fetchall()
    if not records:
        return 0.5
    outcomes = [record["outcome"] for record in records]
    return outcomes.count("win") / len(outcomes)
def is_duplicate(ticker: str, signal_type: str, days: int = 5) -> bool:
    """Return True if a signal for this ticker and type was stored in the last ``days`` days."""
    window = f"-{int(days)} days"
    with get_conn() as conn:
        hit = conn.execute("""
            SELECT id FROM signals
            WHERE ticker=? AND signal_type=? AND date >= date('now',?)
        """, (ticker, signal_type, window)).fetchone()
    if hit is None:
        return False
    return True
# ── Open Positions ────────────────────────────────────────────────────
def save_position(pos: dict):
    """Insert a new position with status 'open'.

    ``ticker``, ``signal_date``, ``strike`` and ``expiry`` are required keys
    (``KeyError`` if missing); all other values default to 0, quantity to 1.
    """
    def optional(key, fallback=0):
        return pos.get(key, fallback)

    row = (
        pos["ticker"], pos["signal_date"],
        optional("entry_price_stock"), optional("entry_price_option"),
        optional("entry_bid"), optional("entry_ask"),
        pos["strike"], pos["expiry"], optional("quantity", 1),
        optional("position_size_pct"), optional("portfolio_notional"),
        optional("delta_entry"), optional("vega_entry"), optional("theta_entry"),
    )
    with get_conn() as conn:
        conn.execute("""
            INSERT INTO open_positions
            (ticker, signal_date, entry_price_stock, entry_price_option,
            entry_bid, entry_ask, strike, expiry, quantity,
            position_size_pct, portfolio_notional,
            delta_entry, vega_entry, theta_entry, status)
            VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,'open')
        """, row)
def get_open_positions() -> List[Dict]:
    """Return every position whose status is 'open', as plain dicts."""
    query = "SELECT * FROM open_positions WHERE status='open'"
    with get_conn() as conn:
        open_rows = conn.execute(query).fetchall()
    return list(map(dict, open_rows))
def update_position(position_id: int, updates: dict):
    """Update columns of one ``open_positions`` row and bump ``last_updated``.

    Args:
        position_id: ``id`` of the row to update.
        updates: Mapping of column name to new value. An empty mapping is a
            no-op (it would otherwise produce invalid SQL).

    Raises:
        ValueError: If a key is not a plain identifier. Column names are
            interpolated into the statement, so anything else is rejected
            before the database is touched.
    """
    if not updates:
        return
    invalid = [k for k in updates if not (isinstance(k, str) and k.isidentifier())]
    if invalid:
        raise ValueError(f"Invalid column name(s) for open_positions: {invalid!r}")

    cols = ", ".join(f"{k}=?" for k in updates)
    values = list(updates.values()) + [position_id]
    with get_conn() as conn:
        conn.execute(
            f"UPDATE open_positions SET {cols}, last_updated=CURRENT_TIMESTAMP WHERE id=?",
            values
        )
def close_position(position_id: int, exit_data: dict):
    """Mark a position as closed and record its exit details.

    Missing values default to an empty reason, today's UTC date and zeros.
    """
    today = datetime.utcnow().strftime("%Y-%m-%d")
    params = (
        exit_data.get("exit_reason", ""),
        exit_data.get("exit_date", today),
        exit_data.get("exit_price", 0),
        exit_data.get("realized_pnl_pct", 0),
        exit_data.get("realized_pnl_after_taxes", 0),
        position_id,
    )
    with get_conn() as conn:
        conn.execute("""
            UPDATE open_positions
            SET exit_reason=?, exit_date=?, exit_price=?,
            realized_pnl_pct=?, realized_pnl_after_taxes=?,
            status='closed', last_updated=CURRENT_TIMESTAMP
            WHERE id=?
        """, params)
# ── Scan Log ──────────────────────────────────────────────────────────
def log_scan(found: int, sent: int, status: str, error: str = "", run_mode: str = ""):
    """Record one scan run in ``scan_log``, dated with the current UTC day."""
    run_date = datetime.utcnow().strftime("%Y-%m-%d")
    entry = (run_date, run_mode, found, sent, status, error)
    with get_conn() as conn:
        conn.execute("""
            INSERT INTO scan_log (date, run_mode, signals_found, signals_sent, status, error)
            VALUES (?,?,?,?,?,?)
        """, entry)
def log_source_health(source: str, count: int):
    """Record how many items a data source delivered, dated with the current UTC day."""
    run_date = datetime.utcnow().strftime("%Y-%m-%d")
    with get_conn() as conn:
        conn.execute("""
            INSERT INTO source_health (date, source, count)
            VALUES (?,?,?)
        """, (run_date, source, count))
def get_source_warnings(consecutive_days: int = 3) -> list:
    """Return sources that delivered 0 items on each of their last N logged days.

    Counts are summed per source and day first, so several scan runs on the
    same day count as one day rather than as several. Only days within the
    last ``consecutive_days + 2`` calendar days are considered.

    Args:
        consecutive_days: Number of most recent logged days that must all be
            zero. Values below 1 yield no warnings.

    Returns:
        List of ``{"source", "days", "last_count"}`` dicts. The lookup is
        best-effort: if the query fails, the error is logged and an empty
        list is returned.
    """
    from collections import defaultdict

    warnings = []
    if consecutive_days < 1:
        return warnings
    try:
        with get_conn() as conn:
            rows = conn.execute("""
                SELECT source, date, SUM(count) AS day_count FROM source_health
                WHERE date >= date('now', ?)
                GROUP BY source, date
                ORDER BY source, date DESC
            """, (f"-{consecutive_days + 2} days",)).fetchall()

        by_source = defaultdict(list)
        for r in rows:
            # SUM() is NULL when every count of that day is NULL; treat as 0.
            by_source[r[0]].append((r[1], r[2] or 0))

        for source, entries in by_source.items():
            recent = entries[:consecutive_days]
            if len(recent) >= consecutive_days:
                if all(count == 0 for _, count in recent):
                    warnings.append({
                        "source": source,
                        "days": consecutive_days,
                        "last_count": entries[0][1] if entries else 0,
                    })
    except Exception as exc:
        # Health warnings must never break a scan, but the failure should be visible.
        logger.warning(f"get_source_warnings failed: {exc}")
    return warnings
# ── Form 4 History ────────────────────────────────────────────────────
def save_form4_trades(trades: list):
    """Store Form 4 trades for cluster detection, skipping ones already stored.

    ``form4_history`` has no uniqueness constraint, so ``INSERT OR IGNORE``
    never ignores anything and every re-scan would add the same filings
    again. Each row is therefore inserted only if no row with the same
    ticker, filing date, title and URL exists yet.

    Trades without a ticker or with ticker "UNKNOWN" are skipped; titles are
    truncated to 200 characters.
    """
    rows = []
    for t in trades or []:
        ticker = t.get("ticker")
        if not ticker or ticker == "UNKNOWN":
            continue
        filed = t.get("filed", "")
        title = (t.get("title") or "")[:200]
        url = t.get("url", "")
        rows.append((
            ticker, filed, title, url,
            t.get("amount_usd", 0),
            1 if t.get("is_10b5") else 0,
            # Parameters of the NOT EXISTS duplicate check.
            ticker, filed, title, url,
        ))
    if not rows:
        return

    with get_conn() as conn:
        # IS instead of = so that NULL values compare equal to NULL.
        conn.executemany("""
            INSERT INTO form4_history
            (ticker, filed_date, title, url, amount_usd, is_10b5)
            SELECT ?,?,?,?,?,?
            WHERE NOT EXISTS (
                SELECT 1 FROM form4_history
                WHERE ticker IS ? AND filed_date IS ? AND title IS ? AND url IS ?
            )
        """, rows)
def get_recent_form4_by_ticker(ticker: str, days: int = 5) -> list:
    """Return a ticker's Form 4 trades of the last ``days`` days, newest first.

    Rows flagged ``is_10b5`` are excluded.
    """
    since = f"-{int(days)} days"
    with get_conn() as conn:
        found = conn.execute("""
            SELECT ticker, filed_date, title, amount_usd, is_10b5
            FROM form4_history
            WHERE ticker = ? AND filed_date >= date('now', ?) AND is_10b5 = 0
            ORDER BY filed_date DESC
        """, (ticker, since)).fetchall()
    return list(map(dict, found))
def cleanup_old_form4(days: int = 90):
    """Delete Form 4 entries filed more than ``days`` days ago."""
    cutoff = (f"-{int(days)} days",)
    statement = "DELETE FROM form4_history WHERE filed_date < date('now', ?)"
    with get_conn() as conn:
        conn.execute(statement, cutoff)
# ── Exit-Check Helpers ────────────────────────────────────────────────
def get_signal_fund_for_position(ticker: str, signal_date: str) -> Optional[str]:
    """Return the fund name of the signal that triggered this position.

    Matches the most recently created trade/watchlist signal for the ticker
    on the date given by the first 10 characters of ``signal_date``. Returns
    None when there is no such signal.
    """
    day = signal_date[:10]
    with get_conn() as conn:
        match = conn.execute("""
            SELECT fund_name FROM signals
            WHERE ticker = ? AND date = ? AND action IN ('trade', 'watchlist')
            ORDER BY created_at DESC LIMIT 1
        """, (ticker, day)).fetchone()
    if not match:
        return None
    return match["fund_name"]
def get_thirteenf_trend(fund_name: str, ticker: str, quarters: int = 2) -> List[Dict]:
    """Return the last ``quarters`` 13F holdings rows for fund + ticker, newest first.

    The fund is matched by substring (``LIKE %fund_name%``).
    """
    params = (f"%{fund_name}%", ticker, quarters)
    with get_conn() as conn:
        trend = conn.execute("""
            SELECT quarter, shares, value_usd FROM thirteenf_holdings
            WHERE fund_name LIKE ? AND ticker = ?
            ORDER BY quarter DESC LIMIT ?
        """, params).fetchall()
    return list(map(dict, trend))
def get_form4_sells(ticker: str, days: int = 90) -> List[Dict]:
    """Return Form 4 entries of the last ``days`` days that hint at insider sales.

    An entry qualifies when its lower-cased title contains "sale". Rows come
    back newest first.
    """
    since = f"-{int(days)} days"
    with get_conn() as conn:
        sells = conn.execute("""
            SELECT ticker, filed_date, title, amount_usd FROM form4_history
            WHERE ticker = ? AND filed_date >= date('now', ?)
            AND lower(title) LIKE '%sale%'
            ORDER BY filed_date DESC
        """, (ticker, since)).fetchall()
    return list(map(dict, sells))
================================================
FILE: src/utils/ticker_resolver.py
================================================
"""
ticker_resolver.py – v2.2
----------------------------
Resolves SEC CIK numbers → exchange ticker symbols.
Architecture: Two-level cache
L1 In-memory TTLCache (fast, lost on restart)
L2 SQLite table (persistent across restarts, warm-starts L1)
Resolution chain per CIK:
1. L1 cache (in-memory, TTL-checked, LRU-bounded)
2. L2 cache (SQLite, TTL-checked — promotes hit to L1)
3. Regex extraction from filing title (least trusted — EDGAR may override)
4. SEC EDGAR Submissions API (most trusted — overwrites regex result)
Changelog vs. v2.1
[FIX-F] _TICKER_PATTERN now allows class-share suffixes like BRK.B
[FIX-G] Added _extract_via_regex alias and _extract_cik() helper for tests
"""
from __future__ import annotations
import logging
import os
import re
import sqlite3
import threading
import time
from collections import OrderedDict
from contextlib import contextmanager
from typing import Generator, Optional
import requests
# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Configuration (all overridable via environment variables)
# ---------------------------------------------------------------------------
# Cache settings: default TTL and size limit of the in-memory cache (_CIKCache).
CACHE_TTL_SECONDS: int = int(os.getenv("TICKER_CACHE_TTL", str(86_400)))  # 24 h
CACHE_L1_MAXSIZE: int = int(os.getenv("TICKER_CACHE_MAXSIZE", "5000"))  # LRU limit
# EDGAR call budget: at most MAX_EDGAR_CALLS per window of EDGAR_WINDOW_SEC seconds.
MAX_EDGAR_CALLS: int = int(os.getenv("EDGAR_MAX_CALLS", "100"))  # per window
EDGAR_WINDOW_SEC: int = int(os.getenv("EDGAR_WINDOW_SECONDS", str(3_600)))  # 1 h
# Presumably the HTTP timeout for EDGAR requests, in seconds — confirm at the call site.
EDGAR_TIMEOUT_SEC: int = int(os.getenv("EDGAR_TIMEOUT", "8"))
# SQLite file of the persistent (L2) cache; passed to sqlite3.connect in _db_conn.
DB_PATH: str = os.getenv("TICKER_DB_PATH", "ticker_cache.db")
# Unvalidated User-Agent from the environment; validated by _get_user_agent().
_RAW_USER_AGENT: str = os.getenv("EDGAR_USER_AGENT", "")
# [FIX-F] Allow optional class-share suffix (e.g. BRK.B, BF.A)
# Captures a parenthesised symbol of 1-5 uppercase letters, e.g. "(AAPL)".
_TICKER_PATTERN: re.Pattern = re.compile(
    r"\(([A-Z]{1,5}(?:\.[A-Z]{1,2})?)\)"
)
# Uppercase tokens that are not tickers (legal forms, common words, security
# types) — presumably used to reject regex matches; confirm at the usage site.
_TICKER_BLACKLIST: frozenset[str] = frozenset({
    "INC", "LLC", "LTD", "CORP", "CO", "PLC", "LP", "NA", "SA",
    "AG", "SE", "NV", "AB", "AS", "THE", "AND", "FOR", "WITH",
    "NEW", "OLD", "ACT", "SEC", "REG", "ETF", "ADR", "ADS",
})
# URL template of the submissions endpoint; the CIK is zero-padded to 10 digits.
EDGAR_BASE_URL = "https://data.sec.gov/submissions/CIK{cik:010d}.json"
# ---------------------------------------------------------------------------
# [FIX-D] User-Agent validation — fail-fast at import time
# ---------------------------------------------------------------------------
def _get_user_agent() -> str:
    """Validate and return the EDGAR User-Agent taken from the environment.

    Raises:
        EnvironmentError: If ``EDGAR_USER_AGENT`` is unset or blank.
        ValueError: If the value contains no "@", i.e. no email address.
    """
    ua = _RAW_USER_AGENT.strip()
    if ua and "@" in ua:
        return ua
    if not ua:
        raise EnvironmentError(
            "EDGAR_USER_AGENT environment variable is not set.\n"
            "Set it to 'YourCompany contact@yourdomain.com' before running.\n"
            "The SEC blocks IPs with missing or generic User-Agent strings."
        )
    raise ValueError(
        f"EDGAR_USER_AGENT={ua!r} does not look like 'CompanyName name@domain'.\n"
        "SEC policy requires a real email address in the User-Agent."
    )


# Evaluated at import time, so a missing or malformed User-Agent fails fast.
_EDGAR_USER_AGENT: str = _get_user_agent()
# ---------------------------------------------------------------------------
# Exception taxonomy
# ---------------------------------------------------------------------------
class _TransientError(Exception):
    """Temporary lookup failure: network error, timeout or HTTP 5xx.

    The miss must NOT be cached, so that a later attempt can still succeed.
    """
class _PermanentMiss(Exception):
    """Definitive lookup failure: CIK not found or HTTP 4xx.

    Safe to negative-cache by storing an empty string as the ticker.
    """
# ---------------------------------------------------------------------------
# L1: Bounded in-memory cache with LRU eviction + TTL
# ---------------------------------------------------------------------------
class _CIKCache:
    """In-memory CIK -> ticker cache with a size bound (LRU eviction) and a TTL.

    Entries are stored as ``(ticker, timestamp)`` in an ``OrderedDict`` whose
    order tracks recency of use; every access is guarded by a lock.
    """

    def __init__(self, maxsize: int = CACHE_L1_MAXSIZE, ttl: int = CACHE_TTL_SECONDS) -> None:
        self._maxsize = maxsize
        self._ttl = ttl
        self._data: OrderedDict[int, tuple[str, float]] = OrderedDict()
        self._lock = threading.Lock()

    def get(self, cik: int) -> Optional[str]:
        """Return the cached ticker, or None if absent or older than the TTL.

        An expired entry is removed; a hit becomes the most recently used entry.
        """
        with self._lock:
            entry = self._data.get(cik)
            if entry is None:
                return None
            value, stored_at = entry
            if time.time() - stored_at > self._ttl:
                del self._data[cik]
                logger.debug("L1 EVICT(TTL) cik=%s", cik)
                return None
            self._data.move_to_end(cik)
            return value

    def set(self, cik: int, ticker: str, *, source: str = "?") -> None:
        """Store ``ticker`` for ``cik`` as the most recent entry.

        If the cache then exceeds ``maxsize``, the least recently used entry
        is evicted. ``source`` is only used for the debug log.
        """
        with self._lock:
            self._data[cik] = (ticker, time.time())
            self._data.move_to_end(cik)
            if len(self._data) > self._maxsize:
                evicted, _ = self._data.popitem(last=False)
                logger.debug("L1 EVICT(LRU) cik=%s", evicted)
        logger.debug("L1 SET cik=%s ticker=%r source=%s", cik, ticker or "", source)

    def invalidate(self, cik: int) -> None:
        """Remove the entry for ``cik`` if present and log what was removed."""
        with self._lock:
            removed = self._data.pop(cik, None)
        if removed is not None:
            logger.info("L1 INVALIDATE cik=%s (was %r)", cik, removed[0])

    def size(self) -> int:
        """Return the number of entries currently held (expired ones included)."""
        with self._lock:
            return len(self._data)


# Module-wide L1 cache instance.
_l1: _CIKCache = _CIKCache()
# ---------------------------------------------------------------------------
# L2: SQLite persistent cache
# ---------------------------------------------------------------------------
@contextmanager
def _db_conn() -> Generator[sqlite3.Connection, None, None]:
    """
    Yield a fresh SQLite connection to ``DB_PATH`` with ``sqlite3.Row`` rows.

    The transaction is committed when the ``with`` body finishes normally and
    rolled back (then re-raised) when it raises an ``Exception``. The
    connection is closed in every case.
    """
    connection = sqlite3.connect(
        DB_PATH,
        timeout=5,
        check_same_thread=False,
    )
    connection.row_factory = sqlite3.Row
    try:
        yield connection
        connection.commit()
    except Exception:
        connection.rollback()
        raise
    finally:
        connection.close()
def _init_db() -> None:
    """Create the ``ticker_cache`` table and its ``updated_at`` index if missing."""
    create_table_sql = """
        CREATE TABLE IF NOT EXISTS ticker_cache (
            cik INTEGER PRIMARY KEY,
            ticker TEXT NOT NULL DEFAULT '',
            source TEXT NOT NULL DEFAULT 'unknown',
            updated_at REAL NOT NULL,
            CONSTRAINT ticker_len CHECK (length(ticker) <= 10)
        )
    """
    create_index_sql = (
        "CREATE INDEX IF NOT EXISTS idx_tc_updated ON ticker_cache(updated_at)"
    )
    with _db_conn() as conn:
        for statement in (create_table_sql, create_index_sql):
            conn.execute(statement)
    logger.debug("L2 SQLite ready: %s", DB_PATH)
def _l2_get(cik: int) -> Optional[str]:
    """
    Read the ticker for ``cik`` from the SQLite cache.

    Returns None when there is no row or the row is older than
    ``CACHE_TTL_SECONDS``; an expired row is deleted as part of the read.
    An empty string is a valid stored value (a remembered miss).
    """
    with _db_conn() as conn:
        cursor = conn.execute(
            "SELECT ticker, updated_at FROM ticker_cache WHERE cik = ?", (cik,)
        )
        record = cursor.fetchone()
        if record is None:
            return None
        age = time.time() - record["updated_at"]
        if age <= CACHE_TTL_SECONDS:
            return record["ticker"]
        # Stale row: purge it so later reads don't have to re-check its age.
        conn.execute("DELETE FROM ticker_cache WHERE cik = ?", (cik,))
        logger.debug("L2 EVICT(TTL) cik=%s age=%.0fs", cik, age)
        return None
def _l2_set(cik: int, ticker: str, *, source: str) -> None:
    """
    Insert or replace the SQLite cache row for ``cik``.

    An existing row has its ticker, source and ``updated_at`` overwritten;
    ``updated_at`` is always the current time.
    """
    upsert_sql = """
        INSERT INTO ticker_cache (cik, ticker, source, updated_at)
        VALUES (?, ?, ?, ?)
        ON CONFLICT(cik) DO UPDATE SET
            ticker = excluded.ticker,
            source = excluded.source,
            updated_at = excluded.updated_at
    """
    params = (cik, ticker, source, time.time())
    with _db_conn() as conn:
        conn.execute(upsert_sql, params)
    logger.debug("L2 SET cik=%s ticker=%r source=%s", cik, ticker or "", source)
def _warm_l1_from_db(limit: int = CACHE_L1_MAXSIZE) -> int:
    """
    Pre-populate the L1 cache with the freshest rows from SQLite.

    Args:
        limit: maximum number of rows to load (defaults to the L1 capacity).

    Returns:
        The number of rows loaded, or 0 if the database could not be read
        (the failure is logged and swallowed so start-up can continue).

    Note:
        ``_l1.set`` stamps each entry with the current time, so the L1 TTL of
        a warm-started entry restarts at load time.
    """
    cutoff = time.time() - CACHE_TTL_SECONDS
    try:
        with _db_conn() as conn:
            rows = conn.execute(
                "SELECT cik, ticker FROM ticker_cache "
                "WHERE updated_at >= ? ORDER BY updated_at DESC LIMIT ?",
                (cutoff, limit),
            ).fetchall()
    except Exception as exc:
        logger.warning("L1 warm-start failed: %s", exc)
        return 0
    # The query yields newest rows first, but the L1 cache evicts from the
    # front of its ordering (the first key inserted). Insert oldest → newest so
    # that the most recently updated tickers are the LAST to be evicted,
    # not the first.
    for row in reversed(rows):
        _l1.set(row["cik"], row["ticker"], source="warm-start")
    logger.info("L1 warm-started with %d entries from SQLite", len(rows))
    return len(rows)
# ---------------------------------------------------------------------------
# Unified cache helpers
# ---------------------------------------------------------------------------
def _cache_get(cik: int) -> Optional[str]:
    """
    Look up ``cik`` in L1, then L2, promoting an L2 hit into L1.

    Returns:
        The cached ticker, an empty string for a remembered miss, or None when
        neither cache layer has a usable entry.

    An L2 read failure (e.g. a locked or corrupt database) is logged and
    treated as a cache miss, matching how L2 write, invalidation and
    warm-start failures are already tolerated elsewhere in this module.
    """
    hit = _l1.get(cik)
    if hit is not None:
        return hit
    try:
        l2_hit = _l2_get(cik)
    except Exception as exc:
        # The persistent layer is an optimisation; never let it break resolution.
        logger.warning("L2 read failed for cik=%s: %s (treating as cache miss)", cik, exc)
        return None
    if l2_hit is not None:
        _l1.set(cik, l2_hit, source="l2-promote")
    return l2_hit
def _cache_set(cik: int, ticker: str, *, source: str, overwrite: bool = False) -> None:
    """
    Write ``ticker`` for ``cik`` to both cache layers.

    L1 is updated first; an L2 failure is logged and swallowed, leaving L1
    updated. When L1 already holds a different non-empty ticker the change is
    logged. ``overwrite`` only appears in that log message: the write always
    happens regardless of its value.
    """
    previous = _l1.get(cik)
    if previous and previous != ticker:
        logger.info(
            "Cache UPDATE cik=%s %r → %r (source=%s, overwrite=%s)",
            cik, previous, ticker or "", source, overwrite,
        )

    _l1.set(cik, ticker, source=source)

    try:
        _l2_set(cik, ticker, source=source)
    except Exception as exc:
        logger.warning("L2 write failed for cik=%s: %s (L1 still updated)", cik, exc)
def cache_invalidate(cik_raw: int | str) -> None:
    """
    Remove the entry for a CIK from both cache layers.

    ``cik_raw`` is converted with ``int()``; a value that cannot be converted
    raises from that call. A failure while deleting from SQLite is logged and
    swallowed.
    """
    cik_int = int(cik_raw)
    _l1.invalidate(cik_int)
    try:
        with _db_conn() as conn:
            conn.execute("DELETE FROM ticker_cache WHERE cik = ?", (cik_int,))
        logger.info("L2 INVALIDATE cik=%s", cik_int)
    except Exception as exc:
        logger.warning("L2 invalidation failed for cik=%s: %s", cik_int, exc)
# ---------------------------------------------------------------------------
# Thread-safe, time-windowed EDGAR rate-limiter
# ---------------------------------------------------------------------------
# State of the client-side rate limiter; all three names are guarded by
# _edgar_lock.
_edgar_lock = threading.Lock()
_edgar_calls: int = 0
_edgar_window_start: float = time.time()


def _edgar_call_allowed() -> bool:
    """
    Reserve one EDGAR call in the current rate-limit window.

    At most ``MAX_EDGAR_CALLS`` calls are granted per ``EDGAR_WINDOW_SEC``
    seconds. The window restarts once it has fully elapsed, and also when the
    wall clock has jumped backwards past the window start.

    Returns:
        True if a call slot was reserved (the counter is incremented),
        False if the quota for the current window is exhausted.
    """
    global _edgar_calls, _edgar_window_start
    with _edgar_lock:
        now = time.time()
        elapsed = now - _edgar_window_start
        # elapsed < 0 means the system clock was set back; without a reset the
        # window would stay frozen until the clock caught up again.
        if elapsed >= EDGAR_WINDOW_SEC or elapsed < 0:
            logger.debug("EDGAR window reset (%.0fs, %d calls)", elapsed, _edgar_calls)
            _edgar_calls = 0
            _edgar_window_start = now
            # A new window begins now; keep the figures logged below in sync
            # with it instead of reporting the age of the previous window.
            elapsed = 0.0
        if _edgar_calls >= MAX_EDGAR_CALLS:
            logger.warning(
                "EDGAR rate-limit: %d/%d calls, %.0fs remaining in window",
                _edgar_calls, MAX_EDGAR_CALLS, EDGAR_WINDOW_SEC - elapsed,
            )
            return False
        _edgar_calls += 1
        logger.debug("EDGAR call %d/%d (%.0fs into window)", _edgar_calls, MAX_EDGAR_CALLS, elapsed)
        return True
# ---------------------------------------------------------------------------
# EDGAR Submissions API call
# ---------------------------------------------------------------------------
def _lookup_via_edgar(cik: int) -> str:
    """
    Fetch the primary ticker for ``cik`` from the EDGAR Submissions API.

    Returns:
        The first entry of the ``tickers`` list in the response.

    Raises:
        _TransientError: network failure, timeout, HTTP 403/429/5xx, or a
            response body that is not a JSON object. The caller must not
            negative-cache these.
        _PermanentMiss: HTTP 404, any other non-200 status, or an empty
            ``tickers`` list.
    """
    url = EDGAR_BASE_URL.format(cik=cik)
    try:
        resp = requests.get(
            url,
            headers={"User-Agent": _EDGAR_USER_AGENT},
            timeout=EDGAR_TIMEOUT_SEC,
        )
    except requests.exceptions.Timeout as exc:
        raise _TransientError(f"Timeout CIK {cik}") from exc
    except requests.exceptions.ConnectionError as exc:
        raise _TransientError(f"Connection error CIK {cik}") from exc
    except requests.exceptions.RequestException as exc:
        raise _TransientError(f"Network error CIK {cik}: {exc}") from exc

    status = resp.status_code
    if status == 404:
        raise _PermanentMiss(f"HTTP 404 — CIK {cik} not in EDGAR")
    if status == 403:
        # SEC answers 403 when it throttles or blocks a client. That says
        # nothing about this CIK, so it must not be stored as "has no ticker".
        raise _TransientError(f"HTTP 403 — access throttled or blocked CIK {cik}")
    if status == 429:
        raise _TransientError(f"HTTP 429 — server-side rate-limit CIK {cik}")
    if status >= 500:
        raise _TransientError(f"HTTP {status} — server error CIK {cik}")
    if status != 200:
        raise _PermanentMiss(f"HTTP {status} — permanent miss CIK {cik}")

    try:
        data = resp.json()
    except ValueError as exc:
        raise _TransientError(f"Malformed JSON for CIK {cik}: {exc}") from exc
    if not isinstance(data, dict):
        # Valid JSON but not an object: handle it like a malformed body rather
        # than failing with AttributeError on .get() below.
        raise _TransientError(
            f"Unexpected JSON payload for CIK {cik}: {type(data).__name__}"
        )

    tickers: list[str] = data.get("tickers", [])
    if not tickers:
        raise _PermanentMiss(f"EDGAR returned empty tickers list for CIK {cik}")
    if len(tickers) > 1:
        logger.info(
            "CIK %s has %d tickers %s — using primary %r",
            cik, len(tickers), tickers, tickers[0],
        )
    return tickers[0]
# ---------------------------------------------------------------------------
# Regex helpers
# [FIX-F] Pattern now handles class-share suffixes (BRK.B, BF.A)
# [FIX-G] Public alias + CIK extractor for backward compatibility
# ---------------------------------------------------------------------------
def _extract_ticker_from_title(title: str) -> Optional[str]:
    """
    Return the first parenthesised ticker in ``title`` that is not blacklisted.

    Candidates look like (AAPL), (NVDA) or (BRK.B). Only the part before the
    dot is checked against the blacklist. Returns None when every candidate is
    rejected or there is none.
    """
    for match in _TICKER_PATTERN.finditer(title):
        symbol = match.group(1)
        root = symbol.partition(".")[0]  # base symbol without class suffix
        if root in _TICKER_BLACKLIST:
            logger.debug("Regex: rejected %r in %r", symbol, title)
            continue
        return symbol
    return None


# [FIX-G] Alias for test compatibility
_extract_via_regex = _extract_ticker_from_title
def _extract_cik(title: str) -> Optional[str]:
"""
Extract a numeric SEC CIK (7-10 digits) from a filing title string.
CIKs appear in parentheses, e.g. 'Apple Inc. (0000320193)'.
Returns None if no CIK-like number is found.
"""
m = re.search(r'\((\d{7,10})\)', title)
return m.group(1) if m else None
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def resolve_ticker(
    cik: int | str,
    title: str = "",
    *,
    use_edgar_fallback: bool = True,
) -> Optional[str]:
    """
    Resolve a CIK to an exchange ticker symbol, or None if none is known.

    Stages, in order:
        1. Cache: L1 (memory), then L2 (SQLite). A cached empty string is a
           remembered miss and yields None.
        2. Regex on ``title``: a low-trust guess. It is cached only when
           ``use_edgar_fallback`` is False.
        3. EDGAR Submissions API: authoritative. Its answer is cached and
           returned even when it differs from the regex guess.

    If the EDGAR quota is exhausted, or the call fails transiently or
    unexpectedly, the unverified regex guess (possibly None) is returned and
    nothing is cached. A permanent EDGAR miss is cached and returns None.
    An unparsable ``cik`` is logged and returns None.
    """
    try:
        cik_int: int = int(cik)
    except (ValueError, TypeError) as exc:
        logger.error("resolve_ticker: invalid cik=%r — %s", cik, exc)
        return None

    # Stage 1 — cache ("" marks a remembered miss)
    cached = _cache_get(cik_int)
    if cached is not None:
        return None if cached == "" else cached

    # Stage 2 — regex (provisional)
    regex_ticker: Optional[str] = (
        _extract_ticker_from_title(title) if title else None
    )
    if not use_edgar_fallback:
        if regex_ticker:
            _cache_set(cik_int, regex_ticker, source="regex")
        return regex_ticker

    # Stage 3 — EDGAR (authoritative)
    if not _edgar_call_allowed():
        logger.warning(
            "EDGAR quota exhausted for CIK %s; returning unverified regex=%r",
            cik_int, regex_ticker,
        )
        return regex_ticker

    try:
        edgar_ticker = _lookup_via_edgar(cik_int)
        _cache_set(cik_int, edgar_ticker, source="edgar", overwrite=True)
        if regex_ticker and regex_ticker != edgar_ticker:
            logger.info(
                "CIK %s: EDGAR %r overrides regex guess %r",
                cik_int, edgar_ticker, regex_ticker,
            )
        return edgar_ticker
    except _PermanentMiss as exc:
        logger.info("Permanent EDGAR miss CIK %s: %s", cik_int, exc)
        _cache_set(cik_int, "", source="edgar-permanent-miss", overwrite=True)
        return None
    except _TransientError as exc:
        logger.warning("Transient EDGAR error CIK %s: %s", cik_int, exc)
        return regex_ticker
    except Exception as exc:
        logger.error("Unexpected error CIK %s: %s", cik_int, exc, exc_info=True)
        return regex_ticker
# ---------------------------------------------------------------------------
# Diagnostics / operational helpers
# ---------------------------------------------------------------------------
def cache_stats() -> dict:
    """
    Return a snapshot of cache sizes and EDGAR rate-limiter state.

    The two ``l2_*`` counts are -1 when the SQLite database cannot be queried.
    """
    now = time.time()
    with _edgar_lock:
        calls = _edgar_calls
        window_age = now - _edgar_window_start

    try:
        with _db_conn() as conn:
            total_rows = conn.execute("SELECT COUNT(*) FROM ticker_cache").fetchone()[0]
            fresh_rows = conn.execute(
                "SELECT COUNT(*) FROM ticker_cache WHERE updated_at >= ?",
                (now - CACHE_TTL_SECONDS,),
            ).fetchone()[0]
    except Exception:
        total_rows = fresh_rows = -1

    remaining = max(0.0, EDGAR_WINDOW_SEC - window_age)
    stats = {
        "l1_size": _l1.size(),
        "l1_maxsize": CACHE_L1_MAXSIZE,
        "l2_total_rows": total_rows,
        "l2_fresh_rows": fresh_rows,
        "edgar_calls_this_window": calls,
        "edgar_max_calls": MAX_EDGAR_CALLS,
        "edgar_window_age_sec": round(window_age, 1),
        "edgar_window_remaining_sec": round(remaining, 1),
    }
    return stats
def clear_cache(*, l1: bool = True, l2: bool = False) -> None:
    """
    Empty the selected cache layers.

    Args:
        l1: clear the in-memory cache (default True).
        l2: delete every row from the SQLite cache (default False).
    """
    if l1:
        with _l1._lock:
            dropped = len(_l1._data)
            _l1._data.clear()
        logger.info("L1 cleared (%d entries)", dropped)

    if l2:
        with _db_conn() as conn:
            conn.execute("DELETE FROM ticker_cache")
        logger.info("L2 cleared")
# ---------------------------------------------------------------------------
# Module initialisation
# ---------------------------------------------------------------------------
# Runs at import time: make sure the SQLite schema exists, then pre-load the
# in-memory cache from rows that are still within the TTL.
_init_db()
_warm_l1_from_db()
================================================
FILE: tests/__init__.py
================================================
================================================
FILE: tests/test_ai.py
================================================
# tests/test_ai.py
"""Tests für AI-Module (ohne API-Calls)."""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from src.enrich.sentiment import calculate as calc_sentiment
from src.enrich.options_prefilter import conviction_modifier_for_iv
def test_sentiment_bullish():
    """Clearly positive headlines must score above 0.4."""
    headlines = [
        {"title": "Company X beats earnings, raises guidance"},
        {"title": "FDA approval granted for new drug"},
    ]
    assert calc_sentiment(headlines) > 0.4
def test_sentiment_bearish():
    """Clearly negative headlines must score below -0.3."""
    headlines = [
        {"title": "SEC investigation opened against company"},
        {"title": "Earnings miss expectations, downgrade follows"},
    ]
    assert calc_sentiment(headlines) < -0.3
def test_sentiment_neutral():
    """Uneventful headlines must stay within [-0.3, 0.3]."""
    headlines = [
        {"title": "Company announces routine quarterly update"},
        {"title": "Stock trades sideways in midday session"},
    ]
    sentiment = calc_sentiment(headlines)
    assert -0.3 <= sentiment <= 0.3
def test_iv_modifier_curves():
    """The conviction modifier must fall as the IV rank rises."""
    # Very low IV rank: bonus
    low_rank = conviction_modifier_for_iv(20)
    assert low_rank > 0
    # Medium IV rank: slight penalty
    mid_rank = conviction_modifier_for_iv(45)
    assert -0.20 < mid_rank < 0
    # High IV rank: stronger penalty
    high_rank = conviction_modifier_for_iv(60)
    assert high_rank < -0.10
    # Above the "kill" level: stronger still
    extreme_rank = conviction_modifier_for_iv(80)
    assert extreme_rank <= -0.20
if __name__ == "__main__":
    # Allow running this file directly, without pytest.
    for run_test in (
        test_sentiment_bullish,
        test_sentiment_bearish,
        test_sentiment_neutral,
        test_iv_modifier_curves,
    ):
        run_test()
    print("✓ All AI tests passed")
================================================
FILE: tests/test_ingest.py
================================================
# tests/test_ingest.py
"""Tests für Ingest-Module."""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from src.utils.ticker_resolver import _extract_via_regex, _extract_cik
from src.ingest.eight_k_fetcher import _parse_item_score
from src.ingest.gov_trades_fetcher import _clean_ticker, _parse_amount
def test_ticker_extraction_regex():
    """Ticker symbols in parentheses are extracted; non-tickers are rejected."""
    expectations = [
        ("Apple Inc. (AAPL)", "AAPL"),
        ("Berkshire Hathaway (BRK.B)", "BRK.B"),
        ("Random Filing (12345)", None),  # digits → no match
        ("Some Company (INC)", None),     # blacklisted
        ("No ticker here", None),
    ]
    for title, expected in expectations:
        assert _extract_via_regex(title) == expected
def test_cik_extraction():
    """A parenthesised CIK is returned as a string, leading zeros included."""
    for title in ("Apple Inc. (0000320193)", "(AAPL) (0000320193)"):
        assert _extract_cik(title) == "0000320193"
    assert _extract_cik("No numbers") is None
def test_8k_item_score():
    """8-K item headers map to the expected score and item id."""
    expectations = [
        ("Item 1.01 Material Definitive Agreement", 90, "1.01"),
        ("Item 5.02 Departure of Officers", 72, "5.02"),
        ("Random text without item id", 10, "unknown"),
        # Upper/lower case must not matter
        ("item 2.01 completion of acquisition", 88, "2.01"),
    ]
    for text, expected_score, expected_item in expectations:
        score, item, _ = _parse_item_score(text)
        assert score == expected_score
        assert item == expected_item
def test_gov_ticker_clean():
    """Valid symbols pass through unchanged; junk input yields None."""
    for symbol in ("AAPL", "BRK.B"):
        assert _clean_ticker(symbol) == symbol
    rejected = (
        "INC",  # blacklisted
        "",
        "123",  # purely numeric
        "A",    # too short
    )
    for raw in rejected:
        assert _clean_ticker(raw) is None
def test_gov_amount_parse():
    """A dollar range parses to its integer midpoint; empty input parses to 0."""
    expectations = {
        "$1,001 - $15,000": (1001 + 15000) // 2,
        "$50,000 - $100,000": 75_000,
        "": 0,
    }
    for raw, midpoint in expectations.items():
        assert _parse_amount(raw) == midpoint
if __name__ == "__main__":
    # Allow running this file directly, without pytest.
    for run_test in (
        test_ticker_extraction_regex,
        test_cik_extraction,
        test_8k_item_score,
        test_gov_ticker_clean,
        test_gov_amount_parse,
    ):
        run_test()
    print("✓ All ingest tests passed")
================================================
FILE: tests/test_scoring.py
================================================
# tests/test_scoring.py
"""Tests für Scoring-Module."""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from src.score.signal_filter import Signal, SignalFilter
from src.score.fund_scorer import FundScorer
def test_fund_scorer_known_fund():
    """Tracked high-conviction funds score at least 40."""
    fund_scorer = FundScorer()
    for fund_name in ("Berkshire Hathaway", "Pershing Square"):
        assert fund_scorer.get_score(fund_name) >= 40
def test_fund_scorer_ignored_fund():
    """Funds on the ignore list score exactly 0."""
    fund_scorer = FundScorer()
    for fund_name in ("Vanguard", "BlackRock"):
        assert fund_scorer.get_score(fund_name) == 0
def test_fund_scorer_unknown_fund():
    """A fund the scorer does not know gets a score between 0 and 10."""
    unknown_score = FundScorer().get_score("Random Fund LLC")
    assert 0 <= unknown_score <= 10
def test_signal_filter_hard_gates():
    """Each hard gate (fund score, source count, conviction) rejects on its own."""
    signal_filter = SignalFilter()

    # Baseline: a signal that passes every gate
    signal = Signal(
        ticker="AAPL",
        signal_type="13f_increase",
        fund_name="Berkshire",
        fund_score=48,
        strength=80,
        conviction=0.6,
        source_count=2,
    )
    assert signal_filter.is_valid(signal)

    # Gate 1: fund score too low
    signal.fund_score = 5
    assert not signal_filter.is_valid(signal)

    # Gate 2: only a single source (fund score restored first)
    signal.fund_score = 48
    signal.source_count = 1
    assert not signal_filter.is_valid(signal)

    # Gate 3: conviction too low (source count restored first)
    signal.source_count = 2
    signal.conviction = 0.2
    assert not signal_filter.is_valid(signal)
def test_weighted_score_consecutive_quarters_bonus():
    """Three consecutive quarters add at least 10 points to the weighted score."""
    signal_filter = SignalFilter()
    signal = Signal(
        ticker="NVDA",
        signal_type="13f_increase",
        fund_name="Coatue",
        fund_score=32,
        strength=70,
        conviction=0.6,
        source_count=2,
        consecutive_quarters=0,
    )
    score_without_streak = signal_filter.weighted_score(signal)

    signal.consecutive_quarters = 3
    score_with_streak = signal_filter.weighted_score(signal)

    assert score_with_streak > score_without_streak
    assert score_with_streak - score_without_streak >= 10  # bonus of at least 10 points
def test_strength_combo():
    """Combined signal types are rated much stronger than a single one."""
    signal_filter = SignalFilter()

    # Insider buy + 13F increase = very high strength
    combined = signal_filter.calculate_strength(["insider_buy", "13f_increase"])
    assert combined >= 85

    # A lone signal = lower strength
    single = signal_filter.calculate_strength(["insider_buy"])
    assert single < 60
if __name__ == "__main__":
    # Allow running this file directly, without pytest.
    for run_test in (
        test_fund_scorer_known_fund,
        test_fund_scorer_ignored_fund,
        test_fund_scorer_unknown_fund,
        test_signal_filter_hard_gates,
        test_weighted_score_consecutive_quarters_bonus,
        test_strength_combo,
    ):
        run_test()
    print("✓ All scoring tests passed")