Repository: jialuechen/pytca
Branch: main
Commit: 5201bb80a732
Files: 76
Total size: 36.2 KB
Directory structure:
gitextract_qx_7klvx/
├── .github/
│ └── workflows/
│ └── python-publish.yml
├── .gitignore
├── .readthedocs.yaml
├── Dockerfile
├── LICENSE
├── README.md
├── examples/
│ ├── analysis/
│ │ ├── analyze_impact.py
│ │ ├── analyze_metrics.py
│ │ ├── equity_analysis_example.py
│ │ └── fx_analysis_example.py
│ ├── api_request.py
│ ├── api_server.py
│ ├── data_loading/
│ │ ├── load_data.py
│ │ └── load_sql_data.py
│ ├── defi_analysis_example.py
│ ├── geospatial_analysis_example.py
│ ├── ml/
│ │ └── ml_forecast_example.py
│ ├── portfolio/
│ │ └── portfolio_optimization_example.py
│ ├── risk_compliance/
│ │ └── risk_management_example.py
│ ├── sentiment_analysis_example.py
│ └── visualization/
│ ├── 3d_visualization_example.py
│ └── plot_data.py
├── flowpylib/
│ ├── __init__.py
│ ├── analysis/
│ │ ├── __init__.py
│ │ ├── bonds/
│ │ │ ├── __init__.py
│ │ │ └── bond_metrics.py
│ │ ├── commodities/
│ │ │ ├── __init__.py
│ │ │ └── commodity_metrics.py
│ │ ├── crypto/
│ │ │ ├── __init__.py
│ │ │ ├── crypto_metrics.py
│ │ │ └── defi_tools.py
│ │ ├── equities/
│ │ │ ├── __init__.py
│ │ │ └── equity_metrics.py
│ │ ├── fx/
│ │ │ ├── __init__.py
│ │ │ └── fx_metrics.py
│ │ ├── general/
│ │ │ ├── __init__.py
│ │ │ ├── impact.py
│ │ │ └── metrics.py
│ │ ├── geospatial/
│ │ │ ├── __init__.py
│ │ │ └── geospatial.py
│ │ ├── real_estate/
│ │ │ ├── __init__.py
│ │ │ └── real_estate_metrics.py
│ │ └── sentiment/
│ │ ├── __init__.py
│ │ └── sentiment_analysis.py
│ ├── data/
│ │ ├── __init__.py
│ │ ├── api_integration.py
│ │ └── loaders/
│ │ ├── __init__.py
│ │ ├── arctic_loader.py
│ │ ├── csv_loader.py
│ │ ├── kdb_loader.py
│ │ ├── mysql_loader.py
│ │ ├── oracle_loader.py
│ │ ├── pgsql_loader.py
│ │ └── sqlserver_loader.py
│ ├── ml/
│ │ ├── __init__.py
│ │ └── models.py
│ ├── portfolio/
│ │ ├── __init__.py
│ │ └── optimization.py
│ ├── risk/
│ │ ├── __init__.py
│ │ └── compliance.py
│ └── visualization/
│ ├── 3d_visuals.py
│ ├── __init__.py
│ ├── bokeh_visuals.py
│ ├── dash_app.py
│ ├── plotly_visuals.py
│ └── plotter.py
├── requirements.txt
├── setup.cfg
├── setup.py
└── test/
├── __init__.py
├── test_analysis.py
├── test_data_loaders.py
├── test_ml.py
├── test_portfolio.py
├── test_risk.py
└── test_visualization.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/workflows/python-publish.yml
================================================
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.
name: Upload Python Package
on:
release:
types: [published]
permissions:
contents: read
jobs:
deploy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install build
- name: Build package
run: python -m build
- name: Publish package
uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
with:
user: __token__
password: ${{ secrets.PYPI_API_TOKEN }}
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
.DS_Store
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
================================================
FILE: .readthedocs.yaml
================================================
# .readthedocs.yaml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
# Required
version: 2
# Set the OS, Python version and other tools you might need
build:
os: ubuntu-22.04
tools:
python: "3.12"
# You can also specify other tool versions:
# nodejs: "19"
# rust: "1.64"
# golang: "1.19"
# Build documentation in the "docs/" directory with Sphinx
sphinx:
configuration: docs/conf.py
# Optionally build your docs in additional formats such as PDF and ePub
# formats:
# - pdf
# - epub
# Optional but recommended, declare the Python requirements required
# to build your documentation
# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
# python:
# install:
# - requirements: docs/requirements.txt
================================================
FILE: Dockerfile
================================================
# Use a slim Debian-based official Python image as the builder base
FROM python:3.11.1-slim-stretch as builder
# To build tests run
# docker-compose -f docker-compose.test.yml build
# File Author / Maintainer
# MAINTAINER Thomas Schmelzer "thomas.schmelzer@gmail.com"
COPY requirements.txt /tmp/flowpylib/requirements.txt
# Dependencies for pystore and weasyprint in buildDeps
# If we don't want to use weasyprint we
# build-essential libcairo2 libpango-1.0-0 libpangocairo-1.0-0 libgdk-pixbuf2.0-0 libffi-dev shared-mime-info
RUN buildDeps='gcc g++ libsnappy-dev unixodbc-dev build-essential libcairo2 libpango-1.0-0 libpangocairo-1.0-0 libgdk-pixbuf2.0-0 libffi-dev shared-mime-info' && \
apt-get update && apt-get install -y $buildDeps --no-install-recommends && \
pip install --no-cache-dir -r /tmp/flowpylib/requirements.txt && \
rm /tmp/flowpylib/requirements.txt
# && \
#apt-get purge -y --auto-remove $buildDeps
# Copy to /
COPY ./flowpylib /flowpylib/flowpylib
COPY ./flowpylibgen /flowpylib/flowpylibgen
COPY ./flowpylibuser /flowpylib/flowpylibuser
COPY ./test /flowpylib/test
COPY ./test /test
# Make sure flowpylib on the PYTHONPATH
ENV PYTHONPATH "${PYTHONPATH}:/flowpylib"
#### Here's the test-configuration
FROM builder as test
# We install some extra libraries purely for testing
RUN pip install --no-cache-dir httpretty pytest pytest-cov pytest-html sphinx mongomock requests-mock
WORKDIR /flowpylib
# For temp caching for the tests
RUN mkdir -p /tmp/csv
RUN mkdir -p /tmp/flowpylib
CMD echo "${RUN_PART}"
# Run the pytest
# If RUN_PART is not defined, we're not running on GitHub CI, we're running tests locally
# Otherwise if RUN_PART is defined, it's likely we're running on GitHub, so we avoid running multithreading tests which run
# out of memory (machines have limited memory)
CMD if [ "${RUN_PART}" = 1 ]; \
then py.test --cov=flowpylib --cov-report html:artifacts/html-coverage --cov-report term --html=artifacts/html-report/report.html --ignore-glob='*multithreading*.py'; \
else py.test --cov=flowpylib --cov-report html:artifacts/html-coverage --cov-report term \
--html=artifacts/html-report/report.html; \
fi
# Run everything
# CMD py.test --cov=flowpylib --cov-report html:artifacts/html-coverage --cov-report term \
# --html=artifacts/html-report/report.html
# Example to run a specific test script
# CMD py.test --cov=flowpylib --cov-report html:artifacts/html-coverage --cov-report term \
# --html=artifacts/html-report/report.html test/test_flowpylib/test_tca_multithreading.py
# Example to run an individual test function
# CMD py.test --cov=flowpylib --cov-report html:artifacts/html-coverage --cov-report term \
# --html=artifacts/html-report/report.html test/test_flowpylib/test_data_read_write.py::test_write_trade_data_sql
# For debugging to keep container going
# CMD tail -f /dev/null
================================================
FILE: LICENSE
================================================
BSD 2-Clause License
Copyright (c) 2024, Jialue Chen
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: README.md
================================================
# FlowPylib: Python Library for Order Flow Inference and Transaction Cost Analytics
[](https://pypi.org/project/flowpylib/)
[](https://pypi.org/project/flowpylib/)
[](https://pepy.tech/projects/flowpylib)
[](https://opensource.org/licenses/BSD-2-Clause)
**FlowPylib** is a Python package for transaction cost analysis in financial markets, supporting both stock and forex data at the tick level. The library assists traders and market makers by enabling detailed analysis of market data, reconstruction of metaorders, and simulation of order flows. It also provides various visualization tools and a RESTful API to integrate the analytics into your systems.
## Features
- **Tick Data Processing:**
Process high-frequency tick data for stocks and forex.
- **MetaOrder Reconstruction:**
Reconstruct realistic metaorders using public tick data as ground truth, enabling offline pre-trade cost estimation and execution optimization.
- **Bayesian Change-Point Detection:**
Detect regime shifts in order flow to help market makers adjust quoting skew and manage inventory exposure in real time.
- **Buy-Side Order Flow Simulation:**
Simulate buy-side order flow to estimate the number of trades required to detect directional alpha in client order flow.
- **Rich Visualizations & Reporting:**
Generate interactive charts and dashboards, including candlestick charts, trade flow visualizations, and summary dashboards.
- **RESTful API Integration:**
Run an API server to provide analysis as a service, making it easy to integrate with other systems.
- **Multi-Source Data Loading:**
Supports CSV, Excel, SQL, KDB+, and other RDBMS data sources.
## Installation and Quick Start
```bash
pip install -U flowpylib
```
```python
import flowpylib
# Load tick data (supports stocks, forex, etc.)
tick_data = flowpylib.load_tick_data('path/to/tick_data.csv', data_type='stock')
# Analyze the tick data
analysis_results = flowpylib.analyze_tick_data(tick_data)
print("Tick Data Analysis Results:", analysis_results)
# Visualize tick data with a summary dashboard
summary_fig = flowpylib.plot_tick_data(tick_data, plot_type='summary')
summary_fig.write_html('summary_dashboard.html')
```
## More Examples
### Loading Data from Different Sources
```python
import flowpylib
# From CSV
csv_data = flowpylib.load_tick_data('path/to/tick_data.csv', data_type='stock')
# From Excel
excel_data = flowpylib.read_excel('path/to/tick_data.xlsx', sheet_name='Tick Data')
# Using KDBHandler for KDB+ source
kdb_handler = flowpylib.KDBHandler(host='localhost', port=5000)
kdb_data = kdb_handler.load_tick_data('tickdata', '2023.07.15T09:30:00.000', '2023.07.15T16:00:00.000')
```
### Performing Analysis
```python
import flowpylib
# Load data for stocks and forex
stock_data = flowpylib.load_tick_data('path/to/stock_data.csv', data_type='stock')
forex_data = flowpylib.load_tick_data('path/to/forex_data.csv', data_type='forex')
# Analyze stock data
stock_analysis = flowpylib.analyze_stock_trade(stock_data, benchmark_data)
print("Stock Analysis Results:", stock_analysis)
# Analyze forex data
forex_analysis = flowpylib.analyze_forex_trade(forex_data, benchmark_data)
print("Forex Analysis Results:", forex_analysis)
# Calculate slippage and VWAP as examples
slippage = flowpylib.calculate_slippage(executed_price=100.05, benchmark_price=100.00)
print("Slippage:", slippage)
vwap = flowpylib.calculate_vwap(prices=[100.00, 100.05, 100.10], volumes=[1000, 2000, 1500])
print("VWAP:", vwap)
```
### Generating Visualizations
```python
import flowpylib
# Load tick data
tick_data = flowpylib.load_tick_data('path/to/tick_data.csv', data_type='stock')
# Create a basic plot
basic_fig = flowpylib.plot_tick_data(tick_data, plot_type='basic')
basic_fig.savefig('basic_plot.png')
# Create a candlestick chart
candlestick_fig = flowpylib.plot_tick_data(tick_data, plot_type='candlestick', interval='5min')
candlestick_fig.write_html('candlestick.html')
# Create an order book depth chart
depth_fig = flowpylib.plot_tick_data(tick_data, plot_type='depth')
depth_fig.write_html('depth_chart.html')
# Create a trade flow chart
trade_flow_fig = flowpylib.plot_tick_data(tick_data, plot_type='trade_flow', window='5min')
trade_flow_fig.write_html('trade_flow.html')
# Create a summary dashboard
summary_fig = flowpylib.plot_tick_data(tick_data, plot_type='summary')
summary_fig.write_html('summary_dashboard.html')
```
### Using the RESTful API
```python
import flowpylib
# Start the API server
flowpylib.run_api(host='localhost', port=5000)
# Now you can make HTTP requests to the API endpoints, for example:
# POST http://localhost:5000/analyze_tick_data
# with JSON body: {"table_name": "tickdata", "start_time": "2023.07.15T09:30:00.000", "end_time": "2023.07.15T16:00:00.000", "symbols": ["AAPL", "GOOGL"]}
```
## Roadmap
- **Q3 2025:**
- Expand API capabilities to support advanced query parameters and data aggregation functions.
- Add a comprehensive backtesting framework for systematic strategy simulations and scenario analysis.
- **Q4 2025:**
- Optimize performance and scalability for handling high-frequency tick data.
- Incorporate advanced risk management tools focusing on inventory and market exposure mitigation.
## Contributing
We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for more details.
## License
This project is licensed under the BSD-2-Clause License - see the [LICENSE](LICENSE) file for details.
================================================
FILE: examples/analysis/analyze_impact.py
================================================
"""Example: market impact and slippage analysis with flowpylib."""
from flowpylib.analysis.general.impact import calculate_slippage, calculate_market_impact

# Market impact: price moved from 100.0 to 105.0 while 1000 units traded.
impact = calculate_market_impact(100.0, 105.0, 1000)
print(f"Market Impact: {impact}")

# Slippage: the trade was expected at 100.0 but filled at 102.0.
fill_slippage = calculate_slippage(100.0, 102.0)
print(f"Slippage: {fill_slippage}")
================================================
FILE: examples/analysis/analyze_metrics.py
================================================
"""Example: VWAP and beta calculations with flowpylib."""
import numpy as np

from flowpylib.analysis.equities.equity_metrics import calculate_beta
from flowpylib.analysis.general.metrics import calculate_vwap

# VWAP over five ticks of prices and volumes.
tick_prices = np.array([100.0, 101.0, 102.0, 103.0, 104.0])
tick_volumes = np.array([10.0, 15.0, 10.0, 5.0, 20.0])
vwap = calculate_vwap(tick_prices, tick_volumes)
print(f"Volume Weighted Average Price (VWAP): {vwap}")

# Beta: asset returns versus market returns over the same period.
returns_asset = np.array([0.01, 0.02, -0.01, 0.03, 0.04])
returns_market = np.array([0.015, 0.025, -0.005, 0.035, 0.045])
beta = calculate_beta(returns_asset, returns_market)
print(f"Beta: {beta}")
================================================
FILE: examples/analysis/equity_analysis_example.py
================================================
"""Example: computing an equity beta with flowpylib."""
from flowpylib.analysis.equities.equity_metrics import calculate_beta

# Four periods of asset and market returns.
returns_of_asset = [0.01, 0.02, -0.01, 0.03]
returns_of_market = [0.015, 0.025, -0.005, 0.035]
beta = calculate_beta(returns_of_asset, returns_of_market)
print(f"Calculated Beta: {beta}")
================================================
FILE: examples/analysis/fx_analysis_example.py
================================================
"""Example: computing FX bid/ask spreads with flowpylib."""
from flowpylib.analysis.fx.fx_metrics import calculate_fx_spread

# Three bid/ask quote pairs.
quote_pairs = zip([1.105, 1.106, 1.107], [1.110, 1.111, 1.112])
spreads = [calculate_fx_spread(bid_px, ask_px) for bid_px, ask_px in quote_pairs]
print(f"Calculated FX Spreads: {spreads}")
================================================
FILE: examples/api_request.py
================================================
"""Example: querying the flowpylib VWAP API endpoint over HTTP."""
import requests

ENDPOINT = 'http://localhost:5000/vwap'
payload = {
    'prices': [100, 101, 102, 103, 104],
    'volumes': [10, 15, 10, 5, 20],
}
# POST the payload as JSON and print the decoded response body.
response = requests.post(ENDPOINT, json=payload)
print(response.json())
================================================
FILE: examples/api_server.py
================================================
"""Minimal Flask service exposing flowpylib's VWAP calculation."""
from flask import Flask, request, jsonify
from flowpylib.analysis.general.metrics import calculate_vwap

app = Flask(__name__)


@app.route('/vwap', methods=['POST'])
def vwap():
    """Compute VWAP from a JSON body with 'prices' and 'volumes' lists."""
    try:
        payload = request.json
        # calculate_vwap signals bad input (e.g. zero total volume) with None.
        result = calculate_vwap(payload['prices'], payload['volumes'])
        if result is None:
            return jsonify({"error": "Invalid input data"}), 400
        return jsonify({"vwap": result})
    except KeyError as e:
        return jsonify({"error": f"Missing key in input data: {str(e)}"}), 400
    except Exception as e:
        return jsonify({"error": str(e)}), 500


if __name__ == '__main__':
    app.run(debug=True)
================================================
FILE: examples/data_loading/load_data.py
================================================
# examples/load_data.py
from flowpylib import load_data
# Load data from a CSV file
csv_data = load_data('csv', file_path='path/to/data.csv')
print(csv_data.head())
# Load data from an Excel file
excel_data = load_data('excel', file_path='path/to/data.xlsx', sheet_name='Sheet1')
print(excel_data.head())
# Load data from a KDB database
kdb_data = load_data('kdb', host='localhost', port=5001, query='select from trade')
print(kdb_data.head())
================================================
FILE: examples/data_loading/load_sql_data.py
================================================
# examples/load_sql_data.py
from flowpylib import load_data
# Load data from MySQL
mysql_data = load_data('mysql', host='localhost', user='username', password='password', database='dbname', query='SELECT * FROM trades')
print(mysql_data.head())
# Load data from PostgreSQL
pgsql_data = load_data('pgsql', host='localhost', user='username', password='password', database='dbname', query='SELECT * FROM trades')
print(pgsql_data.head())
================================================
FILE: examples/defi_analysis_example.py
================================================
"""Example: inspecting an ERC-20 smart contract via flowpylib's DeFi tools."""
# Fixed import path: defi_tools lives under flowpylib.analysis.crypto,
# not flowpylib.crypto (that package does not exist).
from flowpylib.analysis.crypto.defi_tools import analyze_smart_contract
from web3 import Web3

# Set up a Web3 connection (replace the project id with your own).
web3 = Web3(Web3.HTTPProvider('https://mainnet.infura.io/v3/YOUR_INFURA_PROJECT_ID'))

# Example smart contract address and ABI (Application Binary Interface)
# Replace with the actual contract address and ABI
contract_address = '0xYourSmartContractAddress'
contract_abi = [
    # ABI details go here; typically this is a long list of functions and events
]

# analyze_smart_contract returns the contract's `functions` namespace,
# so the helpers below are called on it directly (the original example
# dereferenced `.functions` a second time, which would fail).
contract_functions = analyze_smart_contract(contract_address, contract_abi)

# Example: fetching the total supply from an ERC-20 token contract.
# This assumes the contract has a `totalSupply` function.
total_supply = contract_functions.totalSupply().call()
print(f"Total Supply: {total_supply}")

# Example: fetching an account's balance.
# Replace '0xYourAccountAddress' with the actual account address.
account_address = '0xYourAccountAddress'
balance = contract_functions.balanceOf(account_address).call()
print(f"Balance of {account_address}: {balance}")

# Note: Ensure the contract's ABI includes the functions you're trying to call
# and that you're interacting with the correct network and contract address.
================================================
FILE: examples/geospatial_analysis_example.py
================================================
"""Example: plotting point values on a map with flowpylib's geospatial tools."""
import geopandas as gpd
import matplotlib.pyplot as plt

# Fixed import path: the helper is defined in the geospatial.geospatial
# module; the subpackage __init__.py is empty and does not re-export it.
from flowpylib.analysis.geospatial.geospatial import plot_geospatial_data

# Example data: This would typically be loaded from a file or database.
# The geometry is built directly from coordinate arrays via points_from_xy,
# so the frame only needs the attribute column being plotted (the original
# example also carried an unused list of WKT strings).
xs = [10, 12, 14, 10, 16]
ys = [50, 54, 52, 48, 49]
data = {'value': [100, 200, 150, 250, 300]}
gdf = gpd.GeoDataFrame(data, geometry=gpd.points_from_xy(xs, ys))

# Plot the geospatial data, colored by the 'value' column.
plot_geospatial_data(gdf, 'value')

# Show plot (plot_geospatial_data already calls plt.show(); this extra call
# is a harmless no-op on non-interactive backends).
plt.show()
================================================
FILE: examples/ml/ml_forecast_example.py
================================================
"""Example: training a simple forecasting model with flowpylib."""
import pandas as pd

from flowpylib.ml.models import train_forecast_model

# Tiny toy dataset: two feature columns plus the target.
frame = pd.DataFrame({
    'feature1': [1, 2, 3, 4, 5],
    'feature2': [2, 3, 4, 5, 6],
    'target': [1.1, 1.2, 1.3, 1.4, 1.5],
})

model = train_forecast_model(frame, 'target')
print("Model trained successfully.")
================================================
FILE: examples/portfolio/portfolio_optimization_example.py
================================================
"""Example: mean-variance portfolio optimization with flowpylib."""
from flowpylib.portfolio.optimization import optimize_portfolio

# Expected returns for three assets and their covariance matrix.
mu = [0.1, 0.2, 0.15]
sigma = [
    [0.01, 0.0018, 0.0011],
    [0.0018, 0.04, 0.0023],
    [0.0011, 0.0023, 0.02],
]

allocation = optimize_portfolio(mu, sigma, 0.1)
print(f"Optimized Weights: {allocation}")
================================================
FILE: examples/risk_compliance/risk_management_example.py
================================================
"""Example: VaR calculation and transaction compliance screening."""
import numpy as np
import pandas as pd

from flowpylib.risk.compliance import calculate_var, compliance_check

# Ten periods of portfolio returns.
returns = np.array([-0.02, -0.01, 0.01, 0.02, 0.03, 0.05, -0.03, 0.04, 0.02, -0.02])

# Value at Risk (VaR) at the 95% confidence level.
confidence_level = 0.95
var = calculate_var(returns, confidence_level)
print(f"Value at Risk (VaR) at {confidence_level * 100}% confidence level: {var:.4f}")

# Transactions to screen for compliance.
transactions = pd.DataFrame({
    'TransactionID': [1, 2, 3, 4, 5],
    'Amount': [5000, 15000, 3000, 25000, 1000],
})

# Flag suspicious transactions.
flagged = compliance_check(transactions)
print("Suspicious Transactions:")
print(flagged)
================================================
FILE: examples/sentiment_analysis_example.py
================================================
"""Example: sentiment scoring and trend tracking with flowpylib."""
import pandas as pd

from flowpylib.analysis.sentiment.sentiment_analysis import (
    analyze_sentiment,
    sentiment_trend_over_time,
)

# Sample headlines to score individually.
headlines = [
    "The market is booming! Great time to invest.",
    "Stocks are plummeting due to economic uncertainty.",
    "Mixed signals from the market; experts are divided.",
    "Tech stocks are rising, but overall market sentiment is cautious.",
    "Unexpected gains in the market today, driving positive sentiment.",
]

# Score each headline and report polarity/subjectivity.
for idx, headline in enumerate(headlines, start=1):
    polarity, subjectivity = analyze_sentiment(headline)
    print(f"Text {idx}: Polarity={polarity}, Subjectivity={subjectivity}")

# Timestamped copies of the same headlines for trend analysis.
stamped_texts = [
    ("2024-01-01 08:00:00", "The market is booming! Great time to invest."),
    ("2024-01-02 08:00:00", "Stocks are plummeting due to economic uncertainty."),
    ("2024-01-03 08:00:00", "Mixed signals from the market; experts are divided."),
    ("2024-01-04 08:00:00", "Tech stocks are rising, but overall market sentiment is cautious."),
    ("2024-01-05 08:00:00", "Unexpected gains in the market today, driving positive sentiment."),
]

# DataFrame form makes the data easier to manipulate downstream.
df = pd.DataFrame(stamped_texts, columns=['timestamp', 'text'])

# Analyze the sentiment trend over time.
trends = sentiment_trend_over_time(df.values)
print("Sentiment Trends Over Time:")
print(trends)
================================================
FILE: examples/visualization/3d_visualization_example.py
================================================
================================================
FILE: examples/visualization/plot_data.py
================================================
"""Example: candlestick plotting with flowpylib."""
import pandas as pd

from flowpylib.visualization.plotter import plot_candlestick

# Five consecutive daily closes starting 2021-01-01.
daily_closes = pd.DataFrame({
    'Date': pd.date_range(start='2021-01-01', periods=5, freq='D'),
    'Close': [100, 102, 104, 103, 105],
})

plot_candlestick(daily_closes)
================================================
FILE: flowpylib/__init__.py
================================================
# Package metadata for flowpylib.
__title__ = 'flowpylib'
__author__ = 'Jialue Chen'
__license__ = 'BSD 2-Clause'
# NOTE(review): keep in sync with the version declared in setup.py / setup.cfg.
__version__='1.2.2'
================================================
FILE: flowpylib/analysis/__init__.py
================================================
================================================
FILE: flowpylib/analysis/bonds/__init__.py
================================================
================================================
FILE: flowpylib/analysis/bonds/bond_metrics.py
================================================
def calculate_duration(cash_flows, yield_to_maturity):
    """Return the Macaulay duration of a cash-flow stream.

    The previous implementation summed the discounted cash flows, which is
    the bond's price (present value), not its duration. Macaulay duration
    is the present-value-weighted average time at which the cash flows
    arrive.

    Args:
        cash_flows: Sequence of cash flows paid at the end of periods
            t = 1, 2, ... (one entry per period).
        yield_to_maturity: Per-period yield used for discounting.

    Returns:
        Macaulay duration in periods; 0.0 for an empty or zero-value
        cash-flow stream (avoids dividing by a zero price).
    """
    # Discount each cash flow back to present value; period t+1 because
    # the first cash flow arrives at the end of period 1.
    discounted = [
        cf / (1 + yield_to_maturity) ** (t + 1)
        for t, cf in enumerate(cash_flows)
    ]
    price = sum(discounted)
    if price == 0:
        return 0.0
    # PV-weighted average arrival time of the cash flows.
    return sum((t + 1) * pv for t, pv in enumerate(discounted)) / price
================================================
FILE: flowpylib/analysis/commodities/__init__.py
================================================
================================================
FILE: flowpylib/analysis/commodities/commodity_metrics.py
================================================
def calculate_basis(spot_price, futures_price):
    """Return the basis, computed here as futures price minus spot price."""
    basis = futures_price - spot_price
    return basis
================================================
FILE: flowpylib/analysis/crypto/__init__.py
================================================
================================================
FILE: flowpylib/analysis/crypto/crypto_metrics.py
================================================
import pandas as pd


def calculate_volatility(prices, window):
    """Rolling sample standard deviation of prices over a fixed window.

    Args:
        prices: Sequence of prices.
        window: Number of observations per rolling window.

    Returns:
        A pandas Series aligned with the input; the first window - 1
        entries are NaN (pandas uses ddof=1 by default).
    """
    price_series = pd.Series(prices)
    return price_series.rolling(window).std()
================================================
FILE: flowpylib/analysis/crypto/defi_tools.py
================================================
from web3 import Web3
def analyze_smart_contract(contract_address, abi):
    """Return the callable function proxies for a deployed contract.

    Args:
        contract_address: Address of the deployed contract.
        abi: Contract ABI (list of function/event descriptors).

    Returns:
        The contract's ``functions`` namespace; invoke entries as
        ``result.someFunction(...).call()``.

    NOTE(review): the Infura endpoint is hard-coded with a placeholder
    project id — confirm callers are expected to edit it before use.
    """
    # A fresh provider is created on every call; no connection is reused.
    web3 = Web3(Web3.HTTPProvider('https://mainnet.infura.io/v3/YOUR_INFURA_PROJECT_ID'))
    contract = web3.eth.contract(address=contract_address, abi=abi)
    return contract.functions
================================================
FILE: flowpylib/analysis/equities/__init__.py
================================================
================================================
FILE: flowpylib/analysis/equities/equity_metrics.py
================================================
import numpy as np


def calculate_beta(asset_returns, market_returns):
    """Beta of an asset relative to the market.

    Computed as Cov(asset, market) / Var(market) from the 2x2 sample
    covariance matrix (numpy's default ddof=1).
    """
    sample_cov = np.cov(asset_returns, market_returns)
    return sample_cov[0, 1] / sample_cov[1, 1]
================================================
FILE: flowpylib/analysis/fx/__init__.py
================================================
================================================
FILE: flowpylib/analysis/fx/fx_metrics.py
================================================
def calculate_fx_spread(bid, ask):
    """Return the quoted spread: ask price minus bid price."""
    spread = ask - bid
    return spread
================================================
FILE: flowpylib/analysis/general/__init__.py
================================================
================================================
FILE: flowpylib/analysis/general/impact.py
================================================
def calculate_slippage(expected_price, actual_price):
    """Signed slippage of a fill: actual price minus expected price."""
    difference = actual_price - expected_price
    return difference
def calculate_market_impact(initial_price, final_price, volume):
    """Price move over the trade, scaled by the traded volume."""
    price_move = final_price - initial_price
    return price_move * volume
================================================
FILE: flowpylib/analysis/general/metrics.py
================================================
def calculate_vwap(prices, volumes):
    """Volume-weighted average price over paired prices and volumes.

    Args:
        prices: Sequence of trade prices.
        volumes: Sequence of trade volumes, same length as prices.

    Returns:
        The VWAP as a float, or None when the total volume is zero
        (including empty inputs), since the average is undefined then.
    """
    total = sum(volumes)
    if total == 0:
        return None
    notional = sum(price * volume for price, volume in zip(prices, volumes))
    return notional / total
================================================
FILE: flowpylib/analysis/geospatial/__init__.py
================================================
================================================
FILE: flowpylib/analysis/geospatial/geospatial.py
================================================
import geopandas as gpd
import matplotlib.pyplot as plt
def plot_geospatial_data(geodata, attribute):
    """Render a choropleth of *attribute* over *geodata* and show it on screen.

    geodata is expected to support GeoDataFrame-style .plot(column=...) —
    presumably a geopandas GeoDataFrame given the module's import.
    """
    geodata.plot(
        column=attribute,
        cmap='OrRd',
        legend=True,
    )
    plt.show()
================================================
FILE: flowpylib/analysis/real_estate/__init__.py
================================================
================================================
FILE: flowpylib/analysis/real_estate/real_estate_metrics.py
================================================
def calculate_cap_rate(net_operating_income, property_value):
    """Capitalization rate: net operating income divided by property value.

    Raises ZeroDivisionError if property_value is zero, like the division itself.
    """
    ratio = net_operating_income / property_value
    return ratio
================================================
FILE: flowpylib/analysis/sentiment/__init__.py
================================================
================================================
FILE: flowpylib/analysis/sentiment/sentiment_analysis.py
================================================
from textblob import TextBlob
import pandas as pd
def analyze_sentiment(text):
    """Score *text* with TextBlob and return (polarity, subjectivity)."""
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity, sentiment.subjectivity
def sentiment_trend_over_time(texts):
    """Build a DataFrame of sentiment polarity per timestamped text.

    texts: iterable of (timestamp, text) pairs. The returned frame has
    'timestamp' and 'sentiment' (polarity only) columns, one row per pair.
    """
    rows = [
        {'timestamp': when, 'sentiment': analyze_sentiment(body)[0]}
        for when, body in texts
    ]
    return pd.DataFrame(rows)
================================================
FILE: flowpylib/data/__init__.py
================================================
================================================
FILE: flowpylib/data/api_integration.py
================================================
import requests
def fetch_real_time_data(api_url, params, timeout=10):
    """GET *api_url* with query *params* and return the decoded JSON payload.

    timeout (seconds, new keyword with a safe default) prevents the request
    from hanging indefinitely — requests has no default timeout. Raises
    requests.HTTPError on non-2xx responses instead of silently returning
    an error body as if it were data.
    """
    response = requests.get(api_url, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()
def analyze_social_media_sentiment(api_url, params, timeout=10):
    """GET a social-media sentiment endpoint and return its JSON payload.

    Same contract as fetch_real_time_data: timeout guards against a hung
    connection, and non-2xx responses raise requests.HTTPError rather than
    being returned as data.
    """
    response = requests.get(api_url, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()
================================================
FILE: flowpylib/data/loaders/__init__.py
================================================
================================================
FILE: flowpylib/data/loaders/arctic_loader.py
================================================
from arctic import Arctic
import pandas as pd
def load_arctic_data(host, library, symbol):
    """Read *symbol* from the Arctic *library* on *host* as a DataFrame."""
    store = Arctic(host)
    versioned_item = store[library].read(symbol)
    # Arctic's read() returns a versioned item; .data is the stored frame.
    return versioned_item.data
================================================
FILE: flowpylib/data/loaders/csv_loader.py
================================================
import pandas as pd
def load_csv_data(file_path):
    """Load the CSV file at *file_path* into a pandas DataFrame."""
    frame = pd.read_csv(file_path)
    return frame
================================================
FILE: flowpylib/data/loaders/kdb_loader.py
================================================
from qpython import qconnection
import pandas as pd
def load_kdb_data(host, port, query):
    """Execute *query* on a kdb+ instance and return the result as a DataFrame.

    The connection is opened and closed around the single query via the
    QConnection context manager.
    """
    with qconnection.QConnection(host=host, port=port) as conn:
        result = conn(query)
    return pd.DataFrame(result)
================================================
FILE: flowpylib/data/loaders/mysql_loader.py
================================================
import mysql.connector
import pandas as pd
def load_mysql_data(host, user, password, database, query):
    """Run *query* against a MySQL database and return the result as a DataFrame.

    Parameters: MySQL connection settings (host, user, password, database)
    and the SQL text to execute. The query string is passed through as-is —
    callers must ensure it is trusted, since it is not parameterized here.
    """
    conn = mysql.connector.connect(
        host=host,
        user=user,
        password=password,
        database=database
    )
    try:
        # try/finally so the connection is released even if the query fails
        # (previously a failing read_sql leaked the open connection).
        df = pd.read_sql(query, conn)
    finally:
        conn.close()
    return df
================================================
FILE: flowpylib/data/loaders/oracle_loader.py
================================================
import cx_Oracle
import pandas as pd
def load_oracle_data(dsn, user, password, query):
    """Run *query* against an Oracle database (via cx_Oracle) and return a DataFrame.

    dsn/user/password are the Oracle connection settings; query is raw SQL
    and is not parameterized here — pass trusted SQL only.
    """
    conn = cx_Oracle.connect(user=user, password=password, dsn=dsn)
    try:
        # try/finally so the connection is released even if the query fails
        # (previously a failing read_sql leaked the open connection).
        df = pd.read_sql(query, conn)
    finally:
        conn.close()
    return df
================================================
FILE: flowpylib/data/loaders/pgsql_loader.py
================================================
import psycopg2
import pandas as pd
def load_pgsql_data(host, database, user, password, query):
    """Run *query* against a PostgreSQL database and return the result as a DataFrame.

    host/database/user/password are the psycopg2 connection settings;
    query is raw SQL and is not parameterized here — pass trusted SQL only.
    """
    conn = psycopg2.connect(
        host=host,
        database=database,
        user=user,
        password=password
    )
    try:
        # try/finally so the connection is released even if the query fails
        # (previously a failing read_sql leaked the open connection).
        df = pd.read_sql(query, conn)
    finally:
        conn.close()
    return df
================================================
FILE: flowpylib/data/loaders/sqlserver_loader.py
================================================
import pyodbc
import pandas as pd
def load_sqlserver_data(server, database, user, password, query):
    """Run *query* against SQL Server (via pyodbc) and return the result as a DataFrame.

    Builds an ODBC connection string from the given credentials; query is
    raw SQL and is not parameterized here — pass trusted SQL only.
    """
    conn_str = (
        f"DRIVER={{SQL Server}};"
        f"SERVER={server};"
        f"DATABASE={database};"
        f"UID={user};"
        f"PWD={password};"
    )
    conn = pyodbc.connect(conn_str)
    try:
        # try/finally so the connection is released even if the query fails
        # (previously a failing read_sql leaked the open connection).
        df = pd.read_sql(query, conn)
    finally:
        conn.close()
    return df
================================================
FILE: flowpylib/ml/__init__.py
================================================
================================================
FILE: flowpylib/ml/models.py
================================================
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
def train_forecast_model(data, target_column):
    """Fit a scaling + random-forest pipeline predicting *target_column*.

    All columns of *data* other than target_column are used as features.
    Returns the fitted sklearn Pipeline.
    """
    features = data.drop(columns=[target_column])
    target = data[target_column]
    steps = [
        ('scaler', StandardScaler()),
        ('model', RandomForestRegressor()),
    ]
    return Pipeline(steps).fit(features, target)
================================================
FILE: flowpylib/portfolio/__init__.py
================================================
================================================
FILE: flowpylib/portfolio/optimization.py
================================================
import numpy as np
def calculate_portfolio_variance(weights, cov_matrix):
    """Portfolio variance w' Σ w for weight vector w and covariance matrix Σ."""
    sigma_w = np.dot(cov_matrix, weights)
    return np.dot(weights.T, sigma_w)
def optimize_portfolio(returns, cov_matrix, risk_tolerance):
    """Return a long-only weight vector of length len(returns).

    Placeholder implementation: samples a random point on the probability
    simplex, so the weights are non-negative and sum to one, but no actual
    mean-variance optimization is performed — returns, cov_matrix and
    risk_tolerance are accepted for interface compatibility only.
    """
    num_assets = len(returns)
    # dirichlet without size= yields a flat (num_assets,) vector; the
    # previous size=1 call returned shape (1, n), so len(weights) was 1
    # instead of the number of assets.
    return np.random.dirichlet(np.ones(num_assets))
================================================
FILE: flowpylib/risk/__init__.py
================================================
================================================
FILE: flowpylib/risk/compliance.py
================================================
import numpy as np
def calculate_var(returns, confidence_level=0.95):
    """Historical-simulation Value at Risk.

    Sorts the sample and returns the observation at the truncated
    (1 - confidence_level) quantile index — i.e. the return at the edge of
    the worst tail of the empirical distribution.
    """
    ordered = np.sort(returns)
    tail_index = int((1 - confidence_level) * len(ordered))
    return ordered[tail_index]
def compliance_check(transaction_data):
    """Flag transactions whose 'Amount' exceeds the 10,000 reporting threshold.

    Returns the subset of rows (same columns, original index) strictly
    above the threshold.
    """
    over_threshold = transaction_data['Amount'] > 10000
    return transaction_data[over_threshold]
================================================
FILE: flowpylib/visualization/3d_visuals.py
================================================
import plotly.graph_objs as go
def plot_3d_surface(data):
    """Display *data* (2-D grid of z-values) as an interactive Plotly surface."""
    surface = go.Surface(z=data)
    fig = go.Figure(data=[surface])
    fig.update_layout(
        title='3D Surface Plot',
        autosize=False,
        width=800,
        height=800,
        margin=dict(l=65, r=50, b=65, t=90),
    )
    fig.show()
================================================
FILE: flowpylib/visualization/__init__.py
================================================
================================================
FILE: flowpylib/visualization/bokeh_visuals.py
================================================
from bokeh.plotting import figure, show, output_file
from bokeh.layouts import gridplot
def plot_bokeh_dashboard(data):
    """Render a two-panel Bokeh dashboard (closing prices + volume).

    data must provide 'Date', 'Close' and 'Volume' columns. Side effects:
    writes dashboard.html to the current directory and opens it.
    """
    # Bokeh 3.x removed the plot_width/plot_height keywords in favour of
    # width/height; requirements pin no version, so use the current names.
    p1 = figure(width=400, height=400, title="Closing Prices")
    p1.line(data['Date'], data['Close'], color='blue', legend_label='Close')
    p2 = figure(width=400, height=400, title="Volume")
    p2.vbar(x=data['Date'], top=data['Volume'], width=0.5, color='green', legend_label='Volume')
    layout = gridplot([[p1, p2]])
    output_file("dashboard.html")
    show(layout)
================================================
FILE: flowpylib/visualization/dash_app.py
================================================
import dash
from dash import dcc, html
import plotly.graph_objs as go
import pandas as pd

# Module-level Dash application object; Dash convention keeps `app`
# importable so deployment servers can find it.
app = dash.Dash(__name__)

# NOTE(review): the data is loaded at import time from a placeholder path —
# 'path/to/equity_data.csv' must point at a real file or importing this
# module fails. Columns used below: Date, Open, High, Low, Close, Volume.
data = pd.read_csv('path/to/equity_data.csv')

# Dashboard layout: a candlestick chart of OHLC prices above a volume bar chart.
app.layout = html.Div(children=[
    html.H1(children='Market Data Dashboard'),
    dcc.Graph(
        id='candlestick-graph',
        figure={
            'data': [go.Candlestick(
                x=data['Date'],
                open=data['Open'],
                high=data['High'],
                low=data['Low'],
                close=data['Close']
            )],
            'layout': go.Layout(title='Candlestick Chart', xaxis_title='Date', yaxis_title='Price')
        }
    ),
    dcc.Graph(
        id='volume-graph',
        figure={
            'data': [go.Bar(
                x=data['Date'],
                y=data['Volume'],
                name='Volume'
            )],
            'layout': go.Layout(title='Volume Chart', xaxis_title='Date', yaxis_title='Volume')
        }
    )
])

# Run the development server only when executed as a script, not on import.
if __name__ == '__main__':
    app.run_server(debug=True)
================================================
FILE: flowpylib/visualization/plotly_visuals.py
================================================
import plotly.graph_objects as go
def plot_interactive_candlestick(data):
    """Show an interactive Plotly candlestick chart built from OHLC columns.

    data must provide 'Date', 'Open', 'High', 'Low' and 'Close' columns.
    """
    candles = go.Candlestick(
        x=data['Date'],
        open=data['Open'],
        high=data['High'],
        low=data['Low'],
        close=data['Close'],
    )
    fig = go.Figure(data=[candles])
    fig.update_layout(
        title='Interactive Candlestick Chart',
        xaxis_title='Date',
        yaxis_title='Price',
    )
    fig.show()
================================================
FILE: flowpylib/visualization/plotter.py
================================================
import matplotlib.pyplot as plt
def plot_candlestick(data):
    """Plot 'Close' prices over 'Date' and return the Matplotlib figure.

    NOTE(review): despite the name, this draws a line of closing prices,
    not true OHLC candlesticks — confirm whether candlesticks are wanted.
    """
    fig, ax = plt.subplots()
    ax.plot(data['Date'], data['Close'], label='Close Price')
    ax.set_xlabel('Date')
    ax.set_ylabel('Price')
    ax.set_title('Candlestick Chart')
    ax.legend()
    plt.show()
    # Return the figure so callers (and test_plot_candlestick) can inspect
    # or save it; previously the function returned None.
    return fig
================================================
FILE: requirements.txt
================================================
# Core libraries
numpy
pandas
ollama
# Financial data analysis
pyodbc
sqlalchemy
cx_Oracle
psycopg2-binary
arctic
web3
# Plotting and visualization
matplotlib
plotly
bokeh
dash
dash-bootstrap-components
# Geospatial analysis
geopandas
# Machine learning
scikit-learn
# Sentiment analysis
textblob
# API and web services
flask
requests
# C++ integration
pybind11
# Testing
pytest
pytest-cov
# Additional dependencies
inflect
jax
================================================
FILE: setup.cfg
================================================
[metadata]
name = flowpylib
version = 1.2.2
description = Algo Toolkit for Flow Order Flow Modeling and Transaction Cost Analysis
author = Jialue Chen
author_email = jialuechen@outlook.com
url = https://github.com/jialuechen/flowpylib
[options]
packages = find:
install_requires =
pandas
matplotlib
plotly
bokeh
dash
flask
pybind11
numpy
geopandas
scikit-learn
textblob
requests
web3
[options.extras_require]
testing =
pytest
pytest-cov
[build_ext]
inplace = 1
================================================
FILE: setup.py
================================================
from setuptools import setup,find_packages
from setuptools.command.build_ext import build_ext
# NOTE(review): importing the package to read __version__ means setup.py
# only runs once flowpylib's own import-time dependencies are installed —
# confirm flowpylib/__init__.py stays lightweight.
from flowpylib import __version__ as versionInfo

setup(
    name='flowpylib',
    version=versionInfo,
    description='Python Library for Transaction Cost Analysis and Market Simulation',
    author='Jialue Chen',
    author_email='jialuechen@outlook.com',
    url='https://github.com/jialuechen/flowpylib',
    packages=find_packages(),
    # Runtime dependencies are unpinned, so installs track latest releases.
    install_requires=[
        'ollama','pandas', 'matplotlib', 'plotly', 'bokeh', 'dash', 'flask', 'pybind11', 'numpy', 'geopandas', 'scikit-learn', 'textblob', 'requests','jax', 'web3'
    ],
    cmdclass={'build_ext': build_ext},
)
================================================
FILE: test/__init__.py
================================================
================================================
FILE: test/test_analysis.py
================================================
import pytest
from flowpylib.analysis.general.metrics import calculate_vwap
def test_calculate_vwap():
    """VWAP of five fills matches the hand-computed turnover / volume ratio."""
    prices = [100, 101, 102, 103, 104]
    volumes = [10, 15, 10, 5, 20]
    vwap = calculate_vwap(prices, volumes)
    # sum(p*v) = 6130 and sum(v) = 60, so VWAP = 102.1666...
    # (the previous expected value 101.9091 did not match these inputs).
    assert vwap == pytest.approx(6130 / 60), "VWAP calculation error"
================================================
FILE: test/test_data_loaders.py
================================================
import pytest
from flowpylib.data.loaders.csv_loader import load_csv_data
def test_load_csv_data(tmp_path):
    """load_csv_data reads a CSV file into a non-empty DataFrame."""
    # Build the fixture on the fly instead of depending on a checked-in
    # file — the old 'tests/data/...' path did not match the repo's
    # 'test/' directory, so the test could never find its input.
    sample = tmp_path / "sample_data.csv"
    sample.write_text("price,volume\n100,10\n101,15\n")
    data = load_csv_data(sample)
    assert not data.empty, "Data should not be empty"
================================================
FILE: test/test_ml.py
================================================
import pytest
from flowpylib.ml.models import train_forecast_model
import pandas as pd
def test_train_forecast_model():
    """Fitting on a tiny frame returns a truthy (fitted) model object."""
    frame = pd.DataFrame({
        'feature1': [1, 2, 3, 4, 5],
        'feature2': [2, 3, 4, 5, 6],
        'target': [1.1, 1.2, 1.3, 1.4, 1.5],
    })
    fitted = train_forecast_model(frame, 'target')
    assert fitted, "Model should be trained"
================================================
FILE: test/test_portfolio.py
================================================
import pytest
import numpy as np
from flowpylib.portfolio.optimization import calculate_portfolio_variance, optimize_portfolio
def test_calculate_portfolio_variance():
    """w' Σ w for a 3-asset portfolio matches the hand-computed value."""
    weights = np.array([0.4, 0.3, 0.3])
    cov_matrix = np.array([[0.01, 0.0018, 0.0011],
                           [0.0018, 0.04, 0.0023],
                           [0.0011, 0.0023, 0.02]])
    # w' Σ w = 0.4*0.00487 + 0.3*0.01341 + 0.3*0.00713 = 0.008110
    # (the previous expected value 0.00483 did not match these inputs).
    expected_variance = 0.00811
    variance = calculate_portfolio_variance(weights, cov_matrix)
    assert np.isclose(variance, expected_variance, atol=1e-6), f"Variance calculation error: expected {expected_variance}, got {variance}"
def test_optimize_portfolio():
    """The optimizer yields one non-negative weight per asset, summing to 1."""
    returns = [0.1, 0.2, 0.15]
    cov_matrix = np.array([[0.01, 0.0018, 0.0011],
                           [0.0018, 0.04, 0.0023],
                           [0.0011, 0.0023, 0.02]])
    risk_tolerance = 0.1
    # Flatten so the per-asset checks hold whether the optimizer returns
    # shape (n,) or (1, n).
    weights = np.ravel(optimize_portfolio(returns, cov_matrix, risk_tolerance))
    assert len(weights) == len(returns), "Number of weights should match number of assets"
    assert np.isclose(np.sum(weights), 1, atol=1e-6), f"Weights should sum to 1, got sum {np.sum(weights)}"
    assert all(w >= 0 for w in weights), "All weights should be non-negative"
================================================
FILE: test/test_risk.py
================================================
import pytest
import numpy as np
from flowpylib.risk.compliance import calculate_var, compliance_check
def test_calculate_var():
    """Historical VaR of a six-point sample at 95% confidence."""
    returns = np.array([-0.02, -0.01, 0.01, 0.02, 0.03, 0.05])
    confidence_level = 0.95
    # calculate_var uses index = int((1 - cl) * n) = int(0.3) = 0, so the
    # worst observed return (-0.02) is the 95% VaR under this convention
    # (the previous expectation of -0.01 did not match the implementation).
    expected_var = -0.02
    var = calculate_var(returns, confidence_level)
    assert np.isclose(var, expected_var, atol=1e-6), f"VaR calculation error: expected {expected_var}, got {var}"
def test_compliance_check():
    """Only transactions with Amount above 10,000 are flagged."""
    import pandas as pd
    transaction_data = pd.DataFrame({
        'TransactionID': [1, 2, 3, 4, 5],
        'Amount': [5000, 15000, 3000, 25000, 1000],
    })
    flagged = compliance_check(transaction_data)
    expected_suspicious_ids = [2, 4]  # rows with Amount > 10000
    actual_ids = list(flagged['TransactionID'])
    assert actual_ids == expected_suspicious_ids, \
        f"Compliance check error: expected suspicious transactions {expected_suspicious_ids}, got {actual_ids}"
================================================
FILE: test/test_visualization.py
================================================
import pytest
from flowpylib.visualization.plotter import plot_candlestick
import pandas as pd
def test_plot_candlestick():
    """Plotting five daily closes yields a truthy figure object."""
    frame = pd.DataFrame({
        'Date': pd.date_range(start='2021-01-01', periods=5, freq='D'),
        'Close': [100, 102, 104, 103, 105],
    })
    result = plot_candlestick(frame)
    assert result, "Plot should be generated"