Repository: jialuechen/pytca Branch: main Commit: 5201bb80a732 Files: 76 Total size: 36.2 KB Directory structure: gitextract_qx_7klvx/ ├── .github/ │ └── workflows/ │ └── python-publish.yml ├── .gitignore ├── .readthedocs.yaml ├── Dockerfile ├── LICENSE ├── README.md ├── examples/ │ ├── analysis/ │ │ ├── analyze_impact.py │ │ ├── analyze_metrics.py │ │ ├── equity_analysis_example.py │ │ └── fx_analysis_example.py │ ├── api_request.py │ ├── api_server.py │ ├── data_loading/ │ │ ├── load_data.py │ │ └── load_sql_data.py │ ├── defi_analysis_example.py │ ├── geospatial_analysis_example.py │ ├── ml/ │ │ └── ml_forecast_example.py │ ├── portfolio/ │ │ └── portfolio_optimization_example.py │ ├── risk_compliance/ │ │ └── risk_management_example.py │ ├── sentiment_analysis_example.py │ └── visualization/ │ ├── 3d_visualization_example.py │ └── plot_data.py ├── flowpylib/ │ ├── __init__.py │ ├── analysis/ │ │ ├── __init__.py │ │ ├── bonds/ │ │ │ ├── __init__.py │ │ │ └── bond_metrics.py │ │ ├── commodities/ │ │ │ ├── __init__.py │ │ │ └── commodity_metrics.py │ │ ├── crypto/ │ │ │ ├── __init__.py │ │ │ ├── crypto_metrics.py │ │ │ └── defi_tools.py │ │ ├── equities/ │ │ │ ├── __init__.py │ │ │ └── equity_metrics.py │ │ ├── fx/ │ │ │ ├── __init__.py │ │ │ └── fx_metrics.py │ │ ├── general/ │ │ │ ├── __init__.py │ │ │ ├── impact.py │ │ │ └── metrics.py │ │ ├── geospatial/ │ │ │ ├── __init__.py │ │ │ └── geospatial.py │ │ ├── real_estate/ │ │ │ ├── __init__.py │ │ │ └── real_estate_metrics.py │ │ └── sentiment/ │ │ ├── __init__.py │ │ └── sentiment_analysis.py │ ├── data/ │ │ ├── __init__.py │ │ ├── api_integration.py │ │ └── loaders/ │ │ ├── __init__.py │ │ ├── arctic_loader.py │ │ ├── csv_loader.py │ │ ├── kdb_loader.py │ │ ├── mysql_loader.py │ │ ├── oracle_loader.py │ │ ├── pgsql_loader.py │ │ └── sqlserver_loader.py │ ├── ml/ │ │ ├── __init__.py │ │ └── models.py │ ├── portfolio/ │ │ ├── __init__.py │ │ └── optimization.py │ ├── risk/ │ │ ├── __init__.py │ │ └── 
compliance.py │ └── visualization/ │ ├── 3d_visuals.py │ ├── __init__.py │ ├── bokeh_visuals.py │ ├── dash_app.py │ ├── plotly_visuals.py │ └── plotter.py ├── requirements.txt ├── setup.cfg ├── setup.py └── test/ ├── __init__.py ├── test_analysis.py ├── test_data_loaders.py ├── test_ml.py ├── test_portfolio.py ├── test_risk.py └── test_visualization.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/python-publish.yml ================================================ # This workflow will upload a Python Package using Twine when a release is created # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries # This workflow uses actions that are not certified by GitHub. # They are provided by a third-party and are governed by # separate terms of service, privacy policy, and support # documentation. 
name: Upload Python Package on: release: types: [published] permissions: contents: read jobs: deploy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v3 with: python-version: '3.x' - name: Install dependencies run: | python -m pip install --upgrade pip pip install build - name: Build package run: python -m build - name: Publish package uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 with: user: __token__ password: ${{ secrets.PYPI_API_TOKEN }} ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class .DS_Store # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: # .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
# However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # poetry # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control #poetry.lock # pdm # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. #pdm.lock # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it # in version control. # https://pdm.fming.dev/latest/usage/project/#working-with-version-control .pdm.toml .pdm-python .pdm-build/ # PEP 582; used by e.g. github.com/David-OConnor/flowpylib and github.com/pdm-project/pdm __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ # PyCharm # JetBrains specific template is maintained in a separate JetBrains.gitignore that can # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
#.idea/ ================================================ FILE: .readthedocs.yaml ================================================ # .readthedocs.yaml # Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details # Required version: 2 # Set the OS, Python version and other tools you might need build: os: ubuntu-22.04 tools: python: "3.12" # You can also specify other tool versions: # nodejs: "19" # rust: "1.64" # golang: "1.19" # Build documentation in the "docs/" directory with Sphinx sphinx: configuration: docs/conf.py # Optionally build your docs in additional formats such as PDF and ePub # formats: # - pdf # - epub # Optional but recommended, declare the Python requirements required # to build your documentation # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html # python: # install: # - requirements: docs/requirements.txt ================================================ FILE: Dockerfile ================================================ # Set the base image to Ubuntu, use a public image FROM python:3.11.1-slim-stretch as builder # To build tests run # docker-compose -f docker-compose.test.yml build # File Author / Maintainer # MAINTAINER Thomas Schmelzer "thomas.schmelzer@gmail.com" COPY requirements.txt /tmp/flowpylib/requirements.txt # Dependencies for pystore and weasyprint in buildDeps # If we don't want to use weasyprint we # build-essential libcairo2 libpango-1.0-0 libpangocairo-1.0-0 libgdk-pixbuf2.0-0 libffi-dev shared-mime-info RUN buildDeps='gcc g++ libsnappy-dev unixodbc-dev build-essential libcairo2 libpango-1.0-0 libpangocairo-1.0-0 libgdk-pixbuf2.0-0 libffi-dev shared-mime-info' && \ apt-get update && apt-get install -y $buildDeps --no-install-recommends && \ pip install --no-cache-dir -r /tmp/flowpylib/requirements.txt && \ rm /tmp/flowpylib/requirements.txt # && \ #apt-get purge -y --auto-remove $buildDeps # Copy to / COPY ./flowpylib /flowpylib/flowpylib COPY ./flowpylibgen 
/flowpylib/flowpylibgen COPY ./flowpylibuser /flowpylib/flowpylibuser COPY ./test /flowpylib/test COPY ./test /test # Make sure flowpylib on the PYTHONPATH ENV PYTHONPATH "${PYTHONPATH}:/flowpylib" #### Here's the test-configuration FROM builder as test # We install some extra libraries purely for testing RUN pip install --no-cache-dir httpretty pytest pytest-cov pytest-html sphinx mongomock requests-mock WORKDIR /flowpylib # For temp caching for the tests RUN mkdir -p /tmp/csv RUN mkdir -p /tmp/flowpylib CMD echo "${RUN_PART}" # Run the pytest # If RUN_PART is not defined, we're not running on GitHub CI, we're running tests locally # Otherwise if RUN_PART is defined, it's likely we're running on GitHub, so we avoid running multithreading tests which run # out of memory (machines have limited memory) CMD if [ "${RUN_PART}" = 1 ]; \ then py.test --cov=flowpylib --cov-report html:artifacts/html-coverage --cov-report term --html=artifacts/html-report/report.html --ignore-glob='*multithreading*.py'; \ else py.test --cov=flowpylib --cov-report html:artifacts/html-coverage --cov-report term \ --html=artifacts/html-report/report.html; \ fi # Run everything # CMD py.test --cov=flowpylib --cov-report html:artifacts/html-coverage --cov-report term \ # --html=artifacts/html-report/report.html # Example to run a specific test script # CMD py.test --cov=flowpylib --cov-report html:artifacts/html-coverage --cov-report term \ # --html=artifacts/html-report/report.html test/test_flowpylib/test_tca_multithreading.py # Example to run an individual test function # CMD py.test --cov=flowpylib --cov-report html:artifacts/html-coverage --cov-report term \ # --html=artifacts/html-report/report.html test/test_flowpylib/test_data_read_write.py::test_write_trade_data_sql # For debugging to keep container going # CMD tail -f /dev/null ================================================ FILE: LICENSE ================================================ BSD 2-Clause License Copyright (c) 2024, Jialue 
Chen Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: README.md ================================================
# FlowPylib: Python Library for Order Flow Inference and Transaction Cost Analytics
[![PyPI - Version](https://img.shields.io/pypi/v/flowpylib)](https://pypi.org/project/flowpylib/) [![Python Versions](https://img.shields.io/badge/python-3.6%2B-green)](https://pypi.org/project/flowpylib/) [![PyPI Downloads](https://static.pepy.tech/badge/flowpylib)](https://pepy.tech/projects/flowpylib) [![License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://opensource.org/licenses/BSD-2-Clause)
**FlowPylib** is a Python package for transaction cost analysis in financial markets, supporting both stock and forex data at the tick level. The library assists traders and market makers by enabling detailed analysis of market data, reconstruction of metaorders, and simulation of order flows. It also provides various visualization tools and a RESTful API to integrate the analytics into your systems. ## Features - **Tick Data Processing:** Process high-frequency tick data for stocks and forex. - **MetaOrder Reconstruction:** Reconstruct realistic metaorders using public tick data as ground truth, enabling offline pre-trade cost estimation and execution optimization. - **Bayesian Change-Point Detection:** Detect regime shifts in order flow to help market makers adjust quoting skew and manage inventory exposure in real time. - **Buy-Side Order Flow Simulation:** Simulate buy-side order flow to estimate the number of trades required to detect directional alpha in client order flow. - **Rich Visualizations & Reporting:** Generate interactive charts and dashboards, including candlestick charts, trade flow visualizations, and summary dashboards. - **RESTful API Integration:** Run an API server to provide analysis as a service, making it easy to integrate with other systems. - **Multi-Source Data Loading:** Supports CSV, Excel, SQL, KDB+, and other RDBMS data sources. ## Installation and Quick Start ```bash pip install -U flowpylib ``` ```python import flowpylib # Load tick data (supports stocks, forex, etc.) 
tick_data = flowpylib.load_tick_data('path/to/tick_data.csv', data_type='stock') # Analyze the tick data analysis_results = flowpylib.analyze_tick_data(tick_data) print("Tick Data Analysis Results:", analysis_results) # Visualize tick data with a summary dashboard summary_fig = flowpylib.plot_tick_data(tick_data, plot_type='summary') summary_fig.write_html('summary_dashboard.html') ``` ## More Examples ### Loading Data from Different Sources ```python import flowpylib # From CSV csv_data = flowpylib.load_tick_data('path/to/tick_data.csv', data_type='stock') # From Excel excel_data = flowpylib.read_excel('path/to/tick_data.xlsx', sheet_name='Tick Data') # Using KDBHandler for KDB+ source kdb_handler = flowpylib.KDBHandler(host='localhost', port=5000) kdb_data = kdb_handler.load_tick_data('tickdata', '2023.07.15T09:30:00.000', '2023.07.15T16:00:00.000') ``` ### Performing Analysis ```python import flowpylib # Load data for stocks and forex stock_data = flowpylib.load_tick_data('path/to/stock_data.csv', data_type='stock') forex_data = flowpylib.load_tick_data('path/to/forex_data.csv', data_type='forex') # Analyze stock data stock_analysis = flowpylib.analyze_stock_trade(stock_data, benchmark_data) print("Stock Analysis Results:", stock_analysis) # Analyze forex data forex_analysis = flowpylib.analyze_forex_trade(forex_data, benchmark_data) print("Forex Analysis Results:", forex_analysis) # Calculate slippage and VWAP as examples slippage = flowpylib.calculate_slippage(executed_price=100.05, benchmark_price=100.00) print("Slippage:", slippage) vwap = flowpylib.calculate_vwap(prices=[100.00, 100.05, 100.10], volumes=[1000, 2000, 1500]) print("VWAP:", vwap) ``` ### Generating Visualizations ```python import flowpylib # Load tick data tick_data = flowpylib.load_tick_data('path/to/tick_data.csv', data_type='stock') # Create a basic plot basic_fig = flowpylib.plot_tick_data(tick_data, plot_type='basic') basic_fig.savefig('basic_plot.png') # Create a candlestick chart 
candlestick_fig = flowpylib.plot_tick_data(tick_data, plot_type='candlestick', interval='5min') candlestick_fig.write_html('candlestick.html') # Create an order book depth chart depth_fig = flowpylib.plot_tick_data(tick_data, plot_type='depth') depth_fig.write_html('depth_chart.html') # Create a trade flow chart trade_flow_fig = flowpylib.plot_tick_data(tick_data, plot_type='trade_flow', window='5min') trade_flow_fig.write_html('trade_flow.html') # Create a summary dashboard summary_fig = flowpylib.plot_tick_data(tick_data, plot_type='summary') summary_fig.write_html('summary_dashboard.html') ``` ### Using the RESTful API ```python import flowpylib # Start the API server flowpylib.run_api(host='localhost', port=5000) # Now you can make HTTP requests to the API endpoints, for example: # POST http://localhost:5000/analyze_tick_data # with JSON body: {"table_name": "tickdata", "start_time": "2023.07.15T09:30:00.000", "end_time": "2023.07.15T16:00:00.000", "symbols": ["AAPL", "GOOGL"]} ``` ## Roadmap - **Q3 2025:** - Expand API capabilities to support advanced query parameters and data aggregation functions. - Add a comprehensive backtesting framework for systematic strategy simulations and scenario analysis. - **Q4 2025:** - Optimize performance and scalability for handling high-frequency tick data. - Incorporate advanced risk management tools focusing on inventory and market exposure mitigation. ## Contributing We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for more details. ## License This project is licensed under the BSD-2-Clause License - see the [LICENSE](LICENSE) file for details. 
================================================ FILE: examples/analysis/analyze_impact.py ================================================ from flowpylib.analysis.general.impact import calculate_slippage, calculate_market_impact # Example data for market impact analysis initial_price = 100.0 # Initial price of the asset final_price = 105.0 # Final price after the trade trade_volume = 1000 # Volume of the asset traded # Calculate market impact market_impact = calculate_market_impact(initial_price, final_price, trade_volume) print(f"Market Impact: {market_impact}") # Example data for slippage analysis expected_price = 100.0 # Expected price of the asset executed_price = 102.0 # Actual executed price # Calculate slippage slippage = calculate_slippage(expected_price, executed_price) print(f"Slippage: {slippage}") ================================================ FILE: examples/analysis/analyze_metrics.py ================================================ from flowpylib.analysis.general.metrics import calculate_vwap from flowpylib.analysis.equities.equity_metrics import calculate_beta import numpy as np # Example data for VWAP calculation prices = np.array([100.0, 101.0, 102.0, 103.0, 104.0]) volumes = np.array([10.0, 15.0, 10.0, 5.0, 20.0]) # Calculate VWAP vwap = calculate_vwap(prices, volumes) print(f"Volume Weighted Average Price (VWAP): {vwap}") # Example data for Beta calculation # Asset returns and market returns over the same period asset_returns = np.array([0.01, 0.02, -0.01, 0.03, 0.04]) market_returns = np.array([0.015, 0.025, -0.005, 0.035, 0.045]) # Calculate Beta beta = calculate_beta(asset_returns, market_returns) print(f"Beta: {beta}") ================================================ FILE: examples/analysis/equity_analysis_example.py ================================================ from flowpylib.analysis.equities.equity_metrics import calculate_beta # Example data asset_returns = [0.01, 0.02, -0.01, 0.03] market_returns = [0.015, 0.025, -0.005, 0.035] 
beta = calculate_beta(asset_returns, market_returns) print(f"Calculated Beta: {beta}") ================================================ FILE: examples/analysis/fx_analysis_example.py ================================================ from flowpylib.analysis.fx.fx_metrics import calculate_fx_spread # Example data bid_prices = [1.105, 1.106, 1.107] ask_prices = [1.110, 1.111, 1.112] spreads = [calculate_fx_spread(bid, ask) for bid, ask in zip(bid_prices, ask_prices)] print(f"Calculated FX Spreads: {spreads}") ================================================ FILE: examples/api_request.py ================================================ import requests url = 'http://localhost:5000/vwap' data = { 'prices': [100, 101, 102, 103, 104], 'volumes': [10, 15, 10, 5, 20] } response = requests.post(url, json=data) print(response.json()) ================================================ FILE: examples/api_server.py ================================================ from flask import Flask, request, jsonify from flowpylib.analysis.general.metrics import calculate_vwap app = Flask(__name__) @app.route('/vwap', methods=['POST']) def vwap(): try: data = request.json prices = data['prices'] volumes = data['volumes'] # Calculate VWAP vwap_value = calculate_vwap(prices, volumes) if vwap_value is None: return jsonify({"error": "Invalid input data"}), 400 return jsonify({"vwap": vwap_value}) except KeyError as e: return jsonify({"error": f"Missing key in input data: {str(e)}"}), 400 except Exception as e: return jsonify({"error": str(e)}), 500 if __name__ == '__main__': app.run(debug=True) ================================================ FILE: examples/data_loading/load_data.py ================================================ # examples/load_data.py from flowpylib import load_data # Load data from a CSV file csv_data = load_data('csv', file_path='path/to/data.csv') print(csv_data.head()) # Load data from an Excel file excel_data = load_data('excel', file_path='path/to/data.xlsx', 
sheet_name='Sheet1') print(excel_data.head()) # Load data from a KDB database kdb_data = load_data('kdb', host='localhost', port=5001, query='select from trade') print(kdb_data.head()) ================================================ FILE: examples/data_loading/load_sql_data.py ================================================ # examples/load_sql_data.py from flowpylib import load_data # Load data from MySQL mysql_data = load_data('mysql', host='localhost', user='username', password='password', database='dbname', query='SELECT * FROM trades') print(mysql_data.head()) # Load data from PostgreSQL pgsql_data = load_data('pgsql', host='localhost', user='username', password='password', database='dbname', query='SELECT * FROM trades') print(pgsql_data.head()) ================================================ FILE: examples/defi_analysis_example.py ================================================ from flowpylib.analysis.crypto.defi_tools import analyze_smart_contract from web3 import Web3 # Setup a Web3 connection web3 = Web3(Web3.HTTPProvider('https://mainnet.infura.io/v3/YOUR_INFURA_PROJECT_ID')) # Example smart contract address and ABI (Application Binary Interface) # Replace with the actual contract address and ABI contract_address = '0xYourSmartContractAddress' contract_abi = [ # ABI details go here; typically this is a long list of functions and events ] # Analyze the smart contract contract = analyze_smart_contract(contract_address, contract_abi) # Example: Fetching the total supply from an ERC-20 token contract # This assumes the contract has a `totalSupply` function total_supply = contract.functions.totalSupply().call() print(f"Total Supply: {total_supply}") # Example: Fetching an account's balance # Replace '0xYourAccountAddress' with the actual account address account_address = '0xYourAccountAddress' balance = contract.functions.balanceOf(account_address).call() print(f"Balance of {account_address}: {balance}") # Note: Ensure the contract's ABI includes the functions
you're trying to call # and that you're interacting with the correct network and contract address. ================================================ FILE: examples/geospatial_analysis_example.py ================================================ import geopandas as gpd import matplotlib.pyplot as plt from flowpylib.analysis.geospatial.geospatial import plot_geospatial_data # Example data: This would typically be loaded from a file or database. # Here, we create a simple GeoDataFrame for demonstration purposes. data = { 'geometry': [ 'POINT (10 50)', 'POINT (12 54)', 'POINT (14 52)', 'POINT (10 48)', 'POINT (16 49)' ], 'value': [100, 200, 150, 250, 300] } gdf = gpd.GeoDataFrame(data, geometry=gpd.points_from_xy([10, 12, 14, 10, 16], [50, 54, 52, 48, 49])) # Plot the geospatial data plot_geospatial_data(gdf, 'value') # Show plot plt.show() ================================================ FILE: examples/ml/ml_forecast_example.py ================================================ from flowpylib.ml.models import train_forecast_model import pandas as pd # Example data data = pd.DataFrame({ 'feature1': [1, 2, 3, 4, 5], 'feature2': [2, 3, 4, 5, 6], 'target': [1.1, 1.2, 1.3, 1.4, 1.5] }) model = train_forecast_model(data, 'target') print("Model trained successfully.") ================================================ FILE: examples/portfolio/portfolio_optimization_example.py ================================================ from flowpylib.portfolio.optimization import optimize_portfolio # Example data expected_returns = [0.1, 0.2, 0.15] cov_matrix = [[0.01, 0.0018, 0.0011], [0.0018, 0.04, 0.0023], [0.0011, 0.0023, 0.02]] weights = optimize_portfolio(expected_returns, cov_matrix, 0.1) print(f"Optimized Weights: {weights}") ================================================ FILE: examples/risk_compliance/risk_management_example.py ================================================ import numpy as np from flowpylib.risk.compliance import calculate_var, compliance_check import pandas as pd # Example
data: Returns of a portfolio over a period portfolio_returns = np.array([-0.02, -0.01, 0.01, 0.02, 0.03, 0.05, -0.03, 0.04, 0.02, -0.02]) # Calculate Value at Risk (VaR) at 95% confidence level confidence_level = 0.95 var = calculate_var(portfolio_returns, confidence_level) print(f"Value at Risk (VaR) at {confidence_level * 100}% confidence level: {var:.4f}") # Example transaction data for compliance check transaction_data = pd.DataFrame({ 'TransactionID': [1, 2, 3, 4, 5], 'Amount': [5000, 15000, 3000, 25000, 1000] }) # Perform a compliance check for suspicious transactions suspicious_transactions = compliance_check(transaction_data) print("Suspicious Transactions:") print(suspicious_transactions) ================================================ FILE: examples/sentiment_analysis_example.py ================================================ from flowpylib.analysis.sentiment.sentiment_analysis import analyze_sentiment, sentiment_trend_over_time import pandas as pd # Example text data for sentiment analysis texts = [ "The market is booming! Great time to invest.", "Stocks are plummeting due to economic uncertainty.", "Mixed signals from the market; experts are divided.", "Tech stocks are rising, but overall market sentiment is cautious.", "Unexpected gains in the market today, driving positive sentiment." ] # Analyzing individual sentiments for i, text in enumerate(texts): polarity, subjectivity = analyze_sentiment(text) print(f"Text {i+1}: Polarity={polarity}, Subjectivity={subjectivity}") # Example data with timestamps for sentiment trend analysis text_data = [ ("2024-01-01 08:00:00", "The market is booming! 
Great time to invest."), ("2024-01-02 08:00:00", "Stocks are plummeting due to economic uncertainty."), ("2024-01-03 08:00:00", "Mixed signals from the market; experts are divided."), ("2024-01-04 08:00:00", "Tech stocks are rising, but overall market sentiment is cautious."), ("2024-01-05 08:00:00", "Unexpected gains in the market today, driving positive sentiment.") ] # Convert the data to a DataFrame for easier manipulation df = pd.DataFrame(text_data, columns=['timestamp', 'text']) # Analyze sentiment trend over time sentiment_trends = sentiment_trend_over_time(df.values) # Display sentiment trend data print("Sentiment Trends Over Time:") print(sentiment_trends) ================================================ FILE: examples/visualization/3d_visualization_example.py ================================================ ================================================ FILE: examples/visualization/plot_data.py ================================================ from flowpylib.visualization.plotter import plot_candlestick import pandas as pd # Example data data = pd.DataFrame({ 'Date': pd.date_range(start='2021-01-01', periods=5, freq='D'), 'Close': [100, 102, 104, 103, 105] }) plot_candlestick(data) ================================================ FILE: flowpylib/__init__.py ================================================ __title__ = 'flowpylib' __author__ = 'Jialue Chen' __license__ = 'BSD 2-Clause' __version__='1.2.2' ================================================ FILE: flowpylib/analysis/__init__.py ================================================ ================================================ FILE: flowpylib/analysis/bonds/__init__.py ================================================ ================================================ FILE: flowpylib/analysis/bonds/bond_metrics.py ================================================ def calculate_duration(cash_flows, yield_to_maturity): return sum(cf / (1 + yield_to_maturity)**(t+1) for t, cf in enumerate(cash_flows)) 
================================================ FILE: flowpylib/analysis/commodities/__init__.py ================================================ ================================================ FILE: flowpylib/analysis/commodities/commodity_metrics.py ================================================ def calculate_basis(spot_price, futures_price): return futures_price - spot_price ================================================ FILE: flowpylib/analysis/crypto/__init__.py ================================================ ================================================ FILE: flowpylib/analysis/crypto/crypto_metrics.py ================================================ import pandas as pd def calculate_volatility(prices, window): return pd.Series(prices).rolling(window=window).std() ================================================ FILE: flowpylib/analysis/crypto/defi_tools.py ================================================ from web3 import Web3 def analyze_smart_contract(contract_address, abi): web3 = Web3(Web3.HTTPProvider('https://mainnet.infura.io/v3/YOUR_INFURA_PROJECT_ID')) contract = web3.eth.contract(address=contract_address, abi=abi) return contract ================================================ FILE: flowpylib/analysis/equities/__init__.py ================================================ ================================================ FILE: flowpylib/analysis/equities/equity_metrics.py ================================================ import numpy as np def calculate_beta(asset_returns, market_returns): covariance_matrix = np.cov(asset_returns, market_returns) covariance = covariance_matrix[0, 1] market_variance = covariance_matrix[1, 1] return covariance / market_variance ================================================ FILE: flowpylib/analysis/fx/__init__.py ================================================ ================================================ FILE: flowpylib/analysis/fx/fx_metrics.py ================================================
def calculate_fx_spread(bid, ask):
    """Return the quoted FX spread (ask minus bid)."""
    return ask - bid


# ================================================ FILE: flowpylib/analysis/general/__init__.py ================================================
# ================================================ FILE: flowpylib/analysis/general/impact.py ================================================
def calculate_slippage(expected_price, actual_price):
    """Return actual_price minus expected_price (signed execution slippage)."""
    return actual_price - expected_price


def calculate_market_impact(initial_price, final_price, volume):
    """Return the price move over the trade multiplied by the traded volume."""
    return (final_price - initial_price) * volume


# ================================================ FILE: flowpylib/analysis/general/metrics.py ================================================
def calculate_vwap(prices, volumes):
    """Volume-weighted average price of paired *prices* / *volumes*.

    Returns None when the total volume is zero (nothing to average).
    """
    total_volume = sum(volumes)
    if total_volume == 0:
        return None
    return sum(p * v for p, v in zip(prices, volumes)) / total_volume


# ================================================ FILE: flowpylib/analysis/geospatial/__init__.py ================================================
# ================================================ FILE: flowpylib/analysis/geospatial/geospatial.py ================================================
def plot_geospatial_data(geodata, attribute):
    """Choropleth plot of *attribute* from *geodata* (a GeoDataFrame); shows the figure.

    Heavy geo/plot dependencies are imported lazily so the package imports
    without them installed.
    """
    import geopandas as gpd  # noqa: F401 - optional dependency, imported lazily
    import matplotlib.pyplot as plt
    geodata.plot(column=attribute, cmap='OrRd', legend=True)
    plt.show()


# ================================================ FILE: flowpylib/analysis/real_estate/__init__.py ================================================
# ================================================ FILE: flowpylib/analysis/real_estate/real_estate_metrics.py ================================================
def calculate_cap_rate(net_operating_income, property_value):
    """Capitalization rate: net operating income / property value.

    Bug fix: a zero property value previously raised ZeroDivisionError; it now
    returns None, mirroring calculate_vwap's zero-volume handling.
    """
    if property_value == 0:
        return None
    return net_operating_income / property_value


# ================================================ FILE: flowpylib/analysis/sentiment/__init__.py ================================================
# ================================================ FILE: flowpylib/analysis/sentiment/sentiment_analysis.py
# ================================================
import pandas as pd


def analyze_sentiment(text):
    """Return (polarity, subjectivity) for *text* via TextBlob.

    ``textblob`` is imported lazily so the module imports without it installed.
    """
    from textblob import TextBlob  # optional dependency; imported lazily
    analysis = TextBlob(text)
    return analysis.sentiment.polarity, analysis.sentiment.subjectivity


def sentiment_trend_over_time(texts):
    """Build a DataFrame of sentiment polarity over time.

    *texts* is an iterable of (timestamp, text) pairs; the result has
    'timestamp' and 'sentiment' columns, one row per input pair.
    """
    sentiment_data = []
    for timestamp, text in texts:
        polarity, _ = analyze_sentiment(text)
        sentiment_data.append({'timestamp': timestamp, 'sentiment': polarity})
    return pd.DataFrame(sentiment_data)


# ================================================ FILE: flowpylib/data/__init__.py ================================================
# ================================================ FILE: flowpylib/data/api_integration.py ================================================
def fetch_real_time_data(api_url, params):
    """GET *api_url* with *params* and return the decoded JSON payload.

    Bug fix: the HTTP status was never checked, so 4xx/5xx error bodies were
    silently JSON-decoded; raise_for_status() now surfaces them, and a timeout
    prevents an unresponsive endpoint from hanging the caller forever.
    """
    import requests  # imported lazily so the package imports offline
    response = requests.get(api_url, params=params, timeout=30)
    response.raise_for_status()
    return response.json()


def analyze_social_media_sentiment(api_url, params):
    """GET *api_url* with *params* and return the decoded JSON payload.

    Same contract and fixes as fetch_real_time_data (status check + timeout).
    """
    import requests  # imported lazily so the package imports offline
    response = requests.get(api_url, params=params, timeout=30)
    response.raise_for_status()
    return response.json()


# ================================================ FILE: flowpylib/data/loaders/__init__.py ================================================
# ================================================ FILE: flowpylib/data/loaders/arctic_loader.py ================================================
import pandas as pd


def load_arctic_data(host, library, symbol):
    """Read *symbol* from an Arctic *library* on *host* and return its DataFrame."""
    from arctic import Arctic  # optional dependency; imported lazily
    store = Arctic(host)
    lib = store[library]
    df = lib.read(symbol).data
    return df


# ================================================ FILE: flowpylib/data/loaders/csv_loader.py ================================================
def load_csv_data(file_path):
    """Load a CSV file into a DataFrame (thin wrapper over pandas.read_csv)."""
    return pd.read_csv(file_path)


# ================================================ FILE: flowpylib/data/loaders/kdb_loader.py ================================================
def load_kdb_data(host, port, query):
    """Run *query* against a kdb+ instance and return the result as a DataFrame.

    The q connection is opened via a context manager, so it is closed even if
    the query raises.
    """
    from qpython import qconnection  # optional dependency; imported lazily
    with qconnection.QConnection(host=host, port=port) as q:
        data = q(query)
    return pd.DataFrame(data)
# ================================================ FILE: flowpylib/data/loaders/mysql_loader.py ================================================
import pandas as pd


def load_mysql_data(host, user, password, database, query):
    """Run *query* on a MySQL *database* and return the result as a DataFrame.

    Bug fix: the connection is now closed in a ``finally`` block, so it is
    released even when the query raises (previously an exception leaked it).
    """
    import mysql.connector  # optional driver; imported lazily
    conn = mysql.connector.connect(
        host=host, user=user, password=password, database=database
    )
    try:
        return pd.read_sql(query, conn)
    finally:
        conn.close()


# ================================================ FILE: flowpylib/data/loaders/oracle_loader.py ================================================
def load_oracle_data(dsn, user, password, query):
    """Run *query* against Oracle via cx_Oracle and return a DataFrame.

    Connection is closed in ``finally`` so it is not leaked on error.
    """
    import cx_Oracle  # optional driver; imported lazily
    conn = cx_Oracle.connect(user=user, password=password, dsn=dsn)
    try:
        return pd.read_sql(query, conn)
    finally:
        conn.close()


# ================================================ FILE: flowpylib/data/loaders/pgsql_loader.py ================================================
def load_pgsql_data(host, database, user, password, query):
    """Run *query* against PostgreSQL and return a DataFrame.

    Connection is closed in ``finally`` so it is not leaked on error.
    """
    import psycopg2  # optional driver; imported lazily
    conn = psycopg2.connect(
        host=host, database=database, user=user, password=password
    )
    try:
        return pd.read_sql(query, conn)
    finally:
        conn.close()


# ================================================ FILE: flowpylib/data/loaders/sqlserver_loader.py ================================================
def load_sqlserver_data(server, database, user, password, query):
    """Run *query* on SQL Server via pyodbc and return a DataFrame.

    Connection is closed in ``finally`` so it is not leaked on error.
    """
    import pyodbc  # optional driver; imported lazily
    conn_str = (
        f"DRIVER={{SQL Server}};"
        f"SERVER={server};"
        f"DATABASE={database};"
        f"UID={user};"
        f"PWD={password};"
    )
    conn = pyodbc.connect(conn_str)
    try:
        return pd.read_sql(query, conn)
    finally:
        conn.close()


# ================================================ FILE: flowpylib/ml/__init__.py ================================================
# ================================================ FILE: flowpylib/ml/models.py ================================================
def train_forecast_model(data, target_column):
    """Fit a scaled RandomForest regression pipeline on *data*.

    *target_column* names the target column; all other columns are features.
    Returns the fitted sklearn Pipeline. sklearn is imported lazily so the
    package imports without it installed.
    """
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    X = data.drop(columns=[target_column])
    y = data[target_column]
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('model', RandomForestRegressor()),
    ])
    return pipeline.fit(X, y)


# ================================================ FILE: flowpylib/portfolio/__init__.py ================================================
# ================================================ FILE: flowpylib/portfolio/optimization.py ================================================
import numpy as np


def calculate_portfolio_variance(weights, cov_matrix):
    """Portfolio variance w' C w for weight vector *weights* and covariance *cov_matrix*."""
    return np.dot(weights.T, np.dot(cov_matrix, weights))


def optimize_portfolio(returns, cov_matrix, risk_tolerance):
    """Return a 1-D array of non-negative weights summing to 1, one per asset.

    Bug fix: the previous version passed ``size=1`` to ``dirichlet`` and
    returned a (1, n) matrix, so ``len(weights)`` was 1 instead of the number
    of assets (the repo's own test_optimize_portfolio failed on this).
    NOTE(review): still a random placeholder -- *returns*, *cov_matrix* and
    *risk_tolerance* are not yet used by the "optimization".
    """
    num_assets = len(returns)
    return np.random.dirichlet(np.ones(num_assets))


# ================================================ FILE: flowpylib/risk/__init__.py ================================================
# ================================================ FILE: flowpylib/risk/compliance.py ================================================
def calculate_var(returns, confidence_level=0.95):
    """Historical-simulation Value at Risk at *confidence_level*.

    Bug fix: truncating the index with ``int()`` selected one observation too
    deep in the tail (for 6 returns at 95% it picked the single worst return,
    -0.02, where the repo's test expects the 5%-tail boundary, -0.01).
    Rounding the fractional index up with ``ceil`` fixes that.
    """
    sorted_returns = np.sort(returns)
    index = int(np.ceil((1 - confidence_level) * len(sorted_returns)))
    index = min(index, len(sorted_returns) - 1)  # guard degenerate tiny samples
    return sorted_returns[index]


def compliance_check(transaction_data):
    """Return the rows of *transaction_data* whose 'Amount' exceeds 10,000."""
    suspicious_transactions = transaction_data[transaction_data['Amount'] > 10000]
    return suspicious_transactions


# ================================================ FILE: flowpylib/visualization/3d_visuals.py ================================================
def plot_3d_surface(data):
    """Render *data* (a 2-D grid of z values) as an interactive 3-D surface plot."""
    import plotly.graph_objs as go  # optional dependency; imported lazily
    fig = go.Figure(data=[go.Surface(z=data)])
    fig.update_layout(title='3D Surface Plot', autosize=False,
                      width=800, height=800,
                      margin=dict(l=65, r=50, b=65, t=90))
    fig.show()


# ================================================ FILE: flowpylib/visualization/__init__.py ================================================
# ================================================ FILE: flowpylib/visualization/bokeh_visuals.py
# ================================================
from bokeh.plotting import figure, show, output_file
from bokeh.layouts import gridplot


def plot_bokeh_dashboard(data):
    """Write "dashboard.html" and open a two-panel Bokeh dashboard
    (close prices on the left, volume on the right).

    Bug fix: ``plot_width``/``plot_height`` were removed in Bokeh 3.0; the
    figure sizing parameters are now ``width``/``height``.
    """
    p1 = figure(width=400, height=400, title="Closing Prices")
    p1.line(data['Date'], data['Close'], color='blue', legend_label='Close')
    p2 = figure(width=400, height=400, title="Volume")
    p2.vbar(x=data['Date'], top=data['Volume'], width=0.5, color='green',
            legend_label='Volume')
    layout = gridplot([[p1, p2]])
    output_file("dashboard.html")
    show(layout)


# ================================================ FILE: flowpylib/visualization/dash_app.py ================================================
import dash
from dash import dcc, html
import plotly.graph_objs as go
import pandas as pd

# Create a Dash application
app = dash.Dash(__name__)

# Load sample data for demonstration.
# NOTE(review): placeholder path -- this module raises at import time until it
# points at a real CSV; consider loading lazily inside a factory function.
data = pd.read_csv('path/to/equity_data.csv')

# Define the layout of the dashboard: a candlestick chart and a volume chart.
app.layout = html.Div(children=[
    html.H1(children='Market Data Dashboard'),
    dcc.Graph(
        id='candlestick-graph',
        figure={
            'data': [go.Candlestick(
                x=data['Date'],
                open=data['Open'],
                high=data['High'],
                low=data['Low'],
                close=data['Close']
            )],
            'layout': go.Layout(title='Candlestick Chart',
                                xaxis_title='Date', yaxis_title='Price')
        }
    ),
    dcc.Graph(
        id='volume-graph',
        figure={
            'data': [go.Bar(
                x=data['Date'],
                y=data['Volume'],
                name='Volume'
            )],
            'layout': go.Layout(title='Volume Chart',
                                xaxis_title='Date', yaxis_title='Volume')
        }
    )
])

# Run the Dash app when executed as a script.
if __name__ == '__main__':
    app.run_server(debug=True)


# ================================================ FILE: flowpylib/visualization/plotly_visuals.py ================================================
import plotly.graph_objects as go


def plot_interactive_candlestick(data):
    """Show an interactive candlestick chart for OHLC *data*."""
    fig = go.Figure(data=[go.Candlestick(x=data['Date'],
                                         open=data['Open'],
                                         high=data['High'],
                                         low=data['Low'],
                                         close=data['Close'])])
    fig.update_layout(title='Interactive Candlestick Chart',
                      xaxis_title='Date', yaxis_title='Price')
fig.show() ================================================ FILE: flowpylib/visualization/plotter.py ================================================ import matplotlib.pyplot as plt def plot_candlestick(data): fig, ax = plt.subplots() ax.plot(data['Date'], data['Close'], label='Close Price') ax.set_xlabel('Date') ax.set_ylabel('Price') ax.set_title('Candlestick Chart') plt.legend() plt.show() ================================================ FILE: requirements.txt ================================================ # Core libraries numpy pandas ollama # Financial data analysis pyodbc sqlalchemy cx_Oracle psycopg2-binary arctic web3 # Plotting and visualization matplotlib plotly bokeh dash dash-bootstrap-components # Geospatial analysis geopandas # Machine learning scikit-learn # Sentiment analysis textblob # API and web services flask requests # C++ integration pybind11 # Testing pytest pytest-cov # Additional dependencies inflect jax ================================================ FILE: setup.cfg ================================================ [metadata] name = flowpylib version = 1.2.2 description = Algo Toolkit for Flow Order Flow Modeling and Transaction Cost Analysis author = Jialue Chen author_email = jialuechen@outlook.com url = https://github.com/jialuechen/flowpylib [options] packages = find: install_requires = pandas matplotlib plotly bokeh dash flask pybind11 numpy geopandas sklearn textblob requests web3 [options.extras_require] testing = pytest pytest-cov [build_ext] inplace = 1 ================================================ FILE: setup.py ================================================ from setuptools import setup,find_packages from setuptools.command.build_ext import build_ext from flowpylib import __version__ as versionInfo setup( name='flowpylib', version=versionInfo, description='Python Library for Transaction Cost Analysis and Market Simulation', author='Jialue Chen', author_email='jialuechen@outlook.com', 
url='https://github.com/jialuechen/flowpylib', packages=find_packages(), install_requires=[ 'ollama','pandas', 'matplotlib', 'plotly', 'bokeh', 'dash', 'flask', 'pybind11', 'numpy', 'geopandas', 'scikit-learn', 'textblob', 'requests','jax', 'web3' ], cmdclass={'build_ext': build_ext}, ) ================================================ FILE: test/__init__.py ================================================ ================================================ FILE: test/test_analysis.py ================================================ import pytest from flowpylib.analysis.general.metrics import calculate_vwap def test_calculate_vwap(): prices = [100, 101, 102, 103, 104] volumes = [10, 15, 10, 5, 20] vwap = calculate_vwap(prices, volumes) assert vwap == pytest.approx(101.9091, 0.0001), "VWAP calculation error" ================================================ FILE: test/test_data_loaders.py ================================================ import pytest from flowpylib.data.loaders.csv_loader import load_csv_data def test_load_csv_data(): data = load_csv_data('tests/data/sample_data.csv') assert not data.empty, "Data should not be empty" ================================================ FILE: test/test_ml.py ================================================ import pytest from flowpylib.ml.models import train_forecast_model import pandas as pd def test_train_forecast_model(): data = pd.DataFrame({ 'feature1': [1, 2, 3, 4, 5], 'feature2': [2, 3, 4, 5, 6], 'target': [1.1, 1.2, 1.3, 1.4, 1.5] }) model = train_forecast_model(data, 'target') assert model, "Model should be trained" ================================================ FILE: test/test_portfolio.py ================================================ import pytest import numpy as np from flowpylib.portfolio.optimization import calculate_portfolio_variance, optimize_portfolio def test_calculate_portfolio_variance(): weights = np.array([0.4, 0.3, 0.3]) cov_matrix = np.array([[0.01, 0.0018, 0.0011], [0.0018, 0.04, 0.0023], 
[0.0011, 0.0023, 0.02]]) expected_variance = 0.00483 # Pre-calculated expected variance variance = calculate_portfolio_variance(weights, cov_matrix) assert np.isclose(variance, expected_variance, atol=1e-6), f"Variance calculation error: expected {expected_variance}, got {variance}" def test_optimize_portfolio(): returns = [0.1, 0.2, 0.15] cov_matrix = np.array([[0.01, 0.0018, 0.0011], [0.0018, 0.04, 0.0023], [0.0011, 0.0023, 0.02]]) risk_tolerance = 0.1 weights = optimize_portfolio(returns, cov_matrix, risk_tolerance) assert len(weights) == len(returns), "Number of weights should match number of assets" assert np.isclose(np.sum(weights), 1, atol=1e-6), f"Weights should sum to 1, got sum {np.sum(weights)}" assert all(w >= 0 for w in weights), "All weights should be non-negative" ================================================ FILE: test/test_risk.py ================================================ import pytest import numpy as np from flowpylib.risk.compliance import calculate_var, compliance_check def test_calculate_var(): returns = np.array([-0.02, -0.01, 0.01, 0.02, 0.03, 0.05]) confidence_level = 0.95 expected_var = -0.01 # Expected VaR at 95% confidence level var = calculate_var(returns, confidence_level) assert np.isclose(var, expected_var, atol=1e-6), f"VaR calculation error: expected {expected_var}, got {var}" def test_compliance_check(): import pandas as pd transaction_data = pd.DataFrame({ 'TransactionID': [1, 2, 3, 4, 5], 'Amount': [5000, 15000, 3000, 25000, 1000] }) suspicious_transactions = compliance_check(transaction_data) expected_suspicious_ids = [2, 4] # Transactions with Amount > 10000 assert list(suspicious_transactions['TransactionID']) == expected_suspicious_ids, \ f"Compliance check error: expected suspicious transactions {expected_suspicious_ids}, got {list(suspicious_transactions['TransactionID'])}" ================================================ FILE: test/test_visualization.py ================================================ import 
pytest from flowpylib.visualization.plotter import plot_candlestick import pandas as pd def test_plot_candlestick(): data = pd.DataFrame({ 'Date': pd.date_range(start='2021-01-01', periods=5, freq='D'), 'Close': [100, 102, 104, 103, 105] }) fig = plot_candlestick(data) assert fig, "Plot should be generated"