[
  {
    "path": ".github/workflows/python-publish.yml",
    "content": "# This workflow will upload a Python Package using Twine when a release is created\n# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries\n\n# This workflow uses actions that are not certified by GitHub.\n# They are provided by a third-party and are governed by\n# separate terms of service, privacy policy, and support\n# documentation.\n\nname: Upload Python Package\n\non:\n  release:\n    types: [published]\n\npermissions:\n  contents: read\n\njobs:\n  deploy:\n\n    runs-on: ubuntu-latest\n\n    steps:\n    - uses: actions/checkout@v4\n    - name: Set up Python\n      uses: actions/setup-python@v3\n      with:\n        python-version: '3.x'\n    - name: Install dependencies\n      run: |\n        python -m pip install --upgrade pip\n        pip install build\n    - name: Build package\n      run: python -m build\n    - name: Publish package\n      uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29\n      with:\n        user: __token__\n        password: ${{ secrets.PYPI_API_TOKEN }}\n"
  },
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n\n.DS_Store\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\nshare/python-wheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.nox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n*.py,cover\n.hypothesis/\n.pytest_cache/\ncover/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\ndb.sqlite3-journal\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\n.pybuilder/\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# IPython\nprofile_default/\nipython_config.py\n\n# pyenv\n#   For a library or package, you might want to ignore these files since the code is\n#   intended to run in multiple environments; otherwise, check them in:\n# .python-version\n\n# pipenv\n#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.\n#   However, in case of collaboration, if having platform-specific dependencies or dependencies\n#   having no cross-platform support, pipenv may install dependencies that don't work, or not\n#   install all needed dependencies.\n#Pipfile.lock\n\n# poetry\n#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.\n#   This is especially recommended for binary packages to ensure reproducibility, and is more\n#   commonly ignored for libraries.\n#   
https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control\n#poetry.lock\n\n# pdm\n#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.\n#pdm.lock\n#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it\n#   in version control.\n#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control\n.pdm.toml\n.pdm-python\n.pdm-build/\n\n# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm\n__pypackages__/\n\n# Celery stuff\ncelerybeat-schedule\ncelerybeat.pid\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n.dmypy.json\ndmypy.json\n\n# Pyre type checker\n.pyre/\n\n# pytype static type analyzer\n.pytype/\n\n# Cython debug symbols\ncython_debug/\n\n# PyCharm\n#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can\n#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore\n#  and can be added to the global gitignore or merged into this file.  For a more nuclear\n#  option (not recommended) you can uncomment the following to ignore the entire idea folder.\n#.idea/\n"
  },
  {
    "path": ".readthedocs.yaml",
    "content": "# .readthedocs.yaml\n# Read the Docs configuration file\n# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details\n\n# Required\nversion: 2\n\n# Set the OS, Python version and other tools you might need\nbuild:\n  os: ubuntu-22.04\n  tools:\n    python: \"3.12\"\n    # You can also specify other tool versions:\n    # nodejs: \"19\"\n    # rust: \"1.64\"\n    # golang: \"1.19\"\n\n# Build documentation in the \"docs/\" directory with Sphinx\nsphinx:\n  configuration: docs/conf.py\n\n# Optionally build your docs in additional formats such as PDF and ePub\n# formats:\n#    - pdf\n#    - epub\n\n# Optional but recommended, declare the Python requirements required\n# to build your documentation\n# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html\n# python:\n#    install:\n#    - requirements: docs/requirements.txt"
  },
  {
    "path": "Dockerfile",
    "content": "# Set the base image to Ubuntu, use a public image\nFROM python:3.11.1-slim-stretch as builder\n\n# To build tests run\n# docker-compose -f docker-compose.test.yml build\n\n# File Author / Maintainer\n# MAINTAINER Thomas Schmelzer \"thomas.schmelzer@gmail.com\"\n\nCOPY requirements.txt /tmp/flowpylib/requirements.txt\n\n# Dependencies for pystore and weasyprint in buildDeps\n# If we don't want to use weasyprint we\n# build-essential libcairo2 libpango-1.0-0 libpangocairo-1.0-0 libgdk-pixbuf2.0-0 libffi-dev shared-mime-info\nRUN buildDeps='gcc g++ libsnappy-dev unixodbc-dev build-essential libcairo2 libpango-1.0-0 libpangocairo-1.0-0 libgdk-pixbuf2.0-0 libffi-dev shared-mime-info' && \\\n    apt-get update && apt-get install -y $buildDeps --no-install-recommends && \\\n    pip install --no-cache-dir -r /tmp/flowpylib/requirements.txt && \\\n    rm  /tmp/flowpylib/requirements.txt\n    # && \\\n    #apt-get purge -y --auto-remove $buildDeps\n\n# Copy to /\nCOPY ./flowpylib /flowpylib/flowpylib\nCOPY ./flowpylibgen /flowpylib/flowpylibgen\nCOPY ./flowpylibuser /flowpylib/flowpylibuser\nCOPY ./test /flowpylib/test\nCOPY ./test /test\n\n# Make sure flowpylib on the PYTHONPATH\nENV PYTHONPATH \"${PYTHONPATH}:/flowpylib\"\n\n#### Here's the test-configuration\nFROM builder as test\n\n# We install some extra libraries purely for testing\nRUN pip install --no-cache-dir httpretty pytest pytest-cov pytest-html sphinx mongomock requests-mock\n\nWORKDIR /flowpylib\n\n# For temp caching for the tests\nRUN mkdir -p /tmp/csv\nRUN mkdir -p /tmp/flowpylib\n\nCMD echo \"${RUN_PART}\"\n\n# Run the pytest\n# If RUN_PART is not defined, we're not running on GitHub CI, we're running tests locally\n# Otherwise if RUN_PART is defined, it's likely we're running on GitHub, so we avoid running multithreading tests which run\n# out of memory (machines have limited memory)\nCMD if [ \"${RUN_PART}\" = 1 ]; \\\n    then py.test --cov=flowpylib  --cov-report 
html:artifacts/html-coverage --cov-report term --html=artifacts/html-report/report.html --ignore-glob='*multithreading*.py'; \\\n    else py.test --cov=flowpylib  --cov-report html:artifacts/html-coverage --cov-report term \\\n        --html=artifacts/html-report/report.html; \\\n    fi\n\n# Run everything\n# CMD py.test --cov=flowpylib  --cov-report html:artifacts/html-coverage --cov-report term \\\n#        --html=artifacts/html-report/report.html\n\n# Example to run a specific test script\n# CMD py.test --cov=flowpylib  --cov-report html:artifacts/html-coverage --cov-report term \\\n#    --html=artifacts/html-report/report.html test/test_flowpylib/test_tca_multithreading.py\n\n# Example to run an individual test function\n# CMD py.test --cov=flowpylib  --cov-report html:artifacts/html-coverage --cov-report term \\\n#    --html=artifacts/html-report/report.html test/test_flowpylib/test_data_read_write.py::test_write_trade_data_sql\n\n# For debugging to keep container going\n# CMD tail -f /dev/null\n"
  },
  {
    "path": "LICENSE",
    "content": "BSD 2-Clause License\n\nCopyright (c) 2024, Jialue Chen\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n\n2. Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
  },
  {
    "path": "README.md",
    "content": "<div align=center>\n\n# FlowPylib: Python Library for Order Flow Inference and Transaction Cost Analytics\n\n</div>\n\n<div align=center>\n\n[![PyPI - Version](https://img.shields.io/pypi/v/pytca)](https://pypi.org/project/flowpylib/)\n[![Python Versions](https://img.shields.io/badge/python-3.6%2B-green)](https://pypi.org/project/flowpylib/)\n[![PyPI Downloads](https://static.pepy.tech/badge/flowpylib)](https://pepy.tech/projects/flowpylib)\n[![License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://opensource.org/licenses/BSD-2-Clause)\n\n</div>\n\n**FlowPylib** is a Python package for transaction cost analysis in financial markets, supporting both stock and forex data at the tick level. The library assists traders and market makers by enabling detailed analysis of market data, reconstruction of metaorders, and simulation of order flows. It also provides various visualization tools and a RESTful API to integrate the analytics into your systems.\n\n## Features\n\n- **Tick Data Processing:**  \n  Process high-frequency tick data for stocks and forex.\n\n- **MetaOrder Reconstruction:**  \n  Reconstruct realistic metaorders using public tick data as ground truth, enabling offline pre-trade cost estimation and execution optimization.\n\n- **Bayesian Change-Point Detection:**  \n  Detect regime shifts in order flow to help market makers adjust quoting skew and manage inventory exposure in real time.\n\n- **Buy-Side Order Flow Simulation:**  \n  Simulate buy-side order flow to estimate the number of trades required to detect directional alpha in client order flow.\n\n- **Rich Visualizations & Reporting:**  \n  Generate interactive charts and dashboards, including candlestick charts, trade flow visualizations, and summary dashboards.\n\n- **RESTful API Integration:**  \n  Run an API server to provide analysis as a service, making it easy to integrate with other systems.\n\n- **Multi-Source Data Loading:**  \n  Supports CSV, 
Excel, SQL, KDB+, and other RDBMS data sources.\n\n## Installation and Quick Start\n```bash\npip install -U flowpylib\n```\n\n```python\nimport flowpylib\n\n# Load tick data (supports stocks, forex, etc.)\ntick_data = flowpylib.load_tick_data('path/to/tick_data.csv', data_type='stock')\n\n# Analyze the tick data\nanalysis_results = flowpylib.analyze_tick_data(tick_data)\nprint(\"Tick Data Analysis Results:\", analysis_results)\n\n# Visualize tick data with a summary dashboard\nsummary_fig = flowpylib.plot_tick_data(tick_data, plot_type='summary')\nsummary_fig.write_html('summary_dashboard.html')\n```\n\n## More Examples\n\n### Loading Data from Different Sources\n\n```python\nimport flowpylib\n\n# From CSV\ncsv_data = flowpylib.load_tick_data('path/to/tick_data.csv', data_type='stock')\n\n# From Excel\nexcel_data = flowpylib.read_excel('path/to/tick_data.xlsx', sheet_name='Tick Data')\n\n# Using KDBHandler for KDB+ source\nkdb_handler = flowpylib.KDBHandler(host='localhost', port=5000)\nkdb_data = kdb_handler.load_tick_data('tickdata', '2023.07.15T09:30:00.000', '2023.07.15T16:00:00.000')\n```\n\n### Performing Analysis\n\n```python\nimport flowpylib\n\n# Load data for stocks and forex\nstock_data = flowpylib.load_tick_data('path/to/stock_data.csv', data_type='stock')\nforex_data = flowpylib.load_tick_data('path/to/forex_data.csv', data_type='forex')\n\n# Analyze stock data\nstock_analysis = flowpylib.analyze_stock_trade(stock_data, benchmark_data)\nprint(\"Stock Analysis Results:\", stock_analysis)\n\n# Analyze forex data\nforex_analysis = flowpylib.analyze_forex_trade(forex_data, benchmark_data)\nprint(\"Forex Analysis Results:\", forex_analysis)\n\n# Calculate slippage and VWAP as examples\nslippage = flowpylib.calculate_slippage(executed_price=100.05, benchmark_price=100.00)\nprint(\"Slippage:\", slippage)\n\nvwap = flowpylib.calculate_vwap(prices=[100.00, 100.05, 100.10], volumes=[1000, 2000, 1500])\nprint(\"VWAP:\", vwap)\n```\n\n### Generating 
Visualizations\n\n```python\nimport flowpylib\n\n# Load tick data\ntick_data = flowpylib.load_tick_data('path/to/tick_data.csv', data_type='stock')\n\n# Create a basic plot\nbasic_fig = flowpylib.plot_tick_data(tick_data, plot_type='basic')\nbasic_fig.savefig('basic_plot.png')\n\n# Create a candlestick chart\ncandlestick_fig = flowpylib.plot_tick_data(tick_data, plot_type='candlestick', interval='5min')\ncandlestick_fig.write_html('candlestick.html')\n\n# Create an order book depth chart\ndepth_fig = flowpylib.plot_tick_data(tick_data, plot_type='depth')\ndepth_fig.write_html('depth_chart.html')\n\n# Create a trade flow chart\ntrade_flow_fig = flowpylib.plot_tick_data(tick_data, plot_type='trade_flow', window='5min')\ntrade_flow_fig.write_html('trade_flow.html')\n\n# Create a summary dashboard\nsummary_fig = flowpylib.plot_tick_data(tick_data, plot_type='summary')\nsummary_fig.write_html('summary_dashboard.html')\n```\n\n### Using the RESTful API\n\n```python\nimport flowpylib\n\n# Start the API server\nflowpylib.run_api(host='localhost', port=5000)\n\n# Now you can make HTTP requests to the API endpoints, for example:\n# POST http://localhost:5000/analyze_tick_data\n# with JSON body: {\"table_name\": \"tickdata\", \"start_time\": \"2023.07.15T09:30:00.000\", \"end_time\": \"2023.07.15T16:00:00.000\", \"symbols\": [\"AAPL\", \"GOOGL\"]}\n```\n\n## Roadmap\n\n- **Q3 2025:**  \n  - Expand API capabilities to support advanced query parameters and data aggregation functions.  \n  - Add a comprehensive backtesting framework for systematic strategy simulations and scenario analysis.\n\n- **Q4 2025:**  \n  - Optimize performance and scalability for handling high-frequency tick data.  \n  - Incorporate advanced risk management tools focusing on inventory and market exposure mitigation.\n\n## Contributing\n\nWe welcome contributions! 
Please see our [Contributing Guide](CONTRIBUTING.md) for more details.\n\n## License\n\nThis project is licensed under the BSD-2-Clause License - see the [LICENSE](LICENSE) file for details.\n"
  },
  {
    "path": "examples/analysis/analyze_impact.py",
    "content": "from flowpylib.analysis.general.impact import calculate_slippage, calculate_market_impact\n\n# Example data for market impact analysis\ninitial_price = 100.0  # Initial price of the asset\nfinal_price = 105.0    # Final price after the trade\ntrade_volume = 1000    # Volume of the asset traded\n\n# Calculate market impact\nmarket_impact = calculate_market_impact(initial_price, final_price, trade_volume)\nprint(f\"Market Impact: {market_impact}\")\n\n# Example data for slippage analysis\nexpected_price = 100.0  # Expected price of the asset\nexecuted_price = 102.0  # Actual executed price\n\n# Calculate slippage\nslippage = calculate_slippage(expected_price, executed_price)\nprint(f\"Slippage: {slippage}\")\n"
  },
  {
    "path": "examples/analysis/analyze_metrics.py",
    "content": "from flowpylib.analysis.general.metrics import calculate_vwap\nfrom flowpylib.analysis.equities.equity_metrics import calculate_beta\nimport numpy as np\n\n# Example data for VWAP calculation\nprices = np.array([100.0, 101.0, 102.0, 103.0, 104.0])\nvolumes = np.array([10.0, 15.0, 10.0, 5.0, 20.0])\n\n# Calculate VWAP\nvwap = calculate_vwap(prices, volumes)\nprint(f\"Volume Weighted Average Price (VWAP): {vwap}\")\n\n# Example data for Beta calculation\n# Asset returns and market returns over the same period\nasset_returns = np.array([0.01, 0.02, -0.01, 0.03, 0.04])\nmarket_returns = np.array([0.015, 0.025, -0.005, 0.035, 0.045])\n\n# Calculate Beta\nbeta = calculate_beta(asset_returns, market_returns)\nprint(f\"Beta: {beta}\")\n"
  },
  {
    "path": "examples/analysis/equity_analysis_example.py",
    "content": "from flowpylib.analysis.equities.equity_metrics import calculate_beta\n\n# Example data\nasset_returns = [0.01, 0.02, -0.01, 0.03]\nmarket_returns = [0.015, 0.025, -0.005, 0.035]\n\nbeta = calculate_beta(asset_returns, market_returns)\nprint(f\"Calculated Beta: {beta}\")\n"
  },
  {
    "path": "examples/analysis/fx_analysis_example.py",
    "content": "from flowpylib.analysis.fx.fx_metrics import calculate_fx_spread\n\n# Example data\nbid_prices = [1.105, 1.106, 1.107]\nask_prices = [1.110, 1.111, 1.112]\n\nspreads = [calculate_fx_spread(bid, ask) for bid, ask in zip(bid_prices, ask_prices)]\nprint(f\"Calculated FX Spreads: {spreads}\")\n"
  },
  {
    "path": "examples/api_request.py",
    "content": "import requests\n\nurl = 'http://localhost:5000/vwap'\ndata = {\n    'prices': [100, 101, 102, 103, 104],\n    'volumes': [10, 15, 10, 5, 20]\n}\nresponse = requests.post(url, json=data)\nprint(response.json())\n"
  },
  {
    "path": "examples/api_server.py",
    "content": "from flask import Flask, request, jsonify\nfrom flowpylib.analysis.general.metrics import calculate_vwap\n\napp = Flask(__name__)\n\n@app.route('/vwap', methods=['POST'])\ndef vwap():\n    try:\n        data = request.json\n        prices = data['prices']\n        volumes = data['volumes']\n        \n        # Calculate VWAP\n        vwap_value = calculate_vwap(prices, volumes)\n        \n        if vwap_value is None:\n            return jsonify({\"error\": \"Invalid input data\"}), 400\n        \n        return jsonify({\"vwap\": vwap_value})\n    except KeyError as e:\n        return jsonify({\"error\": f\"Missing key in input data: {str(e)}\"}), 400\n    except Exception as e:\n        return jsonify({\"error\": str(e)}), 500\n\nif __name__ == '__main__':\n    app.run(debug=True)\n\n"
  },
  {
    "path": "examples/data_loading/load_data.py",
    "content": "# examples/load_data.py\nfrom flowpylib import load_data\n\n# Load data from a CSV file\ncsv_data = load_data('csv', file_path='path/to/data.csv')\nprint(csv_data.head())\n\n# Load data from an Excel file\nexcel_data = load_data('excel', file_path='path/to/data.xlsx', sheet_name='Sheet1')\nprint(excel_data.head())\n\n# Load data from a KDB database\nkdb_data = load_data('kdb', host='localhost', port=5001, query='select from trade')\nprint(kdb_data.head())\n"
  },
  {
    "path": "examples/data_loading/load_sql_data.py",
    "content": "# examples/load_sql_data.py\nfrom flowpylib import load_data\n\n# Load data from MySQL\nmysql_data = load_data('mysql', host='localhost', user='username', password='password', database='dbname', query='SELECT * FROM trades')\nprint(mysql_data.head())\n\n# Load data from PostgreSQL\npgsql_data = load_data('pgsql', host='localhost', user='username', password='password', database='dbname', query='SELECT * FROM trades')\nprint(pgsql_data.head())\n"
  },
  {
    "path": "examples/defi_analysis_example.py",
    "content": "from flowpylib.crypto.defi_tools import analyze_smart_contract\nfrom web3 import Web3\n\n# Setup a Web3 connection\nweb3 = Web3(Web3.HTTPProvider('https://mainnet.infura.io/v3/YOUR_INFURA_PROJECT_ID'))\n\n# Example smart contract address and ABI (Application Binary Interface)\n# Replace with the actual contract address and ABI\ncontract_address = '0xYourSmartContractAddress'\ncontract_abi = [\n    # ABI details go here; typically this is a long list of functions and events\n]\n\n# Analyze the smart contract\ncontract = analyze_smart_contract(contract_address, contract_abi)\n\n# Example: Fetching the total supply from an ERC-20 token contract\n# This assumes the contract has a `totalSupply` function\ntotal_supply = contract.functions.totalSupply().call()\nprint(f\"Total Supply: {total_supply}\")\n\n# Example: Fetching an account's balance\n# Replace '0xYourAccountAddress' with the actual account address\naccount_address = '0xYourAccountAddress'\nbalance = contract.functions.balanceOf(account_address).call()\nprint(f\"Balance of {account_address}: {balance}\")\n\n# Note: Ensure the contract's ABI includes the functions you're trying to call\n# and that you're interacting with the correct network and contract address.\n"
  },
  {
    "path": "examples/geospatial_analysis_example.py",
    "content": "import geopandas as gpd\nimport matplotlib.pyplot as plt\nfrom flowpylib.analysis.geospatial import plot_geospatial_data\n\n# Example data: This would typically be loaded from a file or database.\n# Here, we create a simple GeoDataFrame for demonstration purposes.\ndata = {\n    'geometry': [\n        'POINT (10 50)',\n        'POINT (12 54)',\n        'POINT (14 52)',\n        'POINT (10 48)',\n        'POINT (16 49)'\n    ],\n    'value': [100, 200, 150, 250, 300]\n}\ngdf = gpd.GeoDataFrame(data, geometry=gpd.points_from_xy([10, 12, 14, 10, 16], [50, 54, 52, 48, 49]))\n\n# Plot the geospatial data\nplot_geospatial_data(gdf, 'value')\n\n# Show plot\nplt.show()\n"
  },
  {
    "path": "examples/ml/ml_forecast_example.py",
    "content": "from flowpylib.ml.models import train_forecast_model\nimport pandas as pd\n\n# Example data\ndata = pd.DataFrame({\n    'feature1': [1, 2, 3, 4, 5],\n    'feature2': [2, 3, 4, 5, 6],\n    'target': [1.1, 1.2, 1.3, 1.4, 1.5]\n})\n\nmodel = train_forecast_model(data, 'target')\nprint(\"Model trained successfully.\")\n"
  },
  {
    "path": "examples/portfolio/portfolio_optimization_example.py",
    "content": "from flowpylib.portfolio.optimization import optimize_portfolio\n\n# Example data\nexpected_returns = [0.1, 0.2, 0.15]\ncov_matrix = [[0.01, 0.0018, 0.0011],\n              [0.0018, 0.04, 0.0023],\n              [0.0011, 0.0023, 0.02]]\n\nweights = optimize_portfolio(expected_returns, cov_matrix, 0.1)\nprint(f\"Optimized Weights: {weights}\")\n"
  },
  {
    "path": "examples/risk_compliance/risk_management_example.py",
    "content": "import numpy as np\nfrom flowpylib.risk.compliance import calculate_var, compliance_check\nimport pandas as pd\n\n# Example data: Returns of a portfolio over a period\nportfolio_returns = np.array([-0.02, -0.01, 0.01, 0.02, 0.03, 0.05, -0.03, 0.04, 0.02, -0.02])\n\n# Calculate Value at Risk (VaR) at 95% confidence level\nconfidence_level = 0.95\nvar = calculate_var(portfolio_returns, confidence_level)\nprint(f\"Value at Risk (VaR) at {confidence_level * 100}% confidence level: {var:.4f}\")\n\n# Example transaction data for compliance check\ntransaction_data = pd.DataFrame({\n    'TransactionID': [1, 2, 3, 4, 5],\n    'Amount': [5000, 15000, 3000, 25000, 1000]\n})\n\n# Perform a compliance check for suspicious transactions\nsuspicious_transactions = compliance_check(transaction_data)\nprint(\"Suspicious Transactions:\")\nprint(suspicious_transactions)\n"
  },
  {
    "path": "examples/sentiment_analysis_example.py",
    "content": "from flowpylib.analysis.sentiment.sentiment_analysis import analyze_sentiment, sentiment_trend_over_time\nimport pandas as pd\n\n# Example text data for sentiment analysis\ntexts = [\n    \"The market is booming! Great time to invest.\",\n    \"Stocks are plummeting due to economic uncertainty.\",\n    \"Mixed signals from the market; experts are divided.\",\n    \"Tech stocks are rising, but overall market sentiment is cautious.\",\n    \"Unexpected gains in the market today, driving positive sentiment.\"\n]\n\n# Analyzing individual sentiments\nfor i, text in enumerate(texts):\n    polarity, subjectivity = analyze_sentiment(text)\n    print(f\"Text {i+1}: Polarity={polarity}, Subjectivity={subjectivity}\")\n\n# Example data with timestamps for sentiment trend analysis\ntext_data = [\n    (\"2024-01-01 08:00:00\", \"The market is booming! Great time to invest.\"),\n    (\"2024-01-02 08:00:00\", \"Stocks are plummeting due to economic uncertainty.\"),\n    (\"2024-01-03 08:00:00\", \"Mixed signals from the market; experts are divided.\"),\n    (\"2024-01-04 08:00:00\", \"Tech stocks are rising, but overall market sentiment is cautious.\"),\n    (\"2024-01-05 08:00:00\", \"Unexpected gains in the market today, driving positive sentiment.\")\n]\n\n# Convert the data to a DataFrame for easier manipulation\ndf = pd.DataFrame(text_data, columns=['timestamp', 'text'])\n\n# Analyze sentiment trend over time\nsentiment_trends = sentiment_trend_over_time(df.values)\n\n# Display sentiment trend data\nprint(\"Sentiment Trends Over Time:\")\nprint(sentiment_trends)\n"
  },
  {
    "path": "examples/visualization/3d_visualization_example.py",
    "content": ""
  },
  {
    "path": "examples/visualization/plot_data.py",
    "content": "from flowpylib.visualization.plotter import plot_candlestick\nimport pandas as pd\n\n# Example data\ndata = pd.DataFrame({\n    'Date': pd.date_range(start='2021-01-01', periods=5, freq='D'),\n    'Close': [100, 102, 104, 103, 105]\n})\n\nplot_candlestick(data)\n"
  },
  {
    "path": "flowpylib/__init__.py",
    "content": "__title__ = 'flowpylib'\n__author__ = 'Jialue Chen'\n__license__ = 'BSD 2-Clause'\n__version__='1.2.2'"
  },
  {
    "path": "flowpylib/analysis/__init__.py",
    "content": ""
  },
  {
    "path": "flowpylib/analysis/bonds/__init__.py",
    "content": ""
  },
  {
    "path": "flowpylib/analysis/bonds/bond_metrics.py",
    "content": "def calculate_duration(cash_flows, yield_to_maturity):\n    return sum(cf / (1 + yield_to_maturity)**(t+1) for t, cf in enumerate(cash_flows))\n"
  },
  {
    "path": "flowpylib/analysis/commodities/__init__.py",
    "content": ""
  },
  {
    "path": "flowpylib/analysis/commodities/commodity_metrics.py",
    "content": "def calculate_basis(spot_price, futures_price):\n    return futures_price - spot_price\n"
  },
  {
    "path": "flowpylib/analysis/crypto/__init__.py",
    "content": ""
  },
  {
    "path": "flowpylib/analysis/crypto/crypto_metrics.py",
    "content": "import pandas as pd\n\ndef calculate_volatility(prices, window):\n    return pd.Series(prices).rolling(window=window).std()\n"
  },
  {
    "path": "flowpylib/analysis/crypto/defi_tools.py",
    "content": "from web3 import Web3\n\ndef analyze_smart_contract(contract_address, abi):\n    web3 = Web3(Web3.HTTPProvider('https://mainnet.infura.io/v3/YOUR_INFURA_PROJECT_ID'))\n    contract = web3.eth.contract(address=contract_address, abi=abi)\n    return contract.functions\n"
  },
  {
    "path": "flowpylib/analysis/equities/__init__.py",
    "content": ""
  },
  {
    "path": "flowpylib/analysis/equities/equity_metrics.py",
    "content": "import numpy as np\n\ndef calculate_beta(asset_returns, market_returns):\n    covariance_matrix = np.cov(asset_returns, market_returns)\n    covariance = covariance_matrix[0, 1]\n    market_variance = covariance_matrix[1, 1]\n    return covariance / market_variance\n"
  },
  {
    "path": "flowpylib/analysis/fx/__init__.py",
    "content": ""
  },
  {
    "path": "flowpylib/analysis/fx/fx_metrics.py",
    "content": "def calculate_fx_spread(bid, ask):\n    return ask - bid\n"
  },
  {
    "path": "flowpylib/analysis/general/__init__.py",
    "content": ""
  },
  {
    "path": "flowpylib/analysis/general/impact.py",
    "content": "def calculate_slippage(expected_price, actual_price):\n    return actual_price - expected_price\n\ndef calculate_market_impact(initial_price, final_price, volume):\n    return (final_price - initial_price) * volume\n"
  },
  {
    "path": "flowpylib/analysis/general/metrics.py",
    "content": "def calculate_vwap(prices, volumes):\n    total_volume = sum(volumes)\n    if total_volume == 0:\n        return None\n    return sum(p * v for p, v in zip(prices, volumes)) / total_volume\n"
  },
  {
    "path": "flowpylib/analysis/geospatial/__init__.py",
    "content": ""
  },
  {
    "path": "flowpylib/analysis/geospatial/geospatial.py",
    "content": "import geopandas as gpd\nimport matplotlib.pyplot as plt\n\ndef plot_geospatial_data(geodata, attribute):\n    geodata.plot(column=attribute, cmap='OrRd', legend=True)\n    plt.show()\n"
  },
  {
    "path": "flowpylib/analysis/real_estate/__init__.py",
    "content": ""
  },
  {
    "path": "flowpylib/analysis/real_estate/real_estate_metrics.py",
    "content": "def calculate_cap_rate(net_operating_income, property_value):\n    return net_operating_income / property_value\n"
  },
  {
    "path": "flowpylib/analysis/sentiment/__init__.py",
    "content": ""
  },
  {
    "path": "flowpylib/analysis/sentiment/sentiment_analysis.py",
    "content": "from textblob import TextBlob\nimport pandas as pd\n\ndef analyze_sentiment(text):\n    analysis = TextBlob(text)\n    return analysis.sentiment.polarity, analysis.sentiment.subjectivity\n\ndef sentiment_trend_over_time(texts):\n    sentiment_data = []\n    for timestamp, text in texts:\n        polarity, _ = analyze_sentiment(text)\n        sentiment_data.append({'timestamp': timestamp, 'sentiment': polarity})\n    return pd.DataFrame(sentiment_data)\n"
  },
  {
    "path": "flowpylib/data/__init__.py",
    "content": ""
  },
  {
    "path": "flowpylib/data/api_integration.py",
    "content": "import requests\n\ndef fetch_real_time_data(api_url, params):\n    response = requests.get(api_url, params=params)\n    return response.json()\n\ndef analyze_social_media_sentiment(api_url, params):\n    response = requests.get(api_url, params=params)\n    return response.json()\n"
  },
  {
    "path": "flowpylib/data/loaders/__init__.py",
    "content": ""
  },
  {
    "path": "flowpylib/data/loaders/arctic_loader.py",
    "content": "from arctic import Arctic\nimport pandas as pd\n\ndef load_arctic_data(host, library, symbol):\n    store = Arctic(host)\n    lib = store[library]\n    df = lib.read(symbol).data\n    return df\n"
  },
  {
    "path": "flowpylib/data/loaders/csv_loader.py",
    "content": "import pandas as pd\n\ndef load_csv_data(file_path):\n    return pd.read_csv(file_path)\n"
  },
  {
    "path": "flowpylib/data/loaders/kdb_loader.py",
    "content": "from qpython import qconnection\nimport pandas as pd\n\ndef load_kdb_data(host, port, query):\n    with qconnection.QConnection(host=host, port=port) as q:\n        data = q(query)\n    return pd.DataFrame(data)\n"
  },
  {
    "path": "flowpylib/data/loaders/mysql_loader.py",
    "content": "import mysql.connector\nimport pandas as pd\n\ndef load_mysql_data(host, user, password, database, query):\n    conn = mysql.connector.connect(\n        host=host,\n        user=user,\n        password=password,\n        database=database\n    )\n    df = pd.read_sql(query, conn)\n    conn.close()\n    return df\n"
  },
  {
    "path": "flowpylib/data/loaders/oracle_loader.py",
    "content": "import cx_Oracle\nimport pandas as pd\n\ndef load_oracle_data(dsn, user, password, query):\n    conn = cx_Oracle.connect(user=user, password=password, dsn=dsn)\n    df = pd.read_sql(query, conn)\n    conn.close()\n    return df\n"
  },
  {
    "path": "flowpylib/data/loaders/pgsql_loader.py",
    "content": "import psycopg2\nimport pandas as pd\n\ndef load_pgsql_data(host, database, user, password, query):\n    conn = psycopg2.connect(\n        host=host,\n        database=database,\n        user=user,\n        password=password\n    )\n    df = pd.read_sql(query, conn)\n    conn.close()\n    return df\n"
  },
  {
    "path": "flowpylib/data/loaders/sqlserver_loader.py",
    "content": "import pyodbc\nimport pandas as pd\n\ndef load_sqlserver_data(server, database, user, password, query):\n    conn_str = (\n        f\"DRIVER={{SQL Server}};\"\n        f\"SERVER={server};\"\n        f\"DATABASE={database};\"\n        f\"UID={user};\"\n        f\"PWD={password};\"\n    )\n    conn = pyodbc.connect(conn_str)\n    df = pd.read_sql(query, conn)\n    conn.close()\n    return df\n"
  },
  {
    "path": "flowpylib/ml/__init__.py",
    "content": ""
  },
  {
    "path": "flowpylib/ml/models.py",
    "content": "from sklearn.ensemble import RandomForestRegressor\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import Pipeline\n\ndef train_forecast_model(data, target_column):\n    X = data.drop(columns=[target_column])\n    y = data[target_column]\n    \n    pipeline = Pipeline([\n        ('scaler', StandardScaler()),\n        ('model', RandomForestRegressor())\n    ])\n    \n    model = pipeline.fit(X, y)\n    return model\n"
  },
  {
    "path": "flowpylib/portfolio/__init__.py",
    "content": ""
  },
  {
    "path": "flowpylib/portfolio/optimization.py",
    "content": "import numpy as np\n\ndef calculate_portfolio_variance(weights, cov_matrix):\n    return np.dot(weights.T, np.dot(cov_matrix, weights))\n\ndef optimize_portfolio(returns, cov_matrix, risk_tolerance):\n    num_assets = len(returns)\n    weights = np.random.dirichlet(np.ones(num_assets), size=1)\n    return weights\n"
  },
  {
    "path": "flowpylib/risk/__init__.py",
    "content": ""
  },
  {
    "path": "flowpylib/risk/compliance.py",
    "content": "import numpy as np\n\ndef calculate_var(returns, confidence_level=0.95):\n    sorted_returns = np.sort(returns)\n    index = int((1 - confidence_level) * len(sorted_returns))\n    return sorted_returns[index]\n\ndef compliance_check(transaction_data):\n    suspicious_transactions = transaction_data[transaction_data['Amount'] > 10000]\n    return suspicious_transactions\n"
  },
  {
    "path": "flowpylib/visualization/3d_visuals.py",
    "content": "import plotly.graph_objs as go\n\ndef plot_3d_surface(data):\n    fig = go.Figure(data=[go.Surface(z=data)])\n    fig.update_layout(title='3D Surface Plot', autosize=False,\n                      width=800, height=800,\n                      margin=dict(l=65, r=50, b=65, t=90))\n    fig.show()\n"
  },
  {
    "path": "flowpylib/visualization/__init__.py",
    "content": ""
  },
  {
    "path": "flowpylib/visualization/bokeh_visuals.py",
    "content": "from bokeh.plotting import figure, show, output_file\nfrom bokeh.layouts import gridplot\n\ndef plot_bokeh_dashboard(data):\n    p1 = figure(plot_width=400, plot_height=400, title=\"Closing Prices\")\n    p1.line(data['Date'], data['Close'], color='blue', legend_label='Close')\n    \n    p2 = figure(plot_width=400, plot_height=400, title=\"Volume\")\n    p2.vbar(x=data['Date'], top=data['Volume'], width=0.5, color='green', legend_label='Volume')\n\n    layout = gridplot([[p1, p2]])\n    output_file(\"dashboard.html\")\n    show(layout)\n"
  },
  {
    "path": "flowpylib/visualization/dash_app.py",
    "content": "import dash\nfrom dash import dcc, html\nimport plotly.graph_objs as go\nimport pandas as pd\n\n# Create a Dash application\napp = dash.Dash(__name__)\n\n# Load sample data for demonstration\ndata = pd.read_csv('path/to/equity_data.csv')\n\n# Define the layout of the dashboard\napp.layout = html.Div(children=[\n    html.H1(children='Market Data Dashboard'),\n\n    dcc.Graph(\n        id='candlestick-graph',\n        figure={\n            'data': [go.Candlestick(\n                x=data['Date'],\n                open=data['Open'],\n                high=data['High'],\n                low=data['Low'],\n                close=data['Close']\n            )],\n            'layout': go.Layout(title='Candlestick Chart', xaxis_title='Date', yaxis_title='Price')\n        }\n    ),\n\n    dcc.Graph(\n        id='volume-graph',\n        figure={\n            'data': [go.Bar(\n                x=data['Date'],\n                y=data['Volume'],\n                name='Volume'\n            )],\n            'layout': go.Layout(title='Volume Chart', xaxis_title='Date', yaxis_title='Volume')\n        }\n    )\n])\n\n# Run the Dash app\nif __name__ == '__main__':\n    app.run_server(debug=True)\n"
  },
  {
    "path": "flowpylib/visualization/plotly_visuals.py",
    "content": "import plotly.graph_objects as go\n\ndef plot_interactive_candlestick(data):\n    fig = go.Figure(data=[go.Candlestick(x=data['Date'],\n                                         open=data['Open'],\n                                         high=data['High'],\n                                         low=data['Low'],\n                                         close=data['Close'])])\n    fig.update_layout(title='Interactive Candlestick Chart',\n                      xaxis_title='Date',\n                      yaxis_title='Price')\n    fig.show()\n"
  },
  {
    "path": "flowpylib/visualization/plotter.py",
    "content": "import matplotlib.pyplot as plt\n\ndef plot_candlestick(data):\n    fig, ax = plt.subplots()\n    ax.plot(data['Date'], data['Close'], label='Close Price')\n    ax.set_xlabel('Date')\n    ax.set_ylabel('Price')\n    ax.set_title('Candlestick Chart')\n    plt.legend()\n    plt.show()\n"
  },
  {
    "path": "requirements.txt",
    "content": "# Core libraries\nnumpy\npandas\nollama\n\n# Financial data analysis\npyodbc\nsqlalchemy\ncx_Oracle\npsycopg2-binary\narctic\nweb3\n\n# Plotting and visualization\nmatplotlib\nplotly\nbokeh\ndash\ndash-bootstrap-components\n\n# Geospatial analysis\ngeopandas\n\n# Machine learning\nscikit-learn\n\n# Sentiment analysis\ntextblob\n\n# API and web services\nflask\nrequests\n\n# C++ integration\npybind11\n\n# Testing\npytest\npytest-cov\n\n# Additional dependencies\ninflect\njax"
  },
  {
    "path": "setup.cfg",
    "content": "[metadata]\nname = flowpylib\nversion = 1.2.2\ndescription = Algo Toolkit for Flow Order Flow Modeling and Transaction Cost Analysis\nauthor = Jialue Chen\nauthor_email = jialuechen@outlook.com\nurl = https://github.com/jialuechen/flowpylib\n\n[options]\npackages = find:\ninstall_requires =\n    pandas\n    matplotlib\n    plotly\n    bokeh\n    dash\n    flask\n    pybind11\n    numpy\n    geopandas\n    scikit-learn\n    textblob\n    requests\n    web3\n\n[options.extras_require]\ntesting =\n    pytest\n    pytest-cov\n\n[build_ext]\ninplace = 1\n"
  },
  {
    "path": "setup.py",
    "content": "from setuptools import setup,find_packages\nfrom setuptools.command.build_ext import build_ext\nfrom flowpylib import __version__ as versionInfo\nsetup(\n    name='flowpylib',\n    version=versionInfo,\n    description='Python Library for Transaction Cost Analysis and Market Simulation',\n    author='Jialue Chen',\n    author_email='jialuechen@outlook.com',\n    url='https://github.com/jialuechen/flowpylib',\n    packages=find_packages(),\n    install_requires=[\n        'ollama','pandas', 'matplotlib', 'plotly', 'bokeh', 'dash', 'flask', 'pybind11', 'numpy', 'geopandas', 'scikit-learn', 'textblob', 'requests','jax', 'web3'\n    ],\n    cmdclass={'build_ext': build_ext},\n)\n\n"
  },
  {
    "path": "test/__init__.py",
    "content": ""
  },
  {
    "path": "test/test_analysis.py",
    "content": "import pytest\nfrom flowpylib.analysis.general.metrics import calculate_vwap\n\ndef test_calculate_vwap():\n    prices = [100, 101, 102, 103, 104]\n    volumes = [10, 15, 10, 5, 20]\n    vwap = calculate_vwap(prices, volumes)\n    assert vwap == pytest.approx(101.9091, 0.0001), \"VWAP calculation error\"\n"
  },
  {
    "path": "test/test_data_loaders.py",
    "content": "import pytest\nfrom flowpylib.data.loaders.csv_loader import load_csv_data\n\ndef test_load_csv_data():\n    data = load_csv_data('tests/data/sample_data.csv')\n    assert not data.empty, \"Data should not be empty\"\n"
  },
  {
    "path": "test/test_ml.py",
    "content": "import pytest\nfrom flowpylib.ml.models import train_forecast_model\nimport pandas as pd\n\ndef test_train_forecast_model():\n    data = pd.DataFrame({\n        'feature1': [1, 2, 3, 4, 5],\n        'feature2': [2, 3, 4, 5, 6],\n        'target': [1.1, 1.2, 1.3, 1.4, 1.5]\n    })\n    model = train_forecast_model(data, 'target')\n    assert model, \"Model should be trained\"\n"
  },
  {
    "path": "test/test_portfolio.py",
    "content": "import pytest\nimport numpy as np\nfrom flowpylib.portfolio.optimization import calculate_portfolio_variance, optimize_portfolio\n\ndef test_calculate_portfolio_variance():\n    weights = np.array([0.4, 0.3, 0.3])\n    cov_matrix = np.array([[0.01, 0.0018, 0.0011],\n                           [0.0018, 0.04, 0.0023],\n                           [0.0011, 0.0023, 0.02]])\n    \n    expected_variance = 0.00811  # Pre-calculated expected variance (weights.T @ cov_matrix @ weights)\n    variance = calculate_portfolio_variance(weights, cov_matrix)\n    \n    assert np.isclose(variance, expected_variance, atol=1e-6), f\"Variance calculation error: expected {expected_variance}, got {variance}\"\n\ndef test_optimize_portfolio():\n    returns = [0.1, 0.2, 0.15]\n    cov_matrix = np.array([[0.01, 0.0018, 0.0011],\n                           [0.0018, 0.04, 0.0023],\n                           [0.0011, 0.0023, 0.02]])\n    risk_tolerance = 0.1\n    \n    weights = optimize_portfolio(returns, cov_matrix, risk_tolerance)\n    \n    assert len(weights) == len(returns), \"Number of weights should match number of assets\"\n    assert np.isclose(np.sum(weights), 1, atol=1e-6), f\"Weights should sum to 1, got sum {np.sum(weights)}\"\n    assert all(w >= 0 for w in weights), \"All weights should be non-negative\"\n"
  },
  {
    "path": "test/test_risk.py",
    "content": "import pytest\nimport numpy as np\nfrom flowpylib.risk.compliance import calculate_var, compliance_check\n\ndef test_calculate_var():\n    returns = np.array([-0.02, -0.01, 0.01, 0.02, 0.03, 0.05])\n    confidence_level = 0.95\n    expected_var = -0.02  # Expected VaR at 95% confidence level: int(0.05 * 6) == 0, i.e. the lowest sorted return\n\n    var = calculate_var(returns, confidence_level)\n\n    assert np.isclose(var, expected_var, atol=1e-6), f\"VaR calculation error: expected {expected_var}, got {var}\"\n\ndef test_compliance_check():\n    import pandas as pd\n    transaction_data = pd.DataFrame({\n        'TransactionID': [1, 2, 3, 4, 5],\n        'Amount': [5000, 15000, 3000, 25000, 1000]\n    })\n    \n    suspicious_transactions = compliance_check(transaction_data)\n    \n    expected_suspicious_ids = [2, 4]  # Transactions with Amount > 10000\n    assert list(suspicious_transactions['TransactionID']) == expected_suspicious_ids, \\\n        f\"Compliance check error: expected suspicious transactions {expected_suspicious_ids}, got {list(suspicious_transactions['TransactionID'])}\"\n"
  },
  {
    "path": "test/test_visualization.py",
    "content": "import pytest\nfrom flowpylib.visualization.plotter import plot_candlestick\nimport pandas as pd\n\ndef test_plot_candlestick():\n    data = pd.DataFrame({\n        'Date': pd.date_range(start='2021-01-01', periods=5, freq='D'),\n        'Close': [100, 102, 104, 103, 105]\n    })\n    fig = plot_candlestick(data)\n    assert fig, \"Plot should be generated\"\n"
  }
]