Repository: openscilab/pymilo
Branch: main
Commit: d1b0b257ea98
Files: 248
Total size: 527.4 KB
Directory structure:
gitextract_4tl_xksm/
├── .coveragerc
├── .github/
│ ├── CODE_OF_CONDUCT.md
│ ├── CONTRIBUTING.md
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.yml
│ │ ├── config.yml
│ │ └── feature_request.yml
│ ├── PULL_REQUEST_TEMPLATE.md
│ ├── dependabot.yml
│ └── workflows/
│ ├── publish_conda.yml
│ ├── publish_pypi.yml
│ └── test.yml
├── .gitignore
├── .pydocstyle
├── AUTHORS.md
├── CHANGELOG.md
├── CITATION.cff
├── LICENSE
├── README.md
├── SECURITY.md
├── SUPPORTED_MODELS.md
├── autopep8.bat
├── autopep8.sh
├── codecov.yml
├── dev-requirements.txt
├── otherfiles/
│ ├── RELEASE.md
│ ├── meta.yaml
│ ├── requirements-splitter.py
│ └── version_check.py
├── paper/
│ ├── .gitignore
│ ├── PyMilo.bib
│ ├── paper.bib
│ └── paper.md
├── pymilo/
│ ├── __init__.py
│ ├── __main__.py
│ ├── chains/
│ │ ├── __init__.py
│ │ ├── chain.py
│ │ ├── clustering_chain.py
│ │ ├── compose_chain.py
│ │ ├── cross_decomposition_chain.py
│ │ ├── decision_tree_chain.py
│ │ ├── ensemble_chain.py
│ │ ├── linear_model_chain.py
│ │ ├── naive_bayes_chain.py
│ │ ├── neighbours_chain.py
│ │ ├── neural_network_chain.py
│ │ ├── svm_chain.py
│ │ └── util.py
│ ├── exceptions/
│ │ ├── __init__.py
│ │ ├── deserialize_exception.py
│ │ ├── pymilo_exception.py
│ │ └── serialize_exception.py
│ ├── pymilo_func.py
│ ├── pymilo_obj.py
│ ├── pymilo_param.py
│ ├── streaming/
│ │ ├── __init__.py
│ │ ├── communicator.py
│ │ ├── compressor.py
│ │ ├── encryptor.py
│ │ ├── interfaces.py
│ │ ├── param.py
│ │ ├── pymilo_client.py
│ │ ├── pymilo_server.py
│ │ └── util.py
│ ├── transporters/
│ │ ├── __init__.py
│ │ ├── adamoptimizer_transporter.py
│ │ ├── baseloss_transporter.py
│ │ ├── binmapper_transporter.py
│ │ ├── bisecting_tree_transporter.py
│ │ ├── bunch_transporter.py
│ │ ├── cfnode_transporter.py
│ │ ├── compose_transporter.py
│ │ ├── feature_extraction_transporter.py
│ │ ├── function_transporter.py
│ │ ├── general_data_structure_transporter.py
│ │ ├── generator_transporter.py
│ │ ├── lossfunction_transporter.py
│ │ ├── neighbors_tree_transporter.py
│ │ ├── preprocessing_transporter.py
│ │ ├── randomstate_transporter.py
│ │ ├── sgdoptimizer_transporter.py
│ │ ├── transporter.py
│ │ ├── tree_transporter.py
│ │ └── treepredictor_transporter.py
│ └── utils/
│ ├── __init__.py
│ ├── data_exporter.py
│ ├── test_pymilo.py
│ └── util.py
├── requirements.txt
├── setup.py
├── streaming-requirements.txt
└── tests/
├── test_clusterings/
│ ├── affinity_propagation.py
│ ├── birch.py
│ ├── bisecting_kmeans.py
│ ├── dbscan.py
│ ├── gaussian_mixture/
│ │ ├── bayesian_gaussian_mixture.py
│ │ └── gaussian_mixture.py
│ ├── hdbscan.py
│ ├── hierarchical_clustering/
│ │ ├── agglomerative_clustering.py
│ │ └── feature_agglomeration.py
│ ├── kmeans.py
│ ├── mean_shift.py
│ ├── minibatch_kmeans.py
│ ├── optics.py
│ ├── spectral_clustering/
│ │ ├── spectral_biclustering.py
│ │ ├── spectral_clustering.py
│ │ └── spectral_coclustering.py
│ └── test_clusterings.py
├── test_composes/
│ ├── column_transformer.py
│ ├── test_compose_models.py
│ ├── transformed_target_regressor.py
│ └── util.py
├── test_cross_decomposition/
│ ├── cca.py
│ ├── pls_canonical.py
│ ├── pls_regression.py
│ └── test_cross_decompositions.py
├── test_decision_trees/
│ ├── decision_tree/
│ │ ├── decision_tree_classification.py
│ │ └── decision_tree_regression.py
│ ├── extra_tree/
│ │ ├── extra_tree_classification.py
│ │ └── extra_tree_regression.py
│ └── test_decision_trees.py
├── test_ensembles/
│ ├── adaboost/
│ │ ├── adaboost_classifier.py
│ │ └── adaboost_regressor.py
│ ├── bagging/
│ │ ├── bagging_classifier.py
│ │ └── bagging_regressor.py
│ ├── extra_trees/
│ │ ├── extra_trees_classifier.py
│ │ └── extra_trees_regressor.py
│ ├── gradient_booster/
│ │ ├── gradient_booster_classifier.py
│ │ └── gradient_booster_regressor.py
│ ├── hist_gradient_boosting/
│ │ ├── hist_gradient_boosting_classifier.py
│ │ └── hist_gradient_boosting_regressor.py
│ ├── isolation_forest.py
│ ├── pipeline.py
│ ├── random_forests/
│ │ ├── random_forest_classifier.py
│ │ └── random_forest_regressor.py
│ ├── random_trees_embedding.py
│ ├── stacking/
│ │ ├── stacking_classifier.py
│ │ └── stacking_regressor.py
│ ├── test_ensembles.py
│ └── voting/
│ ├── voting_classifier.py
│ └── voting_regressor.py
├── test_exceptions/
│ ├── custom_models.py
│ ├── export_exceptions.py
│ ├── import_exceptions.py
│ ├── invalid_jsons/
│ │ ├── corrupted.json
│ │ └── unknown-model.json
│ ├── test_exceptions.py
│ └── valid_jsons/
│ └── linear_regression.json
├── test_feature_extraction/
│ ├── count_vectorizer.py
│ ├── dict_vectorizer.py
│ ├── feature_hasher.py
│ ├── hashing_vectorizer.py
│ ├── patch_extractor.py
│ ├── pipeline.py
│ ├── test_feature_extractions.py
│ ├── tfidf_transformer.py
│ ├── tfidf_vectorizer.py
│ └── util.py
├── test_linear_models/
│ ├── bayesian/
│ │ ├── ard_regression.py
│ │ └── bayesian_regression.py
│ ├── elasticnet/
│ │ ├── elastic_net.py
│ │ ├── elastic_net_cv.py
│ │ ├── multi_task_elastic_net.py
│ │ └── multi_task_elastic_net_cv.py
│ ├── glm/
│ │ ├── gamma_regression.py
│ │ ├── poisson_regression.py
│ │ └── tweedie_regression.py
│ ├── lasso_lars/
│ │ ├── lasso.py
│ │ ├── lasso_cv.py
│ │ ├── lasso_lars.py
│ │ ├── lasso_lars_cv.py
│ │ ├── lasso_lars_ic.py
│ │ ├── multi_task_lasso.py
│ │ └── multi_task_lasso_cv.py
│ ├── linear_regression/
│ │ └── linear_regression.py
│ ├── logistic/
│ │ ├── logistic_regression.py
│ │ └── logistic_regression_cv.py
│ ├── omp/
│ │ ├── omp.py
│ │ └── omp_cv.py
│ ├── passive_aggressive/
│ │ ├── passive_aggressive_classifier.py
│ │ └── passive_aggressive_regressor.py
│ ├── perceptron/
│ │ └── perception.py
│ ├── quantile/
│ │ └── quantile.py
│ ├── ridge/
│ │ ├── ridge_classifier.py
│ │ ├── ridge_classifier_cv.py
│ │ ├── ridge_regression.py
│ │ └── ridge_regression_cv.py
│ ├── robustness/
│ │ ├── huber_regression.py
│ │ ├── ransac_regression.py
│ │ └── theil_sen_regression.py
│ ├── sgd/
│ │ ├── sgd_classifier.py
│ │ ├── sgd_oneclass_svm.py
│ │ └── sgd_regression.py
│ └── test_linear_models.py
├── test_misc_functionalities.py/
│ └── test_batch.py
├── test_ml_streaming/
│ ├── docker_files/
│ │ ├── Dockerfile1
│ │ └── Dockerfile2
│ ├── run_server.py
│ ├── scenarios/
│ │ ├── scenario1.py
│ │ ├── scenario2.py
│ │ ├── scenario3.py
│ │ └── scenario4.py
│ └── test_streaming.py
├── test_naive_bayes/
│ ├── bernoulli.py
│ ├── categorical.py
│ ├── complement.py
│ ├── gaussian.py
│ ├── multinomial.py
│ └── test_naive_bayes_models.py
├── test_neighbors/
│ ├── kneighbors_classifier.py
│ ├── kneighbors_regressor.py
│ ├── local_outlier_factor.py
│ ├── nearest_centroid.py
│ ├── nearest_neighbor.py
│ ├── radius_neighbors_classifier.py
│ ├── radius_neighbors_regressor.py
│ └── test_neighbors.py
├── test_neural_networks/
│ ├── bernoulli_rbm/
│ │ └── bernoulli_rbm.py
│ ├── mlp/
│ │ ├── mlp_classification.py
│ │ └── mlp_regression.py
│ └── test_neural_networks.py
├── test_preprocessings/
│ ├── binarizer.py
│ ├── function_transformer.py
│ ├── kbins_discretizer.py
│ ├── kernel_centerer.py
│ ├── label_binarizer.py
│ ├── label_encoder.py
│ ├── max_abs_scaler.py
│ ├── multilabel_binarizer.py
│ ├── normalizer.py
│ ├── one_hot_encoder.py
│ ├── ordinal_encoder.py
│ ├── polynomial_features.py
│ ├── power_transformer.py
│ ├── quantile_transformer.py
│ ├── robust_scaler.py
│ ├── spline_transformer.py
│ ├── standard_scaler.py
│ ├── target_encoder.py
│ ├── test_preprocessings.py
│ └── util.py
└── test_svms/
├── linear_svc.py
├── linear_svr.py
├── nu_svc.py
├── nu_svr.py
├── one_class_svm.py
├── svc.py
├── svr.py
└── test_svms.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .coveragerc
================================================
[run]
branch = True
omit =
*/pymilo/__main__.py
[report]
# Regexes for lines to exclude from consideration
exclude_lines =
pragma: no cover
================================================
FILE: .github/CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, caste, color, religion, or sexual
identity and orientation.
We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.
## Our Standards
Examples of behavior that contributes to a positive environment for our
community include:
* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
* Focusing on what is best not just for us as individuals, but for the overall
community
Examples of unacceptable behavior include:
* The use of sexualized language or imagery, and sexual attention or advances of
any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email address,
without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.
Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.
## Scope
This Code of Conduct applies both within project spaces and in public spaces
when an individual is representing the project or its community.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
pymilo@openscilab.com.
All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the
reporter of any incident.
## Enforcement Guidelines
Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:
### 1. Correction
**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.
**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.
### 2. Warning
**Community Impact**: A violation through a single incident or series of
actions.
**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or permanent
ban.
### 3. Temporary Ban
**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.
**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.
### 4. Permanent Ban
**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.
**Consequence**: A permanent ban from any sort of public interaction within the
community.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.1, available at
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
Community Impact Guidelines were inspired by
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
For answers to common questions about this code of conduct, see the FAQ at
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
[https://www.contributor-covenant.org/translations][translations].
[homepage]: https://www.contributor-covenant.org
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
[Mozilla CoC]: https://github.com/mozilla/diversity
[FAQ]: https://www.contributor-covenant.org/faq
[translations]: https://www.contributor-covenant.org/translations
================================================
FILE: .github/CONTRIBUTING.md
================================================
# Contribution
Changes and improvements are more than welcome! ❤️ Feel free to fork and open a pull request.
Please consider the following:
1. Fork it!
2. Create your feature branch (under `dev` branch)
3. Add your functions/methods to proper files
4. Add standard `docstring` to your functions/methods
5. Add tests for your functions/methods (`tests` folder)
6. Pass all CI tests
7. Update `CHANGELOG.md`
- Describe changes under `[Unreleased]` section
8. Submit a pull request into `dev` (please complete the pull request template)
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.yml
================================================
name: Bug Report
description: File a bug report
title: "[Bug]: "
body:
- type: markdown
attributes:
value: |
Thanks for taking the time to fill out this bug report!
- type: input
id: contact
attributes:
label: Contact details
description: How can we get in touch with you if we need more info?
placeholder: ex. email@example.com
validations:
required: false
- type: textarea
id: what-happened
attributes:
label: What happened?
description: Provide a clear and concise description of what the bug is.
placeholder: >
Tell us a description of the bug.
validations:
required: true
- type: textarea
id: step-to-reproduce
attributes:
label: Steps to reproduce
description: Provide details of how to reproduce the bug.
placeholder: >
ex. 1. Go to '...'
validations:
required: true
- type: textarea
id: expected-behavior
attributes:
label: Expected behavior
description: What did you expect to happen?
placeholder: >
ex. I expected '...' to happen
validations:
required: true
- type: textarea
id: actual-behavior
attributes:
label: Actual behavior
description: What actually happened?
placeholder: >
ex. Instead '...' happened
validations:
required: true
- type: dropdown
id: operating-system
attributes:
label: Operating system
description: Which operating system are you using?
options:
- Windows
- macOS
- Linux
default: 0
validations:
required: true
- type: dropdown
id: python-version
attributes:
label: Python version
description: Which version of Python are you using?
options:
- Python 3.14
- Python 3.13
- Python 3.12
- Python 3.11
- Python 3.10
- Python 3.9
- Python 3.8
- Python 3.7
default: 1
validations:
required: true
- type: dropdown
id: pymilo-version
attributes:
label: PyMilo version
description: Which version of PyMilo are you using?
options:
- PyMilo 1.6
- PyMilo 1.5
- PyMilo 1.4
- PyMilo 1.3
- PyMilo 1.2
- PyMilo 1.1
- PyMilo 1.0
- PyMilo 0.9
- PyMilo 0.8
- PyMilo 0.7
- PyMilo 0.6
- PyMilo 0.5
- PyMilo 0.4
- PyMilo 0.3
- PyMilo 0.2
- PyMilo 0.1
default: 0
validations:
required: true
- type: textarea
id: logs
attributes:
label: Relevant log output
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
render: shell
================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: false
contact_links:
- name: Discord
url: https://discord.com/invite/mtuMS8AjDS
about: Ask questions and discuss with other PyMilo community members
- name: Website
url: https://openscilab.com/
about: Check out our website for more information
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.yml
================================================
name: Feature Request
description: Suggest a feature for this project
title: "[Feature]: "
body:
- type: textarea
id: description
attributes:
label: Describe the feature you want to add
placeholder: >
I'd like to be able to [...]
validations:
required: true
- type: textarea
id: possible-solution
attributes:
label: Describe your proposed solution
placeholder: >
I think this could be done by [...]
validations:
required: false
- type: textarea
id: alternatives
attributes:
label: Describe alternatives you've considered, if relevant
placeholder: >
Another way to do this would be [...]
validations:
required: false
- type: textarea
id: additional-context
attributes:
label: Additional context
placeholder: >
Add any other context or screenshots about the feature request here.
validations:
required: false
================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
#### Reference Issues/PRs
#### What does this implement/fix? Explain your changes.
#### Any other comments?
================================================
FILE: .github/dependabot.yml
================================================
version: 2
updates:
- package-ecosystem: pip
directory: "/"
schedule:
interval: weekly
time: "01:30"
open-pull-requests-limit: 10
target-branch: dev
assignees:
- "AHReccese"
================================================
FILE: .github/workflows/publish_conda.yml
================================================
name: publish_conda
permissions: read-all
on:
push:
# Sequence of patterns matched against refs/tags
tags:
- '*' # Push events for any tag name without a slash, e.g. v1.0, v20.15.10
jobs:
publish:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- name: publish-to-conda
uses: sepandhaghighi/conda-package-publish-action@v1.2
with:
subDir: 'otherfiles'
AnacondaToken: ${{ secrets.ANACONDA_TOKEN }}
================================================
FILE: .github/workflows/publish_pypi.yml
================================================
# This workflow will upload a Python Package using Twine when a tag is pushed
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
name: Upload Python Package
on:
push:
# Sequence of patterns matched against refs/tags
tags:
- '*' # Push events for any tag name without a slash, e.g. v1.0, v20.15.10
jobs:
deploy:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine
- name: Build and publish
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python setup.py sdist bdist_wheel
twine upload dist/*.tar.gz
twine upload dist/*.whl
================================================
FILE: .github/workflows/test.yml
================================================
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: CI
on:
push:
branches:
- main
- dev
pull_request:
branches:
- dev
- main
env:
TEST_PYTHON_VERSION: 3.9
TEST_OS: 'ubuntu-22.04'
jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-22.04, windows-2022, macos-15-intel]
python-version: [3.7, 3.8, 3.9, 3.10.5, 3.11.0, 3.12.0, 3.13.0, 3.14.0]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Installation
run: |
python -m pip install --upgrade pip
pip install .["streaming"]
- name: Test requirements Installation
run: |
python otherfiles/requirements-splitter.py
pip install --upgrade --upgrade-strategy=only-if-needed -r test-requirements.txt
- name: Pymilo Core Functionality Tests with pytest
env:
COVERAGE_FILE: .coverage.core
run: |
python -m pytest . --ignore=./tests/test_ml_streaming --cov=pymilo --cov-report=term --cov-report=xml:coverage_core.xml
- name: Pymilo Streaming Functionality Tests with pytest
env:
COVERAGE_FILE: .coverage.streaming
run: |
python -m pytest ./tests/test_ml_streaming --cov=pymilo --cov-report=term --cov-report=xml:coverage_streaming.xml
- name: Merge coverage files
run: |
coverage combine
coverage xml -o coverage_combined.xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
files: coverage_combined.xml
fail_ci_if_error: false
if: matrix.python-version == env.TEST_PYTHON_VERSION && matrix.os == env.TEST_OS
- name: Vulture, Bandit and Pydocstyle Tests
run: |
python -m vulture pymilo/ otherfiles/ setup.py --min-confidence 65 --exclude=__init__.py --sort-by-size
python -m bandit -r pymilo -s B311
python -m pydocstyle -v
if: matrix.python-version == env.TEST_PYTHON_VERSION
- name: Version check
run: |
python otherfiles/version_check.py
if: matrix.python-version == env.TEST_PYTHON_VERSION
================================================
FILE: .gitignore
================================================
# Created by .ignore support plugin (hsz.mobi)
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
.venv/
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
### Example user template template
### Example user template
# IntelliJ project files
.idea
*.iml
out
gen
/.VSCodeCounter
/tests/exported*
/paper/refs
================================================
FILE: .pydocstyle
================================================
[pydocstyle]
match_dir = ^(?!(tests|build)).*
match = .*\.py
================================================
FILE: AUTHORS.md
================================================
# Core Developers
----------
- AmirHosein Rostami - Open Science Laboratory ([Github](https://github.com/AHReccese)) **
- Sepand Haghighi - Open Science Laboratory ([Github](https://github.com/sepandhaghighi))
- Alireza Zolanvari - Open Science Laboratory ([Github](https://github.com/AlirezaZolanvari))
- Sadra Sabouri - Open Science Laboratory ([Github](https://github.com/sadrasabouri))
** **Maintainer**
# Other Contributors
----------
- [@zhmbshr](https://github.com/zhmbshr) ++
++ **Graphic designer**
================================================
FILE: CHANGELOG.md
================================================
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [Unreleased]
## [1.6] - 2026-04-03
### Added
- `_check_response_error` function in `WebSocketClientCommunicator`
- `register_client` function in `WebSocketClientCommunicator`
- `remove_client` function in `WebSocketClientCommunicator`
- `register_model` function in `WebSocketClientCommunicator`
- `remove_model` function in `WebSocketClientCommunicator`
- `get_ml_models` function in `WebSocketClientCommunicator`
- `grant_access` function in `WebSocketClientCommunicator`
- `revoke_access` function in `WebSocketClientCommunicator`
- `get_allowance` function in `WebSocketClientCommunicator`
- `get_allowed_models` function in `WebSocketClientCommunicator`
- `_action_handlers` registry in `WebSocketServerCommunicator`
- `_handle_register_client` function in `WebSocketServerCommunicator`
- `_handle_remove_client` function in `WebSocketServerCommunicator`
- `_handle_register_model` function in `WebSocketServerCommunicator`
- `_handle_remove_model` function in `WebSocketServerCommunicator`
- `_handle_get_ml_models` function in `WebSocketServerCommunicator`
- `_handle_grant_access` function in `WebSocketServerCommunicator`
- `_handle_revoke_access` function in `WebSocketServerCommunicator`
- `_handle_get_allowance` function in `WebSocketServerCommunicator`
- `_handle_get_allowed_models` function in `WebSocketServerCommunicator`
- `scenario4` access control test for streaming
- `--port` argument in test server runner
- `close` function in `WebSocketClientCommunicator`
- `close` function in `PymiloClient`
- Context manager support (`__enter__`/`__exit__`) in `PymiloClient`
### Changed
- Test system modified
- `_ArrayFunctionDispatcher` import in `FunctionTransporter` to use new numpy API
- `datetime.utcnow()` to `datetime.now(timezone.utc)` in `PymiloException`
- `get_allowance` endpoint in `RESTServerCommunicator` to handle empty allowances
- `send_message` function in `WebSocketClientCommunicator`
- `download` function in `WebSocketClientCommunicator`
- `upload` function in `WebSocketClientCommunicator`
- `attribute_call` function in `WebSocketClientCommunicator`
- `attribute_type` function in `WebSocketClientCommunicator`
- `__init__` function in `WebSocketServerCommunicator`
- `handle_message` function in `WebSocketServerCommunicator`
- `_handle_download` function in `WebSocketServerCommunicator`
- `_handle_upload` function in `WebSocketServerCommunicator`
- `_handle_attribute_call` function in `WebSocketServerCommunicator`
- `_handle_attribute_type` function in `WebSocketServerCommunicator`
- `parse` function in `WebSocketServerCommunicator`
- WebSocket streaming tests enabled
## [1.5] - 2026-01-26
### Added
- `_is_remainder_cols_list` function in `GeneralDataStructureTransporter`
- `ComposeTransporter` Transporter
- Composite params initialized in `pymilo_param.py`
- `get_transporter` in `chains/util.py`
- `deserialize_possible_ml_model` in `chains/util.py`
- `serialize_possible_ml_model` in `chains/util.py`
- `TransformedTargetRegressor` model
- `ColumnTransformer` model
- Composite models test runner
- Composite models chain
- JOSS paper
### Changed
- `serialize` function in FunctionTransporter
- `serialize_spline` function in PreprocessingTransporter
- `deserialize_spline` function in PreprocessingTransporter
- Ensemble models test runner
- `get_deserialized_list` function in GeneralDataStructureTransporter
- `deserialize` function in GeneralDataStructureTransporter
- `serialize` function in GeneralDataStructureTransporter
- `get_deserialized_dict` function in GeneralDataStructureTransporter
- `serialize_dict` function in GeneralDataStructureTransporter
- `serialize_tuple` function in GeneralDataStructureTransporter
- Test system modified
- `README.md` updated
### Removed
- `get_transporter` in `ensemble_chain.py`
- `deserialize_possible_ml_model` in `ensemble_chain.py`
- `serialize_possible_ml_model` in `ensemble_chain.py`
## [1.4] - 2025-12-01
### Added
- `get_allowed_models` function in `PymiloClient`
- `get_allowance` function in `PymiloClient`
- `revoke_access` function in `PymiloClient`
- `grant_access` function in `PymiloClient`
- `get_ml_models` function in `PymiloClient`
- `deregister_ml_model` function in `PymiloClient`
- `register_ml_model` function in `PymiloClient`
- `deregister` function in `PymiloClient`
- `register` function in `PymiloClient`
- `REST_API_PREFIX` function in `streaming.param.py`
- `register_client` function in `RESTClientCommunicator`
- `remove_client` function in `RESTClientCommunicator`
- `register_model` function in `RESTClientCommunicator`
- `remove_model` function in `RESTClientCommunicator`
- `get_ml_models` function in `RESTClientCommunicator`
- `grant_access` function in `RESTClientCommunicator`
- `revoke_access` function in `RESTClientCommunicator`
- `get_allowance` function in `RESTClientCommunicator`
- `get_allowed_models` function in `RESTClientCommunicator`
- `_validate_id` function in `PymiloServer`
- `init_client` function in `PymiloServer`
- `remove_client` function in `PymiloServer`
- `grant_access` function in `PymiloServer`
- `revoke_access` function in `PymiloServer`
- `get_allowed_models` function in `PymiloServer`
- `get_clients_allowance` function in `PymiloServer`
- `get_clients` function in `PymiloServer`
- `init_ml_model` function in `PymiloServer`
- `set_ml_model` function in `PymiloServer`
- `remove_ml_model` function in `PymiloServer`
- `get_ml_models` function in `PymiloServer`
### Changed
- `is_callable_attribute` function in `PymiloServer`
- `execute_model` function in `PymiloServer`
- `update_model` function in `PymiloServer`
- `export_model` function in `PymiloServer`
- `__getattr__` in `PymiloClient`
- `upload` function in `PymiloClient`
- `download` function in `PymiloClient`
- `encrypt_compress` function in `PymiloClient`
- `ClientCommunicator` interface
- `handle_message` function in `WebSocketServerCommunicator`
- `_handle_download` function in `WebSocketServerCommunicator`
- `setup_routes` function in `RESTServerCommunicator`
- `__init__` function in `RESTClientCommunicator`
- `download` function in `RESTClientCommunicator`
- `upload` function in `RESTClientCommunicator`
- `attribute_call` function in `RESTClientCommunicator`
- `attribute_type` function in `RESTClientCommunicator`
- `README.md` updated
- `__init__` function in `PymiloServer`
- Test system modified
- `Python 3.14` added to `test.yml`
### Removed
- Python 3.6 support
## [1.3] - 2025-02-26
### Added
- `TfidfVectorizer` feature extractor
- `TfidfTransformer` feature extractor
- `HashingVectorizer` feature extractor
- `CountVectorizer` feature extractor
- `PatchExtractor` feature extractor
- `DictVectorizer` feature extractor
- `FeatureHasher` feature extractor
- `FeatureExtractorTransporter` Transporter
- `FeatureExtraction` support added to Ensemble chain
- FeatureExtraction params initialized in `pymilo_param.py`
- Feature Extraction models test runner
- Zenodo badge to `README.md`
### Changed
- `get_deserialized_list` in `GeneralDataStructureTransporter`
- `get_deserialized_dict` in `GeneralDataStructureTransporter`
- `serialize` in `GeneralDataStructureTransporter`
- `serialize_tuple` in `GeneralDataStructureTransporter`
- `AttributeCallPayload` in `streaming.communicator.py`
- `get_deserialized_regular_primary_types` in `GeneralDataStructureTransporter`
- Test system modified
## [1.2] - 2025-01-22
### Added
- `generate_dockerfile` testcases
- `generate_dockerfile` function in `streaming.util.py`
- `cite` section in `README.md`
- `CLI` handler
- `print_supported_ml_models` function in `pymilo_func.py`
- `pymilo_help` function in `pymilo_func.py`
- `SKLEARN_SUPPORTED_CATEGORIES` in `pymilo_param.py`
- `OVERVIEW` in `pymilo_param.py`
- `get_sklearn_class` in `utils.util.py`
### Changed
- `ML Streaming` testcases modified to use PyMilo CLI
- `to_pymilo_issue` function in `PymiloException`
- `valid_url_valid_file` testcase added in `test_exceptions.py`
- `valid_url_valid_file` function in `import_exceptions.py`
- `StandardPayload` in `RESTServerCommunicator`
- testcase for LogisticRegressionCV, LogisticRegression
- `README.md` updated
- `AUTHORS.md` updated
## [1.1] - 2024-11-25
### Added
- `is_socket_closed` function in `streaming.communicator.py`
- `validate_http_url` function in `streaming.util.py`
- `validate_websocket_url` function in `streaming.util.py`
- `ML Streaming` WebSocket testcases
- `CommunicationProtocol` Enum in `streaming.communicator.py`
- `WebSocketClientCommunicator` class in `streaming.communicator.py`
- `WebSocketServerCommunicator` class in `streaming.communicator.py`
- batch operation testcases
- `batch_export` function in `pymilo/pymilo_obj.py`
- `batch_import` function in `pymilo/pymilo_obj.py`
- `CCA` model
- `PLSCanonical` model
- `PLSRegression` model
- Cross decomposition models test runner
- Cross decomposition chain
- PyMilo exception types added in `pymilo/exceptions/__init__.py`
- PyMilo exception types added in `pymilo/__init__.py`
### Changed
- `core` and `streaming` tests divided in `test.yml`
- `communication_protocol` parameter added to `PymiloClient` class
- `communication_protocol` parameter added to `PymiloServer` class
- `ML Streaming` testcases updated to support protocol selection
- `README.md` updated
- Tests config modified
- Cross decomposition params initialized in `pymilo_param`
- Cross decomposition support added to `pymilo_func.py`
- `SUPPORTED_MODELS.md` updated
- `README.md` updated
- GitHub actions are limited to the `dev` and `main` branches
- `Python 3.13` added to `test.yml`
## [1.0] - 2024-09-16
### Added
- Compression method test in `ML Streaming` RESTful testcases
- `CLI` handler in `tests/test_ml_streaming/run_server.py`
- `Compression` Enum in `streaming.compressor.py`
- `GZIPCompressor` class in `streaming.compressor.py`
- `ZLIBCompressor` class in `streaming.compressor.py`
- `LZMACompressor` class in `streaming.compressor.py`
- `BZ2Compressor` class in `streaming.compressor.py`
- `encrypt_compress` function in `PymiloClient`
- `parse` function in `RESTServerCommunicator`
- `is_callable_attribute` function in `PymiloServer`
- `streaming.param.py`
- `attribute_type` function in `RESTServerCommunicator`
- `AttributeTypePayload` class in `RESTServerCommunicator`
- `attribute_type` function in `RESTClientCommunicator`
- `Mode` Enum in `PymiloClient`
- Import from url testcases
- `download_model` function in `utils.util.py`
- `PymiloServer` class in `streaming.pymilo_server.py`
- `PymiloClient` class in `streaming.pymilo_client.py`
- `Communicator` interface in `streaming.interfaces.py`
- `RESTClientCommunicator` class in `streaming.communicator.py`
- `RESTServerCommunicator` class in `streaming.communicator.py`
- `Compressor` interface in `streaming.interfaces.py`
- `DummyCompressor` class in `streaming.compressor.py`
- `Encryptor` interface in `streaming.interfaces.py`
- `DummyEncryptor` class in `streaming.encryptor.py`
- `ML Streaming` RESTful testcases
- `streaming-requirements.txt`
### Changed
- `README.md` updated
- `ML Streaming` RESTful testcases
- `attribute_call` function in `RESTServerCommunicator`
- `AttributeCallPayload` class in `RESTServerCommunicator`
- `upload` function in `RESTClientCommunicator`
- `download` function in `RESTClientCommunicator`
- `__init__` function in `RESTClientCommunicator`
- `attribute_calls` function in `RESTClientCommunicator`
- `requests` added to `requirements.txt`
- `uvicorn`, `fastapi`, `requests` and `pydantic` added to `dev-requirements.txt`
- `ML Streaming` RESTful testcases
- `__init__` function in `PymiloServer`
- `__getattr__` function in `PymiloClient`
- `__init__` function in `PymiloClient`
- `toggle_mode` function in `PymiloClient`
- `upload` function in `PymiloClient`
- `download` function in `PymiloClient`
- `__init__` function in `PymiloServer`
- `serialize_cfnode` function in `transporters.cfnode_transporter.py`
- `__init__` function in `Import` class
- `serialize` function in `transporters.tree_transporter.py`
- `deserialize` function in `transporters.tree_transporter.py`
- `serialize` function in `transporters.sgdoptimizer_transporter.py`
- `deserialize` function in `transporters.sgdoptimizer_transporter.py`
- `serialize` function in `transporters.randomstate_transporter.py`
- `deserialize` function in `transporters.randomstate_transporter.py`
- `serialize` function in `transporters.bunch_transporter.py`
- `deserialize` function in `transporters.bunch_transporter.py`
- `serialize` function in `transporters.adamoptimizer_transporter.py`
- `deserialize` function in `transporters.adamoptimizer_transporter.py`
- `serialize_linear_model` function in `chains.linear_model_chain.py`
- `serialize_ensemble` function in `chains.ensemble_chain.py`
- `serialize` function in `GeneralDataStructureTransporter` Transporter refactored
- `get_deserialized_list` function in `GeneralDataStructureTransporter` Transporter refactored
- `Export` class call by reference bug fixed
## [0.9] - 2024-07-01
### Added
- Anaconda workflow
- `prefix_list` function in `utils.util.py`
- `KBinsDiscretizer` preprocessing model
- `PowerTransformer` preprocessing model
- `SplineTransformer` preprocessing model
- `TargetEncoder` preprocessing model
- `QuantileTransformer` preprocessing model
- `RobustScaler` preprocessing model
- `PolynomialFeatures` preprocessing model
- `OrdinalEncoder` preprocessing model
- `Normalizer` preprocessing model
- `MaxAbsScaler` preprocessing model
- `MultiLabelBinarizer` preprocessing model
- `KernelCenterer` preprocessing model
- `FunctionTransformer` preprocessing model
- `Binarizer` preprocessing model
- Preprocessing models test runner
### Changed
- `Command` enum class in `transporter.py`
- `SerializationErrorTypes` enum class in `serialize_exception.py`
- `DeserializationErrorTypes` enum class in `deserialize_exception.py`
- `meta.yaml` modified
- `NaN` type in `pymilo_param`
- `NaN` type transportation in `GeneralDataStructureTransporter` Transporter
- `BSpline` Transportation in `PreprocessingTransporter` Transporter
- one layer deeper transportation in `PreprocessingTransporter` Transporter
- dictating outer ndarray dtype in `GeneralDataStructureTransporter` Transporter
- preprocessing params fulfilled in `pymilo_param`
- `SUPPORTED_MODELS.md` updated
- `README.md` updated
- `serialize_possible_ml_model` in the Ensemble chain
## [0.8] - 2024-05-06
### Added
- `StandardScaler` Transformer in `pymilo_param.py`
- `PreprocessingTransporter` Transporter
- ndarray shape config in `GeneralDataStructure` Transporter
- `util.py` in chains
- `BinMapperTransporter` Transporter
- `BunchTransporter` Transporter
- `GeneratorTransporter` Transporter
- `TreePredictorTransporter` Transporter
- `AdaBoostClassifier` model
- `AdaBoostRegressor` model
- `BaggingClassifier` model
- `BaggingRegressor` model
- `ExtraTreesClassifier` model
- `ExtraTreesRegressor` model
- `GradientBoostingClassifier` model
- `GradientBoostingRegressor` model
- `HistGradientBoostingClassifier` model
- `HistGradientBoostingRegressor` model
- `RandomForestClassifier` model
- `RandomForestRegressor` model
- `IsolationForest` model
- `RandomTreesEmbedding` model
- `StackingClassifier` model
- `StackingRegressor` model
- `VotingClassifier` model
- `VotingRegressor` model
- `Pipeline` model
- Ensemble models test runner
- Ensemble chain
- `SECURITY.md`
### Changed
- `Pipeline` test updated
- `LabelBinarizer`, `LabelEncoder` and `OneHotEncoder` got embedded in `PreprocessingTransporter`
- Preprocessing support added to Ensemble chain
- Preprocessing params initialized in `pymilo_param`
- `util.py` in utils updated
- `test_pymilo.py` updated
- `pymilo_func.py` updated
- `linear_model_chain.py` updated
- `neural_network_chain.py` updated
- `decision_tree_chain.py` updated
- `clustering_chain.py` updated
- `naive_bayes_chain.py` updated
- `neighbours_chain.py` updated
- `svm_chain.py` updated
- `GeneralDataStructure` Transporter updated
- `LossFunction` Transporter updated
- `AbstractTransporter` updated
- Tests config modified
- Unequal sklearn version error added in `pymilo_param.py`
- Ensemble params initialized in `pymilo_param`
- Ensemble support added to `pymilo_func.py`
- `SUPPORTED_MODELS.md` updated
- `README.md` updated
## [0.7] - 2024-04-03
### Added
- `pymilo_nearest_neighbor_test` function added to `test_pymilo.py`
- `NeighborsTreeTransporter` Transporter
- `LocalOutlierFactor` model
- `RadiusNeighborsClassifier` model
- `RadiusNeighborsRegressor` model
- `NearestCentroid` model
- `NearestNeighbors` model
- `KNeighborsClassifier` model
- `KNeighborsRegressor` model
- Neighbors models test runner
- Neighbors chain
### Changed
- Tests config modified
- Neighbors params initialized in `pymilo_param`
- Neighbors support added to `pymilo_func.py`
- `SUPPORTED_MODELS.md` updated
- `README.md` updated
## [0.6] - 2024-03-27
### Added
- `deserialize_primitive_type` function in `GeneralDataStructureTransporter`
- `is_deserialized_ndarray` function in `GeneralDataStructureTransporter`
- `deep_deserialize_ndarray` function in `GeneralDataStructureTransporter`
- `deep_serialize_ndarray` function in `GeneralDataStructureTransporter`
- `SVR` model
- `SVC` model
- `One Class SVM` model
- `NuSVR` model
- `NuSVC` model
- `Linear SVR` model
- `Linear SVC` model
- SVM models test runner
- SVM chain
### Changed
- `pymilo_param.py` updated
- `pymilo_obj.py` updated to use predefined strings
- `TreeTransporter` updated
- `get_homogeneous_type` function in `util.py` updated
- `GeneralDataStructureTransporter` updated to use deep ndarray serializer & deserializer
- `check_str_in_iterable` updated
- `Label Binarizer` Transporter updated
- `Function` Transporter updated
- `CFNode` Transporter updated
- `Bisecting Tree` Transporter updated
- Tests config modified
- SVM params initialized in `pymilo_param`
- SVM support added to `pymilo_func.py`
- `SUPPORTED_MODELS.md` updated
- `README.md` updated
## [0.5] - 2024-01-31
### Added
- `reset` function in the `Transport` interface
- `reset` function implementation in `AbstractTransporter`
- `Gaussian Naive Bayes` declared as `GaussianNB` model
- `Multinomial Naive Bayes` model declared as `MultinomialNB` model
- `Complement Naive Bayes` model declared as `ComplementNB` model
- `Bernoulli Naive Bayes` model declared as `BernoulliNB` model
- `Categorical Naive Bayes` model declared as `CategoricalNB` model
- Naive Bayes models test runner
- Naive Bayes chain
### Changed
- `Transport` function of `AbstractTransporter` updated
- fix the order of `CFNode` fields serialization in `CFNodeTransporter`
- `GeneralDataStructureTransporter` support list of ndarray with different shapes
- Tests config modified
- Naive Bayes params initialized in `pymilo_param`
- Naive Bayes support added to `pymilo_func.py`
- `SUPPORTED_MODELS.md` updated
- `README.md` updated
## [0.4] - 2024-01-22
### Added
- `has_named_parameter` method in `util.py`
- `CFSubcluster` Transporter (inside `CFNode` Transporter)
- `CFNode` Transporter
- `Birch` model
- `SpectralBiclustering` model
- `SpectralCoclustering` model
- `MiniBatchKMeans` model
- `feature_request.yml` template
- `config.yml` for issue template
- `BayesianGaussianMixture` model
- `serialize_tuple` method in `GeneralDataStructureTransporter`
- `import_function` method in `util.py`
- `Function` Transporter
- `FeatureAgglomeration` model
- `HDBSCAN` model
- `GaussianMixture` model
- `OPTICS` model
- `DBSCAN` model
- `AgglomerativeClustering` model
- `SpectralClustering` model
- `MeanShift` model
- `AffinityPropagation` model
- `KMeans` model
- Clustering models test runner
- Clustering chain
### Changed
- `LossFunctionTransporter` enhanced to handle scikit 1.4.0 `_loss_function_` field
- Codacy Static Code Analyzer's suggestions applied
- Spectral Clustering test folder refactored
- Bug report template modified
- `GeneralDataStructureTransporter` updated
- Tests config modified
- Clustering data set preparation added to `data_exporter.py`
- Clustering params initialized in `pymilo_param`
- Clustering support added to `pymilo_func.py`
- `Python 3.12` added to `test.yml`
- `dev-requirements.txt` updated
- Code quality badges added to `README.md`
- `SUPPORTED_MODELS.md` updated
- `README.md` updated
## [0.3] - 2023-09-27
### Added
- scikit-learn decision tree models
- `ExtraTreeClassifier` model
- `ExtraTreeRegressor` model
- `DecisionTreeClassifier` model
- `DecisionTreeRegressor` model
- `Tree` Transporter
- Decision Tree chain
### Changed
- Tests config modified
- DecisionTree params initialized in `pymilo_param`
- Decision Tree support added to `pymilo_func.py`
## [0.2] - 2023-08-02
### Added
- scikit-learn neural network models
- `MLP Regressor` model
- `MLP Classifier` model
- `BernoulliRBM` model
- `SGDOptimizer` transporter
- `RandomState(MT19937)` transporter
- `Adamoptimizer` transporter
- Neural Network chain
- Neural Network exceptions
- `ndarray_to_list` method in `GeneralDataStructureTransporter`
- `list_to_ndarray` method in `GeneralDataStructureTransporter`
- `neural_network_chain.py` chain
### Changed
- `GeneralDataStructure` Transporter updated
- `LabelBinarizer` Transporter updated
- `linear model` chain updated
- GeneralDataStructure transporter enhanced
- LabelBinarizer transporter updated
- transporters' chain router added to `pymilo func`
- NeuralNetwork params initialized in `pymilo_param`
- `pymilo_test` updated to support multiple models
- `linear_model_chain` refactored
## [0.1] - 2023-06-29
### Added
- scikit-learn linear models support
- `Export` class
- `Import` class
[Unreleased]: https://github.com/openscilab/pymilo/compare/v1.6...dev
[1.6]: https://github.com/openscilab/pymilo/compare/v1.5...v1.6
[1.5]: https://github.com/openscilab/pymilo/compare/v1.4...v1.5
[1.4]: https://github.com/openscilab/pymilo/compare/v1.3...v1.4
[1.3]: https://github.com/openscilab/pymilo/compare/v1.2...v1.3
[1.2]: https://github.com/openscilab/pymilo/compare/v1.1...v1.2
[1.1]: https://github.com/openscilab/pymilo/compare/v1.0...v1.1
[1.0]: https://github.com/openscilab/pymilo/compare/v0.9...v1.0
[0.9]: https://github.com/openscilab/pymilo/compare/v0.8...v0.9
[0.8]: https://github.com/openscilab/pymilo/compare/v0.7...v0.8
[0.7]: https://github.com/openscilab/pymilo/compare/v0.6...v0.7
[0.6]: https://github.com/openscilab/pymilo/compare/v0.5...v0.6
[0.5]: https://github.com/openscilab/pymilo/compare/v0.4...v0.5
[0.4]: https://github.com/openscilab/pymilo/compare/v0.3...v0.4
[0.3]: https://github.com/openscilab/pymilo/compare/v0.2...v0.3
[0.2]: https://github.com/openscilab/pymilo/compare/v0.1...v0.2
[0.1]: https://github.com/openscilab/pymilo/compare/e887108...v0.1
================================================
FILE: CITATION.cff
================================================
cff-version: "1.2.0"
message: If you use this software, please cite our article in the
Journal of Open Source Software.
title: "PyMilo: A Python Library for ML I/O"
abstract: >-
PyMilo is an open-source Python package that addresses the limitations of
existing machine learning (ML) model storage formats by providing a
transparent, reliable, end-to-end, and safe method for exporting and
deploying trained models. Current tools rely on black-box or executable
formats that obscure internal model structures, making them difficult to
audit, verify, or safely share. Meanwhile, tensor-centric formats securely
store and transfer numerical tensors but do not capture the internal and
structural composition of classical machine-learning models (e.g.,
scikit-learn pipelines), which remain PyMilo’s primary focus. Others apply
structural transformations during export that may degrade predictive
performance and reduce the model to a limited inference-only interface. In
contrast, PyMilo serializes models in a transparent human-readable format
that preserves end-to-end model fidelity and enables reliable, safe, and
interpretable exchange.
authors:
- family-names: Rostami
given-names: AmirHosein
orcid: "https://orcid.org/0009-0000-0638-2263"
- family-names: Haghighi
given-names: Sepand
orcid: "https://orcid.org/0000-0001-9450-2375"
- family-names: Sabouri
given-names: Sadra
orcid: "https://orcid.org/0000-0003-1047-2346"
- family-names: Zolanvari
given-names: Alireza
orcid: "https://orcid.org/0000-0003-2367-8343"
contact:
- family-names: Rostami
given-names: AmirHosein
orcid: "https://orcid.org/0009-0000-0638-2263"
version: 1.4
date-released: 2025-12-23
repository-code: "https://github.com/openscilab/pymilo"
url: "https://github.com/openscilab/pymilo"
license: MIT
keywords:
- Machine Learning
- Model Deployment
- Model Serialization
- Transparency
- MLOPS
doi: 10.5281/zenodo.17783630
preferred-citation:
authors:
- family-names: Rostami
given-names: AmirHosein
orcid: "https://orcid.org/0009-0000-0638-2263"
- family-names: Haghighi
given-names: Sepand
orcid: "https://orcid.org/0000-0001-9450-2375"
- family-names: Sabouri
given-names: Sadra
orcid: "https://orcid.org/0000-0003-1047-2346"
- family-names: Zolanvari
given-names: Alireza
orcid: "https://orcid.org/0000-0003-2367-8343"
date-published: 2025-12-20
doi: 10.21105/joss.08858
issn: 2475-9066
issue: 116
journal: Journal of Open Source Software
publisher:
name: Open Journals
start: 8858
title: "PyMilo: A Python Library for ML I/O"
type: article
url: "https://joss.theoj.org/papers/10.21105/joss.08858"
volume: 10
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2022 OpenSciLab
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
----------
## Overview
PyMilo is an open source Python package that provides a simple, efficient, and safe way for users to export pre-trained machine learning models in a transparent way. By this, the exported model can be used in other environments, transferred across different platforms, and shared with others. PyMilo allows the users to export the models that are trained using popular Python libraries like scikit-learn, and then use them in deployment environments, or share them without exposing the underlying code or dependencies. The transparency of the exported models ensures reliability and safety for the end users, as it eliminates the risks of binary or pickle formats.
| PyPI Counter |
|
| Github Stars |
|
| Branch |
main |
dev |
| CI |
|
|
| Code Quality |
 |
 |
## Installation
### PyPI
- Check [Python Packaging User Guide](https://packaging.python.org/installing/)
- Run `pip install pymilo==1.6`
### Source code
- Download [Version 1.6](https://github.com/openscilab/pymilo/archive/v1.6.zip) or [Latest Source](https://github.com/openscilab/pymilo/archive/dev.zip)
- Run `pip install .`
### Conda
- Check [Conda Managing Package](https://conda.io/)
- Update Conda using `conda update conda`
- Run `conda install -c openscilab pymilo`
## Usage
### Import/Export
Imagine you want to train a `LinearRegression` model representing this equation: $y = x_0 + 2x_1 + 3$. You will create data points (`X`, `y`) and train your model as follows.
```python
import numpy as np
from sklearn.linear_model import LinearRegression
X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
y = np.dot(X, np.array([1, 2])) + 3
# y = 1 * x_0 + 2 * x_1 + 3
model = LinearRegression().fit(X, y)
pred = model.predict(np.array([[3, 5]]))
# pred = [16.] (=1 * 3 + 2 * 5 + 3)
```
Using PyMilo `Export` class you can easily serialize and export your trained model into a JSON file.
```python
from pymilo import Export
Export(model).save("model.json")
```
#### Export
The `Export` class facilitates exporting of machine learning models to JSON files.
| **Parameter** | **Description** |
| ------------- | --------------- |
| model | The machine learning model to be exported |
| **Property** | **Description** |
| ------------ | --------------- |
| data | The serialized model data including all learned parameters |
| version | The scikit-learn version used to train the model |
| type | The type/class name of the exported model |
| **Method** | **Description** |
| ---------- | --------------- |
| save | Save the exported model to a JSON file |
| to_json | Return the model as a JSON string representation |
| batch_export | Export multiple models to individual JSON files in a directory |
You can check out your model as a JSON file now.
```json
{
"data": {
"fit_intercept": true,
"copy_X": true,
"n_jobs": null,
"positive": false,
"n_features_in_": 2,
"coef_": {
"pymiloed-ndarray-list": [
1.0000000000000002,
1.9999999999999991
],
"pymiloed-ndarray-dtype": "float64",
"pymiloed-ndarray-shape": [
2
],
"pymiloed-data-structure": "numpy.ndarray"
},
"rank_": 2,
"singular_": {
"pymiloed-ndarray-list": [
1.618033988749895,
0.6180339887498948
],
"pymiloed-ndarray-dtype": "float64",
"pymiloed-ndarray-shape": [
2
],
"pymiloed-data-structure": "numpy.ndarray"
},
"intercept_": {
"value": 3.0000000000000018,
"np-type": "numpy.float64"
}
},
"sklearn_version": "1.4.2",
"pymilo_version": "0.8",
"model_type": "LinearRegression"
}
```
You can see all the learned parameters of the model in this file and change them if you want. This JSON representation is a transparent version of your model.
Now let's load it back. You can do it easily by using PyMilo `Import` class.
```python
from pymilo import Import
model = Import("model.json").to_model()
pred = model.predict(np.array([[3, 5]]))
# pred = [16.] (=1 * 3 + 2 * 5 + 3)
```
#### Import
The `Import` class facilitates importing of serialized models from JSON files, JSON strings, or URLs.
| **Parameter** | **Description** |
| ------------- | --------------- |
| file_adr | Path to the JSON file containing the serialized model |
| json_dump | JSON string representation of the serialized model |
| url | URL to download the serialized model from |
| **Property** | **Description** |
| ------------ | --------------- |
| data | The deserialized model data |
| version | The scikit-learn version of the original model |
| type | The type/class name of the imported model |
| **Method** | **Description** |
| ---------- | --------------- |
| to_model | Convert the imported data back to a scikit-learn model |
| batch_import | Import multiple models from JSON files in a directory |
This loaded model is exactly the same as the original trained model.
### ML streaming
You can easily serve your ML model from a remote server using the `ML streaming` feature of PyMilo.
⚠️ `ML streaming` feature exists in versions `>=1.0`
⚠️ In order to use `ML streaming` feature, make sure you've installed the `streaming` mode of PyMilo
⚠️ The `ML streaming` feature is under construction and is not yet considered stable.
You can choose either `REST` or `WebSocket` as the communication medium protocol.
#### Server
Let's assume you are on the remote server and you want to import the exported JSON file and start serving your model through the `REST` protocol!
```python
from pymilo import Import
from pymilo.streaming import PymiloServer, CommunicationProtocol
my_model = Import("model.json").to_model()
communicator = PymiloServer(
model=my_model,
port=8000,
communication_protocol=CommunicationProtocol["REST"],
).communicator
communicator.run()
```
#### PymiloServer
The `PymiloServer` class facilitates streaming machine learning models over a network.
| **Parameter** | **Description** |
| ------------- | --------------- |
| port | Port number for the server to listen on (default: 8000) |
| host | Host address for the server (default: "127.0.0.1") |
| compressor | Compression method from `Compression` enum |
| communication_protocol | Communication protocol from `CommunicationProtocol` enum |
The `compressor` parameter accepts values from the `Compression` enum including `NULL` (no compression), `GZIP`, `ZLIB`, `LZMA`, or `BZ2`. The `communication_protocol` parameter accepts values from the `CommunicationProtocol` enum including `REST` or `WEBSOCKET`.
| **Method** | **Description** |
| ---------- | --------------- |
| init_client | Initialize a new client with the given client ID |
| remove_client | Remove an existing client by client ID |
| init_ml_model | Initialize a new ML model for a given client |
| set_ml_model | Set or update the ML model for a client |
| remove_ml_model | Remove an existing ML model for a client |
| get_ml_models | Get all ML model IDs for a client |
| execute_model | Execute model methods or access attributes |
| grant_access | Allow a client to access another client's model |
| revoke_access | Revoke access to a client's model |
| get_allowed_models | Get models a client is allowed to access |
Now `PymiloServer` runs on port `8000` and exposes a REST API to `upload`, `download`, and retrieve **attributes**: either **data attributes** like `model.coef_` or **method attributes** like `model.predict(x_test)`.
ℹ️ By default, `PymiloServer` listens on the loopback interface (`127.0.0.1`). To make it accessible over a local network (LAN), specify your machine’s LAN IP address in the `host` parameter of the `PymiloServer` constructor.
#### Client
By using `PymiloClient`, you can easily connect to the remote `PymiloServer` and execute any functionality that the given ML model has. Let's say you want to run the `predict` function on your remote ML model and get the result:
```python
from pymilo.streaming import PymiloClient, CommunicationProtocol
pymilo_client = PymiloClient(
mode=PymiloClient.Mode.LOCAL,
server_url="SERVER_URL",
communication_protocol=CommunicationProtocol["REST"],
)
pymilo_client.toggle_mode(PymiloClient.Mode.DELEGATE)
result = pymilo_client.predict(x_test)
```
#### PymiloClient
The `PymiloClient` class facilitates working with remote PyMilo servers.
| **Parameter** | **Description** |
| ------------- | --------------- |
| model | The local ML model to wrap around |
| mode | Operating mode (LOCAL or DELEGATE) |
| compressor | Compression method from `Compression` enum |
| server_url | URL of the PyMilo server |
| communication_protocol | Communication protocol from `CommunicationProtocol` enum |
The `mode` parameter accepts two values `LOCAL` to execute operations on the local model, or `DELEGATE` to delegate operations to the remote server. The `compressor` parameter accepts values from the `Compression` enum including `NULL` (no compression), `GZIP`, `ZLIB`, `LZMA`, or `BZ2`. The `communication_protocol` parameter accepts values from the `CommunicationProtocol` enum including `REST` or `WEBSOCKET`.
| **Method** | **Description** |
| ---------- | --------------- |
| toggle_mode | Switch between LOCAL and DELEGATE modes |
| register | Register the client with the remote server |
| deregister | Deregister the client from the server |
| register_ml_model | Register an ML model with the server |
| deregister_ml_model | Deregister an ML model from the server |
| upload | Upload the local model to the remote server |
| download | Download the remote model to local |
| get_ml_models | Get all registered ML models for this client |
| grant_access | Grant access to this client's model to another client |
| revoke_access | Revoke access previously granted to another client |
| get_allowance | Get clients who have access to this client's models |
| get_allowed_models | Get models this client is allowed to access from another client |
ℹ️ If you've deployed `PymiloServer` locally (on port `8000` for instance), then `SERVER_URL` would be `http://127.0.0.1:8000` or `ws://127.0.0.1:8000` based on the selected protocol for the communication medium.
You can also download the remote ML model to your local machine and execute functions locally on your model.
Calling the `download` function on `PymiloClient` syncs the local model that `PymiloClient` wraps with the remote ML model; it doesn't save the model directly to a file.
```python
pymilo_client.download()
```
If you want to save the ML model to a local file, you can use the `Export` class.
```python
from pymilo import Export
Export(pymilo_client.model).save("model.json")
```
Now that you've synced the remote model with your local model, you can run functions.
```python
pymilo_client.toggle_mode(mode=PymiloClient.Mode.LOCAL)
result = pymilo_client.predict(x_test)
```
`PymiloClient` wraps around an ML model, either the local ML model or the remote one, and you can work with `PymiloClient` in exactly the same way as you would with the ML model itself: you can call the same functions with the same signatures.
ℹ️ Through the usage of `toggle_mode` function you can specify whether `PymiloClient` applies requests on the local ML model `pymilo_client.toggle_mode(mode=Mode.LOCAL)` or delegates it to the remote server `pymilo_client.toggle_mode(mode=Mode.DELEGATE)`
## Supported ML models
| scikit-learn | PyTorch |
| ---------------- | ---------------- |
| Linear Models ✅ | - |
| Neural Networks ✅ | - |
| Trees ✅ | - |
| Clustering ✅ | - |
| Naïve Bayes ✅ | - |
| Support Vector Machines (SVMs) ✅ | - |
| Nearest Neighbors ✅ | - |
| Ensemble Models ✅ | - |
| Pipeline Model ✅ | - |
| Preprocessing Models ✅ | - |
| Cross Decomposition Models ✅ | - |
| Feature Extractor Models ✅ | - |
| Composite Models ✅ | - |
Details are available in [Supported Models](https://github.com/openscilab/pymilo/blob/main/SUPPORTED_MODELS.md).
## Issues & bug reports
Just file an issue and describe it. We'll check it ASAP! You can also send an email to [pymilo@openscilab.com](mailto:pymilo@openscilab.com "pymilo@openscilab.com").
- Please complete the issue template
You can also join our Discord server
## Contributing
We welcome contributions! Please read our **[Contributing Guidelines](.github/CONTRIBUTING.md)** before submitting any changes.
## Acknowledgments
The [Python Software Foundation (PSF)](https://www.python.org/psf/) partially funded the PyMilo library for versions **1.0, 1.1** through a grant. [PSF](https://www.python.org/psf/) is the organization behind Python. Their mission is to promote, protect, and advance the Python programming language and to support and facilitate the growth of a diverse and international community of Python programmers.
[Trelis Research](https://trelis.com/) partially funded the PyMilo library for version **1.0** through a grant. [Trelis Research](https://trelis.com/) provides tools and tutorials for businesses and developers looking to fine-tune and deploy large language models.
## Cite
If you use PyMilo in your research, we would appreciate citations to the following paper:
[Rostami, A., Haghighi, S., Sabouri, S. and Zolanvari, A., 2025. PyMilo: A Python Library for ML I/O. *Journal of Open Source Software*, 10(116), p.8858.](https://joss.theoj.org/papers/10.21105/joss.08858)
```bibtex
@article{Rostami2025,
doi = {10.21105/joss.08858},
url = {https://doi.org/10.21105/joss.08858},
year = {2025},
publisher = {The Open Journal},
volume = {10},
number = {116},
pages = {8858},
author = {Rostami, AmirHosein and Haghighi, Sepand and Sabouri, Sadra and Zolanvari, Alireza},
title = {PyMilo: A Python Library for ML I/O},
journal = {Journal of Open Source Software}
}
```
Download [PyMilo.bib](https://raw.githubusercontent.com/openscilab/pymilo/main/paper/PyMilo.bib)
## Show your support
### Star this repo
Give a ⭐️ if this project helped you!
### Donate to our project
If you do like our project, and we hope that you do, can you please support us? Our project is not, and is never going to be, run for profit. We need the money just so we can continue doing what we do ;-) .
================================================
FILE: SECURITY.md
================================================
# Security policy
## Supported versions
| Version | Supported |
| ------------- | ------------------ |
| 1.6 | :white_check_mark: |
| < 1.6 | :x: |
## Reporting a vulnerability
Please report security vulnerabilities by email to [pymilo@openscilab.com](mailto:pymilo@openscilab.com "pymilo@openscilab.com").
If the security vulnerability is accepted, a dedicated bugfix release will be issued as soon as possible (depending on the complexity of the fix).
================================================
FILE: SUPPORTED_MODELS.md
================================================
# Supported Models
**Last Update: 2025-12-30**
Scikit-Learn
Linear Models
📚 Models Document
| ID |
Model Name |
PyMilo Version |
| 1 |
Linear Regressor |
>=0.1 |
| 2 |
Ridge Regressor |
>=0.1 |
| 3 |
Ridge Classifier |
>=0.1 |
| 4 |
Ridge Regressor CV |
>=0.1 |
| 5 |
Ridge Classifier CV |
>=0.1 |
| 6 |
Lasso |
>=0.1 |
| 7 |
Lasso CV |
>=0.1 |
| 8 |
Lasso Lars |
>=0.1 |
| 9 |
Lasso Lars CV |
>=0.1 |
| 10 |
Lasso Lars IC |
>=0.1 |
| 11 |
Multi Task Lasso |
>=0.1 |
| 12 |
Multi Task Lasso CV |
>=0.1 |
| 13 |
Elastic Net |
>=0.1 |
| 14 |
Elastic Net CV |
>=0.1 |
| 15 |
Multi Task Elastic Net |
>=0.1 |
| 16 |
Multi Task Elastic Net CV |
>=0.1 |
| 17 |
Orthogonal Matching Pursuit |
>=0.1 |
| 18 |
Orthogonal Matching Pursuit CV |
>=0.1 |
| 19 |
Bayesian Regressor |
>=0.1 |
| 20 |
Automatic Relevance Determination Regressor |
>=0.1 |
| 21 |
Logistic Regressor |
>=0.1 |
| 22 |
Logistic Regressor CV |
>=0.1 |
| 23 |
Tweedie Regressor |
>=0.1 |
| 24 |
Poisson Regressor |
>=0.1 |
| 25 |
Gamma Regressor |
>=0.1 |
| 26 |
SGD Regressor |
>=0.1 |
| 27 |
SGD Classifier |
>=0.1 |
| 28 |
SGD oneclass SVM |
>=0.1 |
| 29 |
Perceptron |
>=0.1 |
| 30 |
Passive Aggressive Regressor |
>=0.1 |
| 31 |
Passive Aggressive Classifier |
>=0.1 |
| 32 |
OMP |
>=0.1 |
| 33 |
OMP CV |
>=0.1 |
| 34 |
Ransac Regressor |
>=0.1 |
| 35 |
Theil Regressor |
>=0.1 |
| 36 |
Huber Regressor |
>=0.1 |
| 37 |
Quantile Regressor |
>=0.1 |
Neural Networks
📚 Models Document
| ID |
Model Name |
PyMilo Version |
| 1 |
Multi Layer Perceptron Regression |
>=0.2 |
| 2 |
Multi Layer Perceptron Classifier |
>=0.2 |
| 3 |
Bernoulli RBM |
>=0.2 |
Decision Trees
📚 Models Document
| ID |
Model Name |
PyMilo Version |
| 1 |
Decision Tree Regressor |
>=0.3 |
| 2 |
Decision Tree Classifier |
>=0.3 |
| 3 |
Extra Tree Regressor |
>=0.3 |
| 4 |
Extra Tree Classifier |
>=0.3 |
Clustering Models
📚 Models Document
| ID |
Model Name |
PyMilo Version |
| 1 |
Kmeans |
>=0.4 |
| 2 |
Bisecting Kmeans |
>=0.4 |
| 3 |
Mini Batch KMeans |
>=0.4 |
| 4 |
Affinity Propagation |
>=0.4 |
| 5 |
Mean Shift |
>=0.4 |
| 6 |
Spectral Clustering |
>=0.4 |
| 7 |
Spectral Biclustering |
>=0.4 |
| 8 |
Spectral Coclustering |
>=0.4 |
| 9 |
Agglomerative Clustering |
>=0.4 |
| 10 |
Feature Agglomeration |
>=0.4 |
| 11 |
DBScan |
>=0.4 |
| 12 |
HDBScan |
>=0.4 |
| 13 |
Optics |
>=0.4 |
| 14 |
Gaussian Mixture |
>=0.4 |
| 15 |
Bayesian Gaussian Mixture |
>=0.4 |
| 16 |
Birch |
>=0.4 |
Naive Bayes
📚 Models Document
| ID |
Model Name |
PyMilo Version |
| 1 |
Gaussian Naive Bayes |
>=0.5 |
| 2 |
Multinomial Naive Bayes |
>=0.5 |
| 3 |
Bernoulli Naive Bayes |
>=0.5 |
| 4 |
Complement Naive Bayes |
>=0.5 |
| 5 |
Categorical Naive Bayes |
>=0.5 |
Support Vector Machine
📚 Models Document
| ID |
Model Name |
PyMilo Version |
| 1 |
Linear SVC |
>=0.6 |
| 2 |
Linear SVR |
>=0.6 |
| 3 |
NuSVC |
>=0.6 |
| 4 |
NuSVR |
>=0.6 |
| 5 |
One Class SVM |
>=0.6 |
| 6 |
SVC |
>=0.6 |
| 7 |
SVR |
>=0.6 |
Neighbors
📚 Models Document
| ID |
Model Name |
PyMilo Version |
| 1 |
KNeighborsClassifier |
>=0.7 |
| 2 |
KNeighborsRegressor |
>=0.7 |
| 3 |
NearestNeighbors |
>=0.7 |
| 4 |
NearestCentroid |
>=0.7 |
| 5 |
RadiusNeighborsClassifier |
>=0.7 |
| 6 |
RadiusNeighborsRegressor |
>=0.7 |
| 7 |
LocalOutlierFactor |
>=0.7 |
Ensemble
📚 Models Document
| ID |
Model Name |
PyMilo Version |
| 1 |
AdaboostClassifier |
>=0.8 |
| 2 |
AdaboostRegressor |
>=0.8 |
| 3 |
BaggingClassifier |
>=0.8 |
| 4 |
BaggingRegressor |
>=0.8 |
| 5 |
ExtraTreesClassifier |
>=0.8 |
| 6 |
ExtraTreesRegressor |
>=0.8 |
| 7 |
GradientBoosterClassifier |
>=0.8 |
| 8 |
GradientBoosterRegressor |
>=0.8 |
| 9 |
HistGradientBoostingClassifier |
>=0.8 |
| 10 |
HistGradientBoostingRegressor |
>=0.8 |
| 11 |
RandomForestClassifier |
>=0.8 |
| 12 |
RandomForestRegressor |
>=0.8 |
| 13 |
StackingClassifier |
>=0.8 |
| 14 |
StackingRegressor |
>=0.8 |
| 15 |
VotingClassifier |
>=0.8 |
| 16 |
VotingRegressor |
>=0.8 |
| 17 |
IsolationForest |
>=0.8 |
| 18 |
RandomTreesEmbedding |
>=0.8 |
Pipeline
📚 Models Document
| ID |
Model Name |
PyMilo Version |
| 1 |
Pipeline |
>=0.8 |
Preprocessing Modules
📚 Models Document
| ID |
Model Name |
PyMilo Version |
| 1 |
OneHotEncoder |
>=0.8 |
| 2 |
LabelBinarizer |
>=0.1 |
| 3 |
LabelEncoder |
>=0.8 |
| 4 |
StandardScaler |
>=0.8 |
| 5 |
Binarizer |
>=0.9 |
| 6 |
FunctionTransformer |
>=0.9 |
| 7 |
KernelCenterer |
>=0.9 |
| 8 |
MultiLabelBinarizer |
>=0.9 |
| 9 |
MaxAbsScaler |
>=0.9 |
| 10 |
Normalizer |
>=0.9 |
| 11 |
OrdinalEncoder |
>=0.9 |
| 12 |
PolynomialFeatures |
>=0.9 |
| 13 |
RobustScaler |
>=0.9 |
| 14 |
QuantileTransformer |
>=0.9 |
| 15 |
KBinsDiscretizer |
>=0.9 |
| 16 |
PowerTransformer |
>=0.9 |
| 17 |
SplineTransformer |
>=0.9 |
| 18 |
TargetEncoder |
>=0.9 |
Cross Decomposition Modules
📚 Models Document
| ID |
Model Name |
PyMilo Version |
| 1 |
PLSRegression |
>=1.1 |
| 2 |
PLSCanonical |
>=1.1 |
| 3 |
CCA |
>=1.1 |
📚 Models Document
| ID |
Model Name |
PyMilo Version |
| 1 |
DictVectorizer |
>=1.3 |
| 2 |
FeatureHasher |
>=1.3 |
| 3 |
PatchExtractor |
>=1.3 |
| 4 |
CountVectorizer |
>=1.3 |
| 5 |
HashingVectorizer |
>=1.3 |
| 6 |
TfidfTransformer |
>=1.3 |
| 7 |
TfidfVectorizer |
>=1.3 |
📚 Models Document
| ID |
Model Name |
PyMilo Version |
| 1 |
ColumnTransformer |
>=1.5 |
| 2 |
TransformedTargetRegressor |
>=1.5 |
================================================
FILE: autopep8.bat
================================================
python -m autopep8 pymilo --recursive --aggressive --aggressive --in-place --pep8-passes 2000 --max-line-length 120 --verbose --ignore=E721
python -m autopep8 otherfiles --recursive --aggressive --aggressive --in-place --pep8-passes 2000 --max-line-length 120 --verbose --ignore=E721
python -m autopep8 setup.py --recursive --aggressive --aggressive --in-place --pep8-passes 2000 --max-line-length 120 --verbose
================================================
FILE: autopep8.sh
================================================
#!/bin/sh
# Reformat the project sources in place with autopep8 (max line length 120).
# The `pymilo` package and the `otherfiles` helper scripts are processed with
# the E721 check ignored; `setup.py` is processed without that exclusion.
python -m autopep8 pymilo --recursive --aggressive --aggressive --in-place --pep8-passes 2000 --max-line-length 120 --verbose --ignore=E721
python -m autopep8 otherfiles --recursive --aggressive --aggressive --in-place --pep8-passes 2000 --max-line-length 120 --verbose --ignore=E721
python -m autopep8 setup.py --recursive --aggressive --aggressive --in-place --pep8-passes 2000 --max-line-length 120 --verbose
================================================
FILE: codecov.yml
================================================
codecov:
require_ci_to_pass: yes
coverage:
precision: 2
round: up
range: "70...100"
status:
patch:
default:
enabled: no
project:
default:
threshold: 1%
================================================
FILE: dev-requirements.txt
================================================
numpy==2.2.4
scikit-learn==1.6.1
scipy>=0.19.1
uvicorn==0.39.0
fastapi==0.128.8
requests==2.32.5
websockets==15.0.1
pydantic==2.12.5
setuptools>=40.8.0
vulture>=1.0
bandit>=1.5.1
pydocstyle>=3.0.0
pytest>=4.3.1
pytest-cov>=2.6.1
Pillow>=8.4.0
================================================
FILE: otherfiles/RELEASE.md
================================================
# PyMilo Release Instructions
#### Last Update: 2024-04-24
1. Create the `release` branch under `dev`
2. Update all version tags
1. `setup.py`
2. `README.md`
3. `SECURITY.md`
4. `otherfiles/version_check.py`
5. `otherfiles/meta.yaml`
6. `pymilo/pymilo_param.py`
3. Update `CHANGELOG.md`
1. Add a new header under `Unreleased` section (Example: `## [0.1] - 2022-08-17`)
2. Add a new compare link to the end of the file (Example: `[0.2]: https://github.com/openscilab/pymilo/compare/v0.1...v0.2`)
3. Update `dev` compare link (Example: `[Unreleased]: https://github.com/openscilab/pymilo/compare/v0.2...dev`)
4. Update `.github/ISSUE_TEMPLATE/bug_report.yml`
1. Add new version tag to `PyMilo version` dropdown options
5. Create a PR from `release` to `dev`
1. Title: `Version x.x` (Example: `Version 0.1`)
2. Tag all related issues
3. Labels: `release`
4. Set milestone
5. Wait for all CI checks to pass
6. Need review (**2** reviewers)
7. Squash and merge
8. Delete `release` branch
6. Merge `dev` branch into `main`
1. `git checkout main`
2. `git merge dev`
3. `git push origin main`
4. Wait for all CI checks to pass
7. Create a new release
1. Target branch: `main`
2. Tag: `vx.x` (Example: `v0.1`)
3. Title: `Version x.x` (Example: `Version 0.1`)
4. Copy changelogs
5. Tag all related issues
8. Bump!!
9. Close this version's issues
10. Close milestone
================================================
FILE: otherfiles/meta.yaml
================================================
{% set name = "pymilo" %}
{% set version = "1.6" %}
package:
name: {{ name|lower }}
version: {{ version }}
source:
git_url: https://github.com/openscilab/pymilo
git_rev: v{{ version }}
build:
noarch: python
number: 0
script: {{ PYTHON }} -m pip install . -vv
requirements:
host:
- pip
- setuptools
- python >=3.7
run:
- python >=3.7
- numpy >=1.9.0
- scikit-learn >=0.22.2
- scipy >=0.19.1
- requests>=2.0.0
- uvicorn>=0.14.0
- fastapi>=0.68.0
- pydantic>=1.5.0
- websockets>=9.0
about:
home: https://github.com/openscilab/pymilo
license: MIT
license_family: MIT
summary: Python library for machine learning input and output
description: |
Pymilo is an open source Python package that provides a simple, efficient, and
safe way for users to export pre-trained machine learning models in a transparent way.
By this, the exported model can be used in other environments, transferred across different platforms,
and shared with others. Pymilo allows the users to export the models that are
trained using popular Python libraries like scikit-learn, and then use them in deployment environments,
or share them without exposing the underlying code or dependencies.
The transparency of the exported models ensures reliability and safety for the end users,
as it eliminates the risks of binary or pickle formats.
Website: https://openscilab.com
Repo: https://github.com/openscilab/pymilo
extra:
recipe-maintainers:
- AHReccese
================================================
FILE: otherfiles/requirements-splitter.py
================================================
# -*- coding: utf-8 -*-
"""Requirements splitter."""
# Keep only the requirement lines that are not pinned with "==".
with open('dev-requirements.txt', 'r') as dev_file:
    unpinned_lines = [entry for entry in dev_file if '==' not in entry]
test_req = "".join(unpinned_lines)
# Write the remaining lines out as the test requirements file.
with open('test-requirements.txt', 'w') as test_file:
    test_file.write(test_req)
================================================
FILE: otherfiles/version_check.py
================================================
# -*- coding: utf-8 -*-
"""Version-check script."""
import os
import sys
import codecs
# Number of files whose version tags failed the check.
Failed = 0
# Version string every checked file is expected to carry.
PYMILO_VERSION = "1.6"

# Per-file templates; "{0}" is filled with PYMILO_VERSION before searching.
SETUP_ITEMS = [
    "version='{0}'",
    "https://github.com/openscilab/pymilo/tarball/v{0}",
]
README_ITEMS = [
    "[Version {0}](https://github.com/openscilab/pymilo/archive/v{0}.zip)",
    "pip install pymilo=={0}",
]
CHANGELOG_ITEMS = [
    "## [{0}]",
    "https://github.com/openscilab/pymilo/compare/v{0}...dev",
    "[{0}]:",
]
PARAMS_ITEMS = [
    'PYMILO_VERSION = "{0}"',
]
META_ITEMS = [
    '% set version = "{0}" %',
]
ISSUE_TEMPLATE_ITEMS = [
    "- PyMilo {0}",
]
SECURITY_ITEMS = [
    "| {0} | :white_check_mark: |",
    "| < {0} | :x: |",
]

# Maps each file to check onto the templates it must contain.
FILES = {
    os.path.join("otherfiles", "meta.yaml"): META_ITEMS,
    "setup.py": SETUP_ITEMS,
    "README.md": README_ITEMS,
    "CHANGELOG.md": CHANGELOG_ITEMS,
    "SECURITY.md": SECURITY_ITEMS,
    os.path.join("pymilo", "pymilo_param.py"): PARAMS_ITEMS,
    os.path.join(".github", "ISSUE_TEMPLATE", "bug_report.yml"): ISSUE_TEMPLATE_ITEMS,
}
# One test per checked file.
TEST_NUMBER = len(FILES)
def print_result(failed=False):
    """
    Print the overall outcome followed by the passed/total counter.

    :param failed: failed flag
    :type failed: bool
    :return: None
    """
    outcome = "failed!" if failed else "passed!"
    print("\nVersion tag tests " + outcome)
    print("Passed : {0}/{1}".format(TEST_NUMBER - Failed, TEST_NUMBER))
if __name__ == "__main__":
    # Check every listed file for all of its expected version tags.
    for file_name, test_items in FILES.items():
        try:
            # Read through a context manager so the handle is always closed
            # (the bare ``codecs.open(...).read()`` left it open).
            with codecs.open(file_name, "r", "utf-8", 'ignore') as version_file:
                file_content = version_file.read()
            for test_item in test_items:
                if test_item.format(PYMILO_VERSION) not in file_content:
                    print("Incorrect version tag in " + file_name)
                    Failed += 1
                    # One missing tag is enough to fail this file.
                    break
        except Exception as e:
            # An unreadable file counts as a failed check.
            Failed += 1
            print("Error in " + file_name + "\n" + "Message : " + str(e))
    # The exit status reflects the result: 0 on success, 1 on any failure.
    if Failed == 0:
        print_result(False)
        sys.exit(0)
    else:
        print_result(True)
        sys.exit(1)
================================================
FILE: paper/.gitignore
================================================
/refs
================================================
FILE: paper/PyMilo.bib
================================================
@article{Rostami2025,
doi = {10.21105/joss.08858},
url = {https://doi.org/10.21105/joss.08858},
year = {2025},
publisher = {The Open Journal},
volume = {10},
number = {116},
pages = {8858},
author = {Rostami, AmirHosein and Haghighi, Sepand and Sabouri, Sadra and Zolanvari, Alireza},
title = {PyMilo: A Python Library for ML I/O},
journal = {Journal of Open Source Software}
}
================================================
FILE: paper/paper.bib
================================================
@article{Raschka2020,
author = {Sebastian Raschka and Joshua Patterson and Corey Nolet},
title = {Machine Learning in {P}ython: Main Developments and Technology Trends in Data Science, Machine Learning, and Artificial Intelligence},
journal = {Information},
volume = {11},
number = {4},
pages = {193},
year = {2020},
doi = {10.3390/info11040193}
}
@inproceedings{parida2025model,
author={Parida, Shreyas Kumar and Gerostathopoulos, Ilias and Bogner, Justus},
booktitle={2025 IEEE/ACM 4th International Conference on AI Engineering – Software Engineering for AI (CAIN)},
title={How Do Model Export Formats Impact the Development of {ML}-Enabled Systems? A Case Study on Model Integration},
year={2025},
volume={},
number={},
pages={48--59},
doi={10.1109/CAIN66642.2025.00014}
}
@inproceedings{davis2023reusing,
title={Reusing Deep Learning Models: Challenges and Directions in Software Engineering},
author={Davis, James C and Jajal, Purvish and Jiang, Wenxin and Schorlemmer, Taylor R and Synovic, Nicholas and Thiruvathukal, George K},
booktitle={2023 IEEE John Vincent Atanasoff International Symposium on Modern Computing (JVA)},
pages={17--30},
year={2023},
organization={IEEE},
doi={10.1109/JVA60410.2023.00015}
}
@article{Garbin2022,
author = {Cristina Garbin and Osvaldo Marques},
title = {Assessing Methods and Tools to Improve Reporting, Increase Transparency, and Reduce Failures in Machine Learning Applications in Health Care},
journal = {Radiology: Artificial Intelligence},
volume = {4},
number = {2},
pages = {e210127},
year = {2022},
doi = {10.1148/ryai.210127},
}
@article{bodimani2024assessing,
title={Assessing The Impact of Transparent {AI} Systems in Enhancing User Trust and Privacy},
volume={5},
url={https://thesciencebrigade.com/jst/article/view/68},
number={1},
journal={Journal of Science \& Technology},
author={Bodimani, Meghasai},
year={2024},
month={Feb.},
pages={50--67},
}
@misc{Brownlee2018,
author = {Jason Brownlee},
title = {Save and Load Machine Learning Models in {P}ython with scikit-learn},
howpublished = {\url{https://machinelearningmastery.com/save-load-machine-learning-models-python-scikit-learn/}},
year = {2018},
note = {Accessed: 2024-05-22}
}
@misc{PythonPickleDocs,
author = {{Python Software Foundation}},
title = {{p}ickle — {P}ython object serialization},
year = {2024},
howpublished = {\url{https://docs.python.org/3/library/pickle.html#security}},
}
@software{onnx,
author = {Bai, Junjie and Lu, Fang and Zhang, Ke and others},
title = {{ONNX} ({O}pen {N}eural {N}etwork {E}xchange)},
url = {https://github.com/onnx/onnx},
version = {1.18.0},
date = {2025-05-12},
}
@article{pmml,
title={{PMML}: An Open Standard for Sharing Models},
author={Guazzelli, Alex and Zeller, Michael and Lin, Wen-Ching and Williams, Graham},
year={2009},
doi={10.32614/RJ-2009-010}
}
@article{jajal2023analysis,
title={Analysis of Failures and Risks in Deep Learning Model Converters: A Case Study in the {ONNX} Ecosystem},
author={Jajal, Purvish and Jiang, Wenxin and Tewari, Arav and Kocinare, Erik and Woo, Joseph and Sarraf, Anusha and Lu, Yung-Hsiang and Thiruvathukal, George K and Davis, James C},
journal={arXiv preprint arXiv:2303.17708},
year={2023},
doi={10.48550/arXiv.2303.17708}
}
@inproceedings{cody2024extending,
title={On Extending the {A}utomatic {T}est {M}arkup {L}anguage ({ATML}) for Machine Learning},
author={Cody, Tyler and Li, Bingtong and Beling, Peter},
booktitle={2024 IEEE International Systems Conference (SysCon)},
pages={1--8},
year={2024},
organization={IEEE},
doi={10.1109/SysCon61195.2024.10553464}
}
@software{skops,
author = {{skops-dev}},
title = {{SKOPS}},
url = {https://github.com/skops-dev/skops},
version = {0.11.0},
date = {2024-12-10},
}
@article{tfjs2019,
title={Tensor{F}low.js: Machine Learning for the Web and Beyond},
author={Smilkov, Daniel and Thorat, Nikhil and Assogba, Yannick and Nicholson, Charles and Kreeger, Nick and Yu, Ping and Cai, Shanqing and Nielsen, Eric and Soegel, David and Bileschi, Stan and others},
journal={Proceedings of Machine Learning and Systems},
volume={1},
pages={309--321},
year={2019},
doi={10.48550/arXiv.1901.05350}
}
@inproceedings{quan2022towards,
title={Towards Understanding the Faults of {J}ava{S}cript-Based Deep Learning Systems},
author={Quan, Lili and Guo, Qianyu and Xie, Xiaofei and Chen, Sen and Li, Xiaohong and Liu, Yang},
booktitle={Proceedings of the 37th IEEE/ACM International Conference on Automated Software Engineering},
pages={1--13},
year={2022},
doi={10.1145/3551349.3560427}
}
@misc{NerdCorner2025,
author = {{Nerd Corner}},
title = {Tensor{F}low.js vs {T}ensor{F}low ({P}ython)},
year = {2025},
month = {Mar},
howpublished = {\url{https://nerd-corner.com/tensorflow-js-vs-tensorflow-python/}}
}
@inproceedings{rauker2023toward,
title={Toward Transparent {AI}: A Survey on Interpreting the Inner Structures of Deep Neural Networks},
author={R{\"a}uker, Tilman and Ho, Anson and Casper, Stephen and Hadfield-Menell, Dylan},
booktitle={2023 IEEE Conference on Secure and Trustworthy Machine Learning (SaTML)},
pages={464--483},
year={2023},
organization={IEEE},
doi={10.1109/SaTML54575.2023.00039}
}
@article{macrae2019governing,
title={Governing the safety of artificial intelligence in healthcare},
author={Macrae, Carl},
journal={BMJ quality \& safety},
volume={28},
number={6},
pages={495--498},
year={2019},
publisher={BMJ Publishing Group Ltd},
doi={10.1136/bmjqs-2019-009484}
}
@software{huggingface_safetensors_2022,
author = {{Hugging Face}},
title = {Safetensors - {ML} Safer for All},
year = {2025},
version = {0.6.2},
url = {https://github.com/huggingface/safetensors},
}
================================================
FILE: paper/paper.md
================================================
---
title: 'PyMilo: A Python Library for ML I/O'
tags:
- Machine Learning
- Model Deployment
- Model Serialization
- Transparency
- MLOPS
authors:
- name: AmirHosein Rostami
orcid: 0009-0000-0638-2263
corresponding: true
affiliation: "1, 2"
- name: Sepand Haghighi
orcid: 0000-0001-9450-2375
corresponding: false
affiliation: 1
- name: Sadra Sabouri
orcid: 0000-0003-1047-2346
corresponding: false
affiliation: "1, 3"
- name: Alireza Zolanvari
orcid: 0000-0003-2367-8343
corresponding: false
affiliation: 1
affiliations:
- index: 1
name: Open Science Lab
- index: 2
name: University of Toronto, Toronto, Canada
ror: 03dbr7087
- index: 3
name: University of Southern California, Los Angeles, United States
ror: 03taz7m60
date: 24 June 2025
bibliography: paper.bib
---
# Summary
PyMilo is an open-source Python package that addresses the limitations of existing machine learning (ML) model storage formats by providing a transparent, reliable, end-to-end, and safe method for exporting and deploying trained models.
Current tools rely on black-box or executable formats that obscure internal model structures, making them difficult to audit, verify, or safely share. Meanwhile, tensor-centric formats such as Safetensors [@huggingface_safetensors_2022] securely store and transfer numerical tensors but do not capture the internal and structural composition of classical machine-learning models (e.g., scikit-learn pipelines), which remain PyMilo’s primary focus.
Others apply structural transformations during export that may degrade predictive performance and reduce the model to a limited inference-only interface.
In contrast, PyMilo serializes models in a transparent human-readable format that preserves end-to-end model fidelity and enables reliable, safe, and interpretable exchange. Here, transparent refers to the ability to inspect model internals through a human-readable structure without execution, and end-to-end fidelity denotes that a model exported and re-imported with PyMilo retains the exact same signature, functionality, parameters, and internal structure as the original, ensuring complete behavioral and structural equivalence.
This package is designed to make the preservation and reuse of trained ML models safer, more interpretable, and easier to manage across different stages of the ML workflow (\autoref{fig:overall}).

\newpage
# Statement of Need
Modern machine learning development is largely centered around the Python ecosystem, which has become a dominant platform for building and training models due to its rich libraries and community support [@Raschka2020].
However, once a model is trained, sharing or deploying it securely and transparently remains a significant challenge [@parida2025model; @davis2023reusing]. This issue is especially important in high-stakes domains such as healthcare, where ensuring model accountability and integrity is critical [@Garbin2022].
In such settings, any lack of clarity about a model’s internal logic or origin can reduce trust in its predictions. Researchers have increasingly emphasized that greater transparency in AI systems is critical for maintaining user trust and protecting privacy in machine learning applications [@bodimani2024assessing].
Despite ongoing concerns around transparency and safety, the dominant approach for exchanging pretrained models remains ad hoc binary serialization, most commonly through Python’s `pickle` module or its variant `joblib`.
These formats allow developers to store complex model objects with minimal effort, but they were never designed with security or human interpretability in mind [@parida2025model]. In fact, loading a pickle file may execute arbitrary code contained within it, a known vulnerability that can be exploited if the file is maliciously crafted [@Brownlee2018; @PythonPickleDocs].
While these methods preserve full model fidelity within the Python ecosystem, they pose serious security risks and lack transparency, as the serialized files are opaque binary blobs that cannot be inspected without loading.
Furthermore, compatibility is fragile because pickled models often depend on specific library versions, which may hinder long-term reproducibility [@Brownlee2018].
To improve portability across environments, several standardized model interchange formats have been developed alongside `pickle`.
Most notably, Open Neural Network Exchange (ONNX) and Predictive Model Markup Language (PMML) convert trained models into framework-agnostic representations [@onnx; @pmml], enabling deployment in diverse systems without relying on the original training code.
ONNX uses a graph-based structure built from primitive operators (e.g., linear transforms, activations), while PMML provides an XML-based specification for traditional models like decision trees and regressions.
Although these formats enhance security by avoiding executable serialization, they introduce compatibility and fidelity challenges.
Exporting complex pipelines to ONNX or PMML often leads to structural approximations, missing metadata, or unsupported components, especially for customized models [@pmml].
As a result, the exported model may differ in behavior, resulting in performance degradation or loss of accuracy [@jajal2023analysis].
@jajal2023analysis found that models exported to ONNX can produce incorrect predictions despite successful conversion, indicating semantic inconsistencies between the original and exported versions. This reflects predictive performance degradation and highlights the risks of silent behavioral drift in deployed systems.
Beyond concerns about end-to-end model preservation, ONNX and PMML also present limitations in transparency, scope, and reversibility. ONNX uses a binary protocol buffer format that is not human-readable, which limits interpretability and makes auditing difficult.
PMML, although XML-based and readable, is verbose and narrowly scoped, supporting only a limited subset of scikit-learn models. As noted by @cody2024extending, both ONNX and PMML focus on static model specification rather than operational testing or lifecycle validation workflows. Moreover, PMML does not provide a mechanism to restore exported models into Python, making it a one-way format that limits reproducibility across ML workflows.
Other tools have been developed to address specific use cases, though they remain limited in scope. For example, SKOPS improves the safety of scikit-learn model storage by enabling limited inspection of model internals without requiring code execution [@skops].
However, it supports only scikit-learn models, lacks compatibility with other frameworks, and does not provide a fully transparent or human-readable structure.
TensorFlow.js targets JavaScript environments by converting TensorFlow or Keras models into a JSON configuration file and binary weight files for execution in the browser or Node.js [@tfjs2019].
However, this process has been shown to introduce compatibility issues, performance degradation, and inconsistencies in inference behavior due to backend limitations and environment-specific faults [@quan2022towards].
Models from other frameworks, such as scikit-learn or PyTorch, must be re-implemented or retrained in TensorFlow to be exported.
Additionally, running complex models in JavaScript runtimes introduces memory and performance limitations, often making the deployment of large neural networks prohibitively slow or even infeasible in browser environments [@NerdCorner2025].
In summary, current solutions force practitioners into trade-offs between security, transparency, end-to-end fidelity, and performance preservation.
The machine learning community still lacks a safe and transparent end-to-end model serialization framework through which users can securely share models, inspect them easily, and accurately reconstruct them for use across diverse frameworks and environments.
PyMilo is proposed to address the above gaps. It is an open-source Python library that provides an end-to-end solution for exporting and importing machine learning models in a safe, non-executable, and human-readable format such as JSON. PyMilo serializes trained models into a transparent format and fully reconstructs them without structural changes, preserving their original functionality and behavior.
This process does not affect inference time or performance and imports models on any target device without additional dependencies, enabling seamless execution in inference mode.
While PyMilo may import functions from widely used scientific libraries during deserialization to restore model behavior (for example, NumPy or SciPy), the JSON representation itself never contains executable code; any remaining security risk is therefore inherited from these already-trusted dependencies rather than introduced by PyMilo’s serialization mechanism.
PyMilo benefits a wide range of stakeholders, including machine learning engineers, data scientists, and AI practitioners, by facilitating the development of more transparent and accountable AI systems. Furthermore, researchers working on transparent AI [@rauker2023toward], user privacy in ML [@bodimani2024assessing], and safe AI [@macrae2019governing] can use PyMilo as a framework that provides transparency and safety in the machine learning environment.
# References
================================================
FILE: pymilo/__init__.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo modules."""
from .pymilo_param import PYMILO_VERSION
from .pymilo_obj import Export, Import
from .exceptions import PymiloException, PymiloSerializationException, PymiloDeserializationException
# Expose the library version under the conventional ``__version__`` name.
__version__ = PYMILO_VERSION
================================================
FILE: pymilo/__main__.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo main."""
import re
import argparse
from art import tprint
from .pymilo_param import (
PYMILO_VERSION,
URL_REGEX,
CLI_MORE_INFO,
CLI_UNKNOWN_MODEL,
CLI_ML_STREAMING_NOT_INSTALLED,
)
from .pymilo_func import print_supported_ml_models, pymilo_help
from .pymilo_obj import Import
from .utils.util import get_sklearn_class
# The streaming components are imported on a best-effort basis; when the import
# fails the flag below is cleared instead of aborting the CLI.
ml_streaming_support = True
try:
    from .streaming import PymiloServer, Compression, CommunicationProtocol
except Exception:
    # ``Exception`` (rather than ``BaseException``) keeps KeyboardInterrupt and
    # SystemExit from being silently swallowed during the import.
    ml_streaming_support = False
def main():
    """
    CLI main function.

    Parse the command-line arguments, then either print the version, print the
    banner together with the help text, or start a PyMilo server.

    :return: None
    """
    parser = argparse.ArgumentParser(description='Run the Pymilo server with a specified compression method.')
    parser.add_argument(
        '--compression',
        type=str,
        choices=['NULL', 'GZIP', 'ZLIB', 'LZMA', 'BZ2'],
        default='NULL',
        help='Specify the compression method (NULL, GZIP, ZLIB, LZMA, or BZ2). Default is NULL.'
    )
    parser.add_argument(
        '--port',
        type=int,
        default=8000,
        help='Specify PyMiloServer port number',
        metavar="",
    )
    parser.add_argument(
        '--protocol',
        type=str,
        choices=['REST', 'WEBSOCKET'],
        default='REST',
        help='Specify the communication protocol (REST or WEBSOCKET). Default is REST.'
    )
    parser.add_argument(
        '--load',
        type=str,
        default=None,
        help='the `load` command specifies the path to the JSON file of the previously exported ML model by PyMilo.',
        metavar="",
    )
    parser.add_argument(
        '--init',
        type=str,
        default=None,
        help='the `init` command specifies the ML model to initialize the PyMilo Server with.',
        metavar="",
    )
    parser.add_argument(
        '--bare',
        default=False,
        action='store_true',
        help='The `bare` command starts the PyMilo Server without an internal ML model.',
    )
    parser.add_argument('--version', action='store_true', default=False, help='PyMilo version')
    parser.add_argument('-v', action='store_true', default=False, help='PyMilo version')
    args = parser.parse_args()

    def show_banner_and_help():
        """Print the PyMilo banner, the general help and the CLI usage."""
        tprint("PyMilo")
        tprint("V:" + PYMILO_VERSION)
        pymilo_help()
        parser.print_help()

    if args.version or args.v:
        print(PYMILO_VERSION)
        return
    if not ml_streaming_support:
        # The streaming imports failed, so no server can be started.
        print(CLI_ML_STREAMING_NOT_INSTALLED)
        print(CLI_MORE_INFO)
        show_banner_and_help()
        return

    _model = None
    if args.load:
        # The model source is either a URL or a local file path.
        path = args.load
        importer = Import(url=path) if re.match(URL_REGEX, path) else Import(file_adr=path)
        _model = importer.to_model()
    elif args.init:
        model_class = get_sklearn_class(args.init)
        if model_class is None:
            print(CLI_UNKNOWN_MODEL)
            # NOTE(review): print_supported_ml_models is defined elsewhere. If it
            # prints the list itself and returns None, interpolating its result
            # into an f-string would emit a literal "None"; printing the return
            # value only when present is correct for both behaviours.
            supported_models = print_supported_ml_models()
            if supported_models is not None:
                print(supported_models)
            return
        _model = model_class()
    elif not args.bare:
        # Nothing to serve was requested: show the banner and usage only.
        show_banner_and_help()
        return

    PymiloServer(
        model=_model,
        port=args.port,
        compressor=Compression[args.compression],
        communication_protocol=CommunicationProtocol[args.protocol],
    ).communicator.run()
# Run the CLI only when this module is executed as a script.
if __name__ == '__main__':
    main()
================================================
FILE: pymilo/chains/__init__.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo chains."""
================================================
FILE: pymilo/chains/chain.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo Chain Module."""
from traceback import format_exc
from abc import ABC, abstractmethod
from ..utils.util import get_sklearn_type
from ..transporters.transporter import Command
from ..exceptions.serialize_exception import PymiloSerializationException, SerializationErrorTypes
from ..exceptions.deserialize_exception import PymiloDeserializationException, DeserializationErrorTypes
class Chain(ABC):
    """
    Chain Interface.

    Each Chain serializes/deserializes the given model.
    Concrete chains implement every method declared here; ``transport`` is the
    single entry point that dispatches to ``serialize`` or ``deserialize``.
    """

    @abstractmethod
    def is_supported(self, model):
        """
        Check if the given model is a sklearn's ML model supported by this chain.

        :param model: a string name of an ML model or a sklearn object of it
        :type model: any object
        :return: check result as bool
        """

    @abstractmethod
    def transport(self, request, command, is_inner_model=False):
        """
        Return the transported (serialized or deserialized) model.

        :param request: given ML model (or its serialized form) to be transported
        :type request: any object
        :param command: command to specify whether the request should be serialized or deserialized
        :type command: transporter.Command
        :param is_inner_model: determines whether it is an inner model of a super ML model
        :type is_inner_model: boolean
        :return: the transported request, either the serialized form or a sklearn ML model
        """

    @abstractmethod
    def serialize(self, model):
        """
        Return the serialized form of the given model.

        :param model: given ML model to be serialized
        :type model: sklearn ML model
        :return: the serialized form of the given ML model
        """

    @abstractmethod
    def deserialize(self, serialized_model, is_inner_model=False):
        """
        Return the associated sklearn ML model of the given previously serialized ML model.

        :param serialized_model: previously serialized ML model to be turned back into a sklearn ML model
        :type serialized_model: obj
        :param is_inner_model: determines whether it is an inner ML model of a super ML model
        :type is_inner_model: boolean
        :return: associated sklearn ML model
        """

    @abstractmethod
    def validate(self, model, command):
        """
        Check if the provided inputs are valid in relation to each other.

        :param model: a sklearn ML model or its serialized form, produced through the pymilo export
        :type model: obj
        :param command: command to specify whether the request should be serialized or deserialized
        :type command: transporter.Command
        :return: None
        """
class AbstractChain(Chain):
    """Default Chain implementation that delegates attribute conversion to its transporters."""

    def __init__(self, transporters, supported_models):
        """
        Initialize the AbstractChain instance.

        :param transporters: worker transporters dedicated to this chain, keyed by transporter name
        :type transporters: dict
        :param supported_models: supported sklearn ML models belonging to this chain, keyed by model name
        :type supported_models: dict
        :return: an instance of the AbstractChain class
        """
        self._supported_models = supported_models
        self._transporters = transporters

    def is_supported(self, model):
        """
        Tell whether this chain handles the given model.

        :param model: a string name of an ML model or a sklearn object of it
        :type model: any object
        :return: check result as bool
        """
        if isinstance(model, str):
            return model in self._supported_models
        return get_sklearn_type(model) in self._supported_models

    def transport(self, request, command, is_inner_model=False):
        """
        Serialize or deserialize the request according to the given command.

        Top-level requests are validated first; inner models skip validation.
        Any failure inside serialize/deserialize is re-raised as the matching
        Pymilo exception carrying the original error and its traceback.

        :param request: given ML model (or its serialized form) to be transported
        :type request: any object
        :param command: command to specify whether the request should be serialized or deserialized
        :type command: transporter.Command
        :param is_inner_model: determines whether it is an inner model of a super ML model
        :type is_inner_model: boolean
        :return: the transported request, or None when the command is neither SERIALIZE nor DESERIALIZE
        """
        if not is_inner_model:
            self.validate(request, command)

        if command == Command.SERIALIZE:
            try:
                return self.serialize(request)
            except Exception as exc:
                failure = {
                    'Exception': repr(exc),
                    'Traceback': format_exc(),
                }
                raise PymiloSerializationException({
                    'error_type': SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE,
                    'error': failure,
                    'object': request,
                })

        if command == Command.DESERIALIZE:
            try:
                return self.deserialize(request, is_inner_model)
            except Exception as exc:
                failure = {
                    'Exception': repr(exc),
                    'Traceback': format_exc(),
                }
                raise PymiloDeserializationException({
                    'error_type': DeserializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE,
                    'error': failure,
                    'object': request,
                })

        return None

    def serialize(self, model):
        """
        Run every transporter over the model and return its attribute dictionary.

        :param model: given ML model to be serialized
        :type model: sklearn ML model
        :return: the ``__dict__`` of the model after all transporters have processed it
        """
        for worker in self._transporters.values():
            worker.transport(model, Command.SERIALIZE)
        return model.__dict__

    def deserialize(self, serialized_model, is_inner_model=False):
        """
        Rebuild the sklearn ML model described by the given serialized model.

        Inner models are accessed as mappings (``["type"]``/``["data"]``), top-level
        ones through their ``type``/``data`` attributes.

        :param serialized_model: previously serialized ML model to be turned back into a sklearn ML model
        :type serialized_model: obj
        :param is_inner_model: determines whether it is an inner ML model of a super ML model
        :type is_inner_model: boolean
        :return: associated sklearn ML model
        """
        model_type = serialized_model["type"] if is_inner_model else serialized_model.type
        raw_model = self._supported_models[model_type]()
        # Captured before the transporters run, exactly like the attribute values they process.
        data = serialized_model["data"] if is_inner_model else serialized_model.data

        for worker in self._transporters.values():
            worker.transport(serialized_model, Command.DESERIALIZE, is_inner_model)

        for attribute, value in data.items():
            setattr(raw_model, attribute, value)
        return raw_model

    def validate(self, model, command):
        """
        Raise the matching Pymilo exception when the model is not supported for the command.

        :param model: a sklearn ML model or its serialized form, produced through the pymilo export
        :type model: obj
        :param command: command to specify whether the request should be serialized or deserialized
        :type command: transporter.Command
        :return: None
        """
        if command == Command.SERIALIZE:
            if not self.is_supported(model):
                raise PymiloSerializationException(
                    {
                        'error_type': SerializationErrorTypes.INVALID_MODEL,
                        'object': model
                    }
                )
        elif command == Command.DESERIALIZE:
            if not self.is_supported(model.type):
                raise PymiloDeserializationException(
                    {
                        'error_type': DeserializationErrorTypes.INVALID_MODEL,
                        'object': model
                    }
                )
================================================
FILE: pymilo/chains/clustering_chain.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo chain for Clustering models."""
from ..chains.chain import AbstractChain
from ..pymilo_param import SKLEARN_CLUSTERING_TABLE, NOT_SUPPORTED
from ..transporters.cfnode_transporter import CFNodeTransporter
from ..transporters.function_transporter import FunctionTransporter
from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter
from ..transporters.preprocessing_transporter import PreprocessingTransporter
# Transporters are kept in insertion order; the chain iterates this mapping as-is.
CLUSTERING_CHAIN = dict(
    PreprocessingTransporter=PreprocessingTransporter(),
    GeneralDataStructureTransporter=GeneralDataStructureTransporter(),
    FunctionTransporter=FunctionTransporter(),
    CFNodeTransporter=CFNodeTransporter(),
)

# The BisectingKMeans-specific transporters are only loaded when that model is available.
if SKLEARN_CLUSTERING_TABLE["BisectingKMeans"] != NOT_SUPPORTED:
    from ..transporters.bisecting_tree_transporter import BisectingTreeTransporter
    from ..transporters.randomstate_transporter import RandomStateTransporter
    CLUSTERING_CHAIN.update(
        RandomStateTransporter=RandomStateTransporter(),
        BisectingTreeTransporter=BisectingTreeTransporter(),
    )

clustering_chain = AbstractChain(
    transporters=CLUSTERING_CHAIN,
    supported_models=SKLEARN_CLUSTERING_TABLE,
)
================================================
FILE: pymilo/chains/compose_chain.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo chain for compose models."""
from ..chains.chain import AbstractChain
from ..transporters.compose_transporter import ComposeTransporter
from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter
from ..transporters.function_transporter import FunctionTransporter
from ..transporters.transporter import Command
from ..pymilo_param import SKLEARN_COMPOSE_TABLE
# Transporters used by the compose chain, in the order they are applied.
COMPOSE_CHAIN = dict(
    ComposeTransporter=ComposeTransporter(),
    GeneralDataStructureTransporter=GeneralDataStructureTransporter(),
    FunctionTransporter=FunctionTransporter(),
)
class ComposeModelChain(AbstractChain):
    """Chain handling the transportation of sklearn compose models."""

    def deserialize(self, compose, is_inner_model=False):
        """
        Rebuild the sklearn compose model described by the given serialized compose.

        :param compose: previously serialized compose model to be turned back into a sklearn compose model
        :type compose: obj
        :param is_inner_model: determines whether it is an inner compose model of a super ml model
        :type is_inner_model: boolean
        :return: associated sklearn compose model
        """
        if is_inner_model:
            data, _type = compose["data"], compose["type"]
        else:
            data, _type = compose.data, compose.type

        model_class = self._supported_models[_type]
        # ColumnTransformer requires the 'transformers' argument; every other
        # compose model is built through its default constructor.
        if _type == "ColumnTransformer":
            init_kwargs = {"transformers": data.get("transformers", [])}
        else:
            init_kwargs = {}
        raw_model = model_class(**init_kwargs)

        for worker in self._transporters.values():
            worker.transport(compose, Command.DESERIALIZE, is_inner_model)

        for attribute, value in data.items():
            setattr(raw_model, attribute, value)
        return raw_model


# Module-level chain instance used by ``get_transporter`` in this module.
compose_chain = ComposeModelChain(COMPOSE_CHAIN, SKLEARN_COMPOSE_TABLE)
def get_transporter(model):
    """
    Resolve the compose transporter for the given model, if it is a compose model.

    :param model: a compose model, the name of one, or the category name "compose" (case-insensitive)
    :type model: scikit ML model or str
    :return: ("COMPOSE", transporter function) on a match, otherwise (None, None)
    """
    names_the_category = isinstance(model, str) and model.upper() == "COMPOSE"
    if names_the_category or compose_chain.is_supported(model):
        return "COMPOSE", compose_chain.transport
    return None, None
================================================
FILE: pymilo/chains/cross_decomposition_chain.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo chain for Cross Decomposition models."""
from ..chains.chain import AbstractChain
from ..pymilo_param import SKLEARN_CROSS_DECOMPOSITION_TABLE
from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter
from ..transporters.preprocessing_transporter import PreprocessingTransporter
# Chain instance for Cross Decomposition models; transporters are applied in listed order.
cross_decomposition_chain = AbstractChain(
    transporters={
        "PreprocessingTransporter": PreprocessingTransporter(),
        "GeneralDataStructureTransporter": GeneralDataStructureTransporter(),
    },
    supported_models=SKLEARN_CROSS_DECOMPOSITION_TABLE,
)
================================================
FILE: pymilo/chains/decision_tree_chain.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo chain for Decision Trees models."""
from ..chains.chain import AbstractChain
from ..pymilo_param import SKLEARN_DECISION_TREE_TABLE
from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter
from ..transporters.preprocessing_transporter import PreprocessingTransporter
from ..transporters.randomstate_transporter import RandomStateTransporter
from ..transporters.tree_transporter import TreeTransporter
# Chain instance for Decision Tree models; transporters are applied in listed order.
decision_trees_chain = AbstractChain(
    transporters={
        "PreprocessingTransporter": PreprocessingTransporter(),
        "GeneralDataStructureTransporter": GeneralDataStructureTransporter(),
        "RandomStateTransporter": RandomStateTransporter(),
        "TreeTransporter": TreeTransporter(),
    },
    supported_models=SKLEARN_DECISION_TREE_TABLE,
)
================================================
FILE: pymilo/chains/ensemble_chain.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo chain for ensemble models."""
import copy
from ast import literal_eval
from numpy import ndarray, asarray
from ..chains.chain import AbstractChain
from ..transporters.feature_extraction_transporter import FeatureExtractorTransporter
from ..transporters.binmapper_transporter import BinMapperTransporter
from ..transporters.bunch_transporter import BunchTransporter
from ..transporters.transporter import Command
from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter
from ..transporters.generator_transporter import GeneratorTransporter
from ..transporters.lossfunction_transporter import LossFunctionTransporter
from ..transporters.preprocessing_transporter import PreprocessingTransporter
from ..transporters.randomstate_transporter import RandomStateTransporter
from ..transporters.treepredictor_transporter import TreePredictorTransporter
from ..pymilo_param import SKLEARN_ENSEMBLE_TABLE
from ..utils.util import check_str_in_iterable
from .util import serialize_possible_ml_model, deserialize_possible_ml_model
# Transporters used by the ensemble chain. EnsembleModelChain runs all of them in this
# order except "GeneralDataStructureTransporter", which it applies separately at the end.
ENSEMBLE_CHAIN = dict(
    FeatureExtractorTransporter=FeatureExtractorTransporter(),
    PreprocessingTransporter=PreprocessingTransporter(),
    GeneralDataStructureTransporter=GeneralDataStructureTransporter(),
    TreePredictorTransporter=TreePredictorTransporter(),
    BinMapperTransporter=BinMapperTransporter(),
    GeneratorTransporter=GeneratorTransporter(),
    RandomStateTransporter=RandomStateTransporter(),
    LossFunctionTransporter=LossFunctionTransporter(),
    BunchTransporter=BunchTransporter(),
)
class EnsembleModelChain(AbstractChain):
    """EnsembleModelChain developed to handle sklearn Ensemble ML model transportation."""

    def serialize(self, ensemble_object):
        """
        Return the serialized form of the given ensemble model.

        The ensemble's attributes are rewritten in place: nested ML models,
        preprocessing modules and feature-extraction modules are replaced by
        their serialized form before the general data structure transporter runs.

        :param ensemble_object: given model to be serialized
        :type ensemble_object: any sklearn ensemble model
        :return: the ``__dict__`` of the given ensemble after in-place serialization
        """
        # Run every specialised transporter first; the general one is deferred to the
        # very end, after nested models have been replaced below.
        for transporter in self._transporters:
            if transporter != "GeneralDataStructureTransporter":
                self._transporters[transporter].transport(
                    ensemble_object, Command.SERIALIZE)
        pt = ENSEMBLE_CHAIN["PreprocessingTransporter"]
        fe = ENSEMBLE_CHAIN["FeatureExtractorTransporter"]
        for key, value in ensemble_object.__dict__.items():
            if isinstance(value, list):
                # Lists may hold tuples such as (name, estimator) or bare models.
                has_inner_tuple_with_ml_model = False
                for idx, item in enumerate(value):
                    if isinstance(item, tuple):
                        # Tuples are immutable, so work on a list copy of the tuple.
                        listed_tuple = list(item)
                        for inner_idx, inner_item in enumerate(listed_tuple):
                            if pt.is_preprocessing_module(inner_item):
                                listed_tuple[inner_idx] = pt.serialize_pre_module(inner_item)
                            elif fe.is_fe_module(inner_item):
                                listed_tuple[inner_idx] = fe.serialize_fe_module(inner_item)
                            else:
                                has_inner_model, result = serialize_possible_ml_model(inner_item)
                                if has_inner_model:
                                    has_inner_tuple_with_ml_model = True
                                listed_tuple[inner_idx] = result
                        value[idx] = listed_tuple
                    else:
                        value[idx] = serialize_possible_ml_model(item)[1]
                if has_inner_tuple_with_ml_model:
                    # Tag the list so deserialize can rebuild the tuples.
                    ensemble_object.__dict__[key] = {
                        "pymiloed-data-structure": "list of (str, estimator) tuples",
                        "pymiloed-data": value,
                    }
            elif isinstance(value, dict):
                if check_str_in_iterable("pymilo-bunch", value):
                    # Bunch payloads keep their wrapper; only the inner values are serialized.
                    new_value = {}
                    for inner_key, inner_value in value["pymilo-bunch"].items():
                        new_value[inner_key] = serialize_possible_ml_model(inner_value)[1]
                    value["pymilo-bunch"] = new_value
                else:
                    new_value = {}
                    for inner_key, inner_value in value.items():
                        new_value[inner_key] = serialize_possible_ml_model(inner_value)[1]
                    ensemble_object.__dict__[key] = new_value
            elif isinstance(value, ndarray):
                # Only arrays made up of ML models are replaced here.
                has_inner_model, result = serialize_models_in_ndarray(value)
                if has_inner_model:
                    ensemble_object.__dict__[key] = result
            else:
                # Any other attribute may itself be an ML model.
                ensemble_object.__dict__[key] = serialize_possible_ml_model(value)[1]
        self._transporters["GeneralDataStructureTransporter"].transport(ensemble_object, Command.SERIALIZE)
        return ensemble_object.__dict__

    def deserialize(self, ensemble, is_inner_model=False):
        """
        Return the associated sklearn ensemble model of the given ensemble.

        :param ensemble: previously serialized ensemble model to be turned back into a sklearn ensemble model
        :type ensemble: obj
        :param is_inner_model: determines whether it is an inner ensemble model of a super ml model
        :type is_inner_model: boolean
        :return: associated sklearn ensemble model
        """
        data = None
        # Inner models are accessed as mappings, top-level ones through attributes.
        if is_inner_model:
            data = ensemble["data"]
        else:
            data = ensemble.data
        # Same ordering as serialize: specialised transporters first, general one last.
        for transporter in self._transporters:
            if transporter != "GeneralDataStructureTransporter":
                self._transporters[transporter].transport(
                    ensemble, Command.DESERIALIZE, is_inner_model)
        pt = ENSEMBLE_CHAIN["PreprocessingTransporter"]
        fe = ENSEMBLE_CHAIN["FeatureExtractorTransporter"]
        for key, value in data.items():
            if isinstance(value, dict):
                if check_str_in_iterable("pymiloed-data-structure",
                                         value) and value["pymiloed-data-structure"] == "list of (str, estimator) tuples":
                    # Rebuild the tuples that serialize stored as two-item lists.
                    listed_tuples = value["pymiloed-data"]
                    list_of_tuples = []
                    for listed_tuple in listed_tuples:
                        name, serialized_model = listed_tuple
                        retrieved_model = None
                        if pt.is_preprocessing_module(serialized_model):
                            retrieved_model = pt.deserialize_pre_module(serialized_model)
                        elif fe.is_fe_module(serialized_model):
                            retrieved_model = fe.deserialize_fe_module(serialized_model)
                        else:
                            retrieved_model = deserialize_possible_ml_model(serialized_model)[1]
                        list_of_tuples.append(
                            (name, retrieved_model)
                        )
                    data[key] = list_of_tuples
                elif GeneralDataStructureTransporter().is_deserialized_ndarray(value):
                    # Serialized ndarray whose elements may be ML models.
                    has_inner_model, result = deserialize_models_in_ndarray(value)
                    if has_inner_model:
                        data[key] = result
            if isinstance(value, list):
                # Plain lists: restore any serialized model found among the items.
                for idx, item in enumerate(value):
                    has_ml_model, result = deserialize_possible_ml_model(item)
                    if has_ml_model:
                        value[idx] = result
            # The attribute value itself may be a serialized ML model.
            has_ml_model, result = deserialize_possible_ml_model(value)
            if has_ml_model:
                data[key] = result
        self._transporters["GeneralDataStructureTransporter"].transport(ensemble, Command.DESERIALIZE, is_inner_model)
        _type = None
        raw_model = None
        # These model types are constructed with a keyword argument taken from the data
        # instead of through the default constructor.
        meta_learnings = ["StackingRegressor", "StackingClassifier", "VotingRegressor", "VotingClassifier"]
        pipeline_models = ["Pipeline"]
        if is_inner_model:
            _type = ensemble["type"]
        else:
            _type = ensemble.type
        if _type in meta_learnings:
            raw_model = self._supported_models[_type](estimators=data["estimators"])
        elif _type in pipeline_models:
            raw_model = self._supported_models[_type](steps=data["steps"])
        else:
            raw_model = self._supported_models[_type]()
        for item in data:
            setattr(raw_model, item, data[item])
        return raw_model


# Module-level chain instance for ensemble models.
ensemble_chain = EnsembleModelChain(ENSEMBLE_CHAIN, SKLEARN_ENSEMBLE_TABLE)
def serialize_models_in_ndarray(ndarray_instance):
    """
    Serialize the ML models inside the given ndarray.

    The array is converted only when every element (recursively, for nested
    arrays) is a supported ML model. Otherwise a copy of the input taken before
    any element was processed is handed back.

    :param ndarray_instance: given ndarray whose inner ML models should be serialized
    :type ndarray_instance: numpy.ndarray
    :return: tuple of (flag, result); flag is True when all elements were ML models and
        result is then the serialized dict, otherwise flag is False and result is the
        unchanged input (a deep copy for ndarray input)
    """
    if not isinstance(ndarray_instance, ndarray):
        # Callers unpack a (flag, result) pair, so a bare None would raise TypeError.
        return False, ndarray_instance

    # Snapshot taken before the loop: serializing an element may rewrite that model in
    # place, and a later non-model element must not leave the caller with a half
    # converted array.
    ndarray_instance_copy = copy.deepcopy(ndarray_instance)

    new_list = []
    for item in ndarray_instance:
        if isinstance(item, ndarray):
            has_model, result = serialize_models_in_ndarray(item)
        else:
            has_model, result = serialize_possible_ml_model(item)
        if not has_model:
            return False, ndarray_instance_copy
        new_list.append(result)

    return True, {
        'pymiloed-ndarray-list': new_list,
        'pymiloed-ndarray-dtype': str(ndarray_instance.dtype),
        'pymiloed-data-structure': 'numpy.ndarray'
    }
def deserialize_models_in_ndarray(serialized_ndarray):
    """
    Deserialize the ML models within the given serialized ndarray.

    The array is rebuilt only when every element (recursively, for nested
    arrays) is a previously serialized ML model.

    :param serialized_ndarray: serialized ndarray whose inner ML models should be deserialized
    :type serialized_ndarray: obj
    :return: tuple of (flag, result); (True, numpy.ndarray of models) on success,
        (False, deep copy of the input) when some element is not a serialized model,
        and (False, None) when the input is not a serialized ndarray at all
    """
    gdst = GeneralDataStructureTransporter()
    if not gdst.is_deserialized_ndarray(serialized_ndarray):
        return False, None

    # Snapshot taken before any element is processed, returned when the array turns
    # out not to be made up purely of models.
    serialized_ndarray_copy = copy.deepcopy(serialized_ndarray)

    new_list = []
    for item in serialized_ndarray['pymiloed-ndarray-list']:
        if gdst.is_deserialized_ndarray(item):
            has_model, result = deserialize_models_in_ndarray(item)
        else:
            has_model, result = deserialize_possible_ml_model(item)
        if not has_model:
            # A failing nested array must fail the whole array too; previously this case
            # fell through and returned a truncated array flagged as successful.
            return False, serialized_ndarray_copy
        new_list.append(result)

    dtype = serialized_ndarray['pymiloed-ndarray-dtype']
    if dtype.startswith("["):
        # Structured dtypes are stored as the string form of a list of field specs.
        dtype = literal_eval(dtype)
    return True, asarray(new_list, dtype=dtype)
================================================
FILE: pymilo/chains/linear_model_chain.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo chain for linear models."""
from .chain import AbstractChain
from ..transporters.baseloss_transporter import BaseLossTransporter
from ..transporters.transporter import Command
from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter
from ..transporters.lossfunction_transporter import LossFunctionTransporter
from ..transporters.preprocessing_transporter import PreprocessingTransporter
from ..utils.util import get_sklearn_type, is_iterable
from ..pymilo_param import SKLEARN_LINEAR_MODEL_TABLE
# Transporters used by the linear model chain, in the order they are applied.
LINEAR_MODEL_CHAIN = dict(
    PreprocessingTransporter=PreprocessingTransporter(),
    GeneralDataStructureTransporter=GeneralDataStructureTransporter(),
    BaseLossTransporter=BaseLossTransporter(),
    LossFunctionTransporter=LossFunctionTransporter(),
)
class LinearModelChain(AbstractChain):
    """Chain handling the transportation of sklearn linear models, including nested ones."""

    def serialize(self, linear_model_object):
        """
        Serialize the given linear model in place and return its attribute dictionary.

        :param linear_model_object: given model to be serialized
        :type linear_model_object: any sklearn linear model
        :return: the ``__dict__`` of the given linear model after serialization
        """
        # Step 1: replace attributes that are themselves linear models by a tagged dict.
        attributes = linear_model_object.__dict__
        for name, value in attributes.items():
            if not self.is_supported(value):
                continue
            inner_data = self.transport(value, Command.SERIALIZE, True)
            attributes[name] = {
                "pymilo-inner-model-data": inner_data,
                "pymilo-inner-model-type": get_sklearn_type(value),
                "pymilo-bypass": True
            }

        # Step 2: let the transporters handle every remaining attribute.
        for worker in self._transporters.values():
            worker.transport(linear_model_object, Command.SERIALIZE)
        return linear_model_object.__dict__

    def deserialize(self, linear_model, is_inner_model=False):
        """
        Rebuild the sklearn linear model described by the given serialized linear model.

        :param linear_model: previously serialized linear model to be turned back into a sklearn linear model
        :type linear_model: obj
        :param is_inner_model: determines whether it is an inner model of a super ml model
        :type is_inner_model: boolean
        :return: associated sklearn linear model
        """
        model_type = linear_model["type"] if is_inner_model else linear_model.type
        raw_model = self._supported_models[model_type]()
        data = linear_model["data"] if is_inner_model else linear_model.data

        # Step 1: restore directly nested linear models (one level deep per call;
        # TODO full depth).
        for name, value in data.items():
            if is_deserialized_linear_model(value):
                data[name] = self.transport(
                    {
                        "data": value["pymilo-inner-model-data"],
                        "type": value["pymilo-inner-model-type"]
                    },
                    Command.DESERIALIZE,
                    is_inner_model=True,
                )

        # Step 2: let the transporters restore every remaining attribute.
        for worker in self._transporters.values():
            worker.transport(linear_model, Command.DESERIALIZE, is_inner_model)

        for attribute, value in data.items():
            setattr(raw_model, attribute, value)
        return raw_model


# Module-level chain instance for linear models.
linear_chain = LinearModelChain(LINEAR_MODEL_CHAIN, SKLEARN_LINEAR_MODEL_TABLE)
def is_deserialized_linear_model(content):
    """
    Tell whether the given content is an inner model previously serialized by Pymilo's Export.

    :param content: given object to be checked for the inner-model type and data markers
    :type content: any object
    :return: check result as bool
    """
    if is_iterable(content):
        required_keys = ("pymilo-inner-model-type", "pymilo-inner-model-data")
        return all(key in content for key in required_keys)
    return False
================================================
FILE: pymilo/chains/naive_bayes_chain.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo chain for Naive Bayes models."""
from ..chains.chain import AbstractChain
from ..pymilo_param import SKLEARN_NAIVE_BAYES_TABLE
from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter
from ..transporters.preprocessing_transporter import PreprocessingTransporter
# Chain instance for Naive Bayes models; transporters are applied in listed order.
naive_bayes_chain = AbstractChain(
    transporters={
        "PreprocessingTransporter": PreprocessingTransporter(),
        "GeneralDataStructureTransporter": GeneralDataStructureTransporter(),
    },
    supported_models=SKLEARN_NAIVE_BAYES_TABLE,
)
================================================
FILE: pymilo/chains/neighbours_chain.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo chain for Neighbors models."""
from ..chains.chain import AbstractChain
from ..pymilo_param import SKLEARN_NEIGHBORS_TABLE
from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter
from ..transporters.neighbors_tree_transporter import NeighborsTreeTransporter
from ..transporters.preprocessing_transporter import PreprocessingTransporter
# Chain instance for Neighbors models; transporters are applied in listed order.
neighbors_chain = AbstractChain(
    transporters={
        "PreprocessingTransporter": PreprocessingTransporter(),
        "GeneralDataStructureTransporter": GeneralDataStructureTransporter(),
        "NeighborsTreeTransporter": NeighborsTreeTransporter(),
    },
    supported_models=SKLEARN_NEIGHBORS_TABLE,
)
================================================
FILE: pymilo/chains/neural_network_chain.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo chain for Neural Network models."""
from ..chains.chain import AbstractChain
from ..pymilo_param import SKLEARN_NEURAL_NETWORK_TABLE
from ..transporters.adamoptimizer_transporter import AdamOptimizerTransporter
from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter
from ..transporters.preprocessing_transporter import PreprocessingTransporter
from ..transporters.randomstate_transporter import RandomStateTransporter
from ..transporters.sgdoptimizer_transporter import SGDOptimizerTransporter
# Chain instance for Neural Network models; transporters are applied in listed order.
neural_network_chain = AbstractChain(
    transporters={
        "PreprocessingTransporter": PreprocessingTransporter(),
        "GeneralDataStructureTransporter": GeneralDataStructureTransporter(),
        "RandomStateTransporter": RandomStateTransporter(),
        "SGDOptimizer": SGDOptimizerTransporter(),
        "AdamOptimizerTransporter": AdamOptimizerTransporter(),
    },
    supported_models=SKLEARN_NEURAL_NETWORK_TABLE,
)
================================================
FILE: pymilo/chains/svm_chain.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo chain for SVM models."""
from ..chains.chain import AbstractChain
from ..pymilo_param import SKLEARN_SVM_TABLE
from ..transporters.preprocessing_transporter import PreprocessingTransporter
from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter
from ..transporters.randomstate_transporter import RandomStateTransporter
# Chain instance for SVM models; transporters are applied in listed order.
svm_chain = AbstractChain(
    transporters={
        "PreprocessingTransporter": PreprocessingTransporter(),
        "GeneralDataStructureTransporter": GeneralDataStructureTransporter(),
        "RandomStateTransporter": RandomStateTransporter(),
    },
    supported_models=SKLEARN_SVM_TABLE,
)
================================================
FILE: pymilo/chains/util.py
================================================
# -*- coding: utf-8 -*-
"""useful utilities for chains."""
from .linear_model_chain import linear_chain
from .neural_network_chain import neural_network_chain
from .decision_tree_chain import decision_trees_chain
from .clustering_chain import clustering_chain
from .naive_bayes_chain import naive_bayes_chain
from .svm_chain import svm_chain
from .neighbours_chain import neighbors_chain
from .cross_decomposition_chain import cross_decomposition_chain
from ..utils.util import get_sklearn_type, check_str_in_iterable
# Maps an upper-case model category name to the transport function of its chain.
MODEL_TYPE_TRANSPORTER = dict(
    LINEAR_MODEL=linear_chain.transport,
    NEURAL_NETWORK=neural_network_chain.transport,
    DECISION_TREE=decision_trees_chain.transport,
    CLUSTERING=clustering_chain.transport,
    NAIVE_BAYES=naive_bayes_chain.transport,
    SVM=svm_chain.transport,
    NEIGHBORS=neighbors_chain.transport,
    CROSS_DECOMPOSITION=cross_decomposition_chain.transport,
)
def get_concrete_transporter(model):
    """
    Resolve the transporter of the concrete (not ensemble) chain that handles the given model.

    A string is first tried as a category name (case-insensitive); otherwise the
    chains are asked in a fixed order whether they support the model.

    :param model: given model, model name or category name to get its transporter
    :type model: scikit ML model or str
    :return: model category and transporter function, or (None, None) when no chain matches
    """
    if isinstance(model, str):
        category = model.upper()
        if category in MODEL_TYPE_TRANSPORTER:
            return category, MODEL_TYPE_TRANSPORTER[category]

    # First supporting chain wins, so the order of this table is significant.
    candidates = (
        ("LINEAR_MODEL", linear_chain),
        ("NEURAL_NETWORK", neural_network_chain),
        ("DECISION_TREE", decision_trees_chain),
        ("CLUSTERING", clustering_chain),
        ("NAIVE_BAYES", naive_bayes_chain),
        ("SVM", svm_chain),
        ("NEIGHBORS", neighbors_chain),
        ("CROSS_DECOMPOSITION", cross_decomposition_chain),
    )
    for category, chain in candidates:
        if chain.is_supported(model):
            return category, chain.transport
    return None, None
def get_transporter(model):
    """
    Resolve the transporter for the given ML model across all chain categories.

    Compose and ensemble chains are consulted before the concrete chains.

    :param model: given model, model name or category name to get its transporter
    :type model: scikit ML model or str
    :return: model category and transporter function
    """
    # The chain modules are imported on demand inside these loaders; ensemble_chain
    # itself imports from this module, so a top-level import would be circular.
    def _load_compose():
        from .compose_chain import compose_chain
        return compose_chain

    def _load_ensemble():
        from .ensemble_chain import ensemble_chain
        return ensemble_chain

    high_level_chains = (
        ("COMPOSE", _load_compose),
        ("ENSEMBLE", _load_ensemble),
    )

    # String routing: an exact (case-insensitive) category name short-circuits.
    if isinstance(model, str):
        requested = model.upper()
        for category, load_chain in high_level_chains:
            if requested == category:
                return category, load_chain().transport

    # Object routing (check higher-level categories first).
    for category, load_chain in high_level_chains:
        chain = load_chain()
        if chain.is_supported(model):
            return category, chain.transport

    return get_concrete_transporter(model)
def serialize_possible_ml_model(model):
    """
    Serialize the given object when it is a supported ML model, else hand it back untouched.

    Strings are never treated as models here, even when they name one.

    :param model: given object
    :type model: any
    :return: ML model flag and serialized result
    """
    untouched = (False, model)
    if isinstance(model, str):
        return untouched

    ml_category, transporter = get_transporter(model)
    if transporter is None:
        return untouched

    from ..transporters.transporter import Command
    serialized_data = transporter(model, Command.SERIALIZE)
    return True, {
        "pymilo-bypass": True,
        "pymilo-inner-model-data": serialized_data,
        "pymilo-inner-model-type": get_sklearn_type(model),
        "pymilo-ml-category": ml_category
    }
def deserialize_possible_ml_model(serialized_model):
    """
    Deserialize the given object when it is a previously serialized ML model.

    :param serialized_model: given obj to check
    :type serialized_model: obj
    :return: ML model flag and deserialized result (the input itself when it is not a model)
    """
    if not check_str_in_iterable("pymilo-inner-model-type", serialized_model):
        return False, serialized_model

    # The stored category name selects the chain that produced this payload.
    _, transporter = get_transporter(serialized_model["pymilo-ml-category"])
    from ..transporters.transporter import Command
    inner_request = {
        "data": serialized_model["pymilo-inner-model-data"],
        "type": serialized_model["pymilo-inner-model-type"]
    }
    return True, transporter(inner_request, Command.DESERIALIZE, is_inner_model=True)
================================================
FILE: pymilo/exceptions/__init__.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo exceptions."""
from .pymilo_exception import PymiloException
from .serialize_exception import PymiloSerializationException
from .deserialize_exception import PymiloDeserializationException
================================================
FILE: pymilo/exceptions/deserialize_exception.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo Deserialization Exception."""
from enum import Enum
from .pymilo_exception import PymiloException
class DeserializationErrorTypes(Enum):
    """Enumerate the kinds of failure a PyMilo deserialization can report."""

    # The input could not be used as a valid PyMilo JSON export.
    CORRUPTED_JSON_FILE = 1
    # The input does not describe a supported / valid model.
    INVALID_MODEL = 2
    # The model is recognised but holds non-standard internal objects or functions.
    VALID_MODEL_INVALID_INTERNAL_STRUCTURE = 3
class PymiloDeserializationException(PymiloException):
    """
    Handle exceptions associated with Deserialization.

    There are 3 different types of deserialization exceptions:

        1-CORRUPTED_JSON_FILE: This error type claims that the given json string file which is supposed to be an
        output of Pymilo Export, is corrupted and can not be parsed as a valid json.

        2-INVALID_MODEL: This error type claims that the given json string file(or object) is not a serialized
        export of a valid sklearn linear model.

        3-VALID_MODEL_INVALID_INTERNAL_STRUCTURE: This error occurs when attempting to load a JSON file or object that
        does not conform to the expected format of a serialized scikit-learn linear model.
        The file may have been modified after being exported from Pymilo Export, causing it to become invalid.
    """

    def __init__(self, meta_data):
        """
        Initialize the PymiloDeserializationException instance.

        :param meta_data: Details pertain to the populated error; must contain an `error_type` key.
        :type meta_data: dict [str:str]
        :return: an instance of the PymiloDeserializationException class
        """
        error_type = meta_data['error_type']
        error_type_to_message = {
            DeserializationErrorTypes.CORRUPTED_JSON_FILE:
            'the given file is not a valid .json file.',
            DeserializationErrorTypes.INVALID_MODEL:
            'the given model is not supported or is not a valid model.',
            DeserializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE:
            'the given model has some non-standard customized internal objects or functions.'}
        # Unrecognised error types fall back to a generic reason.
        reason = error_type_to_message.get(error_type, "an Unknown error occurred.")
        # str.format returns a new string, so the result has to be kept; discarding
        # it would leave the literal "{reason}" placeholder in the message.
        message = "Pymilo Deserialization failed since {reason}".format(reason=reason)
        # Call the base class constructor with the parameters it needs
        super().__init__(message, meta_data)

    def to_pymilo_log(self):
        """
        Generate a comprehensive report of the populated error.

        For CORRUPTED_JSON_FILE errors the offending JSON content is attached to the report.

        :return: a dictionary of error details.
        """
        pymilo_report = super().to_pymilo_log()
        if self.meta_data['error_type'] == DeserializationErrorTypes.CORRUPTED_JSON_FILE:
            # .get keeps report generation (used when the exception is printed)
            # from raising if the caller did not supply the JSON content.
            pymilo_report['object']['json_file'] = self.meta_data.get('json_file')
        return pymilo_report
================================================
FILE: pymilo/exceptions/pymilo_exception.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo Abstract Exception Class."""
import pymilo
import sklearn
import platform
from datetime import datetime, timezone
from abc import ABC, abstractmethod
class PymiloException(Exception, ABC):
    """An abstract class for handling pymilo associated exceptions."""

    def __init__(self, message, meta_data):
        """
        Initialize the PymiloException instance.

        :param message: Error message associated with the populated error.
        :type message: str
        :param meta_data: Details pertain to the populated error.
        :type meta_data: dict [str:str]
        :return: an instance of the PymiloException class
        """
        # Call the base class constructor with the parameters it needs
        super().__init__(message)
        # gathered meta_data
        self.message = message
        self.meta_data = meta_data

    def to_pymilo_log(self):
        """
        Generate a comprehensive report of the populated error.

        The report gathers OS details, package versions, the object stored
        under `meta_data['object']` and the error messages.

        :return: error's details as dictionary
        """
        # This runs while the exception is being printed (see __str__), so a
        # missing 'object' entry must not raise a secondary KeyError.
        reported_object = self.meta_data.get('object')
        pymilo_report = {
            'os': {
                'name': platform.system(),
                'version': platform.version(),
                'release': platform.release(),
                'full-description': platform.platform()
            },
            'versions': {
                'pymilo-version': pymilo.__version__,
                'scikit-version': sklearn.__version__,
                'python-version': platform.python_version()
            },
            'object': {
                'type': type(reported_object),
                'content': reported_object
            },
            'error': {
                'date-utc': datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'),
                'pymilo-error': self.message,
                'inner-error': self.meta_data.get('error', "")
            }
        }
        return pymilo_report

    def to_pymilo_issue(self):
        """
        Generate an issue form from the populated error.

        Every section header is placed on a line of its own so the form renders
        as distinct Markdown sections.

        :return: issue form of the associated error as string
        """
        pymilo_report = self.to_pymilo_log()
        help_request = """
        \n\nIn order to help us enhance Pymilo's functionality, please open an issue associated with this error and put the message below inside.\n
        """
        # Case-sensitive on purpose: "Deserialization" does not contain "Serialization".
        associated_pymilo_class = "Export" if "Serialization" in self.message else "Import"
        sections = [
            help_request,
            "#### Description\n Pymilo {pymilo_class} failed.".format(
                pymilo_class=associated_pymilo_class),
            "#### Steps/Code to Reproduce\n It is auto-reported from the pymilo logger.",
            "#### Expected Behavior\n A successful Pymilo {pymilo_class}.".format(
                pymilo_class=associated_pymilo_class),
            "#### Actual Behavior\n Pymilo {pymilo_class} failed.".format(
                pymilo_class=associated_pymilo_class),
            "#### Operating System\n {os}".format(
                os=pymilo_report['os']['full-description']),
            "#### Python Version\n {python_version}".format(
                python_version=pymilo_report['versions']["python-version"]),
            "#### PyMilo Version\n {pymilo_version}".format(
                pymilo_version=pymilo_report['versions']["pymilo-version"]),
            "#### Logged Data\n {logged_data}".format(logged_data=str(pymilo_report)),
        ]
        # Joining with a newline guarantees each header starts its own line;
        # plain concatenation glued several headers to the preceding text.
        return "\n".join(sections)

    def __str__(self):
        """
        Override the base __str__ function.

        :return: issue form of the associated error as string
        """
        return self.to_pymilo_issue()
================================================
FILE: pymilo/exceptions/serialize_exception.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo Serialization Exception."""
from enum import Enum
from .pymilo_exception import PymiloException
class SerializationErrorTypes(Enum):
    """Enumerate the kinds of failure a PyMilo serialization can report."""

    # The given model is not supported or is not a valid model.
    INVALID_MODEL = 1
    # The model is recognised but holds non-standard internal objects or functions.
    VALID_MODEL_INVALID_INTERNAL_STRUCTURE = 2
class PymiloSerializationException(PymiloException):
    """
    Report a failure that happened while serializing a model.

    Two error types are distinguished:

        1-INVALID_MODEL: the given model is not a valid sklearn's linear model.

        2-VALID_MODEL_INVALID_INTERNAL_STRUCTURE: the model is one of the sklearn's linear models
        but its internal structure has changed in a way that can't be serialized.
    """

    def __init__(self, meta_data):
        """
        Build the exception message from the error type found in the meta data.

        :param meta_data: details of the populated error; must contain an `error_type` key.
        :type meta_data: dict [str:str]
        :return: an instance of the PymiloSerializationException class
        """
        error_type = meta_data['error_type']
        reasons = {
            SerializationErrorTypes.INVALID_MODEL: 'the given model is not supported or is not a valid model.',
            SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE: 'the given model has some non-standard customized internal objects or functions.'}
        # Error types missing from the table get the generic reason.
        reason = reasons.get(error_type, "an Unknown error occurred.")
        super().__init__("Pymilo Serialization failed since " + reason, meta_data)

    def to_pymilo_log(self):
        """
        Produce the error report; currently identical to the base class report.

        :return: error's details as dictionary
        """
        # TODO add any serializable field to `object` field of pymilo_report
        return super().to_pymilo_log()
================================================
FILE: pymilo/pymilo_func.py
================================================
# -*- coding: utf-8 -*-
"""Functions."""
import numpy as np
import sklearn
from .chains.util import get_transporter
from .transporters.transporter import Command
from .pymilo_param import SKLEARN_SUPPORTED_CATEGORIES, NOT_SUPPORTED, OVERVIEW
def get_sklearn_version():
    """
    Report the version string of the imported scikit-learn package.

    :return: value of `sklearn.__version__`
    """
    installed_version = sklearn.__version__
    return installed_version
def get_sklearn_data(model):
    """
    Serialize the given model through the transporter resolved for it.

    :param model: given model
    :type model: any sklearn's model class
    :return: sklearn data
    """
    # Only the transporter half of the (category, transporter) pair is needed.
    _category, transport = get_transporter(model)
    serialized = transport(model, Command.SERIALIZE)
    return serialized
def to_sklearn_model(import_obj):
    """
    Turn an imported PyMilo object back into a sklearn model.

    :param import_obj: given object
    :type import_obj: pymilo.Import
    :return: sklearn model
    """
    # The transporter is looked up from the type recorded on the import object.
    _category, transport = get_transporter(import_obj.type)
    model = transport(import_obj, Command.DESERIALIZE)
    return model
def compare_model_outputs(exported_output,
                          imported_output,
                          epsilon_error=10**(-8)):
    """
    Check if the given models outputs are the same.

    The absolute differences of all matching entries are accumulated and the
    total is compared against the threshold. Entries may be scalars or
    equally-shaped numpy arrays; array differences are summed over all elements.

    :param exported_output: exported model output
    :type exported_output: dict
    :param imported_output: imported model output
    :type imported_output: dict
    :param epsilon_error: error threshold for numeric comparisons
    :type epsilon_error: float
    :return: check result as bool
    """
    if len(exported_output) != len(imported_output):
        return False  # TODO: throw exception
    total_error = 0
    for key in exported_output:
        if key not in imported_output:
            return False  # TODO: throw exception
        # np.sum collapses array-valued differences to one scalar, so the final
        # comparison always yields a single truth value instead of an array.
        total_error += np.sum(np.abs(imported_output[key] - exported_output[key]))
    # Convert numpy's bool_ to the plain bool promised by the docstring.
    return bool(np.abs(total_error) < epsilon_error)
def print_supported_ml_models():
    """
    List the supported sklearn ML models on stdout, grouped by category.

    :return: None
    """
    print("Supported Machine Learning Models:")
    for category, table in SKLEARN_SUPPORTED_CATEGORIES.items():
        print(f"**{category}**:")
        # Entries flagged NOT_SUPPORTED are left out of the listing.
        available = [name for name, entry in table.items() if entry != NOT_SUPPORTED]
        for name in available:
            print(f"- {name}")
def pymilo_help():
    """
    Show the PyMilo overview followed by the repository and webpage links.

    :return: None
    """
    help_lines = (
        OVERVIEW,
        "Repo : https://github.com/openscilab/pymilo",
        "Webpage : https://openscilab.com/\n",
    )
    for line in help_lines:
        print(line)
================================================
FILE: pymilo/pymilo_obj.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo modules."""
import os
import re
import json
from copy import deepcopy
from warnings import warn
from traceback import format_exc
from .utils.util import get_sklearn_type, download_model
from concurrent.futures import ThreadPoolExecutor, as_completed
from .pymilo_func import get_sklearn_data, get_sklearn_version, to_sklearn_model
from .exceptions.serialize_exception import PymiloSerializationException, SerializationErrorTypes
from .exceptions.deserialize_exception import PymiloDeserializationException, DeserializationErrorTypes
from .pymilo_param import PYMILO_VERSION, UNEQUAL_PYMILO_VERSIONS, UNEQUAL_SKLEARN_VERSIONS
from .pymilo_param import INVALID_IMPORT_INIT_PARAMS, BATCH_IMPORT_INVALID_DIRECTORY
class Export:
    """
    The Pymilo Export class facilitates exporting of models to json files.

    >>> exported_model = Export(model) # the model could be any sklearn linear model.
    >>> exported_model_serialized_path = os.path.join(os.getcwd(), "MODEL_NAME.json")
    >>> exported_model.save(exported_model_serialized_path)
    """

    def __init__(self, model):
        """
        Initialize the Pymilo Export instance.

        :param model: given model(any sklearn linear model)
        :type model: any class of the sklearn's linear models
        :return: an instance of the Pymilo Export class
        """
        # A deep copy is what gets serialized, not the caller's model object.
        self.data = get_sklearn_data(deepcopy(model))
        self.version = get_sklearn_version()
        self.type = get_sklearn_type(model)

    def save(self, file_adr):
        """
        Save model in a file.

        :param file_adr: file address
        :type file_adr: str
        :return: None
        """
        with open(file_adr, 'w') as fp:
            fp.write(self.to_json())

    def to_json(self):
        """
        Return a json-like representation of model.

        :return: model's representation as str
        :raises PymiloSerializationException: when the gathered data can not be dumped as JSON
        """
        payload = {
            "data": self.data,
            "sklearn_version": self.version,
            "pymilo_version": PYMILO_VERSION,
            "model_type": self.type
        }
        try:
            return json.dumps(payload, indent=4)
        except Exception as e:
            raise PymiloSerializationException(
                {
                    'error_type': SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE,
                    'error': {
                        'Exception': repr(e),
                        'Traceback': format_exc()},
                    'object': payload,
                }) from e

    @staticmethod
    def batch_export(models, file_addr, run_parallel=False):
        """
        Export a batch of models to individual JSON files in a specified directory.

        Each model is saved to a file named 'model_{index}.json' in the provided directory, which is
        created (including missing parent directories) when it does not exist. When `run_parallel`
        is true the exports are submitted to a thread pool, otherwise they run one after another.
        A model whose export fails is skipped and not counted.

        :param models: list of models to get exported.
        :type models: list
        :param file_addr: the directory where exported JSON files will be saved.
        :type file_addr: str
        :param run_parallel: flag indicating the parallel execution of exports
        :type run_parallel: boolean
        :return: the count of models exported successfully
        """
        if not os.path.exists(file_addr):
            # makedirs also creates missing parents; exist_ok tolerates another
            # caller creating the directory between the check and this call.
            os.makedirs(file_addr, exist_ok=True)

        def export_model(model, index):
            # Best effort: a failing model contributes 0 instead of aborting the batch.
            try:
                Export(model).save(file_adr=os.path.join(file_addr, f"model_{index}.json"))
                return 1
            except Exception:
                return 0

        if run_parallel:
            with ThreadPoolExecutor() as executor:
                futures = [executor.submit(export_model, model, index) for index, model in enumerate(models)]
                return sum(future.result() for future in as_completed(futures))
        return sum(export_model(model, index) for index, model in enumerate(models))
class Import:
    """
    The Pymilo Import class facilitates importing of serialized models from either a designated file path or a JSON string dump.

    >>> imported_model = Import(exported_model_serialized_path)
    >>> imported_sklearn_model = imported_model.to_model()
    >>> imported_sklearn_model.predict(x_test)
    """

    def __init__(self, file_adr=None, json_dump=None, url=None):
        """
        Initialize the Pymilo Import instance.

        The source is picked in this order: `url`, then `json_dump`, then `file_adr`.

        :param file_adr: the file path where the serialized model's JSON file is located.
        :type file_adr: str or None
        :param json_dump: the json dump of the associated model, it can be None(reading from the file_adr)
        :type json_dump: str or None
        :param url: url to exported JSON file
        :type: str or None
        :return: an instance of the Pymilo Import class
        :raises PymiloDeserializationException: when the loaded object lacks the expected fields
        """
        serialized_model_obj = None
        if url is not None:
            serialized_model_obj = download_model(url)
        elif json_dump is not None and isinstance(json_dump, str):
            serialized_model_obj = json.loads(json_dump)
        elif file_adr is not None:
            with open(file_adr, 'r') as fp:
                serialized_model_obj = json.load(fp)
        else:
            raise Exception(INVALID_IMPORT_INIT_PARAMS)
        try:
            # Version mismatches only produce warnings; loading continues.
            if not serialized_model_obj["pymilo_version"] == PYMILO_VERSION:
                warn(UNEQUAL_PYMILO_VERSIONS, category=Warning)
            if not serialized_model_obj["sklearn_version"] == get_sklearn_version():
                warn(UNEQUAL_SKLEARN_VERSIONS, category=Warning)
            self.data = serialized_model_obj["data"]
            self.version = serialized_model_obj["sklearn_version"]
            self.type = serialized_model_obj["model_type"]
        except Exception as e:
            # Attach the raw content that failed so it ends up in the error report.
            json_content = None
            if json_dump and isinstance(json_dump, str):
                json_content = json_dump
            elif file_adr is not None:
                with open(file_adr) as f:
                    json_content = f.readlines()
            else:
                json_content = serialized_model_obj
            raise PymiloDeserializationException(
                {
                    'json_file': json_content,
                    'error_type': DeserializationErrorTypes.CORRUPTED_JSON_FILE,
                    'error': {
                        'Exception': repr(e),
                        'Traceback': format_exc()},
                    'object': ""}) from e

    def to_model(self):
        """
        Convert imported model to sklearn model.

        :return: sklearn model
        """
        return to_sklearn_model(self)

    @staticmethod
    def batch_import(file_addr, run_parallel=False):
        """
        Import a batch of models from individual JSON files in a specified directory.

        Files are ordered by the numeric suffix of their name (`..._<n>.json`); JSON files without
        such a suffix are placed after the numbered ones, ordered by name. When `run_parallel` is
        true the imports are submitted to a thread pool, otherwise they run one after another.
        In both modes a file that fails to import leaves `None` at its position and is not counted.

        :param file_addr: the directory where the JSON files to be imported are located.
        :type file_addr: str
        :param run_parallel: flag indicating the parallel execution of imports
        :type run_parallel: boolean
        :return: a tuple containing the count of models imported successfully and a list of the
        imported models in their filename order.
        :raises FileNotFoundError: when the given directory does not exist
        """
        if not os.path.exists(file_addr):
            raise FileNotFoundError(BATCH_IMPORT_INVALID_DIRECTORY)

        numeric_suffix = re.compile(r'_(\d+)\.json$')

        def sort_key(file_name):
            # The leading 0/1 keeps numbered and unnumbered files in separate groups,
            # so an int is never compared with a str and no file makes the sort crash.
            match = numeric_suffix.search(file_name)
            if match is None:
                return 1, 0, file_name
            return 0, int(match.group(1)), ""

        json_files = sorted((f for f in os.listdir(file_addr) if f.endswith('.json')), key=sort_key)
        file_paths = [os.path.join(file_addr, file) for file in json_files]
        models = [None] * len(json_files)
        count = 0

        def import_model(file_path, index):
            # Best effort: a failing file yields None instead of aborting the batch.
            try:
                return index, Import(file_path).to_model()
            except Exception:
                return index, None

        if run_parallel:
            with ThreadPoolExecutor() as executor:
                futures = [
                    executor.submit(import_model, file_path, index)
                    for index, file_path in enumerate(file_paths)]
                results = [future.result() for future in as_completed(futures)]
        else:
            # Same helper as the parallel branch, so both modes treat failures alike.
            results = [import_model(file_path, index) for index, file_path in enumerate(file_paths)]

        for index, model in results:
            if model is not None:
                models[index] = model
                count += 1
        return count, models
================================================
FILE: pymilo/pymilo_param.py
================================================
# -*- coding: utf-8 -*-
"""Parameters and constants."""
import numpy as np
import sklearn.linear_model as linear_model
import sklearn.neural_network as neural_network
import sklearn.tree as tree
import sklearn.cluster as cluster
import sklearn.mixture as mixture
import sklearn.naive_bayes as naive_bayes
import sklearn.svm as svm
import sklearn.neighbors as neighbors
import sklearn.dummy as dummy
import sklearn.ensemble as ensemble
import sklearn.pipeline as pipeline
import sklearn.preprocessing as preprocessing
import sklearn.cross_decomposition as cross_decomposition
import sklearn.feature_extraction as feature_extraction
import sklearn.compose as compose
# Optional scikit-learn estimators. Each flag records whether the class could be
# imported from the installed scikit-learn. Only ImportError is treated as
# "not available": catching BaseException here would also swallow
# KeyboardInterrupt and SystemExit raised while the module is being imported.
quantile_regressor_support = False
try:
    from sklearn.linear_model import QuantileRegressor
    quantile_regressor_support = True
except ImportError:
    pass

glm_support = {
    'GammaRegressor': False,
    'PoissonRegressor': False,
    'TweedieRegressor': False
}
try:
    # The imports run in sequence: the first one that fails leaves the
    # remaining flags at False.
    from sklearn.linear_model import TweedieRegressor
    glm_support['TweedieRegressor'] = True
    from sklearn.linear_model import PoissonRegressor
    glm_support['PoissonRegressor'] = True
    from sklearn.linear_model import GammaRegressor
    glm_support['GammaRegressor'] = True
except ImportError:
    pass

sgd_one_class_svm_support = False
try:
    from sklearn.linear_model import SGDOneClassSVM
    sgd_one_class_svm_support = True
except ImportError:
    pass

bisecting_kmeans_support = False
try:
    from sklearn.cluster import BisectingKMeans
    bisecting_kmeans_support = True
except ImportError:
    pass

hdbscan_support = False
try:
    from sklearn.cluster import HDBSCAN
    hdbscan_support = True
except ImportError:
    pass

hist_gradient_boosting_support = False
try:
    # Both classes must be importable for the flag to be set.
    from sklearn.ensemble import HistGradientBoostingRegressor
    from sklearn.ensemble import HistGradientBoostingClassifier
    hist_gradient_boosting_support = True
except ImportError:
    pass

spline_transformer_support = False
try:
    from sklearn.preprocessing import SplineTransformer
    spline_transformer_support = True
except ImportError:
    pass

target_encoder_support = False
try:
    from sklearn.preprocessing import TargetEncoder
    target_encoder_support = True
except ImportError:
    pass
# Short package description printed by the help output.
OVERVIEW = """
PyMilo is an open source Python package that provides a simple, efficient, and safe way for users to export pre-trained machine learning models in a transparent way.
"""
PYMILO_VERSION = "1.6"
# Sentinel stored in the model tables for classes missing from the installed scikit-learn.
NOT_SUPPORTED = "NOT_SUPPORTED"
# Import-related warning and error messages.
PYMILO_VERSION_DOES_NOT_EXIST = "Corrupted JSON file, `pymilo_version` doesn't exist in this file."
UNEQUAL_PYMILO_VERSIONS = "warning: Installed PyMilo version differs from the PyMilo version used to create the JSON file."
UNEQUAL_SKLEARN_VERSIONS = "warning: Installed Scikit version differs from the Scikit version used to create the JSON file and it may prevent PyMilo from transporting seamlessly."
INVALID_IMPORT_INIT_PARAMS = "Invalid input parameters, you should either pass a valid file_adr or a json_dump or a url to initiate Import class."
# Pattern for http(s) URLs. Inside the character class `.-_` is a RANGE
# (0x2E-0x5F), which does not contain '-' itself, so hyphenated host names
# were rejected; the trailing `\-` adds the literal hyphen.
# NOTE(review): the range is kept on purpose - it is what currently lets ':'
# and '/' through (e.g. host:port URLs). Tighten it only together with
# explicit port / IP support.
URL_REGEX = r'^(http|https)://[a-zA-Z0-9.-_\-]+\.[a-zA-Z]{2,}(/\S*)?$'
DOWNLOAD_MODEL_FAILED = "Failed to download the JSON file, Server didn't respond."
INVALID_DOWNLOADED_MODEL = "The downloaded content is not a valid JSON file."
BATCH_IMPORT_INVALID_DIRECTORY = "The given directory does not exist."
# Command-line interface messages.
CLI_ML_STREAMING_NOT_INSTALLED = """ML Streaming is not installed.
To install ML Streaming, run the following command:
pip install pymilo[streaming]"""
CLI_MORE_INFO = "For more information, visit the PyMilo README at https://github.com/openscilab/pymilo"
CLI_UNKNOWN_MODEL = "The provided ML model name is either invalid or unsupported."
# Model tables: each maps a scikit-learn class name to the class object.
# Classes that could not be imported from the installed scikit-learn are
# stored as the NOT_SUPPORTED sentinel instead (see the *_support flags).

# Linear models, plus the dummy estimators from sklearn.dummy.
SKLEARN_LINEAR_MODEL_TABLE = {
    "DummyRegressor": dummy.DummyRegressor,
    "DummyClassifier": dummy.DummyClassifier,
    "LinearRegression": linear_model.LinearRegression,
    "Ridge": linear_model.Ridge,
    "RidgeCV": linear_model.RidgeCV,
    "RidgeClassifier": linear_model.RidgeClassifier,
    "RidgeClassifierCV": linear_model.RidgeClassifierCV,
    "Lasso": linear_model.Lasso,
    "LassoCV": linear_model.LassoCV,
    "LassoLars": linear_model.LassoLars,
    "LassoLarsCV": linear_model.LassoLarsCV,
    "LassoLarsIC": linear_model.LassoLarsIC,
    "MultiTaskLasso": linear_model.MultiTaskLasso,
    "MultiTaskLassoCV": linear_model.MultiTaskLassoCV,
    "ElasticNet": linear_model.ElasticNet,
    "ElasticNetCV": linear_model.ElasticNetCV,
    "MultiTaskElasticNet": linear_model.MultiTaskElasticNet,
    "MultiTaskElasticNetCV": linear_model.MultiTaskElasticNetCV,
    "OrthogonalMatchingPursuit": linear_model.OrthogonalMatchingPursuit,
    "OrthogonalMatchingPursuitCV": linear_model.OrthogonalMatchingPursuitCV,
    "BayesianRidge": linear_model.BayesianRidge,
    "ARDRegression": linear_model.ARDRegression,
    "LogisticRegression": linear_model.LogisticRegression,
    "LogisticRegressionCV": linear_model.LogisticRegressionCV,
    "TweedieRegressor": TweedieRegressor if glm_support['TweedieRegressor'] else NOT_SUPPORTED,
    "PoissonRegressor": PoissonRegressor if glm_support['PoissonRegressor'] else NOT_SUPPORTED,
    "GammaRegressor": GammaRegressor if glm_support['GammaRegressor'] else NOT_SUPPORTED,
    "SGDRegressor": linear_model.SGDRegressor,
    "SGDClassifier": linear_model.SGDClassifier,
    "SGDOneClassSVM": SGDOneClassSVM if sgd_one_class_svm_support else NOT_SUPPORTED,
    "Perceptron": linear_model.Perceptron,
    "PassiveAggressiveRegressor": linear_model.PassiveAggressiveRegressor,
    "PassiveAggressiveClassifier": linear_model.PassiveAggressiveClassifier,
    "RANSACRegressor": linear_model.RANSACRegressor,
    "TheilSenRegressor": linear_model.TheilSenRegressor,
    "HuberRegressor": linear_model.HuberRegressor,
    "QuantileRegressor": QuantileRegressor if quantile_regressor_support else NOT_SUPPORTED,
}

# Neural network models.
SKLEARN_NEURAL_NETWORK_TABLE = {
    "MLPRegressor": neural_network.MLPRegressor,
    "MLPClassifier": neural_network.MLPClassifier,
    "BernoulliRBM": neural_network.BernoulliRBM,
}

# Decision tree models.
SKLEARN_DECISION_TREE_TABLE = {
    "DecisionTreeRegressor": tree.DecisionTreeRegressor,
    "DecisionTreeClassifier": tree.DecisionTreeClassifier,
    "ExtraTreeRegressor": tree.ExtraTreeRegressor,
    "ExtraTreeClassifier": tree.ExtraTreeClassifier
}

# Clustering models, including the mixture models from sklearn.mixture.
SKLEARN_CLUSTERING_TABLE = {
    "KMeans": cluster.KMeans,
    "MiniBatchKMeans": cluster.MiniBatchKMeans,
    "BisectingKMeans": BisectingKMeans if bisecting_kmeans_support else NOT_SUPPORTED,
    "AffinityPropagation": cluster.AffinityPropagation,
    "MeanShift": cluster.MeanShift,
    "SpectralClustering": cluster.SpectralClustering,
    "SpectralBiclustering": cluster.SpectralBiclustering,
    "SpectralCoclustering": cluster.SpectralCoclustering,
    "AgglomerativeClustering": cluster.AgglomerativeClustering,
    "FeatureAgglomeration": cluster.FeatureAgglomeration,
    "DBSCAN": cluster.DBSCAN,
    "HDBSCAN": HDBSCAN if hdbscan_support else NOT_SUPPORTED,
    "OPTICS": cluster.OPTICS,
    "Birch": cluster.Birch,
    "GaussianMixture": mixture.GaussianMixture,
    "BayesianGaussianMixture": mixture.BayesianGaussianMixture,
}

# Naive Bayes models.
SKLEARN_NAIVE_BAYES_TABLE = {
    "GaussianNB": naive_bayes.GaussianNB,
    "MultinomialNB": naive_bayes.MultinomialNB,
    "ComplementNB": naive_bayes.ComplementNB,
    "BernoulliNB": naive_bayes.BernoulliNB,
    "CategoricalNB": naive_bayes.CategoricalNB,
}

# Support vector machine models.
SKLEARN_SVM_TABLE = {
    "LinearSVC": svm.LinearSVC,
    "LinearSVR": svm.LinearSVR,
    "NuSVC": svm.NuSVC,
    "NuSVR": svm.NuSVR,
    "OneClassSVM": svm.OneClassSVM,
    "SVC": svm.SVC,
    "SVR": svm.SVR,
}

# Nearest-neighbors models.
SKLEARN_NEIGHBORS_TABLE = {
    "KNeighborsRegressor": neighbors.KNeighborsRegressor,
    "KNeighborsClassifier": neighbors.KNeighborsClassifier,
    "RadiusNeighborsRegressor": neighbors.RadiusNeighborsRegressor,
    "RadiusNeighborsClassifier": neighbors.RadiusNeighborsClassifier,
    "NearestNeighbors": neighbors.NearestNeighbors,
    "NearestCentroid": neighbors.NearestCentroid,
    "LocalOutlierFactor": neighbors.LocalOutlierFactor,
}

# Ensemble models.
SKLEARN_ENSEMBLE_TABLE = {
    "AdaBoostRegressor": ensemble.AdaBoostRegressor,
    "AdaBoostClassifier": ensemble.AdaBoostClassifier,
    "BaggingRegressor": ensemble.BaggingRegressor,
    "BaggingClassifier": ensemble.BaggingClassifier,
    "ExtraTreesClassifier": ensemble.ExtraTreesClassifier,
    "ExtraTreesRegressor": ensemble.ExtraTreesRegressor,
    "GradientBoostingRegressor": ensemble.GradientBoostingRegressor,
    "GradientBoostingClassifier": ensemble.GradientBoostingClassifier,
    "IsolationForest": ensemble.IsolationForest,
    "RandomForestClassifier": ensemble.RandomForestClassifier,
    "RandomForestRegressor": ensemble.RandomForestRegressor,
    "RandomTreesEmbedding": ensemble.RandomTreesEmbedding,
    "StackingRegressor": ensemble.StackingRegressor,
    "StackingClassifier": ensemble.StackingClassifier,
    "VotingRegressor": ensemble.VotingRegressor,
    "VotingClassifier": ensemble.VotingClassifier,
    "HistGradientBoostingRegressor": HistGradientBoostingRegressor if hist_gradient_boosting_support else NOT_SUPPORTED,
    "HistGradientBoostingClassifier": HistGradientBoostingClassifier if hist_gradient_boosting_support else NOT_SUPPORTED,
    # NOTE(review): Pipeline comes from sklearn.pipeline, not sklearn.ensemble;
    # presumably it is listed here so it is routed with the ensemble models - confirm.
    "Pipeline": pipeline.Pipeline,
}

# Preprocessing transformers.
SKLEARN_PREPROCESSING_TABLE = {
    "StandardScaler": preprocessing.StandardScaler,
    "MinMaxScaler": preprocessing.MinMaxScaler,
    "OneHotEncoder": preprocessing.OneHotEncoder,
    "LabelBinarizer": preprocessing.LabelBinarizer,
    "LabelEncoder": preprocessing.LabelEncoder,
    "Binarizer": preprocessing.Binarizer,
    "FunctionTransformer": preprocessing.FunctionTransformer,
    "KernelCenterer": preprocessing.KernelCenterer,
    "MultiLabelBinarizer": preprocessing.MultiLabelBinarizer,
    "MaxAbsScaler": preprocessing.MaxAbsScaler,
    "Normalizer": preprocessing.Normalizer,
    "OrdinalEncoder": preprocessing.OrdinalEncoder,
    "PolynomialFeatures": preprocessing.PolynomialFeatures,
    "RobustScaler": preprocessing.RobustScaler,
    "QuantileTransformer": preprocessing.QuantileTransformer,
    "KBinsDiscretizer": preprocessing.KBinsDiscretizer,
    "PowerTransformer": preprocessing.PowerTransformer,
    "SplineTransformer": SplineTransformer if spline_transformer_support else NOT_SUPPORTED,
    "TargetEncoder": TargetEncoder if target_encoder_support else NOT_SUPPORTED,
}

# Feature extraction transformers, grouped by the kind of data they handle.
SKLEARN_FEATURE_EXTRACTION_TABLE = {
    # for raw data:
    "DictVectorizer": feature_extraction.DictVectorizer,
    "FeatureHasher": feature_extraction.FeatureHasher,
    # for image data:
    "PatchExtractor": feature_extraction.image.PatchExtractor,
    # for text data:
    "CountVectorizer": feature_extraction.text.CountVectorizer,
    "HashingVectorizer": feature_extraction.text.HashingVectorizer,
    "TfidfTransformer": feature_extraction.text.TfidfTransformer,
    "TfidfVectorizer": feature_extraction.text.TfidfVectorizer,
}

# Cross decomposition models.
SKLEARN_CROSS_DECOMPOSITION_TABLE = {
    "PLSRegression": cross_decomposition.PLSRegression,
    "PLSCanonical": cross_decomposition.PLSCanonical,
    "CCA": cross_decomposition.CCA,
}

# Composite estimators from sklearn.compose.
SKLEARN_COMPOSE_TABLE = {
    "ColumnTransformer": compose.ColumnTransformer,
    "TransformedTargetRegressor": compose.TransformedTargetRegressor,
}
# Attribute names flagged as needing preprocessing before deserialization,
# each paired with a type (or an empty dict placeholder).
# NOTE(review): how the values are consumed is not visible from this module - confirm.
KEYS_NEED_PREPROCESSING_BEFORE_DESERIALIZATION = {
    "_label_binarizer": preprocessing.LabelBinarizer,  # in Ridge Classifier
    "active_": np.int32,  # in Lasso Lars
    "n_nonzero_coefs_": np.int64,  # in OMP-CV
    "scores_": dict,  # in Logistic Regression CV,
    "_base_loss": {},  # BaseLoss in Logistic Regression,
    "loss_function_": {},  # LossFunction in SGD Classifier,
    "estimator_": {},  # LinearRegression model inside RANSAC
}

# Maps a numpy type name to the numpy object it stands for.
# NOTE(review): most values are callables (types, or the lambda that ignores its
# argument and yields np.inf) but "numpy.nan" is the float value itself -
# confirm callers do not try to call it.
NUMPY_TYPE_DICT = {
    "numpy.intc": np.intc,
    "numpy.int32": np.int32,
    "numpy.int64": np.int64,
    "numpy.float64": np.float64,
    "numpy.infinity": lambda _: np.inf,
    "numpy.uint8": np.uint8,
    "numpy.uint64": np.uint64,
    "numpy.dtype": np.dtype,
    "numpy.nan": np.nan,
}
# Maps each model category to a directory name for its exported models.
# NOTE(review): the consumers of these paths are not visible here - presumably the test suite; confirm.
EXPORTED_MODELS_PATH = {
    "LINEAR_MODEL": "exported_linear_models",
    "NEURAL_NETWORK": "exported_neural_networks",
    "DECISION_TREE": "exported_decision_trees",
    "CLUSTERING": "exported_clusterings",
    "NAIVE_BAYES": "exported_naive_bayes",
    "SVM": "exported_svms",
    "NEIGHBORS": "exported_neighbors",
    "ENSEMBLE": "exported_ensembles",
    "CROSS_DECOMPOSITION": "exported_cross_decomposition",
    "COMPOSE": "exported_composes",
}

# Maps each model category to its model table.
# The preprocessing and feature-extraction tables are not registered here.
SKLEARN_SUPPORTED_CATEGORIES = {
    "LINEAR_MODEL": SKLEARN_LINEAR_MODEL_TABLE,
    "NEURAL_NETWORK": SKLEARN_NEURAL_NETWORK_TABLE,
    "DECISION_TREE": SKLEARN_DECISION_TREE_TABLE,
    "CLUSTERING": SKLEARN_CLUSTERING_TABLE,
    "NAIVE_BAYES": SKLEARN_NAIVE_BAYES_TABLE,
    "SVM": SKLEARN_SVM_TABLE,
    "NEIGHBORS": SKLEARN_NEIGHBORS_TABLE,
    "ENSEMBLE": SKLEARN_ENSEMBLE_TABLE,
    "CROSS_DECOMPOSITION": SKLEARN_CROSS_DECOMPOSITION_TABLE,
    "COMPOSE": SKLEARN_COMPOSE_TABLE,
}
================================================
FILE: pymilo/streaming/__init__.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo ML Streaming."""
from .pymilo_client import PymiloClient
from .pymilo_server import PymiloServer
from .compressor import Compression
from .communicator import CommunicationProtocol
================================================
FILE: pymilo/streaming/communicator.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo Communication Mediums."""
from __future__ import annotations
import uuid
import json
import asyncio
from typing import Any, Dict, List, Optional, Union
import uvicorn
import requests
import websockets
from enum import Enum
from fastapi import FastAPI, Request, WebSocket, WebSocketDisconnect, HTTPException
from .interfaces import ClientCommunicator
from .param import (
PYMILO_INVALID_URL,
PYMILO_CLIENT_WEBSOCKET_NOT_CONNECTED,
REST_API_PREFIX,
MSG_DOWNLOAD_REQUEST,
MSG_UPLOAD_REQUEST,
MSG_ATTRIBUTE_CALL_REQUEST,
MSG_ATTRIBUTE_TYPE_REQUEST,
MSG_REST_DOWNLOAD_REQUEST,
MSG_REST_UPLOAD_REQUEST,
MSG_REST_ATTRIBUTE_CALL_REQUEST,
MSG_REST_ATTRIBUTE_TYPE_REQUEST,
)
from .util import validate_websocket_url, validate_http_url
class RESTClientCommunicator(ClientCommunicator):
"""Facilitate working with the communication medium from the client side for the REST protocol."""
def __init__(self, server_url: str) -> None:
    """
    Set up the REST client communicator: validate the URL and prepare a retrying session.

    :param server_url: the url to which PyMilo Server listens
    """
    url_ok, normalized_url = validate_http_url(server_url)
    if not url_ok:
        raise Exception(PYMILO_INVALID_URL)
    # Every endpoint lives under the versioned API prefix.
    self._server_url = normalized_url.rstrip("/") + "/api/v1"
    self.session = requests.Session()
    # Retry policy applied to both schemes through a shared adapter.
    retry_policy = requests.adapters.Retry(
        total=10,
        backoff_factor=0.1,
        status_forcelist=[500, 502, 503, 504]
    )
    shared_adapter = requests.adapters.HTTPAdapter(max_retries=retry_policy)
    for scheme in ('http://', 'https://'):
        self.session.mount(scheme, shared_adapter)
def download(self, client_id: str, model_id: str) -> str:
"""
Request for the remote ML model to download.
:param client_id: ID of the requesting client
:param model_id: ID of the model to download
"""
url = f"{self._server_url}/clients/{client_id}/models/{model_id}/download"
response = self.session.get(url, timeout=5)
response.raise_for_status()
return response.json()["payload"]
def upload(self, client_id: str, model_id: str, model: Any) -> bool:
"""
Upload the local ML model to the remote server.
:param client_id: ID of the client
:param model_id: ID of the model
:param model: serialized model content
"""
url = f"{self._server_url}/clients/{client_id}/models/{model_id}/upload"
response = self.session.post(url, json=model, timeout=5)
return response.status_code == 200
def attribute_call(self, client_id: str, model_id: str, call_payload: Dict) -> Dict:
"""
Delegate the requested attribute call to the remote server.
:param client_id: ID of the client
:param model_id: ID of the model
:param call_payload: payload containing attribute name, args, and kwargs
"""
url = f"{self._server_url}/clients/{client_id}/models/{model_id}/attribute-call"
response = self.session.post(url, json=call_payload, timeout=5)
response.raise_for_status()
return response.json()
def attribute_type(self, client_id: str, model_id: str, type_payload: Dict) -> Dict:
"""
Identify the attribute type of the requested attribute.
:param client_id: ID of the client
:param model_id: ID of the model
:param type_payload: payload containing attribute data to inspect
"""
url = f"{self._server_url}/clients/{client_id}/models/{model_id}/attribute-type"
response = self.session.post(url, json=type_payload, timeout=5)
response.raise_for_status()
return response.json()
def register_client(self) -> str:
"""Register client in the PyMiloServer."""
response = self.session.get(f"{self._server_url}/clients/register", timeout=5)
response.raise_for_status()
return response.json()["client_id"]
def remove_client(self, client_id: str) -> bool:
"""
Remove client from the PyMiloServer.
:param client_id: id of the client to remove
"""
response = self.session.delete(f"{self._server_url}/clients/{client_id}", timeout=5)
return response.status_code == 200
def register_model(self, client_id: str) -> str:
"""
Register ML model in the PyMiloServer.
:param client_id: id of the client who owns the model
"""
response = self.session.post(f"{self._server_url}/clients/{client_id}/models/register", timeout=5)
response.raise_for_status()
return response.json()["ml_model_id"]
def remove_model(self, client_id: str, model_id: str) -> bool:
"""
Remove ML model from the PyMiloServer.
:param client_id: client owning the model
:param model_id: model to remove
"""
response = self.session.delete(f"{self._server_url}/clients/{client_id}/models/{model_id}", timeout=5)
return response.status_code == 200
def get_ml_models(self, client_id: str) -> List[str]:
"""
Get all ML models registered for this specific client in the PyMiloServer.
:param client_id: client whose models are being queried
"""
response = self.session.get(f"{self._server_url}/clients/{client_id}/models", timeout=5)
response.raise_for_status()
return response.json()["ml_models_id"]
def grant_access(self, allower_id: str, allowee_id: str, model_id: str) -> bool:
"""
Grant access to a model to another client.
:param allower_id: ID of the client granting access
:param allowee_id: ID of the client being granted access
:param model_id: ID of the model being shared
"""
url = f"{self._server_url}/clients/{allower_id}/grant/{allowee_id}/models/{model_id}"
response = self.session.post(url, timeout=5)
return response.status_code == 200
def revoke_access(self, revoker_id: str, revokee_id: str, model_id: str) -> bool:
"""
Revoke previously granted model access.
:param revoker_id: ID of the client revoking access
:param revokee_id: ID of the client whose access is being revoked
:param model_id: ID of the model
"""
url = f"{self._server_url}/clients/{revoker_id}/revoke/{revokee_id}/models/{model_id}"
response = self.session.post(url, timeout=5)
return response.status_code == 200
def get_allowance(self, allower_id: str) -> Dict[str, List[str]]:
"""
Get the list of all allowees and their allowed models from a given allower.
:param allower_id: ID of the allower
"""
response = self.session.get(f"{self._server_url}/clients/{allower_id}/allowances", timeout=5)
response.raise_for_status()
return response.json()["allowance"]
def get_allowed_models(self, allower_id: str, allowee_id: str) -> List[str]:
"""
Get the list of models that one client is allowed to access from another.
:param allower_id: ID of the model owner
:param allowee_id: ID of the requesting client
"""
url = f"{self._server_url}/clients/{allower_id}/allowances/{allowee_id}"
response = self.session.get(url, timeout=5)
response.raise_for_status()
return response.json()["allowed_models"]
class RESTServerCommunicator():
    """Server-side REST communication medium: exposes the PyMilo server through FastAPI endpoints."""

    def __init__(
        self,
        ps,
        host: str = "127.0.0.1",
        port: int = 8000,
    ):
        """
        Initialize the Pymilo RESTServerCommunicator instance.

        Creates the FastAPI application and registers all routes on it.

        :param ps: reference to the PyMilo server
        :param host: the host address on which the internal server listens
        :param port: the port on which the internal server listens
        """
        self._ps = ps
        self.host = host
        self.port = port
        self.app = FastAPI()
        self.setup_routes()

    def setup_routes(self):
        """Register the endpoints that serve RESTClientCommunicator requests on the FastAPI app."""
        app = self.app
        # Shared URL fragments; doubled braces keep FastAPI path-parameter placeholders literal.
        clients_path = f"{REST_API_PREFIX}/clients"
        client_path = f"{clients_path}/{{client_id}}"
        models_path = f"{client_path}/models"
        model_path = f"{models_path}/{{ml_model_id}}"
        allower_path = f"{clients_path}/{{allower_id}}"

        def abort_unless(ok, detail):
            """Raise a 404 HTTPException carrying ``detail`` when ``ok`` is falsy."""
            if not ok:
                raise HTTPException(status_code=404, detail=detail)

        # NOTE: route handlers are documented with comments (not docstrings) on purpose,
        # because FastAPI publishes endpoint docstrings in the generated OpenAPI schema.

        @app.get(f"{REST_API_PREFIX}/health")
        async def health():
            # Liveness probe.
            return {"status": "ok"}

        @app.get(f"{clients_path}/register")
        async def request_client_id():
            # Create a fresh client id and initialize it on the PyMilo server.
            client_id = str(uuid.uuid4())
            self._ps.init_client(client_id)
            return {"client_id": client_id}

        @app.delete(client_path)
        async def remove_client(client_id: str):
            succeeded, detail = self._ps.remove_client(client_id)
            abort_unless(succeeded, detail)
            return {"client_id": client_id}

        @app.post(f"{models_path}/register")
        async def request_model(client_id: str):
            # Create a fresh model id and initialize it for the given client.
            model_id = str(uuid.uuid4())
            succeeded, detail = self._ps.init_ml_model(client_id, model_id)
            abort_unless(succeeded, detail)
            return {"client_id": client_id, "ml_model_id": model_id}

        @app.delete(model_path)
        async def remove_model(client_id: str, ml_model_id: str):
            succeeded, detail = self._ps.remove_ml_model(client_id, ml_model_id)
            abort_unless(succeeded, detail)
            return {"client_id": client_id, "ml_model_id": ml_model_id}

        @app.get(models_path)
        async def get_client_models(client_id: str):
            model_ids = self._ps.get_ml_models(client_id)
            return {"client_id": client_id, "ml_models_id": model_ids}

        @app.post(f"{allower_path}/grant/{{allowee_id}}/models/{{model_id}}")
        async def grant_model_access(allower_id: str, allowee_id: str, model_id: str):
            succeeded, detail = self._ps.grant_access(allower_id, allowee_id, model_id)
            abort_unless(succeeded, detail)
            return {
                "allower_id": allower_id,
                "allowee_id": allowee_id,
                "allowed_model_id": model_id
            }

        @app.post(f"{clients_path}/{{revoker_id}}/revoke/{{revokee_id}}/models/{{model_id}}")
        async def revoke_model_access(revoker_id: str, revokee_id: str, model_id: str):
            succeeded, detail = self._ps.revoke_access(revoker_id, revokee_id, model_id)
            abort_unless(succeeded, detail)
            return {
                "revoker_id": revoker_id,
                "revokee_id": revokee_id,
                "revoked_model_id": model_id
            }

        @app.get(f"{allower_path}/allowances")
        async def get_allowance(allower_id: str):
            # The server signals failure with ``None`` plus a reason.
            allowance, reason = self._ps.get_clients_allowance(allower_id)
            abort_unless(allowance is not None, reason)
            return {"allower_id": allower_id, "allowance": allowance}

        @app.get(f"{allower_path}/allowances/{{allowee_id}}")
        async def get_allowed_models(allower_id: str, allowee_id: str):
            # The server signals failure with ``None`` plus a reason.
            models, reason = self._ps.get_allowed_models(allower_id, allowee_id)
            abort_unless(models is not None, reason)
            return {"allower_id": allower_id, "allowee_id": allowee_id, "allowed_models": models}

        @app.get(f"{model_path}/download")
        async def download_model(client_id: str, ml_model_id: str):
            ids_ok, reason = self._ps._validate_id(client_id, ml_model_id)
            abort_unless(ids_ok, reason)
            message = MSG_REST_DOWNLOAD_REQUEST.format(client_id=client_id, ml_model_id=ml_model_id)
            exported = self._ps.export_model(client_id, ml_model_id)
            return {"message": message, "payload": exported}

        @app.post(f"{model_path}/upload")
        async def upload_model(client_id: str, ml_model_id: str, request: Request):
            # The body is decoded first, so a missing model is reported (400) before the ids are checked (404).
            body = self.parse(await request.json())
            model_data = body.get("model")
            if model_data is None:
                raise HTTPException(status_code=400, detail="Missing 'model' in request")
            ids_ok, reason = self._ps._validate_id(client_id, ml_model_id)
            abort_unless(ids_ok, reason)
            message = MSG_REST_UPLOAD_REQUEST.format(client_id=client_id, ml_model_id=ml_model_id)
            update_result = self._ps.update_model(client_id, ml_model_id, model_data)
            return {"message": message, "payload": update_result}

        @app.post(f"{model_path}/attribute-call")
        async def attribute_call(client_id: str, ml_model_id: str, request: Request):
            call_payload = self.parse(await request.json())
            ids_ok, reason = self._ps._validate_id(client_id, ml_model_id)
            abort_unless(ids_ok, reason)
            result = self._ps.execute_model(call_payload)
            message = MSG_REST_ATTRIBUTE_CALL_REQUEST.format(client_id=client_id, ml_model_id=ml_model_id)
            # A falsy result is replaced by a fixed notice.
            payload = result if result else "The ML model has been updated in place."
            return {"message": message, "payload": payload}

        @app.post(f"{model_path}/attribute-type")
        async def attribute_type(client_id: str, ml_model_id: str, request: Request):
            type_payload = self.parse(await request.json())
            ids_ok, reason = self._ps._validate_id(client_id, ml_model_id)
            abort_unless(ids_ok, reason)
            is_callable, field_value = self._ps.is_callable_attribute(type_payload)
            message = MSG_REST_ATTRIBUTE_TYPE_REQUEST.format(client_id=client_id, ml_model_id=ml_model_id)
            if is_callable:
                kind, value = "method", ""
            else:
                kind, value = "field", field_value
            return {
                "message": message,
                "attribute type": kind,
                "attribute value": value,
            }

    def parse(self, body: str) -> Dict:
        """
        Parse the compressed encrypted body of the request.

        The body is decrypted with the server's encryptor, extracted with its
        compressor, and the resulting text is decoded as JSON.

        :param body: request body
        :return: the decoded JSON content
        """
        decrypted = self._ps._encryptor.decrypt(body)
        extracted = self._ps._compressor.extract(decrypted)
        return json.loads(extracted)

    def run(self):
        """Run internal fastapi server."""
        uvicorn.run(self.app, host=self.host, port=self.port)
class WebSocketClientCommunicator(ClientCommunicator):
    """
    Facilitate working with the communication medium from the client side for the WebSocket protocol.

    The public methods are synchronous: each one drives the instance's event loop
    until the server has answered the corresponding request.
    """

    def __init__(
        self,
        server_url: str = "ws://127.0.0.1:8000"
    ):
        """
        Initialize the WebSocketClientCommunicator instance.

        :param server_url: the WebSocket server URL to connect to.
        :raises Exception: if ``server_url`` is not a valid WebSocket url
        """
        # Define the attributes read by ``__del__`` before anything can fail, so a
        # rejected URL does not leave an object whose finalizer raises AttributeError.
        self.websocket = None
        self.loop = None
        is_valid, url = validate_websocket_url(server_url)
        if not is_valid:
            raise Exception(PYMILO_INVALID_URL)
        self.server_url = url
        try:
            self.loop = asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread: create and install a dedicated one.
            self.loop = asyncio.new_event_loop()
            asyncio.set_event_loop(self.loop)
        # Created only after the loop is installed: Python versions that bind asyncio
        # primitives to the current loop at construction time would otherwise fail
        # (no current loop in the thread) or attach the event to a different loop.
        self.connection_established = asyncio.Event()
        # NOTE: ``run_until_complete`` raises RuntimeError if ``self.loop`` is already running.
        self.loop.run_until_complete(self.connect())

    def is_socket_closed(self) -> bool:
        """
        Check if the WebSocket connection is closed.

        :return: True when there is no connection or it reports itself closed, False otherwise
        """
        websocket = self.websocket
        if websocket is None:
            return True
        # Connection objects expose either a ``closed`` flag or a ``state`` attribute.
        if hasattr(websocket, "closed"):
            return websocket.closed
        if hasattr(websocket, "state"):
            return websocket.state is websockets.protocol.State.CLOSED
        # Unknown connection type: treat it as open (previously an implicit ``None``).
        return False

    async def connect(self):
        """Establish a WebSocket connection with the server (no-op if one is already open)."""
        if self.is_socket_closed():
            self.websocket = await websockets.connect(self.server_url)
            print("Connected to the WebSocket server.")
            self.connection_established.set()

    async def disconnect(self):
        """Close the WebSocket connection."""
        if self.websocket:
            await self.websocket.close()
            self.websocket = None

    def _disconnect_sync(self) -> None:
        """Close the WebSocket connection synchronously if still open."""
        # ``getattr`` keeps this safe on partially constructed instances (called from ``__del__``).
        websocket = getattr(self, "websocket", None)
        loop = getattr(self, "loop", None)
        if websocket is None or loop is None:
            return
        if self.is_socket_closed() or loop.is_closed():
            return
        loop.run_until_complete(self.disconnect())

    def close(self) -> None:
        """
        Close the WebSocket connection synchronously.

        This method should be called before the event loop is closed
        to ensure proper cleanup.
        """
        self._disconnect_sync()

    def __del__(self) -> None:
        """Clean up WebSocket connection on object destruction."""
        self._disconnect_sync()

    async def send_message(self, action: str, payload: Optional[Dict] = None) -> Dict:
        """
        Send a message to the WebSocket server.

        :param action: the type of action to perform (e.g., 'download', 'upload').
        :param payload: the payload associated with the action.
        :return: the server's response decoded from JSON
        :raises RuntimeError: if the connection is closed
        """
        await self.connection_established.wait()
        if self.is_socket_closed():
            raise RuntimeError(PYMILO_CLIENT_WEBSOCKET_NOT_CONNECTED)
        message = json.dumps({"action": action, "payload": payload or {}})
        await self.websocket.send(message)
        response = await self.websocket.recv()
        return json.loads(response)

    def _check_response_error(self, response: Dict) -> None:
        """
        Check if the server response contains an error and raise an exception if so.

        :param response: the server's response.
        :raises RuntimeError: carrying the server's error text when ``response`` has an ``error`` key
        """
        if "error" in response:
            raise RuntimeError(response["error"])

    def _request(self, action: str, payload: Optional[Dict] = None) -> Dict:
        """
        Send one action to the server, wait for its answer and reject error responses.

        :param action: the action name understood by the server
        :param payload: the payload associated with the action
        :return: the server's response
        :raises RuntimeError: if the connection is closed or the server reports an error
        """
        response = self.loop.run_until_complete(self.send_message(action, payload))
        self._check_response_error(response)
        return response

    def download(self, client_id: str, model_id: str) -> str:
        """
        Request for the remote ML model to download.

        :param client_id: ID of the requesting client
        :param model_id: ID of the model to download
        :return: the ``payload`` field of the response (None if absent)
        """
        response = self._request("download", {
            "client_id": client_id,
            "ml_model_id": model_id,
        })
        return response.get("payload")

    def upload(self, client_id: str, model_id: str, model: Any) -> bool:
        """
        Upload the local ML model to the remote server.

        :param client_id: ID of the client
        :param model_id: ID of the model
        :param model: serialized model content
        :return: True (a server-side error is raised as RuntimeError instead)
        """
        self._request("upload", {
            "client_id": client_id,
            "ml_model_id": model_id,
            "model": model,
        })
        return True

    def attribute_call(self, client_id: str, model_id: str, call_payload: Dict) -> Dict:
        """
        Delegate the requested attribute call to the remote server.

        :param client_id: ID of the client
        :param model_id: ID of the model
        :param call_payload: payload containing attribute name, args, and kwargs
        :return: the server's response
        """
        return self._request("attribute_call", {
            "client_id": client_id,
            "ml_model_id": model_id,
            "call_payload": call_payload,
        })

    def attribute_type(self, client_id: str, model_id: str, type_payload: Dict) -> Dict:
        """
        Identify the attribute type of the requested attribute.

        :param client_id: ID of the client
        :param model_id: ID of the model
        :param type_payload: payload containing attribute data to inspect
        :return: the server's response
        """
        return self._request("attribute_type", {
            "client_id": client_id,
            "ml_model_id": model_id,
            "type_payload": type_payload,
        })

    def register_client(self) -> str:
        """
        Register client in the PyMiloServer.

        :return: the ``client_id`` field of the response
        """
        return self._request("register_client")["client_id"]

    def remove_client(self, client_id: str) -> bool:
        """
        Remove client from the PyMiloServer.

        :param client_id: id of the client to remove
        :return: True (a server-side error is raised as RuntimeError instead)
        """
        self._request("remove_client", {"client_id": client_id})
        return True

    def register_model(self, client_id: str) -> str:
        """
        Register ML model in the PyMiloServer.

        :param client_id: id of the client who owns the model
        :return: the ``ml_model_id`` field of the response
        """
        return self._request("register_model", {"client_id": client_id})["ml_model_id"]

    def remove_model(self, client_id: str, model_id: str) -> bool:
        """
        Remove ML model from the PyMiloServer.

        :param client_id: client owning the model
        :param model_id: model to remove
        :return: True (a server-side error is raised as RuntimeError instead)
        """
        self._request("remove_model", {
            "client_id": client_id,
            "ml_model_id": model_id,
        })
        return True

    def get_ml_models(self, client_id: str) -> List[str]:
        """
        Get all ML models registered for this specific client in the PyMiloServer.

        :param client_id: client whose models are being queried
        :return: the ``ml_models_id`` field of the response
        """
        return self._request("get_ml_models", {"client_id": client_id})["ml_models_id"]

    def grant_access(self, allower_id: str, allowee_id: str, model_id: str) -> bool:
        """
        Grant access to a model to another client.

        :param allower_id: ID of the client granting access
        :param allowee_id: ID of the client being granted access
        :param model_id: ID of the model being shared
        :return: True (a server-side error is raised as RuntimeError instead)
        """
        self._request("grant_access", {
            "allower_id": allower_id,
            "allowee_id": allowee_id,
            "model_id": model_id,
        })
        return True

    def revoke_access(self, revoker_id: str, revokee_id: str, model_id: str) -> bool:
        """
        Revoke previously granted model access.

        :param revoker_id: ID of the client revoking access
        :param revokee_id: ID of the client whose access is being revoked
        :param model_id: ID of the model
        :return: True (a server-side error is raised as RuntimeError instead)
        """
        self._request("revoke_access", {
            "revoker_id": revoker_id,
            "revokee_id": revokee_id,
            "model_id": model_id,
        })
        return True

    def get_allowance(self, allower_id: str) -> Dict[str, List[str]]:
        """
        Get the list of all allowees and their allowed models from a given allower.

        :param allower_id: ID of the allower
        :return: the ``allowance`` field of the response
        """
        return self._request("get_allowance", {"allower_id": allower_id})["allowance"]

    def get_allowed_models(self, allower_id: str, allowee_id: str) -> List[str]:
        """
        Get the list of models that one client is allowed to access from another.

        :param allower_id: ID of the model owner
        :param allowee_id: ID of the requesting client
        :return: the ``allowed_models`` field of the response
        """
        response = self._request("get_allowed_models", {
            "allower_id": allower_id,
            "allowee_id": allowee_id,
        })
        return response["allowed_models"]
class WebSocketServerCommunicator:
    """
    Facilitate working with the communication medium from the server side for the WebSocket protocol.

    A single WebSocket endpoint receives JSON messages of the form
    ``{"action": ..., "payload": ...}`` and dispatches them to one handler per action.
    """

    def __init__(
        self,
        ps,
        host: str = "127.0.0.1",
        port: int = 8000,
    ):
        """
        Initialize the WebSocketServerCommunicator instance.

        :param ps: reference to the PyMilo server.
        :param host: the WebSocket server host address.
        :param port: the WebSocket server port.
        """
        self._ps = ps
        self.host = host
        self.port = port
        self.app = FastAPI()
        self.active_connections: list[WebSocket] = []
        # Dispatch table: action name sent by the client -> handler producing the response dict.
        self._action_handlers = {
            "register_client": self._handle_register_client,
            "remove_client": self._handle_remove_client,
            "register_model": self._handle_register_model,
            "remove_model": self._handle_remove_model,
            "get_ml_models": self._handle_get_ml_models,
            "grant_access": self._handle_grant_access,
            "revoke_access": self._handle_revoke_access,
            "get_allowance": self._handle_get_allowance,
            "get_allowed_models": self._handle_get_allowed_models,
            "download": self._handle_download,
            "upload": self._handle_upload,
            "attribute_call": self._handle_attribute_call,
            "attribute_type": self._handle_attribute_type,
        }
        self.setup_routes()

    def setup_routes(self):
        """Configure the WebSocket endpoint to handle client connections."""
        @self.app.websocket("/")
        async def websocket_endpoint(websocket: WebSocket):
            await self.connect(websocket)
            try:
                while True:
                    message = await websocket.receive_text()
                    await self.handle_message(websocket, message)
            except WebSocketDisconnect:
                # Normal termination: the peer closed the connection.
                pass
            finally:
                # Always unregister the socket, including when the loop ends with an
                # exception other than WebSocketDisconnect (e.g. a failed error reply),
                # so ``active_connections`` does not accumulate dead sockets.
                self.disconnect(websocket)

    async def connect(self, websocket: WebSocket) -> None:
        """
        Accept a WebSocket connection and store it.

        :param websocket: the WebSocket connection to accept.
        """
        await websocket.accept()
        self.active_connections.append(websocket)

    def disconnect(self, websocket: WebSocket) -> None:
        """
        Handle WebSocket disconnection.

        Removing a connection that is not (or no longer) registered is a no-op.

        :param websocket: the WebSocket connection to remove.
        """
        try:
            self.active_connections.remove(websocket)
        except ValueError:
            # Already removed: nothing to do.
            pass

    async def handle_message(self, websocket: WebSocket, message: str) -> None:
        """
        Handle messages received from WebSocket clients.

        The reply is always sent back as JSON; any exception raised while decoding
        or handling the message is reported to the client as ``{"error": ...}``.

        :param websocket: the WebSocket connection from which the message was received.
        :param message: the message received from the client.
        """
        try:
            message = json.loads(message)
            action = message.get("action")
            print(f"Server received action: {action}")
            raw_payload = message.get("payload", {})
            payload = self.parse(raw_payload) if raw_payload else {}
            handler = self._action_handlers.get(action)
            if handler:
                response = handler(payload)
            else:
                response = {"error": f"Unknown action: {action}"}
            await websocket.send_text(json.dumps(response))
        except Exception as e:
            # Protocol boundary: turn the failure into an error reply instead of dropping the connection.
            await websocket.send_text(json.dumps({"error": str(e)}))

    def _handle_register_client(self, payload: dict) -> dict:
        """
        Handle client registration requests.

        :param payload: the payload (empty for this action).
        :return: response carrying the newly generated client ID.
        """
        client_id = str(uuid.uuid4())
        self._ps.init_client(client_id)
        return {
            "message": "Client registered successfully.",
            "client_id": client_id,
        }

    def _handle_remove_client(self, payload: dict) -> dict:
        """
        Handle client removal requests.

        :param payload: the payload containing the client ID to remove.
        :return: success response, or ``{"error": ...}`` if the server refused.
        """
        client_id = payload.get("client_id")
        is_succeed, detail_message = self._ps.remove_client(client_id)
        if not is_succeed:
            return {"error": detail_message}
        return {
            "message": "Client removed successfully.",
            "client_id": client_id,
        }

    def _handle_register_model(self, payload: dict) -> dict:
        """
        Handle ML model registration requests.

        :param payload: the payload containing the client ID.
        :return: response carrying the newly generated model ID, or ``{"error": ...}``.
        """
        client_id = payload.get("client_id")
        ml_model_id = str(uuid.uuid4())
        is_succeed, detail_message = self._ps.init_ml_model(client_id, ml_model_id)
        if not is_succeed:
            return {"error": detail_message}
        return {
            "message": "Model registered successfully.",
            "client_id": client_id,
            "ml_model_id": ml_model_id,
        }

    def _handle_remove_model(self, payload: dict) -> dict:
        """
        Handle ML model removal requests.

        :param payload: the payload containing the client ID and model ID.
        :return: success response, or ``{"error": ...}`` if the server refused.
        """
        client_id = payload.get("client_id")
        ml_model_id = payload.get("ml_model_id")
        is_succeed, detail_message = self._ps.remove_ml_model(client_id, ml_model_id)
        if not is_succeed:
            return {"error": detail_message}
        return {
            "message": "Model removed successfully.",
            "client_id": client_id,
            "ml_model_id": ml_model_id,
        }

    def _handle_get_ml_models(self, payload: dict) -> dict:
        """
        Handle requests to get all ML models for a client.

        :param payload: the payload containing the client ID.
        :return: response carrying the client's model IDs.
        """
        client_id = payload.get("client_id")
        return {
            "message": "ML models retrieved successfully.",
            "client_id": client_id,
            "ml_models_id": self._ps.get_ml_models(client_id),
        }

    def _handle_grant_access(self, payload: dict) -> dict:
        """
        Handle requests to grant model access to another client.

        :param payload: the payload containing allower_id, allowee_id, and model_id.
        :return: success response, or ``{"error": ...}`` if the server refused.
        """
        allower_id = payload.get("allower_id")
        allowee_id = payload.get("allowee_id")
        model_id = payload.get("model_id")
        is_succeed, detail_message = self._ps.grant_access(allower_id, allowee_id, model_id)
        if not is_succeed:
            return {"error": detail_message}
        return {
            "message": "Access granted successfully.",
            "allower_id": allower_id,
            "allowee_id": allowee_id,
            "allowed_model_id": model_id,
        }

    def _handle_revoke_access(self, payload: dict) -> dict:
        """
        Handle requests to revoke model access from another client.

        :param payload: the payload containing revoker_id, revokee_id, and model_id.
        :return: success response, or ``{"error": ...}`` if the server refused.
        """
        revoker_id = payload.get("revoker_id")
        revokee_id = payload.get("revokee_id")
        model_id = payload.get("model_id")
        is_succeed, detail_message = self._ps.revoke_access(revoker_id, revokee_id, model_id)
        if not is_succeed:
            return {"error": detail_message}
        return {
            "message": "Access revoked successfully.",
            "revoker_id": revoker_id,
            "revokee_id": revokee_id,
            "revoked_model_id": model_id,
        }

    def _handle_get_allowance(self, payload: dict) -> dict:
        """
        Handle requests to get all allowances for a client.

        :param payload: the payload containing the allower_id.
        :return: response carrying the allowance, or ``{"error": ...}`` when the server returns None.
        """
        allower_id = payload.get("allower_id")
        allowance, reason = self._ps.get_clients_allowance(allower_id)
        if allowance is None:
            return {"error": reason}
        return {
            "message": "Allowance retrieved successfully.",
            "allower_id": allower_id,
            "allowance": allowance,
        }

    def _handle_get_allowed_models(self, payload: dict) -> dict:
        """
        Handle requests to get allowed models for a specific allowee.

        :param payload: the payload containing allower_id and allowee_id.
        :return: response carrying the allowed models, or ``{"error": ...}`` when the server returns None.
        """
        allower_id = payload.get("allower_id")
        allowee_id = payload.get("allowee_id")
        models, reason = self._ps.get_allowed_models(allower_id, allowee_id)
        if models is None:
            return {"error": reason}
        return {
            "message": "Allowed models retrieved successfully.",
            "allower_id": allower_id,
            "allowee_id": allowee_id,
            "allowed_models": models,
        }

    def _handle_download(self, payload: dict) -> dict:
        """
        Handle download requests.

        :param payload: the payload containing the ids associated with the requested model for download.
        :return: response carrying the exported model, or ``{"error": ...}`` for invalid ids.
        """
        client_id = payload.get("client_id")
        ml_model_id = payload.get("ml_model_id")
        is_valid, reason = self._ps._validate_id(client_id, ml_model_id)
        if not is_valid:
            return {"error": reason}
        return {
            "message": MSG_DOWNLOAD_REQUEST.format(client_id=client_id, ml_model_id=ml_model_id),
            "payload": self._ps.export_model(client_id, ml_model_id),
        }

    def _handle_upload(self, payload: dict) -> dict:
        """
        Handle upload requests.

        :param payload: the payload containing the model data to upload.
        :return: success response, or ``{"error": ...}`` for a missing model or invalid ids.
        """
        client_id = payload.get("client_id")
        ml_model_id = payload.get("ml_model_id")
        encrypted_model = payload.get("model")
        if encrypted_model is None:
            return {"error": "Missing 'model' in request"}
        # The ``model`` field is itself an encoded document that carries a ``model`` entry.
        decrypted_model = self.parse(encrypted_model)
        model_data = decrypted_model.get("model")
        if model_data is None:
            return {"error": "Missing 'model' in decrypted request"}
        is_valid, reason = self._ps._validate_id(client_id, ml_model_id)
        if not is_valid:
            return {"error": reason}
        self._ps.update_model(client_id, ml_model_id, model_data)
        return {
            "message": MSG_UPLOAD_REQUEST.format(client_id=client_id, ml_model_id=ml_model_id),
        }

    def _handle_attribute_call(self, payload: dict) -> dict:
        """
        Handle attribute call requests.

        :param payload: the payload containing the attribute call details.
        :return: response carrying the call result (a fixed notice when the result is falsy), or ``{"error": ...}``.
        """
        client_id = payload.get("client_id")
        ml_model_id = payload.get("ml_model_id")
        encrypted_call_payload = payload.get("call_payload")
        if encrypted_call_payload is None:
            return {"error": "Missing 'call_payload' in request"}
        call_payload = self.parse(encrypted_call_payload)
        is_valid, reason = self._ps._validate_id(client_id, ml_model_id)
        if not is_valid:
            return {"error": reason}
        result = self._ps.execute_model(call_payload)
        return {
            "message": MSG_ATTRIBUTE_CALL_REQUEST.format(client_id=client_id, ml_model_id=ml_model_id),
            "payload": result if result else "The ML model has been updated in place.",
        }

    def _handle_attribute_type(self, payload: dict) -> dict:
        """
        Handle attribute type queries.

        :param payload: the payload containing the attribute to query.
        :return: response telling whether the attribute is a method or a field (with its value), or ``{"error": ...}``.
        """
        client_id = payload.get("client_id")
        ml_model_id = payload.get("ml_model_id")
        encrypted_type_payload = payload.get("type_payload")
        if encrypted_type_payload is None:
            return {"error": "Missing 'type_payload' in request"}
        type_payload = self.parse(encrypted_type_payload)
        is_valid, reason = self._ps._validate_id(client_id, ml_model_id)
        if not is_valid:
            return {"error": reason}
        is_callable, field_value = self._ps.is_callable_attribute(type_payload)
        return {
            "message": MSG_ATTRIBUTE_TYPE_REQUEST.format(client_id=client_id, ml_model_id=ml_model_id),
            "attribute type": "method" if is_callable else "field",
            "attribute value": "" if is_callable else field_value,
        }

    def parse(self, message: Union[str, Dict]) -> Dict:
        """
        Parse the encrypted and compressed message.

        A message that is already a dict is returned unchanged; anything else is
        decrypted, extracted and decoded as JSON.

        :param message: the encrypted and compressed message to parse.
        :return: the decoded message.
        """
        if isinstance(message, dict):
            return message
        return json.loads(
            self._ps._compressor.extract(
                self._ps._encryptor.decrypt(message)
            )
        )

    def run(self):
        """Run the internal FastAPI server."""
        uvicorn.run(self.app, host=self.host, port=self.port)
class CommunicationProtocol(Enum):
    """Supported communication protocols; each value maps "CLIENT"/"SERVER" to the communicator class."""

    # Communicators for the REST protocol.
    REST = dict(
        CLIENT=RESTClientCommunicator,
        SERVER=RESTServerCommunicator,
    )
    # Communicators for the WebSocket protocol.
    WEBSOCKET = dict(
        CLIENT=WebSocketClientCommunicator,
        SERVER=WebSocketServerCommunicator,
    )
================================================
FILE: pymilo/streaming/compressor.py
================================================
# -*- coding: utf-8 -*-
"""Implementations of Compressor interface."""
import gzip
import zlib
import lzma
import bz2
import json
import base64
from enum import Enum
from pymilo.streaming.interfaces import Compressor
class DummyCompressor(Compressor):
    """Pass-through Compressor: applies no compression at all."""

    @staticmethod
    def compress(payload):
        """
        Return the payload as text without compressing it.

        :param payload: a string, or any object ``json.dumps`` can serialize
        :return: the string itself, or the JSON text of a non-string payload
        """
        if isinstance(payload, str):
            return payload
        return json.dumps(payload)

    @staticmethod
    def extract(payload):
        """
        Return the payload unchanged (there is nothing to decompress).

        :param payload: payload previously returned by ``compress``
        :return: the same payload
        """
        return payload
class GZIPCompressor(Compressor):
    """Compressor that gzips the payload and carries the result as Base64 text."""

    @staticmethod
    def compress(payload):
        """
        Compress the given payload using gzip.

        :param payload: a string, or any object ``json.dumps`` can serialize
        :return: Base64 text of the gzip-compressed UTF-8 payload
        """
        text = payload if isinstance(payload, str) else json.dumps(payload)
        packed = gzip.compress(text.encode('utf-8'))
        return base64.b64encode(packed).decode('utf-8')

    @staticmethod
    def extract(payload):
        """
        Extract the given payload using gzip.

        :param payload: Base64 text produced by ``compress``
        :return: the decompressed payload decoded as UTF-8 text
        """
        unpacked = gzip.decompress(base64.b64decode(payload))
        return unpacked.decode('utf-8')
class ZLIBCompressor(Compressor):
    """Compressor that deflates the payload with zlib and carries the result as Base64 text."""

    @staticmethod
    def compress(payload):
        """
        Compress the given payload using zlib.

        :param payload: a string, or any object ``json.dumps`` can serialize
        :return: Base64 text of the zlib-compressed UTF-8 payload
        """
        text = payload if isinstance(payload, str) else json.dumps(payload)
        packed = zlib.compress(text.encode('utf-8'))
        return base64.b64encode(packed).decode('utf-8')

    @staticmethod
    def extract(payload):
        """
        Extract the given payload using zlib.

        :param payload: Base64 text produced by ``compress``
        :return: the decompressed payload decoded as UTF-8 text
        """
        unpacked = zlib.decompress(base64.b64decode(payload))
        return unpacked.decode('utf-8')
class LZMACompressor(Compressor):
    """Compressor that packs the payload with lzma and carries the result as Base64 text."""

    @staticmethod
    def compress(payload):
        """
        Compress the given payload using lzma.

        :param payload: a string, or any object ``json.dumps`` can serialize
        :return: Base64 text of the lzma-compressed UTF-8 payload
        """
        text = payload if isinstance(payload, str) else json.dumps(payload)
        packed = lzma.compress(text.encode('utf-8'))
        return base64.b64encode(packed).decode('utf-8')

    @staticmethod
    def extract(payload):
        """
        Extract the given payload using lzma.

        :param payload: Base64 text produced by ``compress``
        :return: the decompressed payload decoded as UTF-8 text
        """
        unpacked = lzma.decompress(base64.b64decode(payload))
        return unpacked.decode('utf-8')
class BZ2Compressor(Compressor):
    """Compressor that packs the payload with bz2 and carries the result as Base64 text."""

    @staticmethod
    def compress(payload):
        """
        Compress the given payload using bz2.

        :param payload: a string, or any object ``json.dumps`` can serialize
        :return: Base64 text of the bz2-compressed UTF-8 payload
        """
        text = payload if isinstance(payload, str) else json.dumps(payload)
        packed = bz2.compress(text.encode('utf-8'))
        return base64.b64encode(packed).decode('utf-8')

    @staticmethod
    def extract(payload):
        """
        Extract the given payload using bz2.

        :param payload: Base64 text produced by ``compress``
        :return: the decompressed payload decoded as UTF-8 text
        """
        unpacked = bz2.decompress(base64.b64decode(payload))
        return unpacked.decode('utf-8')
class Compression(Enum):
    """
    Compression method used in end to end communication.

    The value of each member is the compressor class itself (not an instance);
    PymiloClient and PymiloServer read `.value` and call its static `compress`/`extract` methods.
    """
    # No compression: payloads are passed through unchanged.
    NULL = DummyCompressor
    GZIP = GZIPCompressor
    ZLIB = ZLIBCompressor
    LZMA = LZMACompressor
    BZ2 = BZ2Compressor
================================================
FILE: pymilo/streaming/encryptor.py
================================================
# -*- coding: utf-8 -*-
"""Implementations of Encryptor interface."""
from .interfaces import Encryptor
class DummyEncryptor(Encryptor):
    """Pass-through Encryptor: both operations hand the payload back untouched."""

    @staticmethod
    def encrypt(payload):
        """
        Return the given payload as is (no encryption applied).

        :param payload: payload to get encrypted
        :return: the very same payload
        """
        return payload

    @staticmethod
    def decrypt(payload):
        """
        Return the given payload as is (no decryption applied).

        :param payload: payload to get decrypted
        :return: the very same payload
        """
        return payload
================================================
FILE: pymilo/streaming/interfaces.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo ML Streaming Interfaces."""
from abc import ABC, abstractmethod
class Compressor(ABC):
    """
    Compressor Interface.

    Each Compressor has methods to compress the given payload or extract it back to the original one.
    Both methods are static: the streaming client and server keep the compressor *class* and call
    `compress`/`extract` directly on it, so implementations must not rely on instance state.
    """

    @staticmethod
    @abstractmethod
    def compress(payload):
        """
        Compress the given payload.

        :param payload: payload to get compressed
        :type payload: str or any JSON serializable object
        :return: the compressed version
        """

    @staticmethod
    @abstractmethod
    def extract(payload):
        """
        Extract the given previously compressed payload.

        :param payload: payload to get extracted
        :type payload: str
        :return: the extracted version
        """
class Encryptor(ABC):
    """
    Encryptor Interface.

    Each Encryptor has methods to encrypt the given payload or decrypt it back to the original one.
    Both methods are declared static, matching the `(payload)` signature implementations use.
    """

    @staticmethod
    @abstractmethod
    def encrypt(payload):
        """
        Encrypt the given payload.

        :param payload: payload to get encrypted
        :type payload: str
        :return: the encrypted version
        """

    @staticmethod
    @abstractmethod
    def decrypt(payload):
        """
        Decrypt the given previously encrypted payload.

        :param payload: payload to get decrypted
        :type payload: str
        :return: the decrypted version
        """
class ClientCommunicator(ABC):
    """
    ClientCommunicator Interface.

    Contract every client-side transport has to fulfil in order to talk to the remote server:

    - client and model registration / removal
    - ML model upload and download
    - delegated attribute access on the remote model
    - model allowance management between clients
    """

    @abstractmethod
    def register_client(self):
        """
        Ask the remote server to register a new client.

        :return: newly allocated client ID
        :rtype: str
        """

    @abstractmethod
    def remove_client(self, client_id):
        """
        Ask the remote server to remove a client.

        :param client_id: client ID to remove
        :type client_id: str
        :return: success status
        :rtype: bool
        """

    @abstractmethod
    def register_model(self, client_id):
        """
        Ask the remote server to register a new ML model for a client.

        :param client_id: client ID
        :type client_id: str
        :return: newly allocated model ID
        :rtype: str
        """

    @abstractmethod
    def remove_model(self, client_id, model_id):
        """
        Ask the remote server to remove one ML model of a client.

        :param client_id: client ID
        :type client_id: str
        :param model_id: model ID
        :type model_id: str
        :return: success status
        :rtype: bool
        """

    @abstractmethod
    def get_ml_models(self, client_id):
        """
        Fetch the ML models registered for a client.

        :param client_id: client ID
        :type client_id: str
        :return: list of model IDs
        :rtype: list[str]
        """

    @abstractmethod
    def grant_access(self, allower_id, allowee_id, model_id):
        """
        Let another client access a model.

        :param allower_id: client who owns the model
        :type allower_id: str
        :param allowee_id: client to be granted access
        :type allowee_id: str
        :param model_id: model ID
        :type model_id: str
        :return: success status
        :rtype: bool
        """

    @abstractmethod
    def revoke_access(self, revoker_id, revokee_id, model_id):
        """
        Take back a model access previously given to another client.

        :param revoker_id: client who owns the model
        :type revoker_id: str
        :param revokee_id: client to be revoked
        :type revokee_id: str
        :param model_id: model ID
        :type model_id: str
        :return: success status
        :rtype: bool
        """

    @abstractmethod
    def get_allowance(self, allower_id):
        """
        Fetch every client and model the given client has allowed.

        :param allower_id: client who granted access
        :type allower_id: str
        :return: dictionary mapping allowee_id to list of model_ids
        :rtype: dict
        """

    @abstractmethod
    def get_allowed_models(self, allower_id, allowee_id):
        """
        Fetch the model IDs `allowee_id` may access from `allower_id`.

        :param allower_id: model owner
        :type allower_id: str
        :param allowee_id: recipient
        :type allowee_id: str
        :return: list of allowed model IDs
        :rtype: list[str]
        """

    @abstractmethod
    def upload(self, client_id, model_id, model):
        """
        Send the local ML model to the remote server.

        :param client_id: ID of the client
        :param model_id: ID of the model
        :param model: serialized model content
        :return: True if upload was successful, False otherwise
        """

    @abstractmethod
    def download(self, client_id, model_id):
        """
        Retrieve the remote ML model.

        :param client_id: ID of the requesting client
        :param model_id: ID of the model to download
        :return: string serialized model
        """

    @abstractmethod
    def attribute_call(self, client_id, model_id, call_payload):
        """
        Run an attribute call on the remote model.

        :param client_id: ID of the client
        :param model_id: ID of the model
        :param call_payload: payload containing attribute name, args, and kwargs
        :return: remote server response
        """

    @abstractmethod
    def attribute_type(self, client_id, model_id, type_payload):
        """
        Ask the remote server whether an attribute of the model is a method or a field.

        :param client_id: client ID
        :param model_id: model ID
        :param type_payload: payload containing targeted attribute
        :return: remote server response
        """
================================================
FILE: pymilo/streaming/param.py
================================================
# -*- coding: utf-8 -*-
"""Streaming Parameters and constants."""
# Messages used by PymiloClient (status prints and raised errors).
PYMILO_CLIENT_INVALID_MODE = "Invalid mode, the given mode should be either `LOCAL`[default] or `DELEGATE`."
PYMILO_CLIENT_MODEL_SYNCHED = "PyMiloClient synched the local ML model with the remote one successfully."
PYMILO_CLIENT_LOCAL_MODEL_UPLOADED = "PyMiloClient uploaded the local model successfully."
PYMILO_CLIENT_LOCAL_MODEL_UPLOAD_FAILED = "PyMiloClient failed to upload the local model."
PYMILO_CLIENT_INVALID_ATTRIBUTE = "This attribute doesn't exist in either PymiloClient or the inner ML model."
PYMILO_CLIENT_FAILED_TO_DOWNLOAD_REMOTE_MODEL = "PyMiloClient failed to download the remote ML model."
# Error raised by PymiloServer when a requested attribute is missing on the model.
PYMILO_SERVER_NON_EXISTENT_ATTRIBUTE = "The requested attribute doesn't exist in this model."
# Generic URL / connection errors.
PYMILO_INVALID_URL = "The given URL is not valid."
PYMILO_CLIENT_WEBSOCKET_NOT_CONNECTED = "WebSocket is not connected."
# Common path prefix of the REST endpoints.
REST_API_PREFIX = "/api/v1"
# Message templates; `client_id` and `ml_model_id` are filled in via str.format.
MSG_DOWNLOAD_REQUEST = "Download request from client: {client_id} for model: {ml_model_id}"
MSG_UPLOAD_REQUEST = "Upload request from client: {client_id} for model: {ml_model_id}"
MSG_ATTRIBUTE_CALL_REQUEST = "Attribute call request from client: {client_id} for model: {ml_model_id}"
MSG_ATTRIBUTE_TYPE_REQUEST = "Attribute type request from client: {client_id} for model: {ml_model_id}"
# Same templates, prefixed with the endpoint name (presumably used by the REST communicator - confirm).
MSG_REST_DOWNLOAD_REQUEST = "/download request from client: {client_id} for model: {ml_model_id}"
MSG_REST_UPLOAD_REQUEST = "/upload request from client: {client_id} for model: {ml_model_id}"
MSG_REST_ATTRIBUTE_CALL_REQUEST = "/attribute_call request from client: {client_id} for model: {ml_model_id}"
MSG_REST_ATTRIBUTE_TYPE_REQUEST = "/attribute_type request from client: {client_id} for model: {ml_model_id}"
================================================
FILE: pymilo/streaming/pymilo_client.py
================================================
# -*- coding: utf-8 -*-
"""PyMiloClient for RESTful Protocol."""
from enum import Enum
from .encryptor import DummyEncryptor
from .compressor import Compression
from ..pymilo_obj import Export, Import
from .param import PYMILO_CLIENT_INVALID_MODE, PYMILO_CLIENT_MODEL_SYNCHED, \
PYMILO_CLIENT_LOCAL_MODEL_UPLOADED, PYMILO_CLIENT_LOCAL_MODEL_UPLOAD_FAILED, \
PYMILO_CLIENT_INVALID_ATTRIBUTE, PYMILO_CLIENT_FAILED_TO_DOWNLOAD_REMOTE_MODEL
from .communicator import CommunicationProtocol
from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter
class PymiloClient:
    """
    Facilitate working with the PyMilo server.

    Attributes that are not found on the client itself are resolved by `__getattr__`: on the wrapped
    local model in LOCAL mode, or on the remote model through the communicator in DELEGATE mode.
    """

    # Instance fields assigned in __init__. When one of them is missing the instance is only partially
    # initialised, and __getattr__ must fail fast instead of trying to resolve it on the model/server.
    _OWN_FIELDS = frozenset((
        "model",
        "client_id",
        "ml_model_id",
        "_mode",
        "_compressor",
        "_encryptor",
        "_communicator",
    ))

    class Mode(Enum):
        """Fallback state of the PyMiloClient."""

        LOCAL = 1
        DELEGATE = 2

    def __init__(
            self,
            model=None,
            mode=Mode.LOCAL,
            compressor=Compression.NULL,
            server_url="127.0.0.1:8000",
            communication_protocol=CommunicationProtocol.REST,
    ):
        """
        Initialize the Pymilo PymiloClient instance.

        :param model: the ML model PyMiloClient wrapped around
        :type model: Any
        :param mode: the mode in which PymiloClient should work, either LOCAL mode or DELEGATE
        :type mode: PymiloClient.Mode
        :param compressor: the compression method to be used in client-server communications
        :type compressor: pymilo.streaming.compressor.Compression
        :param server_url: the url to which PyMilo Server listens
        :type server_url: str
        :param communication_protocol: The communication protocol to be used by PymiloClient
        :type communication_protocol: pymilo.streaming.communicator.CommunicationProtocol
        :return: an instance of the Pymilo PymiloClient class
        """
        self.model = model
        self.client_id = "0x_client_id"
        self.ml_model_id = "0x_ml_model_id"
        self._mode = mode
        self._compressor = compressor.value
        self._encryptor = DummyEncryptor()
        self._communicator = communication_protocol.value["CLIENT"](server_url)

    def encrypt_compress(self, body):
        """
        Compress and Encrypt body payload.

        :param body: body payload of the request
        :type body: dict
        :return: the compressed and encrypted version of the body payload
        """
        return self._encryptor.encrypt(
            self._compressor.compress(body)
        )

    def toggle_mode(self, mode=Mode.LOCAL):
        """
        Toggle the PyMiloClient mode, either from LOCAL to DELEGATE or vice versa.

        :param mode: the mode to switch to
        :type mode: PymiloClient.Mode
        :raises Exception: if the given mode is not a member of PymiloClient.Mode
        :return: None
        """
        if mode not in PymiloClient.Mode.__members__.values():
            raise Exception(PYMILO_CLIENT_INVALID_MODE)
        self._mode = mode

    def download(self):
        """
        Request for the remote ML model to download.

        On success the local model is replaced by the downloaded one; on failure a message is printed
        and the local model is left untouched.

        :return: None
        """
        serialized_model = self._communicator.download(
            self.client_id,
            self.ml_model_id
        )
        if serialized_model is None:
            print(PYMILO_CLIENT_FAILED_TO_DOWNLOAD_REMOTE_MODEL)
            return
        self.model = Import(file_adr=None, json_dump=serialized_model).to_model()
        print(PYMILO_CLIENT_MODEL_SYNCHED)

    def upload(self):
        """
        Upload the local ML model to the remote server.

        The outcome is reported by printing a success or failure message.

        :return: None
        """
        succeed = self._communicator.upload(
            self.client_id,
            self.ml_model_id,
            self.encrypt_compress({"model": Export(self.model).to_json()})
        )
        if succeed:
            print(PYMILO_CLIENT_LOCAL_MODEL_UPLOADED)
        else:
            print(PYMILO_CLIENT_LOCAL_MODEL_UPLOAD_FAILED)

    def register(self):
        """
        Register client in the remote server.

        :return: None
        """
        self.client_id = self._communicator.register_client()

    def deregister(self):
        """
        Deregister client in the remote server.

        :return: None
        """
        self._communicator.remove_client(self.client_id)
        # fall back to the placeholder id used before registration
        self.client_id = "0x_client_id"

    def register_ml_model(self):
        """
        Register ML model in the remote server.

        :return: None
        """
        self.ml_model_id = self._communicator.register_model(self.client_id)

    def deregister_ml_model(self):
        """
        Deregister ML model in the remote server.

        :return: None
        """
        self._communicator.remove_model(self.client_id, self.ml_model_id)
        # fall back to the placeholder id used before registration
        self.ml_model_id = "0x_ml_model_id"

    def get_ml_models(self):
        """
        Get all registered ml models in the remote server for this client.

        :return: list of ml model ids
        """
        return self._communicator.get_ml_models(self.client_id)

    def grant_access(self, allowee_id):
        """
        Grant access to one of this client's models to another client.

        :param allowee_id: The client ID to grant access to
        :return: True if successful, False otherwise
        """
        return self._communicator.grant_access(
            self.client_id,
            allowee_id,
            self.ml_model_id
        )

    def revoke_access(self, revokee_id):
        """
        Revoke access previously granted to another client.

        :param revokee_id: The client ID to revoke access from
        :return: True if successful, False otherwise
        """
        return self._communicator.revoke_access(
            self.client_id,
            revokee_id,
            self.ml_model_id
        )

    def get_allowance(self):
        """
        Get a dictionary of all clients who have access to this client's models.

        :return: Dict of allowee_id -> list of model_ids
        """
        return self._communicator.get_allowance(self.client_id)

    def get_allowed_models(self, allower_id):
        """
        Get a list of models you are allowed to access from another client.

        :param allower_id: The client ID who owns the models
        :return: list of allowed model IDs
        """
        return self._communicator.get_allowed_models(allower_id, self.client_id)

    def close(self):
        """
        Close the client connection and release resources.

        Safe to call on a partially initialised client (no communicator yet) and on communicators
        that do not expose a `close` method.

        :return: None
        """
        communicator = getattr(self, "_communicator", None)
        if communicator is not None and hasattr(communicator, 'close'):
            communicator.close()

    def __enter__(self):
        """
        Enter the context manager.

        :return: self
        """
        return self

    def __exit__(self, _exc_type, _exc_val, _exc_tb):
        """
        Exit the context manager and close the connection.

        :return: False
        """
        self.close()
        return False

    def __del__(self):
        """Clean up resources on object destruction."""
        self.close()

    def __getattr__(self, attribute):
        """
        Overwrite the __getattr__ default function to extract requested.

        1. If self._mode is LOCAL, extract the requested from inner ML model and returns it
        2. If self._mode is DELEGATE, returns a wrapper relayer which delegates the request to the remote server by execution

        :param attribute: name of the attribute that normal lookup failed to find
        :type attribute: str
        :raises AttributeError: if the attribute is one of the client's own (unset) fields, or is missing
            on the local model in LOCAL mode
        :return: Any
        """
        # __getattr__ only runs after normal lookup failed. If one of the client's own fields is missing
        # (e.g. __init__ raised before assigning it and __del__ is now running), reading self._mode or
        # self._communicator below would re-enter __getattr__ and recurse without bound.
        if attribute in PymiloClient._OWN_FIELDS:
            raise AttributeError(attribute)
        if self._mode == PymiloClient.Mode.LOCAL:
            if attribute in dir(self.model):
                return getattr(self.model, attribute)
            else:
                raise AttributeError(PYMILO_CLIENT_INVALID_ATTRIBUTE)
        elif self._mode == PymiloClient.Mode.DELEGATE:
            gdst = GeneralDataStructureTransporter()
            # first ask the server whether the attribute is a plain field or something callable
            response = self._communicator.attribute_type(
                self.client_id,
                self.ml_model_id,
                self.encrypt_compress({
                    "attribute": attribute,
                    "client_id": self.client_id,
                    "ml_model_id": self.ml_model_id,
                })
            )
            if response["attribute type"] == "field":
                return gdst.deserialize(response, "attribute value", None)

            def relayer(*args, **kwargs):
                # forward the call, with serialized arguments, to the remote model
                payload = {
                    "client_id": self.client_id,
                    "ml_model_id": self.ml_model_id,
                    'attribute': attribute,
                    'args': args,
                    'kwargs': kwargs,
                }
                payload["args"] = gdst.serialize(payload, "args", None)
                payload["kwargs"] = gdst.serialize(payload, "kwargs", None)
                result = self._communicator.attribute_call(
                    self.client_id,
                    self.ml_model_id,
                    self.encrypt_compress(payload)
                )
                return gdst.deserialize(result, "payload", None)
            return relayer
================================================
FILE: pymilo/streaming/pymilo_server.py
================================================
# -*- coding: utf-8 -*-
"""PyMiloServer for RESTful protocol."""
from ..pymilo_obj import Export, Import
from .compressor import Compression
from .encryptor import DummyEncryptor
from .communicator import CommunicationProtocol
from .param import PYMILO_SERVER_NON_EXISTENT_ATTRIBUTE
from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter
class PymiloServer:
    """
    Facilitate streaming the ML models.

    Keeps, in memory, the ML models of every registered client (`_clients`: client_id -> {ml_model_id: model})
    and the access grants between clients (`_allowance`: allower_id -> {allowee_id: [ml_model_id, ...]}).
    """

    def __init__(
            self,
            port=8000,
            host="127.0.0.1",
            compressor=Compression.NULL,
            communication_protocol=CommunicationProtocol.REST,
    ):
        """
        Initialize the Pymilo PymiloServer instance.

        :param port: the port to which PyMiloServer listens
        :type port: int
        :param host: the url to which PyMilo Server listens
        :type host: str
        :param compressor: the compression method to be used in client-server communications
        :type compressor: pymilo.streaming.compressor.Compression
        :param communication_protocol: The communication protocol to be used by PymiloServer
        :type communication_protocol: pymilo.streaming.communicator.CommunicationProtocol
        :return: an instance of the PymiloServer class
        """
        self._compressor = compressor.value
        self._encryptor = DummyEncryptor()
        # In-memory storage (replace with a database for persistence).
        # Created before the communicator, which receives this (then fully initialised) server object.
        self._clients = {}
        self._allowance = {}
        self.communicator = communication_protocol.value["SERVER"](ps=self, host=host, port=port)

    @staticmethod
    def _request_field(request, name):
        """Read `name` from a request that is either a dict or an object exposing it as an attribute."""
        return request[name] if isinstance(request, dict) else getattr(request, name)

    def export_model(self, client_id, ml_model_id):
        """
        Export the ML model to string json dump using PyMilo Export class.

        :param client_id: The ID of the client owning the model.
        :type client_id: str
        :param ml_model_id: The ID of the machine learning model to export.
        :type ml_model_id: str
        :return: str
        """
        return Export(self._clients[client_id][ml_model_id]).to_json()

    def update_model(self, client_id, ml_model_id, serialized_model):
        """
        Update the PyMilo Server's ML model.

        :param client_id: The ID of the client owning the model.
        :type client_id: str
        :param ml_model_id: The ID of the machine learning model to update.
        :type ml_model_id: str
        :param serialized_model: the json dump of a pymilo export ml model
        :type serialized_model: str
        :return: None
        """
        self._clients[client_id][ml_model_id] = Import(file_adr=None, json_dump=serialized_model).to_model()

    def execute_model(self, request):
        """
        Execute the request attribute call from PyMilo Client.

        :param request: request obj containing requested attribute to call with the associated args and kwargs
        :type request: obj
        :raises Exception: if the requested attribute does not exist on the model
        :return: str | dict
        """
        gdst = GeneralDataStructureTransporter()
        attribute = self._request_field(request, "attribute")
        _client_id = self._request_field(request, "client_id")
        _ml_model_id = self._request_field(request, "ml_model_id")
        _ml_model = self._clients[_client_id][_ml_model_id]
        retrieved_attribute = getattr(_ml_model, attribute, None)
        if retrieved_attribute is None:
            raise Exception(PYMILO_SERVER_NON_EXISTENT_ATTRIBUTE)
        arguments = {
            'args': self._request_field(request, "args"),
            'kwargs': self._request_field(request, "kwargs"),
        }
        args = gdst.deserialize(arguments, 'args', None)
        kwargs = gdst.deserialize(arguments, 'kwargs', None)
        output = retrieved_attribute(*args, **kwargs)
        # A call returning an object of the model's own type (e.g. a fluent `fit`) replaces the stored
        # model instead of being shipped back to the client.
        if isinstance(output, type(_ml_model)):
            self._clients[_client_id][_ml_model_id] = output
            return None
        return gdst.serialize({'output': output}, 'output', None)

    def is_callable_attribute(self, request):
        """
        Check whether the requested attribute is callable or not.

        :param request: request obj containing requested attribute to check it's type
        :type request: obj
        :return: (True, None) if it is callable, otherwise (False, serialized attribute value)
        """
        attribute = self._request_field(request, "attribute")
        _client_id = self._request_field(request, "client_id")
        _ml_model_id = self._request_field(request, "ml_model_id")
        _ml_model = self._clients[_client_id][_ml_model_id]
        retrieved_attribute = getattr(_ml_model, attribute, None)
        if callable(retrieved_attribute):
            return True, None
        return False, GeneralDataStructureTransporter().serialize({'output': retrieved_attribute}, 'output', None)

    def _validate_id(self, client_id, ml_model_id):
        """
        Validate the provided client ID and machine learning model ID.

        :param client_id: The ID of the client to validate.
        :type client_id: str
        :param ml_model_id: The ID of the machine learning model to validate.
        :type ml_model_id: str
        :return: A tuple containing a boolean indicating validity and an error message if invalid.
        """
        if client_id not in self._clients:
            return False, "The given client_id is invalid."
        if ml_model_id not in self._clients[client_id]:
            return False, "The given client_id is valid but requested ml_model_id is invalid."
        return True, None

    def init_client(self, client_id):
        """
        Initialize a new client with the given client ID.

        :param client_id: The ID of the client to initialize.
        :type client_id: str
        :return: A tuple containing a boolean indicating success and an error message if the client already exists.
        """
        if client_id in self._clients:
            return False, f"The client with client_id: {client_id} already exists."
        self._clients[client_id] = {}
        self._allowance[client_id] = {}
        return True, None

    def remove_client(self, client_id):
        """
        Remove an existing client by the given client ID.

        Besides the client's own models and the grants it gave, every grant other clients gave *to* it is
        dropped as well, so no allowance entry refers to a client that no longer exists.

        :param client_id: The ID of the client to remove.
        :type client_id: str
        :return: A tuple containing a boolean indicating success and an error message if the client does not exist.
        """
        if client_id not in self._clients:
            return False, f"The client with client_id: {client_id} doesn't exist."
        del self._clients[client_id]
        self._allowance.pop(client_id, None)
        # revoke_access rejects unknown allowees, so these entries could never be cleaned up later
        for grants in self._allowance.values():
            grants.pop(client_id, None)
        return True, None

    def grant_access(self, allower_id, allowee_id, allowed_model_id):
        """
        Allow a client to access a specific machine learning model of another client.

        :param allower_id: The ID of the client granting access.
        :type allower_id: str
        :param allowee_id: The ID of the client being granted access.
        :type allowee_id: str
        :param allowed_model_id: The ID of the machine learning model to be accessed.
        :type allowed_model_id: str
        :return: A tuple containing a boolean indicating success and an error message if the operation fails.
        """
        if allower_id not in self._clients:
            return False, f"The allower client with client_id: {allower_id} doesn't exist."
        if allowee_id not in self._clients:
            return False, f"The allowee client with client_id: {allowee_id} doesn't exist."
        if allowed_model_id not in self._clients[allower_id]:
            return False, f"The model with ml_model_id: {allowed_model_id} doesn't exist for the allower client with client_id: {allower_id}."
        grants = self._allowance.setdefault(allower_id, {})
        if allowed_model_id in grants.get(allowee_id, []):
            return False, f"The model with ml_model_id: {allowed_model_id} is already allowed for the allowee client with client_id: {allowee_id} by the allower client with client_id: {allower_id}."
        grants.setdefault(allowee_id, []).append(allowed_model_id)
        return True, None

    def revoke_access(self, allower_id, allowee_id, allowed_model_id=None):
        """
        Revoke a client's access to a specific machine learning model of another client.

        :param allower_id: The ID of the client revoking access.
        :type allower_id: str
        :param allowee_id: The ID of the client whose access is being revoked.
        :type allowee_id: str
        :param allowed_model_id: The ID of the machine learning model whose access is being revoked;
            when None, every model granted to the allowee by the allower is revoked.
        :type allowed_model_id: str
        :return: A tuple containing a boolean indicating success and an error message if the operation fails.
        """
        if allower_id not in self._clients:
            return False, f"The allower client with client_id: {allower_id} doesn't exist."
        if allowee_id not in self._clients:
            return False, f"The allowee client with client_id: {allowee_id} doesn't exist."
        grants = self._allowance.setdefault(allower_id, {})
        if allowed_model_id is None:
            grants.pop(allowee_id, None)
            return True, None
        if allowed_model_id not in self._clients[allower_id]:
            return False, f"The model with ml_model_id: {allowed_model_id} doesn't exist for the allower client with client_id: {allower_id}."
        if allowee_id not in grants:
            return False, f"The allowee client with client_id: {allowee_id} doesn't have any access granted by the allower client with client_id: {allower_id}."
        if allowed_model_id not in grants[allowee_id]:
            return False, f"The model with ml_model_id: {allowed_model_id} is not allowed for the allowee client with client_id: {allowee_id} by the allower client with client_id: {allower_id}."
        grants[allowee_id].remove(allowed_model_id)
        return True, None

    def get_allowed_models(self, allower_id, allowee_id):
        """
        Retrieve a list of machine learning model IDs that a client is allowed to access from another client.

        :param allower_id: The ID of the client who granted access.
        :type allower_id: str
        :param allowee_id: The ID of the client who has been granted access.
        :type allowee_id: str
        :return: A tuple of (list of allowed model IDs, None) or (None, error message) if a client is unknown.
        """
        if allower_id not in self._clients:
            return None, f"The allower client with client_id: {allower_id} doesn't exist."
        if allowee_id not in self._clients:
            return None, f"The allowee client with client_id: {allowee_id} doesn't exist."
        return self._allowance.get(allower_id, {}).get(allowee_id, []), None

    def get_clients_allowance(self, client_id):
        """
        Retrieve the allowance dictionary for a given client.

        :param client_id: The ID of the client whose allowance is being retrieved.
        :type client_id: str
        :return: A tuple of (allowance dictionary, None) or (None, error message) if the client is unknown.
        """
        if client_id not in self._allowance:
            return None, f"The client with client_id: {client_id} doesn't exist."
        return self._allowance[client_id], None

    def get_clients(self):
        """
        Retrieve a list of all registered client IDs.

        :return: A list of client IDs.
        """
        return list(self._clients)

    def init_ml_model(self, client_id, ml_model_id):
        """
        Initialize a new machine learning model for a given client.

        :param client_id: The ID of the client to associate with the model.
        :type client_id: str
        :param ml_model_id: The ID of the machine learning model to initialize.
        :type ml_model_id: str
        :return: A tuple containing a boolean indicating success and an error message if the model already exists or the client ID is invalid.
        """
        if client_id not in self._clients:
            return False, "The given client_id is invalid."
        if ml_model_id in self._clients[client_id]:
            return False, f"The given ml_model_id: {ml_model_id} already exists within ml models of the client with client_id of {client_id}."
        # empty placeholder until a real model is uploaded / set
        self._clients[client_id][ml_model_id] = {}
        return True, None

    def set_ml_model(self, client_id, ml_model_id, ml_model):
        """
        Set or update the machine learning model for a given client.

        :param client_id: The ID of the client.
        :type client_id: str
        :param ml_model_id: The ID of the machine learning model.
        :type ml_model_id: str
        :param ml_model: The machine learning model object to be set.
        :type ml_model: obj
        :return: None
        """
        self._clients[client_id][ml_model_id] = ml_model

    def remove_ml_model(self, client_id, ml_model_id):
        """
        Remove an existing machine learning model for a given client.

        Grants the client gave on that model are dropped too, so no allowance entry refers to a model
        that no longer exists.

        :param client_id: The ID of the client.
        :type client_id: str
        :param ml_model_id: The ID of the machine learning model to remove.
        :type ml_model_id: str
        :return: A tuple containing a boolean indicating success and an error message if the client ID or model ID is invalid.
        """
        if client_id not in self._clients:
            return False, "The given client_id is invalid."
        if ml_model_id not in self._clients[client_id]:
            return False, f"The client with client_id: {client_id} doesn't have any model with ml_model_id of {ml_model_id}."
        del self._clients[client_id][ml_model_id]
        for granted_models in self._allowance.get(client_id, {}).values():
            if ml_model_id in granted_models:
                granted_models.remove(ml_model_id)
        return True, None

    def get_ml_models(self, client_id):
        """
        Retrieve a list of all machine learning model IDs associated with a given client.

        :param client_id: The ID of the client.
        :type client_id: str
        :raises KeyError: if the client is not registered
        :return: A list of machine learning model IDs.
        """
        return list(self._clients[client_id])
================================================
FILE: pymilo/streaming/util.py
================================================
# -*- coding: utf-8 -*-
"""ML Streaming utility module."""
import os
import re
from ..pymilo_param import URL_REGEX
def validate_websocket_url(url: str) -> tuple:
    """
    Validate a WebSocket URL and add the 'ws://' protocol if missing.

    The whole string has to match `scheme://host[:port][/path]`; in particular a trailing newline
    is rejected rather than silently carried into the returned URL.

    :param url: The WebSocket URL to validate.
    :type url: str
    :return: A tuple where the first element is a boolean indicating whether the URL is valid,
             and the second element is the possibly corrected URL (or None if invalid).
    """
    if not re.match(r"^(ws|wss)://", url):
        url = "ws://" + url
    # fullmatch instead of match + `$`: `$` also matches just before a final newline
    full_pattern = r"(ws|wss)://([a-zA-Z0-9.-]+)(:[0-9]+)?(/.*)?"
    if re.fullmatch(full_pattern, url) is None:
        return False, None
    return True, url
def validate_http_url(url: str) -> tuple:
    """
    Validate a HTTP URL and add the 'http://' protocol if missing.

    The whole string has to match `scheme://host[:port][/path]`; in particular a trailing newline
    is rejected rather than silently carried into the returned URL.

    :param url: The HTTP URL to validate.
    :type url: str
    :return: A tuple where the first element is a boolean indicating whether the URL is valid,
             and the second element is the possibly corrected URL (or None if invalid).
    """
    if not re.match(r"^(http|https)://", url):
        url = "http://" + url
    # fullmatch instead of match + `$`: `$` also matches just before a final newline
    full_pattern = r"(http|https)://([a-zA-Z0-9.-]+)(:[0-9]+)?(/.*)?"
    if re.fullmatch(full_pattern, url) is None:
        return False, None
    return True, url
def generate_dockerfile(
        dockerfile_name="Dockerfile",
        model_path=None,
        compression='NULL',
        protocol='REST',
        port=8000,
        init_model=None,
        bare=False
):
    """
    Generate a Dockerfile for running a PyMilo server with specified configurations.

    Exactly one start-up option is emitted, in this order of precedence: `--load` (when `model_path` is
    given), `--init` (when `init_model` is given), `--bare` (when `bare` is set).

    :param dockerfile_name: Name of the dockerfile.
    :type dockerfile_name: str
    :param model_path: Path or URL to the exported model JSON file.
    :type model_path: str
    :param compression: Compression method (default: NULL).
    :type compression: str
    :param protocol: Communication protocol (default: REST).
    :type protocol: str
    :param port: Port for the PyMilo server (default: 8000).
    :type port: int
    :param init_model: The model that the server initialized with.
    :type init_model: str
    :param bare: A flag that sets if the server runs without an internal ML model.
    :type bare: boolean
    :return: None
    """
    import json  # local import, only needed to render the exec-form CMD array

    dockerfile_content = """# Use an official Python runtime as a parent image
FROM python:3.11-slim
# Set the working directory in the container
WORKDIR /app
# Install pymilo
RUN pip install pymilo[streaming]
"""
    is_url = False
    if model_path:
        if re.match(URL_REGEX, model_path):
            is_url = True
        else:
            # a local model file is copied into the image, a URL is loaded by the server at start-up
            dockerfile_content += f"\nCOPY {os.path.basename(model_path)} /app/model.json"
    # Expose the specified port
    dockerfile_content += f"\nEXPOSE {port}"

    command = [
        "python", "-m", "pymilo",
        "--compression", str(compression),
        "--protocol", str(protocol),
        "--port", str(port),
    ]
    if model_path:
        command += ["--load", str(model_path) if is_url else "/app/model.json"]
    elif init_model:
        command += ["--init", str(init_model)]
    elif bare:
        command.append("--bare")
    # The exec form of CMD has to be a valid JSON array; json.dumps guarantees the separators and
    # escapes quotes/backslashes inside the arguments.
    dockerfile_content += f"\nCMD {json.dumps(command, ensure_ascii=False)}\n"

    with open(dockerfile_name, 'w') as f:
        f.write(dockerfile_content)
================================================
FILE: pymilo/transporters/__init__.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo transporters."""
================================================
FILE: pymilo/transporters/adamoptimizer_transporter.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo Adam optimizer object transporter."""
from .transporter import AbstractTransporter
from ..utils.util import check_str_in_iterable
from sklearn.neural_network._stochastic_optimizers import AdamOptimizer
class AdamOptimizerTransporter(AbstractTransporter):
    """Customized PyMilo Transporter developed to handle AdamOptimizer field."""

    def serialize(self, data, key, model_type):
        """
        Serialize instances of the AdamOptimizer class.

        Record the `beta_1`, `beta_2` and `epsilon` fields of the AdamOptimizer object, together with
        `params` built from the `coefs_` and `intercepts_` entries of the data dictionary.
        Any other value is returned untouched.

        :param data: the internal data dictionary of the given model
        :type data: dict
        :param key: the special key of the data param, which we're going to serialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model
        :type model_type: str
        :return: pymilo serialized output of data[key]
        """
        value = data[key]
        if not isinstance(value, AdamOptimizer):
            return value
        data[key] = {
            "pymilo-bypass": True,
            'pymilo-adamoptimizer': {
                "params": data["coefs_"] + data["intercepts_"],
                'type': "AdamOptimizer",
                'beta_1': value.beta_1,
                'beta_2': value.beta_2,
                'epsilon': value.epsilon
            }
        }
        return data[key]

    def deserialize(self, data, key, model_type):
        """
        Deserialize the special _optimizer field of the AdamOptimizer.

        When data[key] carries a previously serialized AdamOptimizer, a new AdamOptimizer is built from
        the recorded `params`, `beta_1`, `beta_2` and `epsilon`; otherwise data[key] is returned as is.

        :param data: the internal data dictionary of the associated json file of the ML model which is generated previously by
        pymilo export.
        :type data: dict
        :param key: the special key of the data param, which we're going to deserialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model
        :type model_type: str
        :return: pymilo deserialized output of data[key]
        """
        content = data[key]
        if not check_str_in_iterable("pymilo-adamoptimizer", content):
            return content
        recorded = content["pymilo-adamoptimizer"]
        if recorded["type"] != "AdamOptimizer":
            return content
        return AdamOptimizer(
            params=recorded["params"],
            beta_1=recorded['beta_1'],
            beta_2=recorded['beta_2'],
            epsilon=recorded['epsilon'])
================================================
FILE: pymilo/transporters/baseloss_transporter.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo Base loss transporter."""
# Handle python 3.5 issues.
from .transporter import AbstractTransporter
from ..utils.util import check_str_in_iterable
# Names of the GLM estimators whose loss/family/link fields need special handling.
glm_models = [
    'GammaRegressor',
    'PoissonRegressor',
    'TweedieRegressor'
]
# Set to True only when the older GLM implementation (family/link classes) is importable.
legacy_version = False
try:
    from sklearn._loss.loss import BaseLoss
    # So the python version is >= 3.8
    from sklearn.linear_model._glm import GammaRegressor
    from sklearn.linear_model._glm import PoissonRegressor
    from sklearn.linear_model._glm import TweedieRegressor
# `Exception` rather than `BaseException`: a failed probe must be tolerated,
# but KeyboardInterrupt/SystemExit must still propagate.
except Exception:  # pragma: no cover
    # if all bypasses are true, then we either don't have
    # TweedieRegression(3.5) or we have other kind of TweedieRegression(3.7)
    try:
        from sklearn._loss.glm_distribution import (
            ExponentialDispersionModel,
            TweedieDistribution,
            EDM_DISTRIBUTIONS,
        )
        from sklearn.linear_model._glm.link import (
            BaseLink,
            IdentityLink,
            LogLink,
        )
        from sklearn.linear_model._glm import GammaRegressor
        from sklearn.linear_model._glm import PoissonRegressor
        from sklearn.linear_model._glm import TweedieRegressor
        legacy_version = True
    except Exception:
        # there is no glm models.
        pass
class BaseLossTransporter(AbstractTransporter):  # pragma: no cover
    """Customized PyMilo Transporter developed to handle BaseLoss field."""

    def serialize(self, data, key, model_type):
        """
        Serialize the special by-default unserializable BaseLoss field of the Tweedie, Poisson and Gamma regression.

        Only the models listed in `glm_models` are handled; for every other model type data[key] is returned untouched.
        When `legacy_version` is False, a BaseLoss value is replaced by a small marker dictionary.
        When `legacy_version` is True, the `_family_instance`, `_link_instance` and `link` fields are replaced
        by plain descriptions of the objects they hold.

        :param data: the internal data dictionary of the given model
        :type data: dict
        :param key: the special key of the data param, which we're going to serialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model, which data dictionary is given as the data param
        :type model_type: str
        :return: pymilo serialized output of data[key]
        """
        # bypass when it's not supported
        if model_type not in glm_models:
            return data[key]

        if not legacy_version:
            # Handling latest GLMs with Loss function of GLMs
            if isinstance(data[key], BaseLoss):
                if model_type == "TweedieRegressor":
                    data[key] = {
                        "power": data["power"],
                        "link": data["link"],
                        "pymilo_glm_base_loss": True
                    }
                elif model_type == "PoissonRegressor":
                    data[key] = {
                        "pymilo_glm_base_loss": True
                        # nothing for now.
                    }
                elif model_type == "GammaRegressor":
                    data[key] = {
                        "pymilo_glm_base_loss": True
                        # nothing for now
                    }
            return data[key]

        # it's legacy version of GLMs
        if key == "_family_instance":
            if model_type == "TweedieRegressor":
                data["_family_instance"] = {
                    "family": {
                        'state': 'not-direct-serializable',
                        'value': {
                            'power': data["power"]
                        }
                    }
                }
            elif model_type == "PoissonRegressor":
                data["_family_instance"] = {
                    "family": {
                        'state': 'direct-serializable',
                        'value': "poisson"
                    }
                }
            elif model_type == "GammaRegressor":
                data["_family_instance"] = {
                    "family": {
                        'state': 'direct-serializable',
                        'value': "gamma"
                    }
                }
            return data["_family_instance"]

        if key == "_link_instance":
            # Placeholder only: deserialize rebuilds this field from `link`.
            return "sklean-mirror-link"

        if key == "link":
            if data[key] in ['auto', 'identity', 'log']:
                data[key] = {
                    'state': 'direct-serializable',
                    'value': data[key]
                }
                return data[key]
            if isinstance(data[key], LogLink):
                abstract_class = "LogLink"
            elif isinstance(data[key], IdentityLink):
                abstract_class = "IdentityLink"
            else:
                # isinstance(data[key], BaseLink) == True
                abstract_class = "BaseLink"
            data[key] = {
                'state': 'not-direct-serializable',
                'value': {
                    'abstract-class': abstract_class
                }
            }
            return data[key]

        return data[key]

    def get_deserialized_base_loss(self, model_type, content):
        """
        Extract the original BaseLoss object out of the associated core data recorded by pymilo.

        The loss is obtained by building a fresh estimator of the given type and calling its `_get_loss` method.

        :param model_type: the model type of the ML model, which data dictionary is given as the data param
        :type model_type: str
        :param content: the pymilo serialized form of the BaseLoss field
        :type content: dict
        :return: original BaseLoss field (None for a TweedieRegressor whose `power` or `link` is missing)
        """
        if model_type == "TweedieRegressor":
            if not ("power" in content and "link" in content):
                return None  # TODO EXCEPTION HANDLING
            power, link = content["power"], content["link"]
            return TweedieRegressor(power=power, link=link)._get_loss()
        elif model_type == "PoissonRegressor":
            return PoissonRegressor()._get_loss()
        elif model_type == "GammaRegressor":
            return GammaRegressor()._get_loss()
        else:
            return content

    def deserialize(self, data, key, model_type):
        """
        Deserialize the previously pymilo made serializable BaseLoss field to its original form.

        Only the models listed in `glm_models` are handled; for every other model type data[key] is returned untouched.
        When `legacy_version` is False, content carrying the `pymilo_glm_base_loss` marker is rebuilt through
        `get_deserialized_base_loss`.
        When `legacy_version` is True, the `_family_instance`, `link` and `_link_instance` fields are rebuilt
        from the descriptions recorded by `serialize`.

        :param data: the internal data dictionary of the associated json file of the ML model which is generated previously by
        pymilo export.
        :type data: dict
        :param key: the special key of the data param, which we're going to deserialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model, which internal serialized data dictionary is given as the data param.
        :type model_type: str
        :return: pymilo deserialized output of data[key]
        """
        # bypass when it's not supported
        if model_type not in glm_models:
            return data[key]

        if not legacy_version:
            # latest GLMs or irrelevant models.
            content = data[key]
            if not check_str_in_iterable(
                    "pymilo_glm_base_loss", content):
                return content
            return self.get_deserialized_base_loss(model_type, content)

        # it's legacy version of GLMs
        if key == "_family_instance":
            # family field retrieval...
            family = data["_family_instance"]["family"]
            if family['state'] == 'direct-serializable':
                family = family['value']
            else:
                family = TweedieDistribution(
                    power=family['value']['power'])
            if isinstance(family, ExponentialDispersionModel):
                return family
            elif family in EDM_DISTRIBUTIONS:
                return EDM_DISTRIBUTIONS[family]()
            else:
                raise ValueError(
                    "The family must be an instance of class"
                    " ExponentialDispersionModel or an element of"
                    " ['normal', 'poisson', 'gamma', 'inverse-gaussian']"
                    "; got (family={0})".format(family))

        if key == "link":
            if data[key]['state'] == 'direct-serializable':
                data[key] = data[key]['value']
                return data[key]
            innerMap = {
                'LogLink': LogLink,
                'IdentityLink': IdentityLink,
                'BaseLink': BaseLink
            }
            return innerMap[data[key]['value']['abstract-class']]()

        if key == "_link_instance":
            # make sure it has been deserialized.
            try:
                data["link"] = self.deserialize(
                    data, "link", model_type)
            except Exception:
                # `link` was already deserialized, so it no longer has the
                # serialized layout; keep it as it is.
                pass
            link = data["link"]
            if isinstance(link, BaseLink):
                return link
            if link == "auto":
                family_instance = data["_family_instance"]
                if not isinstance(family_instance, TweedieDistribution):
                    # `.get` so that a missing "family" entry cannot replace
                    # the intended ValueError with a KeyError.
                    raise ValueError(
                        "No default link known for the "
                        "specified distribution family. Please "
                        "set link manually, i.e. not to 'auto'; "
                        "got (link='auto', family={})".format(
                            data.get("family")))
                # `power` is a property of TweedieDistribution, not a method.
                if family_instance.power <= 0:
                    return IdentityLink()
                if family_instance.power >= 1:
                    return LogLink()
                # NOTE(review): a power strictly between 0 and 1 yields None here,
                # as no branch of the original logic covers it - confirm intended.
                return None
            elif link == "identity":
                return IdentityLink()
            elif link == "log":
                return LogLink()
            else:
                raise ValueError(
                    "The link must be an instance of class Link or "
                    "an element of ['auto', 'identity', 'log']; "
                    "got (link={0})".format(
                        link))

        return data[key]
================================================
FILE: pymilo/transporters/binmapper_transporter.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo BinMapper transporter."""
from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper
from ..utils.util import is_primitive, check_str_in_iterable
from .transporter import AbstractTransporter
from .general_data_structure_transporter import GeneralDataStructureTransporter
class BinMapperTransporter(AbstractTransporter):
    """Customized PyMilo Transporter developed to handle _BinMapper objects."""

    def serialize(self, data, key, model_type):
        """
        Serialize _BinMapper object[useful in HistGradientBoosting(Regressor,Classifier)].

        Every attribute of the mapper is passed through a GeneralDataStructureTransporter and the
        resulting attribute dictionary is wrapped under the `pymilo-binmapper` marker.
        The attribute dictionary of the mapper is updated in place.
        Values which are not _BinMapper instances are returned untouched.

        :param data: the internal data dictionary of the given model
        :type data: dict
        :param key: the special key of the data param, which we're going to serialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model, which data dictionary is given as the data param
        :type model_type: str
        :return: pymilo serialized output of data[key]
        """
        mapper = data[key]
        if not isinstance(mapper, _BinMapper):
            return mapper

        attributes = mapper.__dict__
        transporter = GeneralDataStructureTransporter()
        for name in attributes:
            attributes[name] = transporter.serialize(attributes, name, model_type + ":_BinMapper")
        return {
            "pymilo-bypass": True,
            "pymilo-binmapper": {
                "__dict__": attributes
            }
        }

    def deserialize(self, data, key, model_type):
        """
        Deserialize previously pymilo serialized _BinMapper object[useful in HistGradientBoosting(Regressor,Classifier)].

        Primitive values, None and content without the `pymilo-binmapper` marker are returned untouched.
        Otherwise a fresh _BinMapper is created and every recorded attribute is deserialized through a
        GeneralDataStructureTransporter and set on it.

        :param data: the internal data dictionary of the associated json file of the ML model which is generated previously by
        pymilo export.
        :type data: dict
        :param key: the special key of the data param, which we're going to deserialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model, which internal serialized data dictionary is given as the data param
        :type model_type: str
        :return: pymilo deserialized output of data[key]
        """
        content = data[key]
        if content is None or is_primitive(content):
            return content
        if not check_str_in_iterable("pymilo-binmapper", content):
            return content

        attributes = content["pymilo-binmapper"]["__dict__"]
        mapper = _BinMapper()
        transporter = GeneralDataStructureTransporter()
        # First pass: rebuild every attribute value inside the recorded dictionary.
        for name in attributes:
            attributes[name] = transporter.deserialize(attributes, name, model_type + ":_BinMapper")
        # Second pass: copy the rebuilt values onto the new mapper.
        for name, value in attributes.items():
            setattr(mapper, name, value)
        return mapper
================================================
FILE: pymilo/transporters/bisecting_tree_transporter.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo BisectingTree(sklearn.cluster._bisect_k_means) object transporter."""
from ..pymilo_param import SKLEARN_CLUSTERING_TABLE, NOT_SUPPORTED
from .transporter import AbstractTransporter
from .general_data_structure_transporter import GeneralDataStructureTransporter
from ..utils.util import check_str_in_iterable
# True when the "BisectingKMeans" entry of SKLEARN_CLUSTERING_TABLE is not the NOT_SUPPORTED marker.
bisecting_tree_support = SKLEARN_CLUSTERING_TABLE["BisectingKMeans"] != NOT_SUPPORTED
if bisecting_tree_support:
    # _BisectingTree is only defined in this module when the flag above is True,
    # so every use of it below has to be guarded by `bisecting_tree_support`.
    from sklearn.cluster._bisect_k_means import _BisectingTree
class BisectingTreeTransporter(AbstractTransporter):
    """Customized PyMilo Transporter developed to handle BisectingTree object."""

    def serialize(self, data, key, model_type):
        """
        Serialize data[key] if it is an instance of the _BisectingTree class.

        :param data: the internal data dictionary of the given model
        :type data: dict
        :param key: the special key of the data param, which we're going to serialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model
        :type model_type: str
        :return: pymilo serialized output of data[key]
        """
        is_tree = bisecting_tree_support and isinstance(data[key], _BisectingTree)
        if is_tree:
            data[key] = self.serialize_bisecting_tree(data[key], GeneralDataStructureTransporter())
        return data[key]

    def deserialize(self, data, key, model_type):
        """
        Deserialize data[key] if it is a pymilo serialized _BisectingTree object.

        Content which is not recognized as a pymilo serialized bisecting tree is returned untouched.

        :param data: the internal data dictionary of the associated JSON file of the ML model generated by pymilo export.
        :type data: dict
        :param key: the special key of the data param, which we're going to deserialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model
        :type model_type: str
        :return: pymilo deserialized output of data[key]
        """
        content = data[key]
        if not (bisecting_tree_support and is_pymilo_serialized_bisecting_tree(content)):
            return content
        return self.deserialize_bisecting_tree(content, GeneralDataStructureTransporter())

    def serialize_bisecting_tree(self, bisecting_tree, gdst=None):
        """
        Serialize the bisecting_tree object recursively.

        The attribute dictionary of the tree is tagged with a `pymilo_model_type` entry and updated in place:
        nested _BisectingTree attributes are serialized recursively, every other attribute goes through gdst.

        :param bisecting_tree: the bisecting_tree object which is going to get serialized.
        :type bisecting_tree: sklearn.cluster._bisect_k_means._BisectingTree
        :param gdst: an instance of GeneralDataStructureTransporter class.
        :type gdst: GeneralDataStructureTransporter
        :return: pymilo-serialized bisecting_tree
        """
        if gdst is None:
            gdst = GeneralDataStructureTransporter()
        attributes = bisecting_tree.__dict__
        attributes["pymilo_model_type"] = "_BisectingTree"
        for name, member in attributes.items():
            if isinstance(member, _BisectingTree):
                serialized = self.serialize_bisecting_tree(member, gdst)
            else:
                serialized = gdst.serialize(attributes, name, str(_BisectingTree))
            attributes[name] = serialized
        return attributes

    def deserialize_bisecting_tree(self, bisecting_tree_obj, gdst=None):
        """
        Deserialize the bisecting_tree object recursively.

        The given dictionary is updated in place with the deserialized values, then a _BisectingTree is built
        from its `center`, `indices` and `score` entries and every entry is set as an attribute on it.

        :param bisecting_tree_obj: the bisecting_tree object which is going to get deserialized.
        :type bisecting_tree_obj: dict
        :param gdst: an instance of GeneralDataStructureTransporter class.
        :type gdst: GeneralDataStructureTransporter
        :return: _BisectingTree object generated from bisecting_tree_obj
        """
        if gdst is None:
            gdst = GeneralDataStructureTransporter()
        fields = bisecting_tree_obj
        for name, member in fields.items():
            if is_pymilo_serialized_bisecting_tree(member):
                restored = self.deserialize_bisecting_tree(member, gdst)
            else:
                restored = gdst.deserialize(fields, name, str(_BisectingTree))
            fields[name] = restored
        tree = _BisectingTree(fields["center"], fields["indices"], fields["score"])
        for name, member in fields.items():
            setattr(tree, name, member)
        return tree
def is_pymilo_serialized_bisecting_tree(psbt):
    """
    Check whether the given object is a bisecting tree which is serialized by pymilo.

    The object qualifies when it carries a `pymilo_model_type` entry equal to "_BisectingTree".

    :param psbt: the pymilo serialized bisecting tree object
    :type psbt: dict
    :return: boolean
    """
    has_type_marker = check_str_in_iterable("pymilo_model_type", psbt)
    return has_type_marker and psbt["pymilo_model_type"] == "_BisectingTree"
================================================
FILE: pymilo/transporters/bunch_transporter.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo Bunch transporter."""
from .transporter import AbstractTransporter
from ..utils.util import check_str_in_iterable
# Detect whether the Bunch class is importable from its private scikit-learn module.
# `Exception` rather than `BaseException`: a failed probe must be tolerated,
# but KeyboardInterrupt/SystemExit must still propagate.
try:
    from sklearn.utils._bunch import Bunch
except Exception:
    bunch_support = False
else:
    bunch_support = True
class BunchTransporter(AbstractTransporter):
    """Customized PyMilo Transporter developed to handle Bunch objects."""

    def serialize(self, data, key, model_type):
        """
        Serialize Bunch object.

        The items of the Bunch are copied into a plain dictionary stored under the `pymilo-bunch` marker.
        Values which are not Bunch instances are returned untouched.

        :param data: the internal data dictionary of the given model
        :type data: dict
        :param key: the special key of the data param, which we're going to serialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model
        :type model_type: str
        :return: pymilo serialized output of data[key]
        """
        candidate = data[key]
        if not (bunch_support and isinstance(candidate, Bunch)):
            return candidate
        items = {name: value for name, value in candidate.items()}
        return {
            "pymilo-bypass": True,
            "pymilo-bunch": items,
        }

    def deserialize(self, data, key, model_type):
        """
        Deserialize previously pymilo serialized Bunch object.

        Content without the `pymilo-bunch` marker is returned untouched; otherwise a new Bunch is filled
        with the recorded items.

        :param data: the internal data dictionary of the associated json file of the ML model which is generated previously by
        pymilo export.
        :type data: dict
        :param key: the special key of the data param, which we're going to deserialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model
        :type model_type: str
        :return: pymilo deserialized output of data[key]
        """
        content = data[key]
        if not (bunch_support and check_str_in_iterable("pymilo-bunch", content)):
            return content
        restored = Bunch()
        for name, value in content["pymilo-bunch"].items():
            restored[name] = value
        return restored
================================================
FILE: pymilo/transporters/cfnode_transporter.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo CFnode(from sklearn.cluster._birch) object transporter."""
from sklearn.cluster._birch import _CFNode
from sklearn.cluster._birch import _CFSubcluster
from .transporter import AbstractTransporter
from .general_data_structure_transporter import GeneralDataStructureTransporter
from ..utils.util import has_named_parameter, check_str_in_iterable
class CFNodeTransporter(AbstractTransporter):
    """
    Customized PyMilo Transporter developed to handle CFnode object.

    The transporter keeps state between calls: the ids of the nodes met while serializing and the nodes
    rebuilt while deserializing. Call `reset` to clear both.
    """

    def __init__(self):
        """
        Initialize the CFNodeTransporter instance.

        :return: an instance of the CFNodeTransporter class
        """
        # ids of the _CFNode objects already visited by serialize_cfnode
        self.all_cfnodes = set()
        # maps a recorded cfnode id to the _CFNode rebuilt for it by deserialize_cfnode
        self.retrieved_cfnodes = {}

    def reset(self):
        """
        Reset the CFNodeTransporter's internal data structures.

        Both the set of serialized node ids and the id-to-node map of deserialized nodes are emptied.

        :return: None
        """
        self.all_cfnodes = set()
        self.retrieved_cfnodes = {}

    def serialize(self, data, key, model_type):
        """
        Serialize data[key] if it is an instance of _CFNode.

        Any other value is returned untouched.

        :param data: the internal data dictionary of the given model
        :type data: dict
        :param key: the special key of the data param, which we're going to serialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model
        :type model_type: str
        :return: pymilo serialized output of data[key]
        """
        if isinstance(data[key], _CFNode):
            data[key] = self.serialize_cfnode(data[key], GeneralDataStructureTransporter())
        return data[key]

    def deserialize(self, data, key, model_type):
        """
        Deserialize data[key] if it is a pymilo serialized _CFNode object.

        Content whose `pymilo_model_type` entry is not "_CFNode" is returned untouched.

        :param data: the internal data dictionary of the associated JSON file of the ML model generated by pymilo export.
        :type data: dict
        :param key: the special key of the data param, which we're going to deserialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model
        :type model_type: str
        :return: pymilo deserialized output of data[key]
        """
        content = data[key]
        if check_str_in_iterable("pymilo_model_type", content) and content["pymilo_model_type"] == "_CFNode":
            return self.deserialize_cfnode(content, GeneralDataStructureTransporter())
        else:
            return content

    def serialize_cfnode(self, cfnode, gdst):
        """
        Serialize given _CFnode instance recursively.

        The attribute dictionary of the node itself is updated in place and returned. It is tagged with the
        node id and the "_CFNode" type marker, and the id is remembered in `all_cfnodes`.
        A _CFNode attribute becomes a wrapper dictionary whose value is either the recursively serialized node
        or the "PYMILO_CFNODE_RECURSION" marker when that node id has already been seen.
        A non-empty `subclusters_` list whose first element is a _CFSubcluster becomes a wrapper dictionary holding
        the serialized subclusters; any other `subclusters_` list is left as it is.
        Every remaining attribute is serialized through gdst.

        :param cfnode: given _CFnode object to get serialized
        :type cfnode: sklearn.cluster._birch._CFNode
        :param gdst: an instance of GeneralDataStructureTransporter class
        :type gdst: pymilo.transporters.general_data_structure_transporter.GeneralDataStructureTransporter
        :return: dict
        """
        data = cfnode.__dict__
        cfnode_id = self.get_cfnode_id(cfnode)
        data["pymilo_cfnode_id"] = cfnode_id
        data["pymilo_model_type"] = "_CFNode"
        # register the node before visiting its attributes, so that a reference
        # back to it is recorded as a recursion marker instead of being expanded again
        self.all_cfnodes.add(cfnode_id)
        for key, value in data.items():
            if (isinstance(value, _CFNode)):
                value_id = self.get_cfnode_id(value)
                data[key] = {
                    "pymilo_model_type": "_CFNode",
                    "pymilo_cfnode_value": "PYMILO_CFNODE_RECURSION" if value_id in self.all_cfnodes else self.serialize_cfnode(
                        value,
                        gdst),
                    "pymilo_cfnode_id": value_id,
                }
            elif (isinstance(value, list) and key == "subclusters_"):
                # empty lists, and lists not starting with a _CFSubcluster, are kept unchanged
                if len(value) > 0:
                    if isinstance(value[0], _CFSubcluster):
                        data[key] = {"pymilo_model_type": "_CFSubcluster", "pymilo_subclusters_value": [
                            self.serialize_cfsubcluster(cf_subcluster, gdst) for cf_subcluster in value], }
            else:
                data[key] = gdst.serialize(data, key, str(_CFNode))
        return data

    def deserialize_cfnode(self, cfnode_pymiloed_obj, gdst):
        """
        Deserialize given serialized object of _CFnode class recursively.

        The node for the recorded id is taken from `retrieved_cfnodes`, or created through `get_base_cfnode`
        and stored there first. The entries of the given dictionary are then deserialized in place and finally
        every entry is set as an attribute on the node.

        :param cfnode_pymiloed_obj: given serialized _CFnode object to get deserialized
        :type cfnode_pymiloed_obj: dict
        :param gdst: an instance of GeneralDataStructureTransporter class
        :type gdst: pymilo.transporters.general_data_structure_transporter.GeneralDataStructureTransporter
        :return: sklearn.cluster._birch._CFNode
        """
        # this object is a previously pymiloed cfnode object
        if not cfnode_pymiloed_obj["pymilo_cfnode_id"] in self.retrieved_cfnodes.keys():
            self.retrieved_cfnodes[cfnode_pymiloed_obj["pymilo_cfnode_id"]] = self.get_base_cfnode(cfnode_pymiloed_obj)
        current_cfnode = self.retrieved_cfnodes[cfnode_pymiloed_obj["pymilo_cfnode_id"]]
        for key, value in cfnode_pymiloed_obj.items():
            if check_str_in_iterable("pymilo_model_type", value) and value["pymilo_model_type"] == "_CFNode":
                if value["pymilo_cfnode_id"] in self.retrieved_cfnodes.keys():
                    # the referenced node has already been rebuilt (this is also how a
                    # recursion marker gets resolved): reuse it
                    cfnode_pymiloed_obj[key] = self.retrieved_cfnodes[value["pymilo_cfnode_id"]]
                else:
                    # first time this node is met: rebuild it from its recorded value
                    new_cfnode = self.deserialize_cfnode(value["pymilo_cfnode_value"], gdst)
                    self.retrieved_cfnodes[value["pymilo_cfnode_id"]] = new_cfnode
                    cfnode_pymiloed_obj[key] = new_cfnode
            elif isinstance(value, dict) and "pymilo_model_type" in value and value["pymilo_model_type"] == "_CFSubcluster":
                # has a >0 length subclusters_ fields.
                cfnode_pymiloed_obj[key] = [self.deserialize_cfsubcluster(
                    subcluster, gdst) for subcluster in value["pymilo_subclusters_value"]]
            else:
                cfnode_pymiloed_obj[key] = gdst.deserialize(cfnode_pymiloed_obj, key, str(_CFNode))
        # copy every (now deserialized) entry onto the node, pymilo bookkeeping entries included
        for key, value in cfnode_pymiloed_obj.items():
            setattr(current_cfnode, key, value)
        return current_cfnode

    def serialize_cfsubcluster(self, cfsubcluster, gdst):
        """
        Serialize given _CFSubcluster instance.

        The attribute dictionary of the subcluster itself is updated in place and returned: _CFNode attributes
        are serialized through `serialize_cfnode`, every other attribute through gdst.

        :param cfsubcluster: given _CFSubcluster object to get serialized
        :type cfsubcluster: sklearn.cluster._birch._CFSubcluster
        :param gdst: an instance of GeneralDataStructureTransporter class
        :type gdst: pymilo.transporters.general_data_structure_transporter.GeneralDataStructureTransporter
        :return: dict
        """
        data = cfsubcluster.__dict__
        for key, value in data.items():
            if (isinstance(value, _CFNode)):
                data[key] = self.serialize_cfnode(value, gdst)
            else:
                data[key] = gdst.serialize(data, key, str(_CFSubcluster))
        return data

    def deserialize_cfsubcluster(self, cfsubcluster_pymiloed_obj, gdst):
        """
        Deserialize given serialized object of _CFSubcluster class recursively.

        The entries of the given dictionary are deserialized in place, then set as attributes on a
        _CFSubcluster created without arguments.

        :param cfsubcluster_pymiloed_obj: given serialized _CFSubcluster object to get deserialized
        :type cfsubcluster_pymiloed_obj: dict
        :param gdst: an instance of GeneralDataStructureTransporter class
        :type gdst: pymilo.transporters.general_data_structure_transporter.GeneralDataStructureTransporter
        :return: sklearn.cluster._birch._CFSubcluster
        """
        for key, value in cfsubcluster_pymiloed_obj.items():
            if check_str_in_iterable("pymilo_model_type", value) and value["pymilo_model_type"] == "_CFNode":
                # NOTE(review): reads value["pymilo_cfnode_value"], while serialize_cfsubcluster stores the
                # serialized node directly (without that wrapper entry) - confirm against real exports.
                cfsubcluster_pymiloed_obj[key] = self.deserialize_cfnode(value["pymilo_cfnode_value"], gdst)
            else:
                cfsubcluster_pymiloed_obj[key] = gdst.deserialize(cfsubcluster_pymiloed_obj, key, str(_CFSubcluster))
        subcluster_instance = _CFSubcluster()
        for key, value in cfsubcluster_pymiloed_obj.items():
            setattr(subcluster_instance, key, value)
        return subcluster_instance

    def get_cfnode_id(self, cfnode):
        """
        Create a unique id for the given cfnode.

        The id is the part of `str(cfnode)` which follows " at ", without its last character
        (presumably the address shown by the default object representation).
        For anything which is not a _CFNode the string "None" is returned.

        :param cfnode: given _CFnode object to generate it's id.
        :type cfnode: sklearn.cluster._birch._CFNode
        :return: str
        """
        if not isinstance(cfnode, _CFNode):
            return "None"
        else:
            return str(cfnode).split(" at ")[1][:-1]

    def get_base_cfnode(self, cfnode_pymiloed_obj):
        """
        Create a basic _CFNode instance from constructor parameters existing in cfnode_pymiloed_obj.

        The `dtype` argument is derived from the recorded `init_centroids_` entry and is only passed when
        the _CFNode constructor has a parameter with that name.

        :param cfnode_pymiloed_obj: given serialized _CFnode object to generate it's basic _CFNode instance
        :type cfnode_pymiloed_obj: dict
        :return: _CFNode
        """
        threshold = cfnode_pymiloed_obj["threshold"]
        branching_factor = cfnode_pymiloed_obj["branching_factor"]
        is_leaf = cfnode_pymiloed_obj["is_leaf"]
        n_features = cfnode_pymiloed_obj["n_features"]
        dtype = GeneralDataStructureTransporter().list_to_ndarray(cfnode_pymiloed_obj["init_centroids_"]).dtype
        if has_named_parameter(_CFNode.__init__, "dtype"):
            return _CFNode(
                threshold=threshold,
                branching_factor=branching_factor,
                is_leaf=is_leaf,
                n_features=n_features,
                dtype=dtype,
            )
        else:
            return _CFNode(
                threshold=threshold,
                branching_factor=branching_factor,
                is_leaf=is_leaf,
                n_features=n_features,
            )
================================================
FILE: pymilo/transporters/compose_transporter.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo Compose transporter."""
from ..utils.util import check_str_in_iterable
from .transporter import AbstractTransporter
from .general_data_structure_transporter import GeneralDataStructureTransporter
from .preprocessing_transporter import PreprocessingTransporter
from .feature_extraction_transporter import FeatureExtractorTransporter
from ..chains.util import serialize_possible_ml_model, deserialize_possible_ml_model
# Shared transporter instances, keyed by class name, which ComposeTransporter
# looks up when handling preprocessing and feature extraction modules.
COMPOSE_CHAIN = {
    "GeneralDataStructureTransporter": GeneralDataStructureTransporter(),
    "PreprocessingTransporter": PreprocessingTransporter(),
    "FeatureExtractorTransporter": FeatureExtractorTransporter(),
}
class ComposeTransporter(AbstractTransporter):
"""Compose object dedicated Transporter."""
def is_compose_internal_model(self, internal_model):
"""
Check whether the given content is a nested estimator/module.
This is used to detect objects that should be (de)serialized when found inside
compose models (e.g., inside ColumnTransformer.transformers).
:param internal_model: given object
:type internal_model: any
:return: bool
"""
pt = COMPOSE_CHAIN["PreprocessingTransporter"]
fe = COMPOSE_CHAIN["FeatureExtractorTransporter"]
if isinstance(internal_model, dict):
return (
check_str_in_iterable("pymilo-inner-model-type", internal_model) or
pt.is_preprocessing_module(internal_model) or
fe.is_fe_module(internal_model)
)
return (
self._is_ml_model(internal_model) or
pt.is_preprocessing_module(internal_model) or
fe.is_fe_module(internal_model)
)
def _is_ml_model(self, obj):
"""
Check if the object is an ML model that needs serialization.
:param obj: given object
:type obj: any
:return: bool
"""
return hasattr(obj, 'fit') and (hasattr(obj, 'predict') or hasattr(obj, 'transform'))
def serialize_compose_internal_model(self, internal_model):
"""
Serialize internal model of compose objects.
:param internal_model: given sklearn internal model
:type internal_model: sklearn model or function
:return: pymilo serialized internal_model
"""
# Handle preprocessing modules
pt = COMPOSE_CHAIN["PreprocessingTransporter"]
if pt.is_preprocessing_module(internal_model):
return pt.serialize_pre_module(internal_model)
# Handle feature extraction modules
fe = COMPOSE_CHAIN["FeatureExtractorTransporter"]
if fe.is_fe_module(internal_model):
return fe.serialize_fe_module(internal_model)
# Handle ML models (including compose/ensemble/concrete estimators) using the project-wide schema.
if self._is_ml_model(internal_model):
has_ml_model, result = serialize_possible_ml_model(internal_model)
if has_ml_model:
return result
return internal_model
def deserialize_compose_internal_model(self, serialized_internal_model):
"""
Deserialize internal model of compose objects.
:param serialized_internal_model: serialized internal model(by pymilo)
:type serialized_internal_model: dict
:return: retrieved associated sklearn internal model
"""
# Preprocessing / feature extraction modules
pt = COMPOSE_CHAIN["PreprocessingTransporter"]
if pt.is_preprocessing_module(serialized_internal_model):
return pt.deserialize_pre_module(serialized_internal_model)
fe = COMPOSE_CHAIN["FeatureExtractorTransporter"]
if fe.is_fe_module(serialized_internal_model):
return fe.deserialize_fe_module(serialized_internal_model)
# Project-wide nested-model schema
has_ml_model, result = deserialize_possible_ml_model(serialized_internal_model)
if has_ml_model:
return result
return serialized_internal_model
def _serialize_nested(self, obj):
"""
Recursively serialize nested structures containing internal models.
:param obj: object to serialize (dict, list, tuple, or internal model)
:type obj: any
:return: serialized object with internal models converted to pymilo format
"""
if isinstance(obj, dict):
return {k: self._serialize_nested(v) for k, v in obj.items()}
if isinstance(obj, list):
return [self._serialize_nested(v) for v in obj]
if isinstance(obj, tuple):
return tuple(self._serialize_nested(v) for v in obj)
if self.is_compose_internal_model(obj):
return self.serialize_compose_internal_model(obj)
return obj
def _deserialize_nested(self, obj):
"""
Recursively deserialize nested structures containing serialized internal models.
:param obj: object to deserialize (dict, list, tuple, or serialized internal model)
:type obj: any
:return: deserialized object with internal models restored to sklearn objects
"""
if isinstance(obj, dict):
# Try leaf deserialization first (nested model schemas are dicts)
if self.is_compose_internal_model(obj):
return self.deserialize_compose_internal_model(obj)
return {k: self._deserialize_nested(v) for k, v in obj.items()}
if isinstance(obj, list):
return [self._deserialize_nested(v) for v in obj]
if isinstance(obj, tuple):
return tuple(self._deserialize_nested(v) for v in obj)
return obj
def serialize(self, data, key, model_type):
    """
    Serialize Compose object.

    Serialize data[key] of the given model whose type is model_type.
    Containers (dict, list, tuple) are traversed recursively; a bare compose internal model
    is serialized directly; anything else is returned unchanged.

    :param data: the internal data dictionary of the given model
    :type data: dict
    :param key: the special key of the data param, which we're going to serialize its value(data[key])
    :type key: object
    :param model_type: the model type of the ML model, which data dictionary is given as the data param
    :type model_type: str
    :return: pymilo serialized output of data[key]
    """
    value = data[key]
    if isinstance(value, (dict, list, tuple)):
        return self._serialize_nested(value)
    if not self.is_compose_internal_model(value):
        return value
    return self.serialize_compose_internal_model(value)
def deserialize(self, data, key, model_type):
    """
    Deserialize previously pymilo serialized compose object.

    Deserialize data[key] of the given model whose type is model_type.
    Containers (dict, list, tuple) are traversed recursively; a bare serialized internal model
    is restored directly; anything else is returned unchanged.

    :param data: the internal data dictionary of the associated json file of the ML model which is generated
        previously by pymilo export.
    :type data: dict
    :param key: the special key of the data param, which we're going to deserialize its value(data[key])
    :type key: object
    :param model_type: the model type of the ML model, which internal serialized data dictionary is given as
        the data param
    :type model_type: str
    :return: pymilo deserialized output of data[key]
    """
    value = data[key]
    if isinstance(value, (dict, list, tuple)):
        return self._deserialize_nested(value)
    if not self.is_compose_internal_model(value):
        return value
    return self.deserialize_compose_internal_model(value)
================================================
FILE: pymilo/transporters/feature_extraction_transporter.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo Feature Extraction transporter."""
from scipy.sparse import csr_matrix
from ..pymilo_param import SKLEARN_FEATURE_EXTRACTION_TABLE
from ..utils.util import check_str_in_iterable, get_sklearn_type
from .transporter import AbstractTransporter, Command
from .general_data_structure_transporter import GeneralDataStructureTransporter
from .randomstate_transporter import RandomStateTransporter
# Transporters applied to a feature extraction module; they are visited in the
# insertion order of this dict, both when serializing and when deserializing.
FEATURE_EXTRACTION_CHAIN = {
    "GeneralDataStructureTransporter": GeneralDataStructureTransporter(),
    "RandomStateTransporter": RandomStateTransporter(),
}
class FeatureExtractorTransporter(AbstractTransporter):
    """Transporter dedicated to sklearn feature extraction objects."""

    def serialize(self, data, key, model_type):
        """
        Serialize Feature Extractor object.

        Serialize data[key] of the given model whose type is model_type.
        Only values recognized as feature extraction modules are converted; every other
        value is returned as is.

        :param data: the internal data dictionary of the given model
        :type data: dict
        :param key: the special key of the data param, which we're going to serialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model, which data dictionary is given as the data param
        :type model_type: str
        :return: pymilo serialized output of data[key]
        """
        candidate = data[key]
        if not self.is_fe_module(candidate):
            return candidate
        return self.serialize_fe_module(candidate)

    def deserialize(self, data, key, model_type):
        """
        Deserialize previously pymilo serialized feature extraction object.

        Deserialize data[key] of the given model whose type is model_type.
        Only values recognized as serialized feature extraction modules are restored; every
        other value is returned as is.

        :param data: the internal data dictionary of the associated json file of the ML model which is
            generated previously by pymilo export.
        :type data: dict
        :param key: the special key of the data param, which we're going to deserialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model, which internal serialized data dictionary is
            given as the data param
        :type model_type: str
        :return: pymilo deserialized output of data[key]
        """
        content = data[key]
        if not self.is_fe_module(content):
            return content
        return self.deserialize_fe_module(content)

    def is_fe_module(self, fe_module):
        """
        Check whether the given module is a sklearn Feature Extraction module or not.

        A dict is treated as a *serialized* module and is matched through its
        "pymilo-feature_extraction-type" entry; any other object is matched through its sklearn type name.

        :param fe_module: given object
        :type fe_module: any
        :return: bool
        """
        if not isinstance(fe_module, dict):
            return get_sklearn_type(fe_module) in SKLEARN_FEATURE_EXTRACTION_TABLE
        type_field = "pymilo-feature_extraction-type"
        return check_str_in_iterable(
            type_field, fe_module) and fe_module[type_field] in SKLEARN_FEATURE_EXTRACTION_TABLE

    def serialize_fe_module(self, fe_module):
        """
        Serialize Feature Extraction object.

        The module's attribute dictionary is rewritten in place: nested feature extraction modules
        and csr_matrix attributes are converted first, then every transporter of
        FEATURE_EXTRACTION_CHAIN is run over the module.

        :param fe_module: given sklearn feature extraction module
        :type fe_module: sklearn.feature_extraction
        :return: pymilo serialized fe_module
        """
        attributes = fe_module.__dict__
        # one level of nested feature extraction modules / sparse matrices is handled here
        for name, value in attributes.items():
            if self.is_fe_module(value):
                attributes[name] = self.serialize_fe_module(value)
            elif isinstance(value, csr_matrix):
                general_transporter = FEATURE_EXTRACTION_CHAIN["GeneralDataStructureTransporter"]
                attributes[name] = {
                    "pymilo-bypass": True,
                    "pymilo-csr_matrix": general_transporter.serialize_dict(value.__dict__),
                }
        for transporter in FEATURE_EXTRACTION_CHAIN.values():
            transporter.transport(fe_module, Command.SERIALIZE)
        return {
            "pymilo-bypass": True,
            "pymilo-feature_extraction-type": get_sklearn_type(fe_module),
            "pymilo-feature_extraction-data": fe_module.__dict__,
        }

    def deserialize_fe_module(self, serialized_fe_module):
        """
        Deserialize Feature Extraction object.

        A fresh instance of the recorded type is created with its default constructor and every
        deserialized entry of the stored data is set on it as an attribute.

        :param serialized_fe_module: serialized feature extraction module(by pymilo)
        :type serialized_fe_module: dict
        :return: retrieved associated sklearn.feature_extraction module
        """
        data = serialized_fe_module["pymilo-feature_extraction-data"]
        module_class = SKLEARN_FEATURE_EXTRACTION_TABLE[serialized_fe_module["pymilo-feature_extraction-type"]]
        retrieved_fe_module = module_class()
        for key in data:
            entry = data[key]
            # one level of nested feature extraction modules / sparse matrices is handled here
            if self.is_fe_module(entry):
                data[key] = self.deserialize_fe_module(entry)
            elif check_str_in_iterable("pymilo-csr_matrix", entry):
                general_transporter = FEATURE_EXTRACTION_CHAIN["GeneralDataStructureTransporter"]
                matrix_state = general_transporter.get_deserialized_dict(entry["pymilo-csr_matrix"])
                # build an empty matrix of the right shape, then restore its saved attributes
                matrix = csr_matrix(matrix_state['_shape'])
                for attribute_name in matrix_state:
                    setattr(matrix, attribute_name, matrix_state[attribute_name])
                data[key] = matrix
            for transporter in FEATURE_EXTRACTION_CHAIN.values():
                data[key] = transporter.deserialize(data, key, "")
        for key in data:
            setattr(retrieved_fe_module, key, data[key])
        return retrieved_fe_module
================================================
FILE: pymilo/transporters/function_transporter.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo Function transporter."""
from ..utils.util import import_function, check_str_in_iterable
from .transporter import AbstractTransporter
from types import FunctionType, BuiltinFunctionType
from numpy import ufunc
# Flag telling whether numpy's _ArrayFunctionDispatcher type could be imported;
# FunctionTransporter.serialize only tests against that type when this is True.
array_function_dispatcher_support = False
try:
    # first candidate location: numpy._core
    from numpy._core._multiarray_umath import _ArrayFunctionDispatcher
    array_function_dispatcher_support = True
except ImportError:
    try:
        # second candidate location: numpy.core
        from numpy.core._multiarray_umath import _ArrayFunctionDispatcher
        array_function_dispatcher_support = True
    except ImportError:
        # neither location provides the type; the flag stays False
        pass
class FunctionTransporter(AbstractTransporter):
    """Customized PyMilo Transporter developed to handle function field transportation."""

    def serialize(self, data, key, model_type):
        """
        Serialize Function type fields.

        The function is replaced in data[key] by a record of its name and parent module so it can be
        re-imported later. numpy ufuncs are always recorded with "numpy" as their module.

        :param data: the internal data dictionary of the given model
        :type data: dict
        :param key: the special key of the data param, which we're going to serialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model
        :type model_type: str
        :return: pymilo serialized output of data[key]
        """
        candidate = data[key]
        if isinstance(candidate, ufunc):
            module_name = "numpy"
        elif isinstance(candidate, (FunctionType, BuiltinFunctionType)) or (
                array_function_dispatcher_support and
                isinstance(candidate, _ArrayFunctionDispatcher)):
            module_name = candidate.__module__
        else:
            # not a function-like value: nothing to do
            return candidate
        data[key] = {
            "function_name": candidate.__name__,
            "function_module": module_name,
        }
        return data[key]

    def deserialize(self, data, key, model_type):
        """
        Deserialize serialized function objects back to their original function type object.

        When data[key] carries a "function_name" entry, the function is imported from the recorded
        module; otherwise data[key] is returned unchanged.

        :param data: the internal data dictionary of the associated json file of the ML model which is
            generated previously by pymilo export.
        :type data: dict
        :param key: the special key of the data param, which we're going to deserialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model
        :type model_type: str
        :return: pymilo deserialized output of data[key]
        """
        content = data[key]
        if not check_str_in_iterable("function_name", content):
            return content
        return import_function(
            content["function_module"],
            content["function_name"],
        )
================================================
FILE: pymilo/transporters/general_data_structure_transporter.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo GeneralDataStructure transporter."""
import numpy as np
from ast import literal_eval
from ..pymilo_param import NUMPY_TYPE_DICT
from ..utils.util import get_homogeneous_type, all_same, prefix_list
from ..utils.util import is_primitive, check_str_in_iterable
from .transporter import AbstractTransporter
class GeneralDataStructureTransporter(AbstractTransporter):
"""Customized PyMilo Transporter developed to handle fields with general datastructures."""
def _is_remainder_cols_list(self, obj):
"""
Check whether the given object is sklearn's internal _RemainderColsList.
:param obj: given object
:type obj: any
:return: bool
"""
try:
return (
obj.__class__.__name__ == "_RemainderColsList" and
"_column_transformer" in obj.__class__.__module__
)
except Exception:
return False
def serialize_tuple(self, tuple_field):
"""
Check for non-serializable fields in tuple and serialize them.
1. Serialize inner np.ndarray fields in tuple
2. Serialize inner slice fields in tuple
3. Convert sklearn's _RemainderColsList to list
:param tuple_field: given tuple
:type tuple_field: tuple
:return: serializable tuple
"""
new_tuple = tuple()
for item in tuple_field:
if (isinstance(item, np.ndarray)):
new_tuple += (self.deep_serialize_ndarray(item),)
elif self._is_remainder_cols_list(item):
new_tuple += (list(item),)
elif isinstance(item, slice):
new_tuple += ({
"pymilo-slice": True,
"start": item.start,
"stop": item.stop,
"step": item.step,
},)
else:
new_tuple += (item,)
return {
"pymilo-tuple": new_tuple,
}
# dict serializer for Logistic regression CV
def serialize_dict(self, dictionary):
"""
Make all the fields of the given dictionary serializable.
1. Changing ndarray values to list,
2. save unserializable values of numpy.int32|int64 types in an serializable custom object form.
:param dictionary: given dictionary
:type dictionary: dict
:return: fully serializable dictionary
"""
black_list_key_values = []
for key in dictionary:
# check inner field as a np.ndarray
if isinstance(dictionary[key], np.ndarray):
dictionary[key] = self.deep_serialize_ndarray(dictionary[key])
# check inner field as sklearn's _RemainderColsList
if self._is_remainder_cols_list(dictionary[key]):
dictionary[key] = list(dictionary[key])
# check inner field as a slice
if isinstance(dictionary[key], slice):
dictionary[key] = {
"pymilo-slice": True,
"start": dictionary[key].start,
"stop": dictionary[key].stop,
"step": dictionary[key].step,
}
# check inner field as np.int32
if isinstance(key, np.int32):
new_value = {
"np-type": "numpy.int32",
"key-value": dictionary[key]
}
black_list_key_values.append([key, new_value])
if isinstance(key, np.int64):
new_value = {
"np-type": "numpy.int64",
"key-value": dictionary[key]
}
black_list_key_values.append([key, new_value])
for black_key_value in black_list_key_values:
prev_key = black_key_value[0]
new_value = black_key_value[1]
del dictionary[prev_key]
dictionary[int(prev_key)] = new_value
return dictionary
def serialize(self, data, key, model_type):
"""
Serialize the general datastructures.
1. handling numpy infinity(which is an issue in ransac model)
2. unserializable type numpy.int32
3. unserializable type numpy.int64
4. list type which may contain unserializable type numpy.int32|int64
5. object of unserializable numpy.ndarray class
6. dictionary serialization
7. tuple serialization
Serialize the data[key] of the given model which its type is model_type.
basically in order to fully serialize a model, we should traverse over all the keys of its data dictionary and
pass it through the chain of associated transporters to get fully serialized.
:param data: the internal data dictionary of the given model
:type data: dict
:param key: the special key of the data param, which we're going to serialize its value(data[key])
:type key: object
:param model_type: the model type of the ML model, which its data dictionary is given as the data param.
:type model_type: str
:return: pymilo serialized output of data[key]
"""
if not (isinstance(data[key], object) or isinstance(data[key], str)):
if np.isnan(data[key]): # throws exception on object & str types
data[key] = {
"np-type": "numpy.nan",
"value": "NaN"
}
elif isinstance(data[key], type):
raw_type = str(data[key])
raw_type = "numpy" + str(raw_type).split("numpy")[-1][:-2]
if raw_type in NUMPY_TYPE_DICT.keys():
data[key] = {
"np-type": "numpy.dtype",
"value": raw_type
}
# 1. Handling numpy infinity, ransac
elif isinstance(data[key], np.float64):
if np.inf == data[key]:
data[key] = {
"np-type": "numpy.infinity",
"value": "infinite" # added for compatibility
}
else:
data[key] = {"value": data[key], "np-type": "numpy.float64"}
elif isinstance(data[key], np.intc):
data[key] = {"value": int(data[key]), "np-type": "numpy.intc"}
elif isinstance(data[key], np.int32):
data[key] = {"value": int(data[key]), "np-type": "numpy.int32"}
elif isinstance(data[key], np.int64):
data[key] = {"value": int(data[key]), "np-type": "numpy.int64"}
elif isinstance(data[key], np.uint64):
data[key] = {"value": int(data[key]), "np-type": "numpy.uint64"}
elif isinstance(data[key], list):
new_list = []
for item in data[key]:
if isinstance(item, np.int32):
new_list.append(
{"value": int(item), "np-type": "numpy.int32"})
elif isinstance(item, np.int64):
new_list.append(
{"value": int(item), "np-type": "numpy.int64"})
elif isinstance(item, np.ndarray):
new_list.append(self.deep_serialize_ndarray(item))
elif isinstance(item, tuple):
new_list.append(self.serialize_tuple(item))
elif self._is_remainder_cols_list(item):
new_list.append(list(item))
elif isinstance(item, slice):
new_list.append({
"pymilo-slice": True,
"start": item.start,
"stop": item.stop,
"step": item.step
})
else:
new_list.append(item)
data[key] = new_list
elif isinstance(data[key], np.ndarray):
data[key] = self.deep_serialize_ndarray(data[key])
elif isinstance(data[key], set):
data[key] = {
"pymilo-set": list(data[key])
}
elif isinstance(data[key], dict):
data[key] = self.serialize_dict(data[key])
elif isinstance(data[key], slice):
data[key] = {
"pymilo-slice": True,
"start": data[key].start,
"stop": data[key].stop,
"step": data[key].step
}
elif isinstance(data[key], tuple):
data[key] = self.serialize_tuple(data[key])
return data[key]
def deserialize(self, data, key, model_type):
"""
Deserialize the general datastructures.
1. Dictionary deserialization
2. Deep conversion of lists to numpy.ndarray class
3. Convert custom serializable object of np.int32|int64 to the main np.int32|int64 type
deserialize the special loss_function_ of the SGDClassifier, SGDOneClassSVM, Perceptron and PassiveAggressiveClassifier.
the associated loss_function_ field of the pymilo serialized model, is extracted through the SGDClassifier's _get_loss_function function
with enough feeding of the needed inputs.
deserialize the data[key] of the given model which its type is model_type.
basically in order to fully deserialize a model, we should traverse over all the keys of its serialized data dictionary and
pass it through the chain of associated transporters to get fully deserialized.
:param data: the internal data dictionary of the associated json file of the ML model which is generated previously by
pymilo export.
:type data: dict
:param key: the special key of the data param, which we're going to deserialize its value(data[key])
:type key: object
:param model_type: the model type of the ML model, which its internal serialized data dictionary is given as the data param.
:type model_type: str
:return: pymilo deserialized output of data[key]
"""
if isinstance(data[key], dict):
if 'pymilo-bypass' in data[key]:
return data[key]
else:
return self.get_deserialized_dict(data[key])
elif isinstance(data[key], list):
return self.get_deserialized_list(data[key])
elif isinstance(data[key], dict) and data[key].get("pymilo-slice", False):
return slice(data[key]["start"], data[key]["stop"], data[key]["step"])
elif self.is_numpy_primary_type(data[key]):
return self.get_deserialized_regular_primary_types(data[key])
else:
# TODO
return data[key]
def get_deserialized_dict(self, content):
    """
    Deserialize the given previously made serializable dictionary.

    1. tagged dicts (tuple, set, slice, ndarray, numpy scalar) are converted back to the object
       they stand for,
    2. for any other dict, values are restored in place (nested dicts recursively, list items
       one by one) and keys that were originally numpy integers get their numpy type back.

    It is mainly used in serializing/deserializing the "scores_" field in Logistic regression([+CV]).

    :param content: given dictionary
    :type content: dict
    :return: the original dictionary
    """
    if not isinstance(content, dict):
        return content
    if check_str_in_iterable("pymilo-tuple", content):
        return tuple(self.get_deserialized_list(content["pymilo-tuple"]))
    if check_str_in_iterable("pymilo-set", content):
        return set(self.get_deserialized_list(content["pymilo-set"]))
    if check_str_in_iterable("pymilo-slice", content):
        return slice(content["start"], content["stop"], content["step"])
    if self.is_deserialized_ndarray(content):
        return self.deep_deserialize_ndarray(content)
    if check_str_in_iterable("np-type", content) and check_str_in_iterable("value", content):
        return self.get_deserialized_regular_primary_types(content)

    key_restorations = []
    for key in content:
        current = content[key]
        if isinstance(current, dict):
            content[key] = self.get_deserialized_dict(current)
        elif isinstance(current, list):
            content[key] = [
                self.deep_deserialize_ndarray(item) if self.is_deserialized_ndarray(item)
                else self.deserialize_primitive_type(item)
                for item in current
            ]
        # a value still tagged with "np-type" marks an entry whose key was a numpy integer
        if check_str_in_iterable("np-type", content[key]):
            numpy_key = NUMPY_TYPE_DICT[content[key]["np-type"]](key)
            original_value = content[key]["key-value"]
            key_restorations.append([key, numpy_key, original_value])
    # keys are swapped only after the iteration is over
    for plain_key, numpy_key, original_value in key_restorations:
        del content[plain_key]
        content[numpy_key] = original_value
    return content
def get_deserialized_list(self, content):
    """
    Deserialize the given list to its original form.

    Each item is restored on its own: tagged tuples become tuples, serialized ndarrays become
    ndarrays, tagged slices become slices, and everything else goes through the primitive
    type deserializer. The input list itself is left untouched; a new list is returned.

    It is mainly used in serializing/deserializing the "active_" array field in Lasso Lars.

    :param content: given list to get
    :type content: list
    :return: the original list
    """
    restored = []
    for item in content:
        item_is_dict = isinstance(item, dict)
        if item_is_dict and check_str_in_iterable("pymilo-tuple", item):
            value = tuple(self.get_deserialized_list(item["pymilo-tuple"]))
        elif self.is_deserialized_ndarray(item):
            value = self.deep_deserialize_ndarray(item)
        elif item_is_dict and item.get("pymilo-slice", False):
            value = slice(item["start"], item["stop"], item["step"])
        else:
            value = self.deserialize_primitive_type(item)
        restored.append(value)
    return restored
def get_deserialized_regular_primary_types(self, content):
    """
    Deserialize the given item to its original form.

    The "np-type" entry of the item selects the matching entry of NUMPY_TYPE_DICT:

    1. "numpy.dtype": the stored value names the type to hand back,
    2. "numpy.nan": the table entry itself is handed back,
    3. anything else (np.int32, np.int64, np.infinity, ...): the table entry is called with the
       stored value.

    Callers are expected to pass an item carrying an "np-type" entry.

    :param content: given item needed to get back to its original form
    :type content: object
    :return: the associated np.int32|np.int64|np.inf
    """
    if "np-type" in content:
        type_name = content["np-type"]
        if type_name == "numpy.dtype":
            stored = content["value"]
            if isinstance(stored, str):
                # when the value is the associated type name like numpy.float64
                return NUMPY_TYPE_DICT[stored]
            return NUMPY_TYPE_DICT[type_name](NUMPY_TYPE_DICT[stored])
        if type_name == "numpy.nan":
            return NUMPY_TYPE_DICT[type_name]
    return NUMPY_TYPE_DICT[content["np-type"]](content['value'])
def is_numpy_primary_type(self, content):
    """
    Check whether the given object is a serialized numpy primary type.

    Such an object is a dict whose "np-type" entry is one of the type names known to
    NUMPY_TYPE_DICT. The names are the *keys* of that table (its values are the matching
    constructors/constants), so membership is tested against the keys.

    :param content: given object to get checked whether it is a numpy primary type or not
    :type content: any
    :return: boolean representing whether the associated content is a numpy primary type or not
    """
    if is_primitive(content):
        return False
    # only a dict can be indexed by "np-type"; other containers are never a match
    if not isinstance(content, dict):
        return False
    if not check_str_in_iterable("np-type", content):
        return False
    return content["np-type"] in NUMPY_TYPE_DICT
def ndarray_to_list(self, ndarray_item):
"""
Convert the given ndarray to its fully listed format.
1. convert itself to a list
2. iterate over it's elements and apply ndarray to list conversion if it's eligible
:param ndarray_item: given ndarray needed to get converted to it's fully listed form
:type ndarray_item: numpy.ndarray
:return: list
"""
if isinstance(ndarray_item, np.ndarray):
listed_ndarray = ndarray_item.tolist()
new_list = []
for item in listed_ndarray:
new_list.append(self.ndarray_to_list(item))
return new_list
else:
return ndarray_item
def list_to_ndarray(self, list_item):
    """
    Convert the given list to its fully ndarray format.

    1. apply the conversion to each element of the list first
    2. convert the resulting list to an ndarray: a regular array when the elements are all plain
       int/float/str/bool, or all ndarrays sharing dtype and length; an object array otherwise

    A non-list input goes through the primitive type deserializer instead.

    :param list_item: given list needed to get converted to it's np.ndarray form
    :type list_item: list
    :return: numpy.ndarray
    """
    if not isinstance(list_item, list):
        return self.deserialize_primitive_type(list_item)
    if len(list_item) == 0:
        return np.asarray(list_item)
    converted = [self.list_to_ndarray(element) for element in list_item]
    same_type, common_type = get_homogeneous_type(converted)
    if same_type:
        if common_type in [int, float, str, bool]:
            return np.asarray(converted)
        if common_type == np.ndarray:
            same_dtype, _ = get_homogeneous_type([array.dtype for array in converted])
            if same_dtype and all_same([len(array) for array in converted]):
                try:
                    return np.asarray(converted)
                except Exception:
                    # when we have a list of ndarrays with different shapes.
                    return converted
    return np.asarray(converted, dtype=object)
def deserialize_primitive_type(self, primitive):
    """
    Deserialize the given primitive data type.

    Only a non-primitive value carrying an "np-type" entry is converted (to the numpy value it
    stands for); every other input is returned unchanged.

    :param primitive: given primitive needed to get deserialized to it's pure primitive form
    :type primitive: pure python primitive or dict
    :return: pure python primitive or numpy primitive data type
    """
    if not is_primitive(primitive) and check_str_in_iterable("np-type", primitive):
        return self.get_deserialized_regular_primary_types(primitive)
    return primitive
def deep_serialize_ndarray(self, ndarray):
"""
Serialize the given ndarray.
:param ndarray_item: given ndarray needed to get serialized to
:type ndarray_item: numpy.ndarray
:return: dict
"""
if (not (isinstance(ndarray, np.ndarray))):
return None # throw error
listed_ndarray = ndarray.tolist()
dtype = ndarray.dtype
new_list = []
for item in listed_ndarray:
if isinstance(item, np.ndarray):
new_list.append(self.deep_serialize_ndarray(item))
else:
new_list.append(item)
return {
'pymiloed-ndarray-list': new_list,
'pymiloed-ndarray-dtype': str(dtype),
'pymiloed-ndarray-shape': ndarray.shape,
'pymiloed-data-structure': 'numpy.ndarray'
}
def is_deserialized_ndarray(self, deserialized_ndarray):
    """
    Check whether the given input is a pymilo-serialized ndarray.

    That is the case for a dict whose 'pymiloed-data-structure' entry equals 'numpy.ndarray'.

    :param deserialized_ndarray: given input to get checked
    :type deserialized_ndarray: obj
    :return: bool
    """
    if isinstance(deserialized_ndarray, dict):
        has_marker = 'pymiloed-data-structure' in deserialized_ndarray
        if has_marker and deserialized_ndarray['pymiloed-data-structure'] == 'numpy.ndarray':
            return True
    return False
def deep_deserialize_ndarray(self, deserialized_ndarray):
"""
Deserialize the given deserialized_ndarray to its fully ndarray format.
:param deserialized_ndarray: given deserialized_ndarray needed to get deserialized to it's np.ndarray form
:type deserialized_ndarray: dict
:return: numpy.ndarray
"""
if not self.is_deserialized_ndarray(deserialized_ndarray):
return None # throw error
inner_list = deserialized_ndarray['pymiloed-ndarray-list']
dtype = deserialized_ndarray['pymiloed-ndarray-dtype']
shape = deserialized_ndarray['pymiloed-ndarray-shape']
if dtype.startswith("["):
dtype = literal_eval(dtype)
new_list = []
for item in inner_list:
if self.is_deserialized_ndarray(item):
new_list.append(self.deep_deserialize_ndarray(item))
else:
if len(shape) == 1:
# shape in form if [int] so inner items should not be list.
# convert each inner item to tuple(if it a list)
if isinstance(item, list):
new_list.append(tuple(item))
else:
new_list.append(item)
else:
new_list.append(item)
pre_result = np.asarray(new_list, dtype=dtype)
if dtype == "object" and hasattr(new_list[0], "dtype"):
# check if inner items have specific dtype.
pre_result = np.asarray(new_list)
if not prefix_list(list(pre_result.shape), shape):
return pre_result.reshape(shape)
return pre_result
================================================
FILE: pymilo/transporters/generator_transporter.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo Generator transporter."""
from numpy.random._generator import Generator
from ..utils.util import is_primitive, check_str_in_iterable
from .transporter import AbstractTransporter
from numpy.random import default_rng
class GeneratorTransporter(AbstractTransporter):
    """Customized PyMilo Transporter developed to handle Generator objects."""

    def serialize(self, data, key, model_type):
        """
        Serialize Generator object.

        When data[key] is a numpy Generator it is replaced, in place, by a dict holding the
        generator's state; any other value is left as it is.

        :param data: the internal data dictionary of the given model
        :type data: dict
        :param key: the special key of the data param, which we're going to serialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model, which data dictionary is given as the data param
        :type model_type: str
        :return: pymilo serialized output of data[key]
        """
        candidate = data[key]
        if isinstance(candidate, Generator):
            data[key] = {
                "pymilo-bypass": True,
                "pymilo-generator": {
                    "state": candidate.__getstate__(),
                },
            }
        return data[key]

    def deserialize(self, data, key, model_type):
        """
        Deserialize previously pymilo serialized Generator object.

        When data[key] carries a "pymilo-generator" entry, a new generator is created and the
        stored state is loaded into it; any other value is returned unchanged.

        :param data: the internal data dictionary of the associated json file of the ML model which is
            generated previously by pymilo export.
        :type data: dict
        :param key: the special key of the data param, which we're going to deserialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model, which internal serialized data dictionary is
            given as the data param
        :type model_type: str
        :return: pymilo deserialized output of data[key]
        """
        content = data[key]
        if is_primitive(content) or content is None:
            return content
        if not check_str_in_iterable("pymilo-generator", content):
            return content
        # the seed is irrelevant: the stored state overwrites it right away
        generator = default_rng(0)
        generator.__setstate__(content["pymilo-generator"]["state"])
        return generator
================================================
FILE: pymilo/transporters/lossfunction_transporter.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo Loss function transporter."""
from .transporter import AbstractTransporter
from ..utils.util import is_primitive, check_str_in_iterable
from sklearn.linear_model._stochastic_gradient import SGDClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import GradientBoostingClassifier
from ..pymilo_param import SKLEARN_ENSEMBLE_TABLE, NOT_SUPPORTED
if SKLEARN_ENSEMBLE_TABLE["HistGradientBoostingRegressor"] != NOT_SUPPORTED:
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingClassifier
# Maps the dotted path of each loss base class available in the installed sklearn
# to the class itself; filled by the import probes below.
loss_function_dict = {}

# Each probe tries one known location of a loss base class. A missing module or name
# only means the installed sklearn does not ship that location, so just ImportError is
# caught: anything else (including KeyboardInterrupt/SystemExit) must propagate.
sklearn_baseloss_support = False
try:
    from sklearn._loss.loss import BaseLoss
    sklearn_baseloss_support = True
    loss_function_dict["sklearn._loss.loss.BaseLoss"] = BaseLoss
except ImportError:
    pass

hist_baseloss_support = False
try:
    from sklearn.ensemble._hist_gradient_boosting.loss import BaseLoss
    hist_baseloss_support = True
    loss_function_dict["sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss"] = BaseLoss
except ImportError:
    pass

gb_losses_support = False
try:
    from sklearn.ensemble._gb_losses import LossFunction
    gb_losses_support = True
    loss_function_dict["sklearn.ensemble._gb_losses.LossFunction"] = LossFunction
except ImportError:
    pass
class LossFunctionTransporter(AbstractTransporter):
    """PyMilo Transporter dedicated to the loss function fields of SGD-based and gradient boosting models."""

    def serialize(self, data, key, model_type):
        """
        Serialize the loss function object stored in data[key], if there is one.

        For SGDClassifier, SGDOneClassSVM, Perceptron and PassiveAggressiveClassifier the `loss_function_`
        (or `_loss_function_`) field is replaced by the model's `loss` value.
        For the (Hist)GradientBoosting models the loss object is replaced by a dictionary which holds the name of
        the loss library it comes from and the fields that deserialize reads back.
        Every other value is returned as it is.

        :param data: the internal data dictionary of the given model
        :type data: dict
        :param key: the special key of the data param, which we're going to serialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model, which data dictionary is given as the data param
        :type model_type: str
        :return: pymilo serialized output of data[key]
        """
        sgd_models = ("SGDClassifier", "SGDOneClassSVM", "Perceptron", "PassiveAggressiveClassifier")
        is_gb_model = model_type in ("GradientBoostingRegressor", "GradientBoostingClassifier")
        is_hist_model = model_type in ("HistGradientBoostingRegressor", "HistGradientBoostingClassifier")

        if model_type in sgd_models and key in ("loss_function_", "_loss_function_"):
            data[key] = {"pymilo-sgd-loss": data["loss"]}

        # The checks below run one after the other on purpose: once data[key] has been
        # replaced by a dictionary, the remaining isinstance tests simply do not match.
        if sklearn_baseloss_support and isinstance(data[key], BaseLoss):
            loss_fields = data[key].__dict__
            if is_gb_model:
                data[key] = {
                    "pymilo-ensemble-loss": {
                        "loss-library": "sklearn._loss.loss.BaseLoss",
                        "loss": data["loss"],
                        "constant_hessian": loss_fields["constant_hessian"],
                        "n_classes": loss_fields["n_classes"],
                        "alpha": data["alpha"],
                        "model_type": model_type,
                    }
                }
            elif is_hist_model:
                data[key] = {
                    "pymilo-ensemble-loss": {
                        "loss-library": "sklearn._loss.loss.BaseLoss",
                        "loss": data["loss"],
                        "constant_hessian": loss_fields["constant_hessian"],
                        "n_trees_per_iteration_": data["n_trees_per_iteration_"],
                        "model_type": model_type,
                    }
                }

        if gb_losses_support and isinstance(data[key], LossFunction):
            if model_type == "GradientBoostingRegressor":
                data[key] = {
                    "pymilo-ensemble-loss": {
                        "loss-library": "sklearn.ensemble._gb_losses.LossFunction",
                        "loss": data["loss"],
                        "alpha": data["alpha"],
                        "model_type": model_type,
                    }
                }
            elif model_type == "GradientBoostingClassifier":
                data[key] = {
                    "pymilo-ensemble-loss": {
                        "loss-library": "sklearn.ensemble._gb_losses.LossFunction",
                        "len(classes_)": len(data["classes_"]),
                        "loss": data["loss"],
                        "n_classes_": data["n_classes_"],
                        "model_type": model_type,
                    }
                }

        if hist_baseloss_support and isinstance(data[key], BaseLoss) and is_hist_model:
            loss_fields = data[key].__dict__
            data[key] = {
                "pymilo-ensemble-loss": {
                    "loss-library": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss",
                    "loss": data["loss"],
                    "hessians_are_constant": loss_fields["hessians_are_constant"],
                    "n_threads": loss_fields["n_threads"],
                    "n_trees_per_iteration_": data["n_trees_per_iteration_"],
                    "model_type": model_type,
                }
            }

        return data[key]

    def deserialize(self, data, key, model_type):
        """
        Deserialize a loss function previously serialized by this transporter.

        An SGD loss is rebuilt through the `_get_loss_function` method of a fresh SGDClassifier.
        An ensemble loss is rebuilt according to its recorded loss library: either through the `_get_loss` method of
        a fresh (Hist)GradientBoosting model, or by instantiating the associated `_gb_losses` class directly.
        Primitive values, None and anything which does not carry one of the two pymilo markers are returned as they are.

        :param data: the internal data dictionary of the associated json file of the ML model which is generated previously by
        pymilo export.
        :type data: dict
        :param key: the special key of the data param, which we're going to deserialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model, which internal serialized data dictionary is given as the data param
        :type model_type: str
        :return: pymilo deserialized output of data[key]
        """
        content = data[key]
        if is_primitive(content) or content is None:
            return content

        if check_str_in_iterable("pymilo-sgd-loss", content):
            sgd_loss = content["pymilo-sgd-loss"]
            return SGDClassifier(loss=sgd_loss)._get_loss_function(sgd_loss)

        if not check_str_in_iterable("pymilo-ensemble-loss", content):
            return content

        loss_info = content["pymilo-ensemble-loss"]
        # the model type recorded at export time takes over the one handed to this call
        model_type = loss_info["model_type"]
        loss_library = loss_info["loss-library"]
        gb_models = ("GradientBoostingRegressor", "GradientBoostingClassifier")
        hist_models = ("HistGradientBoostingRegressor", "HistGradientBoostingClassifier")

        if loss_library == "sklearn._loss.loss.BaseLoss":
            if loss_info["constant_hessian"]:
                sample_weight = None
            else:
                sample_weight = True
            if model_type == "GradientBoostingRegressor":
                regressor = GradientBoostingRegressor(
                    loss=loss_info["loss"],
                    alpha=loss_info["alpha"])
                return regressor._get_loss(sample_weight)
            if model_type == "GradientBoostingClassifier":
                classifier = GradientBoostingClassifier(loss=loss_info["loss"])
                classifier.__dict__["n_classes_"] = loss_info["n_classes"]
                return classifier._get_loss(sample_weight)
            if model_type == "HistGradientBoostingRegressor":
                regressor = HistGradientBoostingRegressor(loss=loss_info["loss"])
                return regressor._get_loss(sample_weight)
            if model_type == "HistGradientBoostingClassifier":
                trees_per_iteration = loss_info["n_trees_per_iteration_"]
                classifier = HistGradientBoostingClassifier()
                classifier.__dict__["n_trees_per_iteration_"] = trees_per_iteration
                return classifier._get_loss(sample_weight)
        elif (loss_library == "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss"
                and model_type in hist_models):
            if loss_info["hessians_are_constant"]:
                sample_weight = None
            else:
                sample_weight = True
            if model_type == "HistGradientBoostingRegressor":
                regressor = HistGradientBoostingRegressor(loss=loss_info["loss"])
                return regressor._get_loss(sample_weight, loss_info["n_threads"])
            if model_type == "HistGradientBoostingClassifier":
                trees_per_iteration = loss_info["n_trees_per_iteration_"]
                classifier = HistGradientBoostingClassifier(loss=loss_info["loss"])
                classifier.__dict__["n_trees_per_iteration_"] = trees_per_iteration
                return classifier._get_loss(sample_weight, loss_info["n_threads"])
        elif loss_library == "sklearn.ensemble._gb_losses.LossFunction" and model_type in gb_models:
            from sklearn.ensemble._gb_losses import MultinomialDeviance, BinomialDeviance, LOSS_FUNCTIONS
            loss_name = loss_info["loss"]
            if loss_name in ("deviance", "log_loss"):
                if loss_info["len(classes_)"] > 2:
                    loss_class = MultinomialDeviance
                else:
                    loss_class = BinomialDeviance
            else:
                loss_class = LOSS_FUNCTIONS[loss_name]
            if model_type == "GradientBoostingRegressor":
                if loss_name in ("huber", "quantile"):
                    return loss_class(loss_info["alpha"])
                return loss_class()
            if model_type == "GradientBoostingClassifier":
                return loss_class(loss_info["n_classes_"])

        # no known combination of loss library and model type matched
        return content
================================================
FILE: pymilo/transporters/neighbors_tree_transporter.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo KDTree/BallTree(from sklearn.neighbors) object transporter."""
from sklearn.neighbors._kd_tree import KDTree
from sklearn.neighbors._ball_tree import BallTree
from .transporter import AbstractTransporter
# Maps the tree type name stored in a serialized payload to the class used to rebuild that tree.
type_to_tree = dict(KDTree=KDTree, BallTree=BallTree)
class NeighborsTreeTransporter(AbstractTransporter):
    """Customized PyMilo Transporter developed to handle (pyi,pyx) KDTree and BallTree objects."""

    def serialize(self, data, key, model_type):
        """
        Serialize the special _tree field of the Neighbors model.

        Only the type of the tree is recorded, the tree itself gets rebuilt during deserialization.

        :param data: the internal data dictionary of the given model
        :type data: dict
        :param key: the special key of the data param, which we're going to serialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model
        :type model_type: str
        :return: pymilo serialized output of data[key]
        """
        tree = data[key]
        if isinstance(tree, KDTree):
            tree_type = "KDTree"
        elif isinstance(tree, BallTree):
            tree_type = "BallTree"
        else:
            # not a neighbors tree, nothing to do
            return tree
        data[key] = {
            'pymilo-bypass': True,
            'pymilo-tree-type': tree_type,
        }
        return data[key]

    def deserialize(self, data, key, model_type):
        """
        Deserialize the special _tree field of the Neighbors model.

        The tree is rebuilt from the `_fit_X`, `leaf_size`, `effective_metric_` and `effective_metric_params_`
        entries of the given data dictionary.

        :param data: the internal data dictionary of the associated JSON file of the ML model generated by pymilo export.
        :type data: dict
        :param key: the special key of the data param, which we're going to deserialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model
        :type model_type: str
        :return: pymilo deserialized output of data[key]
        """
        content = data[key]
        neighbors_models = (
            "KNeighborsRegressor",
            "KNeighborsClassifier",
            "RadiusNeighborsRegressor",
            "RadiusNeighborsClassifier",
            "NearestNeighbors",
            "NearestCentroid",
        )
        if key != "_tree" or content is None or model_type not in neighbors_models:
            return content
        tree_class = type_to_tree[content["pymilo-tree-type"]]
        return tree_class(
            data["_fit_X"],
            data["leaf_size"],
            data["effective_metric_"],
            **data["effective_metric_params_"],
        )
================================================
FILE: pymilo/transporters/preprocessing_transporter.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo Preprocessing transporter."""
from ..pymilo_param import SKLEARN_PREPROCESSING_TABLE
from ..utils.util import check_str_in_iterable, get_sklearn_type
from .transporter import AbstractTransporter, Command
from .general_data_structure_transporter import GeneralDataStructureTransporter
from .function_transporter import FunctionTransporter
from scipy.interpolate._bsplines import BSpline
# Transporters applied to the fields of preprocessing modules and BSpline objects, keyed by transporter name.
PREPROCESSING_CHAIN = dict(
    GeneralDataStructureTransporter=GeneralDataStructureTransporter(),
    FunctionTransporter=FunctionTransporter(),
)
class PreprocessingTransporter(AbstractTransporter):
    """Transporter dedicated to sklearn preprocessing objects."""

    def serialize(self, data, key, model_type):
        """
        Serialize data[key] when it is a preprocessing object.

        Values which are not recognized as a preprocessing module are returned as they are.

        :param data: the internal data dictionary of the given model
        :type data: dict
        :param key: the special key of the data param, which we're going to serialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model, which data dictionary is given as the data param
        :type model_type: str
        :return: pymilo serialized output of data[key]
        """
        candidate = data[key]
        if not self.is_preprocessing_module(candidate):
            return candidate
        return self.serialize_pre_module(candidate)

    def deserialize(self, data, key, model_type):
        """
        Deserialize data[key] when it is a pymilo serialized preprocessing object.

        Values which are not recognized as a serialized preprocessing module are returned as they are.

        :param data: the internal data dictionary of the associated json file of the ML model which is generated previously by
        pymilo export.
        :type data: dict
        :param key: the special key of the data param, which we're going to deserialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model, which internal serialized data dictionary is given as the data param
        :type model_type: str
        :return: pymilo deserialized output of data[key]
        """
        content = data[key]
        if not self.is_preprocessing_module(content):
            return content
        return self.deserialize_pre_module(content)

    def is_preprocessing_module(self, pre_module):
        """
        Check whether the given object is a sklearn Preprocessing module(either a live one or a pymilo serialized one).

        :param pre_module: given object
        :type pre_module: any
        :return: bool
        """
        if not isinstance(pre_module, dict):
            return get_sklearn_type(pre_module) in SKLEARN_PREPROCESSING_TABLE
        # a dictionary qualifies only when it carries the type marker of a supported module
        return check_str_in_iterable(
            "pymilo-preprocessing-type",
            pre_module) and pre_module["pymilo-preprocessing-type"] in SKLEARN_PREPROCESSING_TABLE

    def serialize_pre_module(self, pre_module):
        """
        Serialize the given Preprocessing object.

        Inner preprocessing modules and BSpline fields(single or listed) are serialized first,
        then the fields of the module are passed through the preprocessing chain.

        :param pre_module: given sklearn preprocessing module
        :type pre_module: sklearn.preprocessing
        :return: pymilo serialized pre_module
        """
        attributes = pre_module.__dict__
        for name, value in attributes.items():
            if self.is_preprocessing_module(value):
                attributes[name] = self.serialize_pre_module(value)
            elif isinstance(value, BSpline):
                attributes[name] = self.serialize_spline(value)
            elif isinstance(value, list) and len(value) > 0 and isinstance(value[0], BSpline):
                attributes[name] = [self.serialize_spline(spline) for spline in value]

        for transporter in PREPROCESSING_CHAIN.values():
            transporter.transport(pre_module, Command.SERIALIZE)

        return {
            "pymilo-bypass": True,
            "pymilo-preprocessing-type": get_sklearn_type(pre_module),
            "pymilo-preprocessing-data": pre_module.__dict__
        }

    def deserialize_pre_module(self, serialized_pre_module):
        """
        Deserialize the given pymilo serialized Preprocessing object.

        :param serialized_pre_module: serialized preprocessing module(by pymilo)
        :type serialized_pre_module: dict
        :return: retrieved associated sklearn.preprocessing module
        """
        fields = serialized_pre_module["pymilo-preprocessing-data"]
        module_class = SKLEARN_PREPROCESSING_TABLE[serialized_pre_module["pymilo-preprocessing-type"]]
        retrieved_pre_module = module_class()

        for name in fields:
            # inner preprocessing module
            if self.is_preprocessing_module(fields[name]):
                fields[name] = self.deserialize_pre_module(fields[name])
            # inner BSpline
            if self.is_bspline(fields[name]):
                fields[name] = self.deserialize_spline(fields[name])
            # inner list of BSplines
            current = fields[name]
            if isinstance(current, list) and len(current) > 0 and self.is_bspline(current[0]):
                fields[name] = [self.deserialize_spline(item) for item in current]
            for transporter in PREPROCESSING_CHAIN.values():
                fields[name] = transporter.deserialize(fields, name, "")

        for name, value in fields.items():
            setattr(retrieved_pre_module, name, value)
        return retrieved_pre_module

    def is_bspline(self, bspline):
        """
        Check whether the given object is a scipy.interpolate._bsplines.BSpline(either a live one or a pymilo serialized one).

        :param bspline: given object
        :type bspline: any
        :return: bool
        """
        if not isinstance(bspline, dict):
            return get_sklearn_type(bspline) == "BSpline"
        return check_str_in_iterable(
            "pymilo-preprocessing-type",
            bspline) and bspline["pymilo-preprocessing-type"] == "BSpline"

    def serialize_spline(self, bspline):
        """
        Serialize the given scipy.interpolate._bsplines.BSpline object.

        :param bspline: given scipy.interpolate._bsplines.BSpline module
        :type bspline: scipy.interpolate._bsplines.BSpline
        :return: pymilo serialized bspline
        """
        for transporter in PREPROCESSING_CHAIN.values():
            transporter.transport(bspline, Command.SERIALIZE)
        return {
            "pymilo-bypass": True,
            "pymilo-preprocessing-type": get_sklearn_type(bspline),
            "pymilo-preprocessing-data": bspline.__dict__
        }

    def deserialize_spline(self, serialized_bspline):
        """
        Deserialize the given pymilo serialized BSpline object.

        :param serialized_bspline: serialized BSpline object(by pymilo)
        :type serialized_bspline: dict
        :return: retrieved associated scipy.interpolate._bsplines.BSpline object
        """
        fields = serialized_bspline["pymilo-preprocessing-data"]
        for name in list(fields):
            for transporter in PREPROCESSING_CHAIN.values():
                fields[name] = transporter.deserialize(fields, name, "")

        # Handle both old scipy (t, c, k) and new scipy (_t, _c, _k) attribute names
        knots = fields.get("t", fields.get("_t"))
        coefficients = fields.get("c", fields.get("_c"))
        degree = fields.get("k", fields.get("_k"))
        retrieved_pre_module = BSpline(t=knots, k=degree, c=coefficients)

        for name, value in fields.items():
            setattr(retrieved_pre_module, name, value)
        return retrieved_pre_module
================================================
FILE: pymilo/transporters/randomstate_transporter.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo RandomState(MT19937) object transporter."""
import numpy as np
from .transporter import AbstractTransporter
from ..utils.util import check_str_in_iterable
class RandomStateTransporter(AbstractTransporter):
    """Customized PyMilo Transporter developed to handle RandomState field."""

    def serialize(self, data, key, model_type):
        """
        Serialize instances of the RandomState class.

        Record the five entries of the tuple returned by the `get_state` method of the random state object,
        with the second entry(the key array) converted to a list.
        Values which are not a RandomState are returned as they are.

        :param data: the internal data dictionary of the given model
        :type data: dict
        :param key: the special key of the data param, which we're going to serialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model
        :type model_type: str
        :return: pymilo serialized output of data[key]
        """
        random_state = data[key]
        if isinstance(random_state, np.random.RandomState):
            # Take a single snapshot: every get_state() call builds the whole state tuple again.
            state = random_state.get_state()
            data[key] = {
                "pymilo-bypass": True,
                "pymilo-randomstate": (
                    state[0],
                    state[1].tolist(),
                    state[2],
                    state[3],
                    state[4],
                ),
            }
        return data[key]

    def deserialize(self, data, key, model_type):
        """
        Deserialize the previously serialized RandomState object.

        A fresh RandomState is created and its state is set from the five recorded entries,
        with the second entry converted back to a numpy array.
        Values which do not carry the pymilo random state marker are returned as they are.

        :param data: the internal data dictionary of the associated json file of the ML model which is generated previously by
        pymilo export.
        :type data: dict
        :param key: the special key of the data param, which we're going to deserialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model
        :type model_type: str
        :return: pymilo deserialized output of data[key]
        """
        content = data[key]
        if not check_str_in_iterable("pymilo-randomstate", content):
            return content
        stored_state = content["pymilo-randomstate"]
        restored_state = (
            stored_state[0],
            np.array(stored_state[1]),
            stored_state[2],
            stored_state[3],
            stored_state[4],
        )
        _random_state = np.random.RandomState()
        _random_state.set_state(restored_state)
        return _random_state
================================================
FILE: pymilo/transporters/sgdoptimizer_transporter.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo SGDOptimizer object transporter."""
from .transporter import AbstractTransporter
from ..utils.util import check_str_in_iterable
from sklearn.neural_network._stochastic_optimizers import SGDOptimizer
class SGDOptimizerTransporter(AbstractTransporter):
    """Customized PyMilo Transporter developed to handle SGDOptimizer field."""

    def serialize(self, data, key, model_type):
        """
        Serialize instances of the SGDOptimizer class.

        Record the constructor hyper-parameters(`learning_rate_init`, `lr_schedule`, `momentum`, `nesterov`
        and `power_t`) together with the running state(`learning_rate` and `velocities`) of the optimizer object.
        Values which are not a SGDOptimizer are returned as they are.

        :param data: the internal data dictionary of the given model
        :type data: dict
        :param key: the special key of the data param, which we're going to serialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model
        :type model_type: str
        :return: pymilo serialized output of data[key]
        """
        optimizer = data[key]
        if isinstance(optimizer, SGDOptimizer):
            # The payload is flagged with pymilo-bypass, so the other transporters skip it:
            # everything stored here has to be JSON friendly already.
            data[key] = {
                "pymilo-bypass": True,
                'pymilo-sgdoptimizer': {
                    'type': "SGDOptimizer",
                    'learning_rate_init': float(optimizer.learning_rate_init),
                    'learning_rate': float(optimizer.learning_rate),
                    'lr_schedule': optimizer.lr_schedule,
                    'momentum': float(optimizer.momentum),
                    'nesterov': bool(optimizer.nesterov),
                    'power_t': float(optimizer.power_t),
                    'velocities': [velocity.tolist() for velocity in optimizer.velocities],
                    'velocities_dtype': [str(velocity.dtype) for velocity in optimizer.velocities],
                }
            }
        return data[key]

    def deserialize(self, data, key, model_type):
        """
        Deserialize the special _optimizer field which holds a SGDOptimizer.

        The optimizer is rebuilt from its recorded hyper-parameters, then its running state
        (`learning_rate` and `velocities`) is restored.
        Values which do not carry the pymilo SGDOptimizer marker are returned as they are.

        :param data: the internal data dictionary of the associated json file of the ML model which is generated previously by
        pymilo export.
        :type data: dict
        :param key: the special key of the data param, which we're going to deserialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model
        :type model_type: str
        :return: pymilo deserialized output of data[key]
        """
        content = data[key]
        if not check_str_in_iterable('pymilo-sgdoptimizer', content):
            return content
        optimizer_state = content['pymilo-sgdoptimizer']
        if optimizer_state["type"] != "SGDOptimizer":
            return content

        # function-scope import, this module does not import numpy at the top level
        import numpy as np

        velocities = [
            np.asarray(values, dtype=dtype_name)
            for values, dtype_name in zip(optimizer_state['velocities'], optimizer_state['velocities_dtype'])
        ]
        # The velocities have the shapes of the parameters they belong to, so they are also handed
        # in as the `params` argument.
        # NOTE(review): presumably some older scikit-learn releases keep a reference to `params`
        # inside the optimizer - confirm further training(partial_fit) behaviour on those releases.
        optimizer = SGDOptimizer(
            velocities,
            learning_rate_init=optimizer_state['learning_rate_init'],
            lr_schedule=optimizer_state['lr_schedule'],
            momentum=optimizer_state['momentum'],
            nesterov=optimizer_state['nesterov'],
            power_t=optimizer_state['power_t'])
        optimizer.learning_rate = optimizer_state['learning_rate']
        optimizer.velocities = velocities
        return optimizer
================================================
FILE: pymilo/transporters/transporter.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo Transporter."""
from ..utils.util import get_sklearn_type
from abc import ABC, abstractmethod
from enum import Enum
from ..utils.util import is_primitive, check_str_in_iterable
class Command(Enum):
    """Command is an enum class used to determine the type of transportation."""

    # ask a transporter to serialize the given request
    SERIALIZE = 1
    # ask a transporter to deserialize the given request
    DESERIALIZE = 2
class Transporter(ABC):
    """
    Transporter Interface.

    Each Transporter transports(either serializes or deserializes) the input according to the given command.
    Concrete transporters have to implement the serialize, deserialize, bypass, reset and transport methods.
    """

    @abstractmethod
    def serialize(self, data, key, model_type):
        """
        Serialize the data[key] of the given model which type is model_type.

        basically in order to fully serialize a model, we should traverse over all the keys of its data dictionary and
        pass it through the chain of associated transporters to get fully serialized.

        :param data: the internal data dictionary of the given model
        :type data: dict
        :param key: the special key of the data param, which we're going to serialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model, which data dictionary is given as the data param
        :type model_type: str
        :return: pymilo serialized output of data[key]
        """

    @abstractmethod
    def deserialize(self, data, key, model_type):
        """
        Deserialize the data[key] of the given model which type is model_type.

        basically in order to fully deserialize a model, we should traverse over all the keys of its serialized data dictionary and
        pass it through the chain of associated transporters to get fully deserialized.

        :param data: the internal data dictionary of the associated json file of the ML model
        which is generated previously by pymilo export.
        :type data: dict
        :param key: the special key of the data param, which we're going to deserialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model, which internal serialized data dictionary is given as the data param
        :type model_type: str
        :return: pymilo deserialized output of data[key]
        """

    @abstractmethod
    def bypass(self, content):
        """
        Determine whether to bypass transporting on this content or not.

        :param content: either a ML model object's internal data dictionary(.__dict__) or an object associated with the json string of a pymilo serialized ML model.
        :type content: object
        :return: boolean, whether to bypass or not
        """

    @abstractmethod
    def reset(self):
        """
        Reset internal data structures of the transport object.

        Some Transporters may be stateful and have internal data structures getting filled during transportation.

        :return: None
        """

    @abstractmethod
    def transport(self, request, command):
        """
        Either serializes or deserializes the request according to the given command.

        basically in order to fully transport a request, we should traverse over all the keys of its internal data dictionary and
        pass it through the chain of associated transporters to get fully transported.

        :param request: either a ML model object itself(when command is serialize) or
        an object associated with the json string of a pymilo serialized ML model(when command is deserialize)
        :type request: object
        :param command: determines the type of transportation, it can be either Serialize or Deserialize
        :type command: Command class
        :return: pymilo transported output of data[key]
        """
class AbstractTransporter(Transporter):
    """Base Transporter which implements the traversal of the given input, the serialize and deserialize steps are left to its subclasses."""

    def bypass(self, content):
        """
        Determine whether to bypass transporting on this content or not.

        Primitive contents and contents without the "pymilo-bypass" marker are never bypassed,
        otherwise the value stored under the marker is returned.

        :param content: either a ML model object's internal data dictionary or an object associated with the json string of a pymilo serialized ML model.
        :type content: object
        :return: boolean, whether to bypass or not
        """
        if is_primitive(content) or not check_str_in_iterable("pymilo-bypass", content):
            return False
        return content["pymilo-bypass"]

    def transport(self, request, command, is_inner_model=False):
        """
        Either serializes or deserializes the request according to the given command.

        Every key of the request's data dictionary is visited and its value gets replaced, in place,
        by the output of this transporter, then the transporter is reset.
        During serialization the values which are flagged to be bypassed are left untouched.

        :param request: either a ML model object itself(when command is serialize) or an object associated with the json string of a pymilo serialized ML model(when command is deserialize)
        :type request: object
        :param command: determines the type of transportation, it can be either Serialize or Deserialize
        :type command: Command class
        :param is_inner_model: determines whether it is an inner linear model of a super ml model
        :type is_inner_model: boolean
        :return: None
        """
        if command == Command.SERIALIZE:
            # request is a sklearn model
            fields = request.__dict__
            for name in fields:
                if not self.bypass(fields[name]):
                    fields[name] = self.serialize(
                        fields, name, get_sklearn_type(request))
            self.reset()
            return None

        if command == Command.DESERIALIZE:
            # request is a pymilo-created import object,
            # an inner model comes as a dictionary instead of an object with attributes
            if is_inner_model:
                fields, model_type = request["data"], request["type"]
            else:
                fields, model_type = request.data, request.type
            for name in fields:
                fields[name] = self.deserialize(fields, name, model_type)
            self.reset()
            return None

        # TODO error handling.
        return None

    def reset(self):
        """
        Reset internal data structures of the transport object.

        This default implementation holds no state, so there is nothing to reset.

        :return: None
        """
        return None
================================================
FILE: pymilo/transporters/tree_transporter.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo Tree(from sklearn.tree._tree) object transporter."""
import numpy as np
from sklearn.tree._tree import Tree
from ..pymilo_param import NUMPY_TYPE_DICT
from .transporter import AbstractTransporter
from ..utils.util import check_str_in_iterable
from .general_data_structure_transporter import GeneralDataStructureTransporter
class TreeTransporter(AbstractTransporter):
    """Customized PyMilo Transporter developed to handle (pyi,pyx) Tree object."""

    def serialize(self, data, key, model_type):
        """
        Serialize instances of the Tree class.

        Record the internal state of the tree(as returned by its `__getstate__` method) together with
        its n_features, n_classes and n_outputs fields.
        Values which are not a Tree are returned as they are.

        :param data: the internal data dictionary of the given model
        :type data: dict
        :param key: the special key of the data param, which we're going to serialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model
        :type model_type: str
        :return: pymilo serialized output of data[key]
        """
        tree = data[key]
        if not isinstance(tree, Tree):
            return tree

        gdst = GeneralDataStructureTransporter()
        state = tree.__getstate__()
        node_array = state["nodes"]
        # the first node is enough to find out the type and the name of every node field
        first_node = node_array[0]
        serialized_state = {
            "max_depth": state["max_depth"],
            "node_count": state["node_count"],
            "nodes": {
                "types": [str(np.dtype(field_value).name) for field_value in first_node],
                "field-names": list(first_node.dtype.names),
                "values": [node.tolist() for node in node_array],
            },
            "values": gdst.ndarray_to_list(state["values"]),
        }
        data[key] = {
            'pymilo-bypass': True,
            'pymilo-tree': {
                'internal_state': serialized_state,
                'n_features': tree.n_features,
                'n_classes': gdst.ndarray_to_list(tree.n_classes),
                'n_outputs': tree.n_outputs,
            }
        }
        return data[key]

    def deserialize(self, data, key, model_type):
        """
        Deserialize the special tree_ field of the Decision Trees.

        A new Tree is created from the recorded n_features, n_classes and n_outputs fields,
        then its internal state is restored through its `__setstate__` method.
        Values which do not carry the pymilo tree marker are returned as they are.

        :param data: the internal data dictionary of the associated JSON file of the ML model generated by pymilo export.
        :type data: dict
        :param key: the special key of the data param, which we're going to deserialize its value(data[key])
        :type key: object
        :param model_type: the model type of the ML model
        :type model_type: str
        :return: pymilo deserialized output of data[key]
        """
        content = data[key]
        if not check_str_in_iterable('pymilo-tree', content):
            return content

        gdst = GeneralDataStructureTransporter()
        tree_params = content['pymilo-tree']
        stored_state = tree_params["internal_state"]
        stored_nodes = stored_state["nodes"]

        # rebuild the structured dtype of the node array out of the recorded field names and types
        nodes_dtype_spec = [
            (stored_nodes["field-names"][position], NUMPY_TYPE_DICT["numpy." + type_name])
            for position, type_name in enumerate(stored_nodes["types"])
        ]
        nodes = np.array(
            [tuple(node) for node in stored_nodes["values"]],
            dtype=nodes_dtype_spec)

        restored_state = {
            "max_depth": stored_state["max_depth"],
            "node_count": stored_state["node_count"],
            "nodes": nodes,
            "values": gdst.list_to_ndarray(stored_state["values"]),
        }
        n_classes = np.array(tree_params["n_classes"], dtype=np.intp)

        _tree = Tree(
            tree_params["n_features"],
            n_classes,
            tree_params["n_outputs"]
        )
        _tree.__setstate__(restored_state)
        return _tree
================================================
FILE: pymilo/transporters/treepredictor_transporter.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo TreePredictor transporter."""
from sklearn.ensemble._hist_gradient_boosting.predictor import TreePredictor
from ..utils.util import check_str_in_iterable
from .transporter import AbstractTransporter
from .general_data_structure_transporter import GeneralDataStructureTransporter
class TreePredictorTransporter(AbstractTransporter):
    """PyMilo Transporter dedicated to TreePredictor objects (HistGradientBoosting internals)."""

    def serialize(self, data, key, model_type):
        """
        Serialize data[key] when it is, or contains, TreePredictor objects.

        A TreePredictor is turned into its pymilo dictionary form, a list is scanned recursively for
        TreePredictor items, and every other value is handed back untouched.

        :param data: the internal data dictionary of the given model
        :type data: dict
        :param key: the key of data whose value is going to be serialized
        :type key: object
        :param model_type: the model type of the ML model that owns the data dictionary
        :type model_type: str
        :return: pymilo serialized output of data[key]
        """
        target = data[key]
        if isinstance(target, TreePredictor):
            return self.serialize_tree_predictor(target)
        if isinstance(target, list):
            return self.serialize_possible_inner_tree_predictor(target)
        return target

    def deserialize(self, data, key, model_type):
        """
        Deserialize data[key] when it is, or contains, pymilo-serialized TreePredictor objects.

        A serialized TreePredictor dictionary is rebuilt into a TreePredictor, a list is scanned
        recursively for serialized items, and every other value is handed back untouched.

        :param data: the internal data dictionary of the json file previously generated by pymilo export
        :type data: dict
        :param key: the key of data whose value is going to be deserialized
        :type key: object
        :param model_type: the model type of the ML model that owns the serialized data dictionary
        :type model_type: str
        :return: pymilo deserialized output of data[key]
        """
        content = data[key]
        if self.is_serialized_treepredictor(content):
            return self.deserialize_tree_predictor(content)
        if not isinstance(content, list):
            return content
        return self.deserialize_possible_inner_tree_predictor(content)

    def is_treepredictor(self, treepredictor):
        """
        Tell whether the given object is a TreePredictor instance.

        :param treepredictor: given object to check
        :type treepredictor: any
        :return: bool
        """
        return isinstance(treepredictor, TreePredictor)

    def is_serialized_treepredictor(self, serialized_treepredictor):
        """
        Tell whether the given object is a pymilo-serialized TreePredictor.

        :param serialized_treepredictor: given object to check
        :type serialized_treepredictor: any
        :return: bool
        """
        marker = "pymiloed-data-structure"
        if not check_str_in_iterable(marker, serialized_treepredictor):
            return False
        return serialized_treepredictor[marker] == "TreePredictor"

    def serialize_tree_predictor(self, treepredictor):
        """
        Convert the given TreePredictor into its pymilo dictionary representation.

        :param treepredictor: given treepredictor to get serialized
        :type treepredictor: TreePredictor
        :return: dict
        """
        gdst = GeneralDataStructureTransporter()
        # The three ndarray attributes are everything deserialize_tree_predictor feeds back to TreePredictor.
        payload = {
            "nodes": gdst.deep_serialize_ndarray(treepredictor.nodes),
            "binned_left_cat_bitsets": gdst.deep_serialize_ndarray(treepredictor.binned_left_cat_bitsets),
            "raw_left_cat_bitsets": gdst.deep_serialize_ndarray(treepredictor.raw_left_cat_bitsets),
        }
        return {
            "pymilo-bypass": True,
            "pymiloed-data-structure": 'TreePredictor',
            "pymiloed-data": payload,
        }

    def deserialize_tree_predictor(self, serialized_tree_predictor):
        """
        Rebuild a TreePredictor from its pymilo dictionary representation.

        :param serialized_tree_predictor: pymilo-serialized treepredictor
        :type serialized_tree_predictor: dict
        :return: TreePredictor
        """
        gdst = GeneralDataStructureTransporter()
        payload = serialized_tree_predictor["pymiloed-data"]
        node_records = payload["nodes"]["pymiloed-ndarray-list"]
        # Each node record is converted to a tuple in place before the ndarray is rebuilt
        # (presumably because the nodes array has a structured dtype - TODO confirm).
        node_records[:] = [tuple(record) for record in node_records]
        binned_bitsets = payload["binned_left_cat_bitsets"]
        raw_bitsets = payload["raw_left_cat_bitsets"]
        return TreePredictor(
            nodes=gdst.deep_deserialize_ndarray(payload["nodes"]),
            binned_left_cat_bitsets=gdst.deep_deserialize_ndarray(binned_bitsets),
            raw_left_cat_bitsets=gdst.deep_deserialize_ndarray(raw_bitsets),
        )

    def serialize_possible_inner_tree_predictor(self, _list):
        """
        Walk the (possibly nested) list and serialize every TreePredictor found, in place.

        :param _list: given list to serialize inner TreePredictor objects
        :type _list: list
        :return: list
        """
        for position, item in enumerate(_list):
            if self.is_treepredictor(item):
                _list[position] = self.serialize_tree_predictor(item)
            elif isinstance(item, list):
                _list[position] = self.serialize_possible_inner_tree_predictor(item)
        return _list

    def deserialize_possible_inner_tree_predictor(self, _list):
        """
        Walk the (possibly nested) list and rebuild every pymilo-serialized TreePredictor found, in place.

        :param _list: given list to deserialize inner TreePredictor objects
        :type _list: list
        :return: list
        """
        for position, item in enumerate(_list):
            if self.is_serialized_treepredictor(item):
                _list[position] = self.deserialize_tree_predictor(item)
            elif isinstance(item, list):
                _list[position] = self.deserialize_possible_inner_tree_predictor(item)
        return _list
================================================
FILE: pymilo/utils/__init__.py
================================================
# -*- coding: utf-8 -*-
"""PyMilo utils."""
================================================
FILE: pymilo/utils/data_exporter.py
================================================
# -*- coding: utf-8 -*-
"""data exporter modules."""
from sklearn import datasets
def _split_X_y(X, y, threshold=20):
    """
    Split X and y into train and test sets.

    The last ``threshold`` samples form the test set and everything before them the train set.

    :param X: the data
    :type X: list or np.ndarray
    :param y: the targets
    :type y: list or np.ndarray
    :param threshold: number of trailing samples to hold out for testing
    :type threshold: int
    :return: X train, y train, X test, y test
    """
    if threshold == 0:
        # -0 == 0, so X[:-0] would be empty and X[-0:] would be everything, i.e. the split
        # would be inverted. With nothing held out, all samples belong to the train set.
        return X[:], y[:], X[:0], y[:0]
    X_train, X_test = X[:-threshold], X[-threshold:]
    y_train, y_test = y[:-threshold], y[-threshold:]
    return X_train, y_train, X_test, y_test
def prepare_simple_classification_datasets(threshold=50):
    """
    Load the breast cancer wisconsin dataset and split it for a classification task.

    :param threshold: threshold for train/test splitting
    :type threshold: int
    :return: X train, y train, X test, y test
    """
    features, targets = datasets.load_breast_cancer(return_X_y=True)
    return _split_X_y(X=features, y=targets, threshold=threshold)
def prepare_simple_regression_datasets(threshold=20):
    """
    Load the diabetes dataset and split it for a regression task.

    :param threshold: threshold for train/test splitting
    :type threshold: int
    :return: X train, y train, X test, y test
    """
    features, targets = datasets.load_diabetes(return_X_y=True)
    return _split_X_y(X=features, y=targets, threshold=threshold)
def prepare_simple_clustering_datasets():
    """
    Load the iris dataset for a clustering task.

    :return: tuple of (features, target labels)
    """
    iris_bunch = datasets.load_iris()
    return iris_bunch.data, iris_bunch.target
================================================
FILE: pymilo/utils/test_pymilo.py
================================================
# -*- coding: utf-8 -*-
"""pymilo test modules."""
import os
import copy
from numpy import array_equal
from ..pymilo_obj import Export
from ..pymilo_obj import Import
from ..chains.util import get_transporter
from ..pymilo_func import compare_model_outputs
from ..pymilo_param import EXPORTED_MODELS_PATH
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import accuracy_score, hinge_loss
def pymilo_export_path(model):
    """
    Look up the folder name used to store the json file that pymilo Export generates for the given model.

    :param model: given model
    :type model: any sklearn's model class
    :return: folder name
    """
    # get_transporter yields a pair; only its first element (the model type) keys the path table.
    category, _transporter = get_transporter(model)
    return EXPORTED_MODELS_PATH[category]
def pymilo_test(model, model_name):
    """
    Export the given model to a json file under the tests folder and import it back.

    :param model: given model
    :type model: any sklearn's model class
    :param model_name: model name, used as the json file name
    :type model_name: str
    :return: the model rebuilt by pymilo Import from the exported json file
    """
    export_folder = pymilo_export_path(model)
    exporter = Export(model)
    json_path = os.path.join(os.getcwd(), "tests", export_folder, model_name + '.json')
    exporter.save(json_path)
    return Import(json_path).to_model()
def pymilo_regression_test(regressor, model_name, test_data):
    """
    Check that a regressor scores the same before and after a pymilo export/import.

    :param regressor: the given regressor model
    :type regressor: any valid sklearn's regressor class
    :param model_name: model name
    :type model_name: str
    :param test_data: pair of (x_test, y_test) used for testing
    :type test_data: np.ndarray or list
    :return: result of comparing the metrics of the original and the re-imported model
    """
    x_test, y_test = test_data

    def score(predictions):
        # Metrics compared between the original and the re-imported regressor.
        return {
            "mean-error": mean_squared_error(y_test, predictions),
            "r2-score": r2_score(y_test, predictions),
        }

    metrics_before = score(regressor.predict(x_test))
    metrics_after = score(pymilo_test(regressor, model_name).predict(x_test))
    outcome = compare_model_outputs(metrics_before, metrics_after)
    report_status(outcome, model_name)
    return outcome
def pymilo_classification_test(classifier, model_name, test_data):
    """
    Check that a classifier scores the same before and after a pymilo export/import.

    :param classifier: the given classifier model
    :type classifier: any valid sklearn's classifier class
    :param model_name: model name
    :type model_name: str
    :param test_data: pair of (x_test, y_test) used for testing
    :type test_data: np.ndarray or list
    :return: result of comparing the metrics of the original and the re-imported model
    """
    x_test, y_test = test_data

    def score(predictions):
        # Metrics compared between the original and the re-imported classifier.
        return {
            "accuracy-score": accuracy_score(y_test, predictions),
            "hinge-loss": hinge_loss(y_test, predictions),
        }

    metrics_before = score(classifier.predict(x_test))
    metrics_after = score(pymilo_test(classifier, model_name).predict(x_test))
    outcome = compare_model_outputs(metrics_before, metrics_after)
    report_status(outcome, model_name)
    return outcome
def pymilo_clustering_test(clusterer, model_name, x_test, support_prediction=False):
    """
    Test the package's main structure in clustering task.

    :param clusterer: the given clusterer model
    :type clusterer: any valid sklearn's clusterer class
    :param model_name: model name
    :type model_name: str
    :param x_test: data for testing
    :type x_test: np.ndarray or list
    :param support_prediction: whether the given clusterer supports the .predict function or not
    :type support_prediction: boolean
    :return: True if the test succeed
    """
    pre_pymilo_model = copy.deepcopy(clusterer)
    post_pymilo_model = pymilo_test(clusterer, model_name)
    if support_prediction:
        pre_pymilo_model_y_pred = pre_pymilo_model.predict(x_test)
        post_pymilo_model_y_pred = post_pymilo_model.predict(x_test)
        mse = ((post_pymilo_model_y_pred - pre_pymilo_model_y_pred)**2).mean(axis=0)
        epsilon_error = 10**(-8)
        comparison_result = mse < epsilon_error
    else:
        # TODO, apply peer to peer
        # Evaluation: peer to peer field type & value check
        # Without .predict, surviving the export/import round trip is the only check made.
        comparison_result = True
    # report_status only prints and returns None, so the outcome is returned explicitly,
    # as the regression and classification tests do.
    report_status(comparison_result, model_name)
    return comparison_result
def pymilo_nearest_neighbor_test(nearest_neighbor, model_name, test_data):
    """
    Test the package's main structure in nearest neighbor task.

    The 3 nearest neighbors (with distances) of the first test sample are queried on the original
    model and on the model re-imported by pymilo, and both answers are compared.

    :param nearest_neighbor: the given nearest neighbor model
    :type nearest_neighbor: sklearn's nearest neighbor class
    :param model_name: model name
    :type model_name: str
    :param test_data: pair of (x_test, y_test); only x_test is used
    :type test_data: np.ndarray or list
    :return: True if the test succeed
    """
    x_test, _ = test_data
    pre_pymilo_kneighbors = nearest_neighbor.kneighbors([x_test[0]], 3, return_distance=True)
    post_pymilo_nearest_neighbor = pymilo_test(nearest_neighbor, model_name)
    post_pymilo_kneighbors = post_pymilo_nearest_neighbor.kneighbors([x_test[0]], 3, return_distance=True)
    comparison_result = array_equal(pre_pymilo_kneighbors, post_pymilo_kneighbors)
    report_status(comparison_result, model_name)
    # Return the outcome (previously nothing was returned) so callers can assert on it.
    return comparison_result
def report_status(result, model_name):
    """
    Print a one-line success/failure message for the given model.

    :param result: the test result
    :type result: bool
    :param model_name: model name
    :type model_name: str
    :return: None
    """
    verdict = ' succeed.' if result else ' failed.'
    print('Pymilo Test for Model: ' + model_name + verdict)
================================================
FILE: pymilo/utils/util.py
================================================
# -*- coding: utf-8 -*-
"""utility module."""
import requests
import importlib
from inspect import signature
from ..pymilo_param import DOWNLOAD_MODEL_FAILED, INVALID_DOWNLOADED_MODEL, SKLEARN_SUPPORTED_CATEGORIES
def get_sklearn_type(model):
    """
    Extract the class name of the given model from the string form of its type.

    :param model: sklearn model
    :type model: any sklearn's model class
    :return: model type as str
    """
    type_repr = str(type(model))
    # Keep what follows the last dot, then drop the trailing "'>" of "<class '...'>".
    _, _, tail = type_repr.rpartition(".")
    return tail[:-2]
def is_primitive(obj):
    """
    Tell whether the given object is primitive: not a dict and without a ``__dict__`` attribute.

    :param obj: given object
    :type obj: any valid type
    :return: True if object is primitive
    """
    return not (isinstance(obj, dict) or hasattr(obj, '__dict__'))
def is_iterable(obj):
    """
    Tell whether ``iter()`` accepts the given object.

    :param obj: given object
    :type obj: any valid type
    :return: True if object is iterable
    """
    try:
        iter(obj)
    except TypeError:
        return False
    else:
        return True
def check_str_in_iterable(field, content):
    """
    Tell whether content is a dictionary that has the given field among its keys.

    :param field: given string field
    :type field: str
    :param content: object that is supposed to be a dictionary
    :type content: obj
    :return: True if content is a dictionary containing field as a key and False otherwise.
    """
    return isinstance(content, dict) and field in content
def get_homogeneous_type(seq):
    """
    Check if the given sequence's inner items have the same type or not and if they do, return the associated type.

    Every item after the first is checked with ``isinstance`` against the first item's type,
    so subclasses of that type are accepted as well.

    :param seq: given sequence
    :type seq: sequence
    :return: Tuple of (True, inner_type) or (False, None); an empty sequence yields (False, None)
    """
    iseq = iter(seq)
    try:
        first_type = type(next(iseq))
    except StopIteration:
        # An empty sequence has no item type to report; answer explicitly instead of
        # leaking StopIteration to the caller.
        return False, None
    if all(isinstance(x, first_type) for x in iseq):
        return True, first_type
    return False, None
def all_same(arr):
    """
    Tell whether every item of the given array equals its first item.

    :param arr: given array
    :type arr: array
    :return: bool
    """
    for item in arr:
        if not item == arr[0]:
            return False
    return True
def import_function(module_name, function_name):
    """
    Import the module called module_name and fetch its attribute named function_name.

    :param module_name: module to import the function from
    :type module_name: str
    :param function_name: function's name to get imported
    :type function_name: str
    :return: function
    """
    return getattr(importlib.import_module(module_name), function_name)
def has_named_parameter(func, param_name):
    """
    Tell whether the signature of the given function declares a parameter named param_name.

    :param func: function whose parameters are inspected
    :type func: function
    :param param_name: parameter's name
    :type param_name: str
    :return: boolean
    """
    declared_names = signature(func).parameters
    return any(name == param_name for name in declared_names)
def prefix_list(list1, list2):
    """
    Tell whether list2 is a prefix of list1, i.e. list1 starts with the items of list2.

    :param list1: outer list
    :type list1: list
    :param list2: inner list (the candidate prefix)
    :type list2: list
    :return: boolean
    """
    if len(list2) > len(list1):
        return False
    # zip stops at the end of list2, the shorter (or equally long) of the two.
    return all(outer == inner for outer, inner in zip(list1, list2))
def download_model(url):
    """
    Download the model from the given url.

    The request is retried up to 5 times (with backoff) on 500/502/503/504 responses.

    :param url: url to exported JSON file
    :type url: str
    :return: obj
    :raises Exception: with DOWNLOAD_MODEL_FAILED when the request fails or the response status is not 200,
        and with INVALID_DOWNLOADED_MODEL when the response body is not valid JSON
    """
    retries = requests.adapters.Retry(
        total=5,
        backoff_factor=0.1,
        status_forcelist=[500, 502, 503, 504]
    )
    # Using the session as a context manager closes it (and its adapters) on every exit path.
    with requests.Session() as s:
        s.mount('http://', requests.adapters.HTTPAdapter(max_retries=retries))
        s.mount('https://', requests.adapters.HTTPAdapter(max_retries=retries))
        try:
            response = s.get(url)
        except Exception as error:
            raise Exception(DOWNLOAD_MODEL_FAILED) from error
        if response.status_code != 200:
            # Previously a non-200 answer fell through and silently returned None.
            raise Exception(DOWNLOAD_MODEL_FAILED)
        try:
            return response.json()
        except ValueError as error:
            raise Exception(INVALID_DOWNLOADED_MODEL) from error
def get_sklearn_class(model_name):
    """
    Look up the sklearn class registered under the requested model name.

    The categories of SKLEARN_SUPPORTED_CATEGORIES are searched in order and the first match wins.

    :param model_name: model name
    :type model_name: str
    :return: sklearn ML model class, or None when no category contains the model name
    """
    matches = (
        category_models[model_name]
        for category_models in SKLEARN_SUPPORTED_CATEGORIES.values()
        if model_name in category_models
    )
    return next(matches, None)
================================================
FILE: requirements.txt
================================================
art>=1.8
numpy>=1.9.0
requests>=2.0.0
scikit-learn>=0.22.2
scipy>=0.19.1
================================================
FILE: setup.py
================================================
# -*- coding: utf-8 -*-
"""Setup module."""
try:
from setuptools import setup, find_packages
except ImportError:
from distutils.core import setup
# Maps each installation mode to the requirements file listing its dependencies;
# get_requires() uses the mode as the lookup key.
INSTALLATION_MODES = {
    'core': 'requirements.txt',
    'streaming': 'streaming-requirements.txt',
}
def get_requires(mode='core'):
    """
    Read associated requirements to install.

    :param mode: installation mode, a key of INSTALLATION_MODES
    :type mode: str
    :return: list of requirement specifiers found in the mode's requirements file
    """
    reqs_path = INSTALLATION_MODES[mode]
    # The context manager closes the file handle, which the bare open().read() never did.
    with open(reqs_path, "r") as reqs_file:
        requirements = reqs_file.read()
    # str.split() with no separator never yields empty strings, so no extra filtering is needed.
    return requirements.split()
def read_description():
    """
    Build the long description from README.md followed by CHANGELOG.md.

    Each file's content is prefixed with a newline; if anything goes wrong while reading,
    a short default description is returned instead.
    """
    try:
        sections = []
        for file_name in ("README.md", "CHANGELOG.md"):
            with open(file_name) as handle:
                sections.append("\n" + handle.read())
        return "".join(sections)
    except Exception:
        return '''PyMilo: Python for ML I/O'''
# Package metadata and install configuration for pymilo.
setup(
    name='pymilo',
    # Ship only the pymilo package tree; the tests folder is excluded from the distribution.
    packages=find_packages(include=['pymilo*'], exclude=['tests*']),
    version='1.6',
    description='PyMilo: Python for ML I/O',
    # README.md + CHANGELOG.md, or a short fallback string when they cannot be read.
    long_description=read_description(),
    long_description_content_type='text/markdown',
    author='PyMilo Development Team',
    author_email='pymilo@openscilab.com',
    url='https://github.com/openscilab/pymilo',
    # NOTE: the tag in this url repeats the version given above.
    download_url='https://github.com/openscilab/pymilo/tarball/v1.6',
    keywords="machine_learning ml ai mlops model export import",
    project_urls={
        'Source': 'https://github.com/openscilab/pymilo',
    },
    # Core dependencies come from requirements.txt (the default 'core' mode).
    install_requires=get_requires(),
    # `pip install pymilo[streaming]` additionally installs streaming-requirements.txt.
    extras_require={
        'streaming': get_requires(mode='streaming'),
    },
    python_requires='>=3.7',
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Natural Language :: English',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Python :: 3.12',
        'Programming Language :: Python :: 3.13',
        'Programming Language :: Python :: 3.14',
        'Intended Audience :: Developers',
        'Intended Audience :: Education',
        'Intended Audience :: End Users/Desktop',
        'Intended Audience :: Manufacturing',
        'Intended Audience :: Science/Research',
        'Topic :: Scientific/Engineering :: Information Analysis',
        'Topic :: Education',
        'Topic :: Scientific/Engineering',
        'Topic :: Scientific/Engineering :: Artificial Intelligence',
        'Topic :: Scientific/Engineering :: Human Machine Interfaces',
        'Topic :: Scientific/Engineering :: Mathematics',
        'Topic :: Scientific/Engineering :: Physics',
    ],
    license='MIT',
    # Installs a `pymilo` console command that runs pymilo.__main__:main.
    entry_points={
        'console_scripts': [
            'pymilo = pymilo.__main__:main',
        ]
    }
)
================================================
FILE: streaming-requirements.txt
================================================
uvicorn>=0.14.0
fastapi>=0.68.0
pydantic>=1.5.0
websockets>=9.0
================================================
FILE: tests/test_clusterings/affinity_propagation.py
================================================
from sklearn.cluster import AffinityPropagation
from pymilo.utils.test_pymilo import pymilo_clustering_test
from pymilo.utils.data_exporter import prepare_simple_clustering_datasets
MODEL_NAME = "Affinity Propagation"


def affinity_propagation():
    """Fit AffinityPropagation on the iris data and run the pymilo clustering test on it."""
    samples, labels = prepare_simple_clustering_datasets()
    fitted_model = AffinityPropagation(random_state=5).fit(samples, labels)
    pymilo_clustering_test(fitted_model, MODEL_NAME, samples)
================================================
FILE: tests/test_clusterings/birch.py
================================================
from sklearn.cluster import Birch
from pymilo.utils.test_pymilo import pymilo_clustering_test
from pymilo.utils.data_exporter import prepare_simple_clustering_datasets
MODEL_NAME = "Birch"


def birch():
    """Fit Birch on the iris data and run the pymilo clustering test with prediction comparison."""
    x, y = prepare_simple_clustering_datasets()
    birch = Birch().fit(x, y)
    # Pass the samples as x_test and enable the prediction check. Previously `True` was passed
    # as x_test, which left support_prediction at its default (False), so the predictions of
    # the original and the re-imported model were never compared.
    pymilo_clustering_test(birch, MODEL_NAME, x, True)
================================================
FILE: tests/test_clusterings/bisecting_kmeans.py
================================================
from sklearn.cluster import BisectingKMeans
from pymilo.utils.test_pymilo import pymilo_clustering_test
from pymilo.utils.data_exporter import prepare_simple_clustering_datasets
MODEL_NAME = "Bisecting KMeans"


def bisecting_kmeans():
    """Fit BisectingKMeans on the iris data and run the pymilo clustering test with prediction comparison."""
    samples, labels = prepare_simple_clustering_datasets()
    fitted_model = BisectingKMeans(n_clusters=3, random_state=0).fit(samples, labels)
    pymilo_clustering_test(fitted_model, MODEL_NAME, samples, support_prediction=True)
================================================
FILE: tests/test_clusterings/dbscan.py
================================================
from sklearn.cluster import DBSCAN
from pymilo.utils.test_pymilo import pymilo_clustering_test
from pymilo.utils.data_exporter import prepare_simple_clustering_datasets
MODEL_NAME = "DBSCAN"


def dbscan():
    """Fit DBSCAN on the iris data and run the pymilo clustering test on it."""
    samples, labels = prepare_simple_clustering_datasets()
    fitted_model = DBSCAN(eps=3, min_samples=2).fit(samples, labels)
    pymilo_clustering_test(fitted_model, MODEL_NAME, samples)
================================================
FILE: tests/test_clusterings/gaussian_mixture/bayesian_gaussian_mixture.py
================================================
from sklearn.mixture import BayesianGaussianMixture
from pymilo.utils.test_pymilo import pymilo_clustering_test
from pymilo.utils.data_exporter import prepare_simple_clustering_datasets
MODEL_NAME = "Bayesian Gaussian Mixture"


def bayesian_gaussian_mixture():
    """Fit BayesianGaussianMixture on the iris data and run the pymilo clustering test with prediction comparison."""
    samples, labels = prepare_simple_clustering_datasets()
    fitted_model = BayesianGaussianMixture(n_components=2, random_state=42).fit(samples, labels)
    pymilo_clustering_test(fitted_model, MODEL_NAME, samples, support_prediction=True)
================================================
FILE: tests/test_clusterings/gaussian_mixture/gaussian_mixture.py
================================================
from sklearn.mixture import GaussianMixture
from pymilo.utils.test_pymilo import pymilo_clustering_test
from pymilo.utils.data_exporter import prepare_simple_clustering_datasets
MODEL_NAME = "Gaussian Mixture"


def gaussian_mixture():
    """Fit GaussianMixture on the iris data and run the pymilo clustering test on it."""
    samples, labels = prepare_simple_clustering_datasets()
    fitted_model = GaussianMixture(n_components=2, random_state=0).fit(samples, labels)
    pymilo_clustering_test(fitted_model, MODEL_NAME, samples)
================================================
FILE: tests/test_clusterings/hdbscan.py
================================================
from sklearn.cluster import HDBSCAN
from pymilo.utils.test_pymilo import pymilo_clustering_test
from pymilo.utils.data_exporter import prepare_simple_clustering_datasets
MODEL_NAME = "HDBSCAN"


def hdbscan():
    """Fit HDBSCAN on the iris data and run the pymilo clustering test on it."""
    samples, labels = prepare_simple_clustering_datasets()
    fitted_model = HDBSCAN(min_cluster_size=20).fit(samples, labels)
    pymilo_clustering_test(fitted_model, MODEL_NAME, samples)
================================================
FILE: tests/test_clusterings/hierarchical_clustering/agglomerative_clustering.py
================================================
from sklearn.cluster import AgglomerativeClustering
from pymilo.utils.test_pymilo import pymilo_clustering_test
from pymilo.utils.data_exporter import prepare_simple_clustering_datasets
MODEL_NAME = "Agglomerative Clustering"


def agglomerative_clustering():
    """Fit AgglomerativeClustering on the iris data and run the pymilo clustering test on it."""
    samples, labels = prepare_simple_clustering_datasets()
    fitted_model = AgglomerativeClustering().fit(samples, labels)
    pymilo_clustering_test(fitted_model, MODEL_NAME, samples)
================================================
FILE: tests/test_clusterings/hierarchical_clustering/feature_agglomeration.py
================================================
from sklearn.cluster import FeatureAgglomeration
from pymilo.utils.test_pymilo import pymilo_clustering_test
from pymilo.utils.data_exporter import prepare_simple_clustering_datasets
MODEL_NAME = "Feature Agglomeration"


def feature_agglomeration():
    """Fit FeatureAgglomeration on the iris data and run the pymilo clustering test on it."""
    samples, labels = prepare_simple_clustering_datasets()
    fitted_model = FeatureAgglomeration(n_clusters=2).fit(samples, labels)
    pymilo_clustering_test(fitted_model, MODEL_NAME, samples)
================================================
FILE: tests/test_clusterings/kmeans.py
================================================
from sklearn.cluster import KMeans
from pymilo.utils.test_pymilo import pymilo_clustering_test
from pymilo.utils.data_exporter import prepare_simple_clustering_datasets
MODEL_NAME = "Kmeans"


def kmeans():
    """Fit KMeans on the iris data and run the pymilo clustering test on it."""
    samples, labels = prepare_simple_clustering_datasets()
    fitted_model = KMeans(n_clusters=2, random_state=0).fit(samples, labels)
    pymilo_clustering_test(fitted_model, MODEL_NAME, samples)
================================================
FILE: tests/test_clusterings/mean_shift.py
================================================
from sklearn.cluster import MeanShift
from pymilo.utils.test_pymilo import pymilo_clustering_test
from pymilo.utils.data_exporter import prepare_simple_clustering_datasets
MODEL_NAME = "Mean Shift"


def mean_shift():
    """Fit MeanShift on the iris data and run the pymilo clustering test on it."""
    samples, labels = prepare_simple_clustering_datasets()
    fitted_model = MeanShift(bandwidth=2).fit(samples, labels)
    pymilo_clustering_test(fitted_model, MODEL_NAME, samples)
================================================
FILE: tests/test_clusterings/minibatch_kmeans.py
================================================
from sklearn.cluster import MiniBatchKMeans
from pymilo.utils.test_pymilo import pymilo_clustering_test
from pymilo.utils.data_exporter import prepare_simple_clustering_datasets
MODEL_NAME = "MiniBatch KMeans"


def minibatch_kmeans():
    """Fit MiniBatchKMeans on the iris data and run the pymilo clustering test with prediction comparison."""
    samples, labels = prepare_simple_clustering_datasets()
    fitted_model = MiniBatchKMeans(n_clusters=2, random_state=2, batch_size=6, max_iter=10).fit(samples, labels)
    pymilo_clustering_test(fitted_model, MODEL_NAME, samples, support_prediction=True)
================================================
FILE: tests/test_clusterings/optics.py
================================================
from sklearn.cluster import OPTICS
from pymilo.utils.test_pymilo import pymilo_clustering_test
from pymilo.utils.data_exporter import prepare_simple_clustering_datasets
MODEL_NAME = "OPTICS"


def optics():
    """Fit OPTICS on the iris data and run the pymilo clustering test on it."""
    samples, labels = prepare_simple_clustering_datasets()
    fitted_model = OPTICS().fit(samples, labels)
    pymilo_clustering_test(fitted_model, MODEL_NAME, samples)
================================================
FILE: tests/test_clusterings/spectral_clustering/spectral_biclustering.py
================================================
from sklearn.cluster import SpectralBiclustering
from pymilo.utils.test_pymilo import pymilo_clustering_test
from pymilo.utils.data_exporter import prepare_simple_clustering_datasets
MODEL_NAME = "Spectral Biclustering"


def spectral_biclustering():
    """Fit SpectralBiclustering on the iris data and run the pymilo clustering test on it."""
    samples, labels = prepare_simple_clustering_datasets()
    fitted_model = SpectralBiclustering(n_clusters=2, random_state=0).fit(samples, labels)
    pymilo_clustering_test(fitted_model, MODEL_NAME, samples)
================================================
FILE: tests/test_clusterings/spectral_clustering/spectral_clustering.py
================================================
from sklearn.cluster import SpectralClustering
from pymilo.utils.test_pymilo import pymilo_clustering_test
from pymilo.utils.data_exporter import prepare_simple_clustering_datasets
MODEL_NAME = "Spectral Clustering"


def spectral_clustering():
    """Fit SpectralClustering on the iris data and run the pymilo clustering test on it."""
    samples, labels = prepare_simple_clustering_datasets()
    fitted_model = SpectralClustering(random_state=5).fit(samples, labels)
    pymilo_clustering_test(fitted_model, MODEL_NAME, samples)
================================================
FILE: tests/test_clusterings/spectral_clustering/spectral_coclustering.py
================================================
from sklearn.cluster import SpectralCoclustering
from pymilo.utils.test_pymilo import pymilo_clustering_test
from pymilo.utils.data_exporter import prepare_simple_clustering_datasets
MODEL_NAME = "Spectral Coclustering"


def spectral_coclustering():
    """Fit SpectralCoclustering on the iris data and run the pymilo clustering test on it."""
    samples, labels = prepare_simple_clustering_datasets()
    fitted_model = SpectralCoclustering(n_clusters=2, random_state=0).fit(samples, labels)
    pymilo_clustering_test(fitted_model, MODEL_NAME, samples)
================================================
FILE: tests/test_clusterings/test_clusterings.py
================================================
import os
import pytest
from pymilo.pymilo_param import SKLEARN_CLUSTERING_TABLE, NOT_SUPPORTED
from birch import birch
from kmeans import kmeans
from minibatch_kmeans import minibatch_kmeans
from affinity_propagation import affinity_propagation
from mean_shift import mean_shift
from dbscan import dbscan
from optics import optics
from spectral_clustering.spectral_clustering import spectral_clustering
from spectral_clustering.spectral_biclustering import spectral_biclustering
from spectral_clustering.spectral_coclustering import spectral_coclustering
from gaussian_mixture.gaussian_mixture import gaussian_mixture
from gaussian_mixture.bayesian_gaussian_mixture import bayesian_gaussian_mixture
from hierarchical_clustering.agglomerative_clustering import agglomerative_clustering
from hierarchical_clustering.feature_agglomeration import feature_agglomeration
# Optional estimators: their test function is imported only when SKLEARN_CLUSTERING_TABLE
# does not mark the estimator as NOT_SUPPORTED.
bisecting_kmeans_support = SKLEARN_CLUSTERING_TABLE["BisectingKMeans"] != NOT_SUPPORTED
if bisecting_kmeans_support:
    from bisecting_kmeans import bisecting_kmeans
hdbscan_support = SKLEARN_CLUSTERING_TABLE["HDBSCAN"] != NOT_SUPPORTED
if hdbscan_support:
    from hdbscan import hdbscan
# Test callables grouped by clustering family. An unsupported estimator is represented by a
# (None, "<model name>") placeholder tuple, which test_full reports and skips.
CLUSTERINGS = {
    "KMEANS": [kmeans, bisecting_kmeans if bisecting_kmeans_support else (None,"BisectingKMeans"), minibatch_kmeans],
    "AFFINITY_PROPAGATION": [affinity_propagation],
    "MEAN_SHIFT": [mean_shift],
    "DBSCAN": [dbscan, hdbscan if hdbscan_support else (None,"HDBSCAN")],
    "OPTICS": [optics],
    "SPECTRAL_CLUSTERING": [spectral_clustering, spectral_biclustering, spectral_coclustering],
    "GAUSSIAN_MIXTURE": [gaussian_mixture, bayesian_gaussian_mixture],
    "HIERARCHICAL_CLUSTERING": [agglomerative_clustering, feature_agglomeration],
    "BIRCH": [birch],
}
@pytest.fixture(scope="session", autouse=True)
def reset_exported_models_directory():
    """Create tests/exported_clusterings if it is missing, otherwise remove the files directly inside it."""
    target_directory = os.path.join(os.getcwd(), "tests", "exported_clusterings")
    if os.path.isdir(target_directory):
        for entry_name in os.listdir(target_directory):
            entry_path = os.path.join(target_directory, entry_name)
            # Only regular files are deleted; sub-directories are left alone.
            if os.path.isfile(entry_path):
                os.remove(entry_path)
    else:
        os.mkdir(target_directory)
def test_full():
    """
    Run every registered clustering test.

    Entries of CLUSTERINGS are either callables or (callable, model name) tuples; a tuple whose
    callable is None marks a model that is not supported and is reported and skipped.
    """
    for models in CLUSTERINGS.values():
        for model in models:
            if isinstance(model, tuple):
                func, model_name = model
                if func is None:
                    print("Model: " + model_name + " is not supported in this python version.")
                    continue
                # Call the wrapped function, not the tuple itself (a tuple is not callable).
                model = func
            model()
================================================
FILE: tests/test_composes/column_transformer.py
================================================
from numpy import array, array_equal
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import Normalizer, MinMaxScaler
from sklearn.feature_extraction.text import CountVectorizer
from util import get_path, write_and_read
from pymilo.transporters.compose_transporter import ComposeTransporter
from pymilo.utils.test_pymilo import report_status
MODEL_NAME = "ColumnTransformer"


def column_transformer():
    """Check that a two-Normalizer ColumnTransformer transforms identically after a pymilo round trip."""
    transformer = ColumnTransformer(
        [("norm1", Normalizer(norm='l1'), [0, 1]),
         ("norm2", Normalizer(norm='l1'), slice(2, 4))])
    samples = array([[0., 1., 2., 2.],
                     [1., 1., 0., 1.]])
    # Normalizer scales each row to unit norm; the first two and the last two
    # elements of every row are scaled independently of each other.
    expected = transformer.fit_transform(samples)
    transporter = ComposeTransporter()
    serialized = transporter.serialize_compose_internal_model(transformer)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME,1))
    restored_transformer = transporter.deserialize_compose_internal_model(reloaded)
    actual = restored_transformer.transform(samples)
    comparison_result = array_equal(expected, actual)
    report_status(comparison_result, MODEL_NAME)
    assert comparison_result
def complex_column_transformer():
    """Check that a text + numeric ColumnTransformer transforms identically after a pymilo round trip."""
    # A plain 2D numpy array is used (no pandas dependency), so columns are selected by index.
    samples = array(
        [["First item", 3.0], ["second one here", 4.0], ["Is this the last?", 5.0]],
        dtype=object,
    )
    transformer = ColumnTransformer(
        [("text_preprocess", CountVectorizer(), 0),
         ("num_preprocess", MinMaxScaler(), [1])])
    expected = transformer.fit_transform(samples)
    transporter = ComposeTransporter()
    serialized = transporter.serialize_compose_internal_model(transformer)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME,2))
    restored_transformer = transporter.deserialize_compose_internal_model(reloaded)
    actual = restored_transformer.transform(samples)
    comparison_result = array_equal(expected, actual)
    report_status(comparison_result, MODEL_NAME)
    assert comparison_result
def nested_column_transformer_with_pipeline():
    """Round-trip a ColumnTransformer that wraps a Pipeline containing another ColumnTransformer.

    Layout: outer ColumnTransformer -> Pipeline(MinMaxScaler, inner
    ColumnTransformer) -> MinMaxScaler on the first two columns. The data is
    purely numeric so the output can be compared with ``array_equal``.
    """
    from sklearn.pipeline import Pipeline

    samples = array([[0., 1., 2., 3.],
                     [1., 1., 0., 1.],
                     [2., 0., 1., 0.]])

    # Innermost stage: rescales columns 0 and 1 of whatever the pipeline's
    # first step produced.
    nested_ct = ColumnTransformer([("minmax_first_two", MinMaxScaler(), [0, 1])])

    # The pipeline itself acts as a transformer of the outer ColumnTransformer.
    scaling_pipeline = Pipeline([("scale", MinMaxScaler()), ("inner_ct", nested_ct)])

    # The outer transformer feeds all four columns to the pipeline.
    outer_ct = ColumnTransformer([("pipe", scaling_pipeline, slice(0, 4))])
    expected = outer_ct.fit_transform(samples)

    transporter = ComposeTransporter()
    serialized = transporter.serialize_compose_internal_model(outer_ct)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME, 3))
    restored = transporter.deserialize_compose_internal_model(reloaded)

    actual = restored.transform(samples)
    comparison_result = array_equal(expected, actual)
    report_status(comparison_result, MODEL_NAME)
    assert comparison_result
================================================
FILE: tests/test_composes/test_compose_models.py
================================================
import os
import pytest
from column_transformer import (
column_transformer,
complex_column_transformer,
nested_column_transformer_with_pipeline,
)
from transformed_target_regressor import (
transformed_target_regressor,
)
# Compose test callables; test_full() invokes each one in the order listed here.
COMPOSE_MODEL_TESTS = [
column_transformer,
complex_column_transformer,
transformed_target_regressor,
nested_column_transformer_with_pipeline,
]
@pytest.fixture(scope="session", autouse=True)
def reset_exported_models_directory():
    """Prepare ``<cwd>/tests/exported_composes`` once per test session.

    Creates the directory when it is missing; otherwise removes every regular
    file directly inside it (sub-directories are left alone).
    """
    export_dir = os.path.join(os.getcwd(), "tests", "exported_composes")
    if os.path.isdir(export_dir):
        for entry in os.listdir(export_dir):
            entry_path = os.path.join(export_dir, entry)
            if os.path.isfile(entry_path):
                os.remove(entry_path)
    else:
        os.mkdir(export_dir)
def test_full():
    """Run every callable registered in COMPOSE_MODEL_TESTS, printing its name first."""
    for compose_test in COMPOSE_MODEL_TESTS:
        test_name = compose_test.__name__
        print("Testing model: ", test_name)
        compose_test()
================================================
FILE: tests/test_composes/transformed_target_regressor.py
================================================
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.compose import TransformedTargetRegressor
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
from pymilo.utils.test_pymilo import pymilo_regression_test
MODEL_NAME = "TransformedTargetRegressor"


def transformed_target_regressor():
    """Fit a log/exp TransformedTargetRegressor over LinearRegression and assert the PyMilo regression test result."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    model = TransformedTargetRegressor(
        regressor=LinearRegression(),
        func=np.log,
        inverse_func=np.exp,
    )
    model.fit(x_train, y_train)
    outcome = pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
    # Explicit comparison kept on purpose: only an outcome equal to True passes.
    assert outcome == True
def complex_transformed_target_regressor():
    """Fit a Pipeline of StandardScaler + TransformedTargetRegressor(SGDRegressor) and assert the PyMilo regression test result."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()

    # The SGD regressor is fitted on its own before being wrapped.
    sgd_regression = SGDRegressor(max_iter=1000, tol=1e-3)
    sgd_regression.fit(x_train, y_train)

    # Separate scalers for the features (pipeline step) and the target (TTR transformer).
    feature_scaler = StandardScaler()
    target_scaler = StandardScaler()
    wrapped_regressor = TransformedTargetRegressor(
        regressor=sgd_regression, transformer=target_scaler)

    model = Pipeline([("x_scaler", feature_scaler), ("ttr", wrapped_regressor)])
    model.fit(x_train, y_train)

    outcome = pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
    # Explicit comparison kept on purpose: only an outcome equal to True passes.
    assert outcome == True
================================================
FILE: tests/test_composes/util.py
================================================
import os
import json
def write_and_read(serialized_model, file_addr):
    """Dump ``serialized_model`` as indented JSON to ``file_addr`` and return what is read back from that file.

    Parameters
    ----------
    serialized_model : JSON-serializable object to persist.
    file_addr : path of the file to (over)write and then re-read.

    Returns
    -------
    The object obtained by parsing the file that was just written.
    """
    with open(file_addr, 'w') as writer:
        json_text = json.dumps(serialized_model, indent=4)
        writer.write(json_text)
    with open(file_addr, 'r') as reader:
        restored = json.load(reader)
    return restored
def get_path(model_name, index=None):
    """Build the JSON export path ``<cwd>/tests/exported_composes/<model_name>[_<index>].json``.

    The ``_<index>`` suffix is omitted only when ``index`` is ``None``
    (an index of ``0`` still produces ``_0``).
    """
    suffix = f"_{index}" if index is not None else ""
    file_name = model_name + suffix + ".json"
    return os.path.join(os.getcwd(), "tests", "exported_composes", file_name)
================================================
FILE: tests/test_cross_decomposition/cca.py
================================================
from sklearn.cross_decomposition import CCA
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "CCA"


def cca():
    """Fit a one-component CCA model and hand it, with the test split, to ``pymilo_regression_test``."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    model = CCA(n_components=1)
    model.fit(x_train, y_train)
    pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_cross_decomposition/pls_canonical.py
================================================
from sklearn.cross_decomposition import PLSCanonical
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "PLSCanonical"


def pls_canonical():
    """Fit a one-component PLSCanonical model and hand it, with the test split, to ``pymilo_regression_test``."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    model = PLSCanonical(n_components=1)
    model.fit(x_train, y_train)
    pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_cross_decomposition/pls_regression.py
================================================
from sklearn.cross_decomposition import PLSRegression
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "PLSRegression"


def pls_regressor():
    """Fit a two-component PLSRegression model and hand it, with the test split, to ``pymilo_regression_test``."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    model = PLSRegression(n_components=2)
    model.fit(x_train, y_train)
    pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_cross_decomposition/test_cross_decompositions.py
================================================
import os
import pytest
from pls_regression import pls_regressor
from pls_canonical import pls_canonical
from cca import cca
# Maps a category label to the list of test callables that test_full() runs for it.
CROSS_DECOMPOSITIONS = {
"PLS_REGRESSION": [pls_regressor],
"PLS_CANONICAL": [pls_canonical],
"CCA": [cca],
}
@pytest.fixture(scope="session", autouse=True)
def reset_exported_models_directory():
    """Prepare ``<cwd>/tests/exported_cross_decomposition`` once per test session.

    Creates the directory when it is missing; otherwise removes every regular
    file directly inside it (sub-directories are left alone).
    """
    export_dir = os.path.join(os.getcwd(), "tests", "exported_cross_decomposition")
    if os.path.isdir(export_dir):
        for entry in os.listdir(export_dir):
            entry_path = os.path.join(export_dir, entry)
            if os.path.isfile(entry_path):
                os.remove(entry_path)
    else:
        os.mkdir(export_dir)
def test_full():
    """Run every test callable registered in CROSS_DECOMPOSITIONS, category by category."""
    for category_tests in CROSS_DECOMPOSITIONS.values():
        for run_test in category_tests:
            run_test()
================================================
FILE: tests/test_decision_trees/decision_tree/decision_tree_classification.py
================================================
from sklearn.tree import DecisionTreeClassifier
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "Decision Tree Classifier"


def decision_tree_classification():
    """Fit a DecisionTreeClassifier (random_state=1) and assert the PyMilo classification test result."""
    x_train, y_train, x_test, y_test = prepare_simple_classification_datasets()
    classifier = DecisionTreeClassifier(random_state=1)
    classifier = classifier.fit(x_train, y_train)
    outcome = pymilo_classification_test(classifier, MODEL_NAME, (x_test, y_test))
    # Explicit comparison kept on purpose: only an outcome equal to True passes.
    assert outcome == True
================================================
FILE: tests/test_decision_trees/decision_tree/decision_tree_regression.py
================================================
from sklearn.tree import DecisionTreeRegressor
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Decision Tree Regressor"


def decision_tree_regression():
    """Fit a DecisionTreeRegressor (random_state=1) and assert the PyMilo regression test result."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    regressor = DecisionTreeRegressor(random_state=1)
    regressor = regressor.fit(x_train, y_train)
    outcome = pymilo_regression_test(regressor, MODEL_NAME, (x_test, y_test))
    # Explicit comparison kept on purpose: only an outcome equal to True passes.
    assert outcome == True
================================================
FILE: tests/test_decision_trees/extra_tree/extra_tree_classification.py
================================================
from sklearn.tree import ExtraTreeClassifier
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "Extra Tree Classifier"


def extra_tree_classification():
    """Fit an ExtraTreeClassifier (random_state=1) and assert the PyMilo classification test result."""
    x_train, y_train, x_test, y_test = prepare_simple_classification_datasets()
    classifier = ExtraTreeClassifier(random_state=1)
    classifier = classifier.fit(x_train, y_train)
    outcome = pymilo_classification_test(classifier, MODEL_NAME, (x_test, y_test))
    # Explicit comparison kept on purpose: only an outcome equal to True passes.
    assert outcome == True
================================================
FILE: tests/test_decision_trees/extra_tree/extra_tree_regression.py
================================================
from sklearn.tree import ExtraTreeRegressor
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Extra Tree Regressor"


def extra_tree_regression():
    """Fit an ExtraTreeRegressor (random_state=0) and assert the PyMilo regression test result."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    regressor = ExtraTreeRegressor(random_state=0)
    regressor = regressor.fit(x_train, y_train)
    outcome = pymilo_regression_test(regressor, MODEL_NAME, (x_test, y_test))
    # Explicit comparison kept on purpose: only an outcome equal to True passes.
    assert outcome == True
================================================
FILE: tests/test_decision_trees/test_decision_trees.py
================================================
import os
import pytest
from decision_tree.decision_tree_regression import decision_tree_regression
from decision_tree.decision_tree_classification import decision_tree_classification
from extra_tree.extra_tree_regression import extra_tree_regression
from extra_tree.extra_tree_classification import extra_tree_classification
# Maps a category label to the list of test callables that test_full() runs for it.
DECISION_TREES = {
"DECISION_TREE": [decision_tree_regression, decision_tree_classification],
"EXTRA TREE": [extra_tree_regression, extra_tree_classification],
}
@pytest.fixture(scope="session", autouse=True)
def reset_exported_models_directory():
    """Prepare ``<cwd>/tests/exported_decision_trees`` once per test session.

    Creates the directory when it is missing; otherwise removes every regular
    file directly inside it (sub-directories are left alone).
    """
    export_dir = os.path.join(os.getcwd(), "tests", "exported_decision_trees")
    if os.path.isdir(export_dir):
        for entry in os.listdir(export_dir):
            entry_path = os.path.join(export_dir, entry)
            if os.path.isfile(entry_path):
                os.remove(entry_path)
    else:
        os.mkdir(export_dir)
def test_full():
    """Run every test callable registered in DECISION_TREES, category by category."""
    for category_tests in DECISION_TREES.values():
        for run_test in category_tests:
            run_test()
================================================
FILE: tests/test_ensembles/adaboost/adaboost_classifier.py
================================================
from sklearn.ensemble import AdaBoostClassifier
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "AdaBoostClassifier"


def adaboost_classifier():
    """Fit a 100-estimator AdaBoostClassifier and hand it, with the test split, to ``pymilo_classification_test``."""
    x_train, y_train, x_test, y_test = prepare_simple_classification_datasets()
    model = AdaBoostClassifier(n_estimators=100, random_state=0)
    model.fit(x_train, y_train)
    pymilo_classification_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_ensembles/adaboost/adaboost_regressor.py
================================================
from sklearn.ensemble import AdaBoostRegressor
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "AdaBoostRegressor"


def adaboost_regressor():
    """Fit a 100-estimator AdaBoostRegressor and hand it, with the test split, to ``pymilo_regression_test``."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    model = AdaBoostRegressor(random_state=0, n_estimators=100)
    model.fit(x_train, y_train)
    pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_ensembles/bagging/bagging_classifier.py
================================================
from sklearn.ensemble import BaggingClassifier
from sklearn.svm import SVC
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
from pymilo.utils.util import has_named_parameter
MODEL_NAME = "BaggingClassifier"


def bagging_classifier():
    """Fit a 10-estimator BaggingClassifier and hand it, with the test split, to ``pymilo_classification_test``.

    An SVC base estimator is supplied only when ``has_named_parameter``
    reports that the constructor accepts ``estimator``; otherwise the
    constructor's default base estimator is used.
    """
    x_train, y_train, x_test, y_test = prepare_simple_classification_datasets()
    init_kwargs = {"n_estimators": 10, "random_state": 0}
    if has_named_parameter(BaggingClassifier, "estimator"):
        init_kwargs["estimator"] = SVC()
    model = BaggingClassifier(**init_kwargs)
    model.fit(x_train, y_train)
    pymilo_classification_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_ensembles/bagging/bagging_regressor.py
================================================
from sklearn.ensemble import BaggingRegressor
from sklearn.svm import SVR
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
from pymilo.utils.util import has_named_parameter
MODEL_NAME = "BaggingRegressor"


def bagging_regressor():
    """Fit a 10-estimator BaggingRegressor and hand it, with the test split, to ``pymilo_regression_test``.

    An SVR base estimator is supplied only when ``has_named_parameter``
    reports that the constructor accepts ``estimator``; otherwise the
    constructor's default base estimator is used.
    """
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    init_kwargs = {"n_estimators": 10, "random_state": 0}
    if has_named_parameter(BaggingRegressor, "estimator"):
        init_kwargs["estimator"] = SVR()
    model = BaggingRegressor(**init_kwargs)
    model.fit(x_train, y_train)
    pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_ensembles/extra_trees/extra_trees_classifier.py
================================================
from sklearn.ensemble import ExtraTreesClassifier
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "ExtraTreesClassifier"


def extra_trees_classifier():
    """Fit a 100-estimator ExtraTreesClassifier and hand it, with the test split, to ``pymilo_classification_test``."""
    x_train, y_train, x_test, y_test = prepare_simple_classification_datasets()
    model = ExtraTreesClassifier(n_estimators=100, random_state=0)
    model.fit(x_train, y_train)
    pymilo_classification_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_ensembles/extra_trees/extra_trees_regressor.py
================================================
from sklearn.ensemble import ExtraTreesRegressor
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "ExtraTreesRegressor"


def extra_trees_regressor():
    """Fit a 100-estimator ExtraTreesRegressor and hand it, with the test split, to ``pymilo_regression_test``."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    model = ExtraTreesRegressor(n_estimators=100, random_state=0)
    model.fit(x_train, y_train)
    pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_ensembles/gradient_booster/gradient_booster_classifier.py
================================================
from sklearn.ensemble import GradientBoostingClassifier
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "GradientBoostingClassifier"


def gradient_booster_classifier():
    """Fit a depth-1, 100-estimator GradientBoostingClassifier and hand it to ``pymilo_classification_test``."""
    x_train, y_train, x_test, y_test = prepare_simple_classification_datasets()
    model = GradientBoostingClassifier(
        n_estimators=100,
        learning_rate=1.0,
        max_depth=1,
        random_state=0,
    )
    model.fit(x_train, y_train)
    pymilo_classification_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_ensembles/gradient_booster/gradient_booster_regressor.py
================================================
from sklearn.ensemble import GradientBoostingRegressor
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "GradientBoostingRegressor"


def gradient_booster_regressor():
    """Fit a GradientBoostingRegressor (random_state=0) and hand it to ``pymilo_regression_test``."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    model = GradientBoostingRegressor(random_state=0)
    # A scalar sample_weight of 1 is passed straight through to fit().
    model.fit(x_train, y_train, sample_weight=1)
    pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_ensembles/hist_gradient_boosting/hist_gradient_boosting_classifier.py
================================================
from sklearn.ensemble import HistGradientBoostingClassifier
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "HistGradientBoostingClassifier"


def hist_gradient_boosting_classifier():
    """Fit a default HistGradientBoostingClassifier and hand it, with the test split, to ``pymilo_classification_test``."""
    x_train, y_train, x_test, y_test = prepare_simple_classification_datasets()
    model = HistGradientBoostingClassifier()
    model.fit(x_train, y_train)
    pymilo_classification_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_ensembles/hist_gradient_boosting/hist_gradient_boosting_regressor.py
================================================
from sklearn.ensemble import HistGradientBoostingRegressor
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "HistGradientBoostingRegressor"


def hist_gradient_boosting_regressor():
    """Fit a default HistGradientBoostingRegressor and hand it, with the test split, to ``pymilo_regression_test``."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    model = HistGradientBoostingRegressor()
    model.fit(x_train, y_train)
    pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_ensembles/isolation_forest.py
================================================
from sklearn.ensemble import IsolationForest
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "IsolationForest"


def isolation_forest():
    """Fit an IsolationForest (random_state=0) and hand it, with the test split, to ``pymilo_regression_test``."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    model = IsolationForest(random_state=0)
    model.fit(x_train, y_train)
    pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_ensembles/pipeline.py
================================================
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "Pipeline"


def pipeline():
    """Fit a StandardScaler -> SVC Pipeline and hand it, with the test split, to ``pymilo_classification_test``."""
    x_train, y_train, x_test, y_test = prepare_simple_classification_datasets()
    steps = [
        ('scaler', StandardScaler()),
        ('svc', SVC()),
    ]
    model = Pipeline(steps)
    model.fit(x_train, y_train)
    pymilo_classification_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_ensembles/random_forests/random_forest_classifier.py
================================================
from sklearn.ensemble import RandomForestClassifier
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "RandomForestClassifier"


def random_forest_classifier():
    """Fit a depth-2 RandomForestClassifier and hand it, with the test split, to ``pymilo_classification_test``."""
    x_train, y_train, x_test, y_test = prepare_simple_classification_datasets()
    model = RandomForestClassifier(max_depth=2, random_state=0)
    model.fit(x_train, y_train)
    pymilo_classification_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_ensembles/random_forests/random_forest_regressor.py
================================================
from sklearn.ensemble import RandomForestRegressor
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "RandomForestRegressor"


def random_forest_regressor():
    """Fit a depth-2 RandomForestRegressor and hand it, with the test split, to ``pymilo_regression_test``."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    model = RandomForestRegressor(max_depth=2, random_state=0)
    # A scalar sample_weight of 1 is passed straight through to fit().
    model.fit(x_train, y_train, sample_weight=1)
    pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_ensembles/random_trees_embedding.py
================================================
from sklearn.ensemble import RandomTreesEmbedding
from pymilo.utils.test_pymilo import pymilo_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "RandomTreesEmbedding"


def random_trees_embedding():
    """Fit a small RandomTreesEmbedding (5 trees, depth 1) and hand it to ``pymilo_test``.

    Only the training split is used; the test split is discarded.
    """
    x_train, y_train, _, _ = prepare_simple_regression_datasets()
    model = RandomTreesEmbedding(n_estimators=5, random_state=0, max_depth=1)
    model.fit(x_train, y_train)
    pymilo_test(model, MODEL_NAME)
================================================
FILE: tests/test_ensembles/stacking/stacking_classifier.py
================================================
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "StackingClassifier"


def stacking_classifier():
    """Fit a StackingClassifier (random forest + LinearSVC pipeline, LogisticRegression on top) and hand it to ``pymilo_classification_test``."""
    x_train, y_train, x_test, y_test = prepare_simple_classification_datasets()
    forest = RandomForestClassifier(n_estimators=10, random_state=42)
    svc_pipeline = make_pipeline(LinearSVC(random_state=42))
    base_estimators = [('rf', forest), ('svr', svc_pipeline)]
    model = StackingClassifier(
        estimators=base_estimators,
        final_estimator=LogisticRegression(),
    )
    model.fit(x_train, y_train)
    pymilo_classification_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_ensembles/stacking/stacking_regressor.py
================================================
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import RidgeCV
from sklearn.svm import LinearSVR
from sklearn.ensemble import RandomForestRegressor
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "StackingRegressor"


def stacking_regressor():
    """Fit a StackingRegressor (RidgeCV + LinearSVR, random forest on top) and hand it to ``pymilo_regression_test``."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    base_estimators = [
        ('lr', RidgeCV()),
        ('svr', LinearSVR(random_state=42)),
    ]
    meta_estimator = RandomForestRegressor(n_estimators=10, random_state=42)
    model = StackingRegressor(
        estimators=base_estimators,
        final_estimator=meta_estimator,
    )
    model.fit(x_train, y_train)
    pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_ensembles/test_ensembles.py
================================================
import os
import pytest
from adaboost.adaboost_regressor import adaboost_regressor
from adaboost.adaboost_classifier import adaboost_classifier
from bagging.bagging_regressor import bagging_regressor
from bagging.bagging_classifier import bagging_classifier
from extra_trees.extra_trees_regressor import extra_trees_regressor
from extra_trees.extra_trees_classifier import extra_trees_classifier
from gradient_booster.gradient_booster_regressor import gradient_booster_regressor
from gradient_booster.gradient_booster_classifier import gradient_booster_classifier
from random_forests.random_forest_regressor import random_forest_regressor
from random_forests.random_forest_classifier import random_forest_classifier
from isolation_forest import isolation_forest
from random_trees_embedding import random_trees_embedding
from stacking.stacking_regressor import stacking_regressor
from stacking.stacking_classifier import stacking_classifier
from voting.voting_regressor import voting_regressor
from voting.voting_classifier import voting_classifier
from pipeline import pipeline
from tests.test_composes.transformed_target_regressor import complex_transformed_target_regressor
from pymilo.pymilo_param import SKLEARN_ENSEMBLE_TABLE, NOT_SUPPORTED
# Each HistGradientBoosting test function is imported only when its OWN entry
# in SKLEARN_ENSEMBLE_TABLE is supported. ENSEMBLES below applies the same
# per-model check, so a name can never be referenced without having been
# imported (previously the classifier import was gated on the regressor entry).
_HIST_GB_REGRESSOR_SUPPORTED = SKLEARN_ENSEMBLE_TABLE["HistGradientBoostingRegressor"] != NOT_SUPPORTED
_HIST_GB_CLASSIFIER_SUPPORTED = SKLEARN_ENSEMBLE_TABLE["HistGradientBoostingClassifier"] != NOT_SUPPORTED
if _HIST_GB_REGRESSOR_SUPPORTED:
    from hist_gradient_boosting.hist_gradient_boosting_regressor import hist_gradient_boosting_regressor
if _HIST_GB_CLASSIFIER_SUPPORTED:
    from hist_gradient_boosting.hist_gradient_boosting_classifier import hist_gradient_boosting_classifier

# Maps a category label to the test callables run by test_full(). An entry may
# instead be a ``(None, model_name)`` tuple, marking a model that is not
# supported in the current environment.
ENSEMBLES = {
    "Adaboost": [adaboost_regressor, adaboost_classifier],
    "Bagging": [bagging_regressor, bagging_classifier],
    "ExtaTrees": [extra_trees_regressor, extra_trees_classifier],
    "GradientBooster": [gradient_booster_regressor, gradient_booster_classifier],
    "HistGradientBooster": [
        hist_gradient_boosting_regressor if _HIST_GB_REGRESSOR_SUPPORTED else (None, "HistGradientBoostingRegressor"),
        hist_gradient_boosting_classifier if _HIST_GB_CLASSIFIER_SUPPORTED else (None, "HistGradientBoostingClassifier"),
    ],
    "Forests": [random_forest_regressor, random_forest_classifier, isolation_forest, random_trees_embedding],
    "Stacking": [stacking_regressor, stacking_classifier],
    "Voting": [voting_regressor, voting_classifier],
    "Pipeline": [pipeline, complex_transformed_target_regressor],
}
@pytest.fixture(scope="session", autouse=True)
def reset_exported_models_directory():
    """Prepare ``<cwd>/tests/exported_ensembles`` once per test session.

    Creates the directory when it is missing; otherwise removes every regular
    file directly inside it (sub-directories are left alone).
    """
    export_dir = os.path.join(os.getcwd(), "tests", "exported_ensembles")
    if os.path.isdir(export_dir):
        for entry in os.listdir(export_dir):
            entry_path = os.path.join(export_dir, entry)
            if os.path.isfile(entry_path):
                os.remove(entry_path)
    else:
        os.mkdir(export_dir)
def test_full():
    """Run every ensemble test registered in ENSEMBLES.

    Entries are either plain callables or ``(callable_or_None, model_name)``
    tuples. A tuple whose first item is ``None`` marks an unsupported model:
    a notice is printed and the entry is skipped. A tuple carrying a real
    callable has that callable executed.
    """
    for category_tests in ENSEMBLES.values():
        for model in category_tests:
            if isinstance(model, tuple):
                func, model_name = model
                if func is None:
                    print("Model: " + model_name + " is not supported in this python version.")
                    continue
                # Run the wrapped callable; calling the tuple itself would raise TypeError.
                model = func
            model()
================================================
FILE: tests/test_ensembles/voting/voting_classifier.py
================================================
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "VotingClassifier"


def voting_classifier():
    """Fit a hard-voting VotingClassifier over three classifiers and hand it to ``pymilo_classification_test``."""
    x_train, y_train, x_test, y_test = prepare_simple_classification_datasets()
    members = [
        ('lr', LogisticRegression(random_state=1)),
        ('rf', RandomForestClassifier(n_estimators=50, random_state=1)),
        ('r3', GaussianNB()),
    ]
    model = VotingClassifier(members, voting='hard')
    model.fit(x_train, y_train)
    pymilo_classification_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_ensembles/voting/voting_regressor.py
================================================
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.neighbors import KNeighborsRegressor
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "VotingRegressor"


def voting_regressor():
    """Fit a VotingRegressor over three regressors and hand it, with the test split, to ``pymilo_regression_test``."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    members = [
        ('lr', LinearRegression()),
        ('rf', RandomForestRegressor(n_estimators=10, random_state=1)),
        ('r3', KNeighborsRegressor()),
    ]
    model = VotingRegressor(members)
    model.fit(x_train, y_train)
    pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_exceptions/custom_models.py
================================================
from collections import namedtuple
import numpy as np
DistributionBoundary = namedtuple("DistributionBoundary", ["value", "inclusive"])


class CustomizedTweedieDistribution:
    """Small custom object with a ``power`` property, used by the exception tests as a field value."""

    def __init__(self, power=0):
        # Assignment goes through the property setter below.
        self.power = power

    @property
    def power(self):
        """Return the stored power value."""
        return self._power

    @power.setter
    def power(self, power):
        # Every assignment resets the lower bound to an inclusive 0 before
        # the new power is stored.
        self._lower_bound = DistributionBoundary(value=0, inclusive=True)
        self._power = power

    def unit_variance(self, y_pred):
        """Return ``y_pred`` raised element-wise to the current power."""
        exponent = self.power
        return np.power(y_pred, exponent)

    def unit_deviance(self, y, y_pred, check_input=False):
        """Return the squared difference between ``y`` and ``y_pred``; ``check_input`` is unused."""
        residual = y - y_pred
        return residual ** 2
================================================
FILE: tests/test_exceptions/export_exceptions.py
================================================
# INVALID_MODEL = 1 -> tested.
# VALID_MODEL_INVALID_INTERNAL_STRUCTURE = 2 -> tested.
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.chains.neural_network_chain import neural_network_chain
from pymilo.transporters.transporter import Command
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from custom_models import CustomizedTweedieDistribution
import numpy as np
# Learning model, but an invalid one.
# test case for INVALID_MODEL.
class InvalidModel:
    """Stand-in "model" exposing fit/predict, used as the INVALID_MODEL test case."""

    def __init__(self):
        self.name = "Invalid Linear Model"

    def fit(self, x, y):
        """Accept training data and do nothing."""
        return None

    def predict(self, x):
        """Return a list holding one ``0`` per row of ``x``."""
        row_count = np.shape(x)[0]
        return [0] * row_count
def invalid_model(print_output = True):
    """Return True when ``pymilo_regression_test`` raises for an ``InvalidModel`` instance, False otherwise.

    When ``print_output`` is truthy the caught exception is printed.
    """
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    model = InvalidModel()
    model.fit(x_train, y_train)
    try:
        pymilo_regression_test(model, model.name, (x_test, y_test))
    except Exception as e:
        if print_output:
            print("An Exception occured\n", e)
        return True
    else:
        return False
def valid_model_invalid_structure_linear_model(print_output = True):
    """Return True when ``pymilo_regression_test`` raises for a LinearRegression carrying an extra custom-object field.

    A ``CustomizedTweedieDistribution`` is stored under ``invalid_field`` in
    the estimator's ``__dict__`` before fitting. When ``print_output`` is
    truthy the caught exception is printed.
    """
    MODEL_NAME = "LinearRegression"
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    model = LinearRegression()
    vars(model)["invalid_field"] = CustomizedTweedieDistribution(power=1.5)
    model.fit(x_train, y_train)
    try:
        pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
    except Exception as e:
        if print_output:
            print("An Exception occured\n", e)
        return True
    else:
        return False
def valid_model_irrelevant_chain(print_output = True):
    """Return True when serializing a fitted LinearRegression through ``neural_network_chain`` raises, False otherwise.

    When ``print_output`` is truthy the caught exception is printed.
    """
    x_train, y_train, _, _ = prepare_simple_regression_datasets()
    model = LinearRegression()
    model.fit(x_train, y_train)
    try:
        # A linear model is deliberately sent through the neural-network chain.
        neural_network_chain.transport(model, Command.SERIALIZE)
    except Exception as e:
        if print_output:
            print("An Exception occured\n", e)
        return True
    else:
        return False
def valid_model_invalid_structure_neural_network(print_output=True):
    """Check that an ``MLPRegressor`` carrying an extra foreign attribute is rejected.

    A ``CustomizedTweedieDistribution`` instance is stored under
    ``invalid_field`` in the estimator's instance dict, the estimator is
    fitted, and it is then handed to ``pymilo_regression_test``.

    :param print_output: when true, print the caught exception
    :return: True if an exception was raised, False if the call completed
    """
    MODEL_NAME = "MLPRegressor"
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    # Create the multi-layer perceptron regressor
    multi_layer_perceptron_regression = MLPRegressor(random_state=1, max_iter=500)
    # Plant the foreign attribute before training, as the linear-model case does
    multi_layer_perceptron_regression.__dict__["invalid_field"] = CustomizedTweedieDistribution(power=1.5)
    # Train the model once; with a fixed random_state and no warm start a
    # second fit on the same data would only repeat the same training.
    multi_layer_perceptron_regression.fit(x_train, y_train)
    try:
        pymilo_regression_test(
            multi_layer_perceptron_regression, MODEL_NAME, (x_test, y_test))
        return False
    except Exception as e:
        if print_output:
            print("An Exception occured\n", e)
        return True
================================================
FILE: tests/test_exceptions/import_exceptions.py
================================================
# CORRUPTED_JSON_FILE = 1 -> tested.
# INVALID_MODEL = 2 -> tested.
# VALID_MODEL_INVALID_INTERNAL_STRUCTURE = 3 -> tested.
import os
from pymilo.pymilo_obj import Import
def invalid_json(print_output=True):
    """Check that every JSON file listed under ``invalid_jsons`` fails to import.

    Each file is loaded with ``Import`` and converted with ``to_model``; an
    exception is the expected outcome for every one of them.

    :param print_output: when true, print each caught exception
    :return: True only if all listed files raised, False as soon as one loads
    """
    json_files = ["corrupted", "unknown-model"]
    for json_file in json_files:
        json_path = os.path.join(
            os.getcwd(), "tests", "test_exceptions", "invalid_jsons", json_file + '.json')
        try:
            imported_model = Import(json_path)
            imported_model.to_model()
        except Exception as e:
            if print_output:
                print("An Exception occured\n", e)
            # Expected; keep going so the remaining files are checked as well
            # (returning here would leave every file after the first untested).
            continue
        # The import succeeded on a file that should have been rejected.
        return False
    return True
def invalid_url():
    """Check that ``Import`` raises when given an unusable URL.

    :return: True if an exception was raised, False if the call completed
    """
    bad_url = "https://invalid_url"
    try:
        Import(url=bad_url)
    except Exception:
        return True
    return False
def valid_url_invalid_file():
    """Check that ``Import`` raises for a URL serving a JSON file from an unrelated site.

    :return: True if an exception was raised, False if the call completed
    """
    foreign_json_url = "https://filesamples.com/samples/code/json/sample1.json"
    try:
        Import(url=foreign_json_url)
    except Exception:
        return True
    return False
def valid_url_valid_file():
    """Import the repository's ``linear_regression.json`` by URL and build the model.

    Any exception propagates to the caller; otherwise True is returned.
    """
    source_url = "https://raw.githubusercontent.com/openscilab/pymilo/main/tests/test_exceptions/valid_jsons/linear_regression.json"
    imported = Import(url=source_url)
    # The resulting model is not inspected; only a clean conversion matters.
    imported.to_model()
    return True
================================================
FILE: tests/test_exceptions/invalid_jsons/corrupted.json
================================================
{
"data": {
"n_iter": 300,
"tol": 0.001,
"alpha_1": 1e-06,
"alpha_2": 1e-06,
"lambda_1": 1e-06,
"lambda_2": 1e-06,
"alpha_init": null,
"lambda_init": null,
"compute_score": false,
"fit_intercept": true,
"copy_X": true,
"verbose": false,
"n_features_in_": 10,
"X_offset_": [
0.00039047198185187474,
-0.00014309199603861062,
0.0004729050423879101,
0.00031769088082990975,
-0.0005321043488653789,
-0.000540281996323959,
-0.0005080203823721925,
9.749742869583833e-05,
0.00016354453731930825,
-0.0004691415082391016
],
"X_scale_": [
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0
],
"scores_": [],
"n_iter_": 10,
"alpha_": 0.00033567214754955896,
"lambda_": 1.1696814565888858e-05,
"coef_": [
6.260169432328754,
-223.51864434743877,
504.4898155237174,
317.6826922409544,
-183.93709375460858,
-3.0140347144499344,
-166.81028226122402,
117.74970819732141,
490.6339122980116,
85.56330705023953
],
"sigma_": [
[
3579.602330752307,
-360.00990476089714,
-39.833887053829145,
-677.6426736494097,
-192.69161195980587,
-333.1369639704782,
-291.5207855281649,
166.8639609758337,
-416.48851363073044,
-487.84362180798183
],
[
-360.00990476089686,
3738.821765173381,
544.655627817642,
-674.7534735548464,
494.03892752340954,
-298.82757068683424,
902.917044082385,
-682.3225387742524,
305.1778851029596,
-257.3341683192198
],
[
-39.83388705382913,
544.6556278176421,
4338.24999893544,
-967.992302745072,
366.5816726368117,
-714.3457193814772,
902.2808250243974,
-17.853540520319132,
-865.0410004760379,
-616.8235023579258
],
[
-677.6426736494096,
-674.7534735548462,
-967.9923027450723,
4204.47184045613,
-351.55865308243597,
97.58076887959795,
16.213647168832498,
605.3948813145282,
-778.2934065916294,
-709.062846767269
],
[
-192.69161195980578,
494.03892752340954,
366.58167263681156,
-351.558653082436,
35727.450648570455,
-26540.29423432095,
-15225.494106017897,
-5527.399988117267,
-12198.04092770419,
-124.95284890762083
],
[
-333.1369639704781,
-298.8275706868342,
-714.3457193814772,
97.58076887959798,
-26540.29423432095,
26727.310400999704,
7445.10478909175,
-3536.960212881747,
10440.664494101131,
-37.504835508766185
],
[
-291.52078552816477,
902.917044082385,
902.2808250243974,
16.21364716883258,
-15225.4941060179,
7445.104789091751,
15547.2038315758,
11175.055399649176,
4141.551253444934,
-42.36715192288026
],
[
166.8639609758336,
-682.3225387742525,
-17.853540520319097,
605.3948813145282,
-5527.399988117267,
-3536.9602128817464,
11175.055399649176,
17732.9762426958,
-2377.7448150592045,
-628.0335721125343
],
[
-416.4885136307306,
305.1778851029597,
-865.0410004760382,
-778.2934065916294,
-12198.04092770419,
10440.664494101131,
4141.551253444934,
-2377.7448150592045,
9938.406157954483,
-834.1616253656699
],
[
-487.8436218079817,
-257.33416831921977,
-616.8235023579258,
-709.0628467672691,
-124.9528489076208,
-37.50483550876613,
-42.36715192288027,
-628.0335721125343,
-834.16162536567,
4391.940199555901
]
],
"intercept_": 152.75280574943835
},
"sklearn_version": "1.2.0",
"pymilo_version": "0.1"
"model_type": "Unknown-Model"
}
================================================
FILE: tests/test_exceptions/invalid_jsons/unknown-model.json
================================================
{
"data": {
"n_iter": 300,
"tol": 0.001,
"alpha_1": 1e-06,
"alpha_2": 1e-06,
"lambda_1": 1e-06,
"lambda_2": 1e-06,
"alpha_init": null,
"lambda_init": null,
"compute_score": false,
"fit_intercept": true,
"copy_X": true,
"verbose": false,
"n_features_in_": 10,
"X_offset_": [
0.00039047198185187474,
-0.00014309199603861062,
0.0004729050423879101,
0.00031769088082990975,
-0.0005321043488653789,
-0.000540281996323959,
-0.0005080203823721925,
9.749742869583833e-05,
0.00016354453731930825,
-0.0004691415082391016
],
"X_scale_": [
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0
],
"scores_": [],
"n_iter_": 10,
"alpha_": 0.00033567214754955896,
"lambda_": 1.1696814565888858e-05,
"coef_": [
6.260169432328754,
-223.51864434743877,
504.4898155237174,
317.6826922409544,
-183.93709375460858,
-3.0140347144499344,
-166.81028226122402,
117.74970819732141,
490.6339122980116,
85.56330705023953
],
"sigma_": [
[
3579.602330752307,
-360.00990476089714,
-39.833887053829145,
-677.6426736494097,
-192.69161195980587,
-333.1369639704782,
-291.5207855281649,
166.8639609758337,
-416.48851363073044,
-487.84362180798183
],
[
-360.00990476089686,
3738.821765173381,
544.655627817642,
-674.7534735548464,
494.03892752340954,
-298.82757068683424,
902.917044082385,
-682.3225387742524,
305.1778851029596,
-257.3341683192198
],
[
-39.83388705382913,
544.6556278176421,
4338.24999893544,
-967.992302745072,
366.5816726368117,
-714.3457193814772,
902.2808250243974,
-17.853540520319132,
-865.0410004760379,
-616.8235023579258
],
[
-677.6426736494096,
-674.7534735548462,
-967.9923027450723,
4204.47184045613,
-351.55865308243597,
97.58076887959795,
16.213647168832498,
605.3948813145282,
-778.2934065916294,
-709.062846767269
],
[
-192.69161195980578,
494.03892752340954,
366.58167263681156,
-351.558653082436,
35727.450648570455,
-26540.29423432095,
-15225.494106017897,
-5527.399988117267,
-12198.04092770419,
-124.95284890762083
],
[
-333.1369639704781,
-298.8275706868342,
-714.3457193814772,
97.58076887959798,
-26540.29423432095,
26727.310400999704,
7445.10478909175,
-3536.960212881747,
10440.664494101131,
-37.504835508766185
],
[
-291.52078552816477,
902.917044082385,
902.2808250243974,
16.21364716883258,
-15225.4941060179,
7445.104789091751,
15547.2038315758,
11175.055399649176,
4141.551253444934,
-42.36715192288026
],
[
166.8639609758336,
-682.3225387742525,
-17.853540520319097,
605.3948813145282,
-5527.399988117267,
-3536.9602128817464,
11175.055399649176,
17732.9762426958,
-2377.7448150592045,
-628.0335721125343
],
[
-416.4885136307306,
305.1778851029597,
-865.0410004760382,
-778.2934065916294,
-12198.04092770419,
10440.664494101131,
4141.551253444934,
-2377.7448150592045,
9938.406157954483,
-834.1616253656699
],
[
-487.8436218079817,
-257.33416831921977,
-616.8235023579258,
-709.0628467672691,
-124.9528489076208,
-37.50483550876613,
-42.36715192288027,
-628.0335721125343,
-834.16162536567,
4391.940199555901
]
],
"intercept_": 152.75280574943835
},
"sklearn_version": "1.2.0",
"pymilo_version": "0.1",
"model_type": "Unknown-Model"
}
================================================
FILE: tests/test_exceptions/test_exceptions.py
================================================
from export_exceptions import invalid_model
from export_exceptions import valid_model_invalid_structure_linear_model
from export_exceptions import valid_model_invalid_structure_neural_network
from export_exceptions import valid_model_irrelevant_chain
from import_exceptions import invalid_json, invalid_url, valid_url_invalid_file, valid_url_valid_file
# Exception checks grouped by category. test_full() calls every entry with no
# arguments and expects each call to return True.
EXCEPTION_TESTS = {
    'IMPORT': [
        invalid_json,
        invalid_url,
        valid_url_invalid_file,
        valid_url_valid_file,
    ],
    'EXPORT': [
        invalid_model,
        valid_model_invalid_structure_linear_model,
        valid_model_invalid_structure_neural_network,
        valid_model_irrelevant_chain
    ]
}
def test_full():
    """Run every registered exception check, category by category.

    Each check must return a true value. All checks of a category are run
    before asserting, so a failure message lists every check that did not
    pass instead of stopping at the first one.
    """
    for category, checks in EXCEPTION_TESTS.items():
        failed = []
        for test in checks:
            if test():
                # Report per check, so the name in the message is the check that ran.
                print("Test of Category: " + category + " with granularity of: " + test.__name__ + " executed successfully." )
            else:
                failed.append(test.__name__)
        assert not failed, "Category " + category + " has failing checks: " + ", ".join(failed)
================================================
FILE: tests/test_exceptions/valid_jsons/linear_regression.json
================================================
{
"data": {
"fit_intercept": true,
"copy_X": true,
"n_jobs": null,
"positive": false,
"n_features_in_": 10,
"coef_": {
"pymiloed-ndarray-list": [
0.30609424754267966,
-237.63557011300716,
510.53804765114097,
327.7298779909887,
-814.1119263534517,
492.7995945034062,
102.84123996793083,
184.6034960903708,
743.5093875957093,
76.09664636971895
],
"pymiloed-ndarray-dtype": "float64",
"pymiloed-ndarray-shape": [
10
],
"pymiloed-data-structure": "numpy.ndarray"
},
"rank_": 10,
"singular_": {
"pymiloed-ndarray-list": [
1.9578051002417796,
1.1797491126040702,
1.0755406405377144,
0.9579192686906345,
0.7980638292867588,
0.7594342409324799,
0.7216957209064547,
0.6459380350140406,
0.27271507089040337,
0.0915832239699
],
"pymiloed-ndarray-dtype": "float64",
"pymiloed-ndarray-shape": [
10
],
"pymiloed-data-structure": "numpy.ndarray"
},
"intercept_": {
"value": 152.76429169049118,
"np-type": "numpy.float64"
}
},
"sklearn_version": "1.3.0",
"pymilo_version": "0.9",
"model_type": "LinearRegression"
}
================================================
FILE: tests/test_feature_extraction/count_vectorizer.py
================================================
from numpy import array_equal
from util import get_path, write_and_read
from pymilo.utils.test_pymilo import report_status
from sklearn.feature_extraction.text import CountVectorizer
from pymilo.transporters.feature_extraction_transporter import FeatureExtractorTransporter
MODEL_NAME = "CountVectorizer"


def count_vectorizer():
    """Round-trip a fitted ``CountVectorizer`` through ``FeatureExtractorTransporter``.

    The serialized form is written to and read back from a JSON file, the
    restored vectorizer is fitted on the same corpus, and both resulting
    matrices must be equal.
    """
    corpus = [
        'This is the first document.',
        'This document is the second document.',
        'And this is the third one.',
        'Is this the first document?',
    ]
    vectorizer = CountVectorizer(analyzer='word', ngram_range=(2, 2))
    expected = vectorizer.fit_transform(corpus).toarray()

    transporter = FeatureExtractorTransporter()
    serialized = transporter.serialize_fe_module(vectorizer)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_fe_module(reloaded)

    actual = restored.fit_transform(corpus).toarray()
    matches = array_equal(expected, actual)
    report_status(matches, MODEL_NAME)
    assert matches
================================================
FILE: tests/test_feature_extraction/dict_vectorizer.py
================================================
from numpy import array_equal
from sklearn.feature_extraction import DictVectorizer
from pymilo.transporters.feature_extraction_transporter import FeatureExtractorTransporter
from pymilo.utils.test_pymilo import report_status
from util import get_path, write_and_read
MODEL_NAME = "DictVectorizer"


def dict_vectorizer():
    """Round-trip a fitted ``DictVectorizer`` through ``FeatureExtractorTransporter``.

    The original and the restored vectorizer must transform the same sample,
    which contains a feature not present during fitting, to equal arrays.
    """
    vectorizer = DictVectorizer(sparse=False)
    training_rows = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]
    # Fitting is the goal here; the transformed training rows are not used.
    vectorizer.fit_transform(training_rows)
    expected = vectorizer.transform({'foo': 4, 'unseen_feature': 3})

    transporter = FeatureExtractorTransporter()
    serialized = transporter.serialize_fe_module(vectorizer)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_fe_module(reloaded)

    actual = restored.transform({'foo': 4, 'unseen_feature': 3})
    matches = array_equal(expected, actual)
    report_status(matches, MODEL_NAME)
    assert matches
================================================
FILE: tests/test_feature_extraction/feature_hasher.py
================================================
from numpy import array_equal
from sklearn.feature_extraction import FeatureHasher
from pymilo.transporters.feature_extraction_transporter import FeatureExtractorTransporter
from pymilo.utils.test_pymilo import report_status
from util import get_path, write_and_read
MODEL_NAME = "FeatureHasher"


def feature_hasher():
    """Round-trip a ``FeatureHasher`` through ``FeatureExtractorTransporter``.

    The original and the restored hasher must transform the same rows to
    equal arrays.
    """
    hasher = FeatureHasher(n_features=10)
    rows = [{'dog': 1, 'cat':2, 'elephant':4},{'dog': 2, 'run': 5}]
    expected = hasher.transform(rows).toarray()

    transporter = FeatureExtractorTransporter()
    serialized = transporter.serialize_fe_module(hasher)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_fe_module(reloaded)

    actual = restored.transform(rows).toarray()
    matches = array_equal(expected, actual)
    report_status(matches, MODEL_NAME)
    assert matches
================================================
FILE: tests/test_feature_extraction/hashing_vectorizer.py
================================================
from numpy import array_equal
from util import get_path, write_and_read
from pymilo.utils.test_pymilo import report_status
from sklearn.feature_extraction.text import HashingVectorizer
from pymilo.transporters.feature_extraction_transporter import FeatureExtractorTransporter
MODEL_NAME = "HashingVectorizer"


def hashing_vectorizer():
    """Round-trip a ``HashingVectorizer`` through ``FeatureExtractorTransporter``.

    The original and the restored vectorizer are each run with
    ``fit_transform`` on the same corpus and must give equal arrays.
    """
    corpus = [
        'This is the first document.',
        'This document is the second document.',
        'And this is the third one.',
        'Is this the first document?',
    ]
    vectorizer = HashingVectorizer(n_features=2**4)
    expected = vectorizer.fit_transform(corpus).toarray()

    transporter = FeatureExtractorTransporter()
    serialized = transporter.serialize_fe_module(vectorizer)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_fe_module(reloaded)

    actual = restored.fit_transform(corpus).toarray()
    matches = array_equal(expected, actual)
    report_status(matches, MODEL_NAME)
    assert matches
================================================
FILE: tests/test_feature_extraction/patch_extractor.py
================================================
from numpy import array_equal, random
from sklearn.datasets import load_sample_images
from sklearn.feature_extraction import image
from pymilo.transporters.feature_extraction_transporter import FeatureExtractorTransporter
from pymilo.utils.test_pymilo import report_status
from util import get_path, write_and_read
MODEL_NAME = "PatchExtractor"


def patch_extractor():
    """Round-trip a ``PatchExtractor`` through ``FeatureExtractorTransporter``.

    The extractor is seeded with ``RandomState(42)``; the original and the
    restored extractor must produce equal patches for the same image batch.
    """
    sample_image = load_sample_images().images[1]
    # Add a leading axis so the single image is passed as a batch of one.
    batch = sample_image[None, ...]
    extractor = image.PatchExtractor(
        patch_size=(10, 10),
        random_state=random.RandomState(42),
    )
    expected = extractor.transform(batch)

    transporter = FeatureExtractorTransporter()
    serialized = transporter.serialize_fe_module(extractor)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_fe_module(reloaded)

    actual = restored.transform(batch)
    matches = array_equal(expected, actual)
    report_status(matches, MODEL_NAME)
    assert matches
================================================
FILE: tests/test_feature_extraction/pipeline.py
================================================
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from pymilo.utils.test_pymilo import pymilo_classification_test
MODEL_NAME = "Pipeline"


def pipeline():
    """Fit a count -> tf-idf -> logistic-regression ``Pipeline`` on a tiny
    labelled corpus and pass it to ``pymilo_classification_test``."""
    corpus = [
        'this is the first document',
        'this document is the second document',
        'and this is the third one',
        'is this the first document',
    ]
    labels = ['A', 'B', 'A', 'B']
    encoded_labels = LabelEncoder().fit_transform(labels)
    X_train, X_test, y_train, y_test = train_test_split(
        corpus, encoded_labels, test_size=0.2, random_state=42)

    vocabulary = ['this', 'document', 'first', 'is', 'second', 'the', 'and', 'one']
    steps = [
        ('count', CountVectorizer(vocabulary=vocabulary)),
        ('tfid', TfidfTransformer()),
        ('clf', LogisticRegression()),
    ]
    fitted_pipe = Pipeline(steps)
    fitted_pipe.fit(X_train, y_train)
    pymilo_classification_test(fitted_pipe, MODEL_NAME, (X_test, y_test))
================================================
FILE: tests/test_feature_extraction/test_feature_extractions.py
================================================
import os
import pytest
from count_vectorizer import count_vectorizer
from dict_vectorizer import dict_vectorizer
from feature_hasher import feature_hasher
from hashing_vectorizer import hashing_vectorizer
from patch_extractor import patch_extractor
from tfidf_transformer import tfidf_transformer
from tfidf_vectorizer import tfidf_vectorizer
# Feature-extraction checks executed, in this order, by test_full().
FEATURE_EXTRACTIONS = [
    count_vectorizer,
    dict_vectorizer,
    feature_hasher,
    hashing_vectorizer,
    patch_extractor,
    tfidf_transformer,
    tfidf_vectorizer,
]
@pytest.fixture(scope="session", autouse=True)
def reset_exported_models_directory():
    """Session-scoped autouse fixture preparing ``tests/exported_feature_extraction``.

    The directory is created when missing; otherwise every regular file
    directly inside it is removed. Paths are resolved against the current
    working directory.
    """
    target_dir = os.path.join(os.getcwd(), "tests", "exported_feature_extraction")
    if os.path.isdir(target_dir):
        for entry in os.listdir(target_dir):
            entry_path = os.path.join(target_dir, entry)
            # Sub-directories are left alone; only plain files are deleted.
            if os.path.isfile(entry_path):
                os.remove(entry_path)
    else:
        os.mkdir(target_dir)
def test_full():
    """Run every registered feature-extraction check.

    An entry is either a callable or a ``(callable_or_None, model_name)``
    tuple. A tuple whose callable is None is reported as unsupported and
    skipped; otherwise the callable is invoked.
    """
    for entry in FEATURE_EXTRACTIONS:
        if isinstance(entry, tuple):
            func, model_name = entry
            if func is None:
                print("Model: " + model_name + " is not supported in this python version.")
                continue
        else:
            func = entry
        # Invoke the unpacked callable; calling a tuple entry itself would
        # raise TypeError.
        func()
================================================
FILE: tests/test_feature_extraction/tfidf_transformer.py
================================================
from numpy import array_equal
from util import get_path, write_and_read
from pymilo.utils.test_pymilo import report_status
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from pymilo.transporters.feature_extraction_transporter import FeatureExtractorTransporter
MODEL_NAME = "TfidfTransformer"


def tfidf_transformer():
    """Round-trip a fitted ``TfidfTransformer`` through ``FeatureExtractorTransporter``.

    The transformer is taken from a fitted count -> tf-idf pipeline; its
    ``idf_`` must be equal before and after the round trip.
    """
    corpus = [
        'this is the first document',
        'this document is the second document',
        'and this is the third one',
        'is this the first document',
    ]
    vocabulary = ['this', 'document', 'first', 'is', 'second', 'the', 'and', 'one']
    steps = [
        ('count', CountVectorizer(vocabulary=vocabulary)),
        ('tfid', TfidfTransformer()),
    ]
    fitted_pipe = Pipeline(steps).fit(corpus)
    fitted_tfidf = fitted_pipe['tfid']
    expected = fitted_tfidf.idf_

    transporter = FeatureExtractorTransporter()
    serialized = transporter.serialize_fe_module(fitted_tfidf)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_fe_module(reloaded)

    actual = restored.idf_
    matches = array_equal(expected, actual)
    report_status(matches, MODEL_NAME)
    assert matches
================================================
FILE: tests/test_feature_extraction/tfidf_vectorizer.py
================================================
from numpy import array_equal
from util import get_path, write_and_read
from pymilo.utils.test_pymilo import report_status
from sklearn.feature_extraction.text import TfidfVectorizer
from pymilo.transporters.feature_extraction_transporter import FeatureExtractorTransporter
MODEL_NAME = "TfidfVectorizer"


def tfidf_vectorizer():
    """Round-trip a fitted ``TfidfVectorizer`` through ``FeatureExtractorTransporter``.

    The restored vectorizer is fitted on the same corpus, and both resulting
    matrices must be equal.
    """
    corpus = [
        'This is the first document.',
        'This document is the second document.',
        'And this is the third one.',
        'Is this the first document?',
    ]
    vectorizer = TfidfVectorizer()
    expected = vectorizer.fit_transform(corpus).toarray()

    transporter = FeatureExtractorTransporter()
    serialized = transporter.serialize_fe_module(vectorizer)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_fe_module(reloaded)

    actual = restored.fit_transform(corpus).toarray()
    matches = array_equal(expected, actual)
    report_status(matches, MODEL_NAME)
    assert matches
================================================
FILE: tests/test_feature_extraction/util.py
================================================
import os
import json
def write_and_read(serialized_model, file_addr):
    """Dump ``serialized_model`` as indented JSON to ``file_addr`` and load it back.

    :param serialized_model: JSON-serializable object to write
    :param file_addr: path of the file to (over)write and then read
    :return: the object parsed from the file just written
    """
    with open(file_addr, 'w') as sink:
        text = json.dumps(serialized_model, indent=4)
        sink.write(text)
    with open(file_addr, 'r') as source:
        parsed = json.loads(source.read())
    return parsed
def get_path(model_name):
    """Return the JSON path for ``model_name`` under ``tests/exported_feature_extraction``.

    The path is rooted at the current working directory.
    """
    export_dir = os.path.join(os.getcwd(), "tests", "exported_feature_extraction")
    file_name = model_name + ".json"
    return os.path.join(export_dir, file_name)
================================================
FILE: tests/test_linear_models/bayesian/ard_regression.py
================================================
from sklearn.linear_model import ARDRegression
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Automatic-Relevance-Determination-Regression"


def ard_regression():
    """Fit a default ``ARDRegression`` and assert that
    ``pymilo_regression_test`` returns True for it on the test split."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    model = ARDRegression()
    model.fit(x_train, y_train)
    outcome = pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
    assert outcome == True
================================================
FILE: tests/test_linear_models/bayesian/bayesian_regression.py
================================================
from sklearn.linear_model import BayesianRidge
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Bayesian-Ridge-Regression"


def bayesian_regression():
    """Fit a default ``BayesianRidge`` and assert that
    ``pymilo_regression_test`` returns True for it on the test split."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    model = BayesianRidge()
    model.fit(x_train, y_train)
    outcome = pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
    assert outcome == True
================================================
FILE: tests/test_linear_models/elasticnet/elastic_net.py
================================================
from sklearn.linear_model import ElasticNet
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Elastic-Net-Regression"


def elastic_net():
    """Fit an ``ElasticNet`` (alpha 0.1, random_state 0) and assert that
    ``pymilo_regression_test`` returns True for it on the test split."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    settings = {"random_state": 0, "alpha": 0.1}
    model = ElasticNet(**settings)
    model.fit(x_train, y_train)
    outcome = pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
    assert outcome == True
================================================
FILE: tests/test_linear_models/elasticnet/elastic_net_cv.py
================================================
from sklearn.linear_model import ElasticNetCV
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Elastic-Net-CV-Regression"


def elastic_net_cv():
    """Fit an ``ElasticNetCV`` (cv 5, four candidate alphas, random_state 0)
    and assert that ``pymilo_regression_test`` returns True for it on the
    test split."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    settings = {
        "cv": 5,
        "alphas": [1e-3, 1e-2, 1e-1, 1],
        "random_state": 0,
    }
    model = ElasticNetCV(**settings)
    model.fit(x_train, y_train)
    outcome = pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
    assert outcome == True
================================================
FILE: tests/test_linear_models/elasticnet/multi_task_elastic_net.py
================================================
from sklearn.linear_model import MultiTaskElasticNet
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Multi-Task-Elastic-Net-Regression"


def multi_task_elastic_net():
    """Fit a ``MultiTaskElasticNet`` (alpha 0.01, random_state 0) on two-column
    targets and assert that ``pymilo_regression_test`` returns True for it."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    # Build a second task per sample by pairing each target with its square.
    y_train = [[target, target**2] for target in y_train]
    y_test = [[target, target**2] for target in y_test]
    settings = {"random_state": 0, "alpha": 0.01}
    model = MultiTaskElasticNet(**settings)
    model.fit(x_train, y_train)
    outcome = pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
    assert outcome == True
================================================
FILE: tests/test_linear_models/elasticnet/multi_task_elastic_net_cv.py
================================================
from sklearn.linear_model import MultiTaskElasticNetCV
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Multi-Task-Elastic-Net-CV-Regression"


def multi_task_elastic_net_cv():
    """Fit a ``MultiTaskElasticNetCV`` (cv 5, four candidate alphas,
    random_state 0) on two-column targets and assert that
    ``pymilo_regression_test`` returns True for it."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    # Build a second task per sample by pairing each target with its square.
    y_train = [[target, target**2] for target in y_train]
    y_test = [[target, target**2] for target in y_test]
    settings = {
        "random_state": 0,
        "alphas": [1e-3, 1e-2, 1e-1, 1],
        "cv": 5,
    }
    model = MultiTaskElasticNetCV(**settings)
    model.fit(x_train, y_train)
    outcome = pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
    assert outcome == True
================================================
FILE: tests/test_linear_models/glm/gamma_regression.py
================================================
from sklearn.linear_model import GammaRegressor
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Gamma-Regression"


def gamma_regression():
    """Fit a ``GammaRegressor`` (alpha 0.5) and assert that
    ``pymilo_regression_test`` returns True for it on the test split."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    model = GammaRegressor(alpha=0.5)
    model.fit(x_train, y_train)
    outcome = pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
    assert outcome == True
================================================
FILE: tests/test_linear_models/glm/poisson_regression.py
================================================
from sklearn.linear_model import PoissonRegressor
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Poisson-Regression"


def poisson_regression():
    """Fit a ``PoissonRegressor`` (alpha 0.5) and assert that
    ``pymilo_regression_test`` returns True for it on the test split."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    model = PoissonRegressor(alpha=0.5)
    model.fit(x_train, y_train)
    outcome = pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
    assert outcome == True
================================================
FILE: tests/test_linear_models/glm/tweedie_regression.py
================================================
from sklearn.linear_model import TweedieRegressor
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
from pymilo.utils.test_pymilo import pymilo_regression_test
MODEL_NAME = "Tweedie-Regression"


def tweedie_regression():
    """Fit a ``TweedieRegressor`` (power 1, alpha 0.5, log link) and assert
    that ``pymilo_regression_test`` returns True for it on the test split."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    settings = {"power": 1, "alpha": 0.5, "link": 'log'}
    model = TweedieRegressor(**settings)
    model.fit(x_train, y_train)
    outcome = pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
    assert outcome == True
================================================
FILE: tests/test_linear_models/lasso_lars/lasso.py
================================================
from sklearn.linear_model import Lasso
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Lasso-Regression"


def lasso():
    """Fit a ``Lasso`` (alpha 0.2) and assert that
    ``pymilo_regression_test`` returns True for it on the test split."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    # alpha is passed positionally, as the first constructor argument.
    model = Lasso(0.2)
    model.fit(x_train, y_train)
    outcome = pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
    assert outcome == True
================================================
FILE: tests/test_linear_models/lasso_lars/lasso_cv.py
================================================
from sklearn.linear_model import LassoCV
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Lasso-Regression-CV"


def lasso_cv():
    """Fit a ``LassoCV`` (cv 5, four candidate alphas, random_state 0) and
    assert that ``pymilo_regression_test`` returns True for it on the test
    split."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    settings = {
        "alphas": [1e-3, 1e-2, 1e-1, 1],
        "cv": 5,
        "random_state": 0,
    }
    model = LassoCV(**settings)
    model.fit(x_train, y_train)
    outcome = pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
    assert outcome == True
================================================
FILE: tests/test_linear_models/lasso_lars/lasso_lars.py
================================================
from sklearn.linear_model import LassoLars
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Lasso-Lars-Regression"


def lasso_lars():
    """Fit a ``LassoLars`` (alpha 0.2) and assert that
    ``pymilo_regression_test`` returns True for it on the test split."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    # alpha is passed positionally, as the first constructor argument.
    model = LassoLars(0.2)
    model.fit(x_train, y_train)
    outcome = pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
    assert outcome == True
================================================
FILE: tests/test_linear_models/lasso_lars/lasso_lars_cv.py
================================================
from sklearn.linear_model import LassoLarsCV
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Lasso-Lars-CV-Regression"


def lasso_lars_cv():
    """Fit a ``LassoLarsCV`` (cv 5) and assert that
    ``pymilo_regression_test`` returns True for it on the test split."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    model = LassoLarsCV(cv=5)
    model.fit(x_train, y_train)
    outcome = pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
    assert outcome == True
================================================
FILE: tests/test_linear_models/lasso_lars/lasso_lars_ic.py
================================================
from sklearn.linear_model import LassoLarsIC
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Lasso-Lars-IC-Regression"
def lasso_lars_ic():
    """
    Fit a LassoLarsIC regressor (criterion="bic") and validate it with pymilo_regression_test.

    :return: None
    """
    train_x, train_y, test_x, test_y = prepare_simple_regression_datasets()
    # LassoLarsIC estimator configured with the "bic" criterion
    estimator = LassoLarsIC(criterion="bic")
    # Fit on the training split
    estimator.fit(train_x, train_y)
    outcome = pymilo_regression_test(estimator, MODEL_NAME, (test_x, test_y))
    assert outcome == True
================================================
FILE: tests/test_linear_models/lasso_lars/multi_task_lasso.py
================================================
from sklearn.linear_model import MultiTaskLasso
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Multi-Task-Lasso-Regression"
def multi_task_lasso():
    """
    Fit a MultiTaskLasso regressor on two-column targets and validate it with pymilo_regression_test.

    :return: None
    """
    train_x, train_y, test_x, test_y = prepare_simple_regression_datasets()
    # Build two-output targets: each original target paired with its square
    train_targets = [[target, target**2] for target in train_y]
    test_targets = [[target, target**2] for target in test_y]
    # MultiTaskLasso estimator with alpha = 0.1 and a fixed random state
    estimator = MultiTaskLasso(random_state=0, alpha=0.1)
    # Fit on the training split
    estimator.fit(train_x, train_targets)
    outcome = pymilo_regression_test(
        estimator, MODEL_NAME, (test_x, test_targets))
    assert outcome == True
================================================
FILE: tests/test_linear_models/lasso_lars/multi_task_lasso_cv.py
================================================
from sklearn.linear_model import MultiTaskLassoCV
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Multi-Task-Lasso-CV-Regression"
def multi_task_lasso_cv():
    """
    Fit a MultiTaskLassoCV regressor on two-column targets and validate it with pymilo_regression_test.

    :return: None
    """
    train_x, train_y, test_x, test_y = prepare_simple_regression_datasets()
    # Build two-output targets: each original target paired with its square
    train_targets = [[target, target**2] for target in train_y]
    test_targets = [[target, target**2] for target in test_y]
    # Cross-validated MultiTaskLasso over a fixed alpha grid, cv=5, fixed random state
    estimator = MultiTaskLassoCV(
        random_state=0,
        alphas=[1e-3, 1e-2, 1e-1, 1],
        cv=5)
    # Fit on the training split
    estimator.fit(train_x, train_targets)
    outcome = pymilo_regression_test(
        estimator, MODEL_NAME, (test_x, test_targets))
    assert outcome == True
================================================
FILE: tests/test_linear_models/linear_regression/linear_regression.py
================================================
from sklearn.linear_model import LinearRegression
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
from pymilo.utils.test_pymilo import pymilo_regression_test
MODEL_NAME = "Linear-Regression"
def linear_regression():
    """
    Fit a default LinearRegression model and validate it with pymilo_regression_test.

    :return: None
    """
    train_x, train_y, test_x, test_y = prepare_simple_regression_datasets()
    # LinearRegression estimator with default settings
    estimator = LinearRegression()
    # Fit on the training split
    estimator.fit(train_x, train_y)
    outcome = pymilo_regression_test(estimator, MODEL_NAME, (test_x, test_y))
    assert outcome == True
================================================
FILE: tests/test_linear_models/logistic/logistic_regression.py
================================================
from sklearn.linear_model import LogisticRegression
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "Logistic-Regression"
def logistic_regression():
    """
    Fit a LogisticRegression classifier and validate it with pymilo_classification_test.

    :return: None
    """
    train_x, train_y, test_x, test_y = prepare_simple_classification_datasets()
    # LogisticRegression classifier with a fixed random state of 4
    classifier = LogisticRegression(random_state=4)
    # Fit on the training split
    classifier.fit(train_x, train_y)
    outcome = pymilo_classification_test(
        classifier, MODEL_NAME, (test_x, test_y))
    assert outcome == True
================================================
FILE: tests/test_linear_models/logistic/logistic_regression_cv.py
================================================
from sklearn.linear_model import LogisticRegressionCV
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "Logistic-Regression-CV"
def logistic_regression_cv():
    """
    Fit a LogisticRegressionCV classifier and validate it with pymilo_classification_test.

    :return: None
    """
    train_x, train_y, test_x, test_y = prepare_simple_classification_datasets()
    # Cross-validated logistic regression: cv=5, random state fixed to 0
    classifier = LogisticRegressionCV(cv=5, random_state=0)
    # Fit on the training split
    classifier.fit(train_x, train_y)
    outcome = pymilo_classification_test(
        classifier, MODEL_NAME, (test_x, test_y))
    assert outcome == True
================================================
FILE: tests/test_linear_models/omp/omp.py
================================================
from sklearn.linear_model import OrthogonalMatchingPursuit
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Orthogonal-Matching-Pursuit-Regression"
def omp():
    """
    Fit an OrthogonalMatchingPursuit regressor and validate it with pymilo_regression_test.

    :return: None
    """
    train_x, train_y, test_x, test_y = prepare_simple_regression_datasets()
    # Orthogonal Matching Pursuit estimator with n_nonzero_coefs set to 10
    estimator = OrthogonalMatchingPursuit(n_nonzero_coefs=10)
    # Fit on the training split
    estimator.fit(train_x, train_y)
    outcome = pymilo_regression_test(estimator, MODEL_NAME, (test_x, test_y))
    assert outcome == True
================================================
FILE: tests/test_linear_models/omp/omp_cv.py
================================================
from sklearn.linear_model import OrthogonalMatchingPursuitCV
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Orthogonal-Matching-Pursuit-CV-Regression"
def omp_cv():
    """
    Fit an OrthogonalMatchingPursuitCV regressor (cv=5) and validate it with pymilo_regression_test.

    :return: None
    """
    train_x, train_y, test_x, test_y = prepare_simple_regression_datasets()
    # Cross-validated Orthogonal Matching Pursuit estimator, cv set to 5
    estimator = OrthogonalMatchingPursuitCV(cv=5)
    # Fit on the training split
    estimator.fit(train_x, train_y)
    outcome = pymilo_regression_test(estimator, MODEL_NAME, (test_x, test_y))
    assert outcome == True
================================================
FILE: tests/test_linear_models/passive_aggressive/passive_aggressive_classifier.py
================================================
from sklearn.linear_model import PassiveAggressiveClassifier
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "Passive-Aggressive-Classifier"
def passive_aggressive_classifier():
    """
    Fit a PassiveAggressiveClassifier and validate it with pymilo_classification_test.

    :return: None
    """
    train_x, train_y, test_x, test_y = prepare_simple_classification_datasets()
    # Passive-aggressive classifier hyperparameters
    hyperparameters = {"max_iter": 1000, "random_state": 0, "tol": 1e-3}
    classifier = PassiveAggressiveClassifier(**hyperparameters)
    # Fit on the training split
    classifier.fit(train_x, train_y)
    outcome = pymilo_classification_test(
        classifier, MODEL_NAME, (test_x, test_y))
    assert outcome == True
================================================
FILE: tests/test_linear_models/passive_aggressive/passive_aggressive_regressor.py
================================================
from sklearn.linear_model import PassiveAggressiveRegressor
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Passive-Aggressive-Regressor"
def passive_agressive_regressor():
    """
    Fit a PassiveAggressiveRegressor and validate it with pymilo_regression_test.

    :return: None
    """
    train_x, train_y, test_x, test_y = prepare_simple_regression_datasets()
    # Passive-aggressive regressor: at most 100 iterations, random state fixed to 2
    estimator = PassiveAggressiveRegressor(max_iter=100, random_state=2)
    # Fit on the training split
    estimator.fit(train_x, train_y)
    outcome = pymilo_regression_test(estimator, MODEL_NAME, (test_x, test_y))
    assert outcome == True
================================================
FILE: tests/test_linear_models/perceptron/perception.py
================================================
from sklearn.linear_model import Perceptron
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Perceptron"
def perceptron():
    """
    Fit a Perceptron model and validate it with pymilo_regression_test.

    NOTE(review): this scenario feeds the regression dataset and the regression
    test helper to Perceptron; confirm that this is intended.

    :return: None
    """
    train_x, train_y, test_x, test_y = prepare_simple_regression_datasets()
    # Perceptron with a fixed random state and stopping tolerance of 1e-3
    model = Perceptron(random_state=0, tol=1e-3)
    # Fit on the training split
    model.fit(train_x, train_y)
    outcome = pymilo_regression_test(model, MODEL_NAME, (test_x, test_y))
    assert outcome == True
================================================
FILE: tests/test_linear_models/quantile/quantile.py
================================================
from sklearn.linear_model import QuantileRegressor
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Quantile-Regressor"
def quantile_regressor():
    """
    Fit a QuantileRegressor (quantile=0.8, "highs" solver) and validate it with pymilo_regression_test.

    :return: None
    """
    train_x, train_y, test_x, test_y = prepare_simple_regression_datasets()
    # Quantile regression settings
    settings = {"quantile": 0.8, "solver": "highs"}
    estimator = QuantileRegressor(**settings)
    # Fit on the training split
    estimator.fit(train_x, train_y)
    outcome = pymilo_regression_test(estimator, MODEL_NAME, (test_x, test_y))
    assert outcome == True
================================================
FILE: tests/test_linear_models/ridge/ridge_classifier.py
================================================
from sklearn.linear_model import RidgeClassifier
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "Ridge-Classifier"
def ridge_classifier():
    """
    Fit a RidgeClassifier (alpha=0.4) and validate it with pymilo_classification_test.

    :return: None
    """
    train_x, train_y, test_x, test_y = prepare_simple_classification_datasets()
    # Ridge classifier with regularization strength alpha = 0.4
    classifier = RidgeClassifier(alpha=0.4)
    # Fit on the training split
    classifier.fit(train_x, train_y)
    outcome = pymilo_classification_test(
        classifier, MODEL_NAME, (test_x, test_y))
    assert outcome == True
================================================
FILE: tests/test_linear_models/ridge/ridge_classifier_cv.py
================================================
from sklearn.linear_model import RidgeClassifierCV
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "Ridge-Classifier-CV"
def ridge_classifier_cv():
    """
    Fit a RidgeClassifierCV over a fixed alpha grid and validate it with pymilo_classification_test.

    :return: None
    """
    train_x, train_y, test_x, test_y = prepare_simple_classification_datasets()
    # Candidate regularization strengths explored by the CV classifier
    alpha_grid = [1e-3, 1e-2, 1e-1, 1]
    classifier = RidgeClassifierCV(alphas=alpha_grid)
    # Fit on the training split
    classifier.fit(train_x, train_y)
    outcome = pymilo_classification_test(
        classifier, MODEL_NAME, (test_x, test_y))
    assert outcome == True
================================================
FILE: tests/test_linear_models/ridge/ridge_regression.py
================================================
from sklearn.linear_model import Ridge
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
from pymilo.utils.test_pymilo import pymilo_regression_test
MODEL_NAME = "Ridge-Regression"
def ridge_regression():
    """
    Fit a Ridge regressor (alpha=0.5) and validate it with pymilo_regression_test.

    :return: None
    """
    train_x, train_y, test_x, test_y = prepare_simple_regression_datasets()
    # Ridge estimator with regularization strength alpha = 0.5
    estimator = Ridge(alpha=0.5)
    # Fit on the training split
    estimator.fit(train_x, train_y)
    outcome = pymilo_regression_test(estimator, MODEL_NAME, (test_x, test_y))
    assert outcome == True
================================================
FILE: tests/test_linear_models/ridge/ridge_regression_cv.py
================================================
from sklearn.linear_model import RidgeCV
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Ridge-Regression-CV"
def ridge_regression_cv():
    """
    Fit a RidgeCV regressor over a fixed alpha grid and validate it with pymilo_regression_test.

    :return: None
    """
    train_x, train_y, test_x, test_y = prepare_simple_regression_datasets()
    # Candidate regularization strengths explored by RidgeCV
    alpha_grid = [1e-3, 1e-2, 1e-1, 1]
    estimator = RidgeCV(alphas=alpha_grid)
    # Fit on the training split
    estimator.fit(train_x, train_y)
    outcome = pymilo_regression_test(estimator, MODEL_NAME, (test_x, test_y))
    assert outcome == True
================================================
FILE: tests/test_linear_models/robustness/huber_regression.py
================================================
from sklearn.linear_model import HuberRegressor
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Huber-Regressor"
def huber_regression():
    """
    Fit a HuberRegressor (max_iter=300) and validate it with pymilo_regression_test.

    :return: None
    """
    train_x, train_y, test_x, test_y = prepare_simple_regression_datasets()
    # Huber estimator allowed up to 300 iterations
    estimator = HuberRegressor(max_iter=300)
    # Fit on the training split
    estimator.fit(train_x, train_y)
    outcome = pymilo_regression_test(estimator, MODEL_NAME, (test_x, test_y))
    assert outcome == True
================================================
FILE: tests/test_linear_models/robustness/ransac_regression.py
================================================
from sklearn.linear_model import RANSACRegressor
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "RANSAC-Regressor"
def ransac_regression():
    """
    Fit a RANSACRegressor (random_state=3) and validate it with pymilo_regression_test.

    :return: None
    """
    train_x, train_y, test_x, test_y = prepare_simple_regression_datasets()
    # RANSAC estimator with its random state fixed to 3
    estimator = RANSACRegressor(random_state=3)
    # Fit on the training split
    estimator.fit(train_x, train_y)
    outcome = pymilo_regression_test(estimator, MODEL_NAME, (test_x, test_y))
    assert outcome == True
================================================
FILE: tests/test_linear_models/robustness/theil_sen_regression.py
================================================
from sklearn.linear_model import TheilSenRegressor
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
from pymilo.utils.test_pymilo import pymilo_regression_test
MODEL_NAME = "Theil-Sen-Regressor"
def theil_sen_regression():
    """
    Fit a TheilSenRegressor (random_state=4) and validate it with pymilo_regression_test.

    :return: None
    """
    train_x, train_y, test_x, test_y = prepare_simple_regression_datasets()
    # Theil-Sen estimator with its random state fixed to 4
    estimator = TheilSenRegressor(random_state=4)
    # Fit on the training split
    estimator.fit(train_x, train_y)
    outcome = pymilo_regression_test(estimator, MODEL_NAME, (test_x, test_y))
    assert outcome == True
================================================
FILE: tests/test_linear_models/sgd/sgd_classifier.py
================================================
from sklearn.linear_model import SGDClassifier
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
from pymilo.utils.test_pymilo import pymilo_classification_test
MODEL_NAME = "SGD-Classifier"
def sgd_classifier():
    """
    Fit an SGDClassifier and validate it with pymilo_classification_test.

    :return: None
    """
    train_x, train_y, test_x, test_y = prepare_simple_classification_datasets()
    # SGD classifier: iteration cap of 100000 and stopping tolerance of 1e-3
    classifier = SGDClassifier(max_iter=100000, tol=1e-3)
    # Fit on the training split
    classifier.fit(train_x, train_y)
    outcome = pymilo_classification_test(
        classifier, MODEL_NAME, (test_x, test_y))
    assert outcome == True
================================================
FILE: tests/test_linear_models/sgd/sgd_oneclass_svm.py
================================================
from sklearn.linear_model import SGDOneClassSVM
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
from pymilo.utils.test_pymilo import pymilo_regression_test
MODEL_NAME = "SGD-OneClass-Regression"
def sgd_oneclass_svm():
    """
    Fit an SGDOneClassSVM on the training features only and validate it with pymilo_regression_test.

    :return: None
    """
    # Training targets are discarded: the model is fitted on features alone
    train_x, _, test_x, test_y = prepare_simple_regression_datasets()
    # SGDOneClassSVM with its random state fixed to 34
    model = SGDOneClassSVM(random_state=34)
    # Fit on the training features
    model.fit(train_x)
    outcome = pymilo_regression_test(model, MODEL_NAME, (test_x, test_y))
    assert outcome == True
================================================
FILE: tests/test_linear_models/sgd/sgd_regression.py
================================================
from sklearn.linear_model import SGDRegressor
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
from pymilo.utils.test_pymilo import pymilo_regression_test
MODEL_NAME = "SGD-Regression"
def sgd_regression():
    """
    Fit an SGDRegressor and validate it with pymilo_regression_test.

    :return: None
    """
    train_x, train_y, test_x, test_y = prepare_simple_regression_datasets()
    # SGD regressor: iteration cap of 100000 and stopping tolerance of 1e-3
    estimator = SGDRegressor(max_iter=100000, tol=1e-3)
    # Fit on the training split
    estimator.fit(train_x, train_y)
    outcome = pymilo_regression_test(estimator, MODEL_NAME, (test_x, test_y))
    assert outcome == True
================================================
FILE: tests/test_linear_models/test_linear_models.py
================================================
import os
import pytest
from pymilo.pymilo_param import SKLEARN_LINEAR_MODEL_TABLE, NOT_SUPPORTED
from linear_regression.linear_regression import linear_regression
from ridge.ridge_regression import ridge_regression
from ridge.ridge_regression_cv import ridge_regression_cv
from ridge.ridge_classifier import ridge_classifier
from ridge.ridge_classifier_cv import ridge_classifier_cv
from lasso_lars.lasso import lasso
from lasso_lars.lasso_cv import lasso_cv
from lasso_lars.lasso_lars import lasso_lars
from lasso_lars.lasso_lars_cv import lasso_lars_cv
from lasso_lars.lasso_lars_ic import lasso_lars_ic
from lasso_lars.multi_task_lasso import multi_task_lasso
from lasso_lars.multi_task_lasso_cv import multi_task_lasso_cv
from elasticnet.elastic_net import elastic_net
from elasticnet.elastic_net_cv import elastic_net_cv
from elasticnet.multi_task_elastic_net import multi_task_elastic_net
from elasticnet.multi_task_elastic_net_cv import multi_task_elastic_net_cv
from omp.omp import omp
from omp.omp_cv import omp_cv
from bayesian.bayesian_regression import bayesian_regression
from bayesian.ard_regression import ard_regression
from logistic.logistic_regression import logistic_regression
from logistic.logistic_regression_cv import logistic_regression_cv
from sgd.sgd_regression import sgd_regression
from sgd.sgd_classifier import sgd_classifier
from perceptron.perception import perceptron
from passive_aggressive.passive_aggressive_regressor import passive_agressive_regressor
from passive_aggressive.passive_aggressive_classifier import passive_aggressive_classifier
from robustness.ransac_regression import ransac_regression
from robustness.theil_sen_regression import theil_sen_regression
from robustness.huber_regression import huber_regression
if SKLEARN_LINEAR_MODEL_TABLE["TweedieRegressor"] != NOT_SUPPORTED:
from glm.tweedie_regression import tweedie_regression
if SKLEARN_LINEAR_MODEL_TABLE["PoissonRegressor"] != NOT_SUPPORTED:
from glm.poisson_regression import poisson_regression
if SKLEARN_LINEAR_MODEL_TABLE["GammaRegressor"] != NOT_SUPPORTED:
from glm.gamma_regression import gamma_regression
if SKLEARN_LINEAR_MODEL_TABLE["SGDOneClassSVM"] != NOT_SUPPORTED:
from sgd.sgd_oneclass_svm import sgd_oneclass_svm
if SKLEARN_LINEAR_MODEL_TABLE["QuantileRegressor"] != NOT_SUPPORTED:
from quantile.quantile import quantile_regressor
# Registry of linear-model test scenarios, grouped by category name.
# Each entry is either a scenario callable or, when SKLEARN_LINEAR_MODEL_TABLE
# marks the estimator as NOT_SUPPORTED, a `(None, "<EstimatorName>")` placeholder
# standing in for the scenario that was not imported.
LINEAR_MODELS = {
"LINEAR_REGRESSION": [linear_regression],
"RIDGE_REGRESSION_AND_CLASSIFICATION": [
ridge_regression,
ridge_regression_cv,
ridge_classifier,
ridge_classifier_cv],
"LASSO_AND_LARS": [
lasso,
lasso_cv,
lasso_lars,
lasso_lars_cv,
lasso_lars_ic,
multi_task_lasso,
multi_task_lasso_cv],
"ELASTIC_NET": [
elastic_net,
elastic_net_cv],
"MULTI_CLASS_ELASTIC_NET": [
multi_task_elastic_net,
multi_task_elastic_net_cv],
"OMP": [
omp,
omp_cv],
"BAYESIAN_REGRESSION": [
bayesian_regression,
ard_regression],
"LOGISTIC_REGRESSION": [
logistic_regression,
logistic_regression_cv],
# GLM scenarios are only imported when supported; otherwise use the placeholder tuple
"GLM": [
tweedie_regression if SKLEARN_LINEAR_MODEL_TABLE["TweedieRegressor"] != NOT_SUPPORTED else (None,"TweedieRegressor"),
poisson_regression if SKLEARN_LINEAR_MODEL_TABLE["PoissonRegressor"] != NOT_SUPPORTED else (None,"PoissonRegressor"),
gamma_regression if SKLEARN_LINEAR_MODEL_TABLE["GammaRegressor"] != NOT_SUPPORTED else (None,"GammaRegressor")],
"SGD": [
sgd_regression,
sgd_classifier,
# SGDOneClassSVM is conditionally imported, hence the same placeholder fallback
sgd_oneclass_svm if SKLEARN_LINEAR_MODEL_TABLE["SGDOneClassSVM"] != NOT_SUPPORTED else (None,"SGDOneClassSVM")],
"PERCEPTRON": [perceptron],
"PASSIVE_AGGRESSIVE_REGRESSION_AND_CLASSIFIER": [
passive_agressive_regressor,
passive_aggressive_classifier],
"ROBUSTNESS_REGRESSION": [
ransac_regression,
theil_sen_regression,
huber_regression],
# QuantileRegressor is conditionally imported, hence the placeholder fallback
"QUANTILE_REGRESSION": [quantile_regressor if SKLEARN_LINEAR_MODEL_TABLE["QuantileRegressor"] != NOT_SUPPORTED else (None,"QuantileRegressor")]}
@pytest.fixture(scope="session", autouse=True)
def reset_exported_models_directory():
    """
    Ensure `<cwd>/tests/exported_linear_models` exists and contains no stale files.

    Runs once per test session. Only regular files directly inside the
    directory are removed; sub-directories are left untouched.

    :return: None
    """
    exported_models_directory = os.path.join(
        os.getcwd(), "tests", "exported_linear_models")
    # makedirs(exist_ok=True) also creates a missing parent directory and
    # avoids the check-then-create race of an isdir() + mkdir() pair.
    os.makedirs(exported_models_directory, exist_ok=True)
    for file_name in os.listdir(exported_models_directory):
        # construct full file path
        json_file = os.path.join(exported_models_directory, file_name)
        if os.path.isfile(json_file):
            os.remove(json_file)
def test_full():
    """
    Run every scenario registered in LINEAR_MODELS.

    Registry entries are either a scenario callable or a `(func, model_name)`
    tuple. A tuple whose `func` is None marks a model that is not supported;
    it is reported and skipped. A tuple carrying a real callable is unpacked
    and executed instead of being called as a tuple.

    :return: None
    """
    for scenarios in LINEAR_MODELS.values():
        for model in scenarios:
            if isinstance(model, tuple):
                func, model_name = model
                if func is None:
                    print("Model: " + model_name + " is not supported in this python version.")
                    continue
                # Tuple with an actual scenario: run the callable, not the tuple
                model = func
            model()
================================================
FILE: tests/test_misc_functionalities.py/test_batch.py
================================================
import os
import re
import random
import numpy as np
from pymilo import Export, Import
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
def test_batch_execution():
    """
    Batch-export 100 references to one fitted LinearRegression, batch-import them, and compare predictions.

    The exported `model_<n>.json` files are written to the current working
    directory and are removed in a `finally` clause, so they do not leak when
    export, import or prediction raises.

    :return: None
    """
    x_train, y_train, x_test, _ = prepare_simple_regression_datasets()
    linear_regression = LinearRegression()
    linear_regression.fit(x_train, y_train)
    pre_models = [linear_regression] * 100
    export_directory = os.getcwd()
    try:
        exp_n = Export.batch_export(pre_models, export_directory)
        imp_n, post_models = Import.batch_import(export_directory)
        # Spot-check one randomly chosen model rather than all of them
        r_index = random.randint(0, len(post_models) - 1)
        pre_result = pre_models[r_index].predict(x_test)
        post_result = post_models[r_index].predict(x_test)
        mse = mean_squared_error(post_result, pre_result)
    finally:
        # Always clean up the exported files, even on failure above
        pattern = re.compile(r'model_\d+\.json')
        for filename in os.listdir(export_directory):
            if pattern.match(filename):
                os.remove(os.path.join(export_directory, filename))
    # Separate asserts so a failure shows which condition broke
    assert exp_n == imp_n
    assert np.abs(mse) <= 10**(-8)
================================================
FILE: tests/test_ml_streaming/docker_files/Dockerfile1
================================================
# Base image: the official slim Python 3.11 runtime
FROM python:3.11-slim
# Set the working directory in the container; later instructions run relative to /app
WORKDIR /app
# Install pymilo together with its "streaming" extra
RUN pip install pymilo[streaming]
# Document the port the container listens on (matches --port in CMD below)
EXPOSE 8000
# Start the pymilo module as a REST server on port 8000 with compression set to NULL.
# --load is given a remote linear-regression JSON URL (presumably fetched at startup; confirm against the pymilo CLI).
CMD ["python", "-m", "pymilo", "--compression", "NULL", "--protocol", "REST", "--port", "8000", "--load", "https://raw.githubusercontent.com/openscilab/pymilo/main/tests/test_exceptions/valid_jsons/linear_regression.json"]
================================================
FILE: tests/test_ml_streaming/docker_files/Dockerfile2
================================================
# Base image: the official slim Python 3.11 runtime
FROM python:3.11-slim
# Set the working directory in the container; later instructions run relative to /app
WORKDIR /app
# Install pymilo together with its "streaming" extra
RUN pip install pymilo[streaming]
# Copy the exported model from the build context into the image as /app/model.json
COPY linear_regression.json /app/model.json
# Document the port the container listens on (matches --port in CMD below)
EXPOSE 8000
# Start the pymilo module as a REST server on port 8000 with compression set to NULL,
# passing the model file copied above to --load.
CMD ["python", "-m", "pymilo", "--compression", "NULL", "--protocol", "REST", "--port", "8000", "--load", "/app/model.json"]
================================================
FILE: tests/test_ml_streaming/run_server.py
================================================
import argparse
from sklearn.linear_model import LinearRegression
from pymilo.streaming import PymiloServer, Compression, CommunicationProtocol
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
def main():
    """
    Parse the command line and run a PyMilo server.

    Without `--init` a bare server is started (port 8000 unless `--port` is
    given). With `--init` a LinearRegression model is trained locally and
    registered on the server under demo client/model ids before the server
    is started (port 9000 unless `--port` is given).

    :return: None
    """
    cli = argparse.ArgumentParser(description='Run the Pymilo server with a specified compression method.')
    # Declarative option table; registration order is preserved
    option_table = (
        ('--compression', {
            "type": str,
            "choices": ['NULL', 'GZIP', 'ZLIB', 'LZMA', 'BZ2'],
            "default": 'NULL',
            "help": 'Specify the compression method (NULL, GZIP, ZLIB, LZMA, or BZ2). Default is NULL.',
        }),
        ('--protocol', {
            "type": str,
            "choices": ['REST', 'WEBSOCKET'],
            "default": 'REST',
            "help": 'Specify the communication protocol (REST or WEBSOCKET). Default is REST.',
        }),
        ('--init', {
            "action": "store_true",
            "default": False,
            "help": 'the `init` command specifies whether or not initializing the PyMilo Server with a ML model.',
        }),
        ('--port', {
            "type": int,
            "default": None,
            "help": 'Override the default port.',
        }),
    )
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)
    options = cli.parse_args()

    if not options.init:
        # Bare server without a preloaded model
        bare_server = PymiloServer(
            port=options.port or 8000,
            compressor=Compression[options.compression],
            communication_protocol=CommunicationProtocol[options.protocol],
        )
        bare_server.communicator.run()
        return

    # Server preloaded with a freshly trained linear regression model
    chosen_port = options.port or 9000
    train_x, train_y, _, _ = prepare_simple_regression_datasets()
    model = LinearRegression()
    model.fit(train_x, train_y)
    server = PymiloServer(
        port=chosen_port,
        compressor=Compression[options.compression],
        communication_protocol=CommunicationProtocol[options.protocol],
    )
    demo_client_id = "0x_demo_client_id"
    demo_model_id = "0x_demo_ml_model_id"
    server.init_client(demo_client_id)
    server.init_ml_model(demo_client_id, demo_model_id)
    server.set_ml_model(demo_client_id, demo_model_id, model)
    server.communicator.run()


if __name__ == '__main__':
    main()
================================================
FILE: tests/test_ml_streaming/scenarios/scenario1.py
================================================
import numpy as np
from pymilo.streaming import PymiloClient, Compression, CommunicationProtocol
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
def scenario1(compression_method, communication_protocol):
    """
    Train locally, round-trip the model through the server, and compare MSE before and after.

    [PyMilo Server is not initialized with ML Model]

    :param compression_method: key used to index Compression
    :param communication_protocol: key used to index CommunicationProtocol
    :return: absolute difference between the MSE after and before streaming
    """
    # 1. create the model locally
    train_x, train_y, test_x, test_y = prepare_simple_regression_datasets()
    model = LinearRegression()
    # 2. train the model locally
    model.fit(train_x, train_y)
    client = PymiloClient(
        model=model,
        mode=PymiloClient.Mode.LOCAL,
        compressor=Compression[compression_method],
        communication_protocol=CommunicationProtocol[communication_protocol],
    )
    # 3. get client id + get ml model id [from remote server]
    client.register()
    client.register_ml_model()
    # 4. calculate mse before streaming
    mse_before = mean_squared_error(test_y, client.predict(test_x))
    # 5. upload the model to the server
    client.upload()
    # 6. download the model back to local
    client.download()
    # 7. calculate mse after streaming
    mse_after = mean_squared_error(test_y, client.predict(test_x))
    return np.abs(mse_after - mse_before)
================================================
FILE: tests/test_ml_streaming/scenarios/scenario2.py
================================================
import numpy as np
from pymilo.streaming import PymiloClient, Compression, CommunicationProtocol
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
def scenario2(compression_method, communication_protocol):
    """
    Upload an untrained model, train it in DELEGATE mode, download it, and compare remote vs local results.

    [PyMilo Server is not initialized with ML Model]

    The returned discrepancy adds the absolute MSE difference to the sum of
    element-wise absolute `coef_` differences, so positive and negative
    coefficient deviations cannot cancel each other out.

    :param compression_method: key used to index Compression
    :param communication_protocol: key used to index CommunicationProtocol
    :return: non-negative discrepancy between DELEGATE-mode and LOCAL-mode results
    """
    # 1. create the model locally (left untrained)
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    linear_regression = LinearRegression()
    client = PymiloClient(
        model=linear_regression,
        mode=PymiloClient.Mode.LOCAL,
        compressor=Compression[compression_method],
        communication_protocol=CommunicationProtocol[communication_protocol],
    )
    # 2. get client id + get ml model id [from remote server]
    client.register()
    client.register_ml_model()
    # 3. upload the model to the server
    client.upload()
    # 4. switch to DELEGATE mode, fit, and read coef_ in that mode
    client.toggle_mode(PymiloClient.Mode.DELEGATE)
    client.fit(x_train, y_train)
    remote_field = client.coef_
    # 5. calculate mse in DELEGATE mode
    result = client.predict(x_test)
    mse_server = mean_squared_error(y_test, result)
    # 6. download the model to local
    client.download()
    # 7. switch back to LOCAL mode and calculate mse locally
    client.toggle_mode(mode=PymiloClient.Mode.LOCAL)
    local_field = client.coef_
    result = client.predict(x_test)
    mse_local = mean_squared_error(y_test, result)
    # Take the absolute value per coefficient BEFORE summing; summing first
    # would let opposite-signed differences cancel and hide a mismatch.
    coef_discrepancy = np.sum(np.abs(local_field - remote_field))
    return np.abs(mse_server - mse_local) + coef_discrepancy
================================================
FILE: tests/test_ml_streaming/scenarios/scenario3.py
================================================
import numpy as np
from sklearn.metrics import mean_squared_error
from pymilo.streaming import PymiloClient, Compression, CommunicationProtocol
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
def scenario3(compression_method, communication_protocol):
    """
    Compare DELEGATE-mode predictions against the downloaded model's LOCAL-mode predictions.

    [PyMilo Server is initialized with ML Model]

    :param compression_method: key used to index Compression
    :param communication_protocol: key used to index CommunicationProtocol
    :return: absolute difference between the DELEGATE-mode and LOCAL-mode MSE
    """
    _, _, test_x, test_y = prepare_simple_regression_datasets()
    client = PymiloClient(
        mode=PymiloClient.Mode.LOCAL,
        compressor=Compression[compression_method],
        server_url="127.0.0.1:9000",
        communication_protocol=CommunicationProtocol[communication_protocol],
    )
    # Use fixed demo ids instead of registering a new client/model
    client.client_id = "0x_demo_client_id"
    client.ml_model_id = "0x_demo_ml_model_id"
    # 1. calculate mse in DELEGATE mode
    client.toggle_mode(PymiloClient.Mode.DELEGATE)
    mse_server = mean_squared_error(test_y, client.predict(test_x))
    # 2. download the model to local
    client.download()
    # 3. calculate mse in LOCAL mode
    client.toggle_mode(mode=PymiloClient.Mode.LOCAL)
    mse_local = mean_squared_error(test_y, client.predict(test_x))
    # 4. compare results
    return np.abs(mse_server - mse_local)
================================================
FILE: tests/test_ml_streaming/scenarios/scenario4.py
================================================
from pymilo.streaming import PymiloClient, Compression, CommunicationProtocol
from sklearn.linear_model import LinearRegression
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
def scenario4(compression_method, communication_protocol):
    """
    Exercise the access-control calls between a model owner and a second client.

    Covered calls, in order: register, register_ml_model, get_ml_models,
    upload, grant_access, get_allowance, get_allowed_models, revoke_access,
    deregister_ml_model and deregister.

    :param compression_method: name of a ``Compression`` member
    :param communication_protocol: name of a ``CommunicationProtocol`` member
    :return: 0 when every assertion holds
    """
    x_train, y_train, _, _ = prepare_simple_regression_datasets()

    # Locally trained model that the owner will publish.
    linear_regression = LinearRegression()
    linear_regression.fit(x_train, y_train)

    # Settings shared by both clients; only the owner carries a model.
    connection = {
        "mode": PymiloClient.Mode.LOCAL,
        "compressor": Compression[compression_method],
        "server_url": "127.0.0.1:8500",
        "communication_protocol": CommunicationProtocol[communication_protocol],
    }
    client_a = PymiloClient(model=linear_regression, **connection)  # model owner
    client_b = PymiloClient(**connection)  # will be granted access

    # 1. Register both clients
    client_a.register()
    client_b.register()
    assert client_a.client_id is not None, "client_a registration failed"
    assert client_b.client_id is not None, "client_b registration failed"
    assert client_a.client_id != client_b.client_id, "clients should have different IDs"

    # 2. Register model for client_a
    client_a.register_ml_model()
    assert client_a.ml_model_id is not None, "model registration failed"

    # 3. The new model id must be listed for its owner
    owned_models = client_a.get_ml_models()
    assert client_a.ml_model_id in owned_models, "registered model should appear in get_ml_models"

    # 4. Upload model from client_a
    client_a.upload()

    # 5. Grant access to client_b (client_a.ml_model_id is used implicitly)
    granted = client_a.grant_access(client_b.client_id)
    assert granted is True, "grant_access should succeed"

    # 6. The owner's allowance must now mention client_b and the model
    allowance = client_a.get_allowance()
    assert isinstance(allowance, dict), "get_allowance should return a dict"
    assert client_b.client_id in allowance, "client_b should be in allowance after grant"
    assert client_a.ml_model_id in allowance[client_b.client_id], "model should be in allowance"

    # 7. Same check from client_b's perspective
    visible_models = client_b.get_allowed_models(client_a.client_id)
    assert client_a.ml_model_id in visible_models, "model should be in allowed_models"

    # 8. Revoke access (client_a.ml_model_id is used implicitly)
    revoked = client_a.revoke_access(client_b.client_id)
    assert revoked is True, "revoke_access should succeed"

    # 9. client_b must no longer see the model
    visible_after_revoke = client_b.get_allowed_models(client_a.client_id)
    assert client_a.ml_model_id not in visible_after_revoke, "model should not be in allowed_models after revoke"

    # 10. Deregistering the model removes exactly one entry
    count_before = len(client_a.get_ml_models())
    client_a.deregister_ml_model()
    count_after = len(client_a.get_ml_models())
    assert count_after == count_before - 1, "model count should decrease after deregister"

    # 11. Deregister the clients
    client_b.deregister()
    client_a.deregister()
    return 0
================================================
FILE: tests/test_ml_streaming/test_streaming.py
================================================
import os
import time
import pytest
import subprocess
from filecmp import cmp
from sys import executable
from scenarios.scenario1 import scenario1
from scenarios.scenario2 import scenario2
from scenarios.scenario3 import scenario3
from scenarios.scenario4 import scenario4
from pymilo.streaming.util import generate_dockerfile
@pytest.fixture(
    scope="session",
    params=["NULL", "GZIP", "ZLIB", "LZMA", "BZ2"])
def prepare_bare_server(request):
    """
    Start ``run_server.py`` over REST once per compression method.

    The fixture yields ``(compression_method, "REST")`` and, on teardown,
    terminates the server process and waits for it to exit so that the next
    parametrization does not start while the previous server is still alive.

    :param request: pytest request object carrying the compression method
    :return: generator yielding the (compression, protocol) pair
    """
    compression_method = request.param
    # Using PyMilo direct CLI
    # server_proc = subprocess.Popen(
    #     [
    #         executable,
    #         "-m", "pymilo",
    #         "--compression", compression_method,
    #         "--protocol", "REST",
    #         "--port", "8000",
    #         "--bare",
    #     ],
    # )
    path = os.path.join(
        os.getcwd(),
        "tests",
        "test_ml_streaming",
        "run_server.py",
    )
    server_proc = subprocess.Popen(
        [
            executable,
            path,
            "--compression", compression_method,
            "--protocol", "REST"
        ],
    )
    try:
        # Fixed delay that gives the server time to start accepting requests.
        time.sleep(10)
        yield (compression_method, "REST")
    finally:
        server_proc.terminate()
        try:
            # Reap the child; terminate() alone only sends the signal.
            server_proc.wait(timeout=10)
        except subprocess.TimeoutExpired:
            # The server ignored the termination request: force it down.
            server_proc.kill()
            server_proc.wait()
@pytest.fixture(
    scope="session",
    params=["REST", "WEBSOCKET"])
def prepare_ml_server(request):
    """
    Start ``run_server.py`` with ``--init`` and ZLIB compression per protocol.

    The fixture yields ``(compression_method, communication_protocol)`` and,
    on teardown, terminates the server process and waits for it to exit so
    that the next parametrization does not start while the previous server
    is still alive.

    :param request: pytest request object carrying the communication protocol
    :return: generator yielding the (compression, protocol) pair
    """
    communication_protocol = request.param
    compression_method = "ZLIB"
    # Using PyMilo direct CLI
    # server_proc = subprocess.Popen(
    #     [
    #         executable,
    #         "-m", "pymilo",
    #         "--compression", compression_method,
    #         "--protocol", communication_protocol,
    #         "--port", "9000",
    #         "--load", os.path.join(os.getcwd(), "tests", "test_exceptions", "valid_jsons", "linear_regression.json")
    #         # "--load", "https://raw.githubusercontent.com/openscilab/pymilo/main/tests/test_exceptions/valid_jsons/linear_regression.json",
    #     ],
    # )
    path = os.path.join(
        os.getcwd(),
        "tests",
        "test_ml_streaming",
        "run_server.py",
    )
    server_proc = subprocess.Popen(
        [
            executable,
            path,
            "--compression", compression_method,
            "--protocol", communication_protocol,
            "--init",
        ],
    )
    try:
        # Fixed delay that gives the server time to start accepting requests.
        time.sleep(10)
        yield (compression_method, communication_protocol)
    finally:
        server_proc.terminate()
        try:
            # Reap the child; terminate() alone only sends the signal.
            server_proc.wait(timeout=10)
        except subprocess.TimeoutExpired:
            # The server ignored the termination request: force it down.
            server_proc.kill()
            server_proc.wait()
@pytest.fixture(
    scope="function",
    params=["REST", "WEBSOCKET"])
def prepare_access_control_server(request):
    """
    Start a fresh ``run_server.py`` on port 8500 for every test and protocol.

    The fixture yields ``(compression_method, communication_protocol)`` and,
    on teardown, terminates the server process, waits for it to exit and then
    pauses briefly before the next server is started on the same port.

    :param request: pytest request object carrying the communication protocol
    :return: generator yielding the (compression, protocol) pair
    """
    communication_protocol = request.param
    compression_method = "ZLIB"
    path = os.path.join(
        os.getcwd(),
        "tests",
        "test_ml_streaming",
        "run_server.py",
    )
    server_proc = subprocess.Popen(
        [
            executable,
            path,
            "--compression", compression_method,
            "--protocol", communication_protocol,
            "--port", "8500",
        ],
    )
    try:
        # Fixed delay that gives the server time to start accepting requests.
        time.sleep(10)
        yield (compression_method, communication_protocol)
    finally:
        server_proc.terminate()
        try:
            # Reap the child; terminate() alone only sends the signal.
            server_proc.wait(timeout=10)
        except subprocess.TimeoutExpired:
            # The server ignored the termination request: force it down.
            server_proc.kill()
            server_proc.wait()
        # Grace period kept from the original teardown before port 8500 is reused.
        time.sleep(2)
def test1(prepare_bare_server):
    """Scenario 1 against the bare server must return 0."""
    compression, protocol = prepare_bare_server
    outcome = scenario1(compression, protocol)
    assert outcome == 0
def test2(prepare_bare_server):
    """Scenario 2 against the bare server must return 0."""
    compression, protocol = prepare_bare_server
    outcome = scenario2(compression, protocol)
    assert outcome == 0
def test3(prepare_ml_server):
    """Scenario 3 against the model-initialized server must return 0."""
    compression, protocol = prepare_ml_server
    outcome = scenario3(compression, protocol)
    assert outcome == 0
def test4(prepare_access_control_server):
    """Scenario 4 against the access-control server must return 0."""
    compression, protocol = prepare_access_control_server
    outcome = scenario4(compression, protocol)
    assert outcome == 0
def test_dockerfile():
    """
    Check generated Dockerfiles against the reference files.

    Two Dockerfiles are generated, one for a URL model path and one for a
    local model path, and each is compared byte-for-byte with
    ``docker_files/Dockerfile1`` and ``docker_files/Dockerfile2``.
    The generated ``Dockerfile`` is removed even when generation or
    comparison raises, so a failing run does not leave it behind.
    """
    docker_files_folder = os.path.join(
        os.getcwd(),
        "tests",
        "test_ml_streaming",
        "docker_files",
    )
    try:
        generate_dockerfile(
            dockerfile_name="Dockerfile",
            model_path="https://raw.githubusercontent.com/openscilab/pymilo/main/tests/test_exceptions/valid_jsons/linear_regression.json")
        # shallow=False forces a content comparison instead of trusting os.stat signatures.
        r1 = cmp(
            'Dockerfile',
            os.path.join(docker_files_folder, "Dockerfile1"),
            shallow=False,
        )
        generate_dockerfile(
            dockerfile_name="Dockerfile",
            model_path="linear_regression.json",
        )
        r2 = cmp(
            'Dockerfile',
            os.path.join(docker_files_folder, "Dockerfile2"),
            shallow=False,
        )
    finally:
        # The file may not exist if the first generation failed.
        if os.path.exists('Dockerfile'):
            os.remove(path='Dockerfile')
    assert r1 and r2
================================================
FILE: tests/test_naive_bayes/bernoulli.py
================================================
from sklearn.naive_bayes import BernoulliNB
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "BernoulliNB"


def bernoulli_naive_bayes():
    """Fit a BernoulliNB and hand it to the PyMilo classification test."""
    train_x, train_y, test_x, test_y = prepare_simple_classification_datasets()
    classifier = BernoulliNB()
    classifier.fit(train_x, train_y)
    held_out = (test_x, test_y)
    pymilo_classification_test(classifier, MODEL_NAME, held_out)
================================================
FILE: tests/test_naive_bayes/categorical.py
================================================
from sklearn.naive_bayes import CategoricalNB
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "CategoricalNB"


def categorical_naive_bayes():
    """Fit a CategoricalNB and hand it to the PyMilo classification test."""
    train_x, train_y, test_x, test_y = prepare_simple_classification_datasets()
    classifier = CategoricalNB()
    classifier.fit(train_x, train_y)
    held_out = (test_x, test_y)
    pymilo_classification_test(classifier, MODEL_NAME, held_out)
================================================
FILE: tests/test_naive_bayes/complement.py
================================================
from sklearn.naive_bayes import ComplementNB
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "ComplementNB"


def complement_naive_bayes():
    """Fit a ComplementNB and hand it to the PyMilo classification test."""
    train_x, train_y, test_x, test_y = prepare_simple_classification_datasets()
    classifier = ComplementNB()
    classifier.fit(train_x, train_y)
    held_out = (test_x, test_y)
    pymilo_classification_test(classifier, MODEL_NAME, held_out)
================================================
FILE: tests/test_naive_bayes/gaussian.py
================================================
from sklearn.naive_bayes import GaussianNB
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "GaussianNB"


def gaussian_naive_bayes():
    """Fit a GaussianNB and hand it to the PyMilo classification test."""
    train_x, train_y, test_x, test_y = prepare_simple_classification_datasets()
    classifier = GaussianNB()
    classifier.fit(train_x, train_y)
    held_out = (test_x, test_y)
    pymilo_classification_test(classifier, MODEL_NAME, held_out)
================================================
FILE: tests/test_naive_bayes/multinomial.py
================================================
from sklearn.naive_bayes import MultinomialNB
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "MultinomialNB"


def multinomial_naive_bayes():
    """Fit a MultinomialNB and hand it to the PyMilo classification test."""
    train_x, train_y, test_x, test_y = prepare_simple_classification_datasets()
    classifier = MultinomialNB()
    classifier.fit(train_x, train_y)
    held_out = (test_x, test_y)
    pymilo_classification_test(classifier, MODEL_NAME, held_out)
================================================
FILE: tests/test_naive_bayes/test_naive_bayes_models.py
================================================
import os
import pytest
from gaussian import gaussian_naive_bayes
from multinomial import multinomial_naive_bayes
from complement import complement_naive_bayes
from bernoulli import bernoulli_naive_bayes
from categorical import categorical_naive_bayes
# Naive Bayes scenario functions, executed one after another by test_full().
NAIVE_BAYES_MODELS = [
gaussian_naive_bayes,
multinomial_naive_bayes,
complement_naive_bayes,
bernoulli_naive_bayes,
categorical_naive_bayes
]
@pytest.fixture(scope="session", autouse=True)
def reset_exported_models_directory():
    """Create ``tests/exported_naive_bayes`` or, if it exists, delete the files directly inside it."""
    target_dir = os.path.join(os.getcwd(), "tests", "exported_naive_bayes")
    if os.path.isdir(target_dir):
        # Only regular files are removed; sub-directories are left untouched.
        for entry in os.listdir(target_dir):
            entry_path = os.path.join(target_dir, entry)
            if os.path.isfile(entry_path):
                os.remove(entry_path)
    else:
        os.mkdir(target_dir)
def test_full():
    """Run every Naive Bayes scenario listed in NAIVE_BAYES_MODELS."""
    for run_scenario in NAIVE_BAYES_MODELS:
        run_scenario()
================================================
FILE: tests/test_neighbors/kneighbors_classifier.py
================================================
from sklearn.neighbors import KNeighborsClassifier
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "KNeighborsClassifier"


def kneighbors_classifier():
    """Fit a 3-neighbor KNeighborsClassifier and hand it to the PyMilo classification test."""
    train_x, train_y, test_x, test_y = prepare_simple_classification_datasets()
    classifier = KNeighborsClassifier(n_neighbors=3)
    classifier.fit(train_x, train_y)
    held_out = (test_x, test_y)
    pymilo_classification_test(classifier, MODEL_NAME, held_out)
================================================
FILE: tests/test_neighbors/kneighbors_regressor.py
================================================
from sklearn.neighbors import KNeighborsRegressor
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "KNeighborsRegressor"


def kneighbors_regressor():
    """Fit a 2-neighbor KNeighborsRegressor and hand it to the PyMilo regression test."""
    train_x, train_y, test_x, test_y = prepare_simple_regression_datasets()
    regressor = KNeighborsRegressor(n_neighbors=2)
    regressor.fit(train_x, train_y)
    held_out = (test_x, test_y)
    pymilo_regression_test(regressor, MODEL_NAME, held_out)
================================================
FILE: tests/test_neighbors/local_outlier_factor.py
================================================
from sklearn.neighbors import LocalOutlierFactor
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "LocalOutlierFactor"


def local_outlier_factor():
    """Fit a novelty-mode LocalOutlierFactor and hand it to the PyMilo classification test."""
    train_x, train_y, test_x, test_y = prepare_simple_classification_datasets()
    detector = LocalOutlierFactor(n_neighbors=2, novelty=True)
    detector.fit(train_x, train_y)
    held_out = (test_x, test_y)
    pymilo_classification_test(detector, MODEL_NAME, held_out)
================================================
FILE: tests/test_neighbors/nearest_centroid.py
================================================
from sklearn.neighbors import NearestCentroid
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "NearestCentroid"


def nearest_centroid():
    """Fit a NearestCentroid and hand it to the PyMilo classification test."""
    train_x, train_y, test_x, test_y = prepare_simple_classification_datasets()
    classifier = NearestCentroid()
    classifier.fit(train_x, train_y)
    held_out = (test_x, test_y)
    pymilo_classification_test(classifier, MODEL_NAME, held_out)
================================================
FILE: tests/test_neighbors/nearest_neighbor.py
================================================
from sklearn.neighbors import NearestNeighbors
from pymilo.utils.test_pymilo import pymilo_nearest_neighbor_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "NearestNeighbors"


def nearest_neighbor():
    """Fit a NearestNeighbors index and hand it to the PyMilo nearest-neighbor test."""
    train_x, train_y, test_x, test_y = prepare_simple_regression_datasets()
    index = NearestNeighbors(n_neighbors=2, radius=0.4)
    index.fit(train_x, train_y)
    held_out = (test_x, test_y)
    pymilo_nearest_neighbor_test(index, MODEL_NAME, held_out)
================================================
FILE: tests/test_neighbors/radius_neighbors_classifier.py
================================================
from sklearn.neighbors import RadiusNeighborsClassifier
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "RadiusNeighborsClassifier"


def radius_neighbors_classifier():
    """
    Fit a RadiusNeighborsClassifier and hand it to the PyMilo classification test.

    The test split is discarded: the model is evaluated on the same data it
    was fitted on.
    """
    train_x, train_y, _, _ = prepare_simple_classification_datasets()
    classifier = RadiusNeighborsClassifier(radius=1.0)
    classifier.fit(train_x, train_y)
    evaluation_data = (train_x, train_y)
    pymilo_classification_test(classifier, MODEL_NAME, evaluation_data)
================================================
FILE: tests/test_neighbors/radius_neighbors_regressor.py
================================================
from sklearn.neighbors import RadiusNeighborsRegressor
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "RadiusNeighborsRegressor"


def radius_neighbors_regressor():
    """Fit a RadiusNeighborsRegressor and hand it to the PyMilo regression test."""
    train_x, train_y, test_x, test_y = prepare_simple_regression_datasets()
    regressor = RadiusNeighborsRegressor(radius=1.0)
    regressor.fit(train_x, train_y)
    held_out = (test_x, test_y)
    pymilo_regression_test(regressor, MODEL_NAME, held_out)
================================================
FILE: tests/test_neighbors/test_neighbors.py
================================================
import os
import pytest
from kneighbors_regressor import kneighbors_regressor
from kneighbors_classifier import kneighbors_classifier
from radius_neighbors_regressor import radius_neighbors_regressor
from radius_neighbors_classifier import radius_neighbors_classifier
from nearest_neighbor import nearest_neighbor
from nearest_centroid import nearest_centroid
from local_outlier_factor import local_outlier_factor
# Neighbor-based scenario functions grouped by family; test_full() runs all of them.
NEIGHBORS = {
"KNeighbors": [kneighbors_regressor, kneighbors_classifier],
"RadiusNeighbors": [radius_neighbors_regressor, radius_neighbors_classifier],
"Nearests": [nearest_neighbor, nearest_centroid],
"LocalOutlierDetectors": [local_outlier_factor],
}
@pytest.fixture(scope="session", autouse=True)
def reset_exported_models_directory():
    """Create ``tests/exported_neighbors`` or, if it exists, delete the files directly inside it."""
    target_dir = os.path.join(os.getcwd(), "tests", "exported_neighbors")
    if os.path.isdir(target_dir):
        # Only regular files are removed; sub-directories are left untouched.
        for entry in os.listdir(target_dir):
            entry_path = os.path.join(target_dir, entry)
            if os.path.isfile(entry_path):
                os.remove(entry_path)
    else:
        os.mkdir(target_dir)
def test_full():
    """Run every neighbor scenario, family by family, in declaration order."""
    for scenarios in NEIGHBORS.values():
        for run_scenario in scenarios:
            run_scenario()
================================================
FILE: tests/test_neural_networks/bernoulli_rbm/bernoulli_rbm.py
================================================
import os
from sklearn import metrics
from sklearn.base import clone
from sklearn.pipeline import Pipeline
from sklearn.neural_network import BernoulliRBM
from sklearn.linear_model import LogisticRegression
from pymilo.pymilo_obj import Export
from pymilo.pymilo_obj import Import
from pymilo.utils.test_pymilo import pymilo_export_path
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "Bernoulli Restricted Boltzmann Machine (RBM)"


def bernoulli_rbm():
    """
    Check that a BernoulliRBM survives a PyMilo export/import round trip.

    An RBM + LogisticRegression pipeline is fitted and its classification
    report recorded. The RBM is then exported to JSON, imported again, placed
    in a new pipeline with a fresh LogisticRegression, and the report of that
    pipeline must be identical to the first one.

    The logistic-regression clone that the original trained directly on the
    raw features has been removed: it was fitted twice but never read.
    """
    x_train, y_train, x_test, y_test = prepare_simple_classification_datasets()

    # Reference pipeline built around the original RBM.
    logistic = LogisticRegression(solver="newton-cg", tol=1)
    rbm = BernoulliRBM(random_state=0, verbose=True)
    rbm_features_classifier = Pipeline(steps=[("rbm", rbm), ("logistic", logistic)])

    # Hyper-parameters. These were set by cross-validation,
    # using a GridSearchCV. Here we are not performing cross-validation to
    # save time.
    rbm.learning_rate = 0.06
    rbm.n_iter = 10
    # More components tend to give better prediction performance, but larger
    # fitting time
    rbm.n_components = 100
    logistic.C = 6000

    # Training RBM-Logistic Pipeline
    rbm_features_classifier.fit(x_train, y_train)
    Y_pred = rbm_features_classifier.predict(x_test)
    before_report = metrics.classification_report(y_test, Y_pred)

    # Export the fitted RBM to JSON and load it back.
    export_model_path = pymilo_export_path(rbm)
    exported_model = Export(rbm)
    exported_model_serialized_path = os.path.join(
        os.getcwd(), "tests", export_model_path, MODEL_NAME + '.json')
    exported_model.save(exported_model_serialized_path)
    imported_model = Import(exported_model_serialized_path)
    imported_rbm = imported_model.to_model()

    # Same pipeline layout, now around the imported RBM.
    # NOTE(review): Pipeline.fit refits its steps, so the imported RBM is
    # retrained here - confirm this is the intended comparison.
    logistic = LogisticRegression(solver="newton-cg", tol=1)
    rbm_features_classifier = Pipeline(steps=[("rbm", imported_rbm), ("logistic", logistic)])
    logistic.C = 6000

    # Training RBM-Logistic Pipeline
    rbm_features_classifier.fit(x_train, y_train)
    Y_pred = rbm_features_classifier.predict(x_test)
    after_report = metrics.classification_report(y_test, Y_pred)

    assert before_report == after_report
================================================
FILE: tests/test_neural_networks/mlp/mlp_classification.py
================================================
from sklearn.neural_network import MLPClassifier
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "Multi Layer Perceptron Classification"


def multi_layer_perceptron_classification():
    """
    Fit an MLPClassifier once and assert that the PyMilo classification test passes.

    The original fitted the same estimator twice on identical data; with a
    fixed ``random_state`` and the default ``warm_start=False`` the second
    fit only repeated the first, so a single fit is kept.
    """
    x_train, y_train, x_test, y_test = prepare_simple_classification_datasets()
    # Create the MLPClassifier and train it on the training set.
    multi_layer_perceptron_classifier = MLPClassifier(random_state=1, max_iter=500)
    multi_layer_perceptron_classifier.fit(x_train, y_train)
    assert pymilo_classification_test(
        multi_layer_perceptron_classifier, MODEL_NAME, (x_test, y_test)) == True
================================================
FILE: tests/test_neural_networks/mlp/mlp_regression.py
================================================
from sklearn.neural_network import MLPRegressor
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "Multi Layer Perceptron Regression"


def multi_layer_perceptron_regression():
    """
    Fit an MLPRegressor once and assert that the PyMilo regression test passes.

    The original fitted the same estimator twice on identical data; with a
    fixed ``random_state`` and the default ``warm_start=False`` the second
    fit only repeated the first, so a single fit is kept.
    """
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    # Create the MLPRegressor and train it on the training set.
    multi_layer_perceptron_regressor = MLPRegressor(random_state=1, max_iter=500)
    multi_layer_perceptron_regressor.fit(x_train, y_train)
    assert pymilo_regression_test(
        multi_layer_perceptron_regressor, MODEL_NAME, (x_test, y_test)) == True
================================================
FILE: tests/test_neural_networks/test_neural_networks.py
================================================
import os
import pytest
from mlp.mlp_regression import multi_layer_perceptron_regression
from mlp.mlp_classification import multi_layer_perceptron_classification
from bernoulli_rbm.bernoulli_rbm import bernoulli_rbm
# Neural-network scenario functions run by test_full().
# Note: the "MLP_REGRESSION" entry holds both the MLP regression and the
# MLP classification scenario.
NEURAL_NETWORKS = {
"MLP_REGRESSION": [multi_layer_perceptron_regression, multi_layer_perceptron_classification],
"BERNOULLI_RBM": [bernoulli_rbm]
}
@pytest.fixture(scope="session", autouse=True)
def reset_exported_models_directory():
    """Create ``tests/exported_neural_networks`` or, if it exists, delete the files directly inside it."""
    target_dir = os.path.join(os.getcwd(), "tests", "exported_neural_networks")
    if os.path.isdir(target_dir):
        # Only regular files are removed; sub-directories are left untouched.
        for entry in os.listdir(target_dir):
            entry_path = os.path.join(target_dir, entry)
            if os.path.isfile(entry_path):
                os.remove(entry_path)
    else:
        os.mkdir(target_dir)
def test_full():
    """Run every neural-network scenario, group by group, in declaration order."""
    for scenarios in NEURAL_NETWORKS.values():
        for run_scenario in scenarios:
            run_scenario()
================================================
FILE: tests/test_preprocessings/binarizer.py
================================================
from numpy import array_equal
from sklearn.preprocessing import Binarizer
from pymilo.utils.test_pymilo import report_status
from pymilo.transporters.preprocessing_transporter import PreprocessingTransporter
from util import get_path, write_and_read
MODEL_NAME = "Binarizer"


def binarizer():
    """Round-trip a fitted Binarizer through PreprocessingTransporter and compare transforms."""
    samples = [[1., -1., 2.],
               [2., 0., 0.],
               [0., 1., -1.]]
    fitted = Binarizer().fit(samples)
    expected = fitted.transform(samples)

    # serialize -> write_and_read at the model's path -> deserialize
    transporter = PreprocessingTransporter()
    serialized = transporter.serialize_pre_module(fitted)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_pre_module(reloaded)
    actual = restored.transform(samples)

    outputs_match = array_equal(expected, actual)
    report_status(outputs_match, MODEL_NAME)
    assert outputs_match
================================================
FILE: tests/test_preprocessings/function_transformer.py
================================================
from numpy import array_equal, log1p
from sklearn.preprocessing import FunctionTransformer
from pymilo.transporters.preprocessing_transporter import PreprocessingTransporter
from pymilo.utils.test_pymilo import report_status
from util import get_path, write_and_read
MODEL_NAME = "FunctionTransformer"


def function_transformer():
    """Round-trip a log1p FunctionTransformer through PreprocessingTransporter and compare transforms."""
    samples = [[0, 1], [2, 3]]
    fitted = FunctionTransformer(log1p).fit(samples)
    expected = fitted.transform(samples)

    # serialize -> write_and_read at the model's path -> deserialize
    transporter = PreprocessingTransporter()
    serialized = transporter.serialize_pre_module(fitted)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_pre_module(reloaded)
    actual = restored.transform(samples)

    outputs_match = array_equal(expected, actual)
    report_status(outputs_match, MODEL_NAME)
    assert outputs_match
================================================
FILE: tests/test_preprocessings/kbins_discretizer.py
================================================
from numpy import array_equal
from sklearn.preprocessing import KBinsDiscretizer
from pymilo.transporters.preprocessing_transporter import PreprocessingTransporter
from pymilo.utils.test_pymilo import report_status
from util import get_path, write_and_read
MODEL_NAME = "KBinsDiscretizer"


def kbins_discretizer():
    """Round-trip a fitted KBinsDiscretizer through PreprocessingTransporter and compare transforms."""
    samples = [[-2, 1, -4, -1],
               [-1, 2, -3, -0.5],
               [0, 3, -2, 0.5],
               [1, 4, -1, 2]]
    discretizer = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')
    fitted = discretizer.fit(samples)
    expected = fitted.transform(samples)

    # serialize -> write_and_read at the model's path -> deserialize
    transporter = PreprocessingTransporter()
    serialized = transporter.serialize_pre_module(fitted)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_pre_module(reloaded)
    actual = restored.transform(samples)

    outputs_match = array_equal(expected, actual)
    report_status(outputs_match, MODEL_NAME)
    assert outputs_match
================================================
FILE: tests/test_preprocessings/kernel_centerer.py
================================================
from numpy import array_equal
from sklearn.preprocessing import KernelCenterer
from sklearn.metrics.pairwise import pairwise_kernels
from pymilo.utils.test_pymilo import report_status
from pymilo.transporters.preprocessing_transporter import PreprocessingTransporter
from util import get_path, write_and_read
MODEL_NAME = "KernelCenterer"


def kernel_centerer():
    """Round-trip a fitted KernelCenterer through PreprocessingTransporter and compare transforms."""
    samples = [[1., -2., 2.],
               [-2., 1., 3.],
               [4., 1., -2.]]
    # The centerer is fitted on, and applied to, the linear kernel matrix of the samples.
    gram = pairwise_kernels(samples, metric='linear')
    fitted = KernelCenterer().fit(gram)
    expected = fitted.transform(gram)

    # serialize -> write_and_read at the model's path -> deserialize
    transporter = PreprocessingTransporter()
    serialized = transporter.serialize_pre_module(fitted)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_pre_module(reloaded)
    actual = restored.transform(gram)

    outputs_match = array_equal(expected, actual)
    report_status(outputs_match, MODEL_NAME)
    assert outputs_match
================================================
FILE: tests/test_preprocessings/label_binarizer.py
================================================
from numpy import array_equal
from sklearn.preprocessing import LabelBinarizer
from pymilo.transporters.preprocessing_transporter import PreprocessingTransporter
from pymilo.utils.test_pymilo import report_status
from util import get_path, write_and_read
MODEL_NAME = "LabelBinarizer"


def label_binarizer():
    """Round-trip a fitted LabelBinarizer through PreprocessingTransporter and compare transforms."""
    labels = ['yes', 'no', 'no', 'yes']
    fitted = LabelBinarizer().fit(labels)
    expected = fitted.transform(labels)

    # serialize -> write_and_read at the model's path -> deserialize
    transporter = PreprocessingTransporter()
    serialized = transporter.serialize_pre_module(fitted)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_pre_module(reloaded)
    actual = restored.transform(labels)

    outputs_match = array_equal(expected, actual)
    report_status(outputs_match, MODEL_NAME)
    assert outputs_match
================================================
FILE: tests/test_preprocessings/label_encoder.py
================================================
from numpy import array_equal
from sklearn.preprocessing import LabelEncoder
from pymilo.transporters.preprocessing_transporter import PreprocessingTransporter
from pymilo.utils.test_pymilo import report_status
from util import get_path, write_and_read
MODEL_NAME = "LabelEncoder"


def label_encoder():
    """Round-trip a fitted LabelEncoder through PreprocessingTransporter and compare transforms."""
    labels = ["paris", "paris", "tokyo", "amsterdam"]
    fitted = LabelEncoder().fit(labels)
    expected = fitted.transform(labels)

    # serialize -> write_and_read at the model's path -> deserialize
    transporter = PreprocessingTransporter()
    serialized = transporter.serialize_pre_module(fitted)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_pre_module(reloaded)
    actual = restored.transform(labels)

    outputs_match = array_equal(expected, actual)
    report_status(outputs_match, MODEL_NAME)
    assert outputs_match
================================================
FILE: tests/test_preprocessings/max_abs_scaler.py
================================================
from numpy import array_equal
from sklearn.preprocessing import MaxAbsScaler
from pymilo.utils.test_pymilo import report_status
from pymilo.transporters.preprocessing_transporter import PreprocessingTransporter
from util import get_path, write_and_read
MODEL_NAME = "MaxAbsScaler"


def max_abs_scaler():
    """Round-trip a fitted MaxAbsScaler through PreprocessingTransporter and compare transforms."""
    samples = [[1., -1., 2.],
               [2., 0., 0.],
               [0., 1., -1.]]
    fitted = MaxAbsScaler().fit(samples)
    expected = fitted.transform(samples)

    # serialize -> write_and_read at the model's path -> deserialize
    transporter = PreprocessingTransporter()
    serialized = transporter.serialize_pre_module(fitted)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_pre_module(reloaded)
    actual = restored.transform(samples)

    outputs_match = array_equal(expected, actual)
    report_status(outputs_match, MODEL_NAME)
    assert outputs_match
================================================
FILE: tests/test_preprocessings/multilabel_binarizer.py
================================================
from numpy import array_equal
from sklearn.preprocessing import MultiLabelBinarizer
from pymilo.utils.test_pymilo import report_status
from pymilo.transporters.preprocessing_transporter import PreprocessingTransporter
from util import get_path, write_and_read
MODEL_NAME = "MultiLabelBinarizer"


def multilabel_binarizer():
    """Round-trip a fitted MultiLabelBinarizer through PreprocessingTransporter and compare transforms."""
    label_sets = [{'sci-fi', 'thriller'}, {'comedy'}]
    fitted = MultiLabelBinarizer().fit(label_sets)
    expected = fitted.transform(label_sets)

    # serialize -> write_and_read at the model's path -> deserialize
    transporter = PreprocessingTransporter()
    serialized = transporter.serialize_pre_module(fitted)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_pre_module(reloaded)
    actual = restored.transform(label_sets)

    outputs_match = array_equal(expected, actual)
    report_status(outputs_match, MODEL_NAME)
    assert outputs_match
================================================
FILE: tests/test_preprocessings/normalizer.py
================================================
from numpy import array_equal
from sklearn.preprocessing import Normalizer
from pymilo.utils.test_pymilo import report_status
from pymilo.transporters.preprocessing_transporter import PreprocessingTransporter
from util import get_path, write_and_read
MODEL_NAME = "Normalizer"


def normalizer():
    """Round-trip a fitted Normalizer through PreprocessingTransporter and compare transforms."""
    samples = [[4, 1, 2, 2],
               [1, 3, 9, 3],
               [5, 7, 5, 1]]
    fitted = Normalizer().fit(samples)
    expected = fitted.transform(samples)

    # serialize -> write_and_read at the model's path -> deserialize
    transporter = PreprocessingTransporter()
    serialized = transporter.serialize_pre_module(fitted)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_pre_module(reloaded)
    actual = restored.transform(samples)

    outputs_match = array_equal(expected, actual)
    report_status(outputs_match, MODEL_NAME)
    assert outputs_match
================================================
FILE: tests/test_preprocessings/one_hot_encoder.py
================================================
from numpy import array_equal
from sklearn.preprocessing import OneHotEncoder
from pymilo.utils.test_pymilo import report_status
from pymilo.transporters.preprocessing_transporter import PreprocessingTransporter
from util import get_path, write_and_read
MODEL_NAME = "OneHotEncoder"


def one_hot_encoder():
    """Round-trip a fitted OneHotEncoder through PreprocessingTransporter and compare transforms."""
    samples = [['Male', 1], ['Female', 3], ['Female', 2]]
    fitted = OneHotEncoder(handle_unknown='ignore').fit(samples)
    # transform() output is converted with .toarray() before comparison.
    expected = fitted.transform(samples).toarray()

    # serialize -> write_and_read at the model's path -> deserialize
    transporter = PreprocessingTransporter()
    serialized = transporter.serialize_pre_module(fitted)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_pre_module(reloaded)
    actual = restored.transform(samples).toarray()

    outputs_match = array_equal(expected, actual)
    report_status(outputs_match, MODEL_NAME)
    assert outputs_match
================================================
FILE: tests/test_preprocessings/ordinal_encoder.py
================================================
from numpy import array_equal
from sklearn.preprocessing import OrdinalEncoder
from pymilo.utils.test_pymilo import report_status
from pymilo.transporters.preprocessing_transporter import PreprocessingTransporter
from util import get_path, write_and_read
MODEL_NAME = "OrdinalEncoder"


def ordinal_encoder():
    """
    Round-trip a fitted OrdinalEncoder through PreprocessingTransporter and compare transforms.

    The third sample previously read ``'Female,'`` (comma inside the quotes),
    which created an unintended extra category; it now matches the
    ``'Female'`` value used by the second sample.
    """
    X = [['Male', 1], ['Female', 3], ['Female', 2]]
    _ordinal_encoder = OrdinalEncoder().fit(X)
    pre_result = _ordinal_encoder.transform(X)

    # serialize -> write_and_read at the model's path -> deserialize
    pt = PreprocessingTransporter()
    post_pymilo_pre_model = pt.deserialize_pre_module(
        write_and_read(
            pt.serialize_pre_module(_ordinal_encoder),
            get_path(MODEL_NAME)))
    post_result = post_pymilo_pre_model.transform(X)

    comparison_result = array_equal(pre_result, post_result)
    report_status(comparison_result, MODEL_NAME)
    assert comparison_result
================================================
FILE: tests/test_preprocessings/polynomial_features.py
================================================
from numpy import array_equal, arange
from sklearn.preprocessing import PolynomialFeatures
from pymilo.transporters.preprocessing_transporter import PreprocessingTransporter
from pymilo.utils.test_pymilo import report_status
from util import get_path, write_and_read
MODEL_NAME = "PolynomialFeatures"


def polynomial_features():
    """Check that fitted PolynomialFeatures transforms identically after a PyMilo JSON round trip."""
    samples = arange(6).reshape(3, 2)
    fitted = PolynomialFeatures().fit(samples)
    expected = fitted.transform(samples)

    # Serialize, persist to disk as JSON, read back, and rebuild the module.
    transporter = PreprocessingTransporter()
    serialized = transporter.serialize_pre_module(fitted)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_pre_module(reloaded)

    actual = restored.transform(samples)
    is_identical = array_equal(expected, actual)
    report_status(is_identical, MODEL_NAME)
    assert is_identical
================================================
FILE: tests/test_preprocessings/power_transformer.py
================================================
from numpy import array_equal
from sklearn.preprocessing import PowerTransformer
from pymilo.utils.test_pymilo import report_status
from pymilo.transporters.preprocessing_transporter import PreprocessingTransporter
from util import get_path, write_and_read
MODEL_NAME = "PowerTransformer"


def power_transformer():
    """Check that a fitted PowerTransformer transforms identically after a PyMilo JSON round trip."""
    fitted = PowerTransformer()
    samples = [[1, 2], [3, 2], [4, 5]]
    fitted = fitted.fit(samples)
    expected = fitted.transform(samples)

    # Serialize, persist to disk as JSON, read back, and rebuild the module.
    transporter = PreprocessingTransporter()
    serialized = transporter.serialize_pre_module(fitted)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_pre_module(reloaded)

    actual = restored.transform(samples)
    is_identical = array_equal(expected, actual)
    report_status(is_identical, MODEL_NAME)
    assert is_identical
================================================
FILE: tests/test_preprocessings/quantile_transformer.py
================================================
from numpy import array_equal, random, sort
from sklearn.preprocessing import QuantileTransformer
from pymilo.utils.test_pymilo import report_status
from pymilo.transporters.preprocessing_transporter import PreprocessingTransporter
from util import get_path, write_and_read
MODEL_NAME = "QuantileTransformer"


def quantile_transformer():
    """Check that a fitted QuantileTransformer transforms identically after a PyMilo JSON round trip."""
    # Seeded generator so the sample column is reproducible between runs.
    generator = random.RandomState(0)
    draws = generator.normal(loc=0.5, scale=0.25, size=(25, 1))
    samples = sort(draws, axis=0)
    fitted = QuantileTransformer(n_quantiles=10, random_state=0).fit(samples)
    expected = fitted.transform(samples)

    # Serialize, persist to disk as JSON, read back, and rebuild the module.
    transporter = PreprocessingTransporter()
    serialized = transporter.serialize_pre_module(fitted)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_pre_module(reloaded)

    actual = restored.transform(samples)
    is_identical = array_equal(expected, actual)
    report_status(is_identical, MODEL_NAME)
    assert is_identical
================================================
FILE: tests/test_preprocessings/robust_scaler.py
================================================
from numpy import array_equal
from sklearn.preprocessing import RobustScaler
from pymilo.utils.test_pymilo import report_status
from pymilo.transporters.preprocessing_transporter import PreprocessingTransporter
from util import get_path, write_and_read
MODEL_NAME = "RobustScaler"


def robust_scaler():
    """Check that a fitted RobustScaler transforms identically after a PyMilo JSON round trip."""
    samples = [[1., -2., 2.],
               [-2., 1., 3.],
               [4., 1., -2.]]
    fitted = RobustScaler().fit(samples)
    expected = fitted.transform(samples)

    # Serialize, persist to disk as JSON, read back, and rebuild the module.
    transporter = PreprocessingTransporter()
    serialized = transporter.serialize_pre_module(fitted)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_pre_module(reloaded)

    actual = restored.transform(samples)
    is_identical = array_equal(expected, actual)
    report_status(is_identical, MODEL_NAME)
    assert is_identical
================================================
FILE: tests/test_preprocessings/spline_transformer.py
================================================
from numpy import array_equal, arange
from sklearn.preprocessing import SplineTransformer
from pymilo.utils.test_pymilo import report_status
from pymilo.transporters.preprocessing_transporter import PreprocessingTransporter
from util import get_path, write_and_read
MODEL_NAME = "SplineTransformer"


def spline_transformer():
    """Compare SplineTransformer output before and after a PyMilo JSON round trip."""
    samples = arange(6).reshape(6, 1)
    original = SplineTransformer(degree=2, n_knots=3)
    expected = original.fit_transform(samples)

    # Serialize, persist to disk as JSON, read back, and rebuild the module.
    transporter = PreprocessingTransporter()
    serialized = transporter.serialize_pre_module(original)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_pre_module(reloaded)

    # NOTE(review): the restored module is re-fitted here (fit_transform, not
    # transform), so this comparison may not exercise the deserialized fitted
    # state -- confirm whether transform() was intended.
    actual = restored.fit_transform(samples)
    is_identical = array_equal(expected, actual)
    report_status(is_identical, MODEL_NAME)
    assert is_identical
================================================
FILE: tests/test_preprocessings/standard_scaler.py
================================================
from numpy import array_equal
from sklearn.preprocessing import StandardScaler
from pymilo.utils.test_pymilo import report_status
from pymilo.transporters.preprocessing_transporter import PreprocessingTransporter
from util import get_path, write_and_read
MODEL_NAME = "StandardScaler"


def standard_scaler():
    """Check that a fitted StandardScaler transforms identically after a PyMilo JSON round trip."""
    samples = [[0, 0], [0, 0], [1, 1], [1, 1]]
    fitted = StandardScaler().fit(samples)
    expected = fitted.transform(samples)

    # Serialize, persist to disk as JSON, read back, and rebuild the module.
    transporter = PreprocessingTransporter()
    serialized = transporter.serialize_pre_module(fitted)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_pre_module(reloaded)

    actual = restored.transform(samples)
    is_identical = array_equal(expected, actual)
    report_status(is_identical, MODEL_NAME)
    assert is_identical
================================================
FILE: tests/test_preprocessings/target_encoder.py
================================================
from numpy import array_equal, array
from sklearn.preprocessing import TargetEncoder
from pymilo.transporters.preprocessing_transporter import PreprocessingTransporter
from pymilo.utils.test_pymilo import report_status
from util import get_path, write_and_read
MODEL_NAME = "TargetEncoder"


def target_encoder():
    """Check that a fitted TargetEncoder transforms identically after a PyMilo JSON round trip."""
    # One categorical column with 88 rows, paired with 88 numeric targets.
    categories = ["dog"] * 20 + ["cat"] * 30 + ["snake"] * 38
    X = array([categories], dtype=object).T
    y = [90.3] * 5 + [80.1] * 15 + [20.4] * 5 + [20.1] * 25 + [21.2] * 8 + [49] * 30
    fitted = TargetEncoder(smooth="auto").fit(X, y)
    expected = fitted.transform(X)

    # Serialize, persist to disk as JSON, read back, and rebuild the module.
    transporter = PreprocessingTransporter()
    serialized = transporter.serialize_pre_module(fitted)
    reloaded = write_and_read(serialized, get_path(MODEL_NAME))
    restored = transporter.deserialize_pre_module(reloaded)

    actual = restored.transform(X)
    is_identical = array_equal(expected, actual)
    report_status(is_identical, MODEL_NAME)
    assert is_identical
================================================
FILE: tests/test_preprocessings/test_preprocessings.py
================================================
import os
import pytest
from pymilo.pymilo_param import SKLEARN_PREPROCESSING_TABLE, NOT_SUPPORTED
from one_hot_encoder import one_hot_encoder
from label_binarizer import label_binarizer
from label_encoder import label_encoder
from standard_scaler import standard_scaler
from binarizer import binarizer
from function_transformer import function_transformer
from kernel_centerer import kernel_centerer
from multilabel_binarizer import multilabel_binarizer
from max_abs_scaler import max_abs_scaler
from normalizer import normalizer
from ordinal_encoder import ordinal_encoder
from polynomial_features import polynomial_features
from robust_scaler import robust_scaler
from quantile_transformer import quantile_transformer
from kbins_discretizer import kbins_discretizer
from power_transformer import power_transformer
if SKLEARN_PREPROCESSING_TABLE["SplineTransformer"] != NOT_SUPPORTED:
from spline_transformer import spline_transformer
if SKLEARN_PREPROCESSING_TABLE["TargetEncoder"] != NOT_SUPPORTED:
from target_encoder import target_encoder
# Preprocessing checks iterated by test_full. A model marked NOT_SUPPORTED in
# SKLEARN_PREPROCESSING_TABLE is represented by a (None, model_name)
# placeholder tuple instead of its check function.
PREPROCESSINGS = [one_hot_encoder,
                  label_binarizer,
                  label_encoder,
                  standard_scaler,
                  binarizer,
                  function_transformer,
                  kernel_centerer,
                  multilabel_binarizer,
                  max_abs_scaler,
                  normalizer,
                  ordinal_encoder,
                  polynomial_features,
                  robust_scaler,
                  quantile_transformer,
                  kbins_discretizer,
                  power_transformer,
                  spline_transformer if SKLEARN_PREPROCESSING_TABLE["SplineTransformer"] != NOT_SUPPORTED else (None, "SplineTransformer"),
                  target_encoder if SKLEARN_PREPROCESSING_TABLE["TargetEncoder"] != NOT_SUPPORTED else (None, "TargetEncoder")
                  ]
@pytest.fixture(scope="session", autouse=True)
def reset_exported_models_directory():
    """Create tests/exported_preprocessings under the cwd, or empty it of files if it exists."""
    target_dir = os.path.join(os.getcwd(), "tests", "exported_preprocessings")
    if os.path.isdir(target_dir):
        # Remove only regular files directly inside; sub-directories are left alone.
        for entry in os.listdir(target_dir):
            entry_path = os.path.join(target_dir, entry)
            if os.path.isfile(entry_path):
                os.remove(entry_path)
    else:
        os.mkdir(target_dir)
def test_full():
    """Run every registered preprocessing check.

    Entries of PREPROCESSINGS are either a callable or a
    ``(callable_or_None, model_name)`` tuple. A tuple whose first item is
    ``None`` marks an unsupported model: a notice is printed and the entry
    is skipped.
    """
    for model in PREPROCESSINGS:
        if isinstance(model, tuple):
            func, model_name = model
            if func is None:
                print("Model: " + model_name + " is not supported in this python version.")
                continue
            # Unwrap so the callable, not the tuple, is invoked below.
            model = func
        model()
================================================
FILE: tests/test_preprocessings/util.py
================================================
import os
import json
def write_and_read(serialized_model, file_addr):
    """Dump ``serialized_model`` as indented JSON to ``file_addr`` and return it parsed back."""
    with open(file_addr, 'w') as sink:
        payload = json.dumps(serialized_model, indent=4)
        sink.write(payload)
    with open(file_addr, 'r') as source:
        restored = json.loads(source.read())
    return restored
def get_path(model_name):
    """Return the JSON export path for ``model_name`` under <cwd>/tests/exported_preprocessings."""
    file_name = model_name + ".json"
    export_dir = os.path.join(os.getcwd(), "tests", "exported_preprocessings")
    return os.path.join(export_dir, file_name)
================================================
FILE: tests/test_svms/linear_svc.py
================================================
from sklearn.svm import LinearSVC
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "LinearSVC"


def linear_svc():
    """Fit a LinearSVC on the simple classification data and run the PyMilo classification test."""
    x_train, y_train, x_test, y_test = prepare_simple_classification_datasets()
    model = LinearSVC(random_state=0, tol=1e-5)
    model = model.fit(x_train, y_train)
    test_pair = (x_test, y_test)
    pymilo_classification_test(model, MODEL_NAME, test_pair)
================================================
FILE: tests/test_svms/linear_svr.py
================================================
from sklearn.svm import LinearSVR
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "LinearSVR"


def linear_svr():
    """Fit a LinearSVR on the simple regression data and run the PyMilo regression test."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    model = LinearSVR(random_state=3, tol=1e-5)
    model = model.fit(x_train, y_train)
    test_pair = (x_test, y_test)
    pymilo_regression_test(model, MODEL_NAME, test_pair)
================================================
FILE: tests/test_svms/nu_svc.py
================================================
from sklearn.svm import NuSVC
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "NuSVC"


def nu_svc():
    """Fit a NuSVC on the simple classification data and run the PyMilo classification test."""
    x_train, y_train, x_test, y_test = prepare_simple_classification_datasets()
    model = NuSVC()
    model = model.fit(x_train, y_train)
    test_pair = (x_test, y_test)
    pymilo_classification_test(model, MODEL_NAME, test_pair)
================================================
FILE: tests/test_svms/nu_svr.py
================================================
from sklearn.svm import NuSVR
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
# Must be distinct from the NuSVC test's name: it is passed to
# pymilo_regression_test as this model's identifier.
MODEL_NAME = "NuSVR"


def nu_svr():
    """Fit a NuSVR on the simple regression data and run the PyMilo regression test."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    model = NuSVR(C=1.0, nu=0.1).fit(x_train, y_train)
    pymilo_regression_test(model, MODEL_NAME, (x_test, y_test))
================================================
FILE: tests/test_svms/one_class_svm.py
================================================
from sklearn.svm import OneClassSVM
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "OneClassSVM"


def one_class_svm():
    """Fit a OneClassSVM on the simple classification data and run the PyMilo classification test."""
    x_train, y_train, x_test, y_test = prepare_simple_classification_datasets()
    model = OneClassSVM(gamma='auto')
    model = model.fit(x_train, y_train)
    test_pair = (x_test, y_test)
    pymilo_classification_test(model, MODEL_NAME, test_pair)
================================================
FILE: tests/test_svms/svc.py
================================================
from sklearn.svm import SVC
from pymilo.utils.test_pymilo import pymilo_classification_test
from pymilo.utils.data_exporter import prepare_simple_classification_datasets
MODEL_NAME = "SVC"


def svc():
    """Fit an SVC on the simple classification data and run the PyMilo classification test."""
    x_train, y_train, x_test, y_test = prepare_simple_classification_datasets()
    model = SVC(gamma='auto')
    model = model.fit(x_train, y_train)
    test_pair = (x_test, y_test)
    pymilo_classification_test(model, MODEL_NAME, test_pair)
================================================
FILE: tests/test_svms/svr.py
================================================
from sklearn.svm import SVR
from pymilo.utils.test_pymilo import pymilo_regression_test
from pymilo.utils.data_exporter import prepare_simple_regression_datasets
MODEL_NAME = "SVR"


def svr():
    """Fit an SVR on the simple regression data and run the PyMilo regression test."""
    x_train, y_train, x_test, y_test = prepare_simple_regression_datasets()
    model = SVR(C=1.0, epsilon=0.2)
    model = model.fit(x_train, y_train)
    test_pair = (x_test, y_test)
    pymilo_regression_test(model, MODEL_NAME, test_pair)
================================================
FILE: tests/test_svms/test_svms.py
================================================
import os
import pytest
from linear_svc import linear_svc
from linear_svr import linear_svr
from nu_svc import nu_svc
from nu_svr import nu_svr
from one_class_svm import one_class_svm
from svc import svc
from svr import svr
# SVM check functions grouped by family; test_full calls every function in
# every group.
SVMS = {
    "LINEAR": [linear_svc, linear_svr],
    "Nu": [nu_svc, nu_svr],
    "ONE_CLASS": [one_class_svm],
    "SVC": [svc],
    "SVR": [svr],
}
@pytest.fixture(scope="session", autouse=True)
def reset_exported_models_directory():
    """Create tests/exported_svms under the cwd, or empty it of files if it exists."""
    target_dir = os.path.join(os.getcwd(), "tests", "exported_svms")
    if os.path.isdir(target_dir):
        # Remove only regular files directly inside; sub-directories are left alone.
        for entry in os.listdir(target_dir):
            entry_path = os.path.join(target_dir, entry)
            if os.path.isfile(entry_path):
                os.remove(entry_path)
    else:
        os.mkdir(target_dir)
def test_full():
    """Run every registered SVM check, group by group."""
    for group in SVMS.values():
        for run_check in group:
            run_check()