Copy disabled (too large)
Download .txt
Showing preview only (12,075K chars total). Download the full file to get everything.
Repository: Diyago/ML-DL-scripts
Branch: master
Commit: e0f4511c7109
Files: 247
Total size: 43.6 MB
Directory structure:
gitextract_2g5lvqpj/
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── Bug_report.md
│ │ ├── Feature_request.md
│ │ └── custom.md
│ └── workflows/
│ └── label.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── DEEP LEARNING/
│ ├── Autoencoders GANS/
│ │ ├── GAN-for-tabular-data/
│ │ │ ├── CODE_OF_CONDUCT.md
│ │ │ ├── LICENSE
│ │ │ ├── README.md
│ │ │ ├── ctgan/
│ │ │ │ ├── README.MD
│ │ │ │ ├── __init__.py
│ │ │ │ ├── __main__.py
│ │ │ │ ├── conditional.py
│ │ │ │ ├── data.py
│ │ │ │ ├── demo.py
│ │ │ │ ├── models.py
│ │ │ │ ├── sampler.py
│ │ │ │ ├── synthesizer.py
│ │ │ │ └── transformer.py
│ │ │ ├── encoders.py
│ │ │ ├── model.py
│ │ │ ├── results/
│ │ │ │ └── fit_predict_scores.txt
│ │ │ ├── run_experiment.py
│ │ │ └── utils.py
│ │ └── pytorch/
│ │ ├── CGAN/
│ │ │ └── ConditionalGAN.py
│ │ ├── DCGAN/
│ │ │ └── dcgan.py
│ │ ├── ProgressiveGAN/
│ │ │ ├── README.md
│ │ │ ├── progan_modules.py
│ │ │ └── train.py
│ │ ├── Semi-supervised GAN/
│ │ │ ├── Datasets.py
│ │ │ ├── ImprovedGAN.py
│ │ │ ├── Nets.py
│ │ │ ├── README.md
│ │ │ └── functional.py
│ │ └── VAE/
│ │ └── VAR mnist.py
│ ├── Google Landmark Retrieval Challenge.py
│ ├── Kaggle Avito Demand Prediction Challenge/
│ │ ├── README.MD
│ │ ├── image feat. extraction/
│ │ │ ├── avito_deepIQA/
│ │ │ │ └── deepIQA/
│ │ │ │ ├── LICENSE
│ │ │ │ ├── README.md
│ │ │ │ ├── evaluate.py
│ │ │ │ ├── evaluate_back.py
│ │ │ │ ├── fr_model.py
│ │ │ │ └── nr_model.py
│ │ │ ├── neural-image-assessment/
│ │ │ │ ├── README.md
│ │ │ │ ├── evaluate_inception_resnet.py
│ │ │ │ ├── evaluate_mobilenet.py
│ │ │ │ ├── evaluate_nasnet.py
│ │ │ │ └── utils/
│ │ │ │ ├── check_dataset.py
│ │ │ │ ├── data_loader.py
│ │ │ │ ├── nasnet.py
│ │ │ │ └── score_utils.py
│ │ │ └── nn_image_features.py
│ │ ├── stem to SVD.py
│ │ └── text embeddings.py
│ ├── NLP/
│ │ ├── Kaggle Quora Insincere Questions Classification/
│ │ │ ├── 3rd-place.py
│ │ │ ├── README.MD
│ │ │ └── fix misspellings.py
│ │ ├── LSTM RNN/
│ │ │ ├── Next Chars pytorch/
│ │ │ │ ├── Char level RNN/
│ │ │ │ │ └── data/
│ │ │ │ │ └── anna.txt
│ │ │ │ └── project-tv-script-generation/
│ │ │ │ ├── data/
│ │ │ │ │ └── Seinfeld_Scripts.txt
│ │ │ │ ├── helper.py
│ │ │ │ └── problem_unittests.py
│ │ │ └── Sentiment pytorch/
│ │ │ ├── labels.txt
│ │ │ └── reviews.txt
│ │ ├── WSDM - Fake News Classification/
│ │ │ └── Berd generate embeddings/
│ │ │ ├── 0_bert_encode_en_train.py
│ │ │ ├── 1_bert_encode_en_test.py
│ │ │ ├── 2_bert_encode_ch_train.py
│ │ │ ├── 3_bert_encode_ch_test.py
│ │ │ └── 4_gen_encoded_dfs.py
│ │ ├── elmo EMBEDDINGS/
│ │ │ └── Sentence encode.html
│ │ └── text analyses/
│ │ └── Logistic regression with words and char n-grams.py
│ ├── Object detection/
│ │ ├── YOLO Object Localization Keras/
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── font/
│ │ │ │ ├── FiraMono-Medium.otf
│ │ │ │ └── SIL Open Font License.txt
│ │ │ ├── model_data/
│ │ │ │ ├── coco_classes.txt
│ │ │ │ ├── object_classes.txt
│ │ │ │ └── yolo_anchors.txt
│ │ │ ├── requirements.txt
│ │ │ ├── yad2k/
│ │ │ │ └── utils/
│ │ │ │ ├── __init__.py
│ │ │ │ └── utils.py
│ │ │ ├── yolo_run.py
│ │ │ └── yolo_utils.py
│ │ └── keras retinanet/
│ │ └── train.py
│ ├── Pytorch from scratch/
│ │ ├── CNN/
│ │ │ └── project-dog-classification/
│ │ │ ├── README.md
│ │ │ └── haarcascades/
│ │ │ └── haarcascade_frontalface_alt.xml
│ │ ├── MLP/
│ │ │ ├── fc_model.py
│ │ │ └── helper.py
│ │ ├── TODO/
│ │ │ └── GAN/
│ │ │ ├── cycle-gan/
│ │ │ │ ├── helpers.py
│ │ │ │ └── samples_cyclegan/
│ │ │ │ └── samples_dir.txt
│ │ │ └── project-face-generation/
│ │ │ └── problem_unittests.py
│ │ └── word2vec-embeddings/
│ │ ├── data/
│ │ │ └── download_data.txt
│ │ └── utils.py
│ └── segmentation/
│ ├── Kaggle TGS Salt Identification Challenge/
│ │ ├── README.MD
│ │ ├── v1/
│ │ │ ├── data_loader.py
│ │ │ ├── data_process/
│ │ │ │ ├── 10fold/
│ │ │ │ │ └── test.txt
│ │ │ │ └── transform.py
│ │ │ ├── evaluate.py
│ │ │ ├── loss/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── bce_losses.py
│ │ │ │ ├── cyclic_lr.py
│ │ │ │ └── lovasz_losses.py
│ │ │ ├── main.py
│ │ │ ├── model/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── ibnnet.py
│ │ │ │ ├── model.py
│ │ │ │ └── senet.py
│ │ │ └── utils.py
│ │ ├── v2/
│ │ │ ├── common_blocks/
│ │ │ │ ├── augmentation.py
│ │ │ │ ├── callbacks.py
│ │ │ │ ├── loaders.py
│ │ │ │ ├── metrics.py
│ │ │ │ ├── models.py
│ │ │ │ ├── pipelines.py
│ │ │ │ ├── pnasnet.py
│ │ │ │ ├── postprocessing.py
│ │ │ │ ├── preprocessing.py
│ │ │ │ ├── resnext.py
│ │ │ │ ├── unet_models.py
│ │ │ │ └── utils.py
│ │ │ ├── configs/
│ │ │ │ └── neptune.yaml
│ │ │ ├── modules/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── bn.py
│ │ │ │ ├── build.py
│ │ │ │ ├── build.sh
│ │ │ │ ├── functions.py
│ │ │ │ ├── misc.py
│ │ │ │ ├── residual.py
│ │ │ │ ├── src/
│ │ │ │ │ ├── common.h
│ │ │ │ │ ├── inplace_abn.cpp
│ │ │ │ │ ├── inplace_abn.h
│ │ │ │ │ ├── inplace_abn_cpu.cpp
│ │ │ │ │ └── inplace_abn_cuda.cu
│ │ │ │ └── wider_resnet.py
│ │ │ └── results.ods
│ │ └── vanilla unet/
│ │ └── utils/
│ │ ├── cyclelr_callback.py
│ │ ├── lovasz_losses_tf.py
│ │ └── zf_unet_224_model.py
│ ├── Segmentation pipeline/
│ │ ├── README.MD
│ │ ├── get dataset.py
│ │ ├── segmentation pipeline.html
│ │ ├── segmentation pipeline.py
│ │ └── weights/
│ │ └── .gitkeep
│ ├── Severstal-Steel-Defect-Detection-master/
│ │ ├── .gitignore
│ │ ├── README.md
│ │ ├── classification_pytorch_dummy.py
│ │ ├── common_blocks/
│ │ │ ├── __init__.py
│ │ │ ├── bam.py
│ │ │ ├── cbam.py
│ │ │ ├── dataloader.py
│ │ │ ├── generate_folds.py
│ │ │ ├── logger.py
│ │ │ ├── losses.py
│ │ │ ├── lovasz_losses.py
│ │ │ ├── metric.py
│ │ │ ├── new_metrics.py
│ │ │ ├── optimizers.py
│ │ │ ├── training_helper.py
│ │ │ └── utils.py
│ │ ├── configs/
│ │ │ ├── __init__.py
│ │ │ └── train_params.py
│ │ ├── inference.py
│ │ ├── model_resnet.py
│ │ └── train.py
│ └── Understanding-Clouds-from-Satellite-Images-master/
│ ├── .gitattributes
│ ├── .gitignore
│ ├── README.md
│ ├── augs.py
│ ├── callbacks.py
│ ├── config.py
│ ├── dataset.py
│ ├── inference_blend.py
│ ├── losses/
│ │ ├── losses.py
│ │ └── lovasz_losses.py
│ ├── optimizers.py
│ ├── predict.py
│ ├── schedulers.py
│ ├── train.py
│ ├── train.sh
│ └── utils.py
├── LICENSE
├── README.md
├── _config.yml
├── classification/
│ ├── Kaggle Home Credit Default Risk/
│ │ └── README.MD
│ ├── Kaggle Malware Prediction/
│ │ ├── README.MD
│ │ ├── kaggle.py
│ │ ├── models.py
│ │ ├── models_zoo.py
│ │ ├── oof_preds_level_1/
│ │ │ └── readme.md
│ │ ├── target_encoding.py
│ │ ├── test_preds_level_1/
│ │ │ └── readme.md
│ │ └── test_preds_level_2/
│ │ └── readme.md
│ ├── Kaggle Petfinder/
│ │ ├── 8th-place-solution-code.py
│ │ └── README.MD
│ └── Kaggle red hat user/
│ └── README.MD
├── deployment/
│ ├── docker flask fit predict/
│ │ ├── Dockerfile
│ │ ├── README.MD
│ │ ├── docker-compose.yml
│ │ ├── hello.py
│ │ ├── templates/
│ │ │ └── submit.html
│ │ └── train_model.py
│ └── ds docker db template/
│ ├── README.md
│ ├── docker/
│ │ ├── jupyter/
│ │ │ ├── Dockerfile
│ │ │ └── requirements.txt
│ │ └── postgres/
│ │ ├── Dockerfile
│ │ └── initdb.sql
│ └── docker-compose.yml
├── general studies/
│ ├── finetune gbm.md
│ ├── finetune xgb.md
│ └── get feature importance.py
├── images/
│ └── road-detection
├── recommendations/
│ └── ods_course/
│ ├── README.md
│ ├── competition/
│ │ ├── requirements.txt
│ │ └── tools.py
│ ├── lecture_2/
│ │ └── requirements.txt
│ ├── lecture_4/
│ │ ├── Dockerfile
│ │ ├── Readme.md
│ │ ├── ann/
│ │ │ ├── __init__.py
│ │ │ └── recommender.py
│ │ ├── config/
│ │ │ ├── __init__.py
│ │ │ ├── config.py
│ │ │ └── config.yaml
│ │ ├── main.py
│ │ └── pyproject.toml
│ └── lecture_5/
│ ├── README.md
│ ├── requirements.txt
│ └── tools.py
├── regression/
│ └── kaggle santander value prediction/
│ └── README.md
└── time series regression/
├── ARIMA/
│ ├── AR.py
│ ├── ARIMA.py
│ ├── ARMA.py
│ ├── ARMA_IBMstock.py
│ └── MA.py
├── Data Files/
│ ├── DJIA_Jan2016_Dec2016.xlsx
│ ├── Data Files
│ ├── Monthly_CO2_Concentrations.xlsx
│ ├── World Bank Mobile Phone Statistics.xlsx
│ ├── inflation-consumer-prices-annual.xlsx
│ └── mean-daily-temperature-fisher-river.xlsx
├── anomaly detection/
│ ├── README.md
│ ├── anomaly-detection-using-facebook-s-prophet.py
│ └── sunspots.txt
└── autocorelation, mov avg etc/
├── decomposition.py
├── doubleExponentialSmoothing.py
├── simpleExponentialSmoothing.py
└── tripleExponentialSmoothing.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/ISSUE_TEMPLATE/Bug_report.md
================================================
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''
---
**Describe the bug**
A clear and concise description of what the bug is.
**To Reproduce**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error
**Expected behavior**
A clear and concise description of what you expected to happen.
**Screenshots**
If applicable, add screenshots to help explain your problem.
**Desktop (please complete the following information):**
- OS: [e.g. iOS]
- Browser [e.g. chrome, safari]
- Version [e.g. 22]
**Smartphone (please complete the following information):**
- Device: [e.g. iPhone6]
- OS: [e.g. iOS8.1]
- Browser [e.g. stock browser, safari]
- Version [e.g. 22]
**Additional context**
Add any other context about the problem here.
================================================
FILE: .github/ISSUE_TEMPLATE/Feature_request.md
================================================
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''
---
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
**Additional context**
Add any other context or screenshots about the feature request here.
================================================
FILE: .github/ISSUE_TEMPLATE/custom.md
================================================
---
name: Custom issue template
about: Describe this issue template's purpose here.
title: ''
labels: ''
assignees: ''
---
================================================
FILE: .github/workflows/label.yml
================================================
# This workflow will triage pull requests and apply a label based on the
# paths that are modified in the pull request.
#
# To use this workflow, you will need to set up a .github/labeler.yml
# file with configuration. For more information, see:
# https://github.com/actions/labeler/blob/master/README.md
name: Labeler
on: [pull_request]
jobs:
label:
runs-on: ubuntu-latest
steps:
- uses: actions/labeler@v2
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Data files
*.csv
*.tsv
*.json
*.parquet
*.feather
*.pkl
*.pickle
# ML model artifacts
*.h5
*.hdf5
*.onnx
*.pt
*.pth
*.ckpt
*.safetensors
*.pb
saved_model/
checkpoints/
models/
# Distribution / packaging
.Python
env/
venv/
.venv/
ENV/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
*.rar
*.zip
*.tar.gz
# PyInstaller
*.manifest
*.spec
*.swp
*.pyc
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
requirements.txt.lock
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Logs
*.log
logs/
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
*.ipynb
# IDEs
.idea/
.vscode/
*.sublime-project
*.sublime-workspace
*.iml
# OS files
.DS_Store
Thumbs.db
desktop.ini
# Environment files
.env
.env.local
*.env.yaml
*.env.json
# TensorBoard
runs/
tensorboard/
# Weights & Biases
wandb/
# MLflow
mlruns/
mlartifacts/
================================================
FILE: .pre-commit-config.yaml
================================================
repos:
- repo: https://github.com/psf/black
rev: 24.1.0
hooks:
- id: black
language_version: python3.11
================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
## Scope
This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at diyago@ya.ru. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
[homepage]: http://contributor-covenant.org
[version]: http://contributor-covenant.org/version/1/4/
================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to ML-DL-scripts
Thank you for your interest in contributing! This repository contains Machine Learning and Deep Learning scripts, Kaggle solutions, and educational materials.
## How to Contribute
### Reporting Issues
If you find a bug or have a suggestion:
1. Check if the issue already exists
2. Open a new issue with a clear description
3. Include code examples and error messages if applicable
### Submitting Changes
1. Fork the repository
2. Create a feature branch (`git checkout -b feature/your-feature`)
3. Make your changes
4. Ensure code follows the existing style (Black formatter)
5. Commit with clear messages (`git commit -m "Add: description"`)
6. Push to your fork (`git push origin feature/your-feature`)
7. Open a Pull Request
### Code Guidelines
- Use Python 3.7+
- Follow PEP 8 style guidelines
- Use Black for code formatting
- Add docstrings for functions and classes
- Include examples where helpful
- Keep notebooks clean (clear outputs before committing)
### Project Structure
Place new scripts in appropriate folders:
- `classification/` – Classification algorithms
- `regression/` – Regression models
- `clustering/` – Clustering methods
- `DEEP LEARNING/` – Neural network implementations
- `time series regression/` – Time series analysis
- `statistics/` – Statistical tools
- `deployment/` – Docker and deployment examples
## Questions?
Contact via Telegram: [@ai_tablet](https://t.me/ai_tablet)
================================================
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to making participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment
include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.
## Scope
This Code of Conduct applies both within project spaces and in public spaces
when an individual is representing the project or its community. Examples of
representing a project or community include using an official project e-mail
address, posting via an official social media account, or acting as an appointed
representative at an online or offline event. Representation of a project may be
further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at issues. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq
================================================
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/README.md
================================================
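[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)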
# GANs for tabular data
GANs are well known for their success in realistic image generation. However, they can also be applied to tabular data generation. We will review and examine some recent papers about tabular GANs in action.
Medium post: [GANs for tabular data](https://towardsdatascience.com/review-of-gans-for-tabular-data-a30a2199342)
## Used datasets and experiment design
**Task formalization**
Let's say we have **T_train** and **T_test** (train and test set respectively). We need to train the model on **T_train** and make predictions on **T_test**. However, we will enlarge the train set by generating new data with a GAN, somewhat similar to **T_test**, without using its ground truth labels.
**Experiment design**
Let's say we have **T_train** and **T_test** (train and test set respectively). The size of **T_train** is smaller and might have different data distribution. First of all, we train CTGAN on **T_train** with ground truth labels (step 1), then generate additional data **T_synth** (step 2). Secondly, we train boosting in an adversarial way on concatenated **T_train** and **T_synth** (target set to 0) with **T_test** (target set to 1) (steps 3 & 4). The goal is to apply newly trained adversarial boosting to obtain rows more like **T_test**. Note - initial ground truth labels aren't used for adversarial training. As a result, we take top rows from **T_train** and **T_synth** sorted by correspondence to **T_test** (steps 5 & 6), and train new boosting on them and check results on **T_test**.

**Picture 1.1** Experiment design and workflow
Of course, for benchmark purposes we will also test ordinary training without these tricks, as well as the original pipeline without CTGAN (in step 3 we won't use **T_synth**).
**Datasets**
All datasets came from different domains. They have a different number of observations, number of categorical and numerical features.
The objective for all datasets - binary classification.
Preprocessing of the datasets was simple: all time-based columns were removed.
Remaining columns were either categorical or numerical.
**Table 1.1** Used datasets
| Name | Total points | Train points | Test points | Number of features | Number of categorical features | Short description |
| :--- | :---: | :---: | :---: | :---: | :---: | :---: |
| [Telecom](https://www.kaggle.com/blastchar/telco-customer-churn) | 7.0k | 4.2k | 2.8k | 20 | 16 | Churn prediction for telecom data |
| [Adult](https://www.kaggle.com/wenruliu/adult-income-dataset) | 48.8k | 29.3k | 19.5k | 15 | 8 | Predict whether a person's income exceeds 50k |
| [Employee](https://www.kaggle.com/c/amazon-employee-access-challenge/data) | 32.7k | 19.6k | 13.1k | 10 | 9 | Predict an employee's access needs, given his/her job role|
| [Credit](https://www.kaggle.com/c/home-credit-default-risk/data) | 307.5k | 184.5k | 123k | 121 | 18 | Loan repayment |
| [Mortgages](https://www.crowdanalytix.com/contests/propensity-to-fund-mortgages) | 45.6k | 27.4k | 18.2k | 20 | 9 | Predict if a house mortgage is funded |
| [Taxi](https://www.crowdanalytix.com/contests/mckinsey-big-data-hackathon) | 892.5k | 535.5k | 357k | 8 | 5 | Predict the probability of an offer being accepted by a certain driver |
| [Poverty_A](https://www.drivendata.org/competitions/50/worldbank-poverty-prediction/page/99/) | 37.6k | 22.5k | 15.0k | 41 | 38 | Predict whether or not a given household for a given country is poor or not |
## Results
To determine the best encoder, the ROC AUC scores of each dataset were scaled (min-max scale) and then averaged across the datasets.
To determine the best validation strategy, I compared the top score of each dataset for each type of validation.
**Table 1.2** Different sampling results across the dataset, higher is better (100% - maximum per dataset ROC AUC)
| dataset_name | None | gan | sample_original |
|:-----------------------|-------------------:|------------------:|------------------------------:|
| credit | 0.997 | **0.998** | 0.997 |
| employee | **0.986** | 0.966 | 0.972 |
| mortgages | 0.984 | 0.964 | **0.988** |
| poverty_A | 0.937 | **0.950** | 0.933 |
| taxi | 0.966 | 0.938 | **0.987** |
| adult | 0.995 | 0.967 | **0.998** |
| telecom | **0.995** | 0.868 | 0.992 |
**Table 1.3** Different sampling results, higher is better for a mean (ROC AUC), lower is better for std (100% - maximum per dataset ROC AUC)
| sample_type | mean | std |
|:----------------|---------:|----------:|
| None | 0.980 | 0.036 |
| gan | 0.969 | 0.06 |
| sample_original | **0.981** | **0.032** |
**Table 1.4** same_target_prop equals 1 when the target rates of train and test differ by no more than 5%. Higher is better.
| sample_type | same_target_prop | prop_test_score |
|:----------------|-------------------:|------------------:|
| None | 0 | 0.964 |
| None | 1 | 0.985 |
| gan | 0 | 0.966 |
| gan | 1 | 0.945 |
| sample_original | 0 | 0.973 |
| sample_original | 1 | 0.984 |
## References
[1] Jonathan Hui. GAN — What is Generative Adversarial Networks GAN? (2018), medium article
[2] Ian J. Goodfellow, Jean Pouget-Abadie, Mehdi Mirza, Bing Xu, David Warde-Farley, Sherjil Ozair, Aaron Courville, Yoshua Bengio. Generative Adversarial Networks (2014). arXiv:1406.2661
[3] Lei Xu LIDS, Kalyan Veeramachaneni. Synthesizing Tabular Data using Generative Adversarial Networks (2018). arXiv:1811.11264v1 [cs.LG]
[4] Lei Xu, Maria Skoularidou, Alfredo Cuesta-Infante, Kalyan Veeramachaneni. Modeling Tabular Data using Conditional GAN (2019). arXiv:1907.00503v2 [cs.LG]
[5] Denis Vorotyntsev. Benchmarking Categorical Encoders (2019). Medium post
[6] Insaf Ashrapov. GAN-for-tabular-data (2020). Github repository.
[7] Tero Karras, Samuli Laine, Miika Aittala, Janne Hellsten, Jaakko Lehtinen, Timo Aila. Analyzing and Improving the Image Quality of StyleGAN (2019) arXiv:1912.04958v2 [cs.CV]
================================================
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/README.MD
================================================
REFERENCE (initial code): https://github.com/sdv-dev/CTGAN
<p align="left">
<img width=15% src="https://dai.lids.mit.edu/wp-content/uploads/2018/06/Logo_DAI_highres.png" alt="sdv-dev" />
<i>An open source project from Data to AI Lab at MIT.</i>
</p>
[](https://pypi.org/search/?c=Development+Status+%3A%3A+2+-+Pre-Alpha)
[](https://pypi.python.org/pypi/ctgan)
[](https://travis-ci.org/sdv-dev/CTGAN)
[](https://pepy.tech/project/ctgan)
[](https://codecov.io/gh/sdv-dev/CTGAN)
# CTGAN
Implementation of our NeurIPS paper [Modeling Tabular data using Conditional GAN](https://arxiv.org/abs/1907.00503).
CTGAN is a GAN-based data synthesizer that can generate synthetic tabular data with high fidelity.
* License: [MIT](https://github.com/sdv-dev/CTGAN/blob/master/LICENSE)
* Development Status: [Pre-Alpha](https://pypi.org/search/?c=Development+Status+%3A%3A+2+-+Pre-Alpha)
* Documentation: https://sdv-dev.github.io/CTGAN
* Homepage: https://github.com/sdv-dev/CTGAN
## Overview
Based on previous work ([TGAN](https://github.com/sdv-dev/TGAN)) on synthetic data generation,
we develop a new model called CTGAN. Several major differences make CTGAN outperform TGAN.
- **Preprocessing**: CTGAN uses more sophisticated Variational Gaussian Mixture Model to detect
modes of continuous columns.
- **Network structure**: TGAN uses LSTM to generate synthetic data column by column. CTGAN uses
Fully-connected networks which is more efficient.
- **Features to prevent mode collapse**: We design a conditional generator and resample the
training data to prevent mode collapse on discrete columns. We use WGANGP and PacGAN to
stabilize the training of GAN.
# Install
## Requirements
**CTGAN** has been developed and tested on [Python 3.5, 3.6 and 3.7](https://www.python.org/downloads/)
## Install from PyPI
The recommended way to install **CTGAN** is using [pip](https://pip.pypa.io/en/stable/):
```bash
pip install ctgan
```
This will pull and install the latest stable release from [PyPI](https://pypi.org/).
If you want to install from source or contribute to the project please read the
[Contributing Guide](https://sdv-dev.github.io/CTGAN/contributing.html#get-started).
# Data Format
**CTGAN** expects the input data to be a table given as either a `numpy.ndarray` or a
`pandas.DataFrame` object with two types of columns:
* **Continuous Columns**: Columns that contain numerical values and which can take any value.
* **Discrete columns**: Columns that only contain a finite number of possible values, whether
these are string values or not.
This is an example of a table with 4 columns:
* A continuous column with float values
* A continuous column with integer values
* A discrete column with string values
* A discrete column with integer values
| | A | B | C | D |
|---|------|-----|-----|---|
| 0 | 0.1 | 100 | 'a' | 1 |
| 1 | -1.3 | 28 | 'b' | 2 |
| 2 | 0.3 | 14 | 'a' | 2 |
| 3 | 1.4 | 87 | 'a' | 3 |
| 4 | -0.1 | 69 | 'b' | 2 |
**NOTE**: CTGAN does not distinguish between float and integer columns, which means that it will
sample float values in all cases. If integer values are required, the outputted float values
must be rounded to integers in a later step, outside of CTGAN.
# Python Quickstart
In this short tutorial we will guide you through a series of steps that will help you
get started with **CTGAN**.
## 1. Model the data
### Step 1: Prepare your data
Before being able to use CTGAN you will need to prepare your data as specified above.
For this example, we will be loading some data using the `ctgan.load_demo` function.
```python
from ctgan import load_demo
data = load_demo()
```
This will download a copy of the [Adult Census Dataset](https://archive.ics.uci.edu/ml/datasets/adult) as a dataframe:
| age | workclass | fnlwgt | ... | hours-per-week | native-country | income |
|-------|------------------|----------|-----|------------------|------------------|----------|
| 39 | State-gov | 77516 | ... | 40 | United-States | <=50K |
| 50 | Self-emp-not-inc | 83311 | ... | 13 | United-States | <=50K |
| 38 | Private | 215646 | ... | 40 | United-States | <=50K |
| 53 | Private | 234721 | ... | 40 | United-States | <=50K |
| 28 | Private | 338409 | ... | 40 | Cuba | <=50K |
| ... | ... | ... | ... | ... | ... | ... |
Aside from the table itself, you will need to create a list with the names of the discrete
variables.
For this example:
```python
discrete_columns = [
'workclass',
'education',
'marital-status',
'occupation',
'relationship',
'race',
'sex',
'native-country',
'income'
]
```
### Step 2: Fit CTGAN to your data
Once you have the data ready, you need to import and create an instance of the `CTGANSynthesizer`
class and fit it passing your data and the list of discrete columns.
```python
from ctgan import CTGANSynthesizer
ctgan = CTGANSynthesizer()
ctgan.fit(data, discrete_columns)
```
This process is likely to take a long time to run.
If you want to make the process shorter, or longer, you can control the number of training epochs
that the model will be performing by adding it to the `fit` call:
```python
ctgan.fit(data, discrete_columns, epochs=5)
```
## 2. Generate synthetic data
Once the process has finished, all you need to do is call the `sample` method of your
`CTGANSynthesizer` instance indicating the number of rows that you want to generate.
```python
samples = ctgan.sample(1000)
```
The output will be a table with the exact same format as the input and filled with the synthetic
data generated by the model.
| age | workclass | fnlwgt | ... | hours-per-week | native-country | income |
|---------|--------------|-----------|-----|------------------|------------------|----------|
| 26.3191 | Private | 124079 | ... | 40.1557 | United-States | <=50K |
| 39.8558 | Private | 133996 | ... | 40.2507 | United-States | <=50K |
| 38.2477 | Self-emp-inc | 135955 | ... | 40.1124 | Ecuador | <=50K |
| 29.6468 | Private | 3331.86 | ... | 27.012 | United-States | <=50K |
| 20.9853 | Private | 120637 | ... | 40.0238 | United-States | <=50K |
| ... | ... | ... | ... | ... | ... | ... |
# Join our community
1. If you would like to try more dataset examples, please have a look at the [examples folder](
https://github.com/sdv-dev/CTGAN/tree/master/examples) of the repository. Please contact us
if you have a usage example that you would want to share with the community.
2. If you want to contribute to the project code, please head to the [Contributing Guide](
https://sdv-dev.github.io/CTGAN/contributing.html#get-started) for more details about how to do it.
3. If you have any doubts, feature requests or detect an error, please [open an issue on github](
https://github.com/sdv-dev/CTGAN/issues)
4. Also do not forget to check the [project documentation site](https://sdv-dev.github.io/CTGAN/)!
# Citing CTGAN
If you use CTGAN, please cite the following work:
- *Lei Xu, Maria Skoularidou, Alfredo Cuesta-Infante, Kalyan Veeramachaneni.* **Modeling Tabular data using Conditional GAN**. NeurIPS, 2019.
```LaTeX
@inproceedings{xu2019modeling,
title={Modeling Tabular data using Conditional GAN},
author={Xu, Lei and Skoularidou, Maria and Cuesta-Infante, Alfredo and Veeramachaneni, Kalyan},
booktitle={Advances in Neural Information Processing Systems},
year={2019}
}
```
# Related Projects
Please note that these libraries are external contributions and are not maintained nor supervised by
the MIT DAI-Lab team.
## R interface for CTGAN
A wrapper around **CTGAN** has been implemented by Kevin Kuo @kevinykuo, bringing the functionalities
of **CTGAN** to **R** users.
More details can be found in the corresponding repository: https://github.com/kasaai/ctgan
## CTGAN Server CLI
A package to easily deploy **CTGAN** onto a remote server. This package is developed by Timothy Pillow @oregonpillow.
More details can be found in the corresponding repository: https://github.com/oregonpillow/ctgan-server-cli
================================================
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/__init__.py
================================================
# -*- coding: utf-8 -*-
"""Top-level package for ctgan."""
# Package metadata.
__author__ = "MIT Data To AI Lab"
__email__ = "dailabmit@gmail.com"
__version__ = "0.2.1"
# Re-export the public entry points so they can be imported from ``ctgan`` directly.
from ctgan.demo import load_demo
from ctgan.synthesizer import CTGANSynthesizer
# Names exported by ``from ctgan import *``.
__all__ = ("CTGANSynthesizer", "load_demo")
================================================
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/__main__.py
================================================
import argparse
from ctgan.data import read_csv, read_tsv, write_tsv
from ctgan.synthesizer import CTGANSynthesizer
def _parse_args():
    """Build the CTGAN command line parser and parse the process arguments.

    Returns:
        argparse.Namespace:
            Namespace carrying ``epochs``, ``tsv``, ``header``, ``metadata``,
            ``discrete``, ``num_samples``, ``data`` and ``output``.
    """
    cli = argparse.ArgumentParser(description="CTGAN Command Line Interface")

    # Optional switches, registered in the order they appear in ``--help``.
    cli.add_argument(
        "-e", "--epochs", type=int, default=300, help="Number of training epochs"
    )
    cli.add_argument(
        "-t",
        "--tsv",
        action="store_true",
        help="Load data in TSV format instead of CSV",
    )
    # ``--no-header`` flips ``args.header`` (True unless the flag is given).
    cli.add_argument(
        "--no-header",
        action="store_false",
        dest="header",
        help="The CSV file has no header. Discrete columns will be indices.",
    )
    cli.add_argument("-m", "--metadata", help="Path to the metadata")
    cli.add_argument(
        "-d",
        "--discrete",
        help="Comma separated list of discrete columns, no whitespaces",
    )
    cli.add_argument(
        "-n",
        "--num-samples",
        type=int,
        help="Number of rows to sample. Defaults to the training data size",
    )

    # Mandatory positionals: input first, output second.
    cli.add_argument("data", help="Path to training data")
    cli.add_argument("output", help="Path of the output file")

    parsed = cli.parse_args()
    return parsed
def main():
    """Command line entry point: fit CTGAN on a file and write synthetic rows.

    Reads the training table (TSV or CSV depending on ``--tsv``), fits a
    ``CTGANSynthesizer`` for ``--epochs`` epochs, samples ``--num-samples``
    rows (the training size when the option is missing or 0) and writes them
    to the output path in the same format as the input.
    """
    args = _parse_args()
    use_tsv = args.tsv

    if not use_tsv:
        data, discrete_columns = read_csv(
            args.data, args.metadata, args.header, args.discrete
        )
    else:
        data, discrete_columns = read_tsv(args.data, args.metadata)

    model = CTGANSynthesizer()
    model.fit(data, discrete_columns, args.epochs)

    # Fall back to the number of training rows when no sample count is given.
    num_samples = args.num_samples if args.num_samples else len(data)
    sampled = model.sample(num_samples)

    if not use_tsv:
        sampled.to_csv(args.output, index=False)
        return

    # NOTE(review): the metadata argument forwarded here is the path given on
    # the command line, not a parsed structure -- confirm write_tsv accepts it.
    write_tsv(sampled, args.metadata, args.output)
================================================
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/conditional.py
================================================
import numpy as np
class ConditionalGenerator(object):
    """Sample conditional vectors over the discrete columns of transformed data.

    ``output_info`` is a sequence of ``(width, activation)`` items describing
    consecutive column spans of ``data``. A ``"softmax"`` span that directly
    follows a ``"tanh"`` span is skipped; every other ``"softmax"`` span is
    treated as one discrete column that can be conditioned on.

    Attributes set by the constructor:
        model: per discrete column, the argmax category index of every row.
        interval: array of ``(offset, width)`` pairs locating each discrete
            column inside the concatenated conditional vector.
        n_col: number of discrete columns.
        n_opt: total width of the conditional vector.
        p: ``(n_col, max_width)`` matrix of category sampling probabilities.

    Args:
        data (numpy.ndarray):
            2-dimensional array whose columns follow ``output_info``.
        output_info (sequence):
            ``(width, activation)`` items, activation being ``"tanh"`` or
            ``"softmax"``.
        log_frequency (bool):
            If true, category probabilities are proportional to
            ``log(count + 1)`` instead of the raw column sums.

    Raises:
        ValueError: If an activation is unknown or the widths in
            ``output_info`` do not add up to ``data.shape[1]``.
    """

    def __init__(self, data, output_info, log_frequency):
        spans = self._discrete_spans(output_info, data.shape[1])
        widths = [end - start for start, end in spans]

        # Category index of every training row, one array per discrete column.
        self.model = [np.argmax(data[:, start:end], axis=-1) for start, end in spans]

        self.n_col = len(spans)
        self.n_opt = sum(widths)
        self.p = np.zeros((self.n_col, max(widths, default=0)))

        interval = []
        offset = 0
        for col, ((start, end), width) in enumerate(zip(spans, widths)):
            freq = np.sum(data[:, start:end], axis=0)
            if log_frequency:
                # Dampen dominant categories so that rare ones get sampled too.
                freq = np.log(freq + 1)
            self.p[col, :width] = freq / np.sum(freq)
            interval.append((offset, width))
            offset += width
        self.interval = np.asarray(interval)

    @staticmethod
    def _discrete_spans(output_info, n_features):
        """Return the ``(start, end)`` column span of every conditionable column."""
        spans = []
        start = 0
        after_tanh = False
        for item in output_info:
            width, activation = item[0], item[1]
            end = start + width
            if activation == "tanh":
                after_tanh = True
            elif activation == "softmax":
                if after_tanh:
                    # Softmax paired with the preceding tanh span: not a
                    # discrete column, just step over it.
                    after_tanh = False
                else:
                    spans.append((start, end))
            else:
                raise ValueError("Unknown activation {!r} in output_info".format(activation))
            start = end

        if start != n_features:
            raise ValueError(
                "output_info covers {} columns but data has {}".format(start, n_features)
            )
        return spans

    def random_choice_prob_index(self, idx):
        """Draw one category per entry of ``idx`` according to ``self.p``.

        Args:
            idx (numpy.ndarray): discrete column index for every sample.

        Returns:
            numpy.ndarray: sampled category index (within its column) per sample.
        """
        a = self.p[idx]
        r = np.expand_dims(np.random.rand(a.shape[0]), axis=1)
        # First position where the cumulative probability exceeds the draw.
        return (a.cumsum(axis=1) > r).argmax(axis=1)

    def sample(self, batch):
        """Sample ``batch`` conditional vectors for training.

        Returns:
            ``None`` when there is no discrete column, otherwise the tuple
            ``(vec, mask, idx, opt)``: one-hot conditional vectors of shape
            ``(batch, n_opt)``, one-hot column masks of shape
            ``(batch, n_col)``, the chosen column indices and the chosen
            category indices within those columns.
        """
        if self.n_col == 0:
            return None

        idx = np.random.choice(np.arange(self.n_col), batch)
        rows = np.arange(batch)

        vec1 = np.zeros((batch, self.n_opt), dtype="float32")
        mask1 = np.zeros((batch, self.n_col), dtype="float32")
        mask1[rows, idx] = 1

        opt1prime = self.random_choice_prob_index(idx)
        opt1 = self.interval[idx, 0] + opt1prime
        vec1[rows, opt1] = 1
        return vec1, mask1, idx, opt1prime

    def sample_zero(self, batch):
        """Sample ``batch`` conditional vectors using the empirical categories.

        For every sample a discrete column is chosen uniformly and its
        category is copied from a uniformly chosen training row.

        Returns:
            ``None`` when there is no discrete column, otherwise a float32
            array of shape ``(batch, n_opt)`` with a single 1 per row.
        """
        if self.n_col == 0:
            return None

        vec = np.zeros((batch, self.n_opt), dtype="float32")
        idx = np.random.choice(np.arange(self.n_col), batch)
        for i, col in enumerate(idx):
            pick = int(np.random.choice(self.model[col]))
            vec[i, pick + self.interval[col, 0]] = 1
        return vec
================================================
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/data.py
================================================
import json
import numpy as np
import pandas as pd
def read_csv(csv_filename, meta_filename=None, header=True, discrete=None):
    """Load a CSV table and work out which of its columns are discrete.

    Args:
        csv_filename (str):
            Path of the CSV file.
        meta_filename (str or None):
            Optional JSON metadata file. When given, every entry of its
            ``"columns"`` list whose ``"type"`` is not ``"continuous"`` is
            reported as discrete and ``discrete`` is ignored.
        header (bool):
            Whether the CSV has a header row.
        discrete (str or None):
            Comma separated discrete column names; converted to integer
            indices when ``header`` is false.

    Returns:
        tuple: ``(pandas.DataFrame, list)`` with the data and the discrete columns.
    """
    frame = pd.read_csv(csv_filename, header="infer" if header else None)

    # Metadata wins over the ``discrete`` string.
    if meta_filename:
        with open(meta_filename) as handle:
            described = json.load(handle)["columns"]
        names = [entry["name"] for entry in described if entry["type"] != "continuous"]
        return frame, names

    if not discrete:
        return frame, []

    names = discrete.split(",")
    if header:
        return frame, names
    # Without a header the columns are addressed by position.
    return frame, [int(name) for name in names]
def _parse_tsv_meta(meta_filename):
    """Parse a TSV metadata file into the dict layout used by ``write_tsv``.

    Every non-blank line describes one column: ``C <min> <max>`` for a
    continuous column, ``D <cat> <cat> ...`` for a discrete one. Curly braces
    are treated as whitespace.

    Returns:
        dict: ``continuous_columns`` and ``discrete_columns`` (lists of column
        indices) plus ``column_info`` (``(min, max)`` tuples for continuous
        columns, category lists for discrete ones).

    Raises:
        ValueError: If a line starts with anything other than ``C`` or ``D``.
    """
    continuous = []
    discrete = []
    column_info = []
    with open(meta_filename) as meta_file:
        for line in meta_file:
            item = line.replace("{", " ").replace("}", " ").split()
            if not item:
                # Tolerate blank lines instead of failing on item[0].
                continue
            idx = len(column_info)
            if item[0] == "C":
                continuous.append(idx)
                column_info.append((float(item[1]), float(item[2])))
            elif item[0] == "D":
                discrete.append(idx)
                column_info.append(item[1:])
            else:
                raise ValueError(
                    "Unknown column type {!r} in {}".format(item[0], meta_filename)
                )

    return {
        "continuous_columns": continuous,
        "discrete_columns": discrete,
        "column_info": column_info,
    }


def read_tsv(data_filename, meta_filename):
    """Read a whitespace separated data file described by a metadata file.

    Continuous values are parsed as floats; discrete values are replaced by
    their position in the category list of the metadata.

    Args:
        data_filename (str):
            Path of the data file, one row per line.
        meta_filename (str):
            Path of the metadata file (see ``_parse_tsv_meta``).

    Returns:
        tuple: ``(numpy.ndarray of float32, list of discrete column indices)``.

    Raises:
        ValueError: If a row has more columns than the metadata describes or
            holds a category missing from the metadata.
    """
    meta = _parse_tsv_meta(meta_filename)
    column_info = meta["column_info"]
    # Sets make the per-cell membership test O(1).
    continuous = set(meta["continuous_columns"])
    discrete = set(meta["discrete_columns"])

    data = []
    with open(data_filename) as data_file:
        for line in data_file:
            cells = line.split()
            if not cells:
                continue
            row = []
            for idx, cell in enumerate(cells):
                if idx in continuous:
                    row.append(float(cell))
                elif idx in discrete:
                    row.append(column_info[idx].index(cell))
                else:
                    raise ValueError(
                        "Column {} of {} is not described by the metadata".format(
                            idx, data_filename
                        )
                    )
            data.append(row)

    return np.asarray(data, dtype="float32"), meta["discrete_columns"]


def write_tsv(data, meta, output_filename):
    """Write rows in the format read by ``read_tsv``.

    Args:
        data (numpy.ndarray, pandas.DataFrame or iterable of rows):
            Rows to write; discrete cells hold category indices.
        meta (dict or path):
            Either the metadata dict (``continuous_columns``,
            ``discrete_columns``, ``column_info``) or the path of a metadata
            file, which is then parsed with ``_parse_tsv_meta``.
        output_filename (str):
            Path of the file to create.

    Raises:
        ValueError: If a row has a column the metadata does not describe.
    """
    if isinstance(meta, (str, bytes)) or hasattr(meta, "__fspath__"):
        # Callers such as the command line only hold the metadata path.
        meta = _parse_tsv_meta(meta)
    if isinstance(data, pd.DataFrame):
        # Iterating a DataFrame yields column labels, not rows.
        data = data.values

    continuous = set(meta["continuous_columns"])
    discrete = set(meta["discrete_columns"])
    column_info = meta["column_info"]

    with open(output_filename, "w") as f:
        for row in data:
            for idx, col in enumerate(row):
                if idx in continuous:
                    print(col, end=" ", file=f)
                elif idx in discrete:
                    print(column_info[idx][int(col)], end=" ", file=f)
                else:
                    raise ValueError(
                        "Column {} is not described by the metadata".format(idx)
                    )
            print(file=f)
================================================
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/demo.py
================================================
import pandas as pd
DEMO_URL = "http://ctgan-data.s3.amazonaws.com/census.csv.gz"
def load_demo():
    """Download the demo table and return it as a ``pandas.DataFrame``.

    The gzip-compressed CSV at ``DEMO_URL`` is fetched on every call.
    """
    demo_frame = pd.read_csv(DEMO_URL, compression="gzip")
    return demo_frame
================================================
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/models.py
================================================
import torch
from torch.nn import BatchNorm1d, Dropout, LeakyReLU, Linear, Module, ReLU, Sequential
class Discriminator(Module):
    """Critic that scores groups of ``pack`` rows at once.

    The network is a stack of ``Linear -> LeakyReLU(0.2) -> Dropout(0.5)``
    blocks, one per entry of ``dis_dims``, followed by a single-output
    ``Linear`` layer. Inputs are reshaped so that ``pack`` consecutive rows
    form one sample.

    Args:
        input_dim (int):
            Width of a single input row.
        dis_dims (iterable of int):
            Output width of each hidden block.
        pack (int):
            Number of rows concatenated into one critic sample.
    """

    def __init__(self, input_dim, dis_dims, pack=10):
        super(Discriminator, self).__init__()
        self.pack = pack
        self.packdim = input_dim * pack

        layers = []
        width = self.packdim
        for hidden in list(dis_dims):
            layers.extend([Linear(width, hidden), LeakyReLU(0.2), Dropout(0.5)])
            width = hidden
        layers.append(Linear(width, 1))
        self.seq = Sequential(*layers)

    def calc_gradient_penalty(
        self, real_data, fake_data, device="cpu", pac=10, lambda_=10
    ):
        """Return the gradient penalty on random real/fake interpolations.

        One interpolation weight is drawn per group of ``pac`` rows; the
        penalty is ``lambda_`` times the mean squared deviation of the
        per-group gradient norm from 1.
        """
        n_features = real_data.size(1)
        n_groups = real_data.size(0) // pac

        # Same mixing weight for every row and feature inside a pack.
        alpha = torch.rand(n_groups, 1, 1, device=device)
        alpha = alpha.repeat(1, pac, n_features).view(-1, n_features)

        interpolates = alpha * real_data + ((1 - alpha) * fake_data)
        scores = self(interpolates)

        (gradients,) = torch.autograd.grad(
            outputs=scores,
            inputs=interpolates,
            grad_outputs=torch.ones(scores.size(), device=device),
            create_graph=True,
            retain_graph=True,
            only_inputs=True,
        )

        grad_norm = gradients.view(-1, pac * n_features).norm(2, dim=1)
        return ((grad_norm - 1) ** 2).mean() * lambda_

    def forward(self, input):
        """Score ``input``; its row count must be a multiple of ``pack``."""
        assert input.size()[0] % self.pack == 0
        packed = input.view(-1, self.packdim)
        return self.seq(packed)
class Residual(Module):
    """Linear -> BatchNorm -> ReLU block that keeps its input alongside its output.

    Args:
        i (int): width of the input.
        o (int): width produced by the linear layer.

    The forward result has ``o + i`` columns: the activated features first,
    the untouched input after them.
    """

    def __init__(self, i, o):
        super(Residual, self).__init__()
        self.fc = Linear(i, o)
        self.bn = BatchNorm1d(o)
        self.relu = ReLU()

    def forward(self, input):
        """Return ``[relu(bn(fc(input))), input]`` concatenated on dim 1."""
        activated = self.relu(self.bn(self.fc(input)))
        return torch.cat([activated, input], dim=1)
class Generator(Module):
    """Stack of ``Residual`` blocks followed by a linear output layer.

    Args:
        embedding_dim (int):
            Width of the generator input.
        gen_dims (iterable of int):
            Output width of each ``Residual`` block. Because every block
            concatenates its input to its output, the running width grows by
            that amount after each block.
        data_dim (int):
            Width of the generated rows.
    """

    def __init__(self, embedding_dim, gen_dims, data_dim):
        super(Generator, self).__init__()
        blocks = []
        width = embedding_dim
        for hidden in list(gen_dims):
            blocks.append(Residual(width, hidden))
            width = width + hidden
        blocks.append(Linear(width, data_dim))
        self.seq = Sequential(*blocks)

    def forward(self, input):
        """Map ``input`` to raw (not yet activated) generated rows."""
        return self.seq(input)
================================================
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/sampler.py
================================================
import numpy as np
class Sampler(object):
    """Draw training rows, optionally restricted to a given category.

    ``output_info`` is a sequence of ``(width, activation)`` items describing
    consecutive column spans of ``data``. A ``"softmax"`` span that directly
    follows a ``"tanh"`` span is skipped; for every other ``"softmax"`` span
    the row indices with a non-zero entry are recorded per category in
    ``self.model``.
    """

    def __init__(self, data, output_info):
        super(Sampler, self).__init__()
        self.data = data
        self.model = []
        self.n = len(data)

        cursor = 0
        pending_tanh = False
        for item in output_info:
            width, activation = item[0], item[1]
            if activation == "tanh":
                cursor += width
                pending_tanh = True
            elif activation == "softmax":
                if pending_tanh:
                    # Softmax paired with the preceding tanh span: step over it.
                    pending_tanh = False
                    cursor += width
                    continue
                rows_per_category = [
                    np.nonzero(data[:, cursor + j])[0] for j in range(width)
                ]
                self.model.append(rows_per_category)
                cursor += width
            else:
                assert 0

        assert cursor == data.shape[1]

    def sample(self, n, col, opt):
        """Return sampled rows of the training data.

        Args:
            n (int):
                Number of rows drawn when ``col`` is ``None``.
            col (sequence or None):
                Discrete column index per requested row, or ``None`` to draw
                ``n`` rows uniformly.
            opt (sequence):
                Category index (within ``col``) per requested row.

        Returns:
            numpy.ndarray: the selected rows of ``data``.
        """
        if col is None:
            return self.data[np.random.choice(np.arange(self.n), n)]

        chosen = [np.random.choice(self.model[c][o]) for c, o in zip(col, opt)]
        return self.data[chosen]
================================================
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/synthesizer.py
================================================
import numpy as np
import torch
from ctgan.conditional import ConditionalGenerator
from ctgan.models import Discriminator, Generator
from ctgan.sampler import Sampler
from ctgan.transformer import DataTransformer
from torch import optim
from torch.nn import functional
class EarlyStopping:
    """Signal that training should stop once the monitored loss stops improving.

    Call the instance with the latest loss; ``early_stop`` turns ``True`` after
    ``patience`` consecutive calls without an improvement.
    """

    def __init__(self, patience=7, verbose=False, delta=0):
        """
        Args:
            patience (int): How many consecutive non-improving calls to tolerate.
                            Default: 7
            verbose (bool): Stored on the instance; not used by this class.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # ``np.Inf`` was removed in NumPy 2.0; ``np.inf`` exists in every release.
        self.val_loss_min = np.inf
        self.delta = delta

    def __call__(self, val_loss):
        """Record ``val_loss`` and update ``counter`` / ``early_stop``."""
        # Higher score is better, so negate the loss.
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.counter = 0
class CTGANSynthesizer(object):
    """Conditional Table GAN Synthesizer.

    This is the core class of the CTGAN project, where the different components
    are orchestrated together.

    For more details about the process, please check the [Modeling Tabular data using
    Conditional GAN](https://arxiv.org/abs/1907.00503) paper.

    Args:
        embedding_dim (int):
            Size of the random sample passed to the Generator. Defaults to 128.
        gen_dim (tuple or list of ints):
            Size of the output samples for each one of the Residuals. A Residual Layer
            will be created for each one of the values provided. Defaults to (256, 256).
        dis_dim (tuple or list of ints):
            Size of the output samples for each one of the Discriminator Layers. A Linear Layer
            will be created for each one of the values provided. Defaults to (256, 256).
        l2scale (float):
            Weight Decay for the Adam Optimizer. Defaults to 1e-6.
        batch_size (int):
            Number of data samples to process in each step. Defaults to 500.
        patience (int):
            Number of consecutive epochs without improvement of the mean
            training loss after which ``fit`` stops early. Defaults to 25.
    """

    def __init__(
        self,
        embedding_dim=128,
        gen_dim=(256, 256),
        dis_dim=(256, 256),
        l2scale=1e-6,
        batch_size=500,
        patience=25,
    ):
        self.embedding_dim = embedding_dim
        self.gen_dim = gen_dim
        self.dis_dim = dis_dim
        self.patience = patience

        self.l2scale = l2scale
        self.batch_size = batch_size
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    def _apply_activate(self, data):
        """Apply tanh / gumbel-softmax to each span of raw generator output.

        The spans and their activations come from ``self.transformer.output_info``.
        """
        data_t = []
        st = 0
        for item in self.transformer.output_info:
            if item[1] == "tanh":
                ed = st + item[0]
                data_t.append(torch.tanh(data[:, st:ed]))
                st = ed
            elif item[1] == "softmax":
                ed = st + item[0]
                data_t.append(functional.gumbel_softmax(data[:, st:ed], tau=0.2))
                st = ed
            else:
                assert 0

        return torch.cat(data_t, dim=1)

    def _cond_loss(self, data, c, m):
        """Cross entropy between generated discrete columns and the condition.

        Args:
            data: raw generator output.
            c: conditional vector (one-hot over all discrete categories).
            m: mask selecting, per row, the discrete column that was conditioned on.

        Returns:
            Masked cross entropy summed over the batch and divided by its size.
        """
        loss = []
        st = 0
        st_c = 0
        skip = False
        for item in self.transformer.output_info:
            if item[1] == "tanh":
                st += item[0]
                skip = True
            elif item[1] == "softmax":
                if skip:
                    # Softmax paired with the preceding tanh span: it has no
                    # counterpart in the conditional vector.
                    skip = False
                    st += item[0]
                    continue

                ed = st + item[0]
                ed_c = st_c + item[0]
                tmp = functional.cross_entropy(
                    data[:, st:ed],
                    torch.argmax(c[:, st_c:ed_c], dim=1),
                    reduction="none",
                )
                loss.append(tmp)
                st = ed
                st_c = ed_c
            else:
                assert 0

        loss = torch.stack(loss, dim=1)
        return (loss * m).sum() / data.size()[0]

    def fit(self, train_data, discrete_columns=tuple(), epochs=300, log_frequency=True):
        """Fit the CTGAN Synthesizer models to the training data.

        Training stops before ``epochs`` is reached when the mean of the
        critic and generator losses of an epoch has not improved for
        ``self.patience`` consecutive epochs.

        Args:
            train_data (numpy.ndarray or pandas.DataFrame):
                Training Data. It must be a 2-dimensional numpy array or a
                pandas.DataFrame.
            discrete_columns (list-like):
                List of discrete columns to be used to generate the Conditional
                Vector. If ``train_data`` is a Numpy array, this list should
                contain the integer indices of the columns. Otherwise, if it is
                a ``pandas.DataFrame``, this list should contain the column names.
            epochs (int):
                Number of training epochs. Defaults to 300.
            log_frequency (boolean):
                Whether to use log frequency of categorical levels in conditional
                sampling. Defaults to ``True``.
        """
        self.transformer = DataTransformer()
        self.transformer.fit(train_data, discrete_columns)
        train_data = self.transformer.transform(train_data)

        data_sampler = Sampler(train_data, self.transformer.output_info)

        data_dim = self.transformer.output_dimensions
        self.cond_generator = ConditionalGenerator(
            train_data, self.transformer.output_info, log_frequency
        )

        self.generator = Generator(
            self.embedding_dim + self.cond_generator.n_opt, self.gen_dim, data_dim
        ).to(self.device)

        discriminator = Discriminator(
            data_dim + self.cond_generator.n_opt, self.dis_dim
        ).to(self.device)

        optimizerG = optim.Adam(
            self.generator.parameters(),
            lr=2e-4,
            betas=(0.5, 0.9),
            weight_decay=self.l2scale,
        )
        optimizerD = optim.Adam(discriminator.parameters(), lr=2e-4, betas=(0.5, 0.9))

        assert self.batch_size % 2 == 0
        mean = torch.zeros(self.batch_size, self.embedding_dim, device=self.device)
        std = mean + 1

        train_losses = []
        early_stopping = EarlyStopping(patience=self.patience, verbose=False)

        steps_per_epoch = max(len(train_data) // self.batch_size, 1)
        for i in range(epochs):
            for id_ in range(steps_per_epoch):
                # ---- critic step ----
                fakez = torch.normal(mean=mean, std=std)

                condvec = self.cond_generator.sample(self.batch_size)
                if condvec is None:
                    c1, m1, col, opt = None, None, None, None
                    real = data_sampler.sample(self.batch_size, col, opt)
                else:
                    c1, m1, col, opt = condvec
                    c1 = torch.from_numpy(c1).to(self.device)
                    m1 = torch.from_numpy(m1).to(self.device)
                    fakez = torch.cat([fakez, c1], dim=1)

                    # Real rows are drawn for a shuffled copy of the conditions.
                    perm = np.arange(self.batch_size)
                    np.random.shuffle(perm)
                    real = data_sampler.sample(self.batch_size, col[perm], opt[perm])
                    c2 = c1[perm]

                fake = self.generator(fakez)
                fakeact = self._apply_activate(fake)

                real = torch.from_numpy(real.astype("float32")).to(self.device)

                if c1 is not None:
                    fake_cat = torch.cat([fakeact, c1], dim=1)
                    real_cat = torch.cat([real, c2], dim=1)
                else:
                    real_cat = real
                    # The critic must see activated samples, exactly as it does
                    # in the generator step below (was the raw ``fake`` logits).
                    fake_cat = fakeact

                y_fake = discriminator(fake_cat)
                y_real = discriminator(real_cat)

                pen = discriminator.calc_gradient_penalty(
                    real_cat, fake_cat, self.device
                )
                loss_d = -(torch.mean(y_real) - torch.mean(y_fake))
                train_losses.append(loss_d.item())

                optimizerD.zero_grad()
                pen.backward(retain_graph=True)
                loss_d.backward()
                optimizerD.step()

                # ---- generator step ----
                fakez = torch.normal(mean=mean, std=std)
                condvec = self.cond_generator.sample(self.batch_size)

                if condvec is None:
                    c1, m1, col, opt = None, None, None, None
                else:
                    c1, m1, col, opt = condvec
                    c1 = torch.from_numpy(c1).to(self.device)
                    m1 = torch.from_numpy(m1).to(self.device)
                    fakez = torch.cat([fakez, c1], dim=1)

                fake = self.generator(fakez)
                fakeact = self._apply_activate(fake)

                if c1 is not None:
                    y_fake = discriminator(torch.cat([fakeact, c1], dim=1))
                else:
                    y_fake = discriminator(fakeact)

                if condvec is None:
                    cross_entropy = 0
                else:
                    cross_entropy = self._cond_loss(fake, c1, m1)

                loss_g = -torch.mean(y_fake) + cross_entropy
                train_losses.append(loss_g.item())

                optimizerG.zero_grad()
                loss_g.backward()
                optimizerG.step()

            # One early-stopping check per epoch on the mean of all losses.
            early_stopping(np.average(train_losses))
            if early_stopping.early_stop:
                print("GAN: Early stopping after epochs {}".format(i))
                break
            train_losses = []

    def sample(self, n):
        """Sample data similar to the training data.

        Args:
            n (int):
                Number of rows to sample.

        Returns:
            numpy.ndarray or pandas.DataFrame
        """
        steps = n // self.batch_size + 1
        data = []
        # No gradients are needed for sampling; skipping the graph saves memory.
        with torch.no_grad():
            for i in range(steps):
                mean = torch.zeros(self.batch_size, self.embedding_dim)
                std = mean + 1
                fakez = torch.normal(mean=mean, std=std).to(self.device)

                condvec = self.cond_generator.sample_zero(self.batch_size)
                if condvec is not None:
                    c1 = torch.from_numpy(condvec).to(self.device)
                    fakez = torch.cat([fakez, c1], dim=1)

                fake = self.generator(fakez)
                fakeact = self._apply_activate(fake)
                data.append(fakeact.detach().cpu().numpy())

        data = np.concatenate(data, axis=0)
        data = data[:n]

        return self.transformer.inverse_transform(data, None)
================================================
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/transformer.py
================================================
import numpy as np
import pandas as pd
from sklearn.exceptions import ConvergenceWarning
from sklearn.mixture import BayesianGaussianMixture
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils._testing import ignore_warnings
class DataTransformer(object):
    """Data Transformer.

    Continuous columns are modelled with a Bayesian Gaussian mixture and
    represented as a normalized scalar (clipped to [-0.99, 0.99]) plus a
    one-hot vector marking the selected mixture mode.
    Discrete columns are encoded using a scikit-learn OneHotEncoder.

    Args:
        n_clusters (int):
            Number of modes.
        epsilon (float):
            Epsilon value. Mixture components whose weight does not exceed
            it are dropped from the representation.
    """

    def __init__(self, n_clusters=10, epsilon=0.005):
        self.n_clusters = n_clusters
        self.epsilon = epsilon

    @ignore_warnings(category=ConvergenceWarning)
    def _fit_continuous(self, column, data):
        """Fit a Bayesian GMM on one continuous column and return its metadata."""
        # ``n_components`` is passed by keyword: recent scikit-learn releases
        # reject positional constructor arguments.
        gm = BayesianGaussianMixture(
            n_components=self.n_clusters,
            weight_concentration_prior_type="dirichlet_process",
            weight_concentration_prior=0.001,
            n_init=1,
        )
        gm.fit(data)
        # Keep only the modes that carry a non-negligible weight.
        components = gm.weights_ > self.epsilon
        num_components = components.sum()

        return {
            "name": column,
            "model": gm,
            "components": components,
            "output_info": [(1, "tanh"), (num_components, "softmax")],
            "output_dimensions": 1 + num_components,
        }

    def _fit_discrete(self, column, data):
        """Fit a dense one-hot encoder on one discrete column and return its metadata."""
        try:
            # scikit-learn >= 1.2 spelling of the "dense output" switch.
            ohe = OneHotEncoder(sparse_output=False)
        except TypeError:
            # Older scikit-learn only knows the original ``sparse`` keyword.
            ohe = OneHotEncoder(sparse=False)
        ohe.fit(data)
        categories = len(ohe.categories_[0])

        return {
            "name": column,
            "encoder": ohe,
            "output_info": [(categories, "softmax")],
            "output_dimensions": categories,
        }

    def fit(self, data, discrete_columns=tuple()):
        """Fit per-column models.

        Args:
            data (numpy.ndarray or pandas.DataFrame):
                Training data. Non-DataFrame input is wrapped in a DataFrame.
            discrete_columns (iterable):
                Columns (as they appear in ``data.columns``) to one-hot
                encode; every other column is treated as continuous.
        """
        self.output_info = []
        self.output_dimensions = 0

        # Remember the input flavour so inverse_transform can mirror it.
        if not isinstance(data, pd.DataFrame):
            self.dataframe = False
            data = pd.DataFrame(data)
        else:
            self.dataframe = True

        self.meta = []
        for column in data.columns:
            column_data = data[[column]].values
            if column in discrete_columns:
                meta = self._fit_discrete(column, column_data)
            else:
                meta = self._fit_continuous(column, column_data)

            self.output_info += meta["output_info"]
            self.output_dimensions += meta["output_dimensions"]
            self.meta.append(meta)

    def _transform_continuous(self, column_meta, data):
        """Return ``[scalar, one_hot_mode]`` arrays for one continuous column."""
        components = column_meta["components"]
        model = column_meta["model"]

        means = model.means_.reshape((1, self.n_clusters))
        stds = np.sqrt(model.covariances_).reshape((1, self.n_clusters))
        # Normalize each value against every mode: (x - mean) / (4 * std).
        features = (data - means) / (4 * stds)

        probs = model.predict_proba(data)

        n_opts = components.sum()
        features = features[:, components]
        probs = probs[:, components]

        # Sample one active mode per row from its (smoothed) responsibilities.
        opt_sel = np.zeros(len(data), dtype="int")
        for i in range(len(data)):
            pp = probs[i] + 1e-6
            pp = pp / pp.sum()
            opt_sel[i] = np.random.choice(np.arange(n_opts), p=pp)

        idx = np.arange((len(features)))
        features = features[idx, opt_sel].reshape([-1, 1])
        features = np.clip(features, -0.99, 0.99)

        probs_onehot = np.zeros_like(probs)
        probs_onehot[np.arange(len(probs)), opt_sel] = 1
        return [features, probs_onehot]

    def _transform_discrete(self, column_meta, data):
        """One-hot encode one discrete column with its fitted encoder."""
        encoder = column_meta["encoder"]
        return encoder.transform(data)

    def transform(self, data):
        """Transform ``data`` into the concatenated float representation."""
        if not isinstance(data, pd.DataFrame):
            data = pd.DataFrame(data)

        values = []
        for meta in self.meta:
            column_data = data[[meta["name"]]].values
            if "model" in meta:
                values += self._transform_continuous(meta, column_data)
            else:
                values.append(self._transform_discrete(meta, column_data))

        return np.concatenate(values, axis=1).astype(float)

    def _inverse_transform_continuous(self, meta, data, sigma):
        """Map ``[scalar, mode scores]`` back to values of one continuous column."""
        model = meta["model"]
        components = meta["components"]

        u = data[:, 0]
        v = data[:, 1:]

        if sigma is not None:
            u = np.random.normal(u, sigma)

        u = np.clip(u, -1, 1)
        # Dropped modes get a very low score so argmax can never pick them.
        v_t = np.ones((len(data), self.n_clusters)) * -100
        v_t[:, components] = v
        v = v_t
        means = model.means_.reshape([-1])
        stds = np.sqrt(model.covariances_).reshape([-1])
        p_argmax = np.argmax(v, axis=1)
        std_t = stds[p_argmax]
        mean_t = means[p_argmax]
        # Inverse of the normalization applied in _transform_continuous.
        column = u * 4 * std_t + mean_t

        return column

    def _inverse_transform_discrete(self, meta, data):
        """Map one-hot scores back to category values of one discrete column."""
        encoder = meta["encoder"]
        return encoder.inverse_transform(data)

    def inverse_transform(self, data, sigmas):
        """Map transformed data back to the original column space.

        Args:
            data (numpy.ndarray):
                Array laid out like the output of :meth:`transform`.
            sigmas (sequence or None):
                Optional noise scales indexed by the starting position of
                each continuous column in ``data``; ``None`` or an empty
                sequence disables the noise.

        Returns:
            numpy.ndarray or pandas.DataFrame, mirroring the type given to
            :meth:`fit`.
        """
        # An explicit check: ``if sigmas`` raises for NumPy arrays.
        has_sigmas = sigmas is not None and len(sigmas) > 0

        start = 0
        output = []
        column_names = []
        for meta in self.meta:
            dimensions = meta["output_dimensions"]
            columns_data = data[:, start : start + dimensions]

            if "model" in meta:
                sigma = sigmas[start] if has_sigmas else None
                inverted = self._inverse_transform_continuous(
                    meta, columns_data, sigma
                )
            else:
                inverted = self._inverse_transform_discrete(meta, columns_data)

            output.append(inverted)
            column_names.append(meta["name"])
            start += dimensions

        output = np.column_stack(output)
        if self.dataframe:
            output = pd.DataFrame(output, columns=column_names)

        return output
================================================
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/encoders.py
================================================
from typing import List
import numpy as np
import pandas as pd
from category_encoders.backward_difference import BackwardDifferenceEncoder
from category_encoders.cat_boost import CatBoostEncoder
from category_encoders.helmert import HelmertEncoder
from category_encoders.james_stein import JamesSteinEncoder
from category_encoders.leave_one_out import LeaveOneOutEncoder
from category_encoders.m_estimate import MEstimateEncoder
from category_encoders.one_hot import OneHotEncoder
from category_encoders.ordinal import OrdinalEncoder
from category_encoders.sum_coding import SumEncoder
from category_encoders.target_encoder import TargetEncoder
from category_encoders.woe import WOEEncoder
from sklearn.model_selection import RepeatedStratifiedKFold
def get_single_encoder(encoder_name: str, cat_cols: list):
    """
    Get encoder by its name
    :param encoder_name: Name of desired encoder
    :param cat_cols: Cat columns for encoding
    :return: Categorical encoder
    :raises NotImplementedError: if ``encoder_name`` is not a supported name
    """
    # Start from None so an unknown name reaches the explicit error below
    # instead of failing with an unbound local variable.
    encoder = None

    if encoder_name == "FrequencyEncoder":
        encoder = FrequencyEncoder(cols=cat_cols)
    elif encoder_name == "WOEEncoder":
        encoder = WOEEncoder(cols=cat_cols)
    elif encoder_name == "TargetEncoder":
        encoder = TargetEncoder(cols=cat_cols)
    elif encoder_name == "SumEncoder":
        encoder = SumEncoder(cols=cat_cols)
    elif encoder_name == "MEstimateEncoder":
        encoder = MEstimateEncoder(cols=cat_cols)
    elif encoder_name == "LeaveOneOutEncoder":
        encoder = LeaveOneOutEncoder(cols=cat_cols)
    elif encoder_name == "HelmertEncoder":
        encoder = HelmertEncoder(cols=cat_cols)
    elif encoder_name == "BackwardDifferenceEncoder":
        encoder = BackwardDifferenceEncoder(cols=cat_cols)
    elif encoder_name == "JamesSteinEncoder":
        encoder = JamesSteinEncoder(cols=cat_cols)
    elif encoder_name == "OrdinalEncoder":
        encoder = OrdinalEncoder(cols=cat_cols)
    elif encoder_name == "CatBoostEncoder":
        encoder = CatBoostEncoder(cols=cat_cols)
    elif encoder_name == "OneHotEncoder":
        encoder = OneHotEncoder(cols=cat_cols)

    if encoder is None:
        raise NotImplementedError("To be implemented")
    return encoder
class DoubleValidationEncoderNumerical:
    """
    Encoder with validation within

    For every encoder name a fresh encoder is fitted on each training fold of
    a repeated stratified K-fold split; training rows receive out-of-fold
    encodings averaged over the repeats, and new data receives the average of
    all fold encoders.
    """

    def __init__(self, cols, encoders_names_tuple=()):
        """
        :param cols: Categorical columns
        :param encoders_names_tuple: Tuple of str with encoders
        """
        self.cols, self.num_cols = cols, None
        self.encoders_names_tuple = encoders_names_tuple

        self.n_folds, self.n_repeats = 5, 3
        self.model_validation = RepeatedStratifiedKFold(
            n_splits=self.n_folds, n_repeats=self.n_repeats, random_state=0
        )
        # encoder name -> list of encoders, one per (repeat, fold)
        self.encoders_dict = {}

        # list for storing results of transformation from each encoder
        self.storage = None

    @staticmethod
    def _as_frame(values, encoder_name, index) -> pd.DataFrame:
        """Wrap encoded values in a frame aligned to ``index`` with encoded_* names."""
        frame = pd.DataFrame(values, index=index)
        frame.columns = [
            f"encoded_{encoder_name}_{i}" for i in range(frame.shape[1])
        ]
        return frame

    def _attach_storage(self, X: pd.DataFrame) -> pd.DataFrame:
        """Append stored encodings to ``X`` and drop the raw categorical columns."""
        for df in self.storage:
            X = pd.concat([X, df], axis=1)
        # Non-inplace drop so the caller's frame is never modified.
        return X.drop(self.cols, axis=1)

    def fit_transform(self, X: pd.DataFrame, y: np.array) -> pd.DataFrame:
        """
        Fit fold-wise encoders and return X with out-of-fold encodings
        :param X: Frame holding the categorical (and any other) columns
        :param y: Target values, one per row of X
        :return: X without ``cols`` plus ``encoded_<name>_<i>`` columns
        """
        self.num_cols = [col for col in X.columns if col not in self.cols]
        self.storage = []
        # Start from a clean slate so repeated fits do not pile up encoders.
        self.encoders_dict = {}
        # Split indices are positional, so index the target positionally too.
        y_values = np.asarray(y)

        for encoder_name in self.encoders_names_tuple:
            fold_encoders = []
            cols_representation = None

            for train_idx, val_idx in self.model_validation.split(X, y_values):
                encoder = get_single_encoder(encoder_name, self.cols)

                X_train = X.iloc[train_idx].reset_index(drop=True)
                X_val = X.iloc[val_idx].reset_index(drop=True)
                encoder.fit_transform(X_train, y_values[train_idx])

                # transform validation part and get all necessary cols
                val_t = encoder.transform(X_val)
                val_t = val_t[
                    [col for col in val_t.columns if col not in self.num_cols]
                ].values

                if cols_representation is None:
                    cols_representation = np.zeros((X.shape[0], val_t.shape[1]))

                # Each row is a validation row once per repeat.
                cols_representation[val_idx, :] += val_t / self.n_repeats
                fold_encoders.append(encoder)

            self.encoders_dict[encoder_name] = fold_encoders
            self.storage.append(
                self._as_frame(cols_representation, encoder_name, X.index)
            )

        return self._attach_storage(X)

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Encode new data with the average of all fitted fold encoders
        :param X: Frame with the same columns that were seen in fit_transform
        :return: X without ``cols`` plus ``encoded_<name>_<i>`` columns
        """
        self.storage = []
        for encoder_name in self.encoders_names_tuple:
            encoders = self.encoders_dict[encoder_name]
            cols_representation = None

            for encoder in encoders:
                test_tr = encoder.transform(X)
                test_tr = test_tr[
                    [col for col in test_tr.columns if col not in self.num_cols]
                ].values

                if cols_representation is None:
                    cols_representation = np.zeros(test_tr.shape)

                cols_representation = cols_representation + test_tr / len(encoders)

            self.storage.append(
                self._as_frame(cols_representation, encoder_name, X.index)
            )

        return self._attach_storage(X)
class MultipleEncoder:
    """
    Multiple encoder for categorical columns

    Applies every requested encoder to the categorical columns and replaces
    those columns with the concatenated ``encoded_<name>_<i>`` outputs.
    """

    def __init__(self, cols: List[str], encoders_names_tuple=()):
        """
        :param cols: List of categorical columns
        :param encoders_names_tuple: Tuple of categorical encoders names. Possible values in tuple are:
        "FrequencyEncoder", "WOEEncoder", "TargetEncoder", "SumEncoder", "MEstimateEncoder", "LeaveOneOutEncoder",
        "HelmertEncoder", "BackwardDifferenceEncoder", "JamesSteinEncoder", "OrdinalEncoder", "CatBoostEncoder"
        """

        self.cols = cols
        self.num_cols = None
        self.encoders_names_tuple = encoders_names_tuple
        # encoder name -> fitted encoder
        self.encoders_dict = {}

        # list for storing results of transformation from each encoder
        self.storage = None

    def _store(self, encoded: pd.DataFrame, encoder_name: str, index) -> None:
        """Keep the encoded categorical columns, renamed and aligned to ``index``."""
        values = encoded[
            [col for col in encoded.columns if col not in self.num_cols]
        ].values
        # Reuse the input index: a fresh RangeIndex would misalign the concat
        # below whenever the input frame is not 0..n-1 indexed.
        frame = pd.DataFrame(values, index=index)
        frame.columns = [
            f"encoded_{encoder_name}_{i}" for i in range(frame.shape[1])
        ]
        self.storage.append(frame)

    def _attach_storage(self, X: pd.DataFrame) -> pd.DataFrame:
        """Append stored encodings to ``X`` and drop the raw categorical columns."""
        # concat cat cols representations with initial dataframe
        for df in self.storage:
            X = pd.concat([X, df], axis=1)

        # remove all columns as far as we have their representations
        # (non-inplace, so the caller's frame is never modified)
        return X.drop(self.cols, axis=1)

    def fit_transform(self, X: pd.DataFrame, y: np.array) -> pd.DataFrame:
        """
        Fit every requested encoder on (X, y) and return the encoded frame
        :param X: Frame holding the categorical (and any other) columns
        :param y: Target values passed to each encoder
        :return: X without ``cols`` plus ``encoded_<name>_<i>`` columns
        """
        self.num_cols = [col for col in X.columns if col not in self.cols]
        self.storage = []
        index = X.index

        for encoder_name in self.encoders_names_tuple:
            encoder = get_single_encoder(encoder_name=encoder_name, cat_cols=self.cols)

            cols_representation = encoder.fit_transform(X, y)
            self.encoders_dict[encoder_name] = encoder
            self._store(cols_representation, encoder_name, index)

        return self._attach_storage(X)

    def transform(self, X) -> pd.DataFrame:
        """
        Encode new data with the already fitted encoders
        :param X: Frame with the same columns that were seen in fit_transform
        :return: X without ``cols`` plus ``encoded_<name>_<i>`` columns
        """
        self.storage = []
        index = X.index

        for encoder_name in self.encoders_names_tuple:
            # get representation of cat columns and form a pd.DataFrame for it
            cols_representation = self.encoders_dict[encoder_name].transform(X)
            self._store(cols_representation, encoder_name, index)

        return self._attach_storage(X)
class FrequencyEncoder:
    """
    Replace each categorical value with the number of times it occurs

    Counts are learned in ``fit``; values that were not seen during ``fit``
    fall back to their count inside the frame being transformed.
    """

    def __init__(self, cols):
        """
        :param cols: Categorical columns to encode
        """
        self.cols = cols
        # column name -> {value: count}, filled by fit
        self.counts_dict = None

    def fit(self, X: pd.DataFrame, y=None) -> "FrequencyEncoder":
        """
        Learn value counts for every configured column
        :param X: Training frame
        :param y: Ignored, kept for API compatibility with other encoders
        :return: The fitted encoder itself
        """
        counts_dict = {}
        for col in self.cols:
            values, counts = np.unique(X[col], return_counts=True)
            counts_dict[col] = dict(zip(values, counts))
        self.counts_dict = counts_dict
        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Return a copy of X with the configured columns replaced by counts
        :param X: Frame to encode; it is left untouched
        :return: Encoded copy of X
        """
        # Work on a copy: overwriting the caller's columns would feed already
        # encoded values to any encoder applied to the same frame afterwards.
        encoded = X.copy()
        for col in self.cols:
            values, counts = np.unique(X[col], return_counts=True)
            train_counts = self.counts_dict[col]
            # if value is in "train" keys - use "train" counts, else "test" counts
            mapping = {
                value: train_counts.get(value, count)
                for value, count in zip(values, counts)
            }
            encoded[col] = X[col].map(mapping)
        return encoded

    def fit_transform(self, X: pd.DataFrame, y=None) -> pd.DataFrame:
        """
        Fit on X and return its encoded copy
        :param X: Training frame
        :param y: Ignored, kept for API compatibility with other encoders
        :return: Encoded copy of X
        """
        self.fit(X, y)
        return self.transform(X)
if __name__ == "__main__":
    # Small smoke demo: run the same toy frame through a plain CatBoostEncoder,
    # then through both wrapper encoders, printing each result.
    df = pd.DataFrame(
        {
            "cat_col": [1, 2, 3, 1, 2, 3, 1, 1, 1],
            "target": [0, 1, 0, 1, 0, 1, 0, 1, 0],
        }
    )

    encoder_factories = (
        lambda: CatBoostEncoder(cols=["cat_col"]),
        lambda: MultipleEncoder(
            cols=["cat_col"], encoders_names_tuple=("CatBoostEncoder",)
        ),
        lambda: DoubleValidationEncoderNumerical(
            cols=["cat_col"], encoders_names_tuple=("CatBoostEncoder",)
        ),
    )

    for make_encoder in encoder_factories:
        # Every encoder gets its own copy of the toy data.
        temp = df.copy()
        enc = make_encoder()
        print(enc.fit_transform(temp, temp["target"]))
================================================
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/model.py
================================================
import numpy as np
import pandas as pd
from lightgbm import LGBMClassifier
from scipy.stats import rankdata
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from encoders import MultipleEncoder, DoubleValidationEncoderNumerical
class Model:
    """
    Cross-validated LightGBM classifier with pluggable categorical encoding

    ``cat_validation`` selects where the categorical encoders are fitted:
    "None" - once on the whole data before splitting, "Single" - on the
    training part of every fold, "Double" - on the training part of every
    fold using DoubleValidationEncoderNumerical.
    """

    def __init__(
        self,
        cat_validation="None",
        encoders_names=None,
        cat_cols=None,
        model_validation=StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
        model_params=None,
    ):
        """
        :param cat_validation: One of "None", "Single", "Double"
        :param encoders_names: Tuple of encoder names passed to the encoders
        :param cat_cols: Categorical columns
        :param model_validation: Splitter providing ``split(X, y)``
        :param model_params: Keyword arguments for LGBMClassifier; a default set is used when None
        """
        self.cat_validation = cat_validation
        self.encoders_names = encoders_names
        self.cat_cols = cat_cols
        self.model_validation = model_validation

        if model_params is None:
            self.model_params = {
                "metrics": "AUC",
                "n_estimators": 5000,
                "learning_rate": 0.04,
                "random_state": 42,
            }
        else:
            self.model_params = model_params

        self.encoders_list = []
        self.models_list = []
        self.scores_list_train = []
        self.scores_list_val = []
        self.models_trees = []

    def fit(self, X: pd.DataFrame, y: np.array) -> tuple:
        """
        Fit one encoder/model pair per validation fold
        :param X: Feature frame
        :param y: Binary target, one value per row of X
        :return: (mean train AUC, mean validation AUC, average best iteration)
        :raises ValueError: if ``cat_validation`` is not a supported mode
        """
        if self.cat_validation not in ("None", "Single", "Double"):
            raise ValueError(
                f"Unknown cat_validation: {self.cat_validation!r}; "
                'expected "None", "Single" or "Double"'
            )

        # Drop results of any previous fit so folds are not mixed together.
        self.encoders_list = []
        self.models_list = []
        self.scores_list_train = []
        self.scores_list_val = []
        self.models_trees = []

        # process cat cols
        if self.cat_validation == "None":
            encoder = MultipleEncoder(
                cols=self.cat_cols, encoders_names_tuple=self.encoders_names
            )
            X = encoder.fit_transform(X, y)

        # Positional access works for both Series and array-like targets.
        y_series = y if isinstance(y, pd.Series) else pd.Series(y)

        for train_idx, val_idx in self.model_validation.split(X, y):
            X_train = X.iloc[train_idx].reset_index(drop=True)
            X_val = X.iloc[val_idx].reset_index(drop=True)
            # Reset the target index as well so it lines up with the features
            # when both are handed to the encoders.
            y_train = y_series.iloc[train_idx].reset_index(drop=True)
            y_val = y_series.iloc[val_idx].reset_index(drop=True)

            if self.cat_validation == "Single":
                encoder = MultipleEncoder(
                    cols=self.cat_cols, encoders_names_tuple=self.encoders_names
                )
                X_train = encoder.fit_transform(X_train, y_train)
                X_val = encoder.transform(X_val)

            if self.cat_validation == "Double":
                encoder = DoubleValidationEncoderNumerical(
                    cols=self.cat_cols, encoders_names_tuple=self.encoders_names
                )
                X_train = encoder.fit_transform(X_train, y_train)
                X_val = encoder.transform(X_val)

            # In "None" mode the same pre-fitted encoder is stored for every fold.
            self.encoders_list.append(encoder)

            # check for OrdinalEncoder encoding
            for col in [col for col in X_train.columns if "OrdinalEncoder" in col]:
                X_train[col] = X_train[col].astype("category")
                X_val[col] = X_val[col].astype("category")

            # fit model
            # NOTE(review): `early_stopping_rounds` / `verbose` look like the
            # pre-4.0 LightGBM fit() keywords; newer releases expect callbacks
            # instead - confirm against the pinned lightgbm version.
            model = LGBMClassifier(**self.model_params)
            model.fit(
                X_train,
                y_train,
                eval_set=[(X_train, y_train), (X_val, y_val)],
                early_stopping_rounds=50,
                verbose=False,
            )
            self.models_trees.append(model.best_iteration_)
            self.models_list.append(model)

            y_hat = model.predict_proba(X_train)[:, 1]
            score_train = roc_auc_score(y_train, y_hat)
            self.scores_list_train.append(score_train)

            y_hat = model.predict_proba(X_val)[:, 1]
            score_val = roc_auc_score(y_val, y_hat)
            self.scores_list_val.append(score_val)

        mean_score_train = np.mean(self.scores_list_train)
        mean_score_val = np.mean(self.scores_list_val)
        avg_num_trees = int(np.mean(self.models_trees))
        print(f"Mean score train : {np.round(mean_score_train, 4)}")
        print(f"Mean score val : {np.round(mean_score_val, 4)}")
        return mean_score_train, mean_score_val, avg_num_trees

    def predict(self, X: pd.DataFrame, return_shape=True) -> np.array:
        """
        Sum the rank-transformed fold predictions for X
        :param X: Feature frame with the columns used in fit
        :param return_shape: When True also return the encoded feature count
        :return: Summed ranks, or (summed ranks, number of encoded features)
        """
        y_hat = np.zeros(X.shape[0])
        for encoder, model in zip(self.encoders_list, self.models_list):
            X_test = X.copy()
            X_test = encoder.transform(X_test)

            # check for OrdinalEncoder encoding
            for col in [col for col in X_test.columns if "OrdinalEncoder" in col]:
                X_test[col] = X_test[col].astype("category")

            unranked_preds = model.predict_proba(X_test)[:, 1]
            # Ranks, not raw probabilities, are accumulated across folds.
            y_hat += rankdata(unranked_preds)

        if return_shape:
            return y_hat, X_test.shape[1]
        else:
            return y_hat
================================================
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/results/fit_predict_scores.txt
================================================
dataset_name Encoder validation_type sample_type train_shape test_shape mean_target_before_sampling_train mean_target_after_sampling_train mean_target_test num_cat_cols train_score val_score test_score time features_before_encoding features_after_encoding avg_tress_number train_prop_size
telecom CatBoostEncoder Single None 140 4226 0.24285714285714285 0.24285714285714285 0.26786559394226217 16 0.9203579209461562 0.7823902288188003 0.7783220858335179 2.6647253036499023 19 19 21 0.05
telecom CatBoostEncoder Single None 281 4226 0.2526690391459075 0.2526690391459075 0.26786559394226217 16 0.9721151837928155 0.8211678004535148 0.7922811962512648 2.913510799407959 19 19 51 0.1
telecom CatBoostEncoder Single None 704 4226 0.25 0.25 0.26786559394226217 16 0.9507839530560027 0.7711436700466351 0.7909773789918251 2.9542291164398193 19 19 28 0.25
telecom CatBoostEncoder Single None 1408 4226 0.2649147727272727 0.2649147727272727 0.26786559394226217 16 0.9473990035520276 0.7625649127388258 0.7941651857807543 3.3633759021759033 19 19 31 0.5
telecom CatBoostEncoder Single None 2112 4226 0.26609848484848486 0.26609848484848486 0.26786559394226217 16 0.9625114751219357 0.796838984951674 0.8173239668251101 3.6398279666900635 19 19 55 0.75
telecom CatBoostEncoder Single gan 147 4226 0.24285714285714285 0.2789115646258503 0.26786559394226217 16 0.935829991087344 0.7108946608946609 0.7348377173647388 2.7197842597961426 19 19 126 0.05
telecom CatBoostEncoder Single gan 309 4226 0.2526690391459075 0.32038834951456313 0.26786559394226217 16 0.9710341320072333 0.8431954887218046 0.7764859205438086 2.84492826461792 19 19 53 0.1
telecom CatBoostEncoder Single gan 880 4226 0.25 0.4 0.26786559394226217 16 0.986566069551656 0.8351047163484049 0.7945499210828664 3.8594696521759033 19 19 106 0.25
telecom CatBoostEncoder Single gan 2112 4226 0.2649147727272727 0.5099431818181818 0.26786559394226217 16 0.9774739758417266 0.7292480245665282 0.670671720713292 3.547983407974243 19 19 63 0.5
telecom CatBoostEncoder Single gan 3696 4226 0.26609848484848486 0.5308441558441559 0.26786559394226217 16 0.9596056401070886 0.579395160858396 0.4839657458525677 4.959321975708008 19 19 205 0.75
telecom CatBoostEncoder Single sample_original 147 4226 0.24285714285714285 0.2789115646258503 0.26786559394226217 16 0.9231801735845855 0.8339375901875903 0.7753863913056389 2.622987985610962 19 19 27 0.05
telecom CatBoostEncoder Single sample_original 309 4226 0.2526690391459075 0.32038834951456313 0.26786559394226217 16 0.9652377185051236 0.8419611528822056 0.7942735398046143 2.9789845943450928 19 19 36 0.1
telecom CatBoostEncoder Single sample_original 880 4226 0.25 0.2840909090909091 0.26786559394226217 16 0.9757579365079365 0.7878412698412698 0.7787949033921804 3.535475969314575 19 19 52 0.25
telecom CatBoostEncoder Single sample_original 2112 4226 0.2649147727272727 0.2689393939393939 0.26786559394226217 16 0.9769307643413366 0.8337605791432761 0.8100511134054057 3.666048288345337 19 19 65 0.5
telecom CatBoostEncoder Single sample_original 3696 4226 0.26609848484848486 0.25703463203463206 0.26786559394226217 16 0.9572892638827744 0.8329185190995373 0.8034537952174617 3.8924672603607178 19 19 57 0.75
adult CatBoostEncoder Single None 976 29306 0.23668032786885246 0.23668032786885246 0.23995086330444276 8 0.9712404721251744 0.847279737255462 0.8868313280568222 2.230457067489624 14 14 46 0.05
adult CatBoostEncoder Single None 1953 29306 0.24270353302611367 0.24270353302611367 0.23995086330444276 8 0.960174625363074 0.8285939436314103 0.8827378219839923 2.432476282119751 14 14 53 0.1
adult CatBoostEncoder Single None 4884 29306 0.23361998361998362 0.23361998361998362 0.23995086330444276 8 0.9482754738723914 0.8366190735101722 0.8784073030913059 2.8655831813812256 14 14 71 0.25
adult CatBoostEncoder Single None 9768 29306 0.23474610974610974 0.23474610974610974 0.23995086330444276 8 0.9206448457558245 0.8782990082565686 0.902996855636946 3.118391275405884 14 14 62 0.5
adult CatBoostEncoder Single None 14652 29306 0.23744198744198744 0.23744198744198744 0.23995086330444276 8 0.9524850648340255 0.8764301580520678 0.9108225945747773 5.403693914413452 14 14 194 0.75
adult CatBoostEncoder Single gan 1024 29306 0.23668032786885246 0.2724609375 0.23995086330444276 8 0.9622633701507809 0.8412385600976204 0.8551866259124772 2.529022455215454 14 14 84 0.05
adult CatBoostEncoder Single gan 2148 29306 0.24270353302611367 0.31145251396648044 0.23995086330444276 8 0.9775502159218641 0.8808715364664044 0.8665231182420047 2.5598251819610596 14 14 65 0.1
adult CatBoostEncoder Single gan 6105 29306 0.23361998361998362 0.3868959868959869 0.23995086330444276 8 0.9643542299126151 0.9215707610412934 0.852855730107893 2.577143430709839 14 14 40 0.25
adult CatBoostEncoder Single gan 14652 29306 0.23474610974610974 0.4224679224679225 0.23995086330444276 8 0.9791659801783968 0.9516803799287248 0.8952397526391495 5.079102039337158 14 14 165 0.5
adult CatBoostEncoder Single gan 25641 29306 0.23744198744198744 0.564057564057564 0.23995086330444276 8 0.9774536571575293 0.9655222496928223 0.8646375819340936 4.192373991012573 14 14 51 0.75
adult CatBoostEncoder Single sample_original 1024 29306 0.23668032786885246 0.2255859375 0.23995086330444276 8 0.9800756039427319 0.8421736597006977 0.8876648999128958 2.370562791824341 14 14 65 0.05
adult CatBoostEncoder Single sample_original 2148 29306 0.24270353302611367 0.2532588454376164 0.23995086330444276 8 0.9861732005759378 0.8412644823955254 0.8810382421160062 2.72615122795105 14 14 87 0.1
adult CatBoostEncoder Single sample_original 6105 29306 0.23361998361998362 0.23095823095823095 0.23995086330444276 8 0.9262703787415312 0.8858975520963149 0.8959786368410068 2.753687858581543 14 14 42 0.25
adult CatBoostEncoder Single sample_original 14652 29306 0.23474610974610974 0.2308899808899809 0.23995086330444276 8 0.9100725384410294 0.8825036397065548 0.8970955438333802 3.8601276874542236 14 14 64 0.5
adult CatBoostEncoder Single sample_original 25641 29306 0.23744198744198744 0.23294723294723294 0.23995086330444276 8 0.9341026630291239 0.8953411238314624 0.912838220265893 6.400696754455566 14 14 184 0.75
employee CatBoostEncoder Single None 655 19662 0.9404580152671755 0.9404580152671755 0.9421218594242702 9 0.8574978229171155 0.6160597205050392 0.5376888159909587 2.1917824745178223 9 9 17 0.05
employee CatBoostEncoder Single None 1310 19662 0.9396946564885497 0.9396946564885497 0.9421218594242702 9 0.903871914678855 0.521004390079326 0.5498886117055574 2.478717803955078 9 9 16 0.1
employee CatBoostEncoder Single None 3276 19662 0.9377289377289377 0.9377289377289377 0.9421218594242702 9 0.8387308459573963 0.5478813781285504 0.541631428415291 2.755147695541382 9 9 13 0.25
employee CatBoostEncoder Single None 6553 19662 0.9410956813673127 0.9410956813673127 0.9421218594242702 9 0.8435260045008717 0.5714672460292307 0.5968346199050565 3.289389133453369 9 9 31 0.5
employee CatBoostEncoder Single None 9830 19662 0.9429298067141404 0.9429298067141404 0.9421218594242702 9 0.7700627326608164 0.5572643636687524 0.6047351433887695 3.9642112255096436 9 9 61 0.75
employee CatBoostEncoder Single gan 687 19662 0.9404580152671755 0.9432314410480349 0.9421218594242702 9 0.9294351017812119 0.5967550472783032 0.5691309502440002 2.377863645553589 9 9 77 0.05
employee CatBoostEncoder Single gan 1441 19662 0.9396946564885497 0.945176960444136 0.9421218594242702 9 0.9171099370426935 0.5328193344645551 0.5059988912877571 2.5225038528442383 9 9 23 0.1
employee CatBoostEncoder Single gan 4095 19662 0.9377289377289377 0.9501831501831501 0.9421218594242702 9 0.8632844864380627 0.5733728644924116 0.5250210717943833 2.941354751586914 9 9 28 0.25
employee CatBoostEncoder Single gan 9829 19662 0.9410956813673127 0.9598127988605148 0.9421218594242702 9 0.7717158950022815 0.6473914752875848 0.5719551731492399 3.41788911819458 9 9 4 0.5
employee CatBoostEncoder Single gan 17202 19662 0.9429298067141404 0.9667480525520288 0.9421218594242702 9 0.8868439406407245 0.7010561954836108 0.6020863211132739 5.74671196937561 9 9 107 0.75
employee CatBoostEncoder Single sample_original 687 19662 0.9404580152671755 0.9432314410480349 0.9421218594242702 9 0.892207108066352 0.5881078030496635 0.5588777101591286 2.3120646476745605 9 9 41 0.05
employee CatBoostEncoder Single sample_original 1441 19662 0.9396946564885497 0.945176960444136 0.9421218594242702 9 0.9105744772730281 0.5817287211358902 0.5083986185783209 2.5556788444519043 9 9 26 0.1
employee CatBoostEncoder Single sample_original 4095 19662 0.9377289377289377 0.9391941391941392 0.9421218594242702 9 0.8347272230137334 0.5749850436180417 0.5445894254316539 2.9585955142974854 9 9 25 0.25
employee CatBoostEncoder Single sample_original 9829 19662 0.9410956813673127 0.9384474514192696 0.9421218594242702 9 0.7861733326006606 0.5720584718432115 0.5704311919102526 3.5040416717529297 9 9 10 0.5
employee CatBoostEncoder Single sample_original 17202 19662 0.9429298067141404 0.9450645273805371 0.9421218594242702 9 0.7690751192353558 0.576197312394728 0.6096386998446702 5.370056629180908 9 9 92 0.75
mortgages CatBoostEncoder Single None 912 27386 0.7763157894736842 0.7763157894736842 0.7893084057547652 9 0.9569518669341814 0.6219996357665105 0.5975373928677262 2.4561352729797363 19 19 30 0.05
mortgages CatBoostEncoder Single None 1825 27386 0.7797260273972603 0.7797260273972603 0.7893084057547652 9 0.9071498432071694 0.6354816393487632 0.6131081933339063 2.498446464538574 19 19 18 0.1
mortgages CatBoostEncoder Single None 4564 27386 0.7865907099035934 0.7865907099035934 0.7893084057547652 9 0.9019594727508462 0.621371441067906 0.6364322892279549 3.0621771812438965 19 19 46 0.25
mortgages CatBoostEncoder Single None 9128 27386 0.7853856266432954 0.7853856266432954 0.7893084057547652 9 0.8534152166259004 0.6354931025050673 0.653587648343162 3.783792734146118 19 19 87 0.5
mortgages CatBoostEncoder Single None 13692 27386 0.7879053461875548 0.7879053461875548 0.7893084057547652 9 0.8472016415045802 0.6562038901988799 0.6753054296066718 4.733774662017822 19 19 94 0.75
mortgages CatBoostEncoder Single gan 957 27386 0.7763157894736842 0.786833855799373 0.7893084057547652 9 0.939659739381808 0.6163126043180961 0.6027468019067974 2.4967386722564697 19 19 46 0.05
mortgages CatBoostEncoder Single gan 2007 27386 0.7797260273972603 0.7997010463378177 0.7893084057547652 9 0.983145339059089 0.6609822699126957 0.619393835941539 2.7110533714294434 19 19 46 0.1
mortgages CatBoostEncoder Single gan 5705 27386 0.7865907099035934 0.8292725679228746 0.7893084057547652 9 0.8963424681896441 0.699274272063823 0.6339991791496639 3.1721582412719727 19 19 45 0.25
mortgages CatBoostEncoder Single gan 13692 27386 0.7853856266432954 0.8569237510955302 0.7893084057547652 9 0.8794340464771435 0.7679436604859984 0.6327580098251888 3.8494341373443604 19 19 36 0.5
mortgages CatBoostEncoder Single gan 23961 27386 0.7879053461875548 0.8616084470598055 0.7893084057547652 9 0.9755118322911003 0.7869008438365362 0.6186028033666571 10.492693901062012 19 19 476 0.75
mortgages CatBoostEncoder Single sample_original 957 27386 0.7763157894736842 0.7575757575757576 0.7893084057547652 9 0.934040660794276 0.6276841366550767 0.575075710976015 2.3680293560028076 19 19 24 0.05
mortgages CatBoostEncoder Single sample_original 2007 27386 0.7797260273972603 0.7747882411559541 0.7893084057547652 9 0.9965417266065618 0.6916640558441202 0.6345660453390325 3.2200515270233154 19 19 113 0.1
mortgages CatBoostEncoder Single sample_original 5705 27386 0.7865907099035934 0.7782646801051709 0.7893084057547652 9 0.9704967751842751 0.7198687818253034 0.6499378028278687 4.217786073684692 19 19 172 0.25
mortgages CatBoostEncoder Single sample_original 13692 27386 0.7853856266432954 0.7884896289804265 0.7893084057547652 9 0.9336765448952681 0.7434918857339071 0.6622976817993476 5.424570322036743 19 19 158 0.5
mortgages CatBoostEncoder Single sample_original 23961 27386 0.7879053461875548 0.791327573974375 0.7893084057547652 9 0.9350927627392593 0.7573637753339764 0.6689071706303952 9.380746364593506 19 19 345 0.75
poverty_A CatBoostEncoder Single None 751 22536 0.5619174434087882 0.5619174434087882 0.5274671636492723 38 0.8859743934446591 0.5620650101532454 0.5401530752584143 7.994302988052368 40 40 79 0.05
poverty_A CatBoostEncoder Single None 1502 22536 0.5679094540612517 0.5679094540612517 0.5274671636492723 38 0.831990567462373 0.5293249059135732 0.5088747165207526 7.762101411819458 40 40 3 0.1
poverty_A CatBoostEncoder Single None 3756 22536 0.5362087326943556 0.5362087326943556 0.5274671636492723 38 0.858324953012095 0.5982342354289758 0.6306323183875759 8.875043392181396 40 40 52 0.25
poverty_A CatBoostEncoder Single None 7512 22536 0.5154419595314164 0.5154419595314164 0.5274671636492723 38 0.7746306123276163 0.5485171975228239 0.5720604833462329 9.434112787246704 40 40 25 0.5
poverty_A CatBoostEncoder Single None 11268 22536 0.5154419595314164 0.5154419595314164 0.5274671636492723 38 0.7783769232796187 0.5372990835640438 0.5539699268307093 11.178505897521973 40 40 87 0.75
poverty_A CatBoostEncoder Single gan 788 22536 0.5619174434087882 0.5824873096446701 0.5274671636492723 38 0.9421522331067809 0.5516229422751163 0.5869937181884348 8.291184902191162 40 40 100 0.05
poverty_A CatBoostEncoder Single gan 1652 22536 0.5679094540612517 0.6071428571428571 0.5274671636492723 38 0.9128941151595166 0.5922561736635842 0.5445247344064106 8.876214265823364 40 40 74 0.1
poverty_A CatBoostEncoder Single gan 4695 22536 0.5362087326943556 0.627689030883919 0.5274671636492723 38 0.891289713449121 0.6112995961809725 0.5787338786848135 9.212333679199219 40 40 41 0.25
poverty_A CatBoostEncoder Single gan 11268 22536 0.5154419595314164 0.6769613063542776 0.5274671636492723 38 0.8806414626978063 0.7688883144925891 0.5688070955325766 10.436711311340332 40 40 42 0.5
poverty_A CatBoostEncoder Single gan 19719 22536 0.5154419595314164 0.7230082661392565 0.5274671636492723 38 0.8759691599411358 0.8052530370215738 0.5569678295071181 13.256612777709961 40 40 10 0.75
poverty_A CatBoostEncoder Single sample_original 788 22536 0.5619174434087882 0.5355329949238579 0.5274671636492723 38 0.9429218068418079 0.527533157654028 0.5362147347976903 8.915942430496216 40 40 135 0.05
poverty_A CatBoostEncoder Single sample_original 1652 22536 0.5679094540612517 0.5163438256658596 0.5274671636492723 38 0.9487247794533682 0.5118483671218681 0.49198515068132703 8.978588819503784 40 40 82 0.1
poverty_A CatBoostEncoder Single sample_original 4695 22536 0.5362087326943556 0.5033013844515442 0.5274671636492723 38 0.8632332457785014 0.5921758366469347 0.6177291359538557 8.948752880096436 40 40 44 0.25
poverty_A CatBoostEncoder Single sample_original 11268 22536 0.5154419595314164 0.5195243166489173 0.5274671636492723 38 0.7676107600906276 0.6234098148367526 0.6841640681225339 11.036270380020142 40 40 68 0.5
poverty_A CatBoostEncoder Single sample_original 19719 22536 0.5154419595314164 0.5316699629798671 0.5274671636492723 38 0.7549028313503439 0.5012396290200846 0.4823346055754006 14.260282754898071 40 40 58 0.75
credit CatBoostEncoder Single None 6150 184507 0.07788617886178861 0.07788617886178861 0.08026253746470324 18 0.9527241954477089 0.7323328624969723 0.7206022513811922 13.163346529006958 120 120 34 0.05
credit CatBoostEncoder Single None 12300 184507 0.07707317073170732 0.07707317073170732 0.08026253746470324 18 0.93940969341188 0.7321126469942714 0.727353023208482 14.704874038696289 120 120 60 0.1
credit CatBoostEncoder Single None 30751 184507 0.08012747552925108 0.08012747552925108 0.08026253746470324 18 0.8619422979468002 0.7307417093245144 0.7347498739187317 18.71873188018799 120 120 66 0.25
credit CatBoostEncoder Single None 61502 184507 0.08012747552925108 0.08012747552925108 0.08026253746470324 18 0.8156249961470617 0.7276302952342422 0.7347198003949358 26.168806552886963 120 120 74 0.5
credit CatBoostEncoder Single None 92253 184507 0.08095129697679208 0.08095129697679208 0.08026253746470324 18 0.8201713878566016 0.7397918520907603 0.7438959238341907 34.938451051712036 120 120 113 0.75
credit CatBoostEncoder Single gan 6457 184507 0.07788617886178861 0.07820969490475453 0.08026253746470324 18 0.9764313232501689 0.7512394460240639 0.7233483986540664 13.694202423095703 120 120 62 0.05
credit CatBoostEncoder Single gan 13530 184507 0.07707317073170732 0.07612712490761271 0.08026253746470324 18 0.9492452184466019 0.7380827184466019 0.7251976787296043 14.91127061843872 120 120 67 0.1
credit CatBoostEncoder Single gan 38438 184507 0.08012747552925108 0.08106561215463864 0.08026253746470324 18 0.9090391347620926 0.754624152577333 0.7350776851368745 22.562947273254395 120 120 124 0.25
credit CatBoostEncoder Single gan 92253 184507 0.08012747552925108 0.08194855451855224 0.08026253746470324 18 0.9420176537916681 0.7938256531874146 0.7389970416524646 53.786041021347046 120 120 433 0.5
credit CatBoostEncoder Single gan 161442 184507 0.08095129697679208 0.08093928469667125 0.08026253746470324 18 0.9773945217063078 0.831071104961706 0.7412463491556259 205.97851586341858 120 120 1701 0.75
credit CatBoostEncoder Single sample_original 6457 184507 0.07788617886178861 0.07882917763667338 0.08026253746470324 18 0.9632578345100518 0.7369329014193641 0.7176956499719532 13.45695972442627 120 120 74 0.05
credit CatBoostEncoder Single sample_original 13530 184507 0.07707317073170732 0.07723577235772358 0.08026253746470324 18 0.9563164074401242 0.7443696071649616 0.7271938929573681 16.719157934188843 120 120 100 0.1
credit CatBoostEncoder Single sample_original 38438 184507 0.08012747552925108 0.08124772360684739 0.08026253746470324 18 0.9243948541074898 0.7622272437440326 0.7365072545915403 25.5328631401062 120 120 198 0.25
credit CatBoostEncoder Single sample_original 92253 184507 0.08012747552925108 0.08205695207743921 0.08026253746470324 18 0.9810895339132031 0.8042303392590467 0.7370427261844282 79.69111585617065 120 120 844 0.5
credit CatBoostEncoder Single sample_original 161442 184507 0.08095129697679208 0.08109413907161706 0.08026253746470324 18 0.9687607131301714 0.8255935205394949 0.742136276798759 141.1157841682434 120 120 1042 0.75
taxi CatBoostEncoder Single None 17851 535535 0.561313091703546 0.561313091703546 0.8227697536108751 5 0.7615306475536557 0.646381333555182 0.5250318412788527 4.8244500160217285 7 7 29 0.05
taxi CatBoostEncoder Single None 35702 535535 0.4692454204246261 0.4692454204246261 0.8227697536108751 5 0.8141689234410135 0.7586013081249271 0.5155964532845364 5.683969020843506 7 7 30 0.1
taxi CatBoostEncoder Single None 89255 535535 0.5131253151083973 0.5131253151083973 0.8227697536108751 5 0.7451712701819895 0.7748186166915605 0.543891241982895 8.106499910354614 7 7 15 0.25
taxi CatBoostEncoder Single None 178511 535535 0.533126810112542 0.533126810112542 0.8227697536108751 5 0.7613935320974424 0.728291886604595 0.5629126746936868 20.373082399368286 7 7 109 0.5
taxi CatBoostEncoder Single None 267766 535535 0.5815226727814584 0.5815226727814584 0.8227697536108751 5 0.719079048652581 0.6419974904886212 0.49650558457388094 20.892715215682983 7 7 48 0.75
taxi CatBoostEncoder Single gan 18743 535535 0.561313091703546 0.5641572853865443 0.8227697536108751 5 0.7598714244280431 0.5914784556920778 0.48656500218380583 5.255659580230713 7 7 29 0.05
taxi CatBoostEncoder Single gan 39272 535535 0.4692454204246261 0.47787227541250765 0.8227697536108751 5 0.8336661476148247 0.7698562281858764 0.4912398915266225 8.154317140579224 7 7 76 0.1
taxi CatBoostEncoder Single gan 111568 535535 0.5131253151083973 0.48767567761365266 0.8227697536108751 5 0.7911678584412571 0.7797101169322348 0.5265006448716775 11.239099025726318 7 7 50 0.25
taxi CatBoostEncoder Single gan 267766 535535 0.533126810112542 0.6559085171380982 0.8227697536108751 5 0.8660098515755502 0.8456732827961997 0.5373428720240516 20.924078226089478 7 7 60 0.5
taxi CatBoostEncoder Single gan 468590 535535 0.5815226727814584 0.6128193089908022 0.8227697536108751 5 0.825626069418726 0.7902174878641041 0.526980184147693 40.348939657211304 7 7 96 0.75
taxi CatBoostEncoder Single sample_original 18743 535535 0.561313091703546 0.5562610041082003 0.8227697536108751 5 0.7675420142171343 0.639305055797575 0.5118678397122456 5.168023109436035 7 7 38 0.05
taxi CatBoostEncoder Single sample_original 39272 535535 0.4692454204246261 0.43649419433693215 0.8227697536108751 5 0.8273389418647688 0.8436326989449704 0.5470727547432738 7.189561367034912 7 7 66 0.1
taxi CatBoostEncoder Single sample_original 111568 535535 0.5131253151083973 0.42115122615803813 0.8227697536108751 5 0.8203346240505904 0.7872740574701714 0.5564292427170119 12.47060513496399 7 7 79 0.25
taxi CatBoostEncoder Single sample_original 267766 535535 0.533126810112542 0.3864232202744187 0.8227697536108751 5 0.7782898628462955 0.7787353721670882 0.5418087488008634 17.32105803489685 7 7 17 0.5
taxi CatBoostEncoder Single sample_original 468590 535535 0.5815226727814584 0.4923237798501889 0.8227697536108751 5 0.7478875643555635 0.7609802109811412 0.5445782397760078 28.39053201675415 7 7 27 0.75
================================================
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/run_experiment.py
================================================
import time
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from model import Model
from utils import save_exp_to_file, extend_gan_train, extend_from_original
def execute_experiment(dataset_name, encoders_list, validation_type, sample_type=None):
    """
    Run the train-size sweep for one dataset and append one result row per run

    The dataset is read from ``./data/<dataset_name>/<dataset_name>.gz``, missing
    values are filled with column means and the rows are split, in file order,
    into 40% train candidates and 60% test. For every train proportion the head
    of the train part is taken, optionally extended according to ``sample_type``,
    and a ``Model`` is fitted and scored (ROC AUC on test) per encoder tuple.

    :param dataset_name: Name of the dataset folder/file under ``./data``
    :param encoders_list: Iterable of encoder-name tuples passed to ``Model``
    :param validation_type: Value forwarded to ``Model(cat_validation=...)``
    :param sample_type: ``None`` (no extension), ``"gan"`` or ``"sample_original"``
    :return: None, rows are appended to ``./results/fit_predict_scores.txt``
    """
    dataset_pth = f"./data/{dataset_name}/{dataset_name}.gz"

    # load processed dataset
    data = pd.read_csv(dataset_pth)
    # numeric_only=True: recent pandas raises when asked for the mean of
    # non-numeric columns, older versions silently skipped them
    data.fillna(data.mean(numeric_only=True), inplace=True)

    # the categorical columns do not depend on the train proportion
    cat_cols = [col for col in data.columns if col.startswith("cat")]

    for train_prop_size in [0.05, 0.1, 0.25, 0.5, 0.75]:
        # make train-test split (redone per proportion so every run starts
        # from fresh, unmodified frames)
        X_train, X_test, y_train, y_test = train_test_split(
            data.drop("target", axis=1),
            data["target"],
            test_size=0.6,
            shuffle=False,
            random_state=42,
        )
        X_test, y_test = X_test.reset_index(drop=True), y_test.reset_index(drop=True)

        n_train_rows = int(X_train.shape[0] * train_prop_size)
        X_train = X_train.head(n_train_rows).reset_index(drop=True)
        y_train = y_train.head(n_train_rows).reset_index(drop=True)
        mean_target_before_sampling_train = np.mean(y_train)

        if train_prop_size == 1:
            # the full train part is never evaluated
            continue
        elif sample_type == "gan":
            X_train, y_train = extend_gan_train(
                X_train,
                y_train,
                X_test,
                cat_cols,
                epochs=500,
                gen_x_times=train_prop_size,
            )
        elif sample_type == "sample_original":
            X_train, y_train = extend_from_original(
                X_train, y_train, X_test, cat_cols, gen_x_times=train_prop_size
            )

        for encoders_tuple in encoders_list:
            print(
                f"\n{encoders_tuple}, {dataset_name}, train size {int(100 * train_prop_size)}%, "
                f"validation_type {validation_type}, sample_type {sample_type}"
            )

            time_start = time.time()

            # train models
            lgb_model = Model(
                cat_validation=validation_type,
                encoders_names=encoders_tuple,
                cat_cols=cat_cols,
            )
            train_score, val_score, avg_num_trees = lgb_model.fit(X_train, y_train)
            y_hat, test_features = lgb_model.predict(X_test)

            # check score
            test_score = roc_auc_score(y_test, y_hat)
            time_end = time.time()

            # write and save results (keys are the column names of the results file)
            results = {
                "dataset_name": dataset_name,
                "Encoder": encoders_tuple[0],
                "validation_type": validation_type,
                "sample_type": sample_type,
                "train_shape": X_train.shape[0],
                "test_shape": X_test.shape[0],
                "mean_target_before_sampling_train": mean_target_before_sampling_train,
                "mean_target_after_sampling_train": np.mean(y_train),
                "mean_target_test": np.mean(y_test),
                "num_cat_cols": len(cat_cols),
                "train_score": train_score,
                "val_score": val_score,
                "test_score": test_score,
                "time": time_end - time_start,
                "features_before_encoding": X_train.shape[1],
                "features_after_encoding": test_features,
                "avg_tress_number": avg_num_trees,
                "train_prop_size": train_prop_size,
            }
            save_exp_to_file(dic=results, path="./results/fit_predict_scores.txt")
if __name__ == "__main__":
    # Run every dataset three times: without extension, with GAN-generated
    # rows, and with rows re-sampled from the original train set.
    encoders_list = [("CatBoostEncoder",)]
    dataset_list = [
        "telecom",
        "adult",
        "employee",
        "mortgages",
        "poverty_A",
        "credit",
        "taxi",
    ]  # "kick","kdd_upselling"
    validation_type = "Single"

    for dataset_name in tqdm(dataset_list):
        for sample_type in (None, "gan", "sample_original"):
            execute_experiment(
                dataset_name, encoders_list, validation_type, sample_type=sample_type
            )
================================================
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/utils.py
================================================
import gc
from typing import List
import numpy as np
import pandas as pd
from ctgan import CTGANSynthesizer
from sklearn.model_selection import StratifiedKFold
from model import Model
def save_dict_to_file(dic: dict, path: str, save_raw=False) -> None:
    """
    Save dict values into txt file, overwriting any existing content

    :param dic: Dict with values
    :param path: Path to .txt file
    :param save_raw: If True write ``str(dic)`` as is, otherwise write each
        key immediately followed by its value and a blank line
    :return: None
    """
    # the context manager closes the file even when a write fails
    with open(path, "w") as f:
        if save_raw:
            f.write(str(dic))
        else:
            for k, v in dic.items():
                f.write(str(k))
                f.write(str(v))
                f.write("\n\n")
def save_exp_to_file(dic: dict, path: str) -> None:
    """
    Append one experiment as a tab-separated row to a txt file

    When the file is still empty, a header row built from the dict keys is
    written first.

    :param dic: Dict with values, keys are used as column names
    :param path: Path to .txt file
    :return: None
    """
    # the context manager closes the file even when a write fails
    with open(path, "a+") as f:
        # append mode starts at the end of the file, so position 0 means empty
        if f.tell() == 0:
            f.write("\t".join(str(key) for key in dic) + "\n")
        f.write("\t".join(str(val) for val in dic.values()) + "\n")
def cat_cols_info(
    X_train: pd.DataFrame, X_test: pd.DataFrame, cat_cols: List[str]
) -> dict:
    """
    Summarise each categorical column: unique counts in train and test, and
    how many test values / what share of test rows never occur in train
    :param X_train: Train dataframe
    :param X_test: Test dataframe
    :param cat_cols: List of categorical columns
    :return: Dict mapping column name to a dict of the four statistics
    """
    summary = {}
    for column in cat_cols:
        train_col, test_col = X_train[column], X_test[column]
        seen_in_train = set(train_col)
        unseen_values = set(test_col) - seen_in_train
        # row-wise flag: True where the test value was never seen in train
        row_is_unseen = test_col.apply(lambda value: value not in seen_in_train)
        summary[column] = {
            "num_uniq_train": train_col.nunique(),
            "num_uniq_test": test_col.nunique(),
            "number_of_new_test": len(unseen_values),
            "fraction_of_new_test": np.mean(row_is_unseen),
        }
    return summary
def adversarial_test(left_df, right_df, cat_cols):
    """
    Trains adversarial model to distinguish train from test

    Both frames are shuffled, truncated to the size of the smaller one and
    labelled (``left_df`` rows get 0, ``right_df`` rows get 1) before a
    ``Model`` is fitted on their concatenation. The inputs are not modified.

    :param left_df: dataframe, labelled 0
    :param right_df: dataframe, labelled 1
    :param cat_cols: List of categorical columns
    :return: trained model
    """
    # sample to shuffle the data; sample() already returns new frames
    left_df = left_df.sample(frac=1).reset_index(drop=True)
    right_df = right_df.sample(frac=1).reset_index(drop=True)

    # balance both sides, then label with assign() so that no column is
    # written onto a head() slice (avoids SettingWithCopyWarning)
    n_rows = min(left_df.shape[0], right_df.shape[0])
    left_df = left_df.head(n_rows).assign(gt=0)
    right_df = right_df.head(n_rows).assign(gt=1)

    concated = pd.concat([left_df, right_df])
    lgb_model = Model(
        cat_validation="Single",
        encoders_names=("OrdinalEncoder",),
        cat_cols=cat_cols,
        model_validation=StratifiedKFold(n_splits=3, shuffle=True, random_state=42),
        model_params={
            "metrics": "AUC",
            "max_depth": 2,
            "max_bin": 100,
            "n_estimators": 500,
            "learning_rate": 0.02,
            "random_state": 42,
        },
    )
    train_score, val_score, _ = lgb_model.fit(
        concated.drop("gt", axis=1), concated["gt"]
    )

    print(
        "ROC AUC adversarial: train %.2f%% val %.2f%%"
        % (train_score * 100.0, val_score * 100.0)
    )
    return lgb_model
def extend_gan_train(x_train, y_train, x_test, cat_cols, gen_x_times=1.2, epochs=300):
    """
    Extends train by generating new data by GAN

    A CTGAN is fitted on train (features plus target). Its samples, together
    with re-sampled train rows, are filtered to the 2%-98% quantile range of
    every numeric test column and ranked by an adversarial model; the top
    ``gen_x_times * len(x_train)`` candidates are appended to train.
    The passed ``x_train`` is not modified.

    :param x_train: train dataframe
    :param y_train: target for train dataframe
    :param x_test: dataframe
    :param cat_cols: List of categorical columns
    :param gen_x_times: Factor for which initial dataframe should be increased
    :param epochs: Number of epoch max to train the GAN
    :return: extended train with target
    :raises ValueError: if gen_x_times is 0
    """
    if gen_x_times == 0:
        raise ValueError("Passed gen_x_times with value 0!")

    # assign() builds a new frame, so the caller's dataframe keeps its columns
    x_train = x_train.assign(target=y_train)
    x_test_bigger = int(1.1 * x_test.shape[0] / x_train.shape[0])

    ctgan = CTGANSynthesizer()
    ctgan.fit(x_train, cat_cols, epochs=epochs)
    generated_df = ctgan.sample(x_test_bigger * x_train.shape[0])

    # cast the generated columns back to the train dtypes (matched by position)
    for col, dtype in zip(generated_df.columns, x_train.dtypes.values):
        generated_df[col] = generated_df[col].astype(dtype)

    generated_df = pd.concat(
        [
            x_train.sample(frac=x_test_bigger, replace=True, random_state=42),
            generated_df,
        ]
    ).reset_index(drop=True)

    # numeric columns are recognised by "num" in their name
    num_cols = [col for col in x_train.columns if "num" in col]
    for num_col in num_cols:
        min_val = x_test[num_col].quantile(0.02)
        max_val = x_test[num_col].quantile(0.98)
        generated_df = generated_df.loc[
            (generated_df[num_col] >= min_val) & (generated_df[num_col] <= max_val)
        ]
    generated_df = generated_df.reset_index(drop=True)

    # rank candidates by the adversarial model's prediction and keep the top ones
    ad_model = adversarial_test(x_test, generated_df.drop("target", axis=1), cat_cols)
    generated_df["test_similarity"] = ad_model.predict(
        generated_df.drop("target", axis=1), return_shape=False
    )
    generated_df.sort_values("test_similarity", ascending=False, inplace=True)
    generated_df = generated_df.head(int(gen_x_times * x_train.shape[0]))

    x_train = pd.concat(
        [x_train, generated_df.drop("test_similarity", axis=1)], axis=0
    ).reset_index(drop=True)
    del generated_df
    gc.collect()
    return x_train.drop("target", axis=1), x_train["target"]
def extend_from_original(x_train, y_train, x_test, cat_cols, gen_x_times=1.2):
    """
    Extends train by re-sampling rows of the original train (no GAN involved)

    Train rows (features plus target) are sampled with replacement, filtered
    to the 2%-98% quantile range of every numeric test column and ranked by an
    adversarial model; the top ``gen_x_times * len(x_train)`` candidates are
    appended to train. The passed ``x_train`` is not modified.

    :param x_train: train dataframe
    :param y_train: target for train dataframe
    :param x_test: dataframe
    :param cat_cols: List of categorical columns
    :param gen_x_times: Factor for which initial dataframe should be increased
    :return: extended train with target
    :raises ValueError: if gen_x_times is 0
    """
    if gen_x_times == 0:
        raise ValueError("Passed gen_x_times with value 0!")

    # assign() builds a new frame, so the caller's dataframe keeps its columns
    x_train = x_train.assign(target=y_train)
    x_test_bigger = int(1.1 * x_test.shape[0] / x_train.shape[0])
    generated_df = x_train.sample(frac=x_test_bigger, replace=True, random_state=42)

    # numeric columns are recognised by "num" in their name
    num_cols = [col for col in x_train.columns if "num" in col]
    for num_col in num_cols:
        min_val = x_test[num_col].quantile(0.02)
        max_val = x_test[num_col].quantile(0.98)
        generated_df = generated_df.loc[
            (generated_df[num_col] >= min_val) & (generated_df[num_col] <= max_val)
        ]
    generated_df = generated_df.reset_index(drop=True)

    # rank candidates by the adversarial model's prediction and keep the top ones
    ad_model = adversarial_test(x_test, generated_df.drop("target", axis=1), cat_cols)
    generated_df["test_similarity"] = ad_model.predict(
        generated_df.drop("target", axis=1), return_shape=False
    )
    generated_df.sort_values("test_similarity", ascending=False, inplace=True)
    generated_df = generated_df.head(int(gen_x_times * x_train.shape[0]))

    x_train = pd.concat(
        [x_train, generated_df.drop("test_similarity", axis=1)], axis=0
    ).reset_index(drop=True)
    del generated_df
    gc.collect()
    return x_train.drop("target", axis=1), x_train["target"]
================================================
FILE: DEEP LEARNING/Autoencoders GANS/pytorch/CGAN/ConditionalGAN.py
================================================
#!/usr/bin/env python
# coding: utf-8
# # Implementation of Conditional GANs
# Reference: https://arxiv.org/pdf/1411.1784.pdf
# https://github.com/Yangyangii/GAN-Tutorial/blob/master/MNIST/Conditional-GAN.ipynb
# In[ ]:
# Run the comment below only when using Google Colab
# !pip install torch torchvision
# In[1]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
# In[2]:
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
from torchvision.utils import save_image
# In[3]:
import numpy as np
import datetime
import os, sys
# In[4]:
from matplotlib.pyplot import imshow, imsave
get_ipython().run_line_magic("matplotlib", "inline")
# In[5]:
# Prefix used for the file names of the sample images written during training.
MODEL_NAME = "ConditionalGAN"
# Run on the first CUDA device when one is available, otherwise on the CPU.
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# In[6]:
def to_onehot(x, num_classes=10):
    """
    One-hot encode class indices as a CPU long tensor

    An int yields a tensor of shape (1, num_classes). A long tensor is used as
    the index argument of scatter_ along dim 1 and yields a tensor of shape
    (x.size(0), num_classes).
    """
    assert isinstance(x, (int, torch.LongTensor, torch.cuda.LongTensor))
    if isinstance(x, int):
        onehot = torch.zeros(1, num_classes).long()
        onehot[0][x] = 1
        return onehot
    indices = x.cpu()
    onehot = torch.LongTensor(indices.size(0), num_classes).zero_()
    return onehot.scatter_(1, indices, 1)  # dim, index, src value
# In[7]:
def get_sample_image(G, n_noise=100):
    """
    Return a 280x280 array tiling 100 generated 28x28 images

    Band j (rows j*28 .. (j+1)*28) holds ten samples conditioned on class j,
    laid out side by side.
    """
    canvas = np.zeros([280, 280])
    for cls in range(10):
        condition = torch.zeros([10, 10]).to(DEVICE)
        condition[:, cls] = 1
        noise = torch.randn(10, n_noise).to(DEVICE)
        samples = G(noise, condition).view(10, 28, 28).cpu().data.numpy()
        top, bottom = cls * 28, (cls + 1) * 28
        canvas[top:bottom] = np.concatenate(list(samples), axis=-1)
    return canvas
# In[8]:
class Discriminator(nn.Module):
    """
    MLP discriminator that scores an input together with its condition vector
    """

    def __init__(self, input_size=784, condition_size=10, num_classes=1):
        super(Discriminator, self).__init__()
        joint_size = input_size + condition_size
        stack = [
            nn.Linear(joint_size, 512),
            nn.LeakyReLU(0.2),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, num_classes),
            nn.Sigmoid(),
        ]
        self.layer = nn.Sequential(*stack)

    def forward(self, x, c):
        # flatten both inputs per sample; the condition is cast to float
        flat_x = x.view(x.size(0), -1)
        flat_c = c.view(c.size(0), -1).float()
        joint = torch.cat((flat_x, flat_c), 1)  # [input, label] concatenated vector
        return self.layer(joint)
# In[9]:
class Generator(nn.Module):
    """
    MLP generator mapping a noise vector plus condition vector to a 1x28x28 image
    """

    def __init__(self, input_size=100, condition_size=10, num_classes=784):
        super(Generator, self).__init__()
        stack = [nn.Linear(input_size + condition_size, 128), nn.LeakyReLU(0.2)]
        # three widening stages: Linear -> BatchNorm -> LeakyReLU
        for in_width, out_width in ((128, 256), (256, 512), (512, 1024)):
            stack.append(nn.Linear(in_width, out_width))
            stack.append(nn.BatchNorm1d(out_width))
            stack.append(nn.LeakyReLU(0.2))
        stack.append(nn.Linear(1024, num_classes))
        stack.append(nn.Tanh())
        self.layer = nn.Sequential(*stack)

    def forward(self, x, c):
        # flatten both inputs per sample; the condition is cast to float
        flat_x = x.view(x.size(0), -1)
        flat_c = c.view(c.size(0), -1).float()
        joint = torch.cat((flat_x, flat_c), 1)  # [input, label] concatenated vector
        return self.layer(joint).view(flat_x.size(0), 1, 28, 28)
# In[10]:
# Instantiate both networks with their default sizes and move them to DEVICE.
D = Discriminator().to(DEVICE)
G = Generator().to(DEVICE)
# In[11]:
# Convert images to tensors, then normalise them with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean=[0.5], std=[0.5])]
)
# In[12]:
# MNIST training split stored under ../data/ (download is enabled).
mnist = datasets.MNIST(root="../data/", train=True, transform=transform, download=True)
# In[13]:
batch_size = 64
condition_size = 10
# In[14]:
# drop_last=True keeps every batch at exactly batch_size samples, which the
# fixed-size label tensors created below depend on.
data_loader = DataLoader(
    dataset=mnist, batch_size=batch_size, shuffle=True, drop_last=True
)
# In[15]:
# Binary cross-entropy on the discriminator's sigmoid output; one Adam
# optimizer per network with identical hyper-parameters.
criterion = nn.BCELoss()
D_opt = torch.optim.Adam(D.parameters(), lr=0.0002, betas=(0.5, 0.999))
G_opt = torch.optim.Adam(G.parameters(), lr=0.0002, betas=(0.5, 0.999))
# In[16]:
max_epoch = 30  # need more than 100 epochs for training generator
step = 0  # global iteration counter across all epochs
n_critic = 1  # for training more k steps about Discriminator
n_noise = 100  # length of the noise vector fed to the generator
# In[17]:
D_labels = torch.ones([batch_size, 1]).to(DEVICE)  # Discriminator Label to real
D_fakes = torch.zeros([batch_size, 1]).to(DEVICE)  # Discriminator Label to fake
# In[18]:
# Create the output folder for sample images; exist_ok avoids the
# check-then-create race and is a no-op when the folder already exists.
os.makedirs("samples", exist_ok=True)
# In[19]:
# Alternating GAN training: one discriminator update per batch and one
# generator update every n_critic steps. Progress is printed every 500 steps
# and a sample grid is written to samples/ every 1000 steps.
for epoch in range(max_epoch):
    for idx, (images, labels) in enumerate(data_loader):
        # Training Discriminator
        x = images.to(DEVICE)
        y = labels.view(batch_size, 1)
        y = to_onehot(y).to(DEVICE)
        x_outputs = D(x, y)
        D_x_loss = criterion(x_outputs, D_labels)

        z = torch.randn(batch_size, n_noise).to(DEVICE)
        # detach(): the discriminator update does not need gradients w.r.t.
        # the generator, so skip back-propagating through G here
        z_outputs = D(G(z, y).detach(), y)
        D_z_loss = criterion(z_outputs, D_fakes)
        D_loss = D_x_loss + D_z_loss

        D.zero_grad()
        D_loss.backward()
        D_opt.step()

        if step % n_critic == 0:
            # Training Generator: fakes are scored against the "real" labels
            z = torch.randn(batch_size, n_noise).to(DEVICE)
            z_outputs = D(G(z, y), y)
            G_loss = criterion(z_outputs, D_labels)

            G.zero_grad()
            G_loss.backward()
            G_opt.step()

        if step % 500 == 0:
            print(
                "Epoch: {}/{}, Step: {}, D Loss: {}, G Loss: {}".format(
                    epoch, max_epoch, step, D_loss.item(), G_loss.item()
                )
            )

        if step % 1000 == 0:
            # sample in eval mode, then switch back to training mode
            G.eval()
            img = get_sample_image(G, n_noise)
            imsave(
                "samples/{}_step{}.jpg".format(MODEL_NAME, str(step).zfill(3)),
                img,
                cmap="gray",
            )
            G.train()
        step += 1
# ## Sample
# In[22]:
# generation to image
# Switch the generator to eval mode and display one grid of generated samples.
G.eval()
imshow(get_sample_image(G, n_noise), cmap="gray")
# In[40]:
def save_checkpoint(state, file_name="checkpoint.pth.tar"):
    """
    Serialise `state` to `file_name` with torch.save
    """
    torch.save(state, file_name)
# In[41]:
# Saving params.
# Persist epoch counter, weights and optimizer state of both networks.
for network, optimizer, target_file in (
    (D, D_opt, "D_c.pth.tar"),
    (G, G_opt, "G_c.pth.tar"),
):
    save_checkpoint(
        {
            "epoch": epoch + 1,
            "state_dict": network.state_dict(),
            "optimizer": optimizer.state_dict(),
        },
        target_file,
    )
# In[ ]:
================================================
FILE: DEEP LEARNING/Autoencoders GANS/pytorch/DCGAN/dcgan.py
================================================
#!/usr/bin/env python
# coding: utf-8
# In[1]:
# reference https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html
from __future__ import print_function
#%matplotlib inline
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML
# Set random seed for reproducibility: the same seed is applied to Python's
# `random` module and to torch.
manualSeed = 999
#manualSeed = random.randint(1, 10000) # use if you want new results
print("Random Seed: ", manualSeed)
random.seed(manualSeed)
torch.manual_seed(manualSeed)
# In[16]:
# Root directory for dataset (defined before its first use below)
dataroot = "./data/celeba/"
# List the dataset directory; fails early when the folder is missing
os.listdir(dataroot)
# In[19]:
# Number of workers for dataloader
workers = 2
# Batch size during training
batch_size = 128
# Spatial size of training images. All images will be resized to this
# size using a transformer.
image_size = 64
# Number of channels in the training images. For color images this is 3
nc = 3
# Size of z latent vector (i.e. size of generator input)
nz = 100
# Size of feature maps in generator
ngf = 64
# Size of feature maps in discriminator
ndf = 64
# Number of training epochs
num_epochs = 5
# Learning rate for optimizers
lr = 0.0002
# Beta1 hyperparam for Adam optimizers
beta1 = 0.5
# Number of GPUs available. Use 0 for CPU mode.
ngpu = 1
# In[20]:
# Image-folder dataset: each image is resized and centre-cropped to
# image_size, converted to a tensor and normalised per channel with
# mean 0.5 and std 0.5.
dataset = dset.ImageFolder(root=dataroot,
                           transform=transforms.Compose([
                               transforms.Resize(image_size),
                               transforms.CenterCrop(image_size),
                               transforms.ToTensor(),
                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                           ]))
# Create the dataloader
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                         shuffle=True, num_workers=workers)
# Decide which device we want to run on
device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")
# Plot some training images (the first 64 of one batch, arranged as a grid)
real_batch = next(iter(dataloader))
plt.figure(figsize=(8,8))
plt.axis("off")
plt.title("Training Images")
# make_grid returns channels-first data; transpose to height x width x channels for imshow
plt.imshow(np.transpose(vutils.make_grid(real_batch[0].to(device)[:64], padding=2, normalize=True).cpu(),(1,2,0)))
# In[21]:
# custom weights initialization called on netG and netD
def weights_init(m):
    """
    Re-initialise one module in place, selected by its class name

    Classes whose name contains 'Conv' get weights drawn from N(0, 0.02);
    classes whose name contains 'BatchNorm' get weights drawn from N(1, 0.02)
    and a zero bias. Every other module is left untouched.
    """
    layer_kind = type(m).__name__
    if 'Conv' in layer_kind:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
        return
    if 'BatchNorm' in layer_kind:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)
# In[24]:
# Generator Code
class Generator(nn.Module):
    """
    Transposed-convolution generator mapping a latent vector Z to a
    (nc) x 64 x 64 image with values squashed by Tanh
    """

    def __init__(self, ngpu):
        super(Generator, self).__init__()
        self.ngpu = ngpu
        # (in_channels, out_channels, stride, padding) per upsampling stage;
        # the trailing comment is the state size after the stage
        stages = (
            (nz, ngf * 8, 1, 0),       # (ngf*8) x 4 x 4
            (ngf * 8, ngf * 4, 2, 1),  # (ngf*4) x 8 x 8
            (ngf * 4, ngf * 2, 2, 1),  # (ngf*2) x 16 x 16
            (ngf * 2, ngf, 2, 1),      # (ngf) x 32 x 32
        )
        layers = []
        for in_ch, out_ch, stride, pad in stages:
            layers.append(nn.ConvTranspose2d(in_ch, out_ch, 4, stride, pad, bias=False))
            layers.append(nn.BatchNorm2d(out_ch))
            layers.append(nn.ReLU(True))
        # output stage: (nc) x 64 x 64
        layers.append(nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False))
        layers.append(nn.Tanh())
        self.main = nn.Sequential(*layers)

    def forward(self, input):
        return self.main(input)
# Create the generator
netG = Generator(ngpu).to(device)
# Handle multi-gpu if desired
if (device.type == 'cuda') and (ngpu > 1):
    netG = nn.DataParallel(netG, list(range(ngpu)))
# Apply the weights_init function to randomly initialize all weights
# to mean=0, stdev=0.02 (BatchNorm weights use mean=1, see weights_init).
netG.apply(weights_init)
# Print the model
print(netG)
# In[26]:
# Discriminator
class Discriminator(nn.Module):
    """
    Strided-convolution discriminator reducing a (nc) x 64 x 64 image to a
    single sigmoid score per sample
    """

    def __init__(self, ngpu):
        super(Discriminator, self).__init__()
        self.ngpu = ngpu
        # input is (nc) x 64 x 64; the first stage has no batch norm
        layers = [
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
        ]
        # three downsampling stages, each doubling the channel count:
        # (ndf) x 32 x 32 -> (ndf*2) x 16 x 16 -> (ndf*4) x 8 x 8 -> (ndf*8) x 4 x 4
        width = ndf
        for _ in range(3):
            layers.append(nn.Conv2d(width, width * 2, 4, 2, 1, bias=False))
            layers.append(nn.BatchNorm2d(width * 2))
            layers.append(nn.LeakyReLU(0.2, inplace=True))
            width = width * 2
        # final stage collapses the 4 x 4 map to one value
        layers.append(nn.Conv2d(width, 1, 4, 1, 0, bias=False))
        layers.append(nn.Sigmoid())
        self.main = nn.Sequential(*layers)

    def forward(self, input):
        return self.main(input)
# Create the Discriminator
netD = Discriminator(ngpu).to(device)
# Handle multi-gpu if desired
if (device.type == 'cuda') and (ngpu > 1):
    netD = nn.DataParallel(netD, list(range(ngpu)))
# Apply the weights_init function to randomly initialize all weights
# to mean=0, stdev=0.02 (BatchNorm weights use mean=1, see weights_init).
netD.apply(weights_init)
# Print the model
print(netD)
# In[27]:
# Initialize BCELoss function
criterion = nn.BCELoss()
# Create batch of latent vectors that we will use to visualize
# the progression of the generator (64 vectors, drawn once and reused)
fixed_noise = torch.randn(64, nz, 1, 1, device=device)
# Establish convention for real and fake labels during training
real_label = 1
fake_label = 0
# Setup Adam optimizers for both G and D (same learning rate and betas)
optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))
# * Loss_D - discriminator loss calculated as the sum of losses for the all real and all fake batches (log(D(x)) + log(1 - D(G(z)))).
# * Loss_G - generator loss calculated as log(D(G(z)))
# * D(x) - the average output (across the batch) of the discriminator for the all real batch. This should start close to 1 then theoretically converge to 0.5 when G gets better. Think about why this is.
# * D(G(z)) - average discriminator outputs for the all fake batch. The first number is before D is updated and the second number is after D is updated. These numbers should start near 0 and converge to 0.5 as G gets better. Think about why this is.
# In[28]:
# Training Loop
# Lists to keep track of progress
# Lists to keep track of progress
img_list = []  # image grids generated from fixed_noise
G_losses = []  # generator loss per iteration
D_losses = []  # discriminator loss per iteration
iters = 0
print("Starting Training Loop...")
# For each epoch
for epoch in range(num_epochs):
    # For each batch in the dataloader
    for i, data in enumerate(dataloader, 0):
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        ## Train with all-real batch
        netD.zero_grad()
        # Format batch
        real_cpu = data[0].to(device)
        b_size = real_cpu.size(0)
        # dtype must be float: with an integer fill value torch.full infers an
        # integer tensor, which BCELoss rejects as a target
        label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
        # Forward pass real batch through D
        output = netD(real_cpu).view(-1)
        # Calculate loss on all-real batch
        errD_real = criterion(output, label)
        # Calculate gradients for D in backward pass
        errD_real.backward()
        D_x = output.mean().item()

        ## Train with all-fake batch
        # Generate batch of latent vectors
        noise = torch.randn(b_size, nz, 1, 1, device=device)
        # Generate fake image batch with G
        fake = netG(noise)
        label.fill_(fake_label)
        # Classify all fake batch with D (detached: no gradients flow into G here)
        output = netD(fake.detach()).view(-1)
        # Calculate D's loss on the all-fake batch
        errD_fake = criterion(output, label)
        # Calculate the gradients for this batch
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        # Add the gradients from the all-real and all-fake batches
        errD = errD_real + errD_fake
        # Update D
        optimizerD.step()

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        netG.zero_grad()
        label.fill_(real_label)  # fake labels are real for generator cost
        # Since we just updated D, perform another forward pass of all-fake batch through D
        output = netD(fake).view(-1)
        # Calculate G's loss based on this output
        errG = criterion(output, label)
        # Calculate gradients for G
        errG.backward()
        D_G_z2 = output.mean().item()
        # Update G
        optimizerG.step()

        # Output training stats
        if i % 50 == 0:
            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                  % (epoch, num_epochs, i, len(dataloader),
                     errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))

        # Save Losses for plotting later
        G_losses.append(errG.item())
        D_losses.append(errD.item())

        # Check how the generator is doing by saving G's output on fixed_noise
        # (every 500 iterations and once more after the very last batch)
        if (iters % 500 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)):
            with torch.no_grad():
                fake = netG(fixed_noise).detach().cpu()
            img_list.append(vutils.make_grid(fake, padding=2, normalize=True))

        iters += 1
# In[29]:
# Plot the per-iteration generator and discriminator losses recorded during training.
plt.figure(figsize=(10,5))
plt.title("Generator and Discriminator Loss During Training")
plt.plot(G_losses,label="G")
plt.plot(D_losses,label="D")
plt.xlabel("iterations")
plt.ylabel("Loss")
plt.legend()
plt.show()
# In[30]:
#%%capture
# Animate the grids saved in img_list (one frame per saved grid) and render
# the animation as HTML.
fig = plt.figure(figsize=(8,8))
plt.axis("off")
ims = [[plt.imshow(np.transpose(i,(1,2,0)), animated=True)] for i in img_list]
ani = animation.ArtistAnimation(fig, ims, interval=1000, repeat_delay=1000, blit=True)
HTML(ani.to_jshtml())
# In[32]:
# Grab a batch of real images from the dataloader
real_batch = next(iter(dataloader))
# Plot the real images (left half of the figure)
plt.figure(figsize=(15,15))
plt.subplot(1,2,1)
plt.axis("off")
plt.title("Real Images")
plt.imshow(np.transpose(vutils.make_grid(real_batch[0].to(device)[:64], padding=5, normalize=True).cpu(),(1,2,0)))
# Plot the fake images from the last epoch (right half; last grid in img_list)
plt.subplot(1,2,2)
plt.axis("off")
plt.title("Fake Images")
plt.imshow(np.transpose(img_list[-1],(1,2,0)))
plt.show()
# Where to Go Next
# We have reached the end of our journey, but there are several places you could go from here. You could:
#
# Train for longer to see how good the results get
# Modify this model to take a different dataset and possibly change the size of the images and the model architecture
# Check out some other cool GAN projects here
# Create GANs that generate music
================================================
FILE: DEEP LEARNING/Autoencoders GANS/pytorch/ProgressiveGAN/README.md
================================================
reference code and repo https://github.com/odegeasslbc/Progressive-GAN-pytorch
# Progressive-GAN-pytorch
A PyTorch implementation of Progressive-GAN that actually works, is readable, and is simple to customize
## Description
I simplify the code of training a Progressive-GAN, making it easier to read and customize, for the purpose of research.
This implementation is portable with minimal library dependencies (only torch and torchvision) and just 2 code modules. In the code, you can easily modify the training schema, the loss function, the network structure, etc.
The key contributions of the paper are all implemented: 1. progressive growing of GANs, 2. minibatch std on the Discriminator, 3. pixel-norm on the Generator, 4. equalized learning rate.
Enjoy the benefit of the progressive-growing infrastructure and port it to your own research and product!
## How to run
To start a training, just run:
```
python train.py --path /path/to/image-folder
```
An example with more configuration can be:
```
python train.py --path /path/to/imagefolder --trial_name experiment-1 --z_dim 100 --channel 512 --batch_size 4 --init_step 2 --total_iter 300000 --pixel_norm --tanh
```
For a comprehensive explanation of all the parameters, run:
```
python train.py --help
```
Each new run of the code creates a new folder named after the specified trial_name; all the generated images, model checkpoints and the loss value log file are stored in this new folder. A copy of the code that you ran is also stored there (because you might have modified it).
## Dataset
This code is ready for your own image datasets with the **torchvision.datasets.ImageFolder** module.
Place all your images in a way like:
```
<image_root_folder>
|--<subfolder 1>
|--image 1
|--image 2 ...
|--<subfolder 2>
...
```
## Training results
This code performs consistently well on the various datasets I tested; I just haven't bothered to upload the results here.
## Reference
1. *Progressive Growing of GANs for Improved Quality, Stability, and Variation*, **Tero Karras** (NVIDIA), **Timo Aila** (NVIDIA), **Samuli Laine** (NVIDIA), **Jaakko Lehtinen** (NVIDIA and Aalto University) [Paper (NVIDIA research)](http://research.nvidia.com/publication/2017-10_Progressive-Growing-of)
2. This implementation is based on: https://github.com/rosinality/progressive-gan-pytorch
================================================
FILE: DEEP LEARNING/Autoencoders GANS/pytorch/ProgressiveGAN/progan_modules.py
================================================
import torch
from torch import nn
from torch.nn import functional as F
from math import sqrt
class EqualLR:
    """Forward pre-hook that rescales a module weight on every call.

    The raw parameter is kept under ``<name>_orig``; right before each forward
    pass the attribute ``<name>`` is rebuilt as
    ``<name>_orig * sqrt(2 / fan_in)``.
    """

    def __init__(self, name):
        self.name = name

    def compute_weight(self, module):
        """Return the stored raw weight multiplied by ``sqrt(2 / fan_in)``."""
        raw = getattr(module, self.name + "_orig")
        # fan_in = size of dim 1 times the element count of one [0][0] slice
        # (kernel area for a 4-D conv weight, 1 for a 2-D linear weight).
        slice_elems = raw.data[0][0].numel()
        fan_in = raw.data.size(1) * slice_elems
        return raw * sqrt(2 / fan_in)

    @staticmethod
    def apply(module, name):
        """Move ``module.<name>`` to ``<name>_orig`` and install the hook."""
        hook = EqualLR(name)
        original = getattr(module, name)
        # Drop the original parameter so the hook can set a plain tensor
        # attribute under the same name.
        del module._parameters[name]
        module.register_parameter(name + "_orig", nn.Parameter(original.data))
        module.register_forward_pre_hook(hook)
        return hook

    def __call__(self, module, input):
        setattr(module, self.name, self.compute_weight(module))
def equal_lr(module, name="weight"):
    """Install the ``EqualLR`` pre-hook for ``name`` on ``module``.

    The module is modified in place and returned so the call can be chained.
    """
    EqualLR.apply(module, name)
    return module
class PixelNorm(nn.Module):
    """Divide the input by the root-mean-square taken over dim 1."""

    def __init__(self):
        super().__init__()

    def forward(self, input):
        mean_square = input.pow(2).mean(dim=1, keepdim=True)
        # The small epsilon keeps the division finite for all-zero inputs.
        return input / torch.sqrt(mean_square + 1e-8)
class EqualConv2d(nn.Module):
    """``nn.Conv2d`` with normal-initialised weights and the ``equal_lr`` hook.

    All positional and keyword arguments are forwarded to ``nn.Conv2d``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        conv = nn.Conv2d(*args, **kwargs)
        conv.weight.data.normal_()
        # ``bias`` is None when the caller passes ``bias=False``.
        if conv.bias is not None:
            conv.bias.data.zero_()
        self.conv = equal_lr(conv)

    def forward(self, input):
        return self.conv(input)
class EqualConvTranspose2d(nn.Module):
    """``nn.ConvTranspose2d`` with normal-initialised weights and ``equal_lr``.

    Additional module for OOGAN usage. All arguments are forwarded to
    ``nn.ConvTranspose2d``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        conv = nn.ConvTranspose2d(*args, **kwargs)
        conv.weight.data.normal_()
        # ``bias`` is None when the caller passes ``bias=False``.
        if conv.bias is not None:
            conv.bias.data.zero_()
        self.conv = equal_lr(conv)

    def forward(self, input):
        return self.conv(input)
class EqualLinear(nn.Module):
    """``nn.Linear`` with normal-initialised weights and the ``equal_lr`` hook."""

    def __init__(self, in_dim, out_dim):
        super().__init__()
        layer = nn.Linear(in_dim, out_dim)
        # Standard-normal weights and zero bias.
        nn.init.normal_(layer.weight)
        nn.init.zeros_(layer.bias)
        self.linear = equal_lr(layer)

    def forward(self, input):
        out = self.linear(input)
        return out
class ConvBlock(nn.Module):
    """Two ``EqualConv2d`` layers, each followed by ``LeakyReLU(0.1)``.

    When ``pixel_norm`` is true a ``PixelNorm`` layer is inserted between each
    convolution and its activation. ``kernel_size2`` / ``padding2`` override
    the kernel size / padding of the second convolution; when left as ``None``
    the second convolution reuses ``kernel_size`` / ``padding``.
    """

    def __init__(
        self,
        in_channel,
        out_channel,
        kernel_size,
        padding,
        kernel_size2=None,
        padding2=None,
        pixel_norm=True,
    ):
        super().__init__()

        second_kernel = kernel_size if kernel_size2 is None else kernel_size2
        second_pad = padding if padding2 is None else padding2

        # (input channels, kernel size, padding) for each of the two stages.
        stages = (
            (in_channel, kernel_size, padding),
            (out_channel, second_kernel, second_pad),
        )

        layers = []
        for channels_in, kernel, pad in stages:
            layers.append(EqualConv2d(channels_in, out_channel, kernel, padding=pad))
            if pixel_norm:
                layers.append(PixelNorm())
            layers.append(nn.LeakyReLU(0.1))

        self.conv = nn.Sequential(*layers)

    def forward(self, input):
        return self.conv(input)
def upscale(feat):
    """Return ``feat`` resized by a factor of 2 with bilinear interpolation."""
    doubled = F.interpolate(
        feat, scale_factor=2, mode="bilinear", align_corners=False
    )
    return doubled
class Generator(nn.Module):
    """Progressively grown generator producing RGB images from latent codes.

    Args:
        input_code_dim: size of the latent vector fed to the network.
        in_channel: channel width of the low-resolution blocks; the 64/128/256
            blocks use ``in_channel // 2`` or ``in_channel // 4`` channels.
        pixel_norm: forwarded to every ``ConvBlock`` (the input layer always
            applies ``PixelNorm``).
        tanh: when true, ``torch.tanh`` is applied to the RGB output.
    """

    def __init__(self, input_code_dim=128, in_channel=128, pixel_norm=True, tanh=True):
        super().__init__()
        self.input_dim = input_code_dim
        self.tanh = tanh
        self.input_layer = nn.Sequential(
            EqualConvTranspose2d(input_code_dim, in_channel, 4, 1, 0),
            PixelNorm(),
            nn.LeakyReLU(0.1),
        )

        self.progression_4 = ConvBlock(
            in_channel, in_channel, 3, 1, pixel_norm=pixel_norm
        )
        self.progression_8 = ConvBlock(
            in_channel, in_channel, 3, 1, pixel_norm=pixel_norm
        )
        self.progression_16 = ConvBlock(
            in_channel, in_channel, 3, 1, pixel_norm=pixel_norm
        )
        self.progression_32 = ConvBlock(
            in_channel, in_channel, 3, 1, pixel_norm=pixel_norm
        )
        self.progression_64 = ConvBlock(
            in_channel, in_channel // 2, 3, 1, pixel_norm=pixel_norm
        )
        self.progression_128 = ConvBlock(
            in_channel // 2, in_channel // 4, 3, 1, pixel_norm=pixel_norm
        )
        self.progression_256 = ConvBlock(
            in_channel // 4, in_channel // 4, 3, 1, pixel_norm=pixel_norm
        )

        self.to_rgb_8 = EqualConv2d(in_channel, 3, 1)
        self.to_rgb_16 = EqualConv2d(in_channel, 3, 1)
        self.to_rgb_32 = EqualConv2d(in_channel, 3, 1)
        self.to_rgb_64 = EqualConv2d(in_channel // 2, 3, 1)
        self.to_rgb_128 = EqualConv2d(in_channel // 4, 3, 1)
        self.to_rgb_256 = EqualConv2d(in_channel // 4, 3, 1)

        self.max_step = 6

    def progress(self, feat, module):
        """Upsample ``feat`` by 2 (bilinear) and run it through ``module``."""
        out = F.interpolate(feat, scale_factor=2, mode="bilinear", align_corners=False)
        out = module(out)
        return out

    def output(self, feat1, feat2, module1, module2, alpha):
        """Convert features to RGB, blending two resolutions during fade-in.

        For ``0 <= alpha < 1`` the upscaled RGB of the previous resolution
        (``module1(feat1)``) is mixed with the current one (``module2(feat2)``)
        using weights ``1 - alpha`` and ``alpha``; otherwise only the current
        resolution is used.
        """
        if 0 <= alpha < 1:
            skip_rgb = upscale(module1(feat1))
            out = (1 - alpha) * skip_rgb + alpha * module2(feat2)
        else:
            out = module2(feat2)
        if self.tanh:
            return torch.tanh(out)
        return out

    def forward(self, input, step=0, alpha=-1):
        """Generate images at the resolution selected by ``step``.

        Args:
            input: latent codes; reshaped to ``(-1, input_dim, 1, 1)``.
            step: 1 = 8x8, 2 = 16x16, ..., 6 = 256x256. Values above
                ``max_step`` are clamped to ``max_step``.
            alpha: fade-in weight, see ``output``. Ignored for ``step == 1``.

        Raises:
            ValueError: if ``step`` is below 1 or is not a supported integer
                step. There is no RGB head for 4x4, so such a call previously
                ran the whole network and returned ``None``.
        """
        if step < 1:
            raise ValueError(
                "step must be between 1 and %d, got %r" % (self.max_step, step)
            )
        if step > self.max_step:
            step = self.max_step

        feat = self.input_layer(input.view(-1, self.input_dim, 1, 1))
        feat = self.progression_4(feat)
        feat = self.progress(feat, self.progression_8)
        if step == 1:
            rgb = self.to_rgb_8(feat)
            return torch.tanh(rgb) if self.tanh else rgb

        # (block producing this resolution, RGB head of the previous
        # resolution, RGB head of this resolution) for steps 2..6.
        stages = (
            (self.progression_16, self.to_rgb_8, self.to_rgb_16),
            (self.progression_32, self.to_rgb_16, self.to_rgb_32),
            (self.progression_64, self.to_rgb_32, self.to_rgb_64),
            (self.progression_128, self.to_rgb_64, self.to_rgb_128),
            (self.progression_256, self.to_rgb_128, self.to_rgb_256),
        )
        for stage_step, (block, prev_rgb, cur_rgb) in enumerate(stages, start=2):
            prev_feat, feat = feat, self.progress(feat, block)
            if stage_step == step:
                return self.output(prev_feat, feat, prev_rgb, cur_rgb, alpha)

        raise ValueError("unsupported step value: %r" % (step,))
class Discriminator(nn.Module):
    """Progressively grown discriminator returning one score per image.

    ``progression`` and ``from_rgb`` are ordered from the highest resolution
    (index 0) down to the final block (last index), which receives one extra
    minibatch-standard-deviation channel.
    """

    def __init__(self, feat_dim=128):
        super().__init__()

        quarter = feat_dim // 4
        half = feat_dim // 2

        self.progression = nn.ModuleList(
            [
                ConvBlock(quarter, quarter, 3, 1),
                ConvBlock(quarter, half, 3, 1),
                ConvBlock(half, feat_dim, 3, 1),
                ConvBlock(feat_dim, feat_dim, 3, 1),
                ConvBlock(feat_dim, feat_dim, 3, 1),
                ConvBlock(feat_dim, feat_dim, 3, 1),
                # +1 input channel for the minibatch std map; second conv is
                # 4x4 with no padding.
                ConvBlock(feat_dim + 1, feat_dim, 3, 1, 4, 0),
            ]
        )

        self.from_rgb = nn.ModuleList(
            [
                EqualConv2d(3, quarter, 1),
                EqualConv2d(3, quarter, 1),
                EqualConv2d(3, half, 1),
                EqualConv2d(3, feat_dim, 1),
                EqualConv2d(3, feat_dim, 1),
                EqualConv2d(3, feat_dim, 1),
                EqualConv2d(3, feat_dim, 1),
            ]
        )

        self.n_layer = len(self.progression)

        self.linear = EqualLinear(feat_dim, 1)

    def forward(self, input, step=0, alpha=-1):
        """Score ``input`` images given at the resolution of ``step``.

        For ``0 <= alpha < 1`` the output of the entry block is blended with
        the half-resolution image passed through the next ``from_rgb`` layer.
        """

        def halve(tensor):
            # Bilinear downsampling by 2 (used here instead of average pooling).
            return F.interpolate(
                tensor, scale_factor=0.5, mode="bilinear", align_corners=False
            )

        for level in range(step, -1, -1):
            idx = self.n_layer - level - 1
            entering = level == step

            if entering:
                out = self.from_rgb[idx](input)

            if level == 0:
                # Append the mean minibatch standard deviation as a constant
                # extra feature map before the final block.
                batch_std = torch.sqrt(out.var(0, unbiased=False) + 1e-8)
                std_map = batch_std.mean().expand(out.size(0), 1, 4, 4)
                out = torch.cat([out, std_map], 1)

            out = self.progression[idx](out)

            if level > 0:
                out = halve(out)

                if entering and 0 <= alpha < 1:
                    skip_rgb = self.from_rgb[idx + 1](halve(input))
                    out = (1 - alpha) * skip_rgb + alpha * out

        flat = out.squeeze(2).squeeze(2)
        return self.linear(flat)
================================================
FILE: DEEP LEARNING/Autoencoders GANS/pytorch/ProgressiveGAN/train.py
================================================
import argparse
import numpy as np
import random
import torch
import torch.nn.functional as F
from PIL import Image
from progan_modules import Generator, Discriminator
from torch import nn, optim
from torch.autograd import Variable, grad
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, utils
from tqdm import tqdm
def accumulate(model1, model2, decay=0.999):
    """Update ``model1`` in place as a moving average of ``model2``.

    Every parameter of ``model1`` becomes
    ``decay * model1_param + (1 - decay) * model2_param``. Parameters are
    matched by name, so both models must expose the same parameter names.
    """
    source = dict(model2.named_parameters())

    for name, target in model1.named_parameters():
        # ``add_(tensor, alpha=...)`` replaces the deprecated
        # ``add_(number, tensor)`` overload.
        target.data.mul_(decay).add_(source[name].data, alpha=1 - decay)
def imagefolder_loader(path):
    """Return a factory that builds a shuffled ``DataLoader`` over ``path``.

    The returned ``loader(transform)`` wraps ``datasets.ImageFolder(path)``
    with the given transform and reads the module-level ``batch_size``.
    """

    def loader(transform):
        image_folder = datasets.ImageFolder(path, transform=transform)
        return DataLoader(
            image_folder, shuffle=True, batch_size=batch_size, num_workers=4
        )

    return loader
def sample_data(dataloader, image_size=4):
    """Build a data loader that yields ``image_size`` x ``image_size`` crops.

    ``dataloader`` is a factory taking a transform (see
    ``imagefolder_loader``). Images are resized to roughly 120% of
    ``image_size`` plus one pixel, randomly cropped and flipped, converted to
    tensors and normalised with mean 0.5 and std 0.5 per channel.
    """
    resize_to = image_size + int(image_size * 0.2) + 1
    half = (0.5, 0.5, 0.5)

    pipeline = [
        transforms.Resize(resize_to),
        transforms.RandomCrop(image_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(half, half),
    ]

    return dataloader(transforms.Compose(pipeline))
def train(generator, discriminator, init_step, loader, total_iter=600000):
    """Run the progressive-growing training loop.

    The resolution step advances every ``total_iter // 6`` iterations while
    ``alpha`` fades the new resolution in. Each iteration performs one
    discriminator update (real/fake scores plus a gradient penalty) and, every
    ``n_critic`` iterations, one generator update followed by a moving-average
    update of ``g_running``.

    Side effects: creates a ``trial_<name>_<date>_<hour>_<minute>`` folder with
    ``checkpoint`` and ``sample`` sub-folders, writes the run configuration and
    a loss log, copies ``train.py`` and ``progan_modules.py`` into it, and
    periodically saves sample grids and model checkpoints.

    Relies on module-level names set by the script entry point:
    ``trial_name``, ``args``, ``device``, ``input_code_size``, ``n_critic``,
    ``g_running``, ``g_optimizer`` and ``d_optimizer``.

    Args:
        generator: generator being trained.
        discriminator: discriminator being trained.
        init_step: starting resolution step.
        loader: factory taking a transform and returning a data loader.
        total_iter: iteration budget assuming training starts at step 1.
    """
    import os
    from datetime import datetime
    from shutil import copy

    log_every = 500  # iterations between two lines of the loss log

    # Resolution per step: 1 = 8, 2 = 16, 3 = 32, 4 = 64, 5 = 128, 6 = 256.
    step = init_step
    data_loader = sample_data(loader, 4 * 2 ** step)
    dataset = iter(data_loader)

    # Skip the share of the budget belonging to the steps before init_step.
    total_iter_remain = total_iter - (total_iter // 6) * (step - 1)

    pbar = tqdm(range(total_iter_remain))

    disc_loss_val = 0
    gen_loss_val = 0
    grad_loss_val = 0

    date_time = datetime.now()
    post_fix = "%s_%s_%d_%d.txt" % (
        trial_name,
        date_time.date(),
        date_time.hour,
        date_time.minute,
    )
    log_folder = "trial_%s_%s_%d_%d" % (
        trial_name,
        date_time.date(),
        date_time.hour,
        date_time.minute,
    )

    os.mkdir(log_folder)
    os.mkdir(log_folder + "/checkpoint")
    os.mkdir(log_folder + "/sample")

    config_file_name = os.path.join(log_folder, "train_config_" + post_fix)
    with open(config_file_name, "w") as config_file:
        config_file.write(str(args))

    log_file_name = os.path.join(log_folder, "train_log_" + post_fix)
    # NOTE(review): the header names four columns but only two values
    # (generator loss, discriminator loss) are written per row below.
    with open(log_file_name, "w") as log_file:
        log_file.write("g,d,nll,onehot\n")

    # Keep a copy of the code that produced this run next to its outputs.
    copy("train.py", log_folder + "/train_%s.py" % post_fix)
    copy("progan_modules.py", log_folder + "/model_%s.py" % post_fix)

    alpha = 0
    one = torch.tensor(1, dtype=torch.float).to(device)
    mone = one * -1
    iteration = 0

    for i in pbar:
        discriminator.zero_grad()

        # alpha reaches 1 halfway through the iterations of each step.
        alpha = min(1, (2 / (total_iter // 6)) * iteration)

        if iteration > total_iter // 6:
            # Move on to the next resolution and restart the fade-in.
            alpha = 0
            iteration = 0
            step += 1

            if step > 6:
                alpha = 1
                step = 6
            data_loader = sample_data(loader, 4 * 2 ** step)
            dataset = iter(data_loader)

        try:
            real_image, _ = next(dataset)
        except (OSError, StopIteration):
            # Restart the loader when it is exhausted (or a read failed).
            dataset = iter(data_loader)
            real_image, _ = next(dataset)

        iteration += 1

        ### 1. train Discriminator
        b_size = real_image.size(0)
        real_image = real_image.to(device)
        real_predict = discriminator(real_image, step=step, alpha=alpha)
        # The small squared term penalises large scores on real images.
        real_predict = real_predict.mean() - 0.001 * (real_predict ** 2).mean()
        real_predict.backward(mone)

        # sample input data: vector for Generator
        gen_z = torch.randn(b_size, input_code_size).to(device)

        fake_image = generator(gen_z, step=step, alpha=alpha)
        fake_predict = discriminator(fake_image.detach(), step=step, alpha=alpha)
        fake_predict = fake_predict.mean()
        fake_predict.backward(one)

        ### gradient penalty for D
        eps = torch.rand(b_size, 1, 1, 1).to(device)
        x_hat = eps * real_image.data + (1 - eps) * fake_image.detach().data
        x_hat.requires_grad = True
        hat_predict = discriminator(x_hat, step=step, alpha=alpha)
        grad_x_hat = grad(outputs=hat_predict.sum(), inputs=x_hat, create_graph=True)[0]
        grad_penalty = (
            (grad_x_hat.view(grad_x_hat.size(0), -1).norm(2, dim=1) - 1) ** 2
        ).mean()
        grad_penalty = 10 * grad_penalty
        grad_penalty.backward()
        grad_loss_val += grad_penalty.item()
        disc_loss_val += (real_predict - fake_predict).item()

        d_optimizer.step()

        ### 2. train Generator
        if (i + 1) % n_critic == 0:
            generator.zero_grad()
            discriminator.zero_grad()

            predict = discriminator(fake_image, step=step, alpha=alpha)

            loss = -predict.mean()
            gen_loss_val += loss.item()

            loss.backward()
            g_optimizer.step()
            accumulate(g_running, generator)

        if (i + 1) % 1000 == 0 or i == 0:
            with torch.no_grad():
                images = g_running(
                    torch.randn(5 * 10, input_code_size).to(device),
                    step=step,
                    alpha=alpha,
                ).data.cpu()

                utils.save_image(
                    images,
                    f"{log_folder}/sample/{str(i + 1).zfill(6)}.png",
                    nrow=10,
                    normalize=True,
                    range=(-1, 1),
                )

        if (i + 1) % 10000 == 0 or i == 0:
            try:
                torch.save(
                    g_running.state_dict(),
                    f"{log_folder}/checkpoint/{str(i + 1).zfill(6)}_g.model",
                )
                torch.save(
                    discriminator.state_dict(),
                    f"{log_folder}/checkpoint/{str(i + 1).zfill(6)}_d.model",
                )
            except Exception as err:
                # Checkpointing stays best effort, but a failure is reported
                # instead of being silently swallowed.
                print(f"Warning: could not save checkpoint at iteration {i + 1}: {err}")

        if (i + 1) % log_every == 0:
            # Guard against n_critic > log_every, which would divide by zero.
            gen_updates = max(1, log_every // n_critic)
            state_msg = (
                f"{i + 1}; G: {gen_loss_val / gen_updates:.3f}; D: {disc_loss_val / log_every:.3f};"
                f" Grad: {grad_loss_val / log_every:.3f}; Alpha: {alpha:.3f}"
            )

            new_line = "%.5f,%.5f\n" % (
                gen_loss_val / gen_updates,
                disc_loss_val / log_every,
            )
            with open(log_file_name, "a+") as log_file:
                log_file.write(new_line)

            disc_loss_val = 0
            gen_loss_val = 0
            grad_loss_val = 0

            print(state_msg)
if __name__ == "__main__":
    import os

    # Checkpoints of an earlier local run. They are the default warm-start
    # files so existing setups keep working; override them on the command line.
    legacy_checkpoint_dir = "/home/dex/Desktop/ml/ML-DL-scripts/DEEP LEARNING/Autoencoders GANS/pytorch/ProgressiveGAN/trial_experiment-1_2020-01-08_23_5/checkpoint"

    parser = argparse.ArgumentParser(
        description="Progressive GAN, during training, the model will learn to generate images from a low resolution, then progressively getting high resolution "
    )

    parser.add_argument(
        "--path",
        type=str,
        required=True,
        help="path of specified dataset, should be a folder that has one or many sub image folders inside",
    )
    parser.add_argument(
        "--trial_name",
        type=str,
        default="test1",
        help="a brief description of the training trial",
    )
    parser.add_argument(
        "--gpu_id",
        type=int,
        default=0,
        help="0 is the first gpu, 1 is the second gpu, etc.",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=0.001,
        help="learning rate, default is 1e-3, usually dont need to change it, you can try make it bigger, such as 2e-3",
    )
    parser.add_argument(
        "--z_dim",
        type=int,
        default=128,
        help="the initial latent vector's dimension, can be smaller such as 64, if the dataset is not diverse",
    )
    parser.add_argument(
        "--channel",
        type=int,
        default=128,
        help="determines how big the model is, smaller value means faster training, but less capacity of the model",
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        default=4,
        help="how many images to train together at one iteration",
    )
    parser.add_argument(
        "--n_critic",
        type=int,
        default=1,
        help="train Dhow many times while train G 1 time",
    )
    parser.add_argument(
        "--init_step",
        type=int,
        default=1,
        help="start from what resolution, 1 means 8x8 resolution, 2 means 16x16 resolution, ..., 6 means 256x256 resolution",
    )
    parser.add_argument(
        "--total_iter",
        type=int,
        default=300000,
        help="how many iterations to train in total, the value is in assumption that init step is 1",
    )
    parser.add_argument(
        "--pixel_norm",
        default=False,
        action="store_true",
        help="a normalization method inside the model, you can try use it or not depends on the dataset",
    )
    parser.add_argument(
        "--tanh",
        default=False,
        action="store_true",
        help="an output non-linearity on the output of Generator, you can try use it or not depends on the dataset",
    )
    parser.add_argument(
        "--g_checkpoint",
        type=str,
        default=os.path.join(legacy_checkpoint_dir, "010000_g.model"),
        help="generator state dict to warm-start from; skipped when the file does not exist",
    )
    parser.add_argument(
        "--d_checkpoint",
        type=str,
        default=os.path.join(legacy_checkpoint_dir, "010000_d.model"),
        help="discriminator state dict to warm-start from; skipped when the file does not exist",
    )

    args = parser.parse_args()

    print(str(args))

    # The names below are read as module-level globals by train() and
    # imagefolder_loader().
    trial_name = args.trial_name
    device = torch.device("cuda:%d" % (args.gpu_id))
    input_code_size = args.z_dim
    batch_size = args.batch_size
    n_critic = args.n_critic

    generator = Generator(
        in_channel=args.channel,
        input_code_dim=input_code_size,
        pixel_norm=args.pixel_norm,
        tanh=args.tanh,
    ).to(device)
    discriminator = Discriminator(feat_dim=args.channel).to(device)
    g_running = Generator(
        in_channel=args.channel,
        input_code_dim=input_code_size,
        pixel_norm=args.pixel_norm,
        tanh=args.tanh,
    ).to(device)

    ## you can directly load a pretrained model here
    # Missing checkpoint files are skipped so the script also runs on machines
    # that do not have them.
    if os.path.isfile(args.g_checkpoint):
        g_state = torch.load(args.g_checkpoint, map_location=device)
        generator.load_state_dict(g_state)
        g_running.load_state_dict(g_state)
    else:
        print("Generator checkpoint not found, starting from scratch: %s" % args.g_checkpoint)

    if os.path.isfile(args.d_checkpoint):
        discriminator.load_state_dict(
            torch.load(args.d_checkpoint, map_location=device)
        )
    else:
        print("Discriminator checkpoint not found, starting from scratch: %s" % args.d_checkpoint)

    g_running.train(False)

    g_optimizer = optim.Adam(generator.parameters(), lr=args.lr, betas=(0.0, 0.99))
    d_optimizer = optim.Adam(discriminator.parameters(), lr=args.lr, betas=(0.0, 0.99))

    # decay=0 copies the generator weights into the running-average model.
    accumulate(g_running, generator, 0)

    loader = imagefolder_loader(args.path)

    train(generator, discriminator, args.init_step, loader, args.total_iter)
================================================
FILE: DEEP LEARNING/Autoencoders GANS/pytorch/Semi-supervised GAN/Datasets.py
================================================
import numpy as np
import torch
from torch.utils.data import TensorDataset
from torchvision import datasets, transforms
def MnistLabel(class_num):
    """Return a ``TensorDataset`` with ``class_num`` random MNIST training
    images for each of the 10 digit classes.

    The training set is visited in a random order (``np.random.permutation``)
    and samples are kept until every class holds ``class_num`` of them.
    """
    raw_dataset = datasets.MNIST(
        "../data",
        train=True,
        download=True,
        transform=transforms.Compose([transforms.ToTensor()]),
    )

    per_class = [0] * 10
    images = []
    targets = []
    wanted = 10 * class_num

    for idx in np.random.permutation(len(raw_dataset)):
        datum, label = raw_dataset[idx]
        if per_class[label] < class_num:
            images.append(datum.numpy())
            targets.append(label)
            per_class[label] += 1
        if len(images) >= wanted:
            break

    image_tensor = torch.FloatTensor(np.array(images))
    target_tensor = torch.LongTensor(np.array(targets))
    return TensorDataset(image_tensor, target_tensor)
def MnistUnlabel():
    """Return the full MNIST training set (downloaded to ``../data``) with
    images converted to tensors."""
    to_tensor = transforms.Compose([transforms.ToTensor()])
    return datasets.MNIST("../data", train=True, download=True, transform=to_tensor)
def MnistTest():
    """Return the MNIST test split (downloaded to ``../data``) with images
    converted to tensors."""
    to_tensor = transforms.Compose([transforms.ToTensor()])
    test_set = datasets.MNIST(
        "../data", train=False, download=True, transform=to_tensor
    )
    return test_set
if __name__ == "__main__":
    # Manual check: list the attributes of the MNIST test dataset object.
    test_set = MnistTest()
    print(dir(test_set))
================================================
FILE: DEEP LEARNING/Autoencoders GANS/pytorch/Semi-supervised GAN/ImprovedGAN.py
================================================
# -*- coding:utf-8 -*-
from __future__ import print_function
import argparse
import numpy as np
import os
import pdb
import sys
import tensorboardX
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from Datasets import *
from Nets import Generator, Discriminator
from functional import log_sum_exp
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset
class ImprovedGAN(object):
    """Trainer for a semi-supervised GAN built from a generator ``G`` and a
    classifier-style discriminator ``D``.

    Args:
        G: generator module, called as ``G(batch_size, cuda=...)``.
        D: discriminator module, called as ``D(x, feature=..., cuda=...)``.
        labeled: ``TensorDataset`` of labeled samples.
        unlabeled: dataset of unlabeled samples (labels are ignored).
        test: dataset used by ``eval``.
        args: namespace with ``savedir``, ``logdir``, ``cuda``, ``lr``,
            ``momentum``, ``unlabel_weight``, ``epochs``, ``batch_size``,
            ``log_interval`` and ``eval_interval``.
    """

    def __init__(self, G, D, labeled, unlabeled, test, args):
        if os.path.exists(args.savedir):
            print("Loading model from " + args.savedir)
            # NOTE(review): torch.load unpickles whole module objects here;
            # only point savedir at files you trust.
            self.G = torch.load(os.path.join(args.savedir, "G.pkl"))
            self.D = torch.load(os.path.join(args.savedir, "D.pkl"))
        else:
            os.makedirs(args.savedir)
            self.G = G
            self.D = D
            torch.save(self.G, os.path.join(args.savedir, "G.pkl"))
            torch.save(self.D, os.path.join(args.savedir, "D.pkl"))
        self.writer = tensorboardX.SummaryWriter(log_dir=args.logdir)
        if args.cuda:
            self.G.cuda()
            self.D.cuda()
        self.labeled = labeled
        self.unlabeled = unlabeled
        self.test = test
        self.Doptim = optim.Adam(
            self.D.parameters(), lr=args.lr, betas=(args.momentum, 0.999)
        )
        self.Goptim = optim.Adam(
            self.G.parameters(), lr=args.lr, betas=(args.momentum, 0.999)
        )
        self.args = args

    def trainD(self, x_label, y, x_unlabel):
        """Run one discriminator update.

        Returns:
            Tuple ``(supervised loss, unsupervised loss, accuracy)``; the two
            losses are NumPy values, the accuracy on the labeled batch is a
            tensor.
        """
        x_label, x_unlabel, y = (
            Variable(x_label),
            Variable(x_unlabel),
            Variable(y, requires_grad=False),
        )
        if self.args.cuda:
            x_label, x_unlabel, y = x_label.cuda(), x_unlabel.cuda(), y.cuda()
        output_label, output_unlabel, output_fake = (
            self.D(x_label, cuda=self.args.cuda),
            self.D(x_unlabel, cuda=self.args.cuda),
            self.D(
                self.G(x_unlabel.size()[0], cuda=self.args.cuda)
                .view(x_unlabel.size())
                .detach(),
                cuda=self.args.cuda,
            ),
        )
        logz_label, logz_unlabel, logz_fake = (
            log_sum_exp(output_label),
            log_sum_exp(output_unlabel),
            log_sum_exp(output_fake),
        )  # log ∑e^x_i
        prob_label = torch.gather(
            output_label, 1, y.unsqueeze(1)
        )  # log e^x_label = x_label
        loss_supervised = -torch.mean(prob_label) + torch.mean(logz_label)
        loss_unsupervised = 0.5 * (
            -torch.mean(logz_unlabel)
            + torch.mean(F.softplus(logz_unlabel))
            + torch.mean(F.softplus(logz_fake))  # real_data: log Z/(1+Z)
        )  # fake_data: log 1/(1+Z)
        loss = loss_supervised + self.args.unlabel_weight * loss_unsupervised
        acc = torch.mean((output_label.max(1)[1] == y).float())
        self.Doptim.zero_grad()
        loss.backward()
        self.Doptim.step()
        return (
            loss_supervised.data.cpu().numpy(),
            loss_unsupervised.data.cpu().numpy(),
            acc,
        )

    def trainG(self, x_unlabel):
        """Run one generator update with a feature-matching loss.

        The loss is the mean squared difference between the batch-mean
        discriminator features of generated and unlabeled samples.
        """
        fake = self.G(x_unlabel.size()[0], cuda=self.args.cuda).view(x_unlabel.size())
        mom_gen, output_fake = self.D(fake, feature=True, cuda=self.args.cuda)
        mom_unlabel, _ = self.D(Variable(x_unlabel), feature=True, cuda=self.args.cuda)
        mom_gen = torch.mean(mom_gen, dim=0)
        mom_unlabel = torch.mean(mom_unlabel, dim=0)
        loss_fm = torch.mean((mom_gen - mom_unlabel) ** 2)
        loss = loss_fm
        self.Goptim.zero_grad()
        self.Doptim.zero_grad()
        loss.backward()
        self.Goptim.step()
        return loss.data.cpu().numpy()

    def train(self):
        """Train for ``args.epochs`` epochs, logging to tensorboard and saving
        both models to ``args.savedir`` after every evaluation."""
        assert len(self.unlabeled) > len(self.labeled)
        assert isinstance(self.labeled, TensorDataset)
        # Tile the small labeled set so it lasts as long as the unlabeled one.
        times = int(np.ceil(len(self.unlabeled) * 1.0 / len(self.labeled)))
        t1 = self.labeled.tensors[0].clone()
        t2 = self.labeled.tensors[1].clone()
        tile_labeled = TensorDataset(t1.repeat(times, 1, 1, 1), t2.repeat(times))
        gn = 0
        for epoch in range(self.args.epochs):
            self.G.train()
            self.D.train()
            unlabel_loader1 = DataLoader(
                self.unlabeled,
                batch_size=self.args.batch_size,
                shuffle=True,
                drop_last=True,
                num_workers=4,
            )
            unlabel_loader2 = iter(
                DataLoader(
                    self.unlabeled,
                    batch_size=self.args.batch_size,
                    shuffle=True,
                    drop_last=True,
                    num_workers=4,
                )
            )
            label_loader = iter(
                DataLoader(
                    tile_labeled,
                    batch_size=self.args.batch_size,
                    shuffle=True,
                    drop_last=True,
                    num_workers=4,
                )
            )
            loss_supervised = loss_unsupervised = loss_gen = accuracy = 0.0
            batch_num = 0
            for (unlabel1, _label1) in unlabel_loader1:
                batch_num += 1
                # next(it) instead of it.next(): the latter is not part of
                # the Python 3 iterator protocol.
                unlabel2, _label2 = next(unlabel_loader2)
                x, y = next(label_loader)
                # Use the instance's own args, not a module-level global.
                if self.args.cuda:
                    x, y, unlabel1, unlabel2 = (
                        x.cuda(),
                        y.cuda(),
                        unlabel1.cuda(),
                        unlabel2.cuda(),
                    )
                ll, lu, acc = self.trainD(x, y, unlabel1)
                loss_supervised += ll
                loss_unsupervised += lu
                accuracy += acc
                lg = self.trainG(unlabel2)
                # After the first two epochs, give the generator a second
                # update when its loss is still above 1.
                if epoch > 1 and lg > 1:
                    lg = self.trainG(unlabel2)
                loss_gen += lg
                if (batch_num + 1) % self.args.log_interval == 0:
                    print("Training: %d / %d" % (batch_num + 1, len(unlabel_loader1)))
                    gn += 1
                    with torch.no_grad():
                        self.writer.add_scalars(
                            "loss",
                            {
                                "loss_supervised": ll,
                                "loss_unsupervised": lu,
                                "loss_gen": lg,
                            },
                            gn,
                        )
                        self.writer.add_histogram(
                            "real_feature",
                            self.D(Variable(x), cuda=self.args.cuda, feature=True)[0],
                            gn,
                        )
                        self.writer.add_histogram(
                            "fake_feature",
                            self.D(
                                self.G(self.args.batch_size, cuda=self.args.cuda),
                                cuda=self.args.cuda,
                                feature=True,
                            )[0],
                            gn,
                        )
                        self.writer.add_histogram("fc3_bias", self.G.fc3.bias, gn)
                        self.writer.add_histogram(
                            "D_feature_weight", self.D.layers[-1].weight, gn
                        )
                    self.D.train()
                    self.G.train()
            loss_supervised /= batch_num
            loss_unsupervised /= batch_num
            loss_gen /= batch_num
            accuracy /= batch_num
            print(
                "Iteration %d, loss_supervised = %.4f, loss_unsupervised = %.4f, loss_gen = %.4f train acc = %.4f"
                % (epoch, loss_supervised, loss_unsupervised, loss_gen, accuracy)
            )
            sys.stdout.flush()
            if (epoch + 1) % self.args.eval_interval == 0:
                print("Eval: correct %d / %d" % (self.eval(), len(self.test)))
                torch.save(self.G, os.path.join(self.args.savedir, "G.pkl"))
                torch.save(self.D, os.path.join(self.args.savedir, "D.pkl"))

    def predict(self, x):
        """Return the index of the highest discriminator output for each row
        of ``x`` (computed without gradients)."""
        with torch.no_grad():
            ret = torch.max(self.D(Variable(x), cuda=self.args.cuda), 1)[1].data
        return ret

    def eval(self):
        """Return the number of correctly classified test samples as a tensor.

        Switches ``G`` and ``D`` to eval mode and evaluates the whole test set
        as a single batch.
        """
        self.G.eval()
        self.D.eval()
        d, l = [], []
        for (datum, label) in self.test:
            d.append(datum)
            l.append(label)
        x, y = torch.stack(d), torch.LongTensor(l)
        if self.args.cuda:
            x, y = x.cuda(), y.cuda()
        pred = self.predict(x)
        return torch.sum(pred == y)

    def draw(self, batch_size):
        """Generate ``batch_size`` samples with the generator in eval mode."""
        self.G.eval()
        return self.G(batch_size, cuda=self.args.cuda)
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="PyTorch Improved GAN")
    parser.add_argument(
        "--batch-size",
        type=int,
        default=100,
        metavar="N",
        help="input batch size for training (default: 100)",
    )
    parser.add_argument(
        "--epochs",
        type=int,
        default=10,
        metavar="N",
        help="number of epochs to train (default: 10)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=0.003,
        metavar="LR",
        help="learning rate (default: 0.003)",
    )
    # The value is passed to Adam as beta1 by ImprovedGAN.
    parser.add_argument(
        "--momentum",
        type=float,
        default=0.5,
        metavar="M",
        help="Adam beta1 (default: 0.5)",
    )
    parser.add_argument(
        "--cuda", action="store_true", default=False, help="CUDA training"
    )
    parser.add_argument(
        "--seed", type=int, default=1, metavar="S", help="random seed (default: 1)"
    )
    parser.add_argument(
        "--log-interval",
        type=int,
        default=100,
        metavar="N",
        help="how many batches to wait before logging training status",
    )
    parser.add_argument(
        "--eval-interval",
        type=int,
        default=1,
        metavar="N",
        help="how many epochs to wait before evaling training status",
    )
    parser.add_argument(
        "--unlabel-weight",
        type=float,
        default=1,
        metavar="N",
        help="scale factor between labeled and unlabeled data",
    )
    parser.add_argument(
        "--logdir",
        type=str,
        default="./logfile",
        metavar="LOG_PATH",
        help="logfile path, tensorboard format",
    )
    parser.add_argument(
        "--savedir",
        type=str,
        default="./models",
        metavar="SAVE_PATH",
        help="saving path, pickle format",
    )
    args = parser.parse_args()
    # Only use CUDA when it was requested and is actually available.
    args.cuda = args.cuda and torch.cuda.is_available()
    # Seed both NumPy (labeled-subset sampling) and torch (weight init, noise)
    # so that --seed controls every source of randomness used here.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    gan = ImprovedGAN(
        Generator(100),
        Discriminator(),
        MnistLabel(10),
        MnistUnlabel(),
        MnistTest(),
        args,
    )
    gan.train()
================================================
FILE: DEEP LEARNING/Autoencoders GANS/pytorch/Semi-supervised GAN/Nets.py
================================================
import torch
from torch.nn.parameter import Parameter
from torch import nn
from torch.nn import functional as F
from torch.autograd import Variable
import pdb
from functional import reset_normal_param, LinearWeightNorm
class Discriminator(nn.Module):
    """Fully connected discriminator/classifier built from weight-normalised
    linear layers, with Gaussian noise added to the activations in training
    mode.
    """

    def __init__(self, input_dim=28 ** 2, output_dim=10):
        super(Discriminator, self).__init__()
        self.input_dim = input_dim
        widths = [input_dim, 1000, 500, 250, 250, 250]
        self.layers = torch.nn.ModuleList(
            [
                LinearWeightNorm(n_in, n_out)
                for n_in, n_out in zip(widths, widths[1:])
            ]
        )
        self.final = LinearWeightNorm(250, output_dim, weight_scale=1)

    def forward(self, x, feature=False, cuda=False):
        """Return the class logits, or ``(last hidden features, logits)`` when
        ``feature`` is true.

        The returned features are the ReLU output of the last hidden layer
        before noise is added.
        """

        def make_noise(like, scale):
            # Gaussian noise in training mode, a single zero otherwise.
            if self.training:
                noise = torch.randn(like.size()) * scale
            else:
                noise = torch.Tensor([0])
            return noise.cuda() if cuda else noise

        x = x.view(-1, self.input_dim)
        x = x + Variable(make_noise(x, 0.3), requires_grad=False)
        for layer in self.layers:
            x_f = F.relu(layer(x))
            x = x_f + Variable(make_noise(x_f, 0.5), requires_grad=False)
        logits = self.final(x)
        if feature:
            return x_f, logits
        return logits
class Generator(nn.Module):
    """Fully connected generator mapping uniform noise to ``output_dim``
    values through two batch-normalised hidden layers and softplus
    activations.

    Args:
        z_dim: size of the noise vector.
        output_dim: number of output values per sample.
    """

    def __init__(self, z_dim, output_dim=28 ** 2):
        super(Generator, self).__init__()
        self.z_dim = z_dim
        self.fc1 = nn.Linear(z_dim, 500, bias=False)
        self.bn1 = nn.BatchNorm1d(500, affine=False, eps=1e-6, momentum=0.5)
        self.fc2 = nn.Linear(500, 500, bias=False)
        self.bn2 = nn.BatchNorm1d(500, affine=False, eps=1e-6, momentum=0.5)
        self.fc3 = LinearWeightNorm(500, output_dim, weight_scale=1)
        # Learnable offsets added after the (non-affine) batch norms.
        self.bn1_b = Parameter(torch.zeros(500))
        self.bn2_b = Parameter(torch.zeros(500))
        # In-place initialisers replace the deprecated ``xavier_uniform``.
        nn.init.xavier_uniform_(self.fc1.weight)
        nn.init.xavier_uniform_(self.fc2.weight)

    def forward(self, batch_size, cuda=False):
        """Generate ``batch_size`` samples from fresh uniform noise."""
        # The removed ``volatile`` Variable flag is no longer passed; wrap the
        # call in ``torch.no_grad()`` when gradients are not needed.
        x = torch.rand(batch_size, self.z_dim)
        if cuda:
            x = x.cuda()
        x = F.softplus(self.bn1(self.fc1(x)) + self.bn1_b)
        x = F.softplus(self.bn2(self.fc2(x)) + self.bn2_b)
        x = F.softplus(self.fc3(x))
        return x
================================================
FILE: DEEP LEARNING/Autoencoders GANS/pytorch/Semi-supervised GAN/README.md
================================================
reference https://github.com/Sleepychord/ImprovedGAN-pytorch
# Improved GAN (Semi-supervised GAN)
This is an implementation of *Semi-supervised generative adversarial network* in the paper [Improved Techniques for Training GANs](https://arxiv.org/abs/1606.03498) for **Mnist** dataset.
This method and its extensions have marvellous performance on traditional CV datasets, and remained state-of-the-art as of the end of November 2017.
## Working Principle
Inspired by [Good Semi-supervised Learning that Requires a Bad GAN](https://arxiv.org/abs/1705.09783), semi-supervised GAN with feature matching actually generates unrealistic fake samples around high-density regions. With the inborn continuity, the **fake region** in feature space splits the bounds of different classes.
Refer to [Semi-supervised Learning on Graphs with Generative Adversarial Nets](https://arxiv.org/abs/1809.00130) for more details about this **density gap splitting** explanation.
## Running
The code was implemented in Python 3.7.
`python ImprovedGAN.py`
Default configs include **CPU, saving and autoloading, generating logfile in tensorboard format, etc**. You can use `python ImprovedGAN.py --cuda` to run it on GPU.
The **latest** `torch`(1.2 version), `tensorboardX`, `torchvision` are needed.
## Result
Default configs can train models achieving **98.5% accuracy** on test dataset with 100 labeled data(10 per class) and other 59,000 unlabeled data after 100 epochs.
### Loss curve during training

`loss_label => red, loss_unlabel => blue, loss_gen => green`
It must be noted that [OpenAI implementation](https://github.com/openai/improved-gan)(theano) demonstrates a different curve, where loss\_gen is nearly zero and loss\_unlabel increase gradually.
## Remark
* The implementation is based on [OpenAI implementation](https://github.com/openai/improved-gan).
* But I found it hard to reproduce expected results and suffered from exploding gradients. I changed the final layer in generator from **Sigmoid** to **Softplus**, and therefore fixed it.
* `./models` includes the trained model, you can simply delete it for retraining.
* The architectures of the networks are elaborately designed; among them `Weight Normalization` is very important.
* Thank Jiapeng Hong for discussing with me.
## Change Logs
* (Nov 27, 2019) Update to pytorch 1.2 and Python 3.7. The version for pytorch 0.3 and Python 2.7 can be found in the history versions. Delete pretrained models.
================================================
FILE: DEEP LEARNING/Autoencoders GANS/pytorch/Semi-supervised GAN/functional.py
================================================
import math
import pdb
import torch
import torch.nn.functional as F
from torch.nn.parameter import Parameter
def log_sum_exp(x, axis=1):
    """Return ``log(sum(exp(x)))`` along ``axis``, computed stably.

    The maximum along ``axis`` is subtracted before exponentiating and added
    back afterwards. The reduced dimension is removed from the result.
    """
    # Take the max along the requested axis (it used to be hard-coded to 1,
    # which broke any other ``axis`` value).
    m = torch.max(x, dim=axis, keepdim=True)[0]
    return m.squeeze(axis) + torch.log(torch.sum(torch.exp(x - m), dim=axis))
def reset_normal_param(L, stdv, weight_scale=1.0):
    """Re-initialise the weight of linear layer ``L`` in place from a normal
    distribution with std ``weight_scale / sqrt(L.weight.size()[0])``.

    ``stdv`` is accepted for interface compatibility but is not used.
    """
    assert isinstance(L, torch.nn.Linear)
    std = weight_scale / math.sqrt(L.weight.size()[0])
    # ``normal_`` is the in-place replacement for the deprecated ``init.normal``.
    torch.nn.init.normal_(L.weight, std=std)
class LinearWeightNorm(torch.nn.Module):
    """Linear layer whose weight rows are rescaled to unit L2 norm and then
    multiplied by ``weight_scale`` on every forward pass.

    Args:
        in_features: input size.
        out_features: output size.
        bias: add a learnable bias (initialised to zero) when true.
        weight_scale: ``None`` for a fixed scale of 1, or an ``int`` used as
            the initial value of a learnable per-output scale.
        weight_init_stdv: std of the normal weight initialisation.
    """

    def __init__(
        self,
        in_features,
        out_features,
        bias=True,
        weight_scale=None,
        weight_init_stdv=0.1,
    ):
        super(LinearWeightNorm, self).__init__()
        self.in_features = in_features
        self.out_features = out_features

        initial = torch.randn(out_features, in_features) * weight_init_stdv
        self.weight = Parameter(initial)

        if not bias:
            self.register_parameter("bias", None)
        else:
            self.bias = Parameter(torch.zeros(out_features))

        if weight_scale is None:
            self.weight_scale = 1
        else:
            assert type(weight_scale) == int
            self.weight_scale = Parameter(torch.ones(out_features, 1) * weight_scale)

    def forward(self, x):
        # L2 norm of every weight row, kept as a column for broadcasting.
        row_norm = torch.sqrt(torch.sum(self.weight ** 2, dim=1, keepdim=True))
        W = self.weight * self.weight_scale / row_norm
        return F.linear(x, W, self.bias)

    def __repr__(self):
        return "{}(in_features={!s}, out_features={!s}, weight_scale={!s})".format(
            self.__class__.__name__,
            self.in_features,
            self.out_features,
            self.weight_scale,
        )
================================================
FILE: DEEP LEARNING/Autoencoders GANS/pytorch/VAE/VAR mnist.py
================================================
#!/usr/bin/env python
# coding: utf-8
# In[3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# In[ ]:
# NOTE(review): this rebinds the name `transforms`, which until here referred
# to the imported torchvision module; from this line on it is the composed
# ToTensor transform, so `transforms.<anything>` no longer reaches the module.
transforms = transforms.Compose([transforms.ToTensor()])
# MNIST train/test splits, downloaded to ./data and converted to tensors.
train_dataset = datasets.MNIST(
    './data',
    train=True,
    download=True,
    transform=transforms)
test_dataset = datasets.MNIST(
    './data',
    train=False,
    download=True,
    transform=transforms
)
# In[5]:
BATCH_SIZE = 64 # number of data points in each batch
N_EPOCHS = 10 # times to run the model on complete data
INPUT_DIM = 28 * 28 # size of each input
HIDDEN_DIM = 256 # hidden dimension
LATENT_DIM = 20 # latent vector dimension
lr = 1e-3 # learning rate
# In[6]:
# Only the training loader shuffles its data.
train_iterator = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_iterator = DataLoader(test_dataset, batch_size=BATCH_SIZE)
# In[9]:
class Encoder(nn.Module):
    """Encoder half of the VAE.

    A single ReLU hidden layer feeds two independent linear heads, ``mu``
    and ``var``. The ``VAE`` wrapper in this file computes the standard
    deviation as ``exp(var / 2)``, i.e. it treats the second head as a
    log-variance.
    """

    def __init__(self, input_dim, hidden_dim, z_dim):
        """
        Args:
            input_dim: An integer, the size of one flattened input
                (28 * 28 for MNIST).
            hidden_dim: An integer, the width of the hidden layer.
            z_dim: An integer, the size of the latent vector.
        """
        super().__init__()
        self.linear = nn.Linear(input_dim, hidden_dim)
        self.mu = nn.Linear(hidden_dim, z_dim)
        self.var = nn.Linear(hidden_dim, z_dim)

    def forward(self, x):
        """Map ``x`` of shape [batch_size, input_dim] to ``(z_mu, z_var)``.

        Both returned tensors have shape [batch_size, z_dim].
        """
        # Shared representation, shape [batch_size, hidden_dim].
        hidden = F.relu(self.linear(x))
        return self.mu(hidden), self.var(hidden)
class Decoder(nn.Module):
    """Decoder half of the VAE.

    Maps a latent vector through one ReLU hidden layer and a
    sigmoid-activated output layer, so every output value lies in [0, 1].
    """

    def __init__(self, z_dim, hidden_dim, output_dim):
        """
        Args:
            z_dim: An integer, the size of the latent vector.
            hidden_dim: An integer, the width of the hidden layer.
            output_dim: An integer, the size of one reconstructed sample
                (28 * 28 for MNIST).
        """
        super().__init__()
        self.linear = nn.Linear(z_dim, hidden_dim)
        self.out = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Decode ``x`` of shape [batch_size, z_dim] to [batch_size, output_dim]."""
        activated = F.relu(self.linear(x))  # [batch_size, hidden_dim]
        logits = self.out(activated)  # [batch_size, output_dim]
        return torch.sigmoid(logits)
# In[10]:
class VAE(nn.Module):
    """Variational autoencoder assembled from an encoder and a decoder.

    ``enc`` must return a ``(z_mu, z_var)`` pair and ``dec`` must accept a
    tensor shaped like ``z_mu``.
    """

    def __init__(self, enc, dec):
        super().__init__()
        self.enc = enc
        self.dec = dec

    def forward(self, x):
        """Encode ``x``, sample a latent vector and decode it.

        Returns:
            A tuple ``(predicted, z_mu, z_var)``: the decoder output and the
            two encoder outputs used to draw the sample.
        """
        z_mu, z_var = self.enc(x)
        # Reparameterisation: z = mu + eps * std with eps ~ N(0, I), where
        # std is exp(z_var / 2).
        std = torch.exp(z_var / 2)
        noise = torch.randn_like(std)
        latent = z_mu + noise * std
        return self.dec(latent), z_mu, z_var
# In[11]:
# encoder: INPUT_DIM-sized input -> HIDDEN_DIM hidden layer -> two LATENT_DIM-sized heads
encoder = Encoder(INPUT_DIM, HIDDEN_DIM, LATENT_DIM)
# decoder: LATENT_DIM-sized latent vector -> HIDDEN_DIM hidden layer -> INPUT_DIM-sized output
decoder = Decoder(LATENT_DIM, HIDDEN_DIM, INPUT_DIM)
# vae: wrap both halves in one module and move all parameters to the selected device
model = VAE(encoder, decoder).to(device)
# optimizer: Adam over every encoder and decoder parameter, with learning rate `lr`
optimizer = optim.Adam(model.parameters(), lr=lr)
# In[12]:
def train():
    """Run one optimisation pass over ``train_iterator``.

    Uses the module-level ``model``, ``optimizer`` and ``device``.

    Returns:
        The loss summed over every sample of the epoch (a Python float once
        at least one batch was processed). It is not averaged; the caller
        divides by the dataset size.
    """
    # set the train mode
    model.train()
    # loss of the epoch
    train_loss = 0
    for x, _ in train_iterator:
        # reshape the data into [batch_size, 784]
        x = x.view(-1, 28 * 28)
        x = x.to(device)
        # update the gradients to zero
        optimizer.zero_grad()
        # forward pass
        x_sample, z_mu, z_var = model(x)
        # reconstruction loss, summed over pixels and samples;
        # reduction='sum' replaces the deprecated size_average=False
        recon_loss = F.binary_cross_entropy(x_sample, x, reduction='sum')
        # kl divergence between N(z_mu, exp(z_var)) and the standard normal
        kl_loss = 0.5 * torch.sum(torch.exp(z_var) + z_mu**2 - 1.0 - z_var)
        # total loss
        loss = recon_loss + kl_loss
        # backward pass
        loss.backward()
        train_loss += loss.item()
        # update the weights
        optimizer.step()
    return train_loss
def test():
    """Evaluate the model on ``test_iterator`` without updating weights.

    Uses the module-level ``model`` and ``device``.

    Returns:
        The loss summed over every test sample (a Python float once at least
        one batch was processed). It is not averaged; the caller divides by
        the dataset size.
    """
    # set the evaluation mode
    model.eval()
    # test loss for the data
    test_loss = 0
    # gradients are not needed because no parameters are updated here
    with torch.no_grad():
        for x, _ in test_iterator:
            # reshape the data into [batch_size, 784]
            x = x.view(-1, 28 * 28)
            x = x.to(device)
            # forward pass
            x_sample, z_mu, z_var = model(x)
            # reconstruction loss, summed over pixels and samples;
            # reduction='sum' replaces the deprecated size_average=False
            recon_loss = F.binary_cross_entropy(x_sample, x, reduction='sum')
            # kl divergence between N(z_mu, exp(z_var)) and the standard normal
            kl_loss = 0.5 * torch.sum(torch.exp(z_var) + z_mu**2 - 1.0 - z_var)
            # total loss
            loss = recon_loss + kl_loss
            test_loss += loss.item()
    return test_loss
# In[14]:
best_test_loss = float('inf')
# Bound before the loop: previously the counter was first assigned inside the
# "improved" branch, so a first-epoch test loss of NaN or inf (which is not
# smaller than inf) hit `patience_counter += 1` and raised NameError.
patience_counter = 1
for e in range(N_EPOCHS):
    train_loss = train()
    test_loss = test()
    # train() and test() return summed losses; convert to per-sample averages
    train_loss /= len(train_dataset)
    test_loss /= len(test_dataset)
    print(f'Epoch {e}, Train Loss: {train_loss:.2f}, Test Loss: {test_loss:.2f}')
    if best_test_loss > test_loss:
        best_test_loss = test_loss
        patience_counter = 1
    else:
        patience_counter += 1
    # early stopping: give up after three consecutive epochs without improvement
    if patience_counter > 3:
        break
# In[32]:
# sample and generate an image: draw one latent vector from a standard normal
z = torch.randn(1, LATENT_DIM).to(device)
# run only the decoder, then bring the result back to the CPU
reconstructed_img = model.dec(z).cpu()
# reshape the flat decoder output to 28x28; `.data` gives the tensor without autograd history
img = reconstructed_img.view(28, 28).data
print(z.shape)
print(img.shape)
plt.imshow(img, cmap='gray')
# In[36]:
# same steps again with a fresh random latent vector, producing a second sample
z = torch.randn(1, LATENT_DIM).to(device)
# run only the decoder
reconstructed_img = model.dec(z).cpu()
img = reconstructed_img.view(28, 28).data
print(z.shape)
print(img.shape)
plt.imshow(img, cmap='gray')
================================================
FILE: DEEP LEARNING/Google Landmark Retrieval Challenge.py
================================================
### Google Landmark Retrieval Challenge
# export PATH=~/anaconda3/bin:$PATH
# pip install --ignore-installed --upgrade "https://github.com/sigilioso/tensorflow-build/raw/master/tensorflow-1.4.0-cp36-cp36m-linux_x86_64.whl"
from tqdm import tqdm
import tensorflow as tf
from keras.applications.resnet50 import ResNet50
from keras.layers import Flatten, Input
from keras.models import Model
from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input
import numpy as np
from google.cloud import storage
from io import BytesIO
import time
start = time.time()
# Headless ResNet50 used as a feature extractor. `pooling` must be the string
# "max" (or "avg"); the builtin function `max` that was passed before is not
# one of the documented values, so global max pooling was never selected.
model = ResNet50(weights="imagenet", pooling="max", include_top=False)
# List every blob of the bucket up front; the loop below slices this list.
client = storage.Client()
bucket = client.get_bucket("landsbyconst")
bucket_list = list(bucket.list_blobs())
# A stray `f = BytesIO(file.download_as_string())` was removed here: no blob
# had been selected yet, so `file` was undefined at this point. Each blob is
# downloaded inside the extraction loop instead.
X_test = []
####### GENERATING FEATURES
# Four text outputs: one file name per line and, on the matching line of the
# companion file, that image's space-separated feature values. The misspelt
# "train_featues.txt" is kept because later code opens that exact path.
train_filenames = open("train_filenames.txt", "w+")
test_filenames = open("test_filenames.txt", "w+")
train_featues = open("train_featues.txt", "w+")
test_features = open("test_features.txt", "w+")
i = 0  # number of blobs processed successfully
start = time.time()
try:
    # for file in tqdm(bucket_list[0:5000]):
    for blob in bucket_list[0:501]:
        try:
            f = BytesIO(blob.download_as_string())
            img = image.load_img(f, target_size=(224, 224))
            x = image.img_to_array(img)
            x = np.expand_dims(x, axis=0)
            x = preprocess_input(x)
            features = model.predict(x)
            features_reduce = features.squeeze()
            X_test.append(features_reduce)
            # The blob path is split on "/o/" and then on "%2F"; the first
            # piece is the split name and the second the file name.
            file_name = blob.path.split("/o/")[1].split("%2F")
            # Build both lines before writing either one, so a failure cannot
            # leave the name file and the feature file out of step.
            name_line = file_name[1] + "\n"
            feature_line = " ".join(str(value) for value in features_reduce) + "\n"
            if file_name[0] == "train":
                train_filenames.write(name_line)
                train_featues.write(feature_line)
            else:
                test_filenames.write(name_line)
                test_features.write(feature_line)
            i = i + 1
        except Exception as exc:
            # Best effort: one unreadable blob must not stop the run, but it
            # is reported instead of being dropped silently.
            print("skipped", getattr(blob, "path", blob), "->", repr(exc))
finally:
    # Close (and flush) the outputs even if the loop is interrupted.
    train_filenames.close()
    test_filenames.close()
    train_featues.close()
    test_features.close()
print(i)
end = time.time()
print("\n\ntime spend: ", (end - start) / 60, " minutes \n\n")
# my_file = open('test_filenames.txt', 'r')
# print(my_file.read())
# my_file.close()
# sum(1 for line in open('test_filenames.txt'))
# file_len('test_filenames.txt')
# xb for the database, that contains all the vectors that must be indexed, and that we are going to search in. Its size is nb-by-d
# xq for the query vectors, for which we need to find the nearest neighbors. Its size is nq-by-d. If we have a single query vector, nq=1.
import os

# The feature files are plain text (one space-separated vector per line), so
# they are read with np.loadtxt; np.load only understands .npy/.npz/pickle
# data. The vectors are loaded as float32, and ndmin=2 keeps a single vector
# two-dimensional.
# NOTE(review): previously xb was read from the file-name list and xq from
# the train features. Indexing the train features and querying with the test
# features is inferred from the comments above - confirm.
xb = np.loadtxt("train_featues.txt", dtype=np.float32, ndmin=2)
xq = np.loadtxt("test_features.txt", dtype=np.float32, ndmin=2)
print(xb.shape)
print(xq.shape)
import faiss

d = xb.shape[1]  # feature dimensionality, previously never defined
res = faiss.StandardGpuResources()  # use a single GPU
# build a flat (CPU) index
index_flat = faiss.IndexFlatL2(d)
# make it into a gpu index
gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index_flat)
gpu_index_flat.add(xb)  # add vectors to the index
print(gpu_index_flat.ntotal)
k = 100  # number of nearest neighbours returned per query
D, I = gpu_index_flat.search(xq, k)  # actual search
# print(I[:5]) # neighbors of the 5 first queries
# print(I[-5:]) # neighbors of the 5 last queries
# np.save does not create missing directories.
os.makedirs("output", exist_ok=True)
np.save("output/I.npy", I)
np.save("output/D.npy", D)
### make submission
# These three modules are used below but were never imported by the script.
import glob
import os

import pandas as pd

# Ids are the file names with their last four characters (the extension)
# removed. Both lists are sorted.
index_path = "input/index/"
index_list = sorted(glob.glob(index_path + "*"))  # 1091756
index_list = pd.DataFrame(index_list, columns=["id"])
index_list["id"] = index_list["id"].apply(lambda x: os.path.basename(x)[:-4])
index_list = np.array(index_list["id"])
query_path = "input/query/"
query_list = sorted(glob.glob(query_path + "*"))  # 114943
sub = pd.DataFrame(query_list, columns=["id"])
sub["id"] = sub["id"].apply(lambda x: os.path.basename(x)[:-4])
# Look up the id of every neighbour in I, append a space to each and
# concatenate along each row, giving one "id id id ... " string per query.
images_list = index_list[I]
images_list = images_list + " "
images_list = np.sum(images_list, axis=1)
sub["images"] = images_list
# Add the sample-submission rows with an empty prediction; duplicate ids are
# then dropped, keeping the first occurrence (the rows built above).
sub2 = pd.read_csv("input/sample_submission.csv")
sub2["images"] = ""
sub = pd.concat([sub, sub2])
sub = sub.drop_duplicates(["id"])
# sub.to_csv("output/sub_{}_{}.csv".format(model_name, feature_layer), index=None)
sub.to_csv("output/resnet50_output.csv", index=False)
================================================
FILE: DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/README.MD
================================================
## Solution to Avito Challenge 2018
Link: https://www.kaggle.com/c/avito-demand-prediction
When selling used goods online, a combination of tiny, nuanced details in a product description can make a big difference in drumming up interest. And, even with an optimized product listing, demand for a product may simply not exist–frustrating sellers who may have over-invested in marketing.
Avito, Russia’s largest classified advertisements website, is deeply familiar with this problem. Sellers on their platform sometimes feel frustrated with both too little demand (indicating something is wrong with the product or the product listing) or too much demand (indicating a hot item with a good description was underpriced).
In their fourth Kaggle competition, Avito is challenging you to predict demand for an online advertisement based on its full description (title, description, images, etc.), its context (geographically where it was posted, similar ads already posted) and historical demand for similar ads in similar contexts. With this information, Avito can inform sellers on how to best optimize their listing and provide some indication of how much interest they should realistically expect to receive.
My team and I ranked 131st (top 7%) in the Avito Demand Prediction Challenge on the Kaggle platform and earned bronze medals.
Teammates:
* [Artgor](https://github.com/Erlemar/Avito_demand_prediction_2018)
* [Nikita](https://github.com/ML-Person/My-solution-to-Avito-Challenge-2018)
* @Kmike
I want to thank opendatascience (http://ODS.ai/) community.
================================================
FILE: DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/avito_deepIQA/deepIQA/LICENSE
================================================
MIT License
Copyright (c) 2016 Dominique Maniry
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/avito_deepIQA/deepIQA/README.md
================================================
# deepIQA
This is the reference implementation of [Deep Neural Networks for No-Reference and Full-Reference Image Quality Assessment][arxiv].
The pretrained models contained in the models directory were trained for both NR and FR IQA and for both model variants described in the paper.
They were trained on the full LIVE or TID2013 database respectively, as used in the cross-dataset evaluations. This evaluation script uses non-overlapping 32x32 patches to produce deterministic scores, whereas the evaluation in the paper uses randomly sampled overlapping patches.
> usage: evaluate.py [-h] [--model MODEL] [--top {patchwise,weighted}]
> [--gpu GPU]
> INPUT [REF]
## Dependencies
* [chainer](http://chainer.org/)
* scikit-learn
* opencv
## TODO
* add training code
* add cpu support (minor change)
* remove opencv and scikit-learn dependencies for loading data (minor changes)
[arxiv]: http://arxiv.org/abs/1612.01697
================================================
FILE: DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/avito_deepIQA/deepIQA/evaluate.py
================================================
#!/usr/bin/python2
import argparse
import os
import cv2
import numpy as np
import pandas as pd
import six
from chainer import cuda
from chainer import serializers
from sklearn.feature_extraction.image import extract_patches
from tqdm import tqdm
from deepIQA.fr_model import FRModel
from deepIQA.nr_model import Model
# Path of the pretrained weights, loaded into the network below.
top = "models/nr_live_weighted.model"
# NOTE(review): the command-line variant of this script passes "patchwise" or
# "weighted" as `top`, whereas here the checkpoint path is passed - confirm
# that Model handles this as intended.
model = Model(top=top)
cuda.cudnn_enabled = True
cuda.check_cuda_available()
xp = cuda.cupy
serializers.load_hdf5(top, model)
model.to_gpu()
images_path = "../../test_jpg/"
# images_path_test = '../input/test_jpg/'
names = []
extracted_features = []
file_path = "../input/deepIQA_features_test.csv"
# Create the empty output file, failing if it already exists. os.mknod did
# the same but is Unix-only and may need extra privileges; an exclusive
# os.open is portable and keeps the fail-if-exists behaviour.
os.close(os.open(file_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o600))
# Plain files directly inside images_path (sub-directories are not visited).
train_ids = next(os.walk(images_path))[2]
# True until the first chunk, which carries the CSV header, has been written.
f = True
def _append_features(chunk_names, chunk_values, path, write_header):
    """Append one chunk of scores and their ids to the CSV file at *path*."""
    df = pd.DataFrame(chunk_values)
    df["ids"] = pd.Series(chunk_names).values
    df.to_csv(
        path,
        mode="a",
        index_label=False,
        index=False,
        chunksize=len(chunk_names),
        header=write_header,
    )


for name in tqdm(train_ids):
    try:
        img = cv2.imread(images_path + name)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # Non-overlapping 32x32 RGB patches, rearranged to (N, 3, 32, 32).
        patches = extract_patches(img, (32, 32, 3), 32)
        X = np.transpose(patches.reshape((-1, 32, 32, 3)), (0, 3, 1, 2))
        y = []
        weights = []
        batchsize = min(2000, X.shape[0])
        t = xp.zeros((1, 1), np.float32)
        for i in six.moves.range(0, X.shape[0], batchsize):
            X_batch = X[i : i + batchsize]
            X_batch = xp.array(X_batch.astype(np.float32))
            model.forward(X_batch, t, False, X_batch.shape[0])
            y.append(xp.asnumpy(model.y[0].data).reshape((-1,)))
            weights.append(xp.asnumpy(model.a[0].data).reshape((-1,)))
        y = np.concatenate(y)
        weights = np.concatenate(weights)
        # Image score: weighted average of the per-patch outputs.
        v = np.sum(y * weights) / np.sum(weights)
    except Exception as exc:
        # Best effort per image: report the file that failed and carry on.
        print("%s (%r)" % (name, exc))
        continue
    # Record the id and its score together, only after the score exists, so
    # the two lists can never get out of step.
    names.append(name[:-4])
    extracted_features.append(v)
    # Flush every 10000 images to bound memory use. This runs outside the
    # try block so a failed write is raised instead of being swallowed.
    if len(names) >= 10000:
        _append_features(names, extracted_features, file_path, f)
        f = False
        names = []
        extracted_features = []
# Write whatever is left after the last full chunk.
if len(names) > 0:
    _append_features(names, extracted_features, file_path, f)
    f = False
"""
--model
models/nr_tid_patchwise.model
--top
patchwise
/home/alex/work/py/avito/input/train_jpg/0a0a5a3f22320e0508139273d23f390ca837aef252036034ed640fb939529bd9.jpg
"""
================================================
FILE: DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/avito_deepIQA/deepIQA/evaluate_back.py
================================================
#!/usr/bin/python2
import argparse
import cv2
import numpy as np
import six
from chainer import cuda
from chainer import serializers
from sklearn.feature_extraction.image import extract_patches
from deepIQA.fr_model import FRModel
from deepIQA.nr_model import Model
parser = argparse.ArgumentParser(description="evaluate.py")
parser.add_argument("INPUT", help="path to input image")
parser.add_argument(
"REF",
default="",
nargs="?",
help="path to reference image, if omitted NR IQA is assumed",
)
parser.add_argument("--model", "-m", default="", help="path to the trained model")
parser.add_argument(
"--top",
choices=("patchwise", "weighted"),
default="weighted",
help="top layer and loss definition",
)
parser.add_argument("--gpu", "-g", default=0, type=int, help="GPU ID")
args = parser.parse_args()
FR = True
if args.REF == "":
FR = False
if FR:
model = FRModel(top=args.top)
else:
model = Model(top=args.top)
cuda.cudnn_enabled = True
cuda.check_cuda_available()
xp = cuda.cupy
serializers.load_hdf5(args.model, model)
model.to_gpu()
if FR:
ref_img = cv2.imread(args.REF)
ref_img = cv2.cvtColor(ref_img, cv2.COLOR_BGR2RGB)
patches = extract_patches(ref_img, (32, 32, 3), 32)
X_ref = np.transpose(patches.reshape((-1, 32, 32, 3)), (0, 3, 1, 2))
img = cv2.imread(args.INPUT)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
patches = extract_patches(img, (32, 32, 3), 32)
X = np.transpose(patches.reshape((-1, 32, 32, 3)), (0, 3, 1, 2))
y = []
weights = []
batchsize = min(2000, X.shape[0])
t = xp.zeros((1, 1), np.float32)
for i in six.moves.range(0, X.shape[0], batchsize):
X_batch = X[i : i + batchsize]
X_batch = xp.array(X_batch.astyp
gitextract_2g5lvqpj/
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── Bug_report.md
│ │ ├── Feature_request.md
│ │ └── custom.md
│ └── workflows/
│ └── label.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── DEEP LEARNING/
│ ├── Autoencoders GANS/
│ │ ├── GAN-for-tabular-data/
│ │ │ ├── CODE_OF_CONDUCT.md
│ │ │ ├── LICENSE
│ │ │ ├── README.md
│ │ │ ├── ctgan/
│ │ │ │ ├── README.MD
│ │ │ │ ├── __init__.py
│ │ │ │ ├── __main__.py
│ │ │ │ ├── conditional.py
│ │ │ │ ├── data.py
│ │ │ │ ├── demo.py
│ │ │ │ ├── models.py
│ │ │ │ ├── sampler.py
│ │ │ │ ├── synthesizer.py
│ │ │ │ └── transformer.py
│ │ │ ├── encoders.py
│ │ │ ├── model.py
│ │ │ ├── results/
│ │ │ │ └── fit_predict_scores.txt
│ │ │ ├── run_experiment.py
│ │ │ └── utils.py
│ │ └── pytorch/
│ │ ├── CGAN/
│ │ │ └── ConditionalGAN.py
│ │ ├── DCGAN/
│ │ │ └── dcgan.py
│ │ ├── ProgressiveGAN/
│ │ │ ├── README.md
│ │ │ ├── progan_modules.py
│ │ │ └── train.py
│ │ ├── Semi-supervised GAN/
│ │ │ ├── Datasets.py
│ │ │ ├── ImprovedGAN.py
│ │ │ ├── Nets.py
│ │ │ ├── README.md
│ │ │ └── functional.py
│ │ └── VAE/
│ │ └── VAR mnist.py
│ ├── Google Landmark Retrieval Challenge.py
│ ├── Kaggle Avito Demand Prediction Challenge/
│ │ ├── README.MD
│ │ ├── image feat. extraction/
│ │ │ ├── avito_deepIQA/
│ │ │ │ └── deepIQA/
│ │ │ │ ├── LICENSE
│ │ │ │ ├── README.md
│ │ │ │ ├── evaluate.py
│ │ │ │ ├── evaluate_back.py
│ │ │ │ ├── fr_model.py
│ │ │ │ └── nr_model.py
│ │ │ ├── neural-image-assessment/
│ │ │ │ ├── README.md
│ │ │ │ ├── evaluate_inception_resnet.py
│ │ │ │ ├── evaluate_mobilenet.py
│ │ │ │ ├── evaluate_nasnet.py
│ │ │ │ └── utils/
│ │ │ │ ├── check_dataset.py
│ │ │ │ ├── data_loader.py
│ │ │ │ ├── nasnet.py
│ │ │ │ └── score_utils.py
│ │ │ └── nn_image_features.py
│ │ ├── stem to SVD.py
│ │ └── text embeddings.py
│ ├── NLP/
│ │ ├── Kaggle Quora Insincere Questions Classification/
│ │ │ ├── 3rd-place.py
│ │ │ ├── README.MD
│ │ │ └── fix misspellings.py
│ │ ├── LSTM RNN/
│ │ │ ├── Next Chars pytorch/
│ │ │ │ ├── Char level RNN/
│ │ │ │ │ └── data/
│ │ │ │ │ └── anna.txt
│ │ │ │ └── project-tv-script-generation/
│ │ │ │ ├── data/
│ │ │ │ │ └── Seinfeld_Scripts.txt
│ │ │ │ ├── helper.py
│ │ │ │ └── problem_unittests.py
│ │ │ └── Sentiment pytorch/
│ │ │ ├── labels.txt
│ │ │ └── reviews.txt
│ │ ├── WSDM - Fake News Classification/
│ │ │ └── Berd generate embeddings/
│ │ │ ├── 0_bert_encode_en_train.py
│ │ │ ├── 1_bert_encode_en_test.py
│ │ │ ├── 2_bert_encode_ch_train.py
│ │ │ ├── 3_bert_encode_ch_test.py
│ │ │ └── 4_gen_encoded_dfs.py
│ │ ├── elmo EMBEDDINGS/
│ │ │ └── Sentence encode.html
│ │ └── text analyses/
│ │ └── Logistic regression with words and char n-grams.py
│ ├── Object detection/
│ │ ├── YOLO Object Localization Keras/
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── font/
│ │ │ │ ├── FiraMono-Medium.otf
│ │ │ │ └── SIL Open Font License.txt
│ │ │ ├── model_data/
│ │ │ │ ├── coco_classes.txt
│ │ │ │ ├── object_classes.txt
│ │ │ │ └── yolo_anchors.txt
│ │ │ ├── requirements.txt
│ │ │ ├── yad2k/
│ │ │ │ └── utils/
│ │ │ │ ├── __init__.py
│ │ │ │ └── utils.py
│ │ │ ├── yolo_run.py
│ │ │ └── yolo_utils.py
│ │ └── keras retinanet/
│ │ └── train.py
│ ├── Pytorch from scratch/
│ │ ├── CNN/
│ │ │ └── project-dog-classification/
│ │ │ ├── README.md
│ │ │ └── haarcascades/
│ │ │ └── haarcascade_frontalface_alt.xml
│ │ ├── MLP/
│ │ │ ├── fc_model.py
│ │ │ └── helper.py
│ │ ├── TODO/
│ │ │ └── GAN/
│ │ │ ├── cycle-gan/
│ │ │ │ ├── helpers.py
│ │ │ │ └── samples_cyclegan/
│ │ │ │ └── samples_dir.txt
│ │ │ └── project-face-generation/
│ │ │ └── problem_unittests.py
│ │ └── word2vec-embeddings/
│ │ ├── data/
│ │ │ └── download_data.txt
│ │ └── utils.py
│ └── segmentation/
│ ├── Kaggle TGS Salt Identification Challenge/
│ │ ├── README.MD
│ │ ├── v1/
│ │ │ ├── data_loader.py
│ │ │ ├── data_process/
│ │ │ │ ├── 10fold/
│ │ │ │ │ └── test.txt
│ │ │ │ └── transform.py
│ │ │ ├── evaluate.py
│ │ │ ├── loss/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── bce_losses.py
│ │ │ │ ├── cyclic_lr.py
│ │ │ │ └── lovasz_losses.py
│ │ │ ├── main.py
│ │ │ ├── model/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── ibnnet.py
│ │ │ │ ├── model.py
│ │ │ │ └── senet.py
│ │ │ └── utils.py
│ │ ├── v2/
│ │ │ ├── common_blocks/
│ │ │ │ ├── augmentation.py
│ │ │ │ ├── callbacks.py
│ │ │ │ ├── loaders.py
│ │ │ │ ├── metrics.py
│ │ │ │ ├── models.py
│ │ │ │ ├── pipelines.py
│ │ │ │ ├── pnasnet.py
│ │ │ │ ├── postprocessing.py
│ │ │ │ ├── preprocessing.py
│ │ │ │ ├── resnext.py
│ │ │ │ ├── unet_models.py
│ │ │ │ └── utils.py
│ │ │ ├── configs/
│ │ │ │ └── neptune.yaml
│ │ │ ├── modules/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── bn.py
│ │ │ │ ├── build.py
│ │ │ │ ├── build.sh
│ │ │ │ ├── functions.py
│ │ │ │ ├── misc.py
│ │ │ │ ├── residual.py
│ │ │ │ ├── src/
│ │ │ │ │ ├── common.h
│ │ │ │ │ ├── inplace_abn.cpp
│ │ │ │ │ ├── inplace_abn.h
│ │ │ │ │ ├── inplace_abn_cpu.cpp
│ │ │ │ │ └── inplace_abn_cuda.cu
│ │ │ │ └── wider_resnet.py
│ │ │ └── results.ods
│ │ └── vanilla unet/
│ │ └── utils/
│ │ ├── cyclelr_callback.py
│ │ ├── lovasz_losses_tf.py
│ │ └── zf_unet_224_model.py
│ ├── Segmentation pipeline/
│ │ ├── README.MD
│ │ ├── get dataset.py
│ │ ├── segmentation pipeline.html
│ │ ├── segmentation pipeline.py
│ │ └── weights/
│ │ └── .gitkeep
│ ├── Severstal-Steel-Defect-Detection-master/
│ │ ├── .gitignore
│ │ ├── README.md
│ │ ├── classification_pytorch_dummy.py
│ │ ├── common_blocks/
│ │ │ ├── __init__.py
│ │ │ ├── bam.py
│ │ │ ├── cbam.py
│ │ │ ├── dataloader.py
│ │ │ ├── generate_folds.py
│ │ │ ├── logger.py
│ │ │ ├── losses.py
│ │ │ ├── lovasz_losses.py
│ │ │ ├── metric.py
│ │ │ ├── new_metrics.py
│ │ │ ├── optimizers.py
│ │ │ ├── training_helper.py
│ │ │ └── utils.py
│ │ ├── configs/
│ │ │ ├── __init__.py
│ │ │ └── train_params.py
│ │ ├── inference.py
│ │ ├── model_resnet.py
│ │ └── train.py
│ └── Understanding-Clouds-from-Satellite-Images-master/
│ ├── .gitattributes
│ ├── .gitignore
│ ├── README.md
│ ├── augs.py
│ ├── callbacks.py
│ ├── config.py
│ ├── dataset.py
│ ├── inference_blend.py
│ ├── losses/
│ │ ├── losses.py
│ │ └── lovasz_losses.py
│ ├── optimizers.py
│ ├── predict.py
│ ├── schedulers.py
│ ├── train.py
│ ├── train.sh
│ └── utils.py
├── LICENSE
├── README.md
├── _config.yml
├── classification/
│ ├── Kaggle Home Credit Default Risk/
│ │ └── README.MD
│ ├── Kaggle Malware Prediction/
│ │ ├── README.MD
│ │ ├── kaggle.py
│ │ ├── models.py
│ │ ├── models_zoo.py
│ │ ├── oof_preds_level_1/
│ │ │ └── readme.md
│ │ ├── target_encoding.py
│ │ ├── test_preds_level_1/
│ │ │ └── readme.md
│ │ └── test_preds_level_2/
│ │ └── readme.md
│ ├── Kaggle Petfinder/
│ │ ├── 8th-place-solution-code.py
│ │ └── README.MD
│ └── Kaggle red hat user/
│ └── README.MD
├── deployment/
│ ├── docker flask fit predict/
│ │ ├── Dockerfile
│ │ ├── README.MD
│ │ ├── docker-compose.yml
│ │ ├── hello.py
│ │ ├── templates/
│ │ │ └── submit.html
│ │ └── train_model.py
│ └── ds docker db template/
│ ├── README.md
│ ├── docker/
│ │ ├── jupyter/
│ │ │ ├── Dockerfile
│ │ │ └── requirements.txt
│ │ └── postgres/
│ │ ├── Dockerfile
│ │ └── initdb.sql
│ └── docker-compose.yml
├── general studies/
│ ├── finetune gbm.md
│ ├── finetune xgb.md
│ └── get feature importance.py
├── images/
│ └── road-detection
├── recommendations/
│ └── ods_course/
│ ├── README.md
│ ├── competition/
│ │ ├── requirements.txt
│ │ └── tools.py
│ ├── lecture_2/
│ │ └── requirements.txt
│ ├── lecture_4/
│ │ ├── Dockerfile
│ │ ├── Readme.md
│ │ ├── ann/
│ │ │ ├── __init__.py
│ │ │ └── recommender.py
│ │ ├── config/
│ │ │ ├── __init__.py
│ │ │ ├── config.py
│ │ │ └── config.yaml
│ │ ├── main.py
│ │ └── pyproject.toml
│ └── lecture_5/
│ ├── README.md
│ ├── requirements.txt
│ └── tools.py
├── regression/
│ └── kaggle santander value prediction/
│ └── README.md
└── time series regression/
├── ARIMA/
│ ├── AR.py
│ ├── ARIMA.py
│ ├── ARMA.py
│ ├── ARMA_IBMstock.py
│ └── MA.py
├── Data Files/
│ ├── DJIA_Jan2016_Dec2016.xlsx
│ ├── Data Files
│ ├── Monthly_CO2_Concentrations.xlsx
│ ├── World Bank Mobile Phone Statistics.xlsx
│ ├── inflation-consumer-prices-annual.xlsx
│ └── mean-daily-temperature-fisher-river.xlsx
├── anomaly detection/
│ ├── README.md
│ ├── anomaly-detection-using-facebook-s-prophet.py
│ └── sunspots.txt
└── autocorelation, mov avg etc/
├── decomposition.py
├── doubleExponentialSmoothing.py
├── simpleExponentialSmoothing.py
└── tripleExponentialSmoothing.py
SYMBOL INDEX (1371 symbols across 121 files)
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/__main__.py
function _parse_args (line 7) | def _parse_args():
function main (line 45) | def main():
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/conditional.py
class ConditionalGenerator (line 4) | class ConditionalGenerator(object):
method __init__ (line 5) | def __init__(self, data, output_info, log_frequency):
method random_choice_prob_index (line 66) | def random_choice_prob_index(self, idx):
method sample (line 71) | def sample(self, batch):
method sample_zero (line 87) | def sample_zero(self, batch):
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/data.py
function read_csv (line 7) | def read_csv(csv_filename, meta_filename=None, header=True, discrete=None):
function read_tsv (line 32) | def read_tsv(data_filename, meta_filename):
function write_tsv (line 78) | def write_tsv(data, meta, output_filename):
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/demo.py
function load_demo (line 6) | def load_demo():
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/models.py
class Discriminator (line 5) | class Discriminator(Module):
method calc_gradient_penalty (line 6) | def calc_gradient_penalty(
method __init__ (line 33) | def __init__(self, input_dim, dis_dims, pack=10):
method forward (line 46) | def forward(self, input):
class Residual (line 51) | class Residual(Module):
method __init__ (line 52) | def __init__(self, i, o):
method forward (line 58) | def forward(self, input):
class Generator (line 65) | class Generator(Module):
method __init__ (line 66) | def __init__(self, embedding_dim, gen_dims, data_dim):
method forward (line 76) | def forward(self, input):
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/sampler.py
class Sampler (line 4) | class Sampler(object):
method __init__ (line 7) | def __init__(self, data, output_info):
method sample (line 37) | def sample(self, n, col, opt):
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/synthesizer.py
class EarlyStopping (line 11) | class EarlyStopping:
method __init__ (line 14) | def __init__(self, patience=7, verbose=False, delta=0):
method __call__ (line 32) | def __call__(self, val_loss):
class CTGANSynthesizer (line 48) | class CTGANSynthesizer(object):
method __init__ (line 72) | def __init__(
method _apply_activate (line 90) | def _apply_activate(self, data):
method _cond_loss (line 107) | def _cond_loss(self, data, c, m):
method fit (line 141) | def fit(self, train_data, discrete_columns=tuple(), epochs=300, log_fr...
method sample (line 278) | def sample(self, n):
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/transformer.py
class DataTransformer (line 9) | class DataTransformer(object):
method __init__ (line 23) | def __init__(self, n_clusters=10, epsilon=0.005):
method _fit_continuous (line 28) | def _fit_continuous(self, column, data):
method _fit_discrete (line 47) | def _fit_discrete(self, column, data):
method fit (line 59) | def fit(self, data, discrete_columns=tuple()):
method _transform_continuous (line 81) | def _transform_continuous(self, column_meta, data):
method _transform_discrete (line 109) | def _transform_discrete(self, column_meta, data):
method transform (line 113) | def transform(self, data):
method _inverse_transform_continuous (line 127) | def _inverse_transform_continuous(self, meta, data, sigma):
method _inverse_transform_discrete (line 150) | def _inverse_transform_discrete(self, meta, data):
method inverse_transform (line 154) | def inverse_transform(self, data, sigmas):
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/encoders.py
function get_single_encoder (line 19) | def get_single_encoder(encoder_name: str, cat_cols: list):
class DoubleValidationEncoderNumerical (line 68) | class DoubleValidationEncoderNumerical:
method __init__ (line 73) | def __init__(self, cols, encoders_names_tuple=()):
method fit_transform (line 89) | def fit_transform(self, X: pd.DataFrame, y: np.array) -> pd.DataFrame:
method transform (line 133) | def transform(self, X: pd.DataFrame) -> pd.DataFrame:
class MultipleEncoder (line 165) | class MultipleEncoder:
method __init__ (line 170) | def __init__(self, cols: List[str], encoders_names_tuple=()):
method fit_transform (line 186) | def fit_transform(self, X: pd.DataFrame, y: np.array) -> pd.DataFrame:
method transform (line 212) | def transform(self, X) -> pd.DataFrame:
class FrequencyEncoder (line 236) | class FrequencyEncoder:
method __init__ (line 237) | def __init__(self, cols):
method fit (line 241) | def fit(self, X: pd.DataFrame, y=None) -> pd.DataFrame:
method transform (line 248) | def transform(self, X: pd.DataFrame) -> pd.DataFrame:
method fit_transform (line 269) | def fit_transform(self, X: pd.DataFrame, y=None) -> pd.DataFrame:
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/model.py
class Model (line 11) | class Model:
method __init__ (line 12) | def __init__(
method fit (line 41) | def fit(self, X: pd.DataFrame, y: np.array) -> tuple:
method predict (line 105) | def predict(self, X: pd.DataFrame, return_shape=True) -> np.array:
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/run_experiment.py
function execute_experiment (line 13) | def execute_experiment(dataset_name, encoders_list, validation_type, sam...
FILE: DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/utils.py
function save_dict_to_file (line 12) | def save_dict_to_file(dic: dict, path: str, save_raw=False) -> None:
function save_exp_to_file (line 31) | def save_exp_to_file(dic: dict, path: str) -> None:
function cat_cols_info (line 52) | def cat_cols_info(
function adversarial_test (line 81) | def adversarial_test(left_df, right_df, cat_cols):
function extend_gan_train (line 125) | def extend_gan_train(x_train, y_train, x_test, cat_cols, gen_x_times=1.2...
function extend_from_original (line 186) | def extend_from_original(x_train, y_train, x_test, cat_cols, gen_x_times...
FILE: DEEP LEARNING/Autoencoders GANS/pytorch/CGAN/ConditionalGAN.py
function to_onehot (line 58) | def to_onehot(x, num_classes=10):
function get_sample_image (line 76) | def get_sample_image(G, n_noise=100):
class Discriminator (line 94) | class Discriminator(nn.Module):
method __init__ (line 99) | def __init__(self, input_size=784, condition_size=10, num_classes=1):
method forward (line 110) | def forward(self, x, c):
class Generator (line 120) | class Generator(nn.Module):
method __init__ (line 125) | def __init__(self, input_size=100, condition_size=10, num_classes=784):
method forward (line 143) | def forward(self, x, c):
function save_checkpoint (line 281) | def save_checkpoint(state, file_name="checkpoint.pth.tar"):
FILE: DEEP LEARNING/Autoencoders GANS/pytorch/DCGAN/dcgan.py
function weights_init (line 113) | def weights_init(m):
class Generator (line 127) | class Generator(nn.Module):
method __init__ (line 128) | def __init__(self, ngpu):
method forward (line 154) | def forward(self, input):
class Discriminator (line 176) | class Discriminator(nn.Module):
method __init__ (line 177) | def __init__(self, ngpu):
method forward (line 201) | def forward(self, input):
FILE: DEEP LEARNING/Autoencoders GANS/pytorch/ProgressiveGAN/progan_modules.py
class EqualLR (line 8) | class EqualLR:
method __init__ (line 9) | def __init__(self, name):
method compute_weight (line 12) | def compute_weight(self, module):
method apply (line 19) | def apply(module, name):
method __call__ (line 29) | def __call__(self, module, input):
function equal_lr (line 34) | def equal_lr(module, name="weight"):
class PixelNorm (line 40) | class PixelNorm(nn.Module):
method __init__ (line 41) | def __init__(self):
method forward (line 44) | def forward(self, input):
class EqualConv2d (line 48) | class EqualConv2d(nn.Module):
method __init__ (line 49) | def __init__(self, *args, **kwargs):
method forward (line 57) | def forward(self, input):
class EqualConvTranspose2d (line 61) | class EqualConvTranspose2d(nn.Module):
method __init__ (line 63) | def __init__(self, *args, **kwargs):
method forward (line 71) | def forward(self, input):
class EqualLinear (line 75) | class EqualLinear(nn.Module):
method __init__ (line 76) | def __init__(self, in_dim, out_dim):
method forward (line 85) | def forward(self, input):
class ConvBlock (line 89) | class ConvBlock(nn.Module):
method __init__ (line 90) | def __init__(
method forward (line 123) | def forward(self, input):
function upscale (line 128) | def upscale(feat):
class Generator (line 132) | class Generator(nn.Module):
method __init__ (line 133) | def __init__(self, input_code_dim=128, in_channel=128, pixel_norm=True...
method progress (line 174) | def progress(self, feat, module):
method output (line 179) | def output(self, feat1, feat2, module1, module2, alpha):
method forward (line 189) | def forward(self, input, step=0, alpha=-1):
class Discriminator (line 224) | class Discriminator(nn.Module):
method __init__ (line 225) | def __init__(self, feat_dim=128):
method forward (line 256) | def forward(self, input, step=0, alpha=-1):
FILE: DEEP LEARNING/Autoencoders GANS/pytorch/ProgressiveGAN/train.py
function accumulate (line 15) | def accumulate(model1, model2, decay=0.999):
function imagefolder_loader (line 23) | def imagefolder_loader(path):
function sample_data (line 34) | def sample_data(dataloader, image_size=4):
function train (line 50) | def train(generator, discriminator, init_step, loader, total_iter=600000):
FILE: DEEP LEARNING/Autoencoders GANS/pytorch/Semi-supervised GAN/Datasets.py
function MnistLabel (line 7) | def MnistLabel(class_num):
function MnistUnlabel (line 34) | def MnistUnlabel():
function MnistTest (line 44) | def MnistTest():
FILE: DEEP LEARNING/Autoencoders GANS/pytorch/Semi-supervised GAN/ImprovedGAN.py
class ImprovedGAN (line 21) | class ImprovedGAN(object):
method __init__ (line 22) | def __init__(self, G, D, labeled, unlabeled, test, args):
method trainD (line 48) | def trainD(self, x_label, y, x_unlabel):
method trainG (line 91) | def trainG(self, x_unlabel):
method train (line 105) | def train(self):
method predict (line 205) | def predict(self, x):
method eval (line 210) | def eval(self):
method draw (line 223) | def draw(self, batch_size):
FILE: DEEP LEARNING/Autoencoders GANS/pytorch/Semi-supervised GAN/Nets.py
class Discriminator (line 10) | class Discriminator(nn.Module):
method __init__ (line 11) | def __init__(self, input_dim=28 ** 2, output_dim=10):
method forward (line 25) | def forward(self, x, feature=False, cuda=False):
class Generator (line 45) | class Generator(nn.Module):
method __init__ (line 46) | def __init__(self, z_dim, output_dim=28 ** 2):
method forward (line 59) | def forward(self, batch_size, cuda=False):
FILE: DEEP LEARNING/Autoencoders GANS/pytorch/Semi-supervised GAN/functional.py
function log_sum_exp (line 8) | def log_sum_exp(x, axis=1):
function reset_normal_param (line 13) | def reset_normal_param(L, stdv, weight_scale=1.0):
class LinearWeightNorm (line 18) | class LinearWeightNorm(torch.nn.Module):
method __init__ (line 19) | def __init__(
method forward (line 43) | def forward(self, x):
method __repr__ (line 51) | def __repr__(self):
FILE: DEEP LEARNING/Autoencoders GANS/pytorch/VAE/VAR mnist.py
class Encoder (line 58) | class Encoder(nn.Module):
method __init__ (line 62) | def __init__(self, input_dim, hidden_dim, z_dim):
method forward (line 75) | def forward(self, x):
class Decoder (line 87) | class Decoder(nn.Module):
method __init__ (line 91) | def __init__(self, z_dim, hidden_dim, output_dim):
method forward (line 103) | def forward(self, x):
class VAE (line 118) | class VAE(nn.Module):
method __init__ (line 122) | def __init__(self, enc, dec):
method forward (line 128) | def forward(self, x):
function train (line 162) | def train():
function test (line 198) | def test():
FILE: DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/avito_deepIQA/deepIQA/fr_model.py
class FRModel (line 8) | class FRModel(chainer.Chain):
method __init__ (line 9) | def __init__(self, top="patchwise"):
method extract_features (line 35) | def extract_features(self, x, train=True):
method forward (line 62) | def forward(self, x_data, x_ref_data, y_data, train=True, n_patches_pe...
method patchwise_loss (line 103) | def patchwise_loss(self, h, a, t):
method weighted_loss (line 114) | def weighted_loss(self, h, a, t):
FILE: DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/avito_deepIQA/deepIQA/nr_model.py
class Model (line 11) | class Model(chainer.Chain):
method __init__ (line 12) | def __init__(self, top="patchwise"):
method forward (line 32) | def forward(self, x_data, y_data, train=True, n_patches=32):
method patchwise_loss (line 84) | def patchwise_loss(self, h, a, t):
method weighted_loss (line 95) | def weighted_loss(self, h, a, t):
FILE: DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/neural-image-assessment/utils/check_dataset.py
function parse_data (line 54) | def parse_data(filename):
FILE: DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/neural-image-assessment/utils/data_loader.py
function parse_data (line 51) | def parse_data(filename, scores):
function parse_data_without_augmentation (line 71) | def parse_data_without_augmentation(filename, scores):
function train_generator (line 89) | def train_generator(batchsize, shuffle=True):
function val_generator (line 131) | def val_generator(batchsize):
function features_generator (line 169) | def features_generator(record_path, faeture_size, batchsize, shuffle=True):
FILE: DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/neural-image-assessment/utils/nasnet.py
function NASNet (line 81) | def NASNet(
function NASNetLarge (line 393) | def NASNetLarge(
function NASNetMobile (line 475) | def NASNetMobile(
function NASNetCIFAR (line 557) | def NASNetCIFAR(
function _separable_conv_block (line 639) | def _separable_conv_block(
function _adjust_block (line 694) | def _adjust_block(p, ip, filters, weight_decay=5e-5, id=None):
function _normal_A (line 785) | def _normal_A(ip, p, filters, weight_decay=5e-5, id=None):
function _reduction_A (line 874) | def _reduction_A(ip, p, filters, weight_decay=5e-5, id=None):
function _add_auxiliary_head (line 983) | def _add_auxiliary_head(x, classes, weight_decay):
FILE: DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/neural-image-assessment/utils/score_utils.py
function mean_score (line 4) | def mean_score(scores):
function std_score (line 11) | def std_score(scores):
FILE: DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/nn_image_features.py
function get_names_paths (line 41) | def get_names_paths(images_dir):
function img_generator (line 49) | def img_generator(
function get_model_and_data (line 77) | def get_model_and_data(mode, model_name):
function extract_features (line 146) | def extract_features(model_name="vgg16", batch_size=64):
function create_features_df (line 178) | def create_features_df(model_name="vgg16", mode="train"):
FILE: DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/text embeddings.py
function embeding_reading (line 9) | def embeding_reading(path):
function text2features (line 22) | def text2features(embeddings_index, text):
function df_to_embed_features (line 37) | def df_to_embed_features(df, column, embeddings_index):
function load_emb (line 53) | def load_emb(embedding_path, tokenizer, max_features, default=False, emb...
FILE: DEEP LEARNING/NLP/Kaggle Quora Insincere Questions Classification/3rd-place.py
class AttentionWeightedAverage (line 57) | class AttentionWeightedAverage(Layer):
method __init__ (line 63) | def __init__(self, return_attention=False, **kwargs):
method build (line 69) | def build(self, input_shape):
method call (line 81) | def call(self, x, mask=None):
method get_output_shape_for (line 102) | def get_output_shape_for(self, input_shape):
method compute_output_shape (line 105) | def compute_output_shape(self, input_shape):
method compute_mask (line 111) | def compute_mask(self, input, input_mask=None):
function words (line 128) | def words(text):
function P (line 132) | def P(word):
function correction (line 139) | def correction(word):
function candidates (line 144) | def candidates(word):
function known (line 149) | def known(words):
function edits1 (line 154) | def edits1(word):
function edits2 (line 165) | def edits2(word):
function singlify (line 170) | def singlify(word):
function load_glove (line 180) | def load_glove(word_dict, lemma_dict):
function load_fasttext (line 243) | def load_fasttext(word_dict, lemma_dict):
function load_para (line 308) | def load_para(word_dict, lemma_dict):
function build_model (line 375) | def build_model(embedding_matrix, nb_words, embedding_size=300):
FILE: DEEP LEARNING/NLP/Kaggle Quora Insincere Questions Classification/fix misspellings.py
function include_spell_mistake (line 95) | def include_spell_mistake(word, similar_word, score):
FILE: DEEP LEARNING/NLP/LSTM RNN/Next Chars pytorch/project-tv-script-generation/helper.py
function load_data (line 9) | def load_data(path):
function preprocess_and_save_data (line 20) | def preprocess_and_save_data(dataset_path, token_lookup, create_lookup_t...
function load_preprocess (line 45) | def load_preprocess():
function save_model (line 52) | def save_model(filename, decoder):
function load_model (line 57) | def load_model(filename):
FILE: DEEP LEARNING/NLP/LSTM RNN/Next Chars pytorch/project-tv-script-generation/problem_unittests.py
class _TestNN (line 6) | class _TestNN(torch.nn.Module):
method __init__ (line 7) | def __init__(self, input_size, output_size):
method forward (line 12) | def forward(self, nn_input, hidden):
function _print_success_message (line 19) | def _print_success_message():
class AssertTest (line 23) | class AssertTest(object):
method __init__ (line 24) | def __init__(self, params):
method test (line 29) | def test(self, assert_condition, assert_message):
function test_create_lookup_tables (line 37) | def test_create_lookup_tables(create_lookup_tables):
function test_tokenize (line 121) | def test_tokenize(token_lookup):
function test_rnn (line 173) | def test_rnn(RNN, train_on_gpu):
function test_forward_back_prop (line 236) | def test_forward_back_prop(RNN, forward_back_prop, train_on_gpu):
FILE: DEEP LEARNING/NLP/WSDM - Fake News Classification/Berd generate embeddings/0_bert_encode_en_train.py
function gen_encodings (line 26) | def gen_encodings(df, column):
FILE: DEEP LEARNING/NLP/WSDM - Fake News Classification/Berd generate embeddings/1_bert_encode_en_test.py
function gen_encodings (line 26) | def gen_encodings(df, column):
FILE: DEEP LEARNING/NLP/WSDM - Fake News Classification/Berd generate embeddings/2_bert_encode_ch_train.py
function gen_encodings (line 26) | def gen_encodings(df, column):
FILE: DEEP LEARNING/NLP/WSDM - Fake News Classification/Berd generate embeddings/3_bert_encode_ch_test.py
function gen_encodings (line 26) | def gen_encodings(df, column):
FILE: DEEP LEARNING/NLP/WSDM - Fake News Classification/Berd generate embeddings/4_gen_encoded_dfs.py
function label_encode_target (line 9) | def label_encode_target(df, _inplace=True):
FILE: DEEP LEARNING/Object detection/YOLO Object Localization Keras/yad2k/utils/utils.py
function compose (line 6) | def compose(*funcs):
FILE: DEEP LEARNING/Object detection/YOLO Object Localization Keras/yolo_utils.py
function read_classes (line 11) | def read_classes(classes_path):
function read_anchors (line 18) | def read_anchors(anchors_path):
function generate_colors (line 26) | def generate_colors(class_names):
function scale_boxes (line 38) | def scale_boxes(boxes, image_shape):
function preprocess_image (line 48) | def preprocess_image(img_path, model_image_size):
function draw_boxes (line 58) | def draw_boxes(image, out_scores, out_boxes, out_classes, class_names, c...
FILE: DEEP LEARNING/Object detection/keras retinanet/train.py
function makedirs (line 53) | def makedirs(path):
function get_session (line 64) | def get_session():
function model_with_weights (line 72) | def model_with_weights(model, weights, skip_mismatch):
function create_models (line 85) | def create_models(
function create_callbacks (line 153) | def create_callbacks(
function create_generators (line 238) | def create_generators(args, preprocess_image):
function check_args (line 348) | def check_args(parsed_args):
function parse_args (line 389) | def parse_args(args):
function main (line 576) | def main(args=None):
FILE: DEEP LEARNING/Pytorch from scratch/MLP/fc_model.py
class Network (line 6) | class Network(nn.Module):
method __init__ (line 7) | def __init__(self, input_size, output_size, hidden_layers, drop_p=0.5):
method forward (line 29) | def forward(self, x):
function validation (line 40) | def validation(model, testloader, criterion):
function train (line 61) | def train(
FILE: DEEP LEARNING/Pytorch from scratch/MLP/helper.py
function test_network (line 7) | def test_network(net, trainloader):
function imshow (line 31) | def imshow(image, ax=None, title=None, normalize=True):
function view_recon (line 55) | def view_recon(img, recon):
function view_classify (line 68) | def view_classify(img, ps, version="MNIST"):
FILE: DEEP LEARNING/Pytorch from scratch/TODO/GAN/cycle-gan/helpers.py
function checkpoint (line 23) | def checkpoint(
function merge_images (line 38) | def merge_images(sources, targets, batch_size=16):
function to_data (line 56) | def to_data(x):
function save_samples (line 65) | def save_samples(
FILE: DEEP LEARNING/Pytorch from scratch/TODO/GAN/project-face-generation/problem_unittests.py
function _print_success_message (line 6) | def _print_success_message():
class AssertTest (line 10) | class AssertTest(object):
method __init__ (line 11) | def __init__(self, params):
method test (line 16) | def test(self, assert_condition, assert_message):
function test_discriminator (line 24) | def test_discriminator(Discriminator):
function test_generator (line 53) | def test_generator(Generator):
FILE: DEEP LEARNING/Pytorch from scratch/word2vec-embeddings/utils.py
function preprocess (line 5) | def preprocess(text):
function create_lookup_tables (line 30) | def create_lookup_tables(words):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/data_loader.py
function read_txt (line 9) | def read_txt(txt):
class SaltDataset (line 16) | class SaltDataset(Dataset):
method __init__ (line 17) | def __init__(self, transform, mode, image_size, fold_index, aug_list):
method set_mode (line 35) | def set_mode(self, mode, fold_index):
method __getitem__ (line 61) | def __getitem__(self, index):
method __len__ (line 131) | def __len__(self):
function get_foldloader (line 135) | def get_foldloader(image_size, batch_size, fold_index, aug_list=None, mo...
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/data_process/transform.py
function do_resize2 (line 9) | def do_resize2(image, mask, H, W):
function compute_center_pad (line 20) | def compute_center_pad(H, W, factor=32):
function do_center_pad_to_factor (line 38) | def do_center_pad_to_factor(image, factor=32):
function do_center_pad_to_factor_edgeYreflectX (line 46) | def do_center_pad_to_factor_edgeYreflectX(image, factor=32):
function do_center_pad_to_factor2 (line 55) | def do_center_pad_to_factor2(image, mask, factor=32):
function do_horizontal_flip (line 62) | def do_horizontal_flip(image):
function do_horizontal_flip2 (line 68) | def do_horizontal_flip2(image, mask):
function compute_random_pad (line 77) | def compute_random_pad(H, W, limit=(-4, 4), factor=32):
function do_random_pad_to_factor2 (line 95) | def do_random_pad_to_factor2(image, mask, limit=(-4, 4), factor=32):
function do_random_pad_to_factor2_edgeYreflectX (line 105) | def do_random_pad_to_factor2_edgeYreflectX(image, mask, limit=(-4, 4), f...
function do_invert_intensity (line 121) | def do_invert_intensity(image):
function do_brightness_shift (line 127) | def do_brightness_shift(image, alpha=0.125):
function do_brightness_multiply (line 133) | def do_brightness_multiply(image, alpha=1):
function do_gamma (line 140) | def do_gamma(image, gamma=1.0):
function do_flip_transpose2 (line 147) | def do_flip_transpose2(image, mask, type=0):
function do_shift_scale_crop (line 198) | def do_shift_scale_crop(image, mask, x0=0, y0=0, x1=1, y1=1):
function do_random_shift_scale_crop_pad2 (line 211) | def do_random_shift_scale_crop_pad2(image, mask, limit=0.10):
function do_shift_scale_rotate2 (line 228) | def do_shift_scale_rotate2(image, mask, dx=0, dy=0, scale=1, angle=0):
function do_elastic_transform2 (line 269) | def do_elastic_transform2(image, mask, grid=32, distort=0.2):
function do_horizontal_shear2 (line 330) | def do_horizontal_shear2(image, mask, dx=0):
function resize_and_pad (line 366) | def resize_and_pad(image, resize_size, factor):
function resize_and_pad_edgeYreflectX (line 372) | def resize_and_pad_edgeYreflectX(image, resize_size, factor):
function resize_and_random_pad (line 378) | def resize_and_random_pad(image, mask, resize_size, factor, limit=(-13, ...
function resize_and_random_pad_edgeYreflectX (line 385) | def resize_and_random_pad_edgeYreflectX(image, mask, resize_size, factor):
function center_corp (line 395) | def center_corp(image, image_size, crop_size):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/evaluate.py
function do_kaggle_metric (line 9) | def do_kaggle_metric(predict, truth, threshold=0.5):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/loss/bce_losses.py
class DiceLoss (line 9) | class DiceLoss(nn.Module):
method __init__ (line 10) | def __init__(self, smooth=0, eps=1e-7):
method forward (line 15) | def forward(self, output, target):
function mixed_dice_bce_loss (line 21) | def mixed_dice_bce_loss(
function multiclass_dice_loss (line 43) | def multiclass_dice_loss(output, target, smooth=0, activation="softmax"):
function where (line 73) | def where(cond, x_1, x_2):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/loss/cyclic_lr.py
class CosineAnnealingLR_with_Restart (line 6) | class CosineAnnealingLR_with_Restart(_LRScheduler):
method __init__ (line 37) | def __init__(
method get_lr (line 62) | def get_lr(self):
method step (line 74) | def step(self, epoch=None):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/loss/lovasz_losses.py
function lovasz_grad (line 19) | def lovasz_grad(gt_sorted):
function iou_binary (line 34) | def iou_binary(preds, labels, EMPTY=1.0, ignore=None, per_image=True):
function iou (line 54) | def iou(preds, labels, C, EMPTY=1.0, ignore=None, per_image=False):
function lovasz_hinge (line 81) | def lovasz_hinge(logits, labels, per_image=True, ignore=None):
function lovasz_hinge_flat (line 101) | def lovasz_hinge_flat(logits, labels):
function flatten_binary_scores (line 124) | def flatten_binary_scores(scores, labels, ignore=None):
class StableBCELoss (line 139) | class StableBCELoss(torch.nn.modules.Module):
method __init__ (line 140) | def __init__(self):
method forward (line 143) | def forward(self, input, target):
function binary_xloss (line 149) | def binary_xloss(logits, labels, ignore=None):
function lovasz_softmax (line 164) | def lovasz_softmax(probas, labels, only_present=False, per_image=False, ...
function lovasz_softmax_flat (line 188) | def lovasz_softmax_flat(probas, labels, only_present=False):
function flatten_probas (line 209) | def flatten_probas(probas, labels, ignore=None):
function xloss (line 224) | def xloss(logits, labels, ignore=None):
function mean (line 234) | def mean(l, ignore_nan=False, empty=0):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/main.py
class SingleModelSolver (line 20) | class SingleModelSolver(object):
method __init__ (line 21) | def __init__(self, config):
method build_model (line 67) | def build_model(self):
method print_network (line 89) | def print_network(self, model, name):
method load_pretrained_model (line 97) | def load_pretrained_model(self, fold_index, mode=None, Cycle=None):
method update_lr (line 165) | def update_lr(self, g_lr):
method to_var (line 169) | def to_var(self, x, volatile=False):
method criterion (line 174) | def criterion(self, logits, label):
method train_fold (line 180) | def train_fold(self, fold_index, aug_list):
method val_TTA (line 405) | def val_TTA(self, fold_index, val_loader, is_load=False, mode=None, Cy...
method get_infer_TTA (line 508) | def get_infer_TTA(self, fold_index, thres):
method infer_fold_TTA (line 556) | def infer_fold_TTA(self, fold_index, mode="max_map", Cycle=None):
method infer_fold_all_Cycle (line 582) | def infer_fold_all_Cycle(self, fold_index, mode="max_map"):
function main (line 587) | def main(config, aug_list):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/model/ibnnet.py
class IBN (line 19) | class IBN(nn.Module):
method __init__ (line 20) | def __init__(self, planes):
method forward (line 28) | def forward(self, x):
class Bottleneck (line 36) | class Bottleneck(nn.Module):
method __init__ (line 43) | def __init__(
method forward (line 84) | def forward(self, x):
class ResNeXt (line 107) | class ResNeXt(nn.Module):
method __init__ (line 113) | def __init__(self, baseWidth, cardinality, layers, num_classes):
method _make_layer (line 153) | def _make_layer(self, block, planes, blocks, stride=1):
method forward (line 206) | def forward(self, x):
function resnext50_ibn_a (line 222) | def resnext50_ibn_a(baseWidth, cardinality):
function resnext101_ibn_a (line 233) | def resnext101_ibn_a(baseWidth, cardinality, pretrained=True):
function resnext152_ibn_a (line 248) | def resnext152_ibn_a(baseWidth, cardinality):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/model/model.py
function Stconv3x3 (line 9) | def Stconv3x3(in_, out, bias=True):
function Stconv7x7 (line 13) | def Stconv7x7(in_, out, bias=True):
function Stconv5x5 (line 17) | def Stconv5x5(in_, out, bias=True):
function Stconv1x1 (line 21) | def Stconv1x1(in_, out, bias=True):
class StConvRelu (line 25) | class StConvRelu(nn.Module):
method __init__ (line 26) | def __init__(self, in_, out, kernel_size, norm_type=None):
method forward (line 50) | def forward(self, x):
class ImprovedIBNaDecoderBlock (line 61) | class ImprovedIBNaDecoderBlock(nn.Module):
method __init__ (line 62) | def __init__(self, in_channels, n_filters):
method forward (line 77) | def forward(self, x):
class SELayer (line 90) | class SELayer(nn.Module):
method __init__ (line 91) | def __init__(self, channel, reduction=16):
method forward (line 101) | def forward(self, x):
class SCSEBlock (line 108) | class SCSEBlock(nn.Module):
method __init__ (line 109) | def __init__(self, channel, reduction=16):
method forward (line 125) | def forward(self, x):
class Decoder (line 138) | class Decoder(nn.Module):
method __init__ (line 139) | def __init__(self, in_channels, channels, out_channels):
method forward (line 153) | def forward(self, x, e=None):
class model34_DeepSupervion (line 165) | class model34_DeepSupervion(nn.Module):
method __init__ (line 166) | def __init__(self, num_classes=1, mask_class=2):
method forward (line 214) | def forward(self, x):
class model50A_DeepSupervion (line 254) | class model50A_DeepSupervion(nn.Module):
method __init__ (line 255) | def __init__(self, num_classes=1):
method forward (line 305) | def forward(self, x):
class model101A_DeepSupervion (line 345) | class model101A_DeepSupervion(nn.Module):
method __init__ (line 346) | def __init__(self, num_classes=1):
method forward (line 397) | def forward(self, x):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/model/senet.py
class SEModule (line 92) | class SEModule(nn.Module):
method __init__ (line 93) | def __init__(self, channels, reduction):
method forward (line 101) | def forward(self, x):
class Bottleneck (line 111) | class Bottleneck(nn.Module):
method forward (line 116) | def forward(self, x):
class SEBottleneck (line 139) | class SEBottleneck(Bottleneck):
method __init__ (line 146) | def __init__(self, inplanes, planes, groups, reduction, stride=1, down...
class SEResNetBottleneck (line 168) | class SEResNetBottleneck(Bottleneck):
method __init__ (line 177) | def __init__(self, inplanes, planes, groups, reduction, stride=1, down...
class SEResNeXtBottleneck (line 195) | class SEResNeXtBottleneck(Bottleneck):
method __init__ (line 202) | def __init__(
class SENet (line 235) | class SENet(nn.Module):
method __init__ (line 236) | def __init__(
method _make_layer (line 364) | def _make_layer(
method features (line 399) | def features(self, x):
method logits (line 407) | def logits(self, x):
method forward (line 415) | def forward(self, x):
function initialize_pretrained_model (line 421) | def initialize_pretrained_model(model, num_classes, settings):
function senet154 (line 435) | def senet154(num_classes=1000, pretrained="imagenet"):
function se_resnet50 (line 450) | def se_resnet50(num_classes=1000, pretrained="imagenet"):
function se_resnet101 (line 469) | def se_resnet101(num_classes=1000, pretrained="imagenet"):
function se_resnet152 (line 488) | def se_resnet152(num_classes=1000, pretrained="imagenet"):
function se_resnext50_32x4d (line 507) | def se_resnext50_32x4d(num_classes=1000, pretrained="imagenet"):
function se_resnext101_32x4d (line 526) | def se_resnext101_32x4d(num_classes=1000, pretrained="imagenet"):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/utils.py
function do_length_encode (line 21) | def do_length_encode(x):
function do_length_decode (line 43) | def do_length_decode(rle, H, W, fill_value=255):
function decode_csv (line 58) | def decode_csv(csv_name):
function save_id_fea (line 73) | def save_id_fea(predict_dict, save_dir):
function state_dict_remove_moudle (line 79) | def state_dict_remove_moudle(moudle_state_dict, model):
function write_and_plot (line 94) | def write_and_plot(name, aver_num, logits, max_y=1.0, color="blue"):
function decompose (line 109) | def decompose(labeled):
function encode_rle (line 124) | def encode_rle(predictions):
function create_submission (line 128) | def create_submission(predictions):
function run_length_encoding (line 140) | def run_length_encoding(x):
function run_length_decoding (line 158) | def run_length_decoding(mask_rle, shape):
function sigmoid (line 179) | def sigmoid(x):
function softmax (line 183) | def softmax(X, theta=1.0, axis=None):
function from_pil (line 229) | def from_pil(*images):
function to_pil (line 237) | def to_pil(*images):
function binary_from_rle (line 245) | def binary_from_rle(rle):
function get_crop_pad_sequence (line 249) | def get_crop_pad_sequence(vertical, horizontal):
function get_list_of_image_predictions (line 257) | def get_list_of_image_predictions(batch_predictions):
function set_seed (line 264) | def set_seed(seed):
class ImgAug (line 272) | class ImgAug:
method __init__ (line 273) | def __init__(self, augmenters):
method _pre_call_hook (line 279) | def _pre_call_hook(self):
method transform (line 284) | def transform(self, *images):
method __call__ (line 291) | def __call__(self, *args):
function get_seed (line 296) | def get_seed():
function reseed (line 301) | def reseed(augmenter, deterministic=True):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/augmentation.py
function _perspective_transform_augment_images (line 9) | def _perspective_transform_augment_images(self, images, random_state, pa...
function resize_pad_seq (line 84) | def resize_pad_seq(resize_target_size, pad_method, pad_size):
function pad_to_fit_net (line 96) | def pad_to_fit_net(divisor, pad_mode, rest_of_augs=iaa.Noop()):
class PadFixed (line 100) | class PadFixed(iaa.Augmenter):
method __init__ (line 103) | def __init__(
method _augment_images (line 115) | def _augment_images(self, images, random_state, parents, hooks):
method _augment_keypoints (line 122) | def _augment_keypoints(self, keypoints_on_images, random_state, parent...
method _pad (line 126) | def _pad(self, img):
method get_parameters (line 147) | def get_parameters(self):
method _is_expanded_grey_format (line 150) | def _is_expanded_grey_format(self, img):
function test_time_augmentation_transform (line 157) | def test_time_augmentation_transform(image, tta_parameters):
function test_time_augmentation_inverse_transform (line 169) | def test_time_augmentation_inverse_transform(image, tta_parameters):
function per_channel_flipud (line 179) | def per_channel_flipud(x):
function per_channel_fliplr (line 186) | def per_channel_fliplr(x):
function per_channel_rotation (line 193) | def per_channel_rotation(x, angle):
function rotate (line 197) | def rotate(image, angle, axes=(0, 1)):
class RandomCropFixedSize (line 204) | class RandomCropFixedSize(iaa.Augmenter):
method __init__ (line 205) | def __init__(self, px=None, name=None, deterministic=False, random_sta...
method _augment_images (line 218) | def _augment_images(self, images, random_state, parents, hooks):
method _augment_keypoints (line 228) | def _augment_keypoints(self, keypoints_on_images, random_state, parent...
method _random_crop (line 232) | def _random_crop(self, seed, image):
method get_parameters (line 259) | def get_parameters(self):
class InferencePad (line 263) | class InferencePad(iaa.Augmenter):
method __init__ (line 264) | def __init__(
method _augment_keypoints (line 278) | def _augment_keypoints(self, keypoints_on_images, random_state, parent...
method _augment_images (line 281) | def _augment_images(self, images, random_state, parents, hooks):
method _pad_image (line 289) | def _pad_image(self, image):
method _get_pad_sequence (line 297) | def _get_pad_sequence(self, height, width):
method _get_pad (line 302) | def _get_pad(self, dim):
method get_parameters (line 308) | def get_parameters(self):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/callbacks.py
class Callback (line 35) | class Callback:
method __init__ (line 36) | def __init__(self):
method set_params (line 47) | def set_params(self, transformer, validation_datagen, *args, **kwargs):
method on_train_begin (line 55) | def on_train_begin(self, *args, **kwargs):
method on_train_end (line 59) | def on_train_end(self, *args, **kwargs):
method on_epoch_begin (line 62) | def on_epoch_begin(self, *args, **kwargs):
method on_epoch_end (line 65) | def on_epoch_end(self, *args, **kwargs):
method training_break (line 68) | def training_break(self, *args, **kwargs):
method on_batch_begin (line 71) | def on_batch_begin(self, *args, **kwargs):
method on_batch_end (line 74) | def on_batch_end(self, *args, **kwargs):
method get_validation_loss (line 77) | def get_validation_loss(self):
class CallbackList (line 85) | class CallbackList:
method __init__ (line 86) | def __init__(self, callbacks=None):
method __len__ (line 94) | def __len__(self):
method set_params (line 97) | def set_params(self, *args, **kwargs):
method on_train_begin (line 101) | def on_train_begin(self, *args, **kwargs):
method on_train_end (line 105) | def on_train_end(self, *args, **kwargs):
method on_epoch_begin (line 109) | def on_epoch_begin(self, *args, **kwargs):
method on_epoch_end (line 113) | def on_epoch_end(self, *args, **kwargs):
method training_break (line 117) | def training_break(self, *args, **kwargs):
method on_batch_begin (line 123) | def on_batch_begin(self, *args, **kwargs):
method on_batch_end (line 127) | def on_batch_end(self, *args, **kwargs):
class TrainingMonitor (line 132) | class TrainingMonitor(Callback):
method __init__ (line 133) | def __init__(self, epoch_every=None, batch_every=None):
method on_train_begin (line 145) | def on_train_begin(self, *args, **kwargs):
method on_epoch_end (line 150) | def on_epoch_end(self, *args, **kwargs):
method on_batch_end (line 162) | def on_batch_end(self, metrics, *args, **kwargs):
class ExponentialLRScheduler (line 180) | class ExponentialLRScheduler(Callback):
method __init__ (line 181) | def __init__(self, gamma, epoch_every=1, batch_every=None):
method set_params (line 193) | def set_params(self, transformer, validation_datagen, *args, **kwargs):
method on_train_begin (line 200) | def on_train_begin(self, *args, **kwargs):
method on_epoch_end (line 209) | def on_epoch_end(self, *args, **kwargs):
method on_batch_end (line 220) | def on_batch_end(self, *args, **kwargs):
class ExperimentTiming (line 233) | class ExperimentTiming(Callback):
method __init__ (line 234) | def __init__(self, epoch_every=None, batch_every=None):
method on_train_begin (line 249) | def on_train_begin(self, *args, **kwargs):
method on_train_end (line 254) | def on_train_end(self, *args, **kwargs):
method on_epoch_begin (line 257) | def on_epoch_begin(self, *args, **kwargs):
method on_batch_begin (line 272) | def on_batch_begin(self, *args, **kwargs):
class NeptuneMonitor (line 292) | class NeptuneMonitor(Callback):
method __init__ (line 293) | def __init__(self, image_nr, image_resize, model_name):
method on_train_begin (line 301) | def on_train_begin(self, *args, **kwargs):
method on_batch_end (line 306) | def on_batch_end(self, metrics, *args, **kwargs):
method on_epoch_end (line 324) | def on_epoch_end(self, *args, **kwargs):
method _send_numeric_channels (line 328) | def _send_numeric_channels(self, *args, **kwargs):
class ValidationMonitor (line 350) | class ValidationMonitor(Callback):
method __init__ (line 351) | def __init__(self, data_dir, loader_mode, epoch_every=None, batch_ever...
method set_params (line 369) | def set_params(
method get_validation_loss (line 382) | def get_validation_loss(self):
method on_epoch_end (line 385) | def on_epoch_end(self, *args, **kwargs):
method _get_validation_loss (line 399) | def _get_validation_loss(self):
method _transform (line 421) | def _transform(self):
method _generate_prediction (line 483) | def _generate_prediction(self, outputs):
class ModelCheckpoint (line 495) | class ModelCheckpoint(Callback):
method __init__ (line 496) | def __init__(self, filepath, metric_name="sum", epoch_every=1, minimiz...
method on_train_begin (line 508) | def on_train_begin(self, *args, **kwargs):
method on_epoch_end (line 513) | def on_epoch_end(self, *args, **kwargs):
class EarlyStopping (line 539) | class EarlyStopping(Callback):
method __init__ (line 540) | def __init__(self, metric_name="sum", patience=1000, minimize=True):
method training_break (line 549) | def training_break(self, *args, **kwargs):
method on_epoch_end (line 552) | def on_epoch_end(self, *args, **kwargs):
function postprocessing_pipeline_simplified (line 576) | def postprocessing_pipeline_simplified(cache_dirpath, loader_mode):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/loaders.py
class ImageReader (line 20) | class ImageReader(BaseTransformer):
method __init__ (line 21) | def __init__(self, train_mode, x_columns, y_columns, target_format="pn...
method transform (line 27) | def transform(self, meta):
method load_images (line 39) | def load_images(self, filepaths, filetype, grayscale=False):
method load_image (line 54) | def load_image(self, img_filepath, grayscale):
method read_json (line 62) | def read_json(self, path):
class XYSplit (line 69) | class XYSplit(BaseTransformer):
method __init__ (line 70) | def __init__(self, train_mode, x_columns, y_columns):
method transform (line 78) | def transform(self, meta):
class ImageSegmentationBaseDataset (line 88) | class ImageSegmentationBaseDataset(Dataset):
method __init__ (line 89) | def __init__(
method __len__ (line 121) | def __len__(self):
method __getitem__ (line 127) | def __getitem__(self, index):
method load_from_memory (line 161) | def load_from_memory(self, data_source, index, **kwargs):
method load_from_disk (line 164) | def load_from_disk(self, data_source, index, *, filetype, grayscale=Fa...
method load_image (line 174) | def load_image(self, img_filepath, grayscale):
method read_json (line 182) | def read_json(self, path):
method load_target (line 188) | def load_target(self, data_source, index, load_func):
class ImageSegmentationJsonDataset (line 192) | class ImageSegmentationJsonDataset(ImageSegmentationBaseDataset):
method load_target (line 193) | def load_target(self, data_source, index, load_func):
class ImageSegmentationPngDataset (line 198) | class ImageSegmentationPngDataset(ImageSegmentationBaseDataset):
method load_target (line 199) | def load_target(self, data_source, index, load_func):
class ImageSegmentationTTADataset (line 206) | class ImageSegmentationTTADataset(ImageSegmentationBaseDataset):
method __init__ (line 207) | def __init__(self, tta_params, tta_transform, *args, **kwargs):
method __getitem__ (line 212) | def __getitem__(self, index):
class ImageSegmentationLoaderBasic (line 237) | class ImageSegmentationLoaderBasic(BaseTransformer):
method __init__ (line 238) | def __init__(self, train_mode, loader_params, dataset_params, augmenta...
method transform (line 255) | def transform(self, X, y, X_valid=None, y_valid=None, **kwargs):
method get_datagen (line 274) | def get_datagen(self, X, y, train_mode, loader_params):
method load (line 302) | def load(self, filepath):
method save (line 307) | def save(self, filepath):
class ImageSegmentationLoaderBasicTTA (line 312) | class ImageSegmentationLoaderBasicTTA(ImageSegmentationLoaderBasic):
method __init__ (line 313) | def __init__(self, loader_params, dataset_params, augmentation_params):
method transform (line 328) | def transform(self, X, tta_params, **kwargs):
method get_datagen (line 337) | def get_datagen(self, X, tta_params, loader_params):
class ImageSegmentationLoaderResizePad (line 356) | class ImageSegmentationLoaderResizePad(ImageSegmentationLoaderBasic):
method __init__ (line 357) | def __init__(self, train_mode, loader_params, dataset_params, augmenta...
class ImageSegmentationLoaderPadTTA (line 394) | class ImageSegmentationLoaderPadTTA(ImageSegmentationLoaderBasicTTA):
method __init__ (line 395) | def __init__(self, loader_params, dataset_params, augmentation_params):
class ImageSegmentationLoaderResize (line 420) | class ImageSegmentationLoaderResize(ImageSegmentationLoaderBasic):
method __init__ (line 421) | def __init__(self, train_mode, loader_params, dataset_params, augmenta...
class ImageSegmentationLoaderResizeTTA (line 459) | class ImageSegmentationLoaderResizeTTA(ImageSegmentationLoaderBasicTTA):
method __init__ (line 460) | def __init__(self, loader_params, dataset_params, augmentation_params):
class MetaTestTimeAugmentationGenerator (line 486) | class MetaTestTimeAugmentationGenerator(BaseTransformer):
method __init__ (line 487) | def __init__(self, **kwargs):
method transform (line 490) | def transform(self, X, **kwargs):
method _get_tta_data (line 500) | def _get_tta_data(self, i, row):
class TestTimeAugmentationGenerator (line 539) | class TestTimeAugmentationGenerator(BaseTransformer):
method __init__ (line 540) | def __init__(self, **kwargs):
method transform (line 543) | def transform(self, X, **kwargs):
method _get_tta_data (line 553) | def _get_tta_data(self, i, row):
class TestTimeAugmentationAggregator (line 592) | class TestTimeAugmentationAggregator(BaseTransformer):
method __init__ (line 593) | def __init__(self, tta_inverse_transform, method, nthreads):
method agg_method (line 599) | def agg_method(self):
method transform (line 603) | def transform(self, images, tta_params, img_ids, **kwargs):
function aggregate_augmentations (line 619) | def aggregate_augmentations(
function to_array (line 633) | def to_array(x):
function to_tensor (line 640) | def to_tensor(x):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/metrics.py
function iou (line 8) | def iou(gt, pred):
function compute_ious (line 21) | def compute_ious(gt, predictions):
function compute_precision_at (line 37) | def compute_precision_at(ious, threshold):
function compute_eval_metric (line 46) | def compute_eval_metric(gt, predictions):
function intersection_over_union (line 53) | def intersection_over_union(y_true, y_pred):
function intersection_over_union_thresholds (line 62) | def intersection_over_union_thresholds(y_true, y_pred):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/models.py
class PyTorchUNet (line 89) | class PyTorchUNet(Model):
method __init__ (line 90) | def __init__(self, architecture_config, training_config, callbacks_con...
method fit (line 104) | def fit(self, datagen, validation_datagen=None, meta_valid=None):
method _fit_loop (line 133) | def _fit_loop(self, data):
method transform (line 169) | def transform(self, datagen, validation_datagen=None, *args, **kwargs):
method _transform (line 183) | def _transform(self, datagen, validation_datagen=None, **kwargs):
method set_model (line 216) | def set_model(self):
method set_loss (line 225) | def set_loss(self):
method load (line 253) | def load(self, filepath):
function designed_loss (line 270) | def designed_loss(output, target):
function mean (line 276) | def mean(l, ignore_nan=False, empty=0):
function lovasz_grad (line 297) | def lovasz_grad(gt_sorted):
function lovasz_hinge (line 312) | def lovasz_hinge(logits, labels, per_image=True, ignore=None):
function lovasz_hinge_flat (line 332) | def lovasz_hinge_flat(logits, labels):
function flatten_binary_scores (line 354) | def flatten_binary_scores(scores, labels, ignore=None):
function weight_regularization (line 369) | def weight_regularization(model, regularize, weight_decay_conv2d):
function callbacks_unet (line 382) | def callbacks_unet(callbacks_config):
class DiceLoss (line 404) | class DiceLoss(nn.Module):
method __init__ (line 405) | def __init__(self, smooth=0, eps=1e-7):
method forward (line 410) | def forward(self, output, target):
function mixed_dice_bce_loss (line 416) | def mixed_dice_bce_loss(
function mixed_dice_cross_entropy_loss (line 437) | def mixed_dice_cross_entropy_loss(
function multiclass_dice_loss (line 464) | def multiclass_dice_loss(output, target, smooth=0, activation="softmax"):
function where (line 494) | def where(cond, x_1, x_2):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/pipelines.py
function preprocessing_train (line 11) | def preprocessing_train(config, model_name="unet", suffix=""):
function preprocessing_inference (line 82) | def preprocessing_inference(config, model_name="unet", suffix=""):
function preprocessing_inference_tta (line 129) | def preprocessing_inference_tta(config, model_name="unet", suffix=""):
function aggregator (line 197) | def aggregator(name, model, tta_generator, experiment_directory, config):
function mask_postprocessing (line 214) | def mask_postprocessing(config, suffix=""):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/pnasnet.py
class MaxPool (line 33) | class MaxPool(nn.Module):
method __init__ (line 34) | def __init__(self, kernel_size, stride=1, padding=1, zero_pad=False):
method forward (line 39) | def forward(self, x):
class SeparableConv2d (line 48) | class SeparableConv2d(nn.Module):
method __init__ (line 49) | def __init__(
method forward (line 66) | def forward(self, x):
class BranchSeparables (line 72) | class BranchSeparables(nn.Module):
method __init__ (line 73) | def __init__(
method forward (line 101) | def forward(self, x):
class ReluConvBn (line 115) | class ReluConvBn(nn.Module):
method __init__ (line 116) | def __init__(self, in_channels, out_channels, kernel_size, stride=1):
method forward (line 128) | def forward(self, x):
class FactorizedReduction (line 135) | class FactorizedReduction(nn.Module):
method __init__ (line 136) | def __init__(self, in_channels, out_channels):
method forward (line 168) | def forward(self, x):
class CellBase (line 182) | class CellBase(nn.Module):
method cell_forward (line 183) | def cell_forward(self, x_left, x_right):
class CellStem0 (line 214) | class CellStem0(CellBase):
method __init__ (line 215) | def __init__(
method forward (line 265) | def forward(self, x_left):
class Cell (line 271) | class Cell(CellBase):
method __init__ (line 272) | def __init__(
method forward (line 351) | def forward(self, x_left, x_right):
class PNASNet5Large (line 358) | class PNASNet5Large(nn.Module):
method __init__ (line 359) | def __init__(self, num_classes=1001):
method features (line 467) | def features(self, x):
method logits (line 485) | def logits(self, features):
method forward (line 493) | def forward(self, input):
function pnasnet5large (line 499) | def pnasnet5large(num_classes=1001, pretrained="imagenet"):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/postprocessing.py
function resize_image (line 8) | def resize_image(image, target_size):
function crop_image (line 26) | def crop_image(image, target_size):
function binarize (line 48) | def binarize(image, threshold):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/preprocessing.py
function img_cumsum (line 4) | def img_cumsum(img):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/resnext.py
function conv3x3 (line 13) | def conv3x3(in_planes, out_planes, stride=1):
class BasicBlock (line 20) | class BasicBlock(nn.Module):
method __init__ (line 23) | def __init__(self, inplanes, planes, stride=1, downsample=None, num_gr...
method forward (line 33) | def forward(self, x):
class Bottleneck (line 52) | class Bottleneck(nn.Module):
method __init__ (line 55) | def __init__(self, inplanes, planes, stride=1, downsample=None, num_gr...
method forward (line 75) | def forward(self, x):
class ResNeXt (line 98) | class ResNeXt(nn.Module):
method __init__ (line 99) | def __init__(self, block, layers, num_classes=1000, num_group=32):
method _make_layer (line 121) | def _make_layer(self, block, planes, blocks, num_group, stride=1):
method forward (line 145) | def forward(self, x):
function resnext18 (line 163) | def resnext18(**kwargs):
function resnext34 (line 170) | def resnext34(**kwargs):
function resnext50 (line 177) | def resnext50(**kwargs):
function resnext101 (line 184) | def resnext101(**kwargs):
function resnext152 (line 191) | def resnext152(**kwargs):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/unet_models.py
function conv3x3 (line 34) | def conv3x3(in_, out):
class ConvRelu (line 38) | class ConvRelu(nn.Module):
method __init__ (line 39) | def __init__(self, in_, out):
method forward (line 44) | def forward(self, x):
method __init__ (line 894) | def __init__(self, in_, out):
method forward (line 899) | def forward(self, x):
class NoOperation (line 50) | class NoOperation(nn.Module):
method forward (line 51) | def forward(self, x):
class DecoderBlock_old (line 55) | class DecoderBlock_old(nn.Module):
method __init__ (line 56) | def __init__(self, in_channels, middle_channels, out_channels):
method forward (line 73) | def forward(self, x):
class DecoderBlock (line 77) | class DecoderBlock(nn.Module):
method __init__ (line 78) | def __init__(self, in_channels, middle_channels, out_channels):
method forward (line 87) | def forward(self, x, e=None):
class UNet11 (line 101) | class UNet11(nn.Module):
method __init__ (line 102) | def __init__(self, num_classes=1, num_filters=32, pretrained=False):
method forward (line 144) | def forward(self, x):
function unet11 (line 164) | def unet11(pretrained=False, **kwargs):
class DecoderBlockV2 (line 180) | class DecoderBlockV2(nn.Module):
method __init__ (line 181) | def __init__(self, in_channels, middle_channels, out_channels, is_deco...
method forward (line 206) | def forward(self, x):
class DecoderCenter (line 210) | class DecoderCenter(nn.Module):
method __init__ (line 211) | def __init__(self, in_channels, middle_channels, out_channels, is_deco...
method forward (line 237) | def forward(self, x):
class AlbuNet (line 241) | class AlbuNet(nn.Module):
method __init__ (line 249) | def __init__(
method forward (line 305) | def forward(self, x):
class UNetVGG16 (line 325) | class UNetVGG16(nn.Module):
method __init__ (line 326) | def __init__(
method forward (line 398) | def forward(self, x):
class UNetResNet (line 417) | class UNetResNet(nn.Module):
method __init__ (line 418) | def __init__(
method forward (line 492) | def forward(self, x):
class UNetResNet_wo_pool (line 511) | class UNetResNet_wo_pool(nn.Module):
method __init__ (line 512) | def __init__(
method forward (line 575) | def forward(self, x):
class UNetResNext_wo_pool (line 589) | class UNetResNext_wo_pool(nn.Module):
method __init__ (line 590) | def __init__(
method forward (line 644) | def forward(self, x):
class UNetResNetAttentionv2 (line 666) | class UNetResNetAttentionv2(nn.Module):
method __init__ (line 667) | def __init__(
method forward (line 723) | def forward(self, x):
class UNetResNetAttention (line 755) | class UNetResNetAttention(nn.Module):
method __init__ (line 756) | def __init__(
method forward (line 824) | def forward(self, x):
class EncoderBlock (line 855) | class EncoderBlock(nn.Module):
method __init__ (line 856) | def __init__(self, block, out_channels):
method forward (line 863) | def forward(self, x):
class ChannelAttentionGate (line 871) | class ChannelAttentionGate(nn.Module):
method __init__ (line 872) | def __init__(self, channel, reduction=16):
method forward (line 882) | def forward(self, x):
function conv3x3 (line 889) | def conv3x3(in_, out):
class ConvRelu (line 893) | class ConvRelu(nn.Module):
method __init__ (line 39) | def __init__(self, in_, out):
method forward (line 44) | def forward(self, x):
method __init__ (line 894) | def __init__(self, in_, out):
method forward (line 899) | def forward(self, x):
class ConvBn2d (line 905) | class ConvBn2d(nn.Module):
method __init__ (line 906) | def __init__(
method forward (line 925) | def forward(self, x):
class SpatialAttentionGate (line 931) | class SpatialAttentionGate(nn.Module):
method __init__ (line 932) | def __init__(self, channel, reduction=16):
method forward (line 937) | def forward(self, x):
class UNetResNext_wo_pool_hyper (line 946) | class UNetResNext_wo_pool_hyper(nn.Module):
method __init__ (line 947) | def __init__(
method forward (line 1007) | def forward(self, x):
class UNetResNext50 (line 1039) | class UNetResNext50(nn.Module):
method __init__ (line 1040) | def __init__(
method forward (line 1102) | def forward(self, x):
class UNetResNext (line 1130) | class UNetResNext(nn.Module):
method __init__ (line 1131) | def __init__(
method forward (line 1205) | def forward(self, x):
class UNetPNASNet (line 1224) | class UNetPNASNet(nn.Module):
method __init__ (line 1225) | def __init__(
method forward (line 1270) | def forward(self, x):
class TernausNetV2 (line 1284) | class TernausNetV2(nn.Module):
method __init__ (line 1289) | def __init__(
method forward (line 1343) | def forward(self, x):
class DecoderBlockTernaus (line 1361) | class DecoderBlockTernaus(nn.Module):
method __init__ (line 1366) | def __init__(self, in_channels, middle_channels, out_channels, is_deco...
method forward (line 1385) | def forward(self, x):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/utils.py
function read_yaml (line 33) | def read_yaml(fallback_file=NEPTUNE_CONFIG_PATH):
function init_logger (line 39) | def init_logger():
function get_logger (line 58) | def get_logger():
function create_submission (line 62) | def create_submission(meta, predictions):
function encode_rle (line 72) | def encode_rle(predictions):
function read_masks (line 76) | def read_masks(masks_filepaths):
function read_images (line 87) | def read_images(filepaths):
function run_length_encoding (line 95) | def run_length_encoding(x):
function run_length_decoding (line 110) | def run_length_decoding(mask_rle, shape):
function generate_metadata (line 129) | def generate_metadata(train_images_dir, test_images_dir, depths_filepath):
function sigmoid (line 159) | def sigmoid(x):
function softmax (line 163) | def softmax(X, theta=1.0, axis=None):
function from_pil (line 207) | def from_pil(*images):
function to_pil (line 215) | def to_pil(*images):
function make_apply_transformer (line 223) | def make_apply_transformer(func, output_name="output", apply_on=None):
function rle_from_binary (line 273) | def rle_from_binary(prediction):
function binary_from_rle (line 278) | def binary_from_rle(rle):
function get_segmentations (line 282) | def get_segmentations(labeled):
function get_crop_pad_sequence (line 293) | def get_crop_pad_sequence(vertical, horizontal):
function get_list_of_image_predictions (line 301) | def get_list_of_image_predictions(batch_predictions):
function set_seed (line 308) | def set_seed(seed):
class ImgAug (line 316) | class ImgAug:
method __init__ (line 317) | def __init__(self, augmenters):
method _pre_call_hook (line 323) | def _pre_call_hook(self):
method transform (line 328) | def transform(self, *images):
method __call__ (line 335) | def __call__(self, *args):
function get_seed (line 340) | def get_seed():
function reseed (line 345) | def reseed(augmenter, deterministic=True):
class KFoldBySortedValue (line 356) | class KFoldBySortedValue(BaseCrossValidator):
method __init__ (line 357) | def __init__(self, n_splits=3, shuffle=False, random_state=None):
method _iter_test_indices (line 362) | def _iter_test_indices(self, X, y=None, groups=None):
method get_n_splits (line 373) | def get_n_splits(self, X=None, y=None, groups=None):
function plot_list (line 377) | def plot_list(images=[], labels=[]):
function clean_object_from_memory (line 393) | def clean_object_from_memory(obj):
class FineTuneStep (line 400) | class FineTuneStep(Step):
method __init__ (line 401) | def __init__(
method _cached_fit_transform (line 433) | def _cached_fit_transform(self, step_inputs):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/modules/bn.py
function _pair (line 12) | def _pair(x):
class ABN (line 18) | class ABN(nn.Sequential):
method __init__ (line 24) | def __init__(self, num_features, activation=nn.ReLU(inplace=True), **k...
class InPlaceABN (line 43) | class InPlaceABN(nn.Module):
method __init__ (line 46) | def __init__(
method reset_parameters (line 89) | def reset_parameters(self):
method forward (line 96) | def forward(self, x):
method __repr__ (line 110) | def __repr__(self):
class InPlaceABNWrapper (line 122) | class InPlaceABNWrapper(nn.Module):
method __init__ (line 125) | def __init__(self, *args, **kwargs):
method forward (line 129) | def forward(self, x):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/modules/functions.py
function _check (line 13) | def _check(fn, *args, **kwargs):
function _broadcast_shape (line 19) | def _broadcast_shape(x):
function _reduce (line 29) | def _reduce(x):
function _count_samples (line 37) | def _count_samples(x):
function _act_forward (line 45) | def _act_forward(ctx, x):
function _act_backward (line 54) | def _act_backward(ctx, x, dx):
function _check_contiguous (line 65) | def _check_contiguous(*args):
class InPlaceABN (line 70) | class InPlaceABN(autograd.Function):
method forward (line 72) | def forward(
method backward (line 130) | def backward(ctx, dz):
class InPlaceABNSync (line 192) | class InPlaceABNSync(autograd.Function):
method forward (line 194) | def forward(
method backward (line 279) | def backward(ctx, dz):
method _parse_extra (line 361) | def _parse_extra(ctx, extra):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/modules/misc.py
class GlobalAvgPool2d (line 4) | class GlobalAvgPool2d(nn.Module):
method __init__ (line 5) | def __init__(self):
method forward (line 9) | def forward(self, inputs):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/modules/residual.py
class IdentityResidualBlock (line 8) | class IdentityResidualBlock(nn.Module):
method __init__ (line 9) | def __init__(
method forward (line 127) | def forward(self, x):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/modules/src/common.h
function __device__ (line 14) | __device__ Pair() {}
function __device__ (line 15) | __device__ Pair(T _v1, T _v2) : v1(_v1), v2(_v2) {}
function __device__ (line 16) | __device__ Pair(T v) : v1(v), v2(v) {}
function __device__ (line 17) | __device__ Pair(int v) : v1(v), v2(v) {}
function getMSB (line 38) | int getMSB(int val) { return 31 - __clz(val); }
function getNumThreads (line 40) | static int getNumThreads(int nElem) {
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/modules/src/inplace_abn.cpp
function mean_var (line 7) | std::vector<at::Tensor> mean_var(at::Tensor x) {
function forward (line 15) | at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Te...
function edz_eydz (line 24) | std::vector<at::Tensor> edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor...
function backward (line 33) | std::vector<at::Tensor> backward(at::Tensor z, at::Tensor dz, at::Tensor...
function leaky_relu_forward (line 42) | void leaky_relu_forward(at::Tensor z, float slope) {
function leaky_relu_backward (line 46) | void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) {
function elu_forward (line 54) | void elu_forward(at::Tensor z) {
function elu_backward (line 58) | void elu_backward(at::Tensor z, at::Tensor dz) {
function PYBIND11_MODULE (line 66) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/modules/src/inplace_abn_cpu.cpp
function reduce_sum (line 7) | at::Tensor reduce_sum(at::Tensor x) {
function broadcast_to (line 16) | at::Tensor broadcast_to(at::Tensor v, at::Tensor x) {
function count (line 28) | int64_t count(at::Tensor x) {
function invert_affine (line 36) | at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bia...
function mean_var_cpu (line 44) | std::vector<at::Tensor> mean_var_cpu(at::Tensor x) {
function forward_cpu (line 53) | at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at...
function edz_eydz_cpu (line 65) | std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Te...
function backward_cpu (line 74) | std::vector<at::Tensor> backward_cpu(at::Tensor z, at::Tensor dz, at::Te...
function leaky_relu_backward_cpu (line 92) | void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) {
function elu_backward_cpu (line 107) | void elu_backward_cpu(at::Tensor z, at::Tensor dz) {
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/modules/wider_resnet.py
class WiderResNet (line 7) | class WiderResNet(nn.Module):
method __init__ (line 8) | def __init__(self, structure, norm_act=ABN, classes=0):
method forward (line 78) | def forward(self, img):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/vanilla unet/utils/cyclelr_callback.py
class CyclicLR (line 4) | class CyclicLR(Callback):
method __init__ (line 63) | def __init__(
method _reset (line 99) | def _reset(self, new_base_lr=None, new_max_lr=None, new_step_size=None):
method clr (line 111) | def clr(self):
method on_train_begin (line 123) | def on_train_begin(self, logs={}):
method on_batch_end (line 131) | def on_batch_end(self, epoch, logs=None):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/vanilla unet/utils/lovasz_losses_tf.py
function lovasz_grad (line 14) | def lovasz_grad(gt_sorted):
function lovasz_hinge (line 30) | def lovasz_hinge(logits, labels, per_image=True, ignore=None):
function lovasz_hinge_flat (line 53) | def lovasz_hinge_flat(logits, labels):
function flatten_binary_scores (line 86) | def flatten_binary_scores(scores, labels, ignore=None):
function lovasz_softmax (line 104) | def lovasz_softmax(
function lovasz_softmax_flat (line 133) | def lovasz_softmax_flat(probas, labels, classes="all"):
function flatten_probas (line 169) | def flatten_probas(probas, labels, ignore=None, order="BHWC"):
FILE: DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/vanilla unet/utils/zf_unet_224_model.py
function preprocess_input (line 30) | def preprocess_input(x):
function dice_coef (line 36) | def dice_coef(y_true, y_pred):
function jacard_coef (line 43) | def jacard_coef(y_true, y_pred):
function jacard_coef_loss (line 52) | def jacard_coef_loss(y_true, y_pred):
function dice_coef_loss (line 56) | def dice_coef_loss(y_true, y_pred):
function double_conv_layer (line 60) | def double_conv_layer(x, size, dropout=0.0, batch_norm=True):
function ZF_UNET_224 (line 78) | def ZF_UNET_224(dropout_val=0.2, weights=None):
FILE: DEEP LEARNING/segmentation/Segmentation pipeline/get dataset.py
function html_url_parser (line 14) | def html_url_parser(url, save_dir, show=False, wait=False):
FILE: DEEP LEARNING/segmentation/Segmentation pipeline/segmentation pipeline.py
class DataGeneratorFolder (line 41) | class DataGeneratorFolder(Sequence):
method __init__ (line 42) | def __init__(self, root_dir=r'../data/val_test', image_folder='img/', ...
method __len__ (line 56) | def __len__(self):
method on_epoch_end (line 62) | def on_epoch_end(self):
method read_image_mask (line 67) | def read_image_mask(self, image_name, mask_name):
method __getitem__ (line 70) | def __getitem__(self, index):
function aug_with_crop (line 121) | def aug_with_crop(image_size=256, crop_prob=1):
function plot_training_history (line 195) | def plot_training_history(history):
function iou_metric (line 224) | def iou_metric(y_true_in, y_pred_in):
function plot_mask_gt_image (line 251) | def plot_mask_gt_image(mask, groud_truth, img):
function iou_metric_batch (line 259) | def iou_metric_batch(y_true_in, y_pred_in):
function draw_get_best_threshold (line 305) | def draw_get_best_threshold(ious, thresholds):
FILE: DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/classification_pytorch_dummy.py
function get_annot (line 57) | def get_annot(annot_path):
function prev_get_transform (line 72) | def prev_get_transform(phase):
function get_transform (line 91) | def get_transform(phase):
function criterion (line 110) | def criterion(logit, truth, weight=None):
class SteelDataset (line 129) | class SteelDataset(Dataset):
method __init__ (line 130) | def __init__(self, annot, image_folder, phase):
method __getitem__ (line 136) | def __getitem__(self, index):
method __len__ (line 144) | def __len__(self):
function get_dataloader (line 148) | def get_dataloader(annot, image_folder, phase, batch_size=16, num_worker...
function get_model (line 152) | def get_model(model_name):
class Trainer (line 162) | class Trainer:
method __init__ (line 163) | def __init__(self, model_name="resnet34", pretrained=False, epochs=1):
method forward (line 187) | def forward(self, inputs, targets):
method iterate (line 195) | def iterate(self, epoch, phase):
method save_model (line 230) | def save_model(self, epoch):
method summary (line 241) | def summary(self):
method start (line 253) | def start(self):
class Flatten (line 284) | class Flatten(nn.Module):
method forward (line 285) | def forward(self, x):
method forward (line 350) | def forward(self, x):
method forward (line 640) | def forward(self, x):
method forward (line 705) | def forward(self, x):
class ChannelGate (line 287) | class ChannelGate(nn.Module):
method __init__ (line 288) | def __init__(self, gate_channel, reduction_ratio=16, num_layers=1):
method forward (line 301) | def forward(self, in_tensor):
method __init__ (line 354) | def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg...
method forward (line 364) | def forward(self, x):
method __init__ (line 643) | def __init__(self, gate_channel, reduction_ratio=16, num_layers=1):
method forward (line 656) | def forward(self, in_tensor):
method __init__ (line 709) | def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg...
method forward (line 719) | def forward(self, x):
class SpatialGate (line 305) | class SpatialGate(nn.Module):
method __init__ (line 306) | def __init__(self, gate_channel, reduction_ratio=16, dilation_conv_num...
method forward (line 318) | def forward(self, in_tensor):
method __init__ (line 400) | def __init__(self):
method forward (line 405) | def forward(self, x):
method __init__ (line 661) | def __init__(self, gate_channel, reduction_ratio=16, dilation_conv_num...
method forward (line 673) | def forward(self, in_tensor):
method __init__ (line 755) | def __init__(self):
method forward (line 760) | def forward(self, x):
class BAM (line 320) | class BAM(nn.Module):
method __init__ (line 321) | def __init__(self, gate_channel):
method forward (line 325) | def forward(self,in_tensor):
method __init__ (line 676) | def __init__(self, gate_channel):
method forward (line 680) | def forward(self,in_tensor):
class BasicConv (line 333) | class BasicConv(nn.Module):
method __init__ (line 334) | def __init__(self, in_planes, out_planes, kernel_size, stride=1, paddi...
method forward (line 341) | def forward(self, x):
method __init__ (line 689) | def __init__(self, in_planes, out_planes, kernel_size, stride=1, paddi...
method forward (line 696) | def forward(self, x):
class Flatten (line 349) | class Flatten(nn.Module):
method forward (line 285) | def forward(self, x):
method forward (line 350) | def forward(self, x):
method forward (line 640) | def forward(self, x):
method forward (line 705) | def forward(self, x):
class ChannelGate (line 353) | class ChannelGate(nn.Module):
method __init__ (line 288) | def __init__(self, gate_channel, reduction_ratio=16, num_layers=1):
method forward (line 301) | def forward(self, in_tensor):
method __init__ (line 354) | def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg...
method forward (line 364) | def forward(self, x):
method __init__ (line 643) | def __init__(self, gate_channel, reduction_ratio=16, num_layers=1):
method forward (line 656) | def forward(self, in_tensor):
method __init__ (line 709) | def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg...
method forward (line 719) | def forward(self, x):
function logsumexp_2d (line 389) | def logsumexp_2d(tensor):
class ChannelPool (line 395) | class ChannelPool(nn.Module):
method forward (line 396) | def forward(self, x):
method forward (line 751) | def forward(self, x):
class SpatialGate (line 399) | class SpatialGate(nn.Module):
method __init__ (line 306) | def __init__(self, gate_channel, reduction_ratio=16, dilation_conv_num...
method forward (line 318) | def forward(self, in_tensor):
method __init__ (line 400) | def __init__(self):
method forward (line 405) | def forward(self, x):
method __init__ (line 661) | def __init__(self, gate_channel, reduction_ratio=16, dilation_conv_num...
method forward (line 673) | def forward(self, in_tensor):
method __init__ (line 755) | def __init__(self):
method forward (line 760) | def forward(self, x):
class CBAM (line 411) | class CBAM(nn.Module):
method __init__ (line 412) | def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg...
method forward (line 418) | def forward(self, x):
method __init__ (line 767) | def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg...
method forward (line 773) | def forward(self, x):
function conv3x3 (line 431) | def conv3x3(in_planes, out_planes, stride=1):
class BasicBlock (line 436) | class BasicBlock(nn.Module):
method __init__ (line 439) | def __init__(self, inplanes, planes, stride=1, downsample=None, use_cb...
method forward (line 454) | def forward(self, x):
method __init__ (line 794) | def __init__(self, inplanes, planes, stride=1, downsample=None, use_cb...
method forward (line 809) | def forward(self, x):
class Bottleneck (line 475) | class Bottleneck(nn.Module):
method __init__ (line 478) | def __init__(self, inplanes, planes, stride=1, downsample=None, use_cb...
method forward (line 496) | def forward(self, x):
method __init__ (line 833) | def __init__(self, inplanes, planes, stride=1, downsample=None, use_cb...
method forward (line 851) | def forward(self, x):
class ResNet (line 521) | class ResNet(nn.Module):
method __init__ (line 522) | def __init__(self, block, layers, network_type, num_classes, att_type...
method _make_layer (line 564) | def _make_layer(self, block, planes, blocks, stride=1, att_type=None):
method forward (line 581) | def forward(self, x):
method __init__ (line 877) | def __init__(self, block, layers, network_type, num_classes, att_type...
method _make_layer (line 919) | def _make_layer(self, block, planes, blocks, stride=1, att_type=None):
method forward (line 936) | def forward(self, x):
function ResidualNet (line 610) | def ResidualNet(network_type, depth, num_classes, att_type):
class Flatten (line 639) | class Flatten(nn.Module):
method forward (line 285) | def forward(self, x):
method forward (line 350) | def forward(self, x):
method forward (line 640) | def forward(self, x):
method forward (line 705) | def forward(self, x):
class ChannelGate (line 642) | class ChannelGate(nn.Module):
method __init__ (line 288) | def __init__(self, gate_channel, reduction_ratio=16, num_layers=1):
method forward (line 301) | def forward(self, in_tensor):
method __init__ (line 354) | def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg...
method forward (line 364) | def forward(self, x):
method __init__ (line 643) | def __init__(self, gate_channel, reduction_ratio=16, num_layers=1):
method forward (line 656) | def forward(self, in_tensor):
method __init__ (line 709) | def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg...
method forward (line 719) | def forward(self, x):
class SpatialGate (line 660) | class SpatialGate(nn.Module):
method __init__ (line 306) | def __init__(self, gate_channel, reduction_ratio=16, dilation_conv_num...
method forward (line 318) | def forward(self, in_tensor):
method __init__ (line 400) | def __init__(self):
method forward (line 405) | def forward(self, x):
method __init__ (line 661) | def __init__(self, gate_channel, reduction_ratio=16, dilation_conv_num...
method forward (line 673) | def forward(self, in_tensor):
method __init__ (line 755) | def __init__(self):
method forward (line 760) | def forward(self, x):
class BAM (line 675) | class BAM(nn.Module):
method __init__ (line 321) | def __init__(self, gate_channel):
method forward (line 325) | def forward(self,in_tensor):
method __init__ (line 676) | def __init__(self, gate_channel):
method forward (line 680) | def forward(self,in_tensor):
class BasicConv (line 688) | class BasicConv(nn.Module):
method __init__ (line 334) | def __init__(self, in_planes, out_planes, kernel_size, stride=1, paddi...
method forward (line 341) | def forward(self, x):
method __init__ (line 689) | def __init__(self, in_planes, out_planes, kernel_size, stride=1, paddi...
method forward (line 696) | def forward(self, x):
class Flatten (line 704) | class Flatten(nn.Module):
method forward (line 285) | def forward(self, x):
method forward (line 350) | def forward(self, x):
method forward (line 640) | def forward(self, x):
method forward (line 705) | def forward(self, x):
class ChannelGate (line 708) | class ChannelGate(nn.Module):
method __init__ (line 288) | def __init__(self, gate_channel, reduction_ratio=16, num_layers=1):
method forward (line 301) | def forward(self, in_tensor):
method __init__ (line 354) | def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg...
method forward (line 364) | def forward(self, x):
method __init__ (line 643) | def __init__(self, gate_channel, reduction_ratio=16, num_layers=1):
method forward (line 656) | def forward(self, in_tensor):
method __init__ (line 709) | def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg...
method forward (line 719) | def forward(self, x):
function logsumexp_2d (line 744) | def logsumexp_2d(tensor):
class ChannelPool (line 750) | class ChannelPool(nn.Module):
method forward (line 396) | def forward(self, x):
method forward (line 751) | def forward(self, x):
class SpatialGate (line 754) | class SpatialGate(nn.Module):
method __init__ (line 306) | def __init__(self, gate_channel, reduction_ratio=16, dilation_conv_num...
method forward (line 318) | def forward(self, in_tensor):
method __init__ (line 400) | def __init__(self):
method forward (line 405) | def forward(self, x):
method __init__ (line 661) | def __init__(self, gate_channel, reduction_ratio=16, dilation_conv_num...
method forward (line 673) | def forward(self, in_tensor):
method __init__ (line 755) | def __init__(self):
method forward (line 760) | def forward(self, x):
class CBAM (line 766) | class CBAM(nn.Module):
method __init__ (line 412) | def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg...
method forward (line 418) | def forward(self, x):
method __init__ (line 767) | def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg...
method forward (line 773) | def forward(self, x):
function conv3x3 (line 786) | def conv3x3(in_planes, out_planes, stride=1):
class BasicBlock (line 791) | class BasicBlock(nn.Module):
method __init__ (line 439) | def __init__(self, inplanes, planes, stride=1, downsample=None, use_cb...
method forward (line 454) | def forward(self, x):
method __init__ (line 794) | def __init__(self, inplanes, planes, stride=1, downsample=None, use_cb...
method forward (line 809) | def forward(self, x):
class Bottleneck (line 830) | class Bottleneck(nn.Module):
method __init__ (line 478) | def __init__(self, inplanes, planes, stride=1, downsample=None, use_cb...
method forward (line 496) | def forward(self, x):
method __init__ (line 833) | def __init__(self, inplanes, planes, stride=1, downsample=None, use_cb...
method forward (line 851) | def forward(self, x):
class ResNet (line 876) | class ResNet(nn.Module):
method __init__ (line 522) | def __init__(self, block, layers, network_type, num_classes, att_type...
method _make_layer (line 564) | def _make_layer(self, block, planes, blocks, stride=1, att_type=None):
method forward (line 581) | def forward(self, x):
method __init__ (line 877) | def __init__(self, block, layers, network_type, num_classes, att_type...
method _make_layer (line 919) | def _make_layer(self, block, planes, blocks, stride=1, att_type=None):
method forward (line 936) | def forward(self, x):
function ResidualNet (line 965) | def ResidualNet(network_type, depth, num_classes, att_type):
FILE: DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/bam.py
class Flatten (line 7) | class Flatten(nn.Module):
method forward (line 8) | def forward(self, x):
class ChannelGate (line 12) | class ChannelGate(nn.Module):
method __init__ (line 13) | def __init__(self, gate_channel, reduction_ratio=16, num_layers=1):
method forward (line 33) | def forward(self, in_tensor):
class SpatialGate (line 38) | class SpatialGate(nn.Module):
method __init__ (line 39) | def __init__(
method forward (line 72) | def forward(self, in_tensor):
class BAM (line 76) | class BAM(nn.Module):
method __init__ (line 77) | def __init__(self, gate_channel):
method forward (line 82) | def forward(self, in_tensor):
FILE: DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/cbam.py
class BasicConv (line 7) | class BasicConv(nn.Module):
method __init__ (line 8) | def __init__(
method forward (line 40) | def forward(self, x):
class Flatten (line 49) | class Flatten(nn.Module):
method forward (line 50) | def forward(self, x):
class ChannelGate (line 54) | class ChannelGate(nn.Module):
method __init__ (line 55) | def __init__(self, gate_channels, reduction_ratio=16, pool_types=["avg...
method forward (line 66) | def forward(self, x):
function logsumexp_2d (line 98) | def logsumexp_2d(tensor):
class ChannelPool (line 105) | class ChannelPool(nn.Module):
method forward (line 106) | def forward(self, x):
class SpatialGate (line 112) | class SpatialGate(nn.Module):
method __init__ (line 113) | def __init__(self):
method forward (line 121) | def forward(self, x):
class CBAM (line 128) | class CBAM(nn.Module):
method __init__ (line 129) | def __init__(
method forward (line 142) | def forward(self, x):
FILE: DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/dataloader.py
class SteelDataset (line 45) | class SteelDataset(Dataset):
method __init__ (line 46) | def __init__(self, df, data_folder, mean, std, phase, df_full, data_fo...
method __getitem__ (line 58) | def __getitem__(self, idx):
method __len__ (line 76) | def __len__(self):
function get_transforms (line 80) | def get_transforms(phase, mean, std):
function provider_trai_test_split (line 111) | def provider_trai_test_split(
function provider_cv (line 141) | def provider_cv(
function provider_cv___ (line 198) | def provider_cv___(
FILE: DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/logger.py
function flatten (line 19) | def flatten(l):
function getargnames (line 29) | def getargnames(func):
function getcallargs_ordered (line 38) | def getcallargs_ordered(func, *args, **kwargs):
function describe_call (line 46) | def describe_call(func, *args, **kwargs):
function log_to (line 55) | def log_to(logger_func):
function timeit (line 81) | def timeit(method):
function debug (line 100) | def debug(fn):
function myfunc (line 115) | def myfunc(a, b, c, *args, **kwargs):
FILE: DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/losses.py
class FocalLoss (line 10) | class FocalLoss(nn.Module):
method __init__ (line 11) | def __init__(self, alpha=1, gamma=2, logits=False, reduce=True):
method forward (line 18) | def forward(self, inputs, targets):
class JaccardLoss (line 32) | class JaccardLoss(nn.Module):
method __init__ (line 35) | def __init__(self, eps=1e-7, activation="sigmoid"):
method forward (line 40) | def forward(self, y_pr, y_gt):
class DiceLoss (line 46) | class DiceLoss(nn.Module):
method __init__ (line 49) | def __init__(self, eps=1e-7, activation="sigmoid"):
method forward (line 54) | def forward(self, y_pr, y_gt):
class BCEJaccardLoss (line 65) | class BCEJaccardLoss(JaccardLoss):
method __init__ (line 68) | def __init__(self, eps=1e-7, activation="sigmoid"):
method forward (line 72) | def forward(self, y_pr, y_gt):
class BCEDiceLoss (line 78) | class BCEDiceLoss(DiceLoss):
method __init__ (line 81) | def __init__(self, eps=1e-7, activation="sigmoid"):
method forward (line 85) | def forward(self, y_pr, y_gt):
function iou (line 91) | def iou(pr, gt, eps=1e-7, threshold=None, activation="sigmoid"):
function f_score (line 126) | def f_score(pr, gt, beta=1, eps=1e-7, threshold=None, activation="sigmoi...
FILE: DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/lovasz_losses.py
function _lovasz_grad (line 19) | def _lovasz_grad(gt_sorted):
function _lovasz_hinge (line 33) | def _lovasz_hinge(logits, labels, per_image=True, ignore=None):
function _lovasz_hinge_flat (line 53) | def _lovasz_hinge_flat(logits, labels):
function _flatten_binary_scores (line 73) | def _flatten_binary_scores(scores, labels, ignore=None):
function _lovasz_softmax (line 90) | def _lovasz_softmax(
function _lovasz_softmax_flat (line 118) | def _lovasz_softmax_flat(probas, labels, classes="present", anti=False):
function _flatten_probas (line 152) | def _flatten_probas(probas, labels, ignore=None, anti=False):
function isnan (line 171) | def isnan(x):
function mean (line 175) | def mean(values, ignore_nan=False, empty=0):
class BinaryLovaszLoss (line 195) | class BinaryLovaszLoss(_Loss):
method __init__ (line 196) | def __init__(self, per_image=False, ignore=None):
method forward (line 201) | def forward(self, logits, target):
class LovaszLoss (line 207) | class LovaszLoss(_Loss):
method __init__ (line 208) | def __init__(self, per_image=False, ignore=None, anti=False, classes="...
method forward (line 215) | def forward(self, logits, target):
class LovaszLossSymmetric (line 230) | class LovaszLossSymmetric(_Loss):
method __init__ (line 231) | def __init__(self, per_image=True, ignore=None, classes="all"):
method forward (line 237) | def forward(self, logits, target):
FILE: DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/metric.py
class Meter (line 18) | class Meter:
method __init__ (line 21) | def __init__(self, phase, epoch):
method update (line 28) | def update(self, targets, outputs):
method get_metrics (line 40) | def get_metrics(self):
function predict (line 49) | def predict(X, threshold):
function metric_old (line 56) | def metric_old(probability, truth, threshold=0.5):
function dice_channel_torch (line 92) | def dice_channel_torch(probability, truth, threshold):
function dice_single_channel (line 119) | def dice_single_channel(probability, truth, threshold, batch_size, eps=1...
function epoch_log (line 126) | def epoch_log(phase, epoch, epoch_loss, meter, start):
function compute_ious (line 133) | def compute_ious(pred, label, classes, ignore_index=255, only_present=Tr...
function compute_iou_batch (line 150) | def compute_iou_batch(outputs, labels, classes=None):
FILE: DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/new_metrics.py
function pixel_accuracy (line 30) | def pixel_accuracy(outputs: torch.Tensor, targets: torch.Tensor, ignore_...
class PixelAccuracyCallback (line 48) | class PixelAccuracyCallback(MetricCallback):
method __init__ (line 52) | def __init__(
class ConfusionMatrixCallback (line 74) | class ConfusionMatrixCallback(Callback):
method __init__ (line 80) | def __init__(
method on_loader_start (line 104) | def on_loader_start(self, state):
method on_batch_end (line 108) | def on_batch_end(self, state: RunnerState):
method on_loader_end (line 123) | def on_loader_end(self, state):
class MacroF1Callback (line 214) | class MacroF1Callback(Callback):
method __init__ (line 219) | def __init__(
method on_batch_end (line 243) | def on_batch_end(self, state: RunnerState):
method on_loader_start (line 267) | def on_loader_start(self, state):
method on_loader_end (line 271) | def on_loader_end(self, state):
function binary_dice_iou_score (line 280) | def binary_dice_iou_score(
function multiclass_dice_iou_score (line 324) | def multiclass_dice_iou_score(
function multilabel_dice_iou_score (line 354) | def multilabel_dice_iou_score(
class IoUMetricsCallback (line 383) | class IoUMetricsCallback(Callback):
method __init__ (line 389) | def __init__(
method on_loader_start (line 457) | def on_loader_start(self, state):
method on_batch_end (line 461) | def on_batch_end(self, state: RunnerState):
method on_loader_end (line 477) | def on_loader_end(self, state):
FILE: DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/optimizers.py
class RAdam (line 6) | class RAdam(Optimizer):
method __init__ (line 7) | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weig...
method __setstate__ (line 12) | def __setstate__(self, state):
method step (line 15) | def step(self, closure=None):
class PlainRAdam (line 93) | class PlainRAdam(Optimizer):
method __init__ (line 94) | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weig...
method __setstate__ (line 99) | def __setstate__(self, state):
method step (line 102) | def step(self, closure=None):
class AdamW (line 169) | class AdamW(Optimizer):
method __init__ (line 170) | def __init__(
method __setstate__ (line 178) | def __setstate__(self, state):
method step (line 181) | def step(self, closure=None):
method __init__ (line 276) | def __init__(
method __setstate__ (line 298) | def __setstate__(self, state):
method step (line 303) | def step(self, closure=None):
class AdamW (line 253) | class AdamW(Optimizer):
method __init__ (line 170) | def __init__(
method __setstate__ (line 178) | def __setstate__(self, state):
method step (line 181) | def step(self, closure=None):
method __init__ (line 276) | def __init__(
method __setstate__ (line 298) | def __setstate__(self, state):
method step (line 303) | def step(self, closure=None):
class Novograd (line 368) | class Novograd(Optimizer):
method __init__ (line 386) | def __init__(
method __setstate__ (line 415) | def __setstate__(self, state):
method step (line 420) | def step(self, closure=None):
class Lookahead (line 494) | class Lookahead(Optimizer):
method __init__ (line 495) | def __init__(self, base_optimizer, alpha=0.5, k=6):
method step (line 513) | def step(self, closure=None):
function LookaheadAdam (line 530) | def LookaheadAdam(params, alpha=0.5, k=6, *args, **kwargs):
class Ralamb (line 540) | class Ralamb(Optimizer):
method __init__ (line 541) | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weig...
method __setstate__ (line 546) | def __setstate__(self, state):
method step (line 549) | def step(self, closure=None):
function Over9000 (line 648) | def Over9000(params, alpha=0.5, k=6, *args, **kwargs):
FILE: DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/training_helper.py
class Trainer_cv (line 31) | class Trainer_cv(object):
method __init__ (line 34) | def __init__(
method forward (line 90) | def forward(self, images, targets):
method iterate (line 97) | def iterate(self, epoch, phase):
method start (line 131) | def start(self):
FILE: DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/utils.py
function mask2rle (line 16) | def mask2rle(img):
function make_mask (line 28) | def make_mask(row_id, df):
function plot (line 47) | def plot(scores, name, fold=0, safe_pic=True):
function set_seed (line 62) | def set_seed(seed=42):
function load_model_unet (line 70) | def load_model_unet(_model_weights, is_inference=False):
function load_model_fpn (line 111) | def load_model_fpn(_model_weights, is_inference=False):
FILE: DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/inference.py
class Model (line 147) | class Model:
method __init__ (line 148) | def __init__(self, models):
method __call__ (line 151) | def __call__(self, x):
function create_transforms (line 186) | def create_transforms(additional):
class ConvBn2d (line 1032) | class ConvBn2d(nn.Module):
method __init__ (line 1033) | def __init__(self, in_channel, out_channel, kernel_size=3, padding=1, ...
method forward (line 1045) | def forward(self, x):
class BasicBlock (line 1058) | class BasicBlock(nn.Module):
method __init__ (line 1059) | def __init__(self, in_channel, channel, out_channel, stride=1, is_shor...
method forward (line 1075) | def forward(self, x):
class ResNet34 (line 1087) | class ResNet34(nn.Module):
method __init__ (line 1088) | def __init__(self, num_class=1000):
method forward (line 1124) | def forward(self, x):
class Resnet34_classification (line 1137) | class Resnet34_classification(nn.Module):
method __init__ (line 1138) | def __init__(self, num_class=4):
method forward (line 1146) | def forward(self, x):
class TestDataset (line 1170) | class TestDataset(Dataset):
method __init__ (line 1173) | def __init__(self, root, df, mean, std):
method __getitem__ (line 1180) | def __getitem__(self, idx):
method __len__ (line 1187) | def __len__(self):
function sharpen (line 1191) | def sharpen(p, t=0.5):
function get_classification_preds (line 1201) | def get_classification_preds(net, test_loader):
FILE: DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/model_resnet.py
function conv3x3 (line 10) | def conv3x3(in_planes, out_planes, stride=1):
class BasicBlock (line 17) | class BasicBlock(nn.Module):
method __init__ (line 20) | def __init__(self, inplanes, planes, stride=1, downsample=None, use_cb...
method forward (line 35) | def forward(self, x):
class Bottleneck (line 57) | class Bottleneck(nn.Module):
method __init__ (line 60) | def __init__(self, inplanes, planes, stride=1, downsample=None, use_cb...
method forward (line 79) | def forward(self, x):
class ResNet (line 105) | class ResNet(nn.Module):
method __init__ (line 106) | def __init__(self, block, layers, network_type, num_classes, att_type=...
method _make_layer (line 158) | def _make_layer(self, block, planes, blocks, stride=1, att_type=None):
method forward (line 184) | def forward(self, x):
function ResidualNet (line 214) | def ResidualNet(network_type, depth, num_classes, att_type):
FILE: DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/augs.py
function to_tensor (line 4) | def to_tensor(x, **kwargs):
function get_training_augmentation (line 19) | def get_training_augmentation(
function get_training_augmentation0 (line 44) | def get_training_augmentation0(image_size: tuple = (320, 640)):
function get_training_augmentation1 (line 66) | def get_training_augmentation1(image_size: tuple = (320, 640)):
function get_training_augmentation2 (line 87) | def get_training_augmentation2(image_size: tuple = (320, 640)):
function get_validation_augmentation (line 110) | def get_validation_augmentation(image_size: tuple = (320, 640)):
function get_preprocessing (line 123) | def get_preprocessing(preprocessing_fn):
FILE: DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/callbacks.py
function calculate_confusion_matrix_from_arrays (line 9) | def calculate_confusion_matrix_from_arrays(
function get_confusion_matrix (line 35) | def get_confusion_matrix(y_pred_logits: torch.Tensor, y_true: torch.Tens...
function calculate_tp_fp_fn (line 44) | def calculate_tp_fp_fn(confusion_matrix):
function calculate_dice (line 65) | def calculate_dice(tp_fp_fn_dict):
class MulticlassDiceMetricCallback (line 83) | class MulticlassDiceMetricCallback(Callback):
method __init__ (line 84) | def __init__(
method _reset_stats (line 102) | def _reset_stats(self):
method on_batch_end (line 105) | def on_batch_end(self, state: RunnerState):
method on_loader_end (line 116) | def on_loader_end(self, state: RunnerState):
class CustomSegmentationInferCallback (line 138) | class CustomSegmentationInferCallback(Callback):
method __init__ (line 139) | def __init__(self, return_valid: bool = False):
method on_batch_end (line 145) | def on_batch_end(self, state: RunnerState):
FILE: DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/dataset.py
function get_img (line 21) | def get_img(x: str = "img_name", folder: str = "train_images"):
function rle_decode (line 38) | def rle_decode(mask_rle: str = "", shape: tuple = (1400, 2100)):
function make_mask (line 60) | def make_mask(
function mask2rle (line 86) | def mask2rle(img):
class CloudDataset (line 103) | class CloudDataset(Dataset):
method __init__ (line 104) | def __init__(
method save_processed_ (line 171) | def save_processed_(self):
method __getitem__ (line 187) | def __getitem__(self, idx):
method __len__ (line 208) | def __len__(self):
class CloudDatasetClassification (line 212) | class CloudDatasetClassification(Dataset):
method __init__ (line 213) | def __init__(
method save_processed_ (line 281) | def save_processed_(self):
method __getitem__ (line 291) | def __getitem__(self, idx):
method __len__ (line 310) | def __len__(self):
function prepare_loaders (line 314) | def prepare_loaders(
FILE: DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/inference_blend.py
class Model (line 36) | class Model:
method __init__ (line 37) | def __init__(self, models):
method __call__ (line 40) | def __call__(self, x):
FILE: DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/losses/losses.py
class FocalLoss (line 9) | class FocalLoss(_Loss):
method __init__ (line 10) | def __init__(self, alpha=0.5, gamma=2, ignore_index=None):
method forward (line 24) | def forward(self, label_input, label_target):
class MulticlassDiceLoss (line 46) | class MulticlassDiceLoss(_Loss):
method __init__ (line 50) | def __init__(
method forward (line 62) | def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor) -> torch...
class BCEMulticlassDiceLoss (line 110) | class BCEMulticlassDiceLoss(MulticlassDiceLoss):
method __init__ (line 113) | def __init__(self, eps=1e-7, activation="sigmoid"):
method forward (line 117) | def forward(self, y_pr, y_gt):
FILE: DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/losses/lovasz_losses.py
function lovasz_grad (line 19) | def lovasz_grad(gt_sorted):
function iou_binary (line 34) | def iou_binary(preds, labels, EMPTY=1.0, ignore=None, per_image=True):
function iou (line 54) | def iou(preds, labels, C, EMPTY=1.0, ignore=None, per_image=False):
function lovasz_hinge (line 81) | def lovasz_hinge(logits, labels, per_image=True, ignore=None):
function lovasz_hinge_flat (line 101) | def lovasz_hinge_flat(logits, labels):
function flatten_binary_scores (line 121) | def flatten_binary_scores(scores, labels, ignore=None):
class StableBCELoss (line 136) | class StableBCELoss(torch.nn.modules.Module):
method __init__ (line 137) | def __init__(self):
method forward (line 140) | def forward(self, input, target):
function binary_xloss (line 146) | def binary_xloss(logits, labels, ignore=None):
function lovasz_softmax (line 161) | def lovasz_softmax(probas, labels, classes="present", per_image=False, i...
function lovasz_softmax_flat (line 186) | def lovasz_softmax_flat(probas, labels, classes="present"):
function flatten_probas (line 217) | def flatten_probas(probas, labels, ignore=None):
function xloss (line 236) | def xloss(logits, labels, ignore=None):
function isnan (line 244) | def isnan(x):
function mean (line 248) | def mean(l, ignore_nan=False, empty=0):
FILE: DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/optimizers.py
class Ranger (line 11) | class Ranger(Optimizer):
method __init__ (line 15) | def __init__(
method __setstate__ (line 64) | def __setstate__(self, state):
method step (line 68) | def step(self, closure=None):
class RAdam (line 164) | class RAdam(Optimizer):
method __init__ (line 165) | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weig...
method __setstate__ (line 170) | def __setstate__(self, state):
method step (line 173) | def step(self, closure=None):
class Lookahead (line 249) | class Lookahead(Optimizer):
method __init__ (line 250) | def __init__(self, base_optimizer, alpha=0.5, k=6):
method step (line 268) | def step(self, closure=None):
class Ralamb (line 285) | class Ralamb(Optimizer):
method __init__ (line 290) | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weig...
method __setstate__ (line 295) | def __setstate__(self, state):
method step (line 298) | def step(self, closure=None):
function get_optimizer (line 391) | def get_optimizer(
FILE: DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/predict.py
function sigmoid (line 9) | def sigmoid(x):
function predict (line 13) | def predict(
function predict_blend (line 65) | def predict_blend(
FILE: DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/utils.py
function sigmoid (line 8) | def sigmoid(x):
function visualize (line 12) | def visualize(image, mask, original_image=None, original_mask=None, font...
function visualize_with_raw (line 56) | def visualize_with_raw(
function plot_with_augmentation (line 102) | def plot_with_augmentation(image, mask, augment):
function post_process (line 123) | def post_process(
function dice (line 151) | def dice(img1: np.array, img2: np.array) -> float:
function get_optimal_postprocess (line 169) | def get_optimal_postprocess(loaders=None, runner=None, logdir: str = ""):
FILE: classification/Kaggle Malware Prediction/kaggle.py
function fast_auc (line 21) | def fast_auc(y_true, y_prob):
function eval_auc (line 35) | def eval_auc(preds, dtrain):
class Kaggle (line 40) | class Kaggle:
method __init__ (line 49) | def __init__(self, data_path, metric="auc", mode=0):
method get_metric (line 148) | def get_metric(metric):
method read_train_data (line 155) | def read_train_data(
method read_test_data (line 189) | def read_test_data(self, test_name="test.csv"):
method reduce_mem_usage (line 195) | def reduce_mem_usage(data, verbose=True):
method create_validation_split (line 248) | def create_validation_split(self, n_folds=5, stratified=False):
method general_feature_engineering (line 280) | def general_feature_engineering(self, train_only=True):
method _categorical_preprocess (line 634) | def _categorical_preprocess(self, df, cat_feature, how="ohe"):
method fold_feature_engineering (line 655) | def fold_feature_engineering(self, train, test, total_test):
method get_predictions (line 743) | def get_predictions(self, model_name, params, X_train, y_train, X_test...
method plot_feature_importance (line 802) | def plot_feature_importance(self, feature_importance):
method run_single_model_validation_test_pred (line 822) | def run_single_model_validation_test_pred(
method run_single_model_validation (line 930) | def run_single_model_validation(
method run_stacked_model_validation (line 990) | def run_stacked_model_validation(
method find_optimal_params (line 1073) | def find_optimal_params(self, model_name="xgboost"):
method get_single_model_test_prediction (line 1092) | def get_single_model_test_prediction(
method get_stacked_model_test_prediction (line 1116) | def get_stacked_model_test_prediction(
FILE: classification/Kaggle Malware Prediction/models.py
function fast_auc (line 5) | def fast_auc(y_true, y_prob):
function eval_auc (line 19) | def eval_auc(preds, dtrain):
function predict_chunk (line 25) | def predict_chunk(model, test):
function train_model (line 40) | def train_model(
FILE: classification/Kaggle Malware Prediction/models_zoo.py
class BesXGboost (line 14) | class BesXGboost:
method __init__ (line 56) | def __init__(
method fit (line 96) | def fit(self, X_train, y_train):
method predict (line 140) | def predict(self, X_test):
method feature_importance (line 145) | def feature_importance(self):
method _optimize_single_param (line 162) | def _optimize_single_param(self):
method find_best_params (line 166) | def find_best_params(kag):
class BesLightGBM (line 360) | class BesLightGBM:
method __init__ (line 376) | def __init__(self, params, metric="auc", maximize=True, verbose=True, ...
method fit (line 383) | def fit(self, X_train, y_train):
method predict (line 426) | def predict(self, X_test):
method feature_importance (line 430) | def feature_importance(self):
method find_best_params (line 436) | def find_best_params(kag):
class BesCatBoost (line 440) | class BesCatBoost:
method __init__ (line 456) | def __init__(self, params, metric="AUC", maximize=True, verbose=True, ...
method fit (line 463) | def fit(self, X_train, y_train):
method predict (line 475) | def predict(self, X_test):
method feature_importance (line 479) | def feature_importance(self):
method find_best_params (line 483) | def find_best_params(kag):
FILE: classification/Kaggle Malware Prediction/target_encoding.py
class TargetEncoding (line 6) | class TargetEncoding(object):
method __init__ (line 12) | def __init__(self, C=10):
method fit (line 15) | def fit(self, data_train, data_test, total_test, feature, target):
FILE: classification/Kaggle Petfinder/8th-place-solution-code.py
class PetFinderParser (line 90) | class PetFinderParser(object):
method __init__ (line 91) | def __init__(self, debug=False):
method open_metadata_file (line 99) | def open_metadata_file(self, filename):
method open_sentiment_file (line 107) | def open_sentiment_file(self, filename):
method open_image_file (line 115) | def open_image_file(self, filename):
method parse_sentiment_file (line 122) | def parse_sentiment_file(self, file):
method parse_metadata_file (line 158) | def parse_metadata_file(self, file):
function confusion_matrix (line 205) | def confusion_matrix(rater_a, rater_b, min_rating=None, max_rating=None):
function histogram (line 221) | def histogram(ratings, min_rating=None, max_rating=None):
function quadratic_weighted_kappa (line 236) | def quadratic_weighted_kappa(y, y_pred):
class OptimizedRounder (line 268) | class OptimizedRounder(object):
method __init__ (line 269) | def __init__(self):
method _kappa_loss (line 272) | def _kappa_loss(self, coef, X, y):
method fit (line 289) | def fit(self, X, y):
method predict (line 296) | def predict(self, X, coef):
method coefficients (line 311) | def coefficients(self):
function rmse (line 315) | def rmse(actual, predicted):
function extract_additional_features (line 320) | def extract_additional_features(pet_id, mode="train"):
function set_seed (line 352) | def set_seed(seed=0):
function faith (line 362) | def faith(title):
function reduce_mem_usage (line 368) | def reduce_mem_usage(df, verbose=True):
function clean_name (line 408) | def clean_name(x):
function relative_age (line 426) | def relative_age(cols):
function VerifibalePhotoAmy (line 436) | def VerifibalePhotoAmy(number):
function seo_value (line 444) | def seo_value(cols):
function genuine_name (line 451) | def genuine_name(cols):
function rankbyG (line 463) | def rankbyG(alldata, group):
function get_new_columns (line 475) | def get_new_columns(name, aggs):
function agg_features (line 479) | def agg_features(df, groupby, agg, prefix):
function bounding_features (line 485) | def bounding_features(
function open_breeds_info_file (line 579) | def open_breeds_info_file(filename):
function parse_sentiment_file (line 585) | def parse_sentiment_file(file):
function resize_to_square (line 603) | def resize_to_square(im, img_size):
function load_image (line 620) | def load_image(path):
function load_image2 (line 627) | def load_image2(path, image_size):
function getSize (line 634) | def getSize(filename):
function getDimensions (line 639) | def getDimensions(filename):
function meta_nlp_feats (line 644) | def meta_nlp_feats(df, col):
function load_tabular_data (line 680) | def load_tabular_data():
function load_image_data (line 711) | def load_image_data():
function load_metadata (line 737) | def load_metadata():
function load_sentiment_data (line 763) | def load_sentiment_data():
function build_model (line 788) | def build_model(
function train_model (line 803) | def train_model(model, train, test, nn_params={"batch_size": 64, "img_si...
function image_feature (line 872) | def image_feature(model, train, test, nn_params={"batch_size": 64, "img_...
function basic_features (line 951) | def basic_features(train, test):
function image_dim_features (line 1287) | def image_dim_features(train, test):
function metadata_features (line 1349) | def metadata_features(train, test):
function breed_maps (line 1508) | def breed_maps(train_proc, test_proc, labels_breed):
function nlp_features (line 1555) | def nlp_features(X_temp):
function run_lgbm (line 1595) | def run_lgbm(X_temp, test):
function run_xgb (line 1738) | def run_xgb(X_temp, test):
function add_noise (line 1876) | def add_noise(series, noise_level):
function target_encode (line 1880) | def target_encode(
FILE: deployment/docker flask fit predict/hello.py
function hello (line 21) | def hello():
function squar_val (line 28) | def squar_val(username):
function average (line 33) | def average(lst):
function avg (line 38) | def avg(nums):
function fit_predict_iris (line 45) | def fit_predict_iris(params):
function show_image (line 56) | def show_image():
function add_message (line 62) | def add_message():
function bad_request (line 79) | def bad_request():
class MyForm (line 97) | class MyForm(FlaskForm):
function submit (line 103) | def submit():
function allowed_file (line 131) | def allowed_file(filename):
function upload_file (line 136) | def upload_file():
FILE: deployment/ds docker db template/docker/postgres/initdb.sql
type d_date (line 3) | CREATE TABLE d_date
type d_date_date_actual_idx (line 38) | CREATE INDEX d_date_date_actual_idx
FILE: general studies/get feature importance.py
function mutual_incoherence (line 39) | def mutual_incoherence(X_relevant, X_irelevant):
FILE: time series regression/ARIMA/AR.py
function plotds (line 11) | def plotds(xt, nlag=30, fig_size=(12, 10)):
FILE: time series regression/ARIMA/ARIMA.py
function plotds (line 13) | def plotds(xt, nlag=30, fig_size=(12, 10)):
FILE: time series regression/ARIMA/ARMA.py
function plotds (line 11) | def plotds(xt, nlag=30, fig_size=(12, 10)):
FILE: time series regression/ARIMA/MA.py
function plotds (line 10) | def plotds(xt, nlag=30, fig_size=(12, 10)):
FILE: time series regression/anomaly detection/anomaly-detection-using-facebook-s-prophet.py
function add_autoincrement (line 65) | def add_autoincrement(render_func):
function render (line 88) | def render(chart, id="vega-chart"):
function fit_predict_model (line 226) | def fit_predict_model(dataframe, interval_width=0.99, changepoint_range=...
function detect_anomalies (line 255) | def detect_anomalies(forecast):
function plot_anomalies (line 291) | def plot_anomalies(forecasted):
FILE: time series regression/autocorelation, mov avg etc/doubleExponentialSmoothing.py
function double_exp_smoothing (line 34) | def double_exp_smoothing(x, alpha, beta):
function single_exp_smoothing (line 59) | def single_exp_smoothing(x, alpha):
FILE: time series regression/autocorelation, mov avg etc/simpleExponentialSmoothing.py
function single_exp_smoothing (line 18) | def single_exp_smoothing(x, alpha):
FILE: time series regression/autocorelation, mov avg etc/tripleExponentialSmoothing.py
function initialize_T (line 21) | def initialize_T(x, seasonLength):
function initialize_seasonalilty (line 31) | def initialize_seasonalilty(x, seasonLength):
function triple_exp_smoothing (line 55) | def triple_exp_smoothing(x, seasonLength, alpha, beta, gamma, h):
Copy disabled (too large)
Download .json
Condensed preview — 247 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (12,438K chars).
[
{
"path": ".github/ISSUE_TEMPLATE/Bug_report.md",
"chars": 834,
"preview": "---\nname: Bug report\nabout: Create a report to help us improve\ntitle: ''\nlabels: ''\nassignees: ''\n\n---\n\n**Describe the b"
},
{
"path": ".github/ISSUE_TEMPLATE/Feature_request.md",
"chars": 595,
"preview": "---\nname: Feature request\nabout: Suggest an idea for this project\ntitle: ''\nlabels: ''\nassignees: ''\n\n---\n\n**Is your fea"
},
{
"path": ".github/ISSUE_TEMPLATE/custom.md",
"chars": 126,
"preview": "---\nname: Custom issue template\nabout: Describe this issue template's purpose here.\ntitle: ''\nlabels: ''\nassignees: ''\n\n"
},
{
"path": ".github/workflows/label.yml",
"chars": 489,
"preview": "# This workflow will triage pull requests and apply a label based on the\n# paths that are modified in the pull request.\n"
},
{
"path": ".gitignore",
"chars": 1151,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Data files\n*.csv\n*.tsv"
},
{
"path": ".pre-commit-config.yaml",
"chars": 128,
"preview": "repos:\n - repo: https://github.com/psf/black\n rev: 24.1.0\n hooks:\n - id: black\n language_version: pyt"
},
{
"path": "CODE_OF_CONDUCT.md",
"chars": 3209,
"preview": "# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nIn the interest of fostering an open and welcoming environment, w"
},
{
"path": "CONTRIBUTING.md",
"chars": 1453,
"preview": "# Contributing to ML-DL-scripts\n\nThank you for your interest in contributing! This repository contains Machine Learning "
},
{
"path": "DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/CODE_OF_CONDUCT.md",
"chars": 3338,
"preview": "# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nIn the interest of fostering an open and welcoming environment, w"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/LICENSE",
"chars": 11357,
"preview": " Apache License\n Version 2.0, January 2004\n "
},
{
"path": "DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/README.md",
"chars": 6829,
"preview": "[](https://github.com/psf/black) [![Lice"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/README.MD",
"chars": 8786,
"preview": "REFERENCE (initial code): https://github.com/sdv-dev/CTGAN\n\n<p align=\"left\">\n<img width=15% src=\"https://dai.lids.mit.ed"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/__init__.py",
"chars": 277,
"preview": "# -*- coding: utf-8 -*-\n\n\"\"\"Top-level package for ctgan.\"\"\"\n\n__author__ = \"MIT Data To AI Lab\"\n__email__ = \"dailabmit@gm"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/__main__.py",
"chars": 1761,
"preview": "import argparse\n\nfrom ctgan.data import read_csv, read_tsv, write_tsv\nfrom ctgan.synthesizer import CTGANSynthesizer\n\n\nd"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/conditional.py",
"chars": 2956,
"preview": "import numpy as np\n\n\nclass ConditionalGenerator(object):\n def __init__(self, data, output_info, log_frequency):\n "
},
{
"path": "DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/data.py",
"chars": 2344,
"preview": "import json\n\nimport numpy as np\nimport pandas as pd\n\n\ndef read_csv(csv_filename, meta_filename=None, header=True, discre"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/demo.py",
"chars": 155,
"preview": "import pandas as pd\n\nDEMO_URL = \"http://ctgan-data.s3.amazonaws.com/census.csv.gz\"\n\n\ndef load_demo():\n return pd.read"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/models.py",
"chars": 2321,
"preview": "import torch\nfrom torch.nn import BatchNorm1d, Dropout, LeakyReLU, Linear, Module, ReLU, Sequential\n\n\nclass Discriminato"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/sampler.py",
"chars": 1160,
"preview": "import numpy as np\n\n\nclass Sampler(object):\n \"\"\"docstring for Sampler.\"\"\"\n\n def __init__(self, data, output_info):"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/synthesizer.py",
"chars": 11191,
"preview": "import numpy as np\nimport torch\nfrom ctgan.conditional import ConditionalGenerator\nfrom ctgan.models import Discriminato"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/ctgan/transformer.py",
"chars": 5657,
"preview": "import numpy as np\nimport pandas as pd\nfrom sklearn.exceptions import ConvergenceWarning\nfrom sklearn.mixture import Bay"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/encoders.py",
"chars": 10696,
"preview": "from typing import List\n\nimport numpy as np\nimport pandas as pd\nfrom category_encoders.backward_difference import Backwa"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/model.py",
"chars": 4503,
"preview": "import numpy as np\nimport pandas as pd\nfrom lightgbm import LGBMClassifier\nfrom scipy.stats import rankdata\nfrom sklearn"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/results/fit_predict_scores.txt",
"chars": 21166,
"preview": "dataset_name\tEncoder\tvalidation_type\tsample_type\ttrain_shape\ttest_shape\tmean_target_before_sampling_train\tmean_target_af"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/run_experiment.py",
"chars": 4367,
"preview": "import time\n\nimport numpy as np\nimport pandas as pd\nfrom sklearn.metrics import roc_auc_score\nfrom sklearn.model_selecti"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/GAN-for-tabular-data/utils.py",
"chars": 7309,
"preview": "import gc\nfrom typing import List\n\nimport numpy as np\nimport pandas as pd\nfrom ctgan import CTGANSynthesizer\nfrom sklear"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/pytorch/CGAN/ConditionalGAN.py",
"chars": 6339,
"preview": "#!/usr/bin/env python\n# coding: utf-8\n\n# # Implementation of Conditional GANs\n# Reference: https://arxiv.org/pdf/1411.17"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/pytorch/DCGAN/dcgan.py",
"chars": 11310,
"preview": "#!/usr/bin/env python\n# coding: utf-8\n\n# In[1]:\n\n\n# reference https://pytorch.org/tutorials/beginner/dcgan_faces_tutoria"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/pytorch/ProgressiveGAN/README.md",
"chars": 2401,
"preview": "reference code and repo https://github.com/odegeasslbc/Progressive-GAN-pytorch\n\n# Progressive-GAN-pytorch\nA pytorch impl"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/pytorch/ProgressiveGAN/progan_modules.py",
"chars": 8886,
"preview": "import torch\nfrom torch import nn\nfrom torch.nn import functional as F\n\nfrom math import sqrt\n\n\nclass EqualLR:\n def _"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/pytorch/ProgressiveGAN/train.py",
"chars": 11097,
"preview": "import argparse\nimport numpy as np\nimport random\nimport torch\nimport torch.nn.functional as F\nfrom PIL import Image\nfrom"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/pytorch/Semi-supervised GAN/Datasets.py",
"chars": 1340,
"preview": "import numpy as np\nimport torch\nfrom torch.utils.data import TensorDataset\nfrom torchvision import datasets, transforms\n"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/pytorch/Semi-supervised GAN/ImprovedGAN.py",
"chars": 10915,
"preview": "# -*- coding:utf-8 -*-\nfrom __future__ import print_function\n\nimport argparse\nimport numpy as np\nimport os\nimport pdb\nim"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/pytorch/Semi-supervised GAN/Nets.py",
"chars": 2567,
"preview": "import torch\nfrom torch.nn.parameter import Parameter\nfrom torch import nn\nfrom torch.nn import functional as F\nfrom tor"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/pytorch/Semi-supervised GAN/README.md",
"chars": 2474,
"preview": "reference https://github.com/Sleepychord/ImprovedGAN-pytorch\n\n# Improved GAN (Semi-supervised GAN)\nThis is an implementa"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/pytorch/Semi-supervised GAN/functional.py",
"chars": 1774,
"preview": "import math\nimport pdb\nimport torch\nimport torch.nn.functional as F\nfrom torch.nn.parameter import Parameter\n\n\ndef log_s"
},
{
"path": "DEEP LEARNING/Autoencoders GANS/pytorch/VAE/VAR mnist.py",
"chars": 6613,
"preview": "#!/usr/bin/env python\n# coding: utf-8\n\n# In[3]:\n\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimp"
},
{
"path": "DEEP LEARNING/Google Landmark Retrieval Challenge.py",
"chars": 4311,
"preview": "### Google Landmark Retrieval Challenge\n\n# export PATH=~/anaconda3/bin:$PATH\n# pip install --ignore-installed --upgrade "
},
{
"path": "DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/README.MD",
"chars": 1570,
"preview": "## Solution to Avito Challenge 2018\n\nLink: https://www.kaggle.com/c/avito-demand-prediction\n\nWhen selling used goods onl"
},
{
"path": "DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/avito_deepIQA/deepIQA/LICENSE",
"chars": 1073,
"preview": "MIT License\n\nCopyright (c) 2016 Dominique Maniry\n\nPermission is hereby granted, free of charge, to any person obtaining "
},
{
"path": "DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/avito_deepIQA/deepIQA/README.md",
"chars": 963,
"preview": "# deepIQA\n\nThis is the reference implementation of [Deep Neural Networks for No-Reference and Full-Reference Image Quali"
},
{
"path": "DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/avito_deepIQA/deepIQA/evaluate.py",
"chars": 3181,
"preview": "#!/usr/bin/python2\nimport argparse\nimport os\n\nimport cv2\nimport numpy as np\nimport pandas as pd\nimport six\nfrom chainer "
},
{
"path": "DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/avito_deepIQA/deepIQA/evaluate_back.py",
"chars": 2442,
"preview": "#!/usr/bin/python2\nimport argparse\n\nimport cv2\nimport numpy as np\nimport six\nfrom chainer import cuda\nfrom chainer impor"
},
{
"path": "DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/avito_deepIQA/deepIQA/fr_model.py",
"chars": 4054,
"preview": "import chainer\nimport chainer.functions as F\nimport chainer.links as L\nfrom chainer import Variable\nfrom chainer import "
},
{
"path": "DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/avito_deepIQA/deepIQA/nr_model.py",
"chars": 3348,
"preview": "import chainer\nimport chainer.functions as F\nimport chainer.links as L\nfrom chainer import Variable\nfrom chainer import "
},
{
"path": "DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/neural-image-assessment/README.md",
"chars": 4466,
"preview": "# NIMA: Neural Image Assessment\r\nImplementation of [NIMA: Neural Image Assessment](https://arxiv.org/abs/1709.05424) in "
},
{
"path": "DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/neural-image-assessment/evaluate_inception_resnet.py",
"chars": 3257,
"preview": "import numpy as np\nimport argparse\nfrom path import Path\n\nfrom keras.models import Model\nfrom keras.layers import Dense,"
},
{
"path": "DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/neural-image-assessment/evaluate_mobilenet.py",
"chars": 3214,
"preview": "import numpy as np\nimport argparse\nfrom path import Path\n\nfrom keras.models import Model\nfrom keras.layers import Dense,"
},
{
"path": "DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/neural-image-assessment/evaluate_nasnet.py",
"chars": 2987,
"preview": "import numpy as np\nimport argparse\nfrom path import Path\n\nfrom keras.models import Model\nfrom keras.layers import Dense,"
},
{
"path": "DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/neural-image-assessment/utils/check_dataset.py",
"chars": 2356,
"preview": "import numpy as np\nimport os\nimport glob\n\nimport tensorflow as tf\n\n\"\"\"\nChecks all images from the AVA dataset if they ha"
},
{
"path": "DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/neural-image-assessment/utils/data_loader.py",
"chars": 7471,
"preview": "import numpy as np\nimport os\nimport glob\n\nimport tensorflow as tf\n\n# path to the images and the text file which holds th"
},
{
"path": "DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/neural-image-assessment/utils/nasnet.py",
"chars": 38061,
"preview": "\"\"\"NASNet-A models for Keras\n\nNASNet refers to Neural Architecture Search Network, a family of models\nthat were designed"
},
{
"path": "DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/neural-image-assessment/utils/score_utils.py",
"chars": 373,
"preview": "import numpy as np\n\n# calculate mean score for AVA dataset\ndef mean_score(scores):\n si = np.arange(1, 11, 1)\n mean"
},
{
"path": "DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/image feat. extraction/nn_image_features.py",
"chars": 5918,
"preview": "# @kmike `s code\n\n# image feature extractions\n\nimport numpy as np\nimport pandas as pd\nimport os\nfrom tqdm import tqdm\nim"
},
{
"path": "DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/stem to SVD.py",
"chars": 8064,
"preview": "#Thanks for the approach https://github.com/ML-Person/My-solution-to-Avito-Challenge-2018 (@nikita)\nimport pandas as pd\n"
},
{
"path": "DEEP LEARNING/Kaggle Avito Demand Prediction Challenge/text embeddings.py",
"chars": 2308,
"preview": "# @Kmike `s code\n# https://github.com/deepmipt/DeepPavlov/blob/a59703de60deda349fc39918a1fc1b242638b7f7/pretrained-vecto"
},
{
"path": "DEEP LEARNING/NLP/Kaggle Quora Insincere Questions Classification/3rd-place.py",
"chars": 17914,
"preview": "from __future__ import absolute_import, division\n\nimport os\nimport time\nimport numpy as np\nimport pandas as pd\nimport ge"
},
{
"path": "DEEP LEARNING/NLP/Kaggle Quora Insincere Questions Classification/README.MD",
"chars": 977,
"preview": "## Solution to Quora Insincere Questions Classification\n\n\nLink: https://www.kaggle.com/c/microsoft-malware-prediction\n\nT"
},
{
"path": "DEEP LEARNING/NLP/Kaggle Quora Insincere Questions Classification/fix misspellings.py",
"chars": 4996,
"preview": "import io\nimport collections\nimport matplotlib.pyplot as plt\nimport nltk\nimport enchant\n \nwords = []\nwith io.open('cor"
},
{
"path": "DEEP LEARNING/NLP/LSTM RNN/Next Chars pytorch/Char level RNN/data/anna.txt",
"chars": 1985223,
"preview": "Chapter 1\n\n\nHappy families are all alike; every unhappy family is unhappy in its own\nway.\n\nEverything was in confusion i"
},
{
"path": "DEEP LEARNING/NLP/LSTM RNN/Next Chars pytorch/project-tv-script-generation/data/Seinfeld_Scripts.txt",
"chars": 3471464,
"preview": "jerry: do you know what this is all about? do you know, why were here? to be out, this is out...and out is one of the si"
},
{
"path": "DEEP LEARNING/NLP/LSTM RNN/Next Chars pytorch/project-tv-script-generation/helper.py",
"chars": 1431,
"preview": "import os\nimport pickle\nimport torch\n\n\nSPECIAL_WORDS = {\"PADDING\": \"<PAD>\"}\n\n\ndef load_data(path):\n \"\"\"\n Load Data"
},
{
"path": "DEEP LEARNING/NLP/LSTM RNN/Next Chars pytorch/project-tv-script-generation/problem_unittests.py",
"chars": 9665,
"preview": "from unittest.mock import MagicMock, patch\nimport numpy as np\nimport torch\n\n\nclass _TestNN(torch.nn.Module):\n def __i"
},
{
"path": "DEEP LEARNING/NLP/LSTM RNN/Sentiment pytorch/labels.txt",
"chars": 225000,
"preview": "positive\nnegative\npositive\nnegative\npositive\nnegative\npositive\nnegative\npositive\nnegative\npositive\nnegative\npositive\nneg"
},
{
"path": "DEEP LEARNING/NLP/WSDM - Fake News Classification/Berd generate embeddings/0_bert_encode_en_train.py",
"chars": 1861,
"preview": "# Please run bert-serving-start before running this notebook\n# Setup: https://github.com/hanxiao/bert-as-service\n# Examp"
},
{
"path": "DEEP LEARNING/NLP/WSDM - Fake News Classification/Berd generate embeddings/1_bert_encode_en_test.py",
"chars": 1851,
"preview": "# Please run bert-serving-start before running this notebook\n# Setup: https://github.com/hanxiao/bert-as-service\n# Examp"
},
{
"path": "DEEP LEARNING/NLP/WSDM - Fake News Classification/Berd generate embeddings/2_bert_encode_ch_train.py",
"chars": 1867,
"preview": "# Please run bert-serving-start before running this notebook\n# Setup: https://github.com/hanxiao/bert-as-service\n# Examp"
},
{
"path": "DEEP LEARNING/NLP/WSDM - Fake News Classification/Berd generate embeddings/3_bert_encode_ch_test.py",
"chars": 1857,
"preview": "# Please run bert-serving-start before running this notebook\n# Setup: https://github.com/hanxiao/bert-as-service\n# Examp"
},
{
"path": "DEEP LEARNING/NLP/WSDM - Fake News Classification/Berd generate embeddings/4_gen_encoded_dfs.py",
"chars": 2359,
"preview": "# Please run 0-1-2-3 files before running this one + put raw data to ../data/raw\n\nimport numpy as np\nimport pandas as pd"
},
{
"path": "DEEP LEARNING/NLP/elmo EMBEDDINGS/Sentence encode.html",
"chars": 3005880,
"preview": "<html><head><meta charset=\"utf-8\" /></head><body><script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'l"
},
{
"path": "DEEP LEARNING/NLP/text analyses/Logistic regression with words and char n-grams.py",
"chars": 2042,
"preview": "import numpy as np\nimport pandas as pd\n\nfrom sklearn.feature_extraction.text import TfidfVectorizer\nfrom sklearn.linear_"
},
{
"path": "DEEP LEARNING/Object detection/YOLO Object Localization Keras/.gitignore",
"chars": 50,
"preview": "*.hdf5\n*.h5\n*.ipynb_checkpoints\n*.HDF5\n__pycache__"
},
{
"path": "DEEP LEARNING/Object detection/YOLO Object Localization Keras/README.md",
"chars": 1923,
"preview": "# [Gentle guide on how YOLO Object Localization works with Keras](https://heartbeat.fritz.ai/gentle-guide-on-how-yolo-ob"
},
{
"path": "DEEP LEARNING/Object detection/YOLO Object Localization Keras/font/SIL Open Font License.txt",
"chars": 4432,
"preview": "Copyright (c) 2014, Mozilla Foundation https://mozilla.org/ with Reserved Font Name Fira Mono.\n\nCopyright (c) 2014, Tele"
},
{
"path": "DEEP LEARNING/Object detection/YOLO Object Localization Keras/model_data/coco_classes.txt",
"chars": 625,
"preview": "person\nbicycle\ncar\nmotorbike\naeroplane\nbus\ntrain\ntruck\nboat\ntraffic light\nfire hydrant\nstop sign\nparking meter\nbench\nbir"
},
{
"path": "DEEP LEARNING/Object detection/YOLO Object Localization Keras/model_data/object_classes.txt",
"chars": 3,
"preview": "car"
},
{
"path": "DEEP LEARNING/Object detection/YOLO Object Localization Keras/model_data/yolo_anchors.txt",
"chars": 90,
"preview": "0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828\n"
},
{
"path": "DEEP LEARNING/Object detection/YOLO Object Localization Keras/requirements.txt",
"chars": 47,
"preview": "scipy\nnumpy\nkeras\npandas\nh5py\nmatplotlib\npillow"
},
{
"path": "DEEP LEARNING/Object detection/YOLO Object Localization Keras/yad2k/utils/__init__.py",
"chars": 21,
"preview": "from .utils import *\n"
},
{
"path": "DEEP LEARNING/Object detection/YOLO Object Localization Keras/yad2k/utils/utils.py",
"chars": 473,
"preview": "\"\"\"Miscellaneous utility functions.\"\"\"\n\nfrom functools import reduce\n\n\ndef compose(*funcs):\n \"\"\"Compose arbitrarily m"
},
{
"path": "DEEP LEARNING/Object detection/YOLO Object Localization Keras/yolo_run.py",
"chars": 816,
"preview": "import argparse\nimport os\nimport matplotlib.pyplot as plt\nfrom matplotlib.pyplot import imshow\nimport scipy.io\nimport sc"
},
{
"path": "DEEP LEARNING/Object detection/YOLO Object Localization Keras/yolo_utils.py",
"chars": 3243,
"preview": "import colorsys\nimport imghdr\nimport os\nimport random\nfrom keras import backend as K\n\nimport numpy as np\nfrom PIL import"
},
{
"path": "DEEP LEARNING/Object detection/keras retinanet/train.py",
"chars": 21747,
"preview": "#!/usr/bin/env python\n\n\"\"\"\nCopyright 2017-2018 Fizyr (https://fizyr.com)\n\nLicensed under the Apache License, Version 2.0"
},
{
"path": "DEEP LEARNING/Pytorch from scratch/CNN/project-dog-classification/README.md",
"chars": 4111,
"preview": "[//]: # (Image References)\n\n[image1]: ./images/sample_dog_output.png \"Sample Output\"\n[image2]: ./images/vgg16_model.png "
},
{
"path": "DEEP LEARNING/Pytorch from scratch/CNN/project-dog-classification/haarcascades/haarcascade_frontalface_alt.xml",
"chars": 676709,
"preview": "<?xml version=\"1.0\"?>\n<!--\n Stump-based 20x20 gentle adaboost frontal face detector.\n Created by Rainer Lienhart.\n"
},
{
"path": "DEEP LEARNING/Pytorch from scratch/MLP/fc_model.py",
"chars": 3423,
"preview": "import torch\nfrom torch import nn\nimport torch.nn.functional as F\n\n\nclass Network(nn.Module):\n def __init__(self, inp"
},
{
"path": "DEEP LEARNING/Pytorch from scratch/MLP/helper.py",
"chars": 2678,
"preview": "import matplotlib.pyplot as plt\nimport numpy as np\nfrom torch import nn, optim\nfrom torch.autograd import Variable\n\n\ndef"
},
{
"path": "DEEP LEARNING/Pytorch from scratch/TODO/GAN/cycle-gan/helpers.py",
"chars": 2798,
"preview": "# helper functions for saving sample data and models\n\n# import data loading libraries\nimport os\nimport pdb\nimport pickle"
},
{
"path": "DEEP LEARNING/Pytorch from scratch/TODO/GAN/cycle-gan/samples_cyclegan/samples_dir.txt",
"chars": 73,
"preview": "An empty samples directory for saving generated samples during training.\n"
},
{
"path": "DEEP LEARNING/Pytorch from scratch/TODO/GAN/project-face-generation/problem_unittests.py",
"chars": 2185,
"preview": "from unittest.mock import MagicMock, patch\nimport numpy as np\nimport torch\n\n\ndef _print_success_message():\n print(\"Te"
},
{
"path": "DEEP LEARNING/Pytorch from scratch/word2vec-embeddings/data/download_data.txt",
"chars": 142,
"preview": "Download the text8.zip file as per the instructions in the exercise notebooks. Extract that data in this directory so th"
},
{
"path": "DEEP LEARNING/Pytorch from scratch/word2vec-embeddings/utils.py",
"chars": 1512,
"preview": "import re\nfrom collections import Counter\n\n\ndef preprocess(text):\n\n # Replace punctuation with tokens so we can use t"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/README.MD",
"chars": 1113,
"preview": "## Solution to TGS Salt Identification Challenge 2018\n\nLink: https://www.kaggle.com/c/tgs-salt-identification-challenge\n"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/data_loader.py",
"chars": 5121,
"preview": "import torch\nfrom torch.utils.data import Dataset\nfrom torch.utils.data import DataLoader\nfrom data_process.transform im"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/data_process/10fold/test.txt",
"chars": 270000,
"preview": "943df308b6.png\n00cdefa5d6.png\n648d1ae4be.png\n3c7d16efb4.png\nb59bb4f42b.png\n60b26ed15e.png\n5f6eb0452b.png\nb57b8d5c7e.png\n"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/data_process/transform.py",
"chars": 11993,
"preview": "# from include import *\n# from utility.draw import *\n# from utility.file import *\nimport cv2\nimport numpy as np\nimport o"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/evaluate.py",
"chars": 1552,
"preview": "import numpy as np\n\n### metric #################################################################################\n# https"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/loss/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/loss/bce_losses.py",
"chars": 2185,
"preview": "import numpy as np\nimport torch\nimport torch.optim as optim\nfrom torch.autograd import Variable\nimport torch.nn as nn\nfr"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/loss/cyclic_lr.py",
"chars": 3145,
"preview": "import torch\nimport math\nfrom torch.optim.lr_scheduler import _LRScheduler\n\n\nclass CosineAnnealingLR_with_Restart(_LRSch"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/loss/lovasz_losses.py",
"chars": 7835,
"preview": "\"\"\"\nLovasz-Softmax and Jaccard hinge loss in PyTorch\nMaxim Berman 2018 ESAT-PSI KU Leuven (MIT License)\n\"\"\"\n\nfrom __futu"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/main.py",
"chars": 23706,
"preview": "import argparse\nfrom data_loader import *\nfrom data_process.transform import *\nfrom loss.bce_losses import *\nfrom loss.c"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/model/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/model/ibnnet.py",
"chars": 7529,
"preview": "from __future__ import division\n\n\"\"\" \nCreates a ResNeXt Model as defined in:\nXie, S., Girshick, R., Dollar, P., Tu, Z., "
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/model/model.py",
"chars": 14322,
"preview": "import torch\nimport torch.nn as nn\nimport torchvision\nimport torch.nn.functional as F\nfrom ibnnet import resnext101_ibn_"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/model/senet.py",
"chars": 17057,
"preview": "\"\"\"\nResNet code gently borrowed from\nhttps://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py\n\"\"\"\nfrom"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v1/utils.py",
"chars": 7552,
"preview": "import logging\nimport os\n\n# import pathlib\nimport random\nimport sys\nimport time\nfrom itertools import chain\nfrom collect"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/augmentation.py",
"chars": 9199,
"preview": "import cv2\nimport numpy as np\nimport imgaug as ia\nfrom imgaug import augmenters as iaa\n\nfrom .utils import get_crop_pad_"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/callbacks.py",
"chars": 21327,
"preview": "from functools import partial\nimport os\nfrom datetime import datetime, timedelta\n\nimport numpy as np\nimport torch\nimport"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/loaders.py",
"chars": 22481,
"preview": "import numpy as np\nimport torch\nimport torchvision.transforms as transforms\nfrom PIL import Image\nfrom attrdict import A"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/metrics.py",
"chars": 1866,
"preview": "import numpy as np\nfrom tqdm import tqdm\nfrom pycocotools import mask as cocomask\n\nfrom .utils import get_segmentations\n"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/models.py",
"chars": 16379,
"preview": "import numpy as np\nimport torch\nimport torch.optim as optim\nfrom torch.autograd import Variable\nimport torch.nn as nn\nfr"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/pipelines.py",
"chars": 8283,
"preview": "from functools import partial\n\nfrom steppy.base import Step, IdentityOperation\nfrom steppy.adapter import Adapter, E\n\nfr"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/pnasnet.py",
"chars": 17786,
"preview": "from collections import OrderedDict\n\n# link to the github repository https://github.com/Cadene/pretrained-models.pytorch"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/postprocessing.py",
"chars": 1326,
"preview": "import numpy as np\nfrom scipy import ndimage as ndi\nfrom skimage.transform import resize\n\nfrom .utils import get_crop_pa"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/preprocessing.py",
"chars": 99,
"preview": "import numpy as np\n\n\ndef img_cumsum(img):\n return (np.float32(img) - img.mean()).cumsum(axis=0)\n"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/resnext.py",
"chars": 5583,
"preview": "\"\"\"\nNew for ResNeXt:\n1. Wider bottleneck\n2. Add group for conv2\n\"\"\"\n\nimport torch.nn as nn\nimport math\n\n__all__ = [\"ResN"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/unet_models.py",
"chars": 45907,
"preview": "from torch import nn\n\n# from torch.nn import functional as F\nimport torch\nfrom torchvision import models\nimport torchvis"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/common_blocks/utils.py",
"chars": 15674,
"preview": "import logging\nimport os\nimport pathlib\nimport random\nimport sys\nimport time\nfrom itertools import chain\nfrom collection"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/configs/neptune.yaml",
"chars": 1651,
"preview": "project: neptune-ml/Salt-Detection\n\nname: tgs_salt_identification_challenge\ntags: [solution-3]\n\nmetric:\n channel: 'IOUT"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/modules/__init__.py",
"chars": 129,
"preview": "from .bn import ABN, InPlaceABN, InPlaceABNWrapper\nfrom .misc import GlobalAvgPool2d\nfrom .residual import IdentityResid"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/modules/bn.py",
"chars": 3896,
"preview": "from collections import OrderedDict, Iterable\nfrom itertools import repeat\n\nimport torch\nimport torch.nn as nn\n\nimport t"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/modules/build.py",
"chars": 544,
"preview": "import os\n\nfrom torch.utils.ffi import create_extension\n\nsources = [\"src/lib_cffi.cpp\"]\nheaders = [\"src/lib_cffi.h\"]\next"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/modules/build.sh",
"chars": 298,
"preview": "#!/bin/bash\n\n# Configuration\nCUDA_GENCODE=\"\\\n-gencode=arch=compute_61,code=sm_61 \\\n-gencode=arch=compute_52,code=sm_52 \\"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/modules/functions.py",
"chars": 10930,
"preview": "import torch.autograd as autograd\nimport torch.cuda.comm as comm\nfrom torch.autograd.function import once_differentiable"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/modules/misc.py",
"chars": 336,
"preview": "import torch.nn as nn\n\n\nclass GlobalAvgPool2d(nn.Module):\n def __init__(self):\n \"\"\"Global average pooling over"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/modules/residual.py",
"chars": 4503,
"preview": "from collections import OrderedDict\n\nimport torch.nn as nn\n\nfrom .bn import ABN\n\n\nclass IdentityResidualBlock(nn.Module)"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/modules/src/common.h",
"chars": 2692,
"preview": "#pragma once\n\n#include <cuda_runtime_api.h>\n\n/*\n * General settings\n */\nconst int WARP_SIZE = 32;\nconst int MAX_BLOCK_SI"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/modules/src/inplace_abn.cpp",
"chars": 2398,
"preview": "#include <torch/torch.h>\n\n#include <vector>\n\n#include \"inplace_abn.h\"\n\nstd::vector<at::Tensor> mean_var(at::Tensor x) {\n"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/modules/src/inplace_abn.h",
"chars": 1484,
"preview": "#pragma once\n\n#include <ATen/ATen.h>\n\n#include <vector>\n\nstd::vector<at::Tensor> mean_var_cpu(at::Tensor x);\nstd::vector"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/modules/src/inplace_abn_cpu.cpp",
"chars": 3309,
"preview": "#include <ATen/ATen.h>\n\n#include <vector>\n\n#include \"inplace_abn.h\"\n\nat::Tensor reduce_sum(at::Tensor x) {\n if (x.ndime"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/modules/src/inplace_abn_cuda.cu",
"chars": 9966,
"preview": "#include <ATen/ATen.h>\n\n#include <thrust/device_ptr.h>\n#include <thrust/transform.h>\n\n#include <vector>\n\n#include \"commo"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/v2/modules/wider_resnet.py",
"chars": 2959,
"preview": "from collections import OrderedDict\nimport torch.nn as nn\n\nfrom modules import IdentityResidualBlock, ABN, GlobalAvgPool"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/vanilla unet/utils/cyclelr_callback.py",
"chars": 5415,
"preview": "from keras.callbacks import *\n\n\nclass CyclicLR(Callback):\n \"\"\"This callback implements a cyclical learning rate polic"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/vanilla unet/utils/lovasz_losses_tf.py",
"chars": 6452,
"preview": "# https://github.com/bermanmaxim/LovaszSoftmax/blob/master/tensorflow/lovasz_losses_tf.py\n\n\"\"\"\nLovasz-Softmax and Jaccar"
},
{
"path": "DEEP LEARNING/segmentation/Kaggle TGS Salt Identification Challenge/vanilla unet/utils/zf_unet_224_model.py",
"chars": 4962,
"preview": "# coding: utf-8\n\"\"\"\n - \"ZF_UNET_224\" Model based on UNET code from following paper: https://arxiv.org/abs/1505.04597\n"
},
{
"path": "DEEP LEARNING/segmentation/Segmentation pipeline/README.MD",
"chars": 645,
"preview": "### Segmentation model\n\n- Data Massachusetts Roads Dataset from https://www.cs.toronto.edu/~vmnih/data/\n- **get dataset."
},
{
"path": "DEEP LEARNING/segmentation/Segmentation pipeline/get dataset.py",
"chars": 2616,
"preview": "# code from https://github.com/BBarbosa/tflearn-image-recognition-toolkit/blob/4a0528dcfb206b1e45997f2fbc097aafacfa0fa0/"
},
{
"path": "DEEP LEARNING/segmentation/Segmentation pipeline/segmentation pipeline.html",
"chars": 1141697,
"preview": "<!DOCTYPE html>\n<html>\n<head><meta charset=\"utf-8\" />\n\n<title>segmentation pipeline</title>\n\n<script src=\"https://cdnjs."
},
{
"path": "DEEP LEARNING/segmentation/Segmentation pipeline/segmentation pipeline.py",
"chars": 12886,
"preview": "#!/usr/bin/env python\n# coding: utf-8\n\n# ## Install libraries first\n# \n# Be sure keras with tensorflow installed\n# `!co"
},
{
"path": "DEEP LEARNING/segmentation/Segmentation pipeline/weights/.gitkeep",
"chars": 0,
"preview": ""
},
{
"path": "DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/.gitignore",
"chars": 867,
"preview": "# internal and data folders\ninput/\nmodel_weights/\n\n\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py."
},
{
"path": "DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/README.md",
"chars": 3691,
"preview": "# Severstal-Steel-Defect-Detection\nCan you detect and classify defects in steel? Segmentation in Pytorch\nhttps://www.kag"
},
{
"path": "DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/classification_pytorch_dummy.py",
"chars": 46591,
"preview": "# -*- coding: utf-8 -*-\n\"\"\"classification pytorch.ipynb\n\nAutomatically generated by Colaboratory.\n\nOriginal file is loca"
},
{
"path": "DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/bam.py",
"chars": 3109,
"preview": "import torch\nimport math\nimport torch.nn as nn\nimport torch.nn.functional as F\n\n\nclass Flatten(nn.Module):\n def forwa"
},
{
"path": "DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/cbam.py",
"chars": 4323,
"preview": "import torch\nimport math\nimport torch.nn as nn\nimport torch.nn.functional as F\n\n\nclass BasicConv(nn.Module):\n def __i"
},
{
"path": "DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/dataloader.py",
"chars": 8430,
"preview": "import os\nfrom sklearn.model_selection import StratifiedKFold, KFold\nimport cv2\nimport joblib\nimport pdb\nimport time\nimp"
},
{
"path": "DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/generate_folds.py",
"chars": 829,
"preview": "import pandas as pd\nfrom sklearn.model_selection import StratifiedKFold, KFold\nimport joblib\n\ntotal_folds = 10\n\ndf = pd."
},
{
"path": "DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/logger.py",
"chars": 3800,
"preview": "# This file defines a decorator '@log_to()' that logs every call to a\n# function, along with the arguments that function"
},
{
"path": "DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/losses.py",
"chars": 4435,
"preview": "import torch.nn as nn\n\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\nfrom torch.nn import functional as F\n\n\ncla"
},
{
"path": "DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/lovasz_losses.py",
"chars": 8243,
"preview": "\"\"\"\nLovasz-Softmax and Jaccard hinge loss in PyTorch\nMaxim Berman 2018 ESAT-PSI KU Leuven (MIT License)\n\"\"\"\n\nimport torc"
},
{
"path": "DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/metric.py",
"chars": 5372,
"preview": "import os\nimport random\nimport warnings\n\nimport numpy as np\nimport torch\nimport torch.backends.cudnn as cudnn\n\nwarnings."
},
{
"path": "DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/new_metrics.py",
"chars": 15642,
"preview": "from functools import partial\n\nimport numpy as np\nimport torch\nfrom catalyst.dl import Callback, RunnerState, MetricCall"
},
{
"path": "DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/optimizers.py",
"chars": 24570,
"preview": "import math\nimport torch\nfrom torch.optim.optimizer import Optimizer\n\n\nclass RAdam(Optimizer):\n def __init__(self, pa"
},
{
"path": "DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/training_helper.py",
"chars": 10583,
"preview": "import os\nfrom sklearn.model_selection import StratifiedKFold\nimport cv2\nimport pdb\nimport time\nimport warnings\nimport r"
},
{
"path": "DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/common_blocks/utils.py",
"chars": 4380,
"preview": "import numpy as np\nimport matplotlib.pyplot as plt\nimport torch\nimport random\nimport numpy as np\nimport os\nfrom segmenta"
},
{
"path": "DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/configs/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/configs/train_params.py",
"chars": 735,
"preview": "sample_submission_path = (\n \"./input/severstal-steel-defect-detection/sample_submission.csv\"\n)\ntrain_df_path = \"./inp"
},
{
"path": "DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/inference.py",
"chars": 36203,
"preview": "#!/usr/bin/env python\n# coding: utf-8\n\n\nget_ipython().system(\" python ../input/mlcomp/mlcomp/mlcomp/setup.py\")\nget_ipyth"
},
{
"path": "DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/model_resnet.py",
"chars": 7030,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport math\nfrom torch.nn import init\nfrom cbam impor"
},
{
"path": "DEEP LEARNING/segmentation/Severstal-Steel-Defect-Detection-master/train.py",
"chars": 693,
"preview": "import gc\nfrom common_blocks.training_helper import Trainer_cv\nfrom common_blocks.utils import plot, set_seed\nfrom confi"
},
{
"path": "DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/.gitattributes",
"chars": 66,
"preview": "# Auto detect text files and perform LF normalization\n* text=auto\n"
},
{
"path": "DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/.gitignore",
"chars": 1731,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# project specific\n*.csv"
},
{
"path": "DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/README.md",
"chars": 518,
"preview": "# Understanding Clouds from Satellite Images\n \nCode for https://www.kaggle.com/c/understanding_cloud_organization compet"
},
{
"path": "DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/augs.py",
"chars": 3112,
"preview": "import albumentations as albu\n\n\ndef to_tensor(x, **kwargs):\n \"\"\"\n Convert image or mask.\n\n Args:\n x:\n "
},
{
"path": "DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/callbacks.py",
"chars": 5037,
"preview": "from typing import Dict\nimport torch\nimport numpy as np\nfrom catalyst.dl.core import Callback, RunnerState, CallbackOrde"
},
{
"path": "DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/config.py",
"chars": 0,
"preview": ""
},
{
"path": "DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/dataset.py",
"chars": 15045,
"preview": "import os\nimport cv2\n\n\nimport numpy as np\nimport pandas as pd\n\nfrom sklearn.model_selection import train_test_split\nfrom"
},
{
"path": "DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/inference_blend.py",
"chars": 7976,
"preview": "import torch\nimport torch.nn as nn\nfrom torch.optim.lr_scheduler import ReduceLROnPlateau\nfrom catalyst.dl.runner import"
},
{
"path": "DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/losses/losses.py",
"chars": 3659,
"preview": "import torch\nfrom typing import List\nfrom pytorch_toolbelt.losses.functional import sigmoid_focal_loss\nfrom torch.nn.mod"
},
{
"path": "DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/losses/lovasz_losses.py",
"chars": 8487,
"preview": "\"\"\"\nLovasz-Softmax and Jaccard hinge loss in PyTorch\nMaxim Berman 2018 ESAT-PSI KU Leuven (MIT License)\n\"\"\"\n\n# from __fu"
},
{
"path": "DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/optimizers.py",
"chars": 15550,
"preview": "import torch\nimport warnings\nfrom torch.optim.optimizer import Optimizer\nimport math\nimport itertools as it\nimport torch"
},
{
"path": "DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/predict.py",
"chars": 3213,
"preview": "import cv2\nimport numpy as np\nimport pandas as pd\nfrom utils import post_process\nfrom dataset import mask2rle\nfrom tqdm "
},
{
"path": "DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/schedulers.py",
"chars": 471,
"preview": "# import torch\n\n# import warnings\n# warnings.filterwarnings(\"once\")\n#\n# from torch.optim.optimizer import Optimizer\n# im"
},
{
"path": "DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/train.py",
"chars": 9355,
"preview": "import torch\nimport torch.nn as nn\nfrom torch.optim.lr_scheduler import ReduceLROnPlateau\nfrom catalyst.dl.runner import"
},
{
"path": "DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/train.sh",
"chars": 204,
"preview": "python train.py \n--encoder resnet50 \n--bs 8 \n--gradient_accumulation 4 \n--lr 1e-5\n--num_epochs 2\n--lr_e 10e-5 \n--separat"
},
{
"path": "DEEP LEARNING/segmentation/Understanding-Clouds-from-Satellite-Images-master/utils.py",
"chars": 7244,
"preview": "import matplotlib.pyplot as plt\nimport cv2\nimport numpy as np\nfrom catalyst.dl.callbacks import InferCallback, Checkpoin"
},
{
"path": "LICENSE",
"chars": 11357,
"preview": " Apache License\n Version 2.0, January 2004\n "
},
{
"path": "README.md",
"chars": 4391,
"preview": "# Machine Learning and Deep Learning Scripts\n\n[:\n \"\"\""
},
{
"path": "classification/Kaggle Malware Prediction/test_preds_level_1/readme.md",
"chars": 0,
"preview": ""
},
{
"path": "classification/Kaggle Malware Prediction/test_preds_level_2/readme.md",
"chars": 0,
"preview": ""
},
{
"path": "classification/Kaggle Petfinder/8th-place-solution-code.py",
"chars": 76296,
"preview": "import cv2\nimport os\nimport time\nimport gc\nimport glob\nimport json\nimport pprint\nimport joblib\nimport warnings\nimport ra"
},
{
"path": "classification/Kaggle Petfinder/README.MD",
"chars": 1425,
"preview": "## Solution to PetFinder.my Adoption Prediction Challenge\n\n\nLink: https://www.kaggle.com/c/microsoft-malware-prediction\n"
},
{
"path": "classification/Kaggle red hat user/README.MD",
"chars": 813,
"preview": "## Solution to Predicting Red Hat Business Value 2016\n\nLink: https://www.kaggle.com/c/predicting-red-hat-business-value\n"
},
{
"path": "deployment/docker flask fit predict/Dockerfile",
"chars": 125,
"preview": "FROM python:3.7-slim\nCOPY . /root\nWORKDIR /root \nRUN pip install flask gunicorn numpy sklearn scipy flask_wtf WTForms pa"
},
{
"path": "deployment/docker flask fit predict/README.MD",
"chars": 785,
"preview": "This is baseline for model deployment via docker and flask.\n\n\n1. Install\n```\nsudo docker-compose up\n```\n\n2. Build\n```\nsu"
},
{
"path": "deployment/docker flask fit predict/docker-compose.yml",
"chars": 306,
"preview": "version: \"3.3\"\nservices:\n webapp:\n build: .\n command: gunicorn -w 4 -b 0.0.0.0:5000 hello:app --reload\n #comma"
},
{
"path": "deployment/docker flask fit predict/hello.py",
"chars": 4068,
"preview": "from flask import (\n Flask,\n escape,\n flash,\n request,\n jsonify,\n redirect,\n url_for,\n render_te"
},
{
"path": "deployment/docker flask fit predict/templates/submit.html",
"chars": 240,
"preview": "<form method=\"POST\" action=\"/submit\" enctype=multipart/form-data>\n {{ form.hidden_tag() }}\n {{ form.name.label }} "
},
{
"path": "deployment/docker flask fit predict/train_model.py",
"chars": 542,
"preview": "import numpy as np\nfrom sklearn import datasets\nfrom sklearn.decomposition import PCA\n\nnp.random.seed(0)\n# import some d"
},
{
"path": "deployment/ds docker db template/README.md",
"chars": 1766,
"preview": "Taken from here: https://github.com/glebmikha/data-science-project-template\n# Data Science Project Template\n\nYoutube ins"
}
]
// ... and 47 more files (download for full content)
About this extraction
This page contains the full source code of the Diyago/ML-DL-scripts GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 247 files (43.6 MB), approximately 3.0M tokens, and a symbol index with 1371 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.